Skip to content

Commit

Permalink
Enable partial matching on allow-only/fail-on
Browse files Browse the repository at this point in the history
This changeset introduces a way to do partial matching on license names.

Say, to match all BSD licenses for a given --fail-on=BSD.

To do so, it:
* Introduces a new cli boolean parameter (default false), called --partial-match
  When enabled, enables partial matching.
* Introduces slower, partial-match counterparts of the existing intersection/diff helpers
* Introduces tests to test how this works
  • Loading branch information
costasd committed Mar 26, 2024
1 parent 6ab64cd commit 7ccee8f
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 6 deletions.
50 changes: 44 additions & 6 deletions piplicenses.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,15 @@ def get_python_sys_path(executable: str) -> list[str]:
)

if fail_on_licenses:
failed_licenses = case_insensitive_set_intersect(
license_names, fail_on_licenses
)
failed_licenses = set()
if not args.partial_match:
failed_licenses = case_insensitive_set_intersect(
license_names, fail_on_licenses
)
else:
failed_licenses = case_insensitive_partial_match_set_intersect(
license_names, fail_on_licenses
)
if failed_licenses:
sys.stderr.write(
"fail-on license {} was found for package "
Expand All @@ -331,9 +337,16 @@ def get_python_sys_path(executable: str) -> list[str]:
sys.exit(1)

if allow_only_licenses:
uncommon_licenses = case_insensitive_set_diff(
license_names, allow_only_licenses
)
uncommon_licenses = set()
if not args.partial_match:
uncommon_licenses = case_insensitive_set_diff(
license_names, allow_only_licenses
)
else:
uncommon_licenses = case_insensitive_partial_match_set_diff(
license_names, allow_only_licenses
)

if len(uncommon_licenses) == len(license_names):
sys.stderr.write(
"license {} not in allow-only licenses was found"
Expand Down Expand Up @@ -409,6 +422,24 @@ def case_insensitive_set_intersect(set_a, set_b):
return common_items


def case_insensitive_partial_match_set_intersect(set_a, set_b):
    """Return the items of set_a that contain some item of set_b.

    An item of set_a is kept when at least one item of set_b occurs in it
    as a substring, comparing both sides in lower case. Items are returned
    with their original spelling from set_a.
    """
    # Lower-case the patterns once instead of once per candidate item.
    patterns = [pattern.lower() for pattern in set_b]
    return {
        candidate
        for candidate in set_a
        if any(pattern in candidate.lower() for pattern in patterns)
    }


def case_insensitive_partial_match_set_diff(set_a, set_b):
    """Return the items of set_a that contain no item of set_b.

    An item of set_a is dropped when at least one item of set_b occurs in
    it as a substring, comparing both sides in lower case. The remaining
    items are returned in a new set with their original spelling; set_a
    itself is not modified.
    """
    # Lower-case the patterns once instead of once per candidate item.
    patterns = [pattern.lower() for pattern in set_b]
    uncommon_items = set()
    for item_a in set_a:
        lowered = item_a.lower()
        # Decide once per item: an item matched by several patterns must
        # only be excluded once (removing it repeatedly raised KeyError).
        if not any(pattern in lowered for pattern in patterns):
            uncommon_items.add(item_a)
    return uncommon_items


def case_insensitive_set_diff(set_a, set_b):
"""Same as set.difference() but case-insensitive"""
uncommon_items = set()
Expand Down Expand Up @@ -761,6 +792,7 @@ class CustomNamespace(argparse.Namespace):
with_notice_file: bool
filter_strings: bool
filter_code_page: str
partial_match: bool
fail_on: Optional[str]
allow_only: Optional[str]

Expand Down Expand Up @@ -1055,6 +1087,12 @@ def create_parser() -> CompatibleArgumentParser:
help="fail (exit with code 1) on the first occurrence "
"of the licenses not in the semicolon-separated list",
)
verify_options.add_argument(
"--partial-match",
action="store_true",
default=False,
help="enables partial matching for --allow-only/--fail-on",
)

return parser

Expand Down
88 changes: 88 additions & 0 deletions test_piplicenses.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
CompatibleArgumentParser,
FromArg,
__pkgname__,
case_insensitive_partial_match_set_diff,
case_insensitive_partial_match_set_intersect,
case_insensitive_set_diff,
case_insensitive_set_intersect,
create_licenses_table,
Expand Down Expand Up @@ -769,6 +771,42 @@ def test_case_insensitive_set_intersect(self) -> None:
self.assertTrue({"revised BSD"} == b_intersect_c)
self.assertTrue(len(a_intersect_empty) == 0)

def test_case_insensitive_partial_match_set_diff(self) -> None:
    """Check the partial-match diff on matching, non-matching and empty sets."""
    diff = case_insensitive_partial_match_set_diff
    mit_full = {"MIT License"}
    mixed = {"Mit", "BSD License"}
    mit_lower = {"mit license"}

    against_mixed = diff(mit_full, mixed)
    against_lower = diff(mit_full, mit_lower)
    mixed_against_lower = diff(mixed, mit_lower)
    against_nothing = diff(mit_full, set())

    # "Mit" is a case-insensitive substring of "MIT License".
    self.assertTrue(len(against_mixed) == 0)
    self.assertTrue(len(against_lower) == 0)
    # "mit license" does not occur inside "BSD License", so it survives.
    self.assertIn("BSD License", mixed_against_lower)
    self.assertIn("MIT License", against_nothing)

def test_case_insensitive_partial_match_set_intersect(self) -> None:
    """Check the partial-match intersection on matching and empty sets."""
    intersect = case_insensitive_partial_match_set_intersect
    revised = {"Revised BSD"}
    mixed = {"Apache License", "revised BSD"}
    short = {"bsd"}

    revised_and_mixed = intersect(revised, mixed)
    revised_and_short = intersect(revised, short)
    mixed_and_short = intersect(mixed, short)
    revised_and_nothing = intersect(revised, set())

    # Results keep the spelling used in the first argument.
    self.assertTrue(revised == revised_and_mixed)
    self.assertTrue(revised == revised_and_short)
    self.assertTrue({"revised BSD"} == mixed_and_short)
    self.assertTrue(len(revised_and_nothing) == 0)


class MockStdStream(object):
def __init__(self) -> None:
Expand Down Expand Up @@ -850,6 +888,35 @@ def test_allow_only(monkeypatch) -> None:
)


def test_allow_only_partial(monkeypatch) -> None:
    """Run --allow-only with --partial-match and expect an MIT License report.

    The allow list contains no entry matching "MIT License", so the run is
    expected to write the "not in allow-only licenses" message to stderr
    and nothing to stdout.
    """
    allowed = ";".join(
        (
            "Bsd",
            "Apache",
            "Mozilla Public License 2.0 (MPL 2.0)",
            "Python Software Foundation License",
            "Public Domain",
            "GNU General Public License (GPL)",
            "GNU Library or Lesser General Public License (LGPL)",
        )
    )
    cli_args = ["--partial-match", "--allow-only={}".format(allowed)]

    captured_out = MockStdStream()
    captured_err = MockStdStream()
    monkeypatch.setattr(sys.stdout, "write", captured_out.write)
    monkeypatch.setattr(sys.stderr, "write", captured_err.write)
    # Turn sys.exit into a no-op so the call below returns to the test.
    monkeypatch.setattr(sys, "exit", lambda n: None)

    create_licenses_table(create_parser().parse_args(cli_args))

    expected_message = (
        "license MIT License not in allow-only licenses was found for "
        "package"
    )
    assert "" == captured_out.printed
    assert expected_message in captured_err.printed


def test_different_python() -> None:
import tempfile

Expand Down Expand Up @@ -891,6 +958,27 @@ def test_fail_on(monkeypatch) -> None:
)


def test_fail_on_partial_match(monkeypatch) -> None:
    """Run --fail-on=MIT with --partial-match and expect an MIT License hit.

    The run is expected to write the "fail-on license MIT License was
    found" message to stderr and nothing to stdout.
    """
    forbidden = ";".join(("MIT",))
    fail_on_args = ["--partial-match", "--fail-on={}".format(forbidden)]

    captured_out = MockStdStream()
    captured_err = MockStdStream()
    monkeypatch.setattr(sys.stdout, "write", captured_out.write)
    monkeypatch.setattr(sys.stderr, "write", captured_err.write)
    # Turn sys.exit into a no-op so the call below returns to the test.
    monkeypatch.setattr(sys, "exit", lambda n: None)

    create_licenses_table(create_parser().parse_args(fail_on_args))

    expected_message = (
        "fail-on license MIT License was found for "
        "package"
    )
    assert "" == captured_out.printed
    assert expected_message in captured_err.printed


def test_enums() -> None:
class TestEnum(Enum):
PLAIN = P = auto()
Expand Down

0 comments on commit 7ccee8f

Please sign in to comment.