Skip to content

Commit

Permalink
Merge pull request #186 from costasd/add_partial_match
Browse files Browse the repository at this point in the history
Enable partial matching on allow-only/fail-on
  • Loading branch information
raimon49 authored Mar 28, 2024
2 parents 6ab64cd + f69136c commit c6e0a0d
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 6 deletions.
44 changes: 44 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Dump the software license list of Python packages installed with pip.
* [Verify options](#verify-options)
* [Option: fail\-on](#option-fail-on)
* [Option: allow\-only](#option-allow-only)
* [Option: partial\-match](#option-partial-match)
* [More Information](#more-information)
* [Dockerfile](#dockerfile)
* [About UnicodeEncodeError](#about-unicodeencodeerror)
Expand Down Expand Up @@ -545,6 +546,49 @@ $ echo $?
1
```

#### Option: partial\-match

If set, enables partial (substring) matching for `--fail-on` or `--allow-only`. Default is unset (False).

Usage:

```bash
(venv) $ pip-licenses --partial-match --allow-only="MIT License;BSD License"
(venv) $ pip-licenses --partial-match --fail-on="MIT License;BSD License"

```

**Note:** Semantics are the same as with `--fail-on` or `--allow-only`. This only enables substring matching.
```
# keyring library has 2 licenses
$ pip-licenses --package keyring
Name Version License
keyring 23.0.1 MIT License; Python Software Foundation License
# One or both licenses must be specified (order and case do not matter). The following checks will pass:
$ pip-licenses --package keyring --allow-only="MIT License"
$ pip-licenses --package keyring --allow-only="mit License"
$ pip-licenses --package keyring --allow-only="BSD License;MIT License"
$ pip-licenses --package keyring --allow-only="Python Software Foundation License"
$ pip-licenses --package keyring --allow-only="Python Software Foundation License;MIT License"
# These won't pass, as they're not a full match against one of the licenses
$ pip-licenses --package keyring --allow-only="MIT"
$ echo $?
1
$ pip-licenses --package keyring --allow-only="mit"
$ echo $?
1
# with --partial-match, they pass
$ pip-licenses --package keyring --partial-match --allow-only="MIT"
$ echo $?
0
$ pip-licenses --package keyring --partial-match --allow-only="mit"
$ echo $?
0
```


### More Information

Expand Down
50 changes: 44 additions & 6 deletions piplicenses.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,15 @@ def get_python_sys_path(executable: str) -> list[str]:
)

if fail_on_licenses:
failed_licenses = case_insensitive_set_intersect(
license_names, fail_on_licenses
)
failed_licenses = set()
if not args.partial_match:
failed_licenses = case_insensitive_set_intersect(
license_names, fail_on_licenses
)
else:
failed_licenses = case_insensitive_partial_match_set_intersect(
license_names, fail_on_licenses
)
if failed_licenses:
sys.stderr.write(
"fail-on license {} was found for package "
Expand All @@ -331,9 +337,16 @@ def get_python_sys_path(executable: str) -> list[str]:
sys.exit(1)

if allow_only_licenses:
uncommon_licenses = case_insensitive_set_diff(
license_names, allow_only_licenses
)
uncommon_licenses = set()
if not args.partial_match:
uncommon_licenses = case_insensitive_set_diff(
license_names, allow_only_licenses
)
else:
uncommon_licenses = case_insensitive_partial_match_set_diff(
license_names, allow_only_licenses
)

if len(uncommon_licenses) == len(license_names):
sys.stderr.write(
"license {} not in allow-only licenses was found"
Expand Down Expand Up @@ -409,6 +422,24 @@ def case_insensitive_set_intersect(set_a, set_b):
return common_items


def case_insensitive_partial_match_set_intersect(set_a, set_b):
    """Return the items of set_a that contain some item of set_b.

    The containment check is a case-insensitive substring test: an item of
    set_a is selected when the lowercased form of at least one item of
    set_b occurs inside the item's own lowercased form. Selected items are
    returned with their original spelling from set_a.
    """
    return {
        candidate
        for candidate in set_a
        if any(pattern.lower() in candidate.lower() for pattern in set_b)
    }


def case_insensitive_partial_match_set_diff(set_a, set_b):
    """Return the items of set_a that contain no item of set_b.

    The containment check is a case-insensitive substring test: an item of
    set_a is dropped when the lowercased form of at least one item of set_b
    occurs inside the item's own lowercased form. Surviving items keep their
    original spelling. set_a itself is not modified.
    """
    # Lowercase the patterns once instead of once per (item, pattern) pair.
    patterns = [item_b.lower() for item_b in set_b]

    uncommon_items = set()
    for item_a in set_a:
        lowered = item_a.lower()
        # Decide membership once per item. Removing the item inside a nested
        # loop raised KeyError whenever two or more patterns matched the same
        # item (e.g. patterns "MIT" and "License" against "MIT License").
        if not any(pattern in lowered for pattern in patterns):
            uncommon_items.add(item_a)
    return uncommon_items


def case_insensitive_set_diff(set_a, set_b):
"""Same as set.difference() but case-insensitive"""
uncommon_items = set()
Expand Down Expand Up @@ -761,6 +792,7 @@ class CustomNamespace(argparse.Namespace):
with_notice_file: bool
filter_strings: bool
filter_code_page: str
partial_match: bool
fail_on: Optional[str]
allow_only: Optional[str]

Expand Down Expand Up @@ -1055,6 +1087,12 @@ def create_parser() -> CompatibleArgumentParser:
help="fail (exit with code 1) on the first occurrence "
"of the licenses not in the semicolon-separated list",
)
verify_options.add_argument(
"--partial-match",
action="store_true",
default=False,
help="enables partial matching for --allow-only/--fail-on",
)

return parser

Expand Down
88 changes: 88 additions & 0 deletions test_piplicenses.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
CompatibleArgumentParser,
FromArg,
__pkgname__,
case_insensitive_partial_match_set_diff,
case_insensitive_partial_match_set_intersect,
case_insensitive_set_diff,
case_insensitive_set_intersect,
create_licenses_table,
Expand Down Expand Up @@ -769,6 +771,42 @@ def test_case_insensitive_set_intersect(self) -> None:
self.assertTrue({"revised BSD"} == b_intersect_c)
self.assertTrue(len(a_intersect_empty) == 0)

def test_case_insensitive_partial_match_set_diff(self) -> None:
    # Fixtures: a single full license name, a mix of a partial name and an
    # unrelated license, and a lowercase full name.
    mit_full = {"MIT License"}
    mit_partial_and_bsd = {"Mit", "BSD License"}
    mit_lowercase = {"mit license"}

    full_minus_partial = case_insensitive_partial_match_set_diff(
        mit_full, mit_partial_and_bsd
    )
    full_minus_lowercase = case_insensitive_partial_match_set_diff(
        mit_full, mit_lowercase
    )
    partial_minus_lowercase = case_insensitive_partial_match_set_diff(
        mit_partial_and_bsd, mit_lowercase
    )
    full_minus_nothing = case_insensitive_partial_match_set_diff(
        mit_full, set()
    )

    # "Mit" is a substring of "MIT License" (ignoring case), so nothing
    # is left over.
    self.assertTrue(len(full_minus_partial) == 0)
    # A full match differing only in case is also removed.
    self.assertTrue(len(full_minus_lowercase) == 0)
    # "mit license" is not contained in "BSD License", so it survives.
    self.assertIn("BSD License", partial_minus_lowercase)
    # With no patterns, the input item is kept.
    self.assertIn("MIT License", full_minus_nothing)

def test_case_insensitive_partial_match_set_intersect(self) -> None:
    # Fixtures: one license, the same license in another case alongside an
    # unrelated one, and a short lowercase fragment.
    revised = {"Revised BSD"}
    apache_and_revised = {"Apache License", "revised BSD"}
    fragment = {"bsd"}

    intersect = case_insensitive_partial_match_set_intersect
    revised_vs_mixed = intersect(revised, apache_and_revised)
    revised_vs_fragment = intersect(revised, fragment)
    mixed_vs_fragment = intersect(apache_and_revised, fragment)
    revised_vs_nothing = intersect(revised, set())

    # Matches are reported with the spelling used in the first argument.
    self.assertTrue(revised == revised_vs_mixed)
    self.assertTrue(revised == revised_vs_fragment)
    self.assertTrue({"revised BSD"} == mixed_vs_fragment)
    # No patterns means no matches.
    self.assertTrue(len(revised_vs_nothing) == 0)


class MockStdStream(object):
def __init__(self) -> None:
Expand Down Expand Up @@ -850,6 +888,35 @@ def test_allow_only(monkeypatch) -> None:
)


def test_allow_only_partial(monkeypatch) -> None:
    """Check that --partial-match with --allow-only reports MIT License.

    The allow list below has no entry that is a substring of "MIT License",
    so the run is expected to write the allow-only message to stderr and
    nothing to stdout.
    """
    allowed = ";".join(
        [
            "Bsd",
            "Apache",
            "Mozilla Public License 2.0 (MPL 2.0)",
            "Python Software Foundation License",
            "Public Domain",
            "GNU General Public License (GPL)",
            "GNU Library or Lesser General Public License (LGPL)",
        ]
    )
    cli_args = ["--partial-match", "--allow-only={}".format(allowed)]

    # Capture both output streams and make sys.exit a no-op so the call
    # returns instead of terminating the test run.
    captured_out = MockStdStream()
    captured_err = MockStdStream()
    monkeypatch.setattr(sys.stdout, "write", captured_out.write)
    monkeypatch.setattr(sys.stderr, "write", captured_err.write)
    monkeypatch.setattr(sys, "exit", lambda n: None)

    create_licenses_table(create_parser().parse_args(cli_args))

    expected_message = (
        "license MIT License not in allow-only licenses was found for "
        "package"
    )
    assert "" == captured_out.printed
    assert expected_message in captured_err.printed


def test_different_python() -> None:
import tempfile

Expand Down Expand Up @@ -891,6 +958,27 @@ def test_fail_on(monkeypatch) -> None:
)


def test_fail_on_partial_match(monkeypatch) -> None:
    """Check that --partial-match with --fail-on=MIT reports MIT License.

    The fragment "MIT" is expected to trigger the fail-on message for
    "MIT License" on stderr, with nothing written to stdout.
    """
    forbidden = ";".join(["MIT"])
    fail_on_args = ["--partial-match", "--fail-on={}".format(forbidden)]

    # Capture both output streams and make sys.exit a no-op so the call
    # returns instead of terminating the test run.
    captured_out = MockStdStream()
    captured_err = MockStdStream()
    monkeypatch.setattr(sys.stdout, "write", captured_out.write)
    monkeypatch.setattr(sys.stderr, "write", captured_err.write)
    monkeypatch.setattr(sys, "exit", lambda n: None)

    create_licenses_table(create_parser().parse_args(fail_on_args))

    expected_message = "fail-on license MIT License was found for " "package"
    assert "" == captured_out.printed
    assert expected_message in captured_err.printed


def test_enums() -> None:
class TestEnum(Enum):
PLAIN = P = auto()
Expand Down

0 comments on commit c6e0a0d

Please sign in to comment.