Skip to content

Commit

Permalink
Create class for storing settings, improve commandline options.
Browse files Browse the repository at this point in the history
Signed-off-by: Caroline Russell <caroline@appthreat.dev>
  • Loading branch information
cerrussell committed Jun 2, 2024
1 parent f540f93 commit a598482
Show file tree
Hide file tree
Showing 7 changed files with 237 additions and 124 deletions.
25 changes: 21 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,38 @@ ignore in the comparison and sorts all fields.
`pip install custom-json-diff`

## CLI Usage

```
usage: cjd [-h] -i INPUT INPUT [-o OUTPUT] [-b] (-c CONFIG | -x EXCLUDE [EXCLUDE ...] | -p {cdxgen,cdxgen-extended})
usage: cjd [-h] -i INPUT INPUT [-o OUTPUT] (-c CONFIG | -x EXCLUDE [EXCLUDE ...] | -p {cdxgen,cdxgen-extended}) {bom-diff} ...
positional arguments:
{bom-diff} subcommand help
bom-diff compare CycloneDX BOMs
options:
-h, --help show this help message and exit
-i INPUT INPUT, --input INPUT INPUT
Two JSON files to compare.
-o OUTPUT, --output OUTPUT
Export JSON of differences to this file.
-a, --allow-new-versions
Allow new versions in BOM comparison.
-b, --bom-diff Produce a comparison of CycloneDX BOMs.
-c CONFIG, --config-file CONFIG
Import TOML configuration file.
-x EXCLUDE [EXCLUDE ...], --exclude EXCLUDE [EXCLUDE ...]
Exclude field(s) from comparison.
-p {cdxgen,cdxgen-extended}, --preset {cdxgen,cdxgen-extended}
Preset to use
```

bom-diff usage
```
usage: cjd bom-diff [-h] [-a] [-r REPORT_TEMPLATE]
options:
-h, --help show this help message and exit
-a, --allow-new-versions
Allow new versions in BOM comparison.
-r REPORT_TEMPLATE, --report-template REPORT_TEMPLATE
Jinja2 template to use for report generation.
```

## Specifying fields to exclude
Expand Down Expand Up @@ -65,6 +78,9 @@ objects, you would specify `field1.field3.[].a` (do NOT include the array index,
Multiple fields may be specified separated by a space. To better understand what your fields should
be, check out json-flatten, which is the package used for this function.

> Note: When diffing BOMs, this exclusion list is applied only to the metadata, not to the
> components, services, or dependencies.

## Sorting

custom-json-diff will sort the imported JSON alphabetically. If your JSON document contains arrays
Expand All @@ -80,4 +96,5 @@ sort_keys = ["url", "content", "ref", "name", "value"]

[bom_diff]
allow_new_versions = false
report_template = "my_template.j2"
```
57 changes: 36 additions & 21 deletions custom_json_diff/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,28 @@
from custom_json_diff.custom_diff import (
compare_dicts, get_diff, perform_bom_diff, report_results
)
from custom_json_diff.custom_diff_classes import Options


def build_args():
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(help="subcommand help")
parser_bom_diff = subparsers.add_parser("bom-diff", help="compare CycloneDX BOMs")
parser_bom_diff.set_defaults(bom_diff=True)
parser_bom_diff.add_argument(
"-a",
"--allow-new-versions",
action="store_true",
help="Allow new versions in BOM comparison.",
dest="allow_new_versions",
)
# parser_bom_diff.add_argument(
# "-r",
# "--report-template",
# action="store",
# help="Jinja2 template to use for report generation.",
# dest="report_template",
# )
parser.add_argument(
"-i",
"--input",
Expand All @@ -23,21 +41,7 @@ def build_args():
help="Export JSON of differences to this file.",
dest="output",
)
parser.add_argument(
"-a",
"--allow-new-versions",
action="store_true",
help="Allow new versions in BOM comparison.",
dest="allow_new_versions",
)
parser.add_argument(
"-b",
"--bom-diff",
action="store_true",
help="Produce a comparison of CycloneDX BOMs.",
dest="bom_diff",
)
arg_group = parser.add_mutually_exclusive_group(required=True)
arg_group = parser.add_mutually_exclusive_group()
arg_group.add_argument(
"-c",
"--config-file",
Expand All @@ -58,7 +62,7 @@ def build_args():
"-p",
"--preset",
action="store",
help="Preset to use",
help="Preset to use.",
choices=["cdxgen", "cdxgen-extended"],
dest="preset",
)
Expand All @@ -67,14 +71,25 @@ def build_args():

def main():
args = build_args()
settings = args.preset or args.config or args.exclude
result, j1, j2 = compare_dicts(args.input[0], args.input[1], settings, args.bom_diff, args.allow_new_versions)
options = Options(
allow_new_versions=args.allow_new_versions,
bom_diff=args.bom_diff,
config=args.config,
exclude=args.exclude,
file_1=args.input[0],
file_2=args.input[1],
output=args.output,
preset=args.preset,
report_template=args.report_template,
sort_keys=[],
)
result, j1, j2 = compare_dicts(options)

if args.bom_diff:
result_summary = perform_bom_diff(j1, j2)
result_summary = perform_bom_diff(j1, j2, options)
else:
result_summary = get_diff(args.input[0], args.input[1], j1, j2)
report_results(result, result_summary, args.output)
result_summary = get_diff(j1, j2, options)
report_results(result, result_summary, options)


if __name__ == "__main__":
Expand Down
158 changes: 78 additions & 80 deletions custom_json_diff/custom_diff.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,81 @@
import json
import logging
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple

import toml
from jinja2 import Environment
from json_flatten import flatten, unflatten # type: ignore

from custom_json_diff.custom_diff_classes import BomDicts, FlatDicts
from custom_json_diff.custom_diff_classes import BomDicts, FlatDicts, Options


def check_regex(regex_keys: Set[re.Pattern], key: str) -> bool:
    """Report whether *key* is matched by at least one compiled pattern.

    Matching uses ``Pattern.match``, so each pattern is anchored at the start
    of *key*. An empty collection of patterns yields ``False``.
    """
    for pattern in regex_keys:
        if pattern.match(key):
            return True
    return False


def compare_dicts(json1: str, json2: str, settings: str | List[str], bom_diff: bool, allow_new_versions: bool) -> Tuple[int, FlatDicts | BomDicts, FlatDicts | BomDicts]:
json_1_data = load_json(json1, allow_new_versions=allow_new_versions, settings=settings,
bom_diff=bom_diff)
json_2_data = load_json(json2, allow_new_versions=allow_new_versions, settings=settings,
bom_diff=bom_diff)
if json_1_data.data == json_2_data.data:
def compare_dicts(options: Options) -> Tuple[int, FlatDicts | BomDicts, FlatDicts | BomDicts]:
json_1_data = load_json(options.file_1, options)
json_2_data = load_json(options.file_2, options)
if json_1_data == json_2_data:
return 0, json_1_data, json_2_data
else:
return 1, json_1_data, json_2_data


def export_html_report(outfile: str, diffs: Dict, options: Options) -> None:
    """Render the BOM comparison as an HTML report and write it to *outfile*.

    Args:
        outfile: Path of the HTML file to create; an existing file is overwritten.
        diffs: Comparison result holding a "commons_summary" section and a
            "diff_summary" section keyed by the two input file names. Every
            dependency entry found in those sections gains a "short_ref" key
            in place (see ``parse_purls``).
        options: Supplies ``file_1``/``file_2`` (the keys of "diff_summary")
            and ``report_template``, an optional path to a custom Jinja2
            template. When it is empty, the ``bom_diff_template.j2`` file
            located next to this module is used.
    """
    template_file = options.report_template or os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "bom_diff_template.j2")
    with open(template_file, "r", encoding="utf-8") as tmpl_file:
        template = tmpl_file.read()
    # The rendered values originate from the compared BOM files, which are
    # external input, so they must be HTML-escaped when written into the page.
    jinja_env = Environment(autoescape=True)
    jinja_tmpl = jinja_env.from_string(template)
    purl_regex = re.compile(r"[^/]+@[^?\s]+")

    # Resolve each section once; a missing section degrades to an empty
    # mapping instead of raising KeyError, matching the lookups used below.
    commons = diffs.get("commons_summary", {})
    diff_summary = diffs.get("diff_summary", {})
    diff_1 = diff_summary.get(options.file_1, {})
    diff_2 = diff_summary.get(options.file_2, {})

    # parse_purls annotates the dependency dicts in place, so the sections
    # already reference the updated entries afterwards.
    for section in (diff_1, diff_2, commons):
        parse_purls(section.get("dependencies", []), purl_regex)

    report_result = jinja_tmpl.render(
        common_lib=commons.get("components", {}).get("libraries", []),
        common_frameworks=commons.get("components", {}).get("frameworks", []),
        common_services=commons.get("services", []),
        common_deps=commons.get("dependencies", []),
        diff_lib_1=diff_1.get("components", {}).get("libraries", []),
        diff_lib_2=diff_2.get("components", {}).get("libraries", []),
        diff_frameworks_1=diff_1.get("components", {}).get("frameworks", []),
        diff_frameworks_2=diff_2.get("components", {}).get("frameworks", []),
        diff_services_1=diff_1.get("services", []),
        diff_services_2=diff_2.get("services", []),
        diff_deps_1=diff_1.get("dependencies", []),
        diff_deps_2=diff_2.get("dependencies", []),
        bom_1=options.file_1,
        bom_2=options.file_2,
    )
    with open(outfile, "w", encoding="utf-8") as f:
        f.write(report_result)
    print(f"HTML report generated: {outfile}")


def parse_purls(deps: List[Dict], regex: re.Pattern) -> List[Dict]:
    """Add a "short_ref" entry to every dependency dict in *deps*.

    For each dependency, "short_ref" is the first result of
    ``regex.findall`` applied to its "ref" value, or the unchanged "ref" when
    nothing matches. The dicts are modified in place and the same *deps*
    object is returned; a falsy *deps* is returned untouched.
    """
    if deps:
        for dependency in deps:
            reference = dependency["ref"]
            found = regex.findall(reference)
            if found:
                dependency["short_ref"] = found[0]
            else:
                dependency["short_ref"] = reference
    return deps


def export_results(outfile: str, diffs: Dict) -> None:
    """Write *diffs* to *outfile* as 2-space-indented JSON and announce the path.

    The file is opened for writing in UTF-8, replacing any existing content.
    """
    with open(outfile, mode="w", encoding="utf-8") as report:
        payload = json.dumps(diffs, indent=2)
        report.write(payload)
    print(f"JSON report generated: {outfile}")


def filter_dict(data: Dict, exclude_keys: Set[str], sort_keys: List[str]) -> FlatDicts:
data = flatten(sort_dict(data, sort_keys))
return FlatDicts(data).filter_out_keys(exclude_keys)
def filter_dict(data: Dict, options: Options) -> FlatDicts:
    """Sort, flatten and filter *data* according to *options*.

    The input is first ordered with ``sort_dict`` using ``options.sort_keys``,
    then flattened, wrapped in ``FlatDicts`` and finally stripped of the keys
    listed in ``options.exclude``.
    """
    ordered = sort_dict(data, options.sort_keys)
    flattened = FlatDicts(flatten(ordered))
    return flattened.filter_out_keys(options.exclude)


def get_bom_commons(bom_1: BomDicts, bom_2: BomDicts) -> Dict:
Expand All @@ -46,17 +88,17 @@ def get_bom_commons(bom_1: BomDicts, bom_2: BomDicts) -> Dict:
return commons


def get_bom_diff(bom_1: BomDicts, bom_2: BomDicts) -> Dict:
diff = get_diff(bom_1.filename, bom_2.filename, bom_1.data, bom_2.data)
def get_bom_diff(bom_1: BomDicts, bom_2: BomDicts, options: Options) -> Dict:
diff = get_diff(bom_1.data, bom_2.data, options)
diff[bom_1.filename] |= populate_bom_diff(bom_1, bom_2)
diff[bom_2.filename] |= populate_bom_diff(bom_2, bom_1)
return diff


def get_diff(f1: str | Path, f2: str | Path, j1: FlatDicts, j2: FlatDicts) -> Dict:
def get_diff(j1: FlatDicts, j2: FlatDicts, options: Options) -> Dict:
diff_1 = (j1 - j2).to_dict(unflat=True)
diff_2 = (j2 - j1).to_dict(unflat=True)
return {str(f1): diff_1, str(f2): diff_2}
return {options.file_1: diff_1, options.file_2: diff_2}


def get_sort_key(data: Dict, sort_keys: List[str]) -> str | bool:
Expand All @@ -70,23 +112,7 @@ def handle_results(outfile: str, diffs: Dict) -> None:
print(json.dumps(diffs, indent=2))


def import_toml(toml_file_path: str) -> Tuple[Set[str], List[str], bool]:
with open(toml_file_path, "r", encoding="utf-8") as f:
try:
toml_data = toml.load(f)
except toml.TomlDecodeError:
logging.error("Invalid TOML.")
sys.exit(1)
return (
set(toml_data.get("settings", {}).get("excluded_fields", [])),
toml_data.get("settings", {}).get("sort_keys", []),
toml_data.get("bom_diff", {}).get("allow_new_versions", False))


def load_json(json_file: str, allow_new_versions: bool,
settings: str | List[str] | None = None, exclude_keys: Set[str] | None = None,
sort_keys: List[str] | None = None,
bom_diff: bool | None = False) -> FlatDicts | BomDicts:
def load_json(json_file: str, options: Options) -> FlatDicts | BomDicts:
try:
with open(json_file, "r", encoding="utf-8") as f:
data = json.load(f)
Expand All @@ -96,31 +122,17 @@ def load_json(json_file: str, allow_new_versions: bool,
except json.JSONDecodeError:
logging.error("Invalid JSON: %s", json_file)
sys.exit(1)
if bom_diff:
if options.bom_diff:
data = sort_dict(data, ["url", "content", "ref", "name", "value"])
return BomDicts(allow_new_versions, json_file, data)
if settings:
exclude_keys, sort_keys, allow_new_versions = load_settings(settings)
elif not exclude_keys:
exclude_keys = set()
if not sort_keys:
sort_keys = []
return filter_dict(data, exclude_keys, sort_keys)


def load_settings(settings: str | List[str]) -> Tuple[Set[str], List[str], bool]:
if isinstance(settings, str):
if settings.endswith(".toml"):
exclude_keys, sort_keys, allow_new_versions = import_toml(settings)
else:
exclude_keys, sort_keys, allow_new_versions = set_excluded_fields(settings)
else:
exclude_keys, sort_keys, allow_new_versions = set(excluded), [], False # type: ignore
return exclude_keys, sort_keys, allow_new_versions
return BomDicts(options.allow_new_versions, json_file, data)
return filter_dict(data, options)


def perform_bom_diff(bom_1: BomDicts, bom_2: BomDicts) -> Dict:
return {"commons_summary":get_bom_commons(bom_1, bom_2), "diff_summary": get_bom_diff(bom_1, bom_2)}
def perform_bom_diff(bom_1: BomDicts, bom_2: BomDicts, options: Options) -> Dict:
    """Assemble the BOM comparison summary for two BOMs.

    Returns a dict with two entries: "commons_summary", produced by
    ``get_bom_commons``, and "diff_summary", produced by ``get_bom_diff``.
    """
    shared = get_bom_commons(bom_1, bom_2)
    differing = get_bom_diff(bom_1, bom_2, options)
    return {"commons_summary": shared, "diff_summary": differing}


def populate_bom_diff(bom_1: BomDicts, bom_2: BomDicts) -> Dict:
Expand All @@ -130,41 +142,27 @@ def populate_bom_diff(bom_1: BomDicts, bom_2: BomDicts) -> Dict:
"libraries": [
i.original_data
for i in bom_1.components
if i not in bom_2.components and i.component_type == "library"
],
"frameworks": [
i.original_data for i in bom_1.components if
i not in bom_2.components and i.component_type == "framework"
]}
if i not in bom_2.components and i.component_type == "library"],
"frameworks": [
i.original_data for i in bom_1.components if
i not in bom_2.components and i.component_type == "framework"
]
}
}
diff |= {"services": [i.original_data for i in bom_1.services if i not in bom_2.services]}
diff |= {"dependencies": [i.original_data for i in bom_1.dependencies if i not in bom_2.dependencies]}
return diff


def report_results(status: int, diffs: Dict, outfile: str):
def report_results(status: int, diffs: Dict, options: Options) -> None:
if status == 0:
print("No differences found.")
else:
print("Differences found.")
handle_results(outfile, diffs)


def set_excluded_fields(preset: str) -> Tuple[Set[str], List[str], bool]:
excluded = []
sort_fields = []
if preset.startswith("cdxgen"):
excluded.extend(["metadata.timestamp", "serialNumber",
"metadata.tools.components.[].version",
"metadata.tools.components.[].purl",
"metadata.tools.components.[].bom-ref",
"components.[].properties",
"components.[].evidence"
])
if preset == "cdxgen-extended":
excluded.append("components.[].licenses")
sort_fields.extend(["url", "content", "ref", "name", "value"])
return set(excluded), sort_fields, False
handle_results(options.output, diffs)
if options.bom_diff and options.output:
report_file = options.output.replace(".json", "") + ".html"
export_html_report(report_file, diffs, options)


def sort_dict(result: Dict, sort_keys: List[str]) -> Dict:
Expand Down
Loading

0 comments on commit a598482

Please sign in to comment.