diff --git a/run_community_analyzer.py b/run_community_analyzer.py
index be722d24..04abedf0 100644
--- a/run_community_analyzer.py
+++ b/run_community_analyzer.py
@@ -1,5 +1,6 @@
 import argparse
 import json
+import logging
 import os
 import os.path
 
@@ -9,8 +10,17 @@
 sentry.initialize()
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
+    datefmt="%Y-%m-%d:%H:%M:%S",
+    level=logging.DEBUG,
+)
+
 
 class CommunityAnalyzerArgs:
+    """Arguments for the community analyzer."""
+
     analyzer: str
 
 
@@ -20,7 +30,7 @@ def get_issue_map(analyzer_name: str) -> str:
     return os.path.join(analyzers_dir, analyzer_name, "utils", "issue_map.json")
 
 
-def get_files_to_analyze() -> set[str]:
+def get_files_to_analyze(code_path: str) -> set[str]:
     """
     Read the analysis config to get the list of files to analyze.
     Always raise issues only in these files.
@@ -34,11 +44,16 @@ def get_files_to_analyze() -> set[str]:
     with open(analysis_config_path) as file:
         analysis_config = json.load(file)
 
-    return set(analysis_config["files"])
+    logger.info("Files in analysis config: %s", analysis_config["files"])
+    return {
+        os.path.relpath(analysis_file, code_path)
+        for analysis_file in analysis_config["files"]
+    }
 
 
 def main(argv: list[str] | None = None) -> None:
     """Runs the CLI."""
+    code_path = os.getenv("CODE_PATH", "/code")
     toolbox_path = os.getenv("TOOLBOX_PATH", "/toolbox")
     output_path = os.path.join(toolbox_path, "analysis_results.json")
     artifacts_path = os.getenv("ARTIFACTS_PATH", "/artifacts")
@@ -53,7 +68,7 @@ def main(argv: list[str] | None = None) -> None:
     analyzer_name = args.analyzer
     issue_map_path = get_issue_map(analyzer_name)
 
-    modified_files = get_files_to_analyze()
+    modified_files = get_files_to_analyze(code_path)
     run_sarif_parser(
         artifacts_path, output_path, issue_map_path, modified_files=modified_files
     )
diff --git a/sarif-parser/src/sarif_parser/__init__.py b/sarif-parser/src/sarif_parser/__init__.py
index e7b4679a..8f55099e 100644
--- a/sarif-parser/src/sarif_parser/__init__.py
+++ b/sarif-parser/src/sarif_parser/__init__.py
@@ -3,11 +3,19 @@
 
 import hashlib
 import json
+import logging
 import os.path
 from typing import Any, Sequence, TypedDict, Union
 
 import sentry
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
+    datefmt="%Y-%m-%d:%H:%M:%S",
+    level=logging.DEBUG,
+)
+
 
 class Issue(TypedDict):
     issue_code: str
@@ -16,16 +24,22 @@ class Issue(TypedDict):
 
 
 class IssueLocation(TypedDict):
+    """Location of an issue in a file."""
+
     path: str
     position: IssuePosition
 
 
 class IssuePosition(TypedDict):
+    """Position of an issue in a file."""
+
     begin: LineColumn
     end: LineColumn
 
 
 class LineColumn(TypedDict):
+    """Line and column of an issue in a file."""
+
     line: int
     column: int
 
@@ -42,8 +56,9 @@ def parse(
     issue_map = {}
 
     deepsource_issues: list[Issue] = []
-
+    total_report_issues = 0
     for run in sarif_data["runs"]:
+        total_report_issues += len(run["results"])
         for issue in run["results"]:
             assert len(issue["locations"]) == 1
             location = issue["locations"][0]["physicalLocation"]
@@ -104,6 +119,13 @@ def parse(
             )
             deepsource_issues.append(deepsource_issue)
 
+    logger.info(
+        "Total issues in SARIF report: %s. \n"
+        "Issues extracted for the run in files sent for analysis: %s",
+        total_report_issues,
+        len(deepsource_issues),
+    )
+
     return deepsource_issues
diff --git a/tests/test_community_analyzer.py b/tests/test_community_analyzer.py
index 24486e52..a5b3f997 100644
--- a/tests/test_community_analyzer.py
+++ b/tests/test_community_analyzer.py
@@ -184,10 +184,12 @@
 def test_community_analyzer(tmp_path: Path) -> None:
     """Test for `run_community_analyzer.main()`, to test `issue_map.json` parsing."""
+    code_path = "/code"
     toolbox_path = tmp_path.as_posix()
     artifacts_path = os.path.join(os.path.dirname(__file__), "test_artifacts")
     analysis_config_path = os.path.join(toolbox_path, "analysis_config.json")
-    modified_files = extract_filepaths_from_deepsource_json(expected_result)
+    modified_files = extract_filepaths_from_deepsource_json(code_path, expected_result)
 
+    os.environ["CODE_PATH"] = code_path
     os.environ["TOOLBOX_PATH"] = toolbox_path
     os.environ["ARTIFACTS_PATH"] = artifacts_path
@@ -214,7 +216,7 @@
     # Note: There are 7 issues in this file in our report fixture.
     # See `expected_result`.
     modified_files = [
-        "charts/runner/templates/tests/test-connection.yaml",
+        os.path.join(code_path, "charts/runner/templates/tests/test-connection.yaml"),
     ]
     with temp_analysis_config(analysis_config_path, modified_files):
         run_community_analyzer.main(["--analyzer=kube-linter"])
diff --git a/tests/test_duplicate_artifacts.py b/tests/test_duplicate_artifacts.py
index 53fc4bf4..ec4d0f34 100644
--- a/tests/test_duplicate_artifacts.py
+++ b/tests/test_duplicate_artifacts.py
@@ -29,6 +29,7 @@ def patch_env_values(
 def test_duplicate_artifacts(tmp_path: pathlib.Path) -> None:
     """Make sure results are not duplicated when same artifacts are reported more than"""
     # create a temporary directory to store duplicate artifacts
+    code_path = "/code"
     toolbox_path = tmp_path / "toolbox"
     artifacts_dir = tmp_path / "artifacts"
     toolbox_path.mkdir()
@@ -44,7 +45,7 @@ def test_duplicate_artifacts(tmp_path: pathlib.Path) -> None:
     with open(artifact_path) as fp:
         data = json.load(fp)
     sarif_data = json.loads(data["data"])
-    modified_filepath = extract_filepaths_from_sarif(sarif_data)
+    modified_filepath = extract_filepaths_from_sarif(code_path, sarif_data)
 
     temp_analysis_config_path = os.path.join(toolbox_path, "analysis_config.json")
diff --git a/tests/test_report_parsing.py b/tests/test_report_parsing.py
index 4c00e0e4..17083249 100644
--- a/tests/test_report_parsing.py
+++ b/tests/test_report_parsing.py
@@ -30,9 +30,10 @@ def parse_single_artifact(
     """
     Run community analyzer on a single artifact and return the deepsource result object.
     """
+    code_path = "/code"
    artifact_path = make_artifact(report_path)
     artifact_filepaths = extract_filepaths_from_sarif(
-        json.loads(json.load(open(artifact_path))["data"])
+        code_path, json.loads(json.load(open(artifact_path))["data"])
     )
     toolbox_path = tempfile.gettempdir()
     os.environ["ARTIFACTS_PATH"] = artifact_path
diff --git a/tests/testutils.py b/tests/testutils.py
index 28b85271..27da8e4d 100644
--- a/tests/testutils.py
+++ b/tests/testutils.py
@@ -4,8 +4,8 @@
 from typing import Any, Iterator
 
 
-def extract_filepaths_from_sarif(sarif: dict[str, Any]) -> list[str]:
-    """Extracts filepaths from a SARIF file."""
+def extract_filepaths_from_sarif(code_path: str, sarif: dict[str, Any]) -> list[str]:
+    """Extracts filepaths from a SARIF file, and prefix it with code path."""
     filepaths = []
     for run in sarif["runs"]:
         for result in run["results"]:
@@ -13,18 +13,18 @@
                 "uri"
             ]
-            filepaths.append(filepath)
+            filepaths.append(os.path.join(code_path, filepath))
 
     return filepaths
 
 
 def extract_filepaths_from_deepsource_json(
-    deepsource_json: dict[str, Any]
+    code_path: str, deepsource_json: dict[str, Any]
 ) -> list[str]:
-    """Extracts filepaths from a DeepSource JSON file."""
+    """Extracts filepaths from a DeepSource JSON file, and prefix it with code path."""
     filepaths = []
     for issue in deepsource_json["issues"]:
-        filepaths.append(issue["location"]["path"])
+        filepaths.append(os.path.join(code_path, issue["location"]["path"]))
 
     return filepaths