chore: add code path handling and logs #16

Merged 4 commits on Dec 4, 2023
run_community_analyzer.py (18 additions, 3 deletions)

```diff
@@ -1,5 +1,6 @@
 import argparse
 import json
+import logging
 import os
 import os.path
 
@@ -9,8 +10,17 @@
 
 sentry.initialize()
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
+    datefmt="%Y-%m-%d:%H:%M:%S",
+    level=logging.DEBUG,
+)
+
 
 class CommunityAnalyzerArgs:
+    """Arguments for the community analyzer."""
+
     analyzer: str
 
 
@@ -20,7 +30,7 @@ def get_issue_map(analyzer_name: str) -> str:
     return os.path.join(analyzers_dir, analyzer_name, "utils", "issue_map.json")
 
 
-def get_files_to_analyze() -> set[str]:
+def get_files_to_analyze(code_path: str) -> set[str]:
     """
     Read the analysis config to get the list of files to analyze.
     Always raise issues only in these files.
@@ -34,11 +44,16 @@ def get_files_to_analyze() -> set[str]:
     with open(analysis_config_path) as file:
         analysis_config = json.load(file)
 
-    return set(analysis_config["files"])
+    logger.info("Files in analysis config: %s", analysis_config["files"])
+    return {
+        os.path.relpath(analysis_file, code_path)
+        for analysis_file in analysis_config["files"]
+    }
 
 
 def main(argv: list[str] | None = None) -> None:
     """Runs the CLI."""
+    code_path = os.getenv("CODE_PATH", "/code")
     toolbox_path = os.getenv("TOOLBOX_PATH", "/toolbox")
     output_path = os.path.join(toolbox_path, "analysis_results.json")
     artifacts_path = os.getenv("ARTIFACTS_PATH", "/artifacts")
@@ -53,7 +68,7 @@ def main(argv: list[str] | None = None) -> None:
 
     analyzer_name = args.analyzer
     issue_map_path = get_issue_map(analyzer_name)
-    modified_files = get_files_to_analyze()
+    modified_files = get_files_to_analyze(code_path)
     run_sarif_parser(
         artifacts_path, output_path, issue_map_path, modified_files=modified_files
    )
```
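The core of this change is path normalization: the tests below write absolute paths (rooted at `CODE_PATH`) into `analysis_config.json`, while SARIF reports refer to files relative to the repository root, so `get_files_to_analyze()` now strips the `CODE_PATH` prefix with `os.path.relpath` before the two are compared. A minimal sketch of that normalization, using made-up paths:

```python
import os.path

# Made-up example values; in the analyzer these come from
# analysis_config.json and the CODE_PATH environment variable.
code_path = "/code"
config_files = [
    "/code/src/app.py",
    "/code/charts/runner/templates/tests/test-connection.yaml",
]

# Mirrors the new body of get_files_to_analyze():
modified_files = {os.path.relpath(path, code_path) for path in config_files}
print(sorted(modified_files))
# ['charts/runner/templates/tests/test-connection.yaml', 'src/app.py']
```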
sarif-parser/src/sarif_parser/__init__.py (23 additions, 1 deletion)

```diff
@@ -3,11 +3,19 @@
 
 import hashlib
 import json
+import logging
 import os.path
 from typing import Any, Sequence, TypedDict, Union
 
 import sentry
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
+    datefmt="%Y-%m-%d:%H:%M:%S",
+    level=logging.DEBUG,
+)
+
 
 class Issue(TypedDict):
     issue_code: str
@@ -16,16 +24,22 @@ class Issue(TypedDict):
 
 
 class IssueLocation(TypedDict):
+    """Location of an issue in a file."""
+
     path: str
     position: IssuePosition
 
 
 class IssuePosition(TypedDict):
+    """Position of an issue in a file."""
+
     begin: LineColumn
     end: LineColumn
 
 
 class LineColumn(TypedDict):
+    """Line and column of an issue in a file."""
+
     line: int
     column: int
 
@@ -42,8 +56,9 @@ def parse(
     issue_map = {}
 
     deepsource_issues: list[Issue] = []
-
+    total_report_issues = 0
     for run in sarif_data["runs"]:
+        total_report_issues += len(run["results"])
         for issue in run["results"]:
             assert len(issue["locations"]) == 1
             location = issue["locations"][0]["physicalLocation"]
@@ -104,6 +119,13 @@ def parse(
     )
     deepsource_issues.append(deepsource_issue)
 
+    logger.info(
+        "Total issues in SARIF report: %s. \n"
+        "Issues extracted for the run in files sent for analysis: %s",
+        total_report_issues,
+        len(deepsource_issues),
+    )
+
     return deepsource_issues
 
 
```
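Given the `basicConfig` format set up above, the new summary log should render roughly as in the sketch below; the counts, filename, and timestamp are illustrative, not taken from a real run:

```python
import logging

logging.basicConfig(
    format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
    datefmt="%Y-%m-%d:%H:%M:%S",
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

# Illustrative counts: 9 issues in the report, 7 inside analyzed files.
logger.info(
    "Total issues in SARIF report: %s. \n"
    "Issues extracted for the run in files sent for analysis: %s",
    9,
    7,
)
# Example output:
# 2023-12-04:12:00:00 INFO     [example.py:11] Total issues in SARIF report: 9. 
# Issues extracted for the run in files sent for analysis: 7
```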
tests/test_community_analyzer.py (4 additions, 2 deletions)

```diff
@@ -184,10 +184,12 @@
 
 def test_community_analyzer(tmp_path: Path) -> None:
     """Test for `run_community_analyzer.main()`, to test `issue_map.json` parsing."""
+    code_path = "/code"
     toolbox_path = tmp_path.as_posix()
     artifacts_path = os.path.join(os.path.dirname(__file__), "test_artifacts")
     analysis_config_path = os.path.join(toolbox_path, "analysis_config.json")
-    modified_files = extract_filepaths_from_deepsource_json(expected_result)
+    modified_files = extract_filepaths_from_deepsource_json(code_path, expected_result)
+    os.environ["CODE_PATH"] = code_path
     os.environ["TOOLBOX_PATH"] = toolbox_path
     os.environ["ARTIFACTS_PATH"] = artifacts_path
 
@@ -214,7 +216,7 @@ def test_community_analyzer(tmp_path: Path) -> None:
     # Note: There are 7 issues in this file in our report fixture.
     # See `expected_result`.
     modified_files = [
-        "charts/runner/templates/tests/test-connection.yaml",
+        os.path.join(code_path, "charts/runner/templates/tests/test-connection.yaml"),
     ]
     with temp_analysis_config(analysis_config_path, modified_files):
         run_community_analyzer.main(["--analyzer=kube-linter"])
```
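The test drives everything through `temp_analysis_config`, whose definition is not part of this diff. A hypothetical minimal version, assuming it only needs to write an `analysis_config.json` containing the `files` key that `get_files_to_analyze` reads:

```python
import json
import os
from contextlib import contextmanager
from typing import Iterator


@contextmanager
def temp_analysis_config(path: str, files: list[str]) -> Iterator[None]:
    # Hypothetical sketch of the helper used in the tests: write the
    # analysis config, hand control to the test body, then clean up.
    with open(path, "w") as fp:
        json.dump({"files": files}, fp)
    try:
        yield
    finally:
        os.remove(path)
```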
tests/test_duplicate_artifacts.py (2 additions, 1 deletion)

```diff
@@ -29,6 +29,7 @@ def patch_env_values(
 def test_duplicate_artifacts(tmp_path: pathlib.Path) -> None:
     """Make sure results are not duplicated when same artifacts are reported more than"""
     # create a temporary directory to store duplicate artifacts
+    code_path = "/code"
     toolbox_path = tmp_path / "toolbox"
     artifacts_dir = tmp_path / "artifacts"
     toolbox_path.mkdir()
@@ -44,7 +45,7 @@ def test_duplicate_artifacts(tmp_path: pathlib.Path) -> None:
     with open(artifact_path) as fp:
         data = json.load(fp)
     sarif_data = json.loads(data["data"])
-    modified_filepath = extract_filepaths_from_sarif(sarif_data)
+    modified_filepath = extract_filepaths_from_sarif(code_path, sarif_data)
 
     temp_analysis_config_path = os.path.join(toolbox_path, "analysis_config.json")
 
```
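For context, `extract_filepaths_from_sarif` (its updated definition appears in tests/testutils.py at the bottom of this diff) now prefixes each SARIF path with the code path. A self-contained sketch with a made-up SARIF fragment, the function body copied and lightly reflowed from that diff:

```python
import os.path
from typing import Any


def extract_filepaths_from_sarif(code_path: str, sarif: dict[str, Any]) -> list[str]:
    # Copied from tests/testutils.py as changed in this PR.
    filepaths = []
    for run in sarif["runs"]:
        for result in run["results"]:
            filepath = result["locations"][0]["physicalLocation"][
                "artifactLocation"
            ]["uri"]
            filepaths.append(os.path.join(code_path, filepath))
    return filepaths


# Made-up single-result SARIF document:
sarif_data = {
    "runs": [
        {
            "results": [
                {
                    "locations": [
                        {
                            "physicalLocation": {
                                "artifactLocation": {"uri": "src/app.py"}
                            }
                        }
                    ]
                }
            ]
        }
    ]
}

print(extract_filepaths_from_sarif("/code", sarif_data))
# ['/code/src/app.py']
```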
tests/test_report_parsing.py (2 additions, 1 deletion)

```diff
@@ -30,9 +30,10 @@ def parse_single_artifact(
     """
     Run community analyzer on a single artifact and return the deepsource result object.
     """
+    code_path = "/code"
    artifact_path = make_artifact(report_path)
     artifact_filepaths = extract_filepaths_from_sarif(
-        json.loads(json.load(open(artifact_path))["data"])
+        code_path, json.loads(json.load(open(artifact_path))["data"])
     )
     toolbox_path = tempfile.gettempdir()
     os.environ["ARTIFACTS_PATH"] = artifact_path
```
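One detail worth spelling out: the artifact file is JSON whose `data` field holds the SARIF report as a JSON-encoded string, which is why the tests decode twice with `json.loads(json.load(...)["data"])`. A small sketch of that layout, inferred from the tests, so treat it as an assumption:

```python
import json

# Assumed artifact layout: the SARIF report is stored as a JSON string
# under the "data" key of the artifact file.
sarif_report = {"runs": [{"results": []}]}
artifact = {"data": json.dumps(sarif_report)}

with open("artifact.json", "w") as fp:
    json.dump(artifact, fp)

# First decode: the artifact file itself; second: the embedded SARIF string.
sarif_data = json.loads(json.load(open("artifact.json"))["data"])
print(sarif_data)  # {'runs': [{'results': []}]}
```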
tests/testutils.py (6 additions, 6 deletions)

```diff
@@ -4,27 +4,27 @@
 from typing import Any, Iterator
 
 
-def extract_filepaths_from_sarif(sarif: dict[str, Any]) -> list[str]:
-    """Extracts filepaths from a SARIF file."""
+def extract_filepaths_from_sarif(code_path: str, sarif: dict[str, Any]) -> list[str]:
+    """Extracts filepaths from a SARIF file, and prefix it with code path."""
     filepaths = []
     for run in sarif["runs"]:
         for result in run["results"]:
             filepath = result["locations"][0]["physicalLocation"]["artifactLocation"][
                 "uri"
             ]
 
-            filepaths.append(filepath)
+            filepaths.append(os.path.join(code_path, filepath))
 
     return filepaths
 
 
 def extract_filepaths_from_deepsource_json(
-    deepsource_json: dict[str, Any]
+    code_path: str, deepsource_json: dict[str, Any]
 ) -> list[str]:
-    """Extracts filepaths from a DeepSource JSON file."""
+    """Extracts filepaths from a DeepSource JSON file, and prefix it with code path."""
     filepaths = []
     for issue in deepsource_json["issues"]:
-        filepaths.append(issue["location"]["path"])
+        filepaths.append(os.path.join(code_path, issue["location"]["path"]))
 
     return filepaths
 
```
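Finally, a usage sketch of the updated `extract_filepaths_from_deepsource_json`, with a made-up DeepSource result object in the shape the helper expects:

```python
import os.path
from typing import Any


def extract_filepaths_from_deepsource_json(
    code_path: str, deepsource_json: dict[str, Any]
) -> list[str]:
    # Copied from the diff above.
    filepaths = []
    for issue in deepsource_json["issues"]:
        filepaths.append(os.path.join(code_path, issue["location"]["path"]))
    return filepaths


# Made-up result object:
expected_result = {
    "issues": [
        {"location": {"path": "src/app.py"}},
        {"location": {"path": "charts/runner/templates/tests/test-connection.yaml"}},
    ]
}

print(extract_filepaths_from_deepsource_json("/code", expected_result))
# ['/code/src/app.py', '/code/charts/runner/templates/tests/test-connection.yaml']
```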