chore: add code path handling and logs #16

Merged 4 commits on Dec 4, 2023
run_community_analyzer.py (18 additions, 3 deletions)

```diff
@@ -1,5 +1,6 @@
 import argparse
 import json
+import logging
 import os
 import os.path
 
@@ -9,8 +10,17 @@
 
 sentry.initialize()
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
+    datefmt="%Y-%m-%d:%H:%M:%S",
+    level=logging.DEBUG,
+)
+
 
 class CommunityAnalyzerArgs:
+    """Arguments for the community analyzer."""
+
     analyzer: str
 
 
@@ -20,7 +30,7 @@ def get_issue_map(analyzer_name: str) -> str:
     return os.path.join(analyzers_dir, analyzer_name, "utils", "issue_map.json")
 
 
-def get_files_to_analyze() -> set[str]:
+def get_files_to_analyze(code_path: str) -> set[str]:
     """
     Read the analysis config to get the list of files to analyze.
     Always raise issues only in these files.
@@ -34,11 +44,16 @@ def get_files_to_analyze() -> set[str]:
     with open(analysis_config_path) as file:
         analysis_config = json.load(file)
 
-    return set(analysis_config["files"])
+    logger.info("Files in analysis config: %s", analysis_config["files"])
+    return {
+        os.path.relpath(analysis_file, code_path)
+        for analysis_file in analysis_config["files"]
+    }
 
 
 def main(argv: list[str] | None = None) -> None:
     """Runs the CLI."""
+    code_path = os.getenv("CODE_PATH", "/code")
     toolbox_path = os.getenv("TOOLBOX_PATH", "/toolbox")
     output_path = os.path.join(toolbox_path, "analysis_results.json")
     artifacts_path = os.getenv("ARTIFACTS_PATH", "/artifacts")
@@ -53,7 +68,7 @@ def main(argv: list[str] | None = None) -> None:
 
     analyzer_name = args.analyzer
     issue_map_path = get_issue_map(analyzer_name)
-    modified_files = get_files_to_analyze()
+    modified_files = get_files_to_analyze(code_path)
     run_sarif_parser(
         artifacts_path, output_path, issue_map_path, modified_files=modified_files
    )
```
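The core of this change is path normalization: the tests below write absolute paths (rooted at `CODE_PATH`) into `analysis_config.json`, while SARIF reports refer to files relative to the repository root, so `get_files_to_analyze()` now strips the `CODE_PATH` prefix with `os.path.relpath` before the two are compared. A minimal sketch of that normalization, using made-up paths:

```python
import os.path

# Made-up example values; in the analyzer these come from
# analysis_config.json and the CODE_PATH environment variable.
code_path = "/code"
config_files = [
    "/code/src/app.py",
    "/code/charts/runner/templates/tests/test-connection.yaml",
]

# Mirrors the new body of get_files_to_analyze():
modified_files = {os.path.relpath(path, code_path) for path in config_files}
print(sorted(modified_files))
# ['charts/runner/templates/tests/test-connection.yaml', 'src/app.py']
```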
sarif-parser/src/sarif_parser/__init__.py (23 additions, 1 deletion)

```diff
@@ -3,11 +3,19 @@
 
 import hashlib
 import json
+import logging
 import os.path
 from typing import Any, Sequence, TypedDict, Union
 
 import sentry
 
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
+    datefmt="%Y-%m-%d:%H:%M:%S",
+    level=logging.DEBUG,
+)
+
 
 class Issue(TypedDict):
     issue_code: str
@@ -16,16 +24,22 @@ class Issue(TypedDict):
 
 
 class IssueLocation(TypedDict):
+    """Location of an issue in a file."""
+
     path: str
     position: IssuePosition
 
 
 class IssuePosition(TypedDict):
+    """Position of an issue in a file."""
+
     begin: LineColumn
     end: LineColumn
 
 
 class LineColumn(TypedDict):
+    """Line and column of an issue in a file."""
+
     line: int
     column: int
 
@@ -42,8 +56,9 @@ def parse(
     issue_map = {}
 
     deepsource_issues: list[Issue] = []
-
+    total_report_issues = 0
     for run in sarif_data["runs"]:
+        total_report_issues += len(run["results"])
         for issue in run["results"]:
             assert len(issue["locations"]) == 1
             location = issue["locations"][0]["physicalLocation"]
@@ -104,6 +119,13 @@ def parse(
     )
     deepsource_issues.append(deepsource_issue)
 
+    logger.info(
+        "Total issues in SARIF report: %s. \n"
+        "Issues extracted for the run in files sent for analysis: %s",
+        total_report_issues,
+        len(deepsource_issues),
+    )
+
     return deepsource_issues
 
 
```
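Given the `basicConfig` format set up above, the new summary log should render roughly as in the sketch below; the counts, filename, and timestamp are illustrative, not taken from a real run:

```python
import logging

logging.basicConfig(
    format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
    datefmt="%Y-%m-%d:%H:%M:%S",
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

# Illustrative counts: 9 issues in the report, 7 inside analyzed files.
logger.info(
    "Total issues in SARIF report: %s. \n"
    "Issues extracted for the run in files sent for analysis: %s",
    9,
    7,
)
# Example output:
# 2023-12-04:12:00:00 INFO     [example.py:11] Total issues in SARIF report: 9. 
# Issues extracted for the run in files sent for analysis: 7
```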
tests/test_community_analyzer.py (4 additions, 2 deletions)

```diff
@@ -184,10 +184,12 @@
 
 def test_community_analyzer(tmp_path: Path) -> None:
     """Test for `run_community_analyzer.main()`, to test `issue_map.json` parsing."""
+    code_path = "/code"
     toolbox_path = tmp_path.as_posix()
     artifacts_path = os.path.join(os.path.dirname(__file__), "test_artifacts")
     analysis_config_path = os.path.join(toolbox_path, "analysis_config.json")
-    modified_files = extract_filepaths_from_deepsource_json(expected_result)
+    modified_files = extract_filepaths_from_deepsource_json(code_path, expected_result)
+    os.environ["CODE_PATH"] = code_path
     os.environ["TOOLBOX_PATH"] = toolbox_path
     os.environ["ARTIFACTS_PATH"] = artifacts_path
 
@@ -214,7 +216,7 @@ def test_community_analyzer(tmp_path: Path) -> None:
     # Note: There are 7 issues in this file in our report fixture.
     # See `expected_result`.
     modified_files = [
-        "charts/runner/templates/tests/test-connection.yaml",
+        os.path.join(code_path, "charts/runner/templates/tests/test-connection.yaml"),
     ]
     with temp_analysis_config(analysis_config_path, modified_files):
         run_community_analyzer.main(["--analyzer=kube-linter"])
```
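The test drives everything through `temp_analysis_config`, whose definition is not part of this diff. A hypothetical minimal version, assuming it only needs to write an `analysis_config.json` containing the `files` key that `get_files_to_analyze` reads:

```python
import json
import os
from contextlib import contextmanager
from typing import Iterator


@contextmanager
def temp_analysis_config(path: str, files: list[str]) -> Iterator[None]:
    # Hypothetical sketch of the helper used in the tests: write the
    # analysis config, hand control to the test body, then clean up.
    with open(path, "w") as fp:
        json.dump({"files": files}, fp)
    try:
        yield
    finally:
        os.remove(path)
```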
tests/test_duplicate_artifacts.py (2 additions, 1 deletion)

```diff
@@ -29,6 +29,7 @@ def patch_env_values(
 def test_duplicate_artifacts(tmp_path: pathlib.Path) -> None:
     """Make sure results are not duplicated when same artifacts are reported more than"""
     # create a temporary directory to store duplicate artifacts
+    code_path = "/code"
     toolbox_path = tmp_path / "toolbox"
     artifacts_dir = tmp_path / "artifacts"
     toolbox_path.mkdir()
@@ -44,7 +45,7 @@ def test_duplicate_artifacts(tmp_path: pathlib.Path) -> None:
     with open(artifact_path) as fp:
         data = json.load(fp)
     sarif_data = json.loads(data["data"])
-    modified_filepath = extract_filepaths_from_sarif(sarif_data)
+    modified_filepath = extract_filepaths_from_sarif(code_path, sarif_data)
 
     temp_analysis_config_path = os.path.join(toolbox_path, "analysis_config.json")
 
```
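For context, `extract_filepaths_from_sarif` (its updated definition appears in tests/testutils.py at the bottom of this diff) now prefixes each SARIF path with the code path. A self-contained sketch with a made-up SARIF fragment, the function body copied and lightly reflowed from that diff:

```python
import os.path
from typing import Any


def extract_filepaths_from_sarif(code_path: str, sarif: dict[str, Any]) -> list[str]:
    # Copied from tests/testutils.py as changed in this PR.
    filepaths = []
    for run in sarif["runs"]:
        for result in run["results"]:
            filepath = result["locations"][0]["physicalLocation"][
                "artifactLocation"
            ]["uri"]
            filepaths.append(os.path.join(code_path, filepath))
    return filepaths


# Made-up single-result SARIF document:
sarif_data = {
    "runs": [
        {
            "results": [
                {
                    "locations": [
                        {
                            "physicalLocation": {
                                "artifactLocation": {"uri": "src/app.py"}
                            }
                        }
                    ]
                }
            ]
        }
    ]
}

print(extract_filepaths_from_sarif("/code", sarif_data))
# ['/code/src/app.py']
```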
tests/test_report_parsing.py (2 additions, 1 deletion)

```diff
@@ -30,9 +30,10 @@ def parse_single_artifact(
     """
     Run community analyzer on a single artifact and return the deepsource result object.
     """
+    code_path = "/code"
    artifact_path = make_artifact(report_path)
     artifact_filepaths = extract_filepaths_from_sarif(
-        json.loads(json.load(open(artifact_path))["data"])
+        code_path, json.loads(json.load(open(artifact_path))["data"])
     )
     toolbox_path = tempfile.gettempdir()
     os.environ["ARTIFACTS_PATH"] = artifact_path
```
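One detail worth spelling out: the artifact file is JSON whose `data` field holds the SARIF report as a JSON-encoded string, which is why the tests decode twice with `json.loads(json.load(...)["data"])`. A small sketch of that layout, inferred from the tests, so treat it as an assumption:

```python
import json

# Assumed artifact layout: the SARIF report is stored as a JSON string
# under the "data" key of the artifact file.
sarif_report = {"runs": [{"results": []}]}
artifact = {"data": json.dumps(sarif_report)}

with open("artifact.json", "w") as fp:
    json.dump(artifact, fp)

# First decode: the artifact file itself; second: the embedded SARIF string.
sarif_data = json.loads(json.load(open("artifact.json"))["data"])
print(sarif_data)  # {'runs': [{'results': []}]}
```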
tests/testutils.py (6 additions, 6 deletions)

```diff
@@ -4,27 +4,27 @@
 from typing import Any, Iterator
 
 
-def extract_filepaths_from_sarif(sarif: dict[str, Any]) -> list[str]:
-    """Extracts filepaths from a SARIF file."""
+def extract_filepaths_from_sarif(code_path: str, sarif: dict[str, Any]) -> list[str]:
+    """Extracts filepaths from a SARIF file, and prefix it with code path."""
     filepaths = []
     for run in sarif["runs"]:
         for result in run["results"]:
             filepath = result["locations"][0]["physicalLocation"]["artifactLocation"][
                 "uri"
             ]
 
-            filepaths.append(filepath)
+            filepaths.append(os.path.join(code_path, filepath))
 
     return filepaths
 
 
 def extract_filepaths_from_deepsource_json(
-    deepsource_json: dict[str, Any]
+    code_path: str, deepsource_json: dict[str, Any]
 ) -> list[str]:
-    """Extracts filepaths from a DeepSource JSON file."""
+    """Extracts filepaths from a DeepSource JSON file, and prefix it with code path."""
     filepaths = []
     for issue in deepsource_json["issues"]:
-        filepaths.append(issue["location"]["path"])
+        filepaths.append(os.path.join(code_path, issue["location"]["path"]))
 
     return filepaths
 
```
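Finally, a usage sketch of the updated `extract_filepaths_from_deepsource_json`, with a made-up DeepSource result object in the shape the helper expects:

```python
import os.path
from typing import Any


def extract_filepaths_from_deepsource_json(
    code_path: str, deepsource_json: dict[str, Any]
) -> list[str]:
    # Copied from the diff above.
    filepaths = []
    for issue in deepsource_json["issues"]:
        filepaths.append(os.path.join(code_path, issue["location"]["path"]))
    return filepaths


# Made-up result object:
expected_result = {
    "issues": [
        {"location": {"path": "src/app.py"}},
        {"location": {"path": "charts/runner/templates/tests/test-connection.yaml"}},
    ]
}

print(extract_filepaths_from_deepsource_json("/code", expected_result))
# ['/code/src/app.py', '/code/charts/runner/templates/tests/test-connection.yaml']
```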