From f5836a1c9bd96b587a8d7011032d3a717eee8ec2 Mon Sep 17 00:00:00 2001
From: Matthias Goerens <mgoerens@redhat.com>
Date: Thu, 7 Mar 2024 16:34:18 +0100
Subject: [PATCH] Initialize the Submission data structure

This is the first step towards solving #296. The code added by this
commit is currently *not* put in use in the certification pipeline and
is a partial duplicate of the "checkprcontent" package.

The Submission data structure can collect and run early validation
checks on all information concerning a user's submission (i.e. a GitHub
Pull Request). In its current state, given an api_url (link to a given
PR), it can collect the list of modified files, and extract the
category, organization, chart's name and version, as well as running
basic checks, such as SemVer compatibility of the provided version.

This commit also implements a custom JSON Serializer / Deserializer. The
Submission object can therefore easily be dumped to / read from a file.
Such a file can later be a GitHub workflow artifact, populated in a
future "pre-check" job and read in subsequent jobs.

Signed-off-by: Matthias Goerens <mgoerens@redhat.com>
---
 scripts/requirements.txt                |   1 +
 scripts/src/checkprcontent/checkpr.py   |   2 +-
 scripts/src/precheck/__init__.py        |   0
 scripts/src/precheck/serializer.py      |  47 ++++
 scripts/src/precheck/serializer_test.py |  95 +++++++
 scripts/src/precheck/submission.py      | 336 ++++++++++++++++++++++
 scripts/src/precheck/submission_test.py | 358 ++++++++++++++++++++++++
 7 files changed, 838 insertions(+), 1 deletion(-)
 create mode 100644 scripts/src/precheck/__init__.py
 create mode 100644 scripts/src/precheck/serializer.py
 create mode 100644 scripts/src/precheck/serializer_test.py
 create mode 100644 scripts/src/precheck/submission.py
 create mode 100644 scripts/src/precheck/submission_test.py

diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 98e38b4ff..ef110623c 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -25,6 +25,7 @@ pytest-forked==1.3.0
 pytest-xdist==2.4.0
 PyYAML==6.0.1
 requests==2.26.0
+responses==0.23.1
 retrying==1.3.3
 semantic-version==2.8.5
 semver==2.13.0
diff --git a/scripts/src/checkprcontent/checkpr.py b/scripts/src/checkprcontent/checkpr.py
index 01c85b1a5..483449268 100644
--- a/scripts/src/checkprcontent/checkpr.py
+++ b/scripts/src/checkprcontent/checkpr.py
@@ -113,7 +113,7 @@ def get_file_match_compiled_patterns():
 
     pattern = re.compile(base + r"/.*")
     reportpattern = re.compile(base + r"/report.yaml")
-    tarballpattern = re.compile(base + r"/(.*\.tgz$)")
+    tarballpattern = re.compile(base + r"/(.*\.tgz)")
     return pattern, reportpattern, tarballpattern
 
 
diff --git a/scripts/src/precheck/__init__.py b/scripts/src/precheck/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/scripts/src/precheck/serializer.py b/scripts/src/precheck/serializer.py
new file mode 100644
index 000000000..8dba1b6a2
--- /dev/null
+++ b/scripts/src/precheck/serializer.py
@@ -0,0 +1,47 @@
+"""Contains the logic to serialize / deserialize a Submission object to / from JSON.
+
+A pair of custom JSONEncoder / JSONDecoder is required due to the fact that the Submission class
+contains nested classes.
+
+"""
+
+import copy
+import json
+
+from precheck import submission
+
+
+class SubmissionEncoder(json.JSONEncoder):
+    def default(self, obj):
+        # Anything that is not a Submission is left to the stock encoder (raises TypeError).
+        if not isinstance(obj, submission.Submission):
+            return super().default(obj)
+
+        # Work on a deep copy so flattening the nested dataclasses never touches obj itself.
+        flattened = copy.deepcopy(obj.__dict__)
+        for nested in ("chart", "report", "source", "tarball"):
+            flattened[nested] = flattened[nested].__dict__
+        return flattened
+
+
+class SubmissionDecoder(json.JSONDecoder):
+    def __init__(self, *args, **kwargs):
+        # Route every decoded JSON object through object_hook below.
+        super().__init__(object_hook=self.object_hook, *args, **kwargs)
+
+    def object_hook(self, dct):
+        # Only the top-level Submission object carries a "chart" key; the nested
+        # objects (chart, report, source, tarball) are returned as plain dicts.
+        if "chart" not in dct:
+            return dct
+
+        # Rebuild each nested dataclass from its plain-dict representation.
+        nested_classes = {
+            "chart": submission.Chart,
+            "report": submission.Report,
+            "source": submission.Source,
+            "tarball": submission.Tarball,
+        }
+        rebuilt = {key: cls(**dct[key]) for key, cls in nested_classes.items()}
+
+        return submission.Submission(**(dct | rebuilt))
diff --git a/scripts/src/precheck/serializer_test.py b/scripts/src/precheck/serializer_test.py
new file mode 100644
index 000000000..d055af08a
--- /dev/null
+++ b/scripts/src/precheck/serializer_test.py
@@ -0,0 +1,95 @@
+import json
+
+from precheck import serializer
+from precheck import submission
+
+submission_json = """
+{
+    "api_url": "https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+    "modified_files": ["charts/partners/acme/awesome/1.42.0/report.yaml"],
+    "chart": {
+        "category": "partners",
+        "organization": "acme",
+        "name": "awesome",
+        "version": "1.42.0"
+    },
+    "report": {
+        "found": true,
+        "signed": false,
+        "path": "charts/partners/acme/awesome/1.42.0/report.yaml"
+    },
+    "source": {
+        "found": false,
+        "path": null
+    },
+    "tarball": {
+        "found": false,
+        "path": null,
+        "provenance": null
+    },
+    "modified_owners": [],
+    "modified_unknown": []
+}
+"""  # JSON representation of a Submission, shared by both tests below
+
+
+def sanitize_json_string(json_string: str):
+    """Normalize a JSON string by parsing it and dumping it again.
+
+    The round trip drops the newlines and indentation of the input, so the
+    result can be compared with the output of another json.dumps / encoder call.
+    """
+    return json.dumps(json.loads(json_string))
+
+
+def test_submission_serializer():  # NOTE(review): despite its name, this exercises SubmissionDecoder (JSON -> Submission)
+    s = json.loads(submission_json, cls=serializer.SubmissionDecoder)
+
+    assert isinstance(s, submission.Submission)
+    assert (
+        s.api_url == "https://api.github.com/repos/openshift-helm-charts/charts/pulls/1"
+    )
+    assert "charts/partners/acme/awesome/1.42.0/report.yaml" in s.modified_files
+    assert s.chart.category == "partners"
+    assert s.chart.organization == "acme"
+    assert s.chart.name == "awesome"
+    assert s.chart.version == "1.42.0"
+    assert s.report.found
+    assert not s.report.signed
+    assert s.report.path == "charts/partners/acme/awesome/1.42.0/report.yaml"
+    assert not s.source.found
+    assert not s.source.path
+    assert not s.tarball.found
+    assert not s.tarball.path
+    assert not s.tarball.provenance
+
+
+def test_submission_deserializer():  # NOTE(review): despite its name, this exercises SubmissionEncoder (Submission -> JSON)
+    s = submission.Submission(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+        modified_files=["charts/partners/acme/awesome/1.42.0/report.yaml"],
+        chart=submission.Chart(
+            category="partners",
+            organization="acme",
+            name="awesome",
+            version="1.42.0",
+        ),
+        report=submission.Report(
+            found=True,
+            signed=False,
+            path="charts/partners/acme/awesome/1.42.0/report.yaml",
+        ),
+        source=submission.Source(
+            found=False,
+            path=None,
+        ),
+        tarball=submission.Tarball(
+            found=False,
+            path=None,
+            provenance=None,
+        ),
+    )
+
+    assert serializer.SubmissionEncoder().encode(s) == sanitize_json_string(  # string comparison: key order matters
+        submission_json
+    )
diff --git a/scripts/src/precheck/submission.py b/scripts/src/precheck/submission.py
new file mode 100644
index 000000000..8ef7659ec
--- /dev/null
+++ b/scripts/src/precheck/submission.py
@@ -0,0 +1,336 @@
+import os
+import re
+import semver
+
+from dataclasses import dataclass, field
+
+from checkprcontent import checkpr
+from tools import gitutils
+from reporegex import matchers
+
+xRateLimit = "X-RateLimit-Limit"  # response header printed for debugging by _get_modified_files
+xRateRemain = "X-RateLimit-Remaining"  # response header printed for debugging by _get_modified_files
+
+
+class SubmissionError(Exception):
+    """Root exception for every error raised while collecting or checking a submission"""
+
+    pass
+
+
+class DuplicateChartError(SubmissionError):
+    """Raised by Chart.register_chart_info when a PR contains files for more than one chart"""
+
+    pass
+
+
+class VersionError(SubmissionError):
+    """Raised by Chart.register_chart_info when the chart version is not a valid semantic version"""
+
+    pass
+
+
+@dataclass
+class Chart:
+    """Identity of the Helm chart targeted by a submission.
+
+    The category, organization, name and version are write-once: see register_chart_info.
+
+    """
+
+    category: str = None
+    organization: str = None
+    name: str = None
+    version: str = None
+
+    def register_chart_info(self, category, organization, name, version):
+        """Record the chart identity, refusing any value that conflicts with one already set.
+
+        Raises DuplicateChartError on conflict, VersionError if version is not valid SemVer.
+
+        """
+        incoming = (category, organization, name, version)
+        current = (self.category, self.organization, self.name, self.version)
+        if any(old and old != new for old, new in zip(current, incoming)):
+            msg = "[ERROR] A PR must contain only one chart. Current PR includes files for multiple charts."
+            raise DuplicateChartError(msg)
+
+        if not semver.VersionInfo.isvalid(version):
+            raise VersionError(
+                f"[ERROR] Helm chart version is not a valid semantic version: {version}"
+            )
+
+        self.category, self.organization = category, organization
+        self.name, self.version = name, version
+
+    def get_owners_path(self):
+        """Return the repository path of the OWNERS file of this chart."""
+        return f"charts/{self.category}/{self.organization}/{self.name}/OWNERS"
+
+
+@dataclass
+class Report:
+    found: bool = False  # set to True by Submission.set_report when report.yaml is modified
+    signed: bool = False  # set to True by Submission.set_report when report.yaml.asc is modified
+    path: str = None  # path to the report.yaml
+
+
+@dataclass
+class Source:
+    found: bool = False  # set to True by Submission.set_source when a Chart.yaml is modified
+    path: str = None  # Path to the Chart.yaml
+
+
+@dataclass
+class Tarball:
+    found: bool = False  # set to True by Submission.set_tarball when a .tgz file is modified
+    path: str = None  # path to the .tgz file
+    provenance: str = None  # path to the .prov file, if any
+
+
+@dataclass
+class Submission:
+    """Represents a GitHub PR, opened to either certify a new Helm chart or add / modify an OWNERS file.
+
+    A Submission can be instantiated either:
+    * by solely providing the URL of a given PR (represented by the api_url attribute). Upon
+    initialization (see __post_init__ method), the rest of the information is retrieved from the
+    GitHub API. This should typically occur once per pipeline run, at the start.
+    * by providing all class attributes. This is typically done by loading a JSON representation of
+    a Submission from a file, and should be done several times per pipeline runs, in later jobs.
+
+    """
+
+    api_url: str
+    modified_files: list[str] = None
+    chart: Chart = field(default_factory=lambda: Chart())
+    report: Report = field(default_factory=lambda: Report())
+    source: Source = field(default_factory=lambda: Source())
+    tarball: Tarball = field(default_factory=lambda: Tarball())
+    modified_owners: list[str] = field(default_factory=list)
+    modified_unknown: list[str] = field(default_factory=list)
+
+    def __post_init__(self):
+        """Complete the initialization of the Submission object.
+
+        Only retrieve PR information from the GitHub API if required, by checking whether the
+        modified_files attribute was provided. This check allows to make the distinction
+        between the two aforementioned cases of initialization of a Submission object:
+        * If modified_files is None (not provided), we're initializing a brand new Submission
+        and need to retrieve the rest of the information from the GitHub API.
+        * If modified_files is provided, even as an empty list, we are loading an existing
+        Submission object from a file and must not query the API again.
+
+        """
+        if self.modified_files is None:
+            self.modified_files = []
+            self._get_modified_files()
+            self._parse_modified_files()
+
+    def _get_modified_files(self):
+        """Query the GitHub API for the files added / modified by this PR, page by page, and
+        append their names to modified_files. Raises SubmissionError if the API call fails."""
+        page_number = 1
+        max_page_size, page_size = 100, 100
+        files_api_url = re.sub(r"^https://api\.github\.com/", "", self.api_url)
+
+        while page_size == max_page_size:  # a page shorter than the maximum is the last one
+            files_api_query = (
+                f"{files_api_url}/files?per_page={page_size}&page={page_number}"
+            )
+            print(f"[INFO] Query files : {files_api_query}")
+
+            try:
+                r = gitutils.github_api(
+                    "get", files_api_query, os.environ.get("BOT_TOKEN")
+                )
+            except SystemExit as e:
+                raise SubmissionError(e) from e  # keep the original exit as explicit cause
+
+            files = r.json()
+            page_size = len(files)
+            page_number += 1
+
+            if xRateLimit in r.headers:
+                print(f"[DEBUG] {xRateLimit} : {r.headers[xRateLimit]}")
+            if xRateRemain in r.headers:
+                print(f"[DEBUG] {xRateRemain}  : {r.headers[xRateRemain]}")
+
+            # An error payload is a JSON object with a "message" key rather than a list of files.
+            if "message" in files:
+                msg = f'[ERROR] getting pr files: {files["message"]}'
+                raise SubmissionError(msg)
+            for file in files:
+                if "filename" in file:
+                    self.modified_files.append(file["filename"])
+
+    def _parse_modified_files(self):
+        """Classify the list of modified files.
+
+        Modified files are categorized into 5 groups, mapping to 5 class attributes:
+        - The `report` attribute has information about files related to the chart-verifier report:
+        the report.yaml itself and, if signed, its signature report.yaml.asc.
+        - The `source` attribute has information about files related to the chart's source: all
+        files, if any, under the src/ directory.
+        - The `tarball` attribute has information about files related to the chart's source as
+        tarball: the .tgz tarball itself and, if signed, the .prov provenance file.
+        - A list of added / modified OWNERS files is recorded in the `modified_owners` attribute.
+        - The rest of the files are classified in the `modified_unknown` attribute.
+
+        Raises a SubmissionError if:
+        * The Submission concerns more than one chart
+        * The version of the chart is not SemVer compatible
+        * The tarball file is named incorrectly
+
+        """
+        for file_path in self.modified_files:
+            file_category, match = get_file_type(file_path)
+            if file_category == "report":
+                self.chart.register_chart_info(*match.groups())
+                self.set_report(file_path)
+            elif file_category == "source":
+                self.chart.register_chart_info(*match.groups())
+                self.set_source(file_path)
+            elif file_category == "tarball":
+                category, organization, name, version, _ = match.groups()
+                self.chart.register_chart_info(category, organization, name, version)
+                self.set_tarball(file_path, match)
+            elif file_category == "owners":
+                self.modified_owners.append(file_path)
+            else:  # every remaining category is unknown; no reliance on the exact label spelling
+                self.modified_unknown.append(file_path)
+
+    def set_report(self, file_path):
+        """Record a file that belongs to the chart-verifier report.
+
+        Either the report.yaml itself or its signature report.yaml.asc; anything else is unknown.
+        """
+        file_name = os.path.basename(file_path)
+        if file_name == "report.yaml":
+            print(f"[INFO] Report found: {file_path}")
+            self.report.found = True
+            self.report.path = file_path
+        elif file_name == "report.yaml.asc":
+            self.report.signed = True
+        else:
+            self.modified_unknown.append(file_path)
+
+    def set_source(self, file_path):
+        """Record a file that belongs to the chart's source.
+
+        The source may span many files, but only the Chart.yaml is registered; other files
+        are ignored here.
+
+        """
+        if os.path.basename(file_path) != "Chart.yaml":
+            return
+        self.source.found, self.source.path = True, file_path
+
+    def set_tarball(self, file_path, tarball_match):
+        """Record a file related to the chart's tarball.
+
+        Either the .tgz itself (SubmissionError if not named <name>-<version>.tgz) or its .prov file.
+        """
+        extension = os.path.splitext(file_path)[1]
+        if extension == ".prov":
+            self.tarball.provenance = file_path
+            return
+        if extension != ".tgz":
+            self.modified_unknown.append(file_path)
+            return
+
+        print(f"[INFO] tarball found: {file_path}")
+        self.tarball.found = True
+        self.tarball.path = file_path
+        _, _, chart_name, chart_version, tar_name = tarball_match.groups()
+        expected_tar_name = f"{chart_name}-{chart_version}.tgz"
+        if tar_name != expected_tar_name:
+            msg = f"[ERROR] the tgz file is named incorrectly. Expected: {expected_tar_name}. Got: {tar_name}"
+            raise SubmissionError(msg)
+
+    def is_valid_certification_submission(self):
+        """Check whether the files in this Submission are valid to attempt to certify a Chart
+
+        We expect the user to provide either:
+        * Only a report file
+        * Only a chart - either as source or tarball
+        * Both the report and the chart
+
+        Returns a (is_valid, reason) tuple. is_valid is False if:
+        * The user attempts to create the OWNERS file for its project.
+        * The PR contains additional files, not related to the Chart being submitted
+        * None of the report, source or tarball was found (reason is then empty)
+
+        is_valid is True, with an empty reason, in all other cases
+        """
+        if self.modified_owners:
+            return False, "[ERROR] Send OWNERS file by itself in a separate PR."
+
+        if self.modified_unknown:
+            msg = (
+                "[ERROR] PR includes one or more files not related to charts: "
+                + ", ".join(self.modified_unknown)
+            )
+            return False, msg
+
+        if self.report.found or self.source.found or self.tarball.found:
+            return True, ""
+
+        return False, ""
+
+    def is_valid_owners_submission(self):
+        """Check whether the files in this Submission are valid for an OWNERS PR
+
+        Returns a (is_valid, reason) tuple:
+        * (True, "") if the only file modified by the PR is an OWNERS file.
+        * (False, <reason>) in all other cases.
+
+        """
+        only_owners = len(self.modified_owners) == 1 and len(self.modified_files) == 1
+        if only_owners:
+            return True, ""
+
+        # An OWNERS file mixed with other files is rejected with a dedicated message.
+        if self.modified_owners:
+            return False, "[ERROR] Send OWNERS file by itself in a separate PR."
+
+        return False, "No OWNERS file provided"
+
+
+def get_file_type(file_path):
+    """Determine the category of a given file
+
+    As part of a PR, a modified file can relate to one of 5 categories:
+    - "report": the chart-verifier report
+    - "source": the source of the chart
+    - "tarball": the tarball of the chart
+    - "owners": an OWNERS file
+    - "unknwown" (sic): anything else
+
+    Returns a (category, match) tuple; match is None for the last category.
+    """
+    chart_file_pattern, report_pattern, tarball_pattern = (
+        checkpr.get_file_match_compiled_patterns()
+    )
+    owners_pattern = re.compile(
+        matchers.submission_path_matcher(include_version_matcher=False) + r"/OWNERS"
+    )
+    src_pattern = re.compile(matchers.submission_path_matcher() + r"/src/")
+
+    if chart_file_pattern.match(file_path):
+        # File under charts/<category>/<organization>/<name>/<version>
+        candidates = (
+            ("report", report_pattern),
+            ("source", src_pattern),
+            ("tarball", tarball_pattern),
+        )
+    else:
+        candidates = (("owners", owners_pattern),)
+
+    # Return the first pattern that matches, together with its match object.
+    for category, candidate_pattern in candidates:
+        candidate_match = candidate_pattern.match(file_path)
+        if candidate_match:
+            return category, candidate_match
+
+    return "unknwown", None
diff --git a/scripts/src/precheck/submission_test.py b/scripts/src/precheck/submission_test.py
new file mode 100644
index 000000000..c74ee6c26
--- /dev/null
+++ b/scripts/src/precheck/submission_test.py
@@ -0,0 +1,358 @@
+import contextlib
+import pytest
+import responses
+
+from dataclasses import dataclass, field
+
+from precheck import submission
+
+expected_category = "partners"  # chart identity shared by the scenarios below
+expected_organization = "acme"
+expected_name = "awesome"
+expected_version = "1.42.0"
+
+expected_chart = submission.Chart(  # Chart expected for the scenarios that submit chart files
+    category=expected_category,
+    organization=expected_organization,
+    name=expected_name,
+    version=expected_version,
+)
+
+
+@dataclass
+class SubmissionInitScenario:
+    api_url: str  # URL of the PR; test_submission_init mocks "<api_url>/files"
+    modified_files: list[str]  # file names returned by the mocked GitHub API
+    expected_submission: submission.Submission = None  # left to None when an exception is expected
+    excepted_exception: contextlib.ContextDecorator = field(  # (sic) pytest.raises(...) or, by default, a no-op context
+        default_factory=lambda: contextlib.nullcontext()
+    )
+
+
+scenarios_submission_init = [
+    # PR contains a unique and unsigned report.yaml
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+        modified_files=[
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml"
+        ],
+        expected_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml"
+            ],
+            chart=expected_chart,
+            report=submission.Report(
+                found=True,
+                signed=False,
+                path=f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml",
+            ),
+        ),
+    ),
+    # PR contains a signed report
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/2",
+        modified_files=[
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml",
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml.asc",
+        ],
+        expected_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/2",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml",
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml.asc",
+            ],
+            chart=expected_chart,
+            report=submission.Report(
+                found=True,
+                signed=True,
+                path=f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml",
+            ),
+        ),
+    ),
+    # PR contains the chart's source
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/3",
+        modified_files=[
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/Chart.yaml",
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/buildconfig.yaml",
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/deployment.yaml",
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/imagestream.yaml",
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/route.yaml",
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/service.yaml",
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/values.schema.json",
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/values.yaml",
+        ],
+        expected_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/3",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/Chart.yaml",
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/buildconfig.yaml",
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/deployment.yaml",
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/imagestream.yaml",
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/route.yaml",
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/templates/service.yaml",
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/values.schema.json",
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/values.yaml",
+            ],
+            chart=expected_chart,
+            source=submission.Source(
+                found=True,
+                path=f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/src/Chart.yaml",
+            ),
+        ),
+    ),
+    # PR contains an unsigned tarball
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4",
+        modified_files=[
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz"
+        ],
+        expected_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/4",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz"
+            ],
+            chart=expected_chart,
+            tarball=submission.Tarball(
+                found=True,
+                provenance=None,
+                path=f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz",
+            ),
+        ),
+    ),
+    # PR contains a signed tarball
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/5",
+        modified_files=[
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz",
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz.prov",
+        ],
+        expected_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/5",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz",
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz.prov",
+            ],
+            chart=expected_chart,
+            tarball=submission.Tarball(
+                found=True,
+                provenance=f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz.prov",
+                path=f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/{expected_name}-{expected_version}.tgz",
+            ),
+        ),
+    ),
+    # PR contains an OWNERS file
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/6",
+        modified_files=[
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/OWNERS"
+        ],
+        expected_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/6",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/OWNERS"
+            ],
+            modified_owners=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/OWNERS"
+            ],
+        ),
+    ),
+    # PR contains additional files, not fitting into any expected category
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/7",
+        modified_files=["charts/path/to/some/file"],
+        expected_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/7",
+            modified_files=["charts/path/to/some/file"],
+            modified_unknown=["charts/path/to/some/file"],
+        ),
+    ),
+    # Invalid PR contains multiple reports, referencing multiple charts
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/101",
+        modified_files=[
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml",
+            f"charts/{expected_category}/{expected_organization}/other-chart/{expected_version}/report.yaml",
+        ],
+        excepted_exception=pytest.raises(submission.DuplicateChartError),
+    ),
+    # Invalid PR contains a tarball with an incorrect name
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/102",
+        modified_files=[
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/incorrectly-named.tgz"
+        ],
+        excepted_exception=pytest.raises(submission.SubmissionError),
+    ),
+    # Invalid PR references a Chart with a version that is not Semver compatible
+    SubmissionInitScenario(
+        api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/103",
+        modified_files=[
+            f"charts/{expected_category}/{expected_organization}/{expected_name}/0.1.2.3.4/report.yaml"
+        ],
+        excepted_exception=pytest.raises(submission.VersionError),
+    ),
+]
+
+
+@pytest.mark.parametrize("test_scenario", scenarios_submission_init)
+@responses.activate
+def test_submission_init(test_scenario):
+    """Instantiate a Submission against a mocked GitHub API and compare it with the scenario"""
+
+    # Mock the GitHub API endpoint listing the files of the PR
+    responses.get(
+        f"{test_scenario.api_url}/files",
+        json=[{"filename": file} for file in test_scenario.modified_files],
+    )
+
+    with test_scenario.excepted_exception:  # pytest.raises(...) or a no-op context
+        s = submission.Submission(api_url=test_scenario.api_url)
+        assert s == test_scenario.expected_submission  # not reached when the constructor raises
+
+
+@responses.activate
+def test_submission_not_exist():
+    """Test that creating a Submission for a non-existent PR raises a SubmissionError"""
+    api_url_doesnt_exist = (
+        "https://api.github.com/repos/openshift-helm-charts/charts/pulls/9999"
+    )
+
+    responses.get(  # the mocked payload is an error object with a "message" key, not a list of files
+        f"{api_url_doesnt_exist}/files",
+        json={
+            "message": "Not Found",
+            "documentation_url": "https://docs.github.com/rest/pulls/pulls#list-pull-requests-files",
+        },
+    )
+
+    with pytest.raises(submission.SubmissionError):
+        submission.Submission(api_url=api_url_doesnt_exist)
+
+
+@dataclass
+class CertificationScenario:  # one parametrized case of test_is_valid_certification
+    input_submission: submission.Submission  # Submission under test
+    expected_is_valid_certification: bool  # expected verdict (first returned value)
+    expected_reason: str = ""  # substring expected in the returned reason
+
+
+scenarios_certification_submission = [  # cases for test_is_valid_certification
+    # Valid: the Submission only contains a report (found, not signed)
+    CertificationScenario(
+        input_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml"
+            ],
+            chart=expected_chart,
+            report=submission.Report(
+                found=True,
+                signed=False,
+                path=f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml",
+            ),
+        ),
+        expected_is_valid_certification=True,
+    ),
+    # Invalid: a certification Submission must not modify an OWNERS file
+    CertificationScenario(
+        input_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/OWNERS"
+            ],
+            modified_owners=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/OWNERS"
+            ],
+        ),
+        expected_is_valid_certification=False,
+        expected_reason="[ERROR] Send OWNERS file by itself in a separate PR.",
+    ),
+    # Invalid: the Submission modifies a file that is not related to a chart
+    CertificationScenario(
+        input_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+            modified_files=["charts/path/to/some/file"],
+            modified_unknown=["charts/path/to/some/file"],
+        ),
+        expected_is_valid_certification=False,
+        expected_reason="[ERROR] PR includes one or more files not related to charts:",
+    ),
+]
+
+
+@pytest.mark.parametrize("test_scenario", scenarios_certification_submission)
+def test_is_valid_certification(test_scenario):
+    """Check the certification verdict and reason reported for each scenario"""
+    candidate = test_scenario.input_submission
+    verdict, message = candidate.is_valid_certification_submission()
+    assert test_scenario.expected_is_valid_certification == verdict
+    assert test_scenario.expected_reason in message
+
+
+@dataclass
+class OwnersScenario:  # one parametrized case of test_is_valid_owners
+    input_submission: submission.Submission  # Submission under test
+    expected_is_valid_owners: bool  # expected verdict (first returned value)
+    expected_reason: str = ""  # substring expected in the returned reason
+
+
+scenarios_owners_submission = [  # cases for test_is_valid_owners
+    # Valid: the Submission modifies exactly one file, and it is an OWNERS file
+    OwnersScenario(
+        input_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/OWNERS"
+            ],
+            modified_owners=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/OWNERS"
+            ],
+        ),
+        expected_is_valid_owners=True,
+    ),
+    # Invalid: the Submission modifies more than one OWNERS file
+    OwnersScenario(
+        input_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/OWNERS",
+                f"charts/{expected_category}/{expected_organization}/another_chart/OWNERS",
+            ],
+            modified_owners=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/OWNERS",
+                f"charts/{expected_category}/{expected_organization}/another_chart/OWNERS",
+            ],
+        ),
+        expected_is_valid_owners=False,
+        expected_reason="[ERROR] Send OWNERS file by itself in a separate PR.",
+    ),
+    # Invalid: the Submission doesn't contain an OWNERS file (only a report)
+    OwnersScenario(
+        input_submission=submission.Submission(
+            api_url="https://api.github.com/repos/openshift-helm-charts/charts/pulls/1",
+            modified_files=[
+                f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml"
+            ],
+            chart=expected_chart,
+            report=submission.Report(
+                found=True,
+                signed=False,
+                path=f"charts/{expected_category}/{expected_organization}/{expected_name}/{expected_version}/report.yaml",
+            ),
+        ),
+        expected_is_valid_owners=False,
+        expected_reason="No OWNERS file provided",
+    ),
+    # NOTE(review): no scenario here covers a Submission with unknown files
+]
+
+
+@pytest.mark.parametrize("test_scenario", scenarios_owners_submission)
+def test_is_valid_owners(test_scenario):
+    """Check the OWNERS verdict and reason reported for each scenario"""
+    candidate = test_scenario.input_submission
+    verdict, message = candidate.is_valid_owners_submission()
+    assert test_scenario.expected_is_valid_owners == verdict
+    assert test_scenario.expected_reason in message