Commit

#4331 Update validation on finding count to account for award reference version mismatch
sambodeme committed Oct 1, 2024
1 parent 9b58d1a commit 01016aa
Showing 2 changed files with 131 additions and 18 deletions.
130 changes: 115 additions & 15 deletions backend/audit/cross_validation/check_findings_count_consistency.py
@@ -26,37 +26,137 @@ def check_findings_count_consistency(sac_dict, *_args, **_kwargs):
    expected_award_refs_count = {}
    found_award_refs_count = defaultdict(int)
    errors = []

    if _should_skip_validation(data_source):
        return errors

    expected_award_refs_count, declared_award_ref_max_length = _get_federal_award_refs(
        federal_awards
    )
    found_award_refs_count, reported_award_ref_max_length = _get_findings_award_refs(
        findings_uniform_guidance, expected_award_refs_count
    )

    updated_expected_refs_count, updated_found_refs_count = (
        _normalize_award_ref_lengths(
            declared_award_ref_max_length,
            reported_award_ref_max_length,
            federal_awards,
            findings_uniform_guidance,
        )
    )

    if updated_expected_refs_count:
        expected_award_refs_count = updated_expected_refs_count

    if updated_found_refs_count:
        found_award_refs_count = updated_found_refs_count

    errors = _validate_findings(expected_award_refs_count, found_award_refs_count)

    return errors


def _should_skip_validation(data_source):
    # Skip this validation if it is an historical audit report with incorrect findings count
    return (
        data_source == settings.CENSUS_DATA_SOURCE
        and "check_findings_count_consistency"
        in InvalidRecord.fields["validations_to_skip"]
    )


def _get_federal_award_refs(federal_awards):
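    # Maps each award reference to its declared number of audit findings and tracks
    # the longest reference string seen in the Federal Awards workbook.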
    declared_award_ref_max_length = 0
    expected_award_refs_count = {}

    for award in federal_awards:
        award_reference = award.get("award_reference")
        if award_reference:
            declared_award_ref_max_length = max(
                declared_award_ref_max_length, len(award_reference)
            )
            expected_award_refs_count[award_reference] = award["program"][
                "number_of_audit_findings"
            ]

    return expected_award_refs_count, declared_award_ref_max_length


def _get_findings_award_refs(findings_uniform_guidance, expected_award_refs_count):
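    # Counts reported findings per award reference and tracks the longest reference
    # string seen in the Federal Awards Audit Findings workbook.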
    reported_award_ref_max_length = 0
    found_award_refs_count = defaultdict(int)

    for finding in findings_uniform_guidance:
        award_ref = finding["program"]["award_reference"]
        if award_ref:
            reported_award_ref_max_length = max(
                reported_award_ref_max_length, len(award_ref)
            )
            if award_ref in expected_award_refs_count:
                found_award_refs_count[award_ref] += 1

    return found_award_refs_count, reported_award_ref_max_length


def _validate_findings(expected_award_refs_count, found_award_refs_count):
    errors = []
    for award_ref, expected in expected_award_refs_count.items():
        counted = found_award_refs_count[award_ref]
        if counted != expected:
            errors.append(
                {"error": err_findings_count_inconsistent(expected, counted, award_ref)}
            )

    return errors


def _normalize_award_ref_lengths(
    declared_award_ref_max_length,
    reported_award_ref_max_length,
    federal_awards,
    findings_uniform_guidance,
):
    """
    Normalize the lengths of the award references in the Federal Awards and
    Federal Awards Audit Findings workbooks before validation.
    """
    expected_award_refs_count = {}
    found_award_refs_count = defaultdict(int)

    if declared_award_ref_max_length != reported_award_ref_max_length:
        # Determine the required padding based on the difference in lengths.
        diff = abs(reported_award_ref_max_length - declared_award_ref_max_length)
        padding = "0" * diff

        if declared_award_ref_max_length < reported_award_ref_max_length:
            # This means the version of the Federal Awards Audit Findings workbook
            # is newer than the version of the Federal Awards workbook.
            for award in federal_awards:
                award_reference = award.get("award_reference")
                if award_reference:
                    award_reference = _pad_award_ref(award_reference, padding)
                    expected_award_refs_count[award_reference] = award["program"][
                        "number_of_audit_findings"
                    ]
            for finding in findings_uniform_guidance:
                award_ref = finding["program"]["award_reference"]
                if award_ref in expected_award_refs_count:
                    found_award_refs_count[award_ref] += 1
        else:
            # This is unlikely to happen. It means the version of
            # the Federal Awards workbook is newer than
            # the version of the Federal Awards Audit Findings workbook.
            for finding in findings_uniform_guidance:
                award_ref = finding["program"]["award_reference"]
                if award_ref:
                    award_ref = _pad_award_ref(award_ref, padding)
                    if award_ref in expected_award_refs_count:
                        found_award_refs_count[award_ref] += 1
    else:
        # No normalization needed if the lengths match
        pass

    return expected_award_refs_count, found_award_refs_count


def _pad_award_ref(award_ref, padding):
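    # Inserts the zero padding right after the hyphen, e.g. a padding of "0" turns
    # "AWARD-0001" into "AWARD-00001" (reference values shown are illustrative).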
return f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
@@ -29,10 +29,12 @@ def _make_federal_awards(self, findings_count) -> dict:
            }
        }

    def _make_findings_uniform_guidance(self, awards, mismatch, padding) -> dict:
        entries = []
        for award in awards["FederalAwards"]["federal_awards"]:
            award_reference = award["award_reference"]
            if padding:
                award_reference = f"{award_reference.split('-')[0]}-{padding}{award_reference.split('-')[1]}"
            count = award["program"]["number_of_audit_findings"]
            for _ in range(count + mismatch):
                entries.append({"program": {"award_reference": award_reference}})
@@ -48,11 +50,11 @@ def _make_findings_uniform_guidance(self, awards, mismatch) -> dict:

return {"FindingsUniformGuidance": findings}

def _make_sac(self, findings_count, mismatch=0) -> SingleAuditChecklist:
def _make_sac(self, findings_count, mismatch=0, padding="") -> SingleAuditChecklist:
sac = baker.make(SingleAuditChecklist)
sac.federal_awards = self._make_federal_awards(findings_count)
sac.findings_uniform_guidance = self._make_findings_uniform_guidance(
sac.federal_awards, mismatch
sac.federal_awards, mismatch, padding
)
return sac

@@ -101,3 +103,14 @@ def test_declared_findings_exceed_reported_count(self):
        self._test_findings_count_mismatch(
            generate_random_integer(2, 4), generate_random_integer(-2, -1)
        )

    def test_normalize_award_ref_lengths_with_padding(self):
        """
        Ensure that award reference normalization occurs when declared and reported
        award reference lengths differ. Leading zeros are added appropriately.
        """
        sac = self._make_sac(
            generate_random_integer(self.FINDINGS_MIN, self.FINDINGS_MAX), 0, "0"
        )
        errors = check_findings_count_consistency(sac_validation_shape(sac))
        self.assertEqual(errors, [])
