Commit

* better risk score calculation for Anonymize and Sensitive scanners
asofter committed Apr 18, 2024
1 parent d3c891c commit f31f065
Showing 2 changed files with 12 additions and 6 deletions.
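
Both scanners previously reported the raw Presidio analyzer confidence as the risk score (Anonymize rounded it to one decimal place). With this commit, the highest analyzer score is rounded to two decimal places and passed through calculate_risk_score from llm_guard.util together with the scanner's configured threshold, so both scanners report risk relative to that threshold. The helper itself is not part of this diff; the sketch below is only an illustration of a threshold-relative mapping, not the actual llm_guard.util.calculate_risk_score implementation:

def calculate_risk_score(score: float, threshold: float) -> float:
    # Illustrative sketch only -- not the real llm_guard helper.
    # Detections below the configured threshold are treated as no risk;
    # anything at or above it reports the analyzer confidence, capped at 1.0.
    if score < threshold:
        return 0.0
    return round(min(score, 1.0), 2)

Under this sketch, with a threshold of 0.5 an analyzer confidence of 0.88 is reported as 0.88, while 0.3 is reported as 0.0.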
10 changes: 7 additions & 3 deletions llm_guard/input_scanners/anonymize.py
@@ -6,7 +6,7 @@
 from presidio_anonymizer.entities import PIIEntity, RecognizerResult
 
 from ..exception import LLMGuardValidationError
-from ..util import get_logger
+from ..util import calculate_risk_score, get_logger
 from ..vault import Vault
 from .anonymize_helpers import (
     DEBERTA_AI4PRIVACY_v2_CONF,
@@ -321,7 +321,7 @@ def scan(self, prompt: str) -> (str, bool, float):
             max(analyzer_result.score for analyzer_result in analyzer_results)
             if analyzer_results
             else 0.0,
-            1,
+            2,
         )
         analyzer_results = self._remove_conflicts_and_get_text_manipulation_data(analyzer_results)
         merged_results = self._merge_entities_with_whitespace_between(prompt, analyzer_results)
@@ -339,7 +339,11 @@ def scan(self, prompt: str) -> (str, bool, float):
             for entity_placeholder, entity_value in anonymized_results:
                 if not self._vault.placeholder_exists(entity_placeholder):
                     self._vault.append((entity_placeholder, entity_value))
-            return self._preamble + sanitized_prompt, False, risk_score
+            return (
+                self._preamble + sanitized_prompt,
+                False,
+                calculate_risk_score(risk_score, self._threshold),
+            )
 
         LOGGER.debug("Prompt does not have sensitive data to replace", risk_score=risk_score)
 
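The change from 1 to 2 in the round(...) call above only affects how much precision the analyzer score keeps before it is mapped to a risk score; a quick illustration in plain Python (not scanner code):

score = 0.876
round(score, 1)  # 0.9  -- previous behaviour, one decimal place
round(score, 2)  # 0.88 -- new behaviour, two decimal places
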
8 changes: 5 additions & 3 deletions llm_guard/output_scanners/sensitive.py
@@ -9,7 +9,7 @@
     get_regex_patterns,
     get_transformers_recognizer,
 )
-from llm_guard.util import get_logger
+from llm_guard.util import calculate_risk_score, get_logger
 
 from .base import Scanner
 
@@ -86,9 +86,11 @@ def scan(self, prompt: str, output: str) -> (str, bool, float):
             result = self._anonymizer.anonymize(text=output, analyzer_results=analyzer_results)
             output = result.text
 
-            risk_score = max(analyzer_result.score for analyzer_result in analyzer_results)
+            risk_score = round(
+                max(analyzer_result.score for analyzer_result in analyzer_results), 2
+            )
             LOGGER.warning("Found sensitive data in the output", results=analyzer_results)
-            return output, False, risk_score
+            return output, False, calculate_risk_score(risk_score, self._threshold)
 
         LOGGER.debug("No sensitive data found in the output")
         return output, True, 0.0
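
As the signatures in the hunks above show, both scan methods return a (sanitized_text, is_valid, risk_score) tuple. A minimal usage sketch for context -- the import paths and constructor arguments (a Vault for Anonymize, defaults for Sensitive) are assumptions based on the package layout in this diff, not something this commit shows:

from llm_guard.input_scanners import Anonymize
from llm_guard.output_scanners import Sensitive
from llm_guard.vault import Vault

vault = Vault()  # stores (placeholder, original value) pairs appended by Anonymize
prompt = "Contact me at jane.doe@example.com"

# Input side: PII is replaced with placeholders that are appended to the vault.
sanitized_prompt, is_valid, risk_score = Anonymize(vault).scan(prompt)

# Output side: the model response is checked for (and redacted of) sensitive data.
output = "Sure, I will reach out to jane.doe@example.com"
sanitized_output, is_valid, risk_score = Sensitive().scan(sanitized_prompt, output)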
