Commit

* better risk score calculation for Anonymize and Sensitive scanners
asofter committed Apr 18, 2024
1 parent d3c891c commit f31f065
Showing 2 changed files with 12 additions and 6 deletions.
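
Both scanners previously reported the raw Presidio analyzer confidence as the risk score (Anonymize rounded it to one decimal place). With this commit, the highest analyzer score is rounded to two decimal places and passed through calculate_risk_score from llm_guard.util together with the scanner's configured threshold, so both scanners report risk relative to that threshold. The helper itself is not part of this diff; the sketch below is only an illustration of a threshold-relative mapping, not the actual llm_guard.util.calculate_risk_score implementation:

def calculate_risk_score(score: float, threshold: float) -> float:
    # Illustrative sketch only -- not the real llm_guard helper.
    # Detections below the configured threshold are treated as no risk;
    # anything at or above it reports the analyzer confidence, capped at 1.0.
    if score < threshold:
        return 0.0
    return round(min(score, 1.0), 2)

Under this sketch, with a threshold of 0.5 an analyzer confidence of 0.88 is reported as 0.88, while 0.3 is reported as 0.0.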
10 changes: 7 additions & 3 deletions llm_guard/input_scanners/anonymize.py
@@ -6,7 +6,7 @@
 from presidio_anonymizer.entities import PIIEntity, RecognizerResult
 
 from ..exception import LLMGuardValidationError
-from ..util import get_logger
+from ..util import calculate_risk_score, get_logger
 from ..vault import Vault
 from .anonymize_helpers import (
     DEBERTA_AI4PRIVACY_v2_CONF,
@@ -321,7 +321,7 @@ def scan(self, prompt: str) -> (str, bool, float):
             max(analyzer_result.score for analyzer_result in analyzer_results)
             if analyzer_results
             else 0.0,
-            1,
+            2,
         )
         analyzer_results = self._remove_conflicts_and_get_text_manipulation_data(analyzer_results)
         merged_results = self._merge_entities_with_whitespace_between(prompt, analyzer_results)
@@ -339,7 +339,11 @@ def scan(self, prompt: str) -> (str, bool, float):
             for entity_placeholder, entity_value in anonymized_results:
                 if not self._vault.placeholder_exists(entity_placeholder):
                     self._vault.append((entity_placeholder, entity_value))
-            return self._preamble + sanitized_prompt, False, risk_score
+            return (
+                self._preamble + sanitized_prompt,
+                False,
+                calculate_risk_score(risk_score, self._threshold),
+            )
 
         LOGGER.debug("Prompt does not have sensitive data to replace", risk_score=risk_score)
 
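The change from 1 to 2 in the round(...) call above only affects how much precision the analyzer score keeps before it is mapped to a risk score; a quick illustration in plain Python (not scanner code):

score = 0.876
round(score, 1)  # 0.9  -- previous behaviour, one decimal place
round(score, 2)  # 0.88 -- new behaviour, two decimal places
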
8 changes: 5 additions & 3 deletions llm_guard/output_scanners/sensitive.py
@@ -9,7 +9,7 @@
     get_regex_patterns,
     get_transformers_recognizer,
 )
-from llm_guard.util import get_logger
+from llm_guard.util import calculate_risk_score, get_logger
 
 from .base import Scanner
 
@@ -86,9 +86,11 @@ def scan(self, prompt: str, output: str) -> (str, bool, float):
             result = self._anonymizer.anonymize(text=output, analyzer_results=analyzer_results)
             output = result.text
 
-            risk_score = max(analyzer_result.score for analyzer_result in analyzer_results)
+            risk_score = round(
+                max(analyzer_result.score for analyzer_result in analyzer_results), 2
+            )
             LOGGER.warning("Found sensitive data in the output", results=analyzer_results)
-            return output, False, risk_score
+            return output, False, calculate_risk_score(risk_score, self._threshold)
 
         LOGGER.debug("No sensitive data found in the output")
         return output, True, 0.0
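
As the signatures in the hunks above show, both scan methods return a (sanitized_text, is_valid, risk_score) tuple. A minimal usage sketch for context -- the import paths and constructor arguments (a Vault for Anonymize, defaults for Sensitive) are assumptions based on the package layout in this diff, not something this commit shows:

from llm_guard.input_scanners import Anonymize
from llm_guard.output_scanners import Sensitive
from llm_guard.vault import Vault

vault = Vault()  # stores (placeholder, original value) pairs appended by Anonymize
prompt = "Contact me at jane.doe@example.com"

# Input side: PII is replaced with placeholders that are appended to the vault.
sanitized_prompt, is_valid, risk_score = Anonymize(vault).scan(prompt)

# Output side: the model response is checked for (and redacted of) sensitive data.
output = "Sure, I will reach out to jane.doe@example.com"
sanitized_output, is_valid, risk_score = Sensitive().scan(sanitized_prompt, output)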
