From e562f3b28cacce7c594c7c00b23175dc8c4894b5 Mon Sep 17 00:00:00 2001 From: Sid Mohan Date: Mon, 19 Aug 2024 14:42:59 -0700 Subject: [PATCH] cli tests passed --- datafog/client.py | 56 ++++++++++++++ tests/test_client.py | 177 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+) diff --git a/datafog/client.py b/datafog/client.py index 9c5e07c3..69495c94 100644 --- a/datafog/client.py +++ b/datafog/client.py @@ -17,6 +17,7 @@ from .config import get_config from .main import DataFog +from .models.anonymizer import Anonymizer, AnonymizerType, HashType from .models.spacy_nlp import SpacyAnnotator app = typer.Typer() @@ -161,5 +162,60 @@ def list_entities(): typer.echo(annotator.list_entities()) +@app.command() +def redact_text(text: str = typer.Argument(..., help="Text to redact")): + """ + Redact PII in text. + + Args: + text: Text to redact. + + Prints the redacted text. + """ + annotator = SpacyAnnotator() + anonymizer = Anonymizer(anonymizer_type=AnonymizerType.REDACT) + annotations = annotator.annotate_text(text) + result = anonymizer.anonymize(text, annotations) + typer.echo(result.anonymized_text) + + +@app.command() +def replace_text(text: str = typer.Argument(..., help="Text to replace PII")): + """ + Replace PII in text with anonymized values. + + Args: + text: Text to replace PII. + + Prints the text with PII replaced. + """ + annotator = SpacyAnnotator() + anonymizer = Anonymizer(anonymizer_type=AnonymizerType.REPLACE) + annotations = annotator.annotate_text(text) + result = anonymizer.anonymize(text, annotations) + typer.echo(result.anonymized_text) + + +@app.command() +def hash_text( + text: str = typer.Argument(..., help="Text to hash PII"), + hash_type: HashType = typer.Option(HashType.SHA256, help="Hash algorithm to use"), +): + """ + Choose from SHA256, MD5, or SHA3-256 algorithms to hash detected PII in text. + + Args: + text: Text to hash PII. + hash_type: Hash algorithm to use. + + Prints the text with PII hashed. + """ + annotator = SpacyAnnotator() + anonymizer = Anonymizer(anonymizer_type=AnonymizerType.HASH, hash_type=hash_type) + annotations = annotator.annotate_text(text) + result = anonymizer.anonymize(text, annotations) + typer.echo(result.anonymized_text) + + if __name__ == "__main__": app() diff --git a/tests/test_client.py b/tests/test_client.py index 86443304..4e870c25 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -4,6 +4,14 @@ from typer.testing import CliRunner from datafog.client import app +from datafog.models.annotator import AnnotationResult, AnnotatorMetadata +from datafog.models.anonymizer import ( + AnonymizationResult, + Anonymizer, + AnonymizerType, + HashType, +) +from datafog.models.common import EntityTypes runner = CliRunner() @@ -14,6 +22,38 @@ def mock_datafog(): yield mock +@pytest.fixture +def sample_text(): + return "Jeff Smith works at DigiCorp Incorporated in Paris." + + +@pytest.fixture +def sample_annotations(): + return [ + AnnotationResult( + start=0, + end=9, + score=1.0, + entity_type=EntityTypes.PERSON, + recognition_metadata=AnnotatorMetadata(), + ), + AnnotationResult( + start=20, + end=42, + score=1.0, + entity_type=EntityTypes.ORGANIZATION, + recognition_metadata=AnnotatorMetadata(), + ), + AnnotationResult( + start=46, + end=51, + score=1.0, + entity_type=EntityTypes.LOCATION, + recognition_metadata=AnnotatorMetadata(), + ), + ] + + def test_scan_image_no_urls(): result = runner.invoke(app, ["scan-image"]) assert result.exit_code == 1 @@ -101,3 +141,140 @@ def test_list_entities(mock_spacy_annotator): result = runner.invoke(app, ["list-entities"]) assert result.exit_code == 0 assert "['PERSON', 'ORG']" in result.stdout + + +@patch("datafog.client.SpacyAnnotator") +@patch("datafog.client.Anonymizer") +def test_redact_text(mock_anonymizer, mock_spacy_annotator, sample_annotations): + mock_annotator = mock_spacy_annotator.return_value + mock_anonymizer_instance = mock_anonymizer.return_value + + sample_text = "John Doe works at Acme Corp" + sample_annotations = [ + AnnotationResult( + start=0, + end=8, + score=1.0, + entity_type=EntityTypes.PERSON, + recognition_metadata=AnnotatorMetadata(), + ), + AnnotationResult( + start=18, + end=27, + score=1.0, + entity_type=EntityTypes.ORGANIZATION, + recognition_metadata=AnnotatorMetadata(), + ), + ] + mock_annotator.annotate_text.return_value = sample_annotations + + mock_anonymizer_instance.anonymize.return_value = AnonymizationResult( + anonymized_text="[REDACTED] works at [REDACTED]", anonymized_entities=[] + ) + + result = runner.invoke(app, ["redact-text", sample_text]) + + assert result.exit_code == 0 + assert "[REDACTED] works at [REDACTED]" in result.stdout + mock_spacy_annotator.assert_called_once() + mock_anonymizer.assert_called_once_with(anonymizer_type=AnonymizerType.REDACT) + mock_annotator.annotate_text.assert_called_once_with(sample_text) + mock_anonymizer_instance.anonymize.assert_called_once_with( + sample_text, sample_annotations + ) + + +@patch("datafog.client.SpacyAnnotator") +@patch("datafog.client.Anonymizer") +def test_replace_text(mock_anonymizer, mock_spacy_annotator): + mock_annotator = mock_spacy_annotator.return_value + mock_anonymizer_instance = mock_anonymizer.return_value + + sample_text = "John Doe works at Acme Corp" + sample_annotations = [ + AnnotationResult( + start=0, + end=8, + score=1.0, + entity_type=EntityTypes.PERSON, + recognition_metadata=AnnotatorMetadata(), + ), + AnnotationResult( + start=18, + end=27, + score=1.0, + entity_type=EntityTypes.ORGANIZATION, + recognition_metadata=AnnotatorMetadata(), + ), + ] + mock_annotator.annotate_text.return_value = sample_annotations + + mock_anonymizer_instance.anonymize.return_value = AnonymizationResult( + anonymized_text="Jane Smith works at TechCo Inc", anonymized_entities=[] + ) + + result = runner.invoke(app, ["replace-text", sample_text]) + + assert result.exit_code == 0 + assert "Jane Smith works at TechCo Inc" in result.stdout + mock_spacy_annotator.assert_called_once() + mock_anonymizer.assert_called_once_with(anonymizer_type=AnonymizerType.REPLACE) + mock_annotator.annotate_text.assert_called_once_with(sample_text) + mock_anonymizer_instance.anonymize.assert_called_once_with( + sample_text, sample_annotations + ) + + +@patch("datafog.client.SpacyAnnotator") +@patch("datafog.client.Anonymizer") +def test_hash_text(mock_anonymizer, mock_spacy_annotator): + mock_annotator = mock_spacy_annotator.return_value + mock_anonymizer_instance = mock_anonymizer.return_value + + sample_text = "John Doe works at Acme Corp" + sample_annotations = [ + AnnotationResult( + start=0, + end=8, + score=1.0, + entity_type=EntityTypes.PERSON, + recognition_metadata=AnnotatorMetadata(), + ), + AnnotationResult( + start=18, + end=27, + score=1.0, + entity_type=EntityTypes.ORGANIZATION, + recognition_metadata=AnnotatorMetadata(), + ), + ] + mock_annotator.annotate_text.return_value = sample_annotations + + mock_anonymizer_instance.anonymize.return_value = AnonymizationResult( + anonymized_text="5ab5c95f works at 7b23f032", anonymized_entities=[] + ) + + result = runner.invoke(app, ["hash-text", sample_text]) + + assert result.exit_code == 0 + assert "5ab5c95f works at 7b23f032" in result.stdout + mock_spacy_annotator.assert_called_once() + mock_anonymizer.assert_called_once_with( + anonymizer_type=AnonymizerType.HASH, hash_type=HashType.SHA256 + ) + mock_annotator.annotate_text.assert_called_once_with(sample_text) + mock_anonymizer_instance.anonymize.assert_called_once_with( + sample_text, sample_annotations + ) + + # Test with custom hash type + result = runner.invoke(app, ["hash-text", sample_text, "--hash-type", "md5"]) + + print(f"Exit code: {result.exit_code}") + print(f"Output: {result.stdout}") + print(f"Exception: {result.exception}") + + assert result.exit_code == 0 + mock_anonymizer.assert_called_with( + anonymizer_type=AnonymizerType.HASH, hash_type=HashType.MD5 + )