From ae68c09595dea3492c8d1e00c1caf061316c415a Mon Sep 17 00:00:00 2001 From: Sid Mohan Date: Sun, 31 Mar 2024 18:48:01 -0700 Subject: [PATCH] pre-commit run pass --- examples/uploading-file-types.ipynb | 5 ++--- src/datafog/__init__.py | 13 +++++++------ tests/test_datafog.py | 10 ++++++---- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/examples/uploading-file-types.ipynb b/examples/uploading-file-types.ipynb index 18313db3..1b61c290 100644 --- a/examples/uploading-file-types.ipynb +++ b/examples/uploading-file-types.ipynb @@ -122,7 +122,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip list\n" + "%pip list" ] }, { @@ -141,8 +141,7 @@ "source": [ "input_file = \"/Users/sidmohan/Desktop/datafog-v2.4.0/datafog-python/tests/files/input_files/agi-builder-meetup.pdf\"\n", "output = datafog.DataFog.upload_file(uploaded_file_path=input_file)\n", - "print(output)\n", - "\n" + "print(output)" ] } ], diff --git a/src/datafog/__init__.py b/src/datafog/__init__.py index cbae6269..7c12a76c 100644 --- a/src/datafog/__init__.py +++ b/src/datafog/__init__.py @@ -2,12 +2,12 @@ import json import logging import tempfile +from pathlib import Path + import pandas as pd import requests import spacy from unstructured.partition.auto import partition -from io import BytesIO -from pathlib import Path from .__about__ import __version__ from .pii_tools import PresidioEngine @@ -40,7 +40,6 @@ def __init__(self): """ self.nlp = spacy.load("en_spacy_pii_fast") - @staticmethod def client(): """ @@ -50,7 +49,7 @@ def client(): DataFog: A new instance of the DataFog client. """ return DataFog() - + @staticmethod def upload_file(uploaded_file_path): uploaded_file_path = Path(uploaded_file_path) @@ -60,7 +59,9 @@ def upload_file(uploaded_file_path): if not uploaded_file_path.exists(): return "File not found." else: - temp_file = tempfile.NamedTemporaryFile(delete=True, suffix=uploaded_file_path.suffix) + temp_file = tempfile.NamedTemporaryFile( + delete=True, suffix=uploaded_file_path.suffix + ) temp_file.write(bytes_data) elements = partition(temp_file.name) text = "" @@ -69,7 +70,7 @@ def upload_file(uploaded_file_path): texts[uploaded_file_path.name] = text return texts - + def __call__(self, input_source, privacy_operation): """ Process the input data and apply the specified privacy operation. diff --git a/tests/test_datafog.py b/tests/test_datafog.py index c87766b2..cc32e5e0 100644 --- a/tests/test_datafog.py +++ b/tests/test_datafog.py @@ -1,6 +1,5 @@ # test_datafog.py import pytest -import requests from datafog import DataFog @@ -9,11 +8,14 @@ def datafog(): return DataFog() + def test_upload_file(datafog): - uploaded_file="tests/files/input_files/agi-builder-meetup.pdf" + uploaded_file = "tests/files/input_files/agi-builder-meetup.pdf" result = datafog.upload_file(uploaded_file_path=uploaded_file) - file_text = result[uploaded_file.split('/')[-1]] # Extract the text using the file name as key - assert "Cloudflare" in file_text # confirms that OCR is not on + file_text = result[ + uploaded_file.split("/")[-1] + ] # Extract the text using the file name as key + assert "Cloudflare" in file_text # confirms that OCR is not on assert "SF" in file_text assert "Laurie" in file_text assert "BentoML" in file_text