Skip to content

Commit

Permalink
response to pirana comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
Sid Mohan authored and Sid Mohan committed Aug 6, 2024
1 parent 6534097 commit 6e88412
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 22 deletions.
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@ Note: The DataFog library uses asynchronous programming for OCR, so make sure to

## Examples

TODO: Update README. switch installation method to install requirements-dev.txt and then -e

For more detailed examples, check out our Jupyter notebooks in the `examples/` directory:

- `text_annotation_example.ipynb`: Demonstrates text PII annotation
Expand Down Expand Up @@ -118,7 +116,7 @@ For local development:
```
5. Install the package in editable mode:
```
pip install -e .
pip install -r requirements-dev.txt
```
6. Set up the project:
```
Expand Down
11 changes: 10 additions & 1 deletion datafog/processing/text_processing/spacy_pii_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,16 @@ def create(cls) -> "SpacyPIIAnnotator":
import subprocess

subprocess.run(
["python", "-m", "spacy", "download", "en_core_web_lg"], check=True
[
"python",
"-m",
"pip",
"install",
"--no-deps",
"--no-cache-dir",
"https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl",
],
check=True,
)
nlp = spacy.load("en_core_web_lg")

Expand Down
9 changes: 0 additions & 9 deletions datafog/services/image_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
from PIL import Image

from datafog.processing.image_processing.donut_processor import DonutProcessor

# from datafog.processing.image_processing.image_downloader import ImageDownloader
from datafog.processing.image_processing.pytesseract_processor import (
PytesseractProcessor,
)
Expand Down Expand Up @@ -41,13 +39,6 @@ def __init__(self, use_donut: bool = False, use_tesseract: bool = True):
PytesseractProcessor() if self.use_tesseract else None
)

# async def download_images(self, urls: List[str]) -> List[Image.Image]:
# async def download_image(url: str) -> Image.Image:
# return await self.downloader.download_image(url)

# tasks = [asyncio.create_task(download_image(url)) for url in urls]
# return await asyncio.gather(*tasks)

async def download_images(self, urls: List[str]) -> List[Image.Image]:
async def download_image(url: str) -> Image.Image:
return await self.downloader.download_image(url)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pandas
Requests
requests==2.32.3
spacy==3.7.5
pydantic>=2.8.2,<3.0.0
Pillow
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
long_description = f.read()

# Use a single source of truth for the version
__version__ = "3.4.0b1"
__version__ = "3.4.0"

project_urls = {
"Homepage": "https://datafog.ai",
Expand All @@ -26,7 +26,7 @@
packages=find_packages(),
install_requires=[
"pandas",
"Requests",
"requests==2.32.3",
"spacy==3.7.5",
"pydantic",
"Pillow",
Expand Down
6 changes: 0 additions & 6 deletions tests/test_image_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@
]


# @pytest.mark.asyncio
# async def test_download_images():
# image_service1 = ImageService()
# images = await image_service1.download_images(urls)
# assert len(images) == 2
# assert all(isinstance(image, Image.Image) for image in images)
@pytest.mark.asyncio
async def test_download_images():
image_service = ImageService()
Expand Down

0 comments on commit 6e88412

Please sign in to comment.