Skip to content

Commit

Permalink
fix: removed en-core-web-sm as an explicit dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
adubovik committed Sep 5, 2024
1 parent 1c6897f commit 4b7caf9
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 26 deletions.
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import re
from collections import defaultdict
from functools import cache
from typing import Dict, List, Optional

from aidial_sdk.pydantic_v1 import BaseModel
from spacy import load
from spacy import load as load_model
from spacy.cli.download import download as download_model
from spacy.language import Language

from aidial_interceptors_sdk.examples.utils.markdown import MarkdownTable

_PIPELINE = load("en_core_web_sm")
# Find spaCy models here: https://spacy.io/models/
DEFAULT_MODEL = "en_core_web_sm"

# Find the full list of entities here:
# https://github.com/explosion/spacy-models/blob/e46017f5c8241096c1b30fae080f0e0709c8038c/meta/en_core_web_sm-3.7.0.json#L121-L140
DEFAULT_LABELS_TO_REDACT = [
"PERSON",
"ORG",
Expand All @@ -17,6 +23,12 @@
]


@cache
def _get_pipeline(model: str) -> Language:
    """Return the spaCy pipeline for ``model``, downloading it only if needed.

    The result is memoized per model name by ``functools.cache``, so each
    model is loaded at most once per process.

    Args:
        model: Name of the spaCy model package, e.g. ``"en_core_web_sm"``.

    Returns:
        The loaded spaCy ``Language`` pipeline.
    """
    try:
        # Fast path: the model package is already installed, so no network
        # access or package installation is required.
        return load_model(model)
    except OSError:
        # spaCy reports a missing model package as OSError; fetch the model
        # on demand and retry the load once.
        download_model(model)
        return load_model(model)


class Replacement(BaseModel):
entity_type: str
idx: int
Expand Down Expand Up @@ -65,7 +77,7 @@ def _is_replacement(self, text: str, start: int, end: int) -> bool:
)

def anonymize(self, text: str) -> str:
doc = _PIPELINE(text)
doc = _get_pipeline(DEFAULT_MODEL)(text)
redacted = []
idx = 0

Expand Down
21 changes: 2 additions & 19 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "aidial-interceptors-sdk"
version = "0.1.0"
version = "0.1.0rc"
description = "Framework for creating interceptors for AI DIAL"
authors = ["EPAM RAIL <SpecialEPM-DIALDevTeam@epam.com>"]
homepage = "https://epam-rail.com"
Expand Down Expand Up @@ -30,14 +30,12 @@ aiostream = "^0.6.2"
aidial-sdk = { version = "^0.13.0", extras = ["telemetry"] }

# Extras
# Spacy pipeline: https://spacy.io/models/
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl", optional = true }
pillow = { version = "^10.4.0", optional = true }
numpy = { version = "^1.26.1", optional = true }
spacy = { version = "^3.7.5", optional = true }

[tool.poetry.extras]
examples = ["en-core-web-sm", "pillow", "numpy", "spacy"]
examples = ["pillow", "numpy", "spacy"]

[tool.poetry.group.test.dependencies]
pytest = "7.4.0"
Expand Down

0 comments on commit 4b7caf9

Please sign in to comment.