Skip to content

Commit

Permalink
feat: update ability to validate nltk assets
Browse files Browse the repository at this point in the history
  • Loading branch information
christinestraub committed Jan 3, 2025
1 parent e7333e6 commit dad9f87
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion unstructured/nlp/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def check_for_nltk_package(package_name: str, package_category: str) -> bool:

# Ensure NLTK data exists in the specified path (pre-baked in Docker)
def validate_nltk_assets():
if not os.path.exists(NLTK_DATA_PATH):
copy_nltk_packages()
"""Validate that required NLTK packages are preloaded in the image."""
required_assets = [
("punkt_tab", "tokenizers"),
Expand All @@ -57,7 +59,6 @@ def validate_nltk_assets():
f"Ensure it is baked into the Docker image at '{NLTK_DATA_PATH}'."
)


# Validate NLTK assets at import time.
# NOTE: this is a module-level call, so merely importing this module runs the
# check. Per the visible body of validate_nltk_assets(), it also calls
# copy_nltk_packages() first when NLTK_DATA_PATH does not exist.
validate_nltk_assets()

Expand Down

0 comments on commit dad9f87

Please sign in to comment.