From 279624e2d0fdffdb096cfdcebb3d741e3270dc06 Mon Sep 17 00:00:00 2001 From: SeeknnDestroy Date: Mon, 16 Oct 2023 18:29:26 +0300 Subject: [PATCH] minor update --- README.md | 39 +++++++++++++++++++ ...ment_provider.py => document_providers.py} | 0 autollm/utils/hash_utils.py | 7 ++-- 3 files changed, 42 insertions(+), 4 deletions(-) rename autollm/utils/{document_provider.py => document_providers.py} (100%) diff --git a/README.md b/README.md index 6e715a24..ece9f9c1 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,39 @@ LLM Total Token Cost: $0.002317 """ ``` +### Document Providers (Powerful GitHub and Local Solutions) + +Unlock the potential of your content with AutoLLM's robust document providers. Seamlessly pull, process, and analyze documents from GitHub repositories or local directories. + +#### GitHub Document Provider + +Fetch up-to-date documents directly from your GitHub repositories—ideal for real-time data pipelines and collaborative projects. + +```python +from autollm.utils.document_providers import github_document_provider + +git_repo_url = "https://github.com/safevideo.git" +local_repo_path = Path("/safevideo/") +# Specify where to find the documents in the repo +relative_docs_path = Path("docs/") + +# Fetch and process documents +documents = github_document_provider(git_repo_url, local_repo_path, relative_docs_path) +``` + +#### Local Document Provider + +Process documents from local directories—ideal for offline data pipelines and local development. 
+ +```python +from autollm.utils.document_providers import local_document_provider + +input_dir = "/local/documents/path" + +# Read files as documents from local directory +documents = local_document_provider(input_dir=input_dir) +``` + ______________________________________________________________________ ## FAQ @@ -205,6 +238,12 @@ Our roadmap outlines upcoming features and integrations aimed at making QuickLLM - [ ] Add unit tests for online vectorDB integrations +- [ ] **Additional Document Providers**: + + - [ ] Amazon S3-based document provider + - [ ] FTP-based document provider + - [ ] Google Drive-based document provider + ______________________________________________________________________ ## Contributing diff --git a/autollm/utils/document_provider.py b/autollm/utils/document_providers.py similarity index 100% rename from autollm/utils/document_provider.py rename to autollm/utils/document_providers.py diff --git a/autollm/utils/hash_utils.py b/autollm/utils/hash_utils.py index 879fac06..e256366b 100644 --- a/autollm/utils/hash_utils.py +++ b/autollm/utils/hash_utils.py @@ -5,8 +5,6 @@ from llama_index.schema import Document -from autollm.vectorstores.base import BaseVS - logger = logging.getLogger(__name__) @@ -27,13 +25,14 @@ def get_md5(file_path: Path) -> str: return hasher.hexdigest() -def check_for_changes(documents: Sequence[Document], vs: BaseVS) -> Tuple[Sequence[Document], List[str]]: +# TODO: add vs type +def check_for_changes(documents: Sequence[Document], vs) -> Tuple[Sequence[Document], List[str]]: """ Check for file changes based on their hashes. Parameters: documents (Sequence[Document]): List of documents to check for changes. - vs (BaseVS): The vector store to check for changes in. + vs: The vector store to check for changes in. Returns: changed_documents (Sequence[Document]): List of documents that have changed.