diff --git a/Dockerfile b/Dockerfile index ecdade864..b4f192ea9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ COPY pyproject.toml poetry.lock* /app/ RUN pip install poetry keyring # Export dependencies to requirements.txt -RUN poetry export --without-hashes --format=requirements.txt --output=requirements.txt -E eval -E parsing +RUN poetry export --without-hashes --format=requirements.txt --output=requirements.txt -E eval -E sentence_transformers # Install dependencies from requirements.txt RUN pip install --no-cache-dir --upgrade -r requirements.txt @@ -22,6 +22,11 @@ RUN pip install gunicorn uvicorn # Copy the rest of the application code COPY . /app +# Set the default configuration option +ENV CONFIG_OPTION=default + +# Expose the port EXPOSE 8000 -CMD ["gunicorn", "r2r.examples.servers.basic_pipeline:app", "--bind", "0.0.0.0:8000", "--workers", "2", "--threads", "8", "--timeout", "0", "--worker-class", "uvicorn.workers.UvicornWorker"] \ No newline at end of file +# Run Gunicorn via `exec` so it replaces the shell and receives the container's stop signals directly +CMD ["sh", "-c", "exec gunicorn \"r2r.examples.servers.basic_pipeline:create_app('$CONFIG_OPTION')\" --bind 0.0.0.0:8000 --workers 2 --threads 8 --timeout 0 --worker-class uvicorn.workers.UvicornWorker"] \ No newline at end of file diff --git a/README.md b/README.md index 5175355c9..367ac8ae6 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Note - the example above uses [SciPhi Cloud](https://app.sciphi.ai) to pair with ```bash # use the `'r2r[all]'` to download all required deps -pip install 'r2r[parsing,eval]' +pip install 'r2r[eval]' # setup env export OPENAI_API_KEY=sk-... 
@@ -65,8 +65,11 @@ export LOCAL_DB_PATH=local.sqlite ```bash docker pull emrgntcmplxty/r2r:latest -# Place your secrets in `.env` -docker run -d --name r2r_container -p 8000:8000 --env-file .env r2r +# Choose from CONFIG_OPTION in {`default`, `local_ollama`} +# For cloud deployment, select `default` and place your secrets in `.env` +# For local deployment, select `local_ollama` +docker run -d --name r2r_container -p 8000:8000 -e CONFIG_OPTION=local_ollama --env-file .env emrgntcmplxty/r2r:latest + ``` ## Basic Example diff --git a/docs/pages/development/full-install.mdx b/docs/pages/development/full-install.mdx index ca7d13261..e56e3553a 100644 --- a/docs/pages/development/full-install.mdx +++ b/docs/pages/development/full-install.mdx @@ -23,7 +23,7 @@ To install poetry, visit the [official Poetry website](https://python-poetry.org ```bash # See pyproject.toml for available extras # use "all" to include every optional dependency - poetry install -E parsing -E eval + poetry install -E eval ``` ##### Configure Environment Variables: diff --git a/docs/pages/getting-started/basic-example.mdx b/docs/pages/getting-started/basic-example.mdx index 07e8643b5..b8a1c7b8c 100644 --- a/docs/pages/getting-started/basic-example.mdx +++ b/docs/pages/getting-started/basic-example.mdx @@ -5,7 +5,7 @@ This example demonstrates setting up a local server using the R2R framework to h ```bash # use the `'r2r[all]'` to download all required deps -pip install 'r2r[parsing,eval]' +pip install 'r2r[eval]' export OPENAI_API_KEY="replace with your openai key" export LOCAL_DB_PATH=local.sqlite diff --git a/docs/pages/getting-started/quick-install.mdx b/docs/pages/getting-started/quick-install.mdx index dd6325600..896a2b683 100644 --- a/docs/pages/getting-started/quick-install.mdx +++ b/docs/pages/getting-started/quick-install.mdx @@ -4,7 +4,7 @@ Install R2R swiftly using `pip` to get started with minimal setup. 
This method w ```bash # `'r2r[all]'` to download all optional deps -pip install 'r2r[parsing,eval]' +pip install 'r2r[eval]' # setup env export OPENAI_API_KEY=sk-... diff --git a/docs/pages/tutorials/local_rag.mdx b/docs/pages/tutorials/local_rag.mdx index a126f42c7..422e7f681 100644 --- a/docs/pages/tutorials/local_rag.mdx +++ b/docs/pages/tutorials/local_rag.mdx @@ -18,7 +18,7 @@ Next, let's install R2R itself. We'll use pip to manage our Python dependencies. ```bash pip install --upgrade pip -pip install 'r2r[eval,parsing,local_llm]' +pip install 'r2r[eval,local_llm]' ``` This will install R2R along with the dependencies needed to run local LLMs. diff --git a/poetry.lock b/poetry.lock index 2ba7ed2b6..843661ea9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -321,7 +321,7 @@ dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] name = "beautifulsoup4" version = "4.12.3" description = "Screen-scraping library" -optional = true +optional = false python-versions = ">=3.6.0" files = [ {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, @@ -447,7 +447,7 @@ crt = ["awscrt (==0.19.19)"] name = "bs4" version = "0.0.2" description = "Dummy package for Beautiful Soup (beautifulsoup4)" -optional = true +optional = false python-versions = "*" files = [ {file = "bs4-0.0.2-py2.py3-none-any.whl", hash = "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc"}, @@ -4464,17 +4464,17 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pypdf" -version = "4.1.0" +version = "4.2.0" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" -optional = true +optional = false python-versions = ">=3.6" files = [ - {file = "pypdf-4.1.0-py3-none-any.whl", hash = "sha256:16cac912a05200099cef3f347c4c7e0aaf0a6d027603b8f9a973c0ea500dff89"}, - {file = "pypdf-4.1.0.tar.gz", hash = 
"sha256:01c3257ec908676efd60a4537e525b89d48e0852bc92b4e0aa4cc646feda17cc"}, + {file = "pypdf-4.2.0-py3-none-any.whl", hash = "sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1"}, + {file = "pypdf-4.2.0.tar.gz", hash = "sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1"}, ] [package.dependencies] -typing_extensions = {version = ">=3.7.4.3", markers = "python_version < \"3.10\""} +typing_extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] crypto = ["PyCryptodome", "cryptography"] @@ -5740,7 +5740,7 @@ files = [ name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, @@ -7109,7 +7109,6 @@ exa = ["exa-py"] ionic = ["ionic-api-sdk"] local-llm = ["llama-cpp-python", "sentence-transformers"] monitoring = ["sentry-sdk"] -parsing = ["bs4", "pypdf"] postgres = ["psycopg2-binary"] qdrant = ["qdrant_client"] reducto = ["boto3"] @@ -7118,4 +7117,4 @@ streaming = ["datasets"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "b7c9c5be81f85c374165d3f4a05a5bd3007c556f21536a90bda379a4d2c76a3f" +content-hash = "1d3987c692e06ccb98e69876cf857c3b66ea000bbc31f12060d2ecfe57c065ee" diff --git a/pyproject.toml b/pyproject.toml index deb727e1a..cef5a6e95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "r2r" -version = "0.1.31" +version = "0.1.32" description = "SciPhi R2R" authors = ["Owen Colegrove "] license = "MIT" @@ -16,12 +16,14 @@ include = ["config.json"] python = ">=3.9,<3.13" # Required dependencies +bs4= "^0.0.2" fastapi = "^0.109.2" fire = "^0.5.0" gunicorn = "^21.2.0" litellm = "^1.34.0" openai = "^1.11.1" pydantic = "^2.6.3" +pypdf = "^4.2.0" 
python-dotenv = "^1.0.1" python-multipart = "^0.0.9" requests = "^2.31.0" @@ -31,8 +33,6 @@ vecs = "^0.4.0" # Optional dependencies -bs4 = {version = "^0.0.2", optional = true} -pypdf = {version = "^4.0.2", optional = true} tiktoken = {version = "^0.5.2", optional = true} datasets = {version = "^2.16.1", optional = true} qdrant_client = {version = "^1.7.0", optional = true} @@ -48,7 +48,6 @@ llama-cpp-python = {version = "^0.2.57", optional = true} sentence-transformers = {version = "^2.6.1", optional = true} [tool.poetry.extras] -parsing = ["bs4", "pypdf"] embedding = ["tiktoken"] streaming = ["datasets"] qdrant = ["qdrant_client"] @@ -59,6 +58,7 @@ eval = ["parea-ai"] ionic = ["ionic-api-sdk"] reducto = ["boto3"] exa = ["exa-py"] +sentence_transformers = ["sentence-transformers"] local_llm = ["llama-cpp-python", "sentence-transformers"] all = ["bs4", "pypdf", "tiktoken", "datasets", "qdrant_client", "psycopg2-binary", "sentry-sdk", "parea-ai", "boto3", "exa-py", "llama-cpp-python"] diff --git a/r2r/core/adapters/base.py b/r2r/core/adapters/base.py index aaab4b0bd..f1c63810a 100644 --- a/r2r/core/adapters/base.py +++ b/r2r/core/adapters/base.py @@ -87,5 +87,5 @@ def adapt(self, data: bytes) -> list[str]: page_text = "".join( filter(lambda x: x in string.printable, page_text) ) - text += page_text + text += page_text + " " return [text] diff --git a/r2r/eval/parea/base.py b/r2r/eval/parea/base.py index 3565709fd..1799e0d35 100644 --- a/r2r/eval/parea/base.py +++ b/r2r/eval/parea/base.py @@ -37,7 +37,7 @@ def create_log(query: str, context: str, completion: str) -> Log: self._create_log = create_log except ImportError: raise ImportError( - "Parea is not installed. Please install it using `pip install parea`." + "Parea is not installed. Please install it using `pip install parea-ai`." 
) if not os.getenv("OPENAI_API_KEY"): raise ValueError( diff --git a/r2r/examples/servers/basic_pipeline.py b/r2r/examples/servers/basic_pipeline.py index 806b078d0..9f709421c 100644 --- a/r2r/examples/servers/basic_pipeline.py +++ b/r2r/examples/servers/basic_pipeline.py @@ -14,6 +14,19 @@ "local_llama_cpp": os.path.join(configs_path, "local_llama_cpp.json"), } + +def create_app(config_name: str = "default"): + """Build the pipeline app for `config_name`; raises KeyError if it is not a key of OPTIONS.""" + config_path = OPTIONS[config_name] + + app = E2EPipelineFactory.create_pipeline( + config=R2RConfig.load_config(config_path) + ) + return app + + +app = create_app() + if __name__ == "__main__": parser = argparse.ArgumentParser(description="R2R Pipeline") parser.add_argument( @@ -23,17 +36,8 @@ choices=OPTIONS.keys(), help="Configuration option for the pipeline", ) - args = parser.parse_args() - - config_path = OPTIONS[args.config] - - # Creates a pipeline with the specified configuration - # This is the main entry point for the application - # The pipeline is built using the specified configuration file - # Read more about the configuration in the documentation [https://r2r-docs.sciphi.ai/core-features/factory] - app = E2EPipelineFactory.create_pipeline( - config=R2RConfig.load_config(config_path) - ) + args, _ = parser.parse_known_args() + # The module-level `app` is always built from "default"; rebuild it here so --config takes effect + app = create_app(args.config) - # Run the FastAPI application using Uvicorn uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/r2r/main/app.py b/r2r/main/app.py index c7bb5fd74..b8082e736 100644 --- a/r2r/main/app.py +++ b/r2r/main/app.py @@ -227,9 +227,10 @@ async def rag_completion( "run_id": str(rag_pipeline.pipeline_run_info["run_id"]), "settings": query.settings.rag_settings.dict(), } - background_tasks.add_task( - requests.post, f"{url}/eval", json=payload - ) + if config.evals.get("frequency", 0.0) > 0.0: + background_tasks.add_task( + requests.post, f"{url}/eval", json=payload + ) return rag_completion diff --git a/r2r/pipelines/basic/eval.py b/r2r/pipelines/basic/eval.py index 847f27fab..d66999f0b 100644 --- 
a/r2r/pipelines/basic/eval.py +++ b/r2r/pipelines/basic/eval.py @@ -42,7 +42,7 @@ def __init__( from r2r.eval import PareaEvalProvider except ImportError: raise ImportError( - "Parea is not installed. Please install it using `pip install parea`." + "Parea is not installed. Please install it using `pip install parea-ai`." ) self.eval_provider = PareaEvalProvider( provider,