This repository has been archived by the owner on Sep 12, 2024. It is now read-only.

Commit

add quickstart colab notebook (#48)
* add setuptools classifiers

* add open in colab button

* update pre-commit codespell to skip .ipynb

* add quickstart collab notebook

* update notebook

* add required extensions option to document readers (#50)

* add required exts option to document readers

* update autollm version

* update version to 0.0.12 (#52)

* add setuptools classifiers

* add open in colab button

* update pre-commit codespell to skip .ipynb

* add quickstart collab notebook

* update notebook

* update readme

* update

* add example notebook
SeeknnDestroy authored Oct 28, 2023
1 parent 009f08f commit aaa70f4
Showing 4 changed files with 308 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -62,7 +62,7 @@ repos:
args:
- --ignore-words-list=crate,nd,strack,dota,ane,segway,fo,gool,winn
# skip inline comments
- --skip="*.py:.*#.*"
- --skip="*.py:.*#.*,*ipynb"

- repo: https://github.com/PyCQA/docformatter
rev: v1.7.5
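The updated `--skip` value appends `*ipynb` so codespell no longer flags notebook files (whose base64 payloads trigger false positives). As a rough illustration of how a comma-separated skip list matches paths (using Python's `fnmatch` as a stand-in; codespell's actual matching logic may differ in details):

```python
from fnmatch import fnmatch

def is_skipped(path, skip_value):
    """Return True if `path` matches any comma-separated glob in `skip_value`."""
    return any(fnmatch(path, pattern) for pattern in skip_value.split(","))

skip = '*.py:.*#.*,*ipynb'
print(is_skipped("examples/quickstart.ipynb", skip))  # notebooks are skipped
print(is_skipped("autollm/serve.py", skip))           # regular .py files are still checked
```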
2 changes: 1 addition & 1 deletion README.md
@@ -11,6 +11,7 @@
[![version](https://badge.fury.io/py/autollm.svg)](https://badge.fury.io/py/autollm)
[![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)](https://www.python.org/downloads/release/python-3100/)
[![GNU AGPL 3.0](https://img.shields.io/badge/license-AGPL_3.0-green)](LICENSE)
[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/safevideo/autollm/blob/main/examples/quickstart.ipynb)

</div>

@@ -44,7 +45,6 @@ ______________________________________________________________________
### create a query engine in seconds

```python
>>> from autollm.utils.document_reading import read_local_files_as_documents
>>> from autollm import AutoQueryEngine

>>> query_engine = AutoQueryEngine.from_parameters(
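The README snippet builds the engine through a `from_parameters` classmethod rather than the plain constructor. The general shape of that pattern, sketched with a toy class (not autollm's actual implementation), is:

```python
class QueryEngine:
    """Toy stand-in illustrating the classmethod-constructor pattern."""

    def __init__(self, documents, system_prompt):
        self.documents = documents
        self.system_prompt = system_prompt

    @classmethod
    def from_parameters(cls, documents=None, system_prompt="You are a helpful assistant."):
        # Normalize and default the parameters in one place, then build the instance.
        return cls(documents=documents or [], system_prompt=system_prompt)

engine = QueryEngine.from_parameters(documents=["doc1", "doc2"])
print(engine.system_prompt)
```

The classmethod keeps defaulting and validation out of `__init__`, so callers can construct an engine "in seconds" from a small set of keyword arguments.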
291 changes: 291 additions & 0 deletions examples/quickstart.ipynb
@@ -0,0 +1,291 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/safevideo/autollm/blob/main/examples/quickstart.ipynb)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 0. Preparation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Install the latest version of autollm:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"!pip install autollm -Uq\n",
"!pip install gradio -Uq\n",
"!pip install gitpython -Uq\n",
"!pip install nbconvert -Uq"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Import required modules:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import required functions, classes\n",
"from autollm import AutoQueryEngine\n",
"from autollm.utils.document_reading import read_github_repo_as_documents\n",
"import os\n",
"import gradio as gr"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Set your OpenAI API key to use the default gpt-3.5-turbo model:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"OPENAI_API_KEY\"] = \"YOUR_API_KEY\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Read Files as Documents"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Set the GitHub repository URL, the folder to read from, and the required file extensions:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"git_repo_url = \"https://github.com/langchain-ai/langchain.git\"\n",
"relative_folder_path = \"docs\" # relative path from the repo root to the folder containing documents\n",
"required_exts = [\".md\", \".ipynb\"] # only files with these extensions will be read\n",
"\n",
"documents = read_github_repo_as_documents(git_repo_url=git_repo_url, relative_folder_path=relative_folder_path, required_exts=required_exts)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Configuration of AutoQueryEngine"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Basic Usage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- You can completely skip configuration if you want to use default settings."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- 🌟 **pro tip**: autollm defaults to lancedb as the vector store since it is lightweight, scales from development to production and is 100x cheaper than alternatives!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query_engine = AutoQueryEngine.from_parameters(documents=documents)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Advanced Usage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- You can configure the AutoQueryEngine to your needs:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"system_prompt = \"You are a friendly AI assistant that helps users find the most relevant and accurate answers to their questions based on the documents you have access to. When answering the questions, mostly rely on the info in the documents.\"\n",
"\n",
"query_wrapper_prompt = '''\n",
"The document information is below.\n",
"---------------------\n",
"{context_str}\n",
"---------------------\n",
"Using the document information and mostly relying on it,\n",
"answer the query.\n",
"Query: {query_str}\n",
"Answer:\n",
"'''\n",
"\n",
"enable_cost_calculator = True\n",
"\n",
"# llm params\n",
"model = \"gpt-3.5-turbo\"\n",
"\n",
"# vector store params\n",
"vector_store_type = \"LanceDBVectorStore\"\n",
"# specific params for LanceDBVectorStore\n",
"uri = \"tmp/lancedb\"\n",
"table_name = \"vectors\"\n",
"\n",
"# service context params\n",
"chunk_size = 1024\n",
"\n",
"# query engine params\n",
"similarity_top_k = 5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"llm_params = {\"model\": model}\n",
"vector_store_params = {\"vector_store_type\": vector_store_type, \"uri\": uri, \"table_name\": table_name}\n",
"service_context_params = {\"chunk_size\": chunk_size}\n",
"query_engine_params = {\"similarity_top_k\": similarity_top_k}\n",
"\n",
"query_engine = AutoQueryEngine.from_parameters(\n",
"    documents=documents,\n",
"    system_prompt=system_prompt,\n",
"    query_wrapper_prompt=query_wrapper_prompt,\n",
"    enable_cost_calculator=enable_cost_calculator,\n",
"    llm_params=llm_params,\n",
"    vector_store_params=vector_store_params,\n",
"    service_context_params=service_context_params,\n",
"    query_engine_params=query_engine_params,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Ask Anything to Your Documents"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response = query_engine.query(\"Who are you?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response.response"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- Or play with it in the gradio app 🚀"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def answer(query):\n",
"    return query_engine.query(query).response\n",
"\n",
"demo = gr.Interface(fn=answer, inputs=\"text\", outputs=\"text\")\n",
"\n",
"demo.launch()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### If you found this project useful, [give it a ⭐️ on GitHub](https://github.com/safevideo/autollm) to show your support!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aidocs",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
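The `query_wrapper_prompt` in the notebook above uses `{context_str}` and `{query_str}` placeholders. A minimal sketch of how such a template is typically filled before the text reaches the LLM (plain `str.format` over joined retrieved chunks; the library's internals may differ):

```python
query_wrapper_prompt = """\
The document information is below.
---------------------
{context_str}
---------------------
Using the document information and mostly relying on it,
answer the query.
Query: {query_str}
Answer:
"""

def build_prompt(template, context_chunks, query):
    # Join the retrieved chunks into one context block, then fill both placeholders.
    context_str = "\n\n".join(context_chunks)
    return template.format(context_str=context_str, query_str=query)

prompt = build_prompt(query_wrapper_prompt, ["autollm ships RAG-based LLM APIs."], "What does autollm do?")
print(prompt)
```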
16 changes: 15 additions & 1 deletion setup.py
@@ -51,8 +51,22 @@ def get_license():
description="Ship RAG-based LLM Web APIs, in seconds.",
long_description=get_long_description(),
long_description_content_type='text/markdown',
packages=setuptools.find_packages(exclude=["tests"]),
url='https://github.com/safevideo/autollm',
packages=setuptools.find_packages(exclude=["tests", "examples"]),
install_requires=get_requirements(),
extras_require={'dev': DEV_REQUIREMETNS},
python_requires='>=3.8',
classifiers=[
'Intended Audience :: Developers', 'Intended Audience :: Information Technology',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: GNU Affero General Public License v3',
'Operating System :: MacOS :: MacOS X', 'Operating System :: Microsoft :: Windows',
'Operating System :: POSIX', 'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11', 'Topic :: Software Development',
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Internet :: WWW/HTTP :: HTTP Servers'
],
)
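The setup.py change adds `"examples"` to the `find_packages` exclude list so the new notebook directory is not shipped as a package. A rough sketch of what an exclude filter does (illustration with `fnmatch`; setuptools' real implementation also expands patterns like `tests*` to cover subpackages):

```python
from fnmatch import fnmatch

def filter_packages(discovered, exclude):
    """Drop any discovered package whose name matches an exclude glob."""
    return [
        pkg for pkg in discovered
        if not any(fnmatch(pkg, pattern) for pattern in exclude)
    ]

discovered = ["autollm", "autollm.utils", "tests", "examples"]
print(filter_packages(discovered, exclude=["tests", "examples"]))
# -> ['autollm', 'autollm.utils']
```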
