diff --git a/docs/07-scaling-and-pinecone.ipynb b/docs/07-scaling-and-pinecone.ipynb new file mode 100644 index 00000000..504b1336 --- /dev/null +++ b/docs/07-scaling-and-pinecone.ipynb @@ -0,0 +1,876 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "UxqB7_Ieur0s" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/07-scaling-and-pinecone.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/07-scaling-and-pinecone.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EduhQaNAur0u" + }, + "source": [ + "# Scaling to Many Routes and Using Pinecone" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_4JgNeX4ur0v" + }, + "source": [ + "Semantic router can be used with many hundreds, thousands, or even more routes. At very large scales it can be useful to use a vector database to store and search though your route vector space. Although we do not demonstrate _very large_ scale in this notebook, we will demonstrate more routes than usual and we will also see how to use the `PineconeIndex` for potential scalability and route persistence beyond our local machines." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bbmw8CO4ur0v" + }, + "source": [ + "## Installing the Library" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dLElfRhgur0v" + }, + "outputs": [], + "source": [ + "!pip install -qU \\\n", + " \"semantic-router[local, pinecone]==0.0.22\" \\\n", + " datasets==2.17.0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Downloading Routes" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jamesbriggs/opt/anaconda3/envs/decision-layer/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "Using the latest cached version of the dataset since aurelio-ai/generic-routes couldn't be found on the Hugging Face Hub\n", + "Found the latest cached dataset configuration 'default' at /Users/jamesbriggs/.cache/huggingface/datasets/aurelio-ai___generic-routes/default/0.0.0/5ed6ce316bb803dc716232e6c5f0eb1c7400e24d (last modified on Sun Feb 18 15:49:32 2024).\n" + ] + }, + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['name', 'utterances', 'description', 'function_schema', 'llm', 'score_threshold'],\n", + " num_rows: 50\n", + "})" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "data = load_dataset(\"aurelio-ai/generic-routes\", split=\"train\")\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each row in this dataset is a single route:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'politics',\n", + " 'utterances': [\"isn't politics the best thing ever\",\n", + " \"why don't you tell me about your political opinions\",\n", + " \"don't you just love the presidentdon't you just hate the president\",\n", + " \"they're going to destroy this country!\",\n", + " 'they will save the country!'],\n", + " 'description': None,\n", + " 'function_schema': None,\n", + " 'llm': None,\n", + " 'score_threshold': 0.82}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We transform these into `Route` objects like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Route(name='politics', utterances=[\"isn't politics the best thing ever\", \"why don't you tell me about your political opinions\", \"don't you just love the presidentdon't you just hate the president\", \"they're going to destroy this country!\", 'they will save the country!'], description=None, function_schema=None, llm=None, score_threshold=0.82)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from semantic_router import Route\n", + "\n", + "routes = [Route(**data[i]) for i in range(len(data))]\n", + "routes[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we initialize an `encoder`. We will use a simple `HuggingFaceEncoder`, we can also use popular encoder APIs like `CohereEncoder` and `OpenAIEncoder`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BI9AiDspur0y", + "outputId": "27329a54-3f16-44a5-ac20-13a6b26afb97" + }, + "outputs": [], + "source": [ + "from semantic_router.encoders import HuggingFaceEncoder\n", + "\n", + "encoder = HuggingFaceEncoder()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we initialize our `PineconeIndex`, all it requires is a [Pinecone API key](https://app.pinecone.io) (you do need to be using Pinecone Serverless)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[33m2024-02-18 17:11:50 WARNING semantic_router.utils.logger Index could not be initialized.\u001b[0m\n" + ] + } + ], + "source": [ + "import os\n", + "from getpass import getpass\n", + "from semantic_router.index import PineconeIndex\n", + "\n", + "os.environ[\"PINECONE_API_KEY\"] = os.environ.get(\"PINECONE_API_KEY\") or getpass(\"Enter Pinecone API key: \")\n", + "\n", + "index = PineconeIndex()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-02-18 17:12:21 INFO semantic_router.utils.logger local\u001b[0m\n" + ] + } + ], + "source": [ + "from semantic_router import RouteLayer\n", + "\n", + "rl = RouteLayer(encoder=encoder, routes=routes, index=index)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GuLCeIS5ur0y" + }, + "source": [ + "We run the solely static routes layer:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_rNREh7gur0y", + "outputId": "f3a1dc0b-d760-4efb-b634-d3547011dcb7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'chitchat'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"how's the weather today?\").name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "_If you see a warning about no classification being found, wait a moment and run the above cell again._" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "McbLKO26ur0y" + }, + "source": [ + "## Loading Index From Previous Initialization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ANAoEjxYur0y" + }, + "source": [ + "Because we're using Pinecone our route index can now persist / be access from different locations by simply connecting to the pre-existing index, by default this index uses the identifier `\"semantic-router--index\"` — this is the index we'll be loading here, but we can change the name via the `index_name` parameter if preferred.\n", + "\n", + "First, let's delete our old route layer, `index`, and `routes`." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "5jaF1Xa5ur0y" + }, + "outputs": [], + "source": [ + "del rl, index, routes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's load our index first. As mentioned, `\"index\"` is the default index name, so we don't need to specify this parameter — but we do so below for demonstrative purposes." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "index = PineconeIndex(index_name=\"index\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We load the pre-existing routes from this index like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('fitness_tips', 'suggest a workout routine'),\n", + " ('daily_inspiration', 'give me an inspirational quote'),\n", + " ('creative_writing_and_literature', 'how can I improve my writing skills?'),\n", + " ('chitchat', \"let's go to the chippy\"),\n", + " ('astronomy_and_space_exploration',\n", + " 'what are some interesting facts about the universe?'),\n", + " ('chitchat', 'the weather is horrendous'),\n", + " ('cloud_computing', 'AWS vs Azure vs Google Cloud'),\n", + " ('chitchat', 'how are things going?'),\n", + " ('educational_facts', 'tell me an interesting fact'),\n", + " ('chitchat', \"how's the weather today?\"),\n", + " ('ethical_considerations_in_tech', 'ethical hacking and its importance'),\n", + " ('compliments', 'say something nice about me'),\n", + " ('food_and_recipes', \"what's your favorite food?\"),\n", + " ('interview_preparation', 'common programming interview questions'),\n", + " ('gaming_and_esports', 'what are the popular games right now?'),\n", + " ('frameworks_and_libraries',\n", + " \"what's the difference between React and Angular?\"),\n", + " ('environmental_awareness', 'how can I be more eco-friendly?'),\n", + " ('career_advice_in_tech',\n", + " 'how to build a portfolio for software development'),\n", + " ('educational_facts', 'do you know any historical trivia?'),\n", + " ('interview_preparation', 'tips for technical interviews'),\n", + " ('data_structures_and_algorithms', 'algorithms every developer should know'),\n", + " ('cybersecurity_best_practices', 'securing your web applications'),\n", + " ('jokes', 'know any good jokes?'),\n", + " ('interview_preparation', 'how to prepare for a coding interview'),\n", + " ('coding_standards_and_conventions', 'maintaining consistency in codebase'),\n", + " ('cloud_computing', 'best practices for cloud security'),\n", + " ('historical_events', 'tell me about a significant historical event'),\n", + " ('coding_standards_and_conventions', 'JavaScript coding conventions'),\n", + " ('career_advice_in_tech', 'navigating career growth in tech'),\n", + " ('development_tools', 'best Git clients for macOS'),\n", + " ('environmental_awareness', 'what are some ways to save the planet?'),\n", + " ('historical_events', 'who was a notable figure in ancient history?'),\n", + " ('career_advice', 'suggest some career development tips'),\n", + " ('compliments', 'I need some positive vibes'),\n", + " ('frameworks_and_libraries', 'best Python libraries for data analysis'),\n", + " ('book_recommendations', \"what's your favorite book?\"),\n", + " ('gardening_and_horticulture', 'suggest some easy-care indoor plants'),\n", + " ('mental_health_support', 'what are ways to improve mental health?'),\n", + " ('data_structures_and_algorithms', 'basic data structures for beginners'),\n", + " ('hobbies_and_interests', 'suggest me a hobby'),\n", + " ('career_advice_in_tech', 'tips for landing your first tech job'),\n", + " ('art_and_culture', \"what's an interesting cultural tradition?\"),\n", + " ('language_learning', 'suggest ways to learn a new language'),\n", + " ('cybersecurity_best_practices',\n", + " 'introduction to ethical hacking for developers'),\n", + " ('debugging_tips', 'tips for debugging asynchronous code'),\n", + " ('coding_standards_and_conventions', 'why coding standards matter'),\n", + " ('daily_inspiration', 'share something uplifting'),\n", + " ('environmental_awareness', 'tell me about sustainability practices'),\n", + " ('career_advice', 'how can I improve my resume?'),\n", + " ('daily_inspiration', 'I need some inspiration for today'),\n", + " ('debugging_tips', 'best tools for JavaScript debugging'),\n", + " ('food_and_recipes', 'tell me about a dish from your country'),\n", + " ('jokes', 'make me laugh'),\n", + " ('best_practices', 'best practices for error handling in JavaScript'),\n", + " ('gaming_and_esports', 'suggest a good game for beginners'),\n", + " ('hobbies_and_interests', 'what are your interests?'),\n", + " ('machine_learning_in_development', 'using TensorFlow for beginners'),\n", + " ('language_syntax', 'how do closures work in JavaScript?'),\n", + " ('machine_learning_in_development',\n", + " 'machine learning model deployment best practices'),\n", + " ('gaming_and_esports', 'tell me about upcoming esports events'),\n", + " ('art_and_culture', 'suggest some must-visit museums'),\n", + " ('language_learning', 'how can I improve my Spanish?'),\n", + " ('mindfulness_and_wellness', 'how can I relax?'),\n", + " ('astronomy_and_space_exploration', 'tell me about the latest space mission'),\n", + " ('machine_learning_in_development',\n", + " 'how to start with machine learning in Python'),\n", + " ('frameworks_and_libraries', 'introduction to Django for web development'),\n", + " ('data_structures_and_algorithms', 'complexity analysis of algorithms'),\n", + " ('debugging_tips', 'how do I debug segmentation faults in C++?'),\n", + " ('career_advice', 'what are the emerging career fields?'),\n", + " ('creative_writing_and_literature', 'suggest some classic literature'),\n", + " ('hobbies_and_interests', \"I'm looking for a new pastime\"),\n", + " ('best_practices', 'how to write clean code in Python'),\n", + " ('fitness_tips', 'how can I stay active at home?'),\n", + " ('ethical_considerations_in_tech',\n", + " 'the role of ethics in artificial intelligence'),\n", + " ('cloud_computing', 'introduction to cloud storage options'),\n", + " ('ethical_considerations_in_tech', 'privacy concerns in app development'),\n", + " ('language_syntax', 'explain the syntax of Python functions'),\n", + " ('creative_writing_and_literature', 'what are some tips for storytelling?'),\n", + " ('cybersecurity_best_practices', 'common security vulnerabilities to avoid'),\n", + " ('book_recommendations', 'I need a book recommendation'),\n", + " ('mental_health_support', 'how can I manage stress?'),\n", + " ('chitchat', 'lovely weather today'),\n", + " ('mental_health_support', 'share some self-care practices'),\n", + " ('best_practices', 'what are the best practices for REST API design?'),\n", + " ('food_and_recipes', 'suggest a recipe for dinner'),\n", + " ('language_syntax', 'what are the new features in Java 15?'),\n", + " ('gardening_and_horticulture', 'how do I start a vegetable garden?'),\n", + " ('language_learning',\n", + " 'what are some effective language learning techniques?'),\n", + " ('historical_events', 'share an interesting piece of medieval history'),\n", + " ('mindfulness_and_wellness', 'tell me about mindfulness'),\n", + " ('development_tools', 'using Docker in development'),\n", + " ('book_recommendations', 'suggest a good book to read'),\n", + " ('gardening_and_horticulture',\n", + " 'what are some tips for sustainable gardening?'),\n", + " ('art_and_culture', 'tell me about your favorite artist'),\n", + " ('educational_facts', 'share a science fact'),\n", + " ('astronomy_and_space_exploration', 'how can I stargaze effectively?'),\n", + " ('fitness_tips', 'give me a fitness tip'),\n", + " ('development_tools', 'recommendations for Python IDEs'),\n", + " ('jokes', 'tell me a joke'),\n", + " ('compliments', 'give me a compliment'),\n", + " ('politics', \"why don't you tell me about your political opinions\"),\n", + " ('pet_care_advice', 'suggest some tips for cat care'),\n", + " ('music_discovery', 'suggest some new music'),\n", + " ('personal_questions', \"what's your favorite color?\"),\n", + " ('travel_stories', 'tell me about your favorite travel destination'),\n", + " ('tech_trends', 'tell me about the latest gadgets'),\n", + " ('science_and_innovation', 'tell me about a recent innovation'),\n", + " ('programming_challenges', 'suggest a coding challenge for beginners'),\n", + " ('project_management_in_tech', 'agile vs waterfall project management'),\n", + " ('science_and_innovation', 'what are the latest scientific discoveries?'),\n", + " ('programming_challenges', 'where can I find algorithmic puzzles?'),\n", + " ('personal_questions', 'what do you like to do for fun?'),\n", + " ('open_source_contributions', 'best practices for open-source contributors'),\n", + " ('music_discovery', 'who are the top artists right now?'),\n", + " ('mobile_app_development', 'optimizing performance in mobile apps'),\n", + " ('open_source_contributions', 'how to start contributing to open source'),\n", + " ('programming_challenges', 'programming tasks to improve problem-solving'),\n", + " ('politics', \"isn't politics the best thing ever\"),\n", + " ('politics',\n", + " \"don't you just love the presidentdon't you just hate the president\"),\n", + " ('project_management_in_tech', 'how to lead a development team'),\n", + " ('philosophical_questions', 'what is the meaning of life?'),\n", + " ('version_control_systems', 'introduction to SVN for beginners'),\n", + " ('software_architecture', 'explain microservices architecture'),\n", + " ('version_control_systems', 'best practices for branching in Git'),\n", + " ('pet_care_advice', 'what should I know about keeping a pet rabbit?'),\n", + " ('politics', 'they will save the country!'),\n", + " ('pet_care_advice', 'how can I train my dog?'),\n", + " ('philosophical_questions', 'what are your thoughts on free will?'),\n", + " ('mobile_app_development',\n", + " 'best tools for cross-platform mobile development'),\n", + " ('personal_questions', 'do you have any hobbies?'),\n", + " ('travel_stories', 'share a travel story'),\n", + " ('science_and_innovation', 'how does AI impact our daily lives?'),\n", + " ('movie_suggestions', \"what's your favorite movie?\"),\n", + " ('mobile_app_development', 'Kotlin vs Swift for mobile development'),\n", + " ('mindfulness_and_wellness', 'give me a wellness tip'),\n", + " ('motivation', 'I need some motivation'),\n", + " ('music_discovery', 'recommend songs for a workout playlist'),\n", + " ('software_architecture', 'introduction to domain-driven design'),\n", + " ('software_architecture', 'differences between MVC and MVVM'),\n", + " ('movie_suggestions', 'suggest a good movie for tonight'),\n", + " ('web_development_trends', 'emerging back-end technologies'),\n", + " ('philosophical_questions', 'do you believe in fate?'),\n", + " ('web_development_trends', 'the future of web development'),\n", + " ('web_development_trends', \"what's new in front-end development?\"),\n", + " ('motivation', 'give me a motivational quote'),\n", + " ('tech_trends', \"what's new in technology?\"),\n", + " ('version_control_systems', 'how to revert a commit in Git'),\n", + " ('project_management_in_tech', 'tools for managing tech projects'),\n", + " ('movie_suggestions', 'recommend a movie'),\n", + " ('motivation', 'inspire me'),\n", + " ('travel_stories', \"what's the most interesting place you've visited?\"),\n", + " ('tech_trends', 'what are the emerging tech trends?'),\n", + " ('politics', \"they're going to destroy this country!\"),\n", + " ('open_source_contributions', 'finding projects that accept contributions')]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index.get_routes()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will transform these into a dictionary format that we can use to initialize our `Route` objects." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "routes_dict = {}\n", + "for route, utterance in index.get_routes():\n", + " if route not in routes_dict:\n", + " routes_dict[route] = []\n", + " routes_dict[route].append(utterance)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'jokes': ['tell me a joke', 'make me laugh', 'know any good jokes?'],\n", + " 'career_advice': ['suggest some career development tips',\n", + " 'what are the emerging career fields?',\n", + " 'how can I improve my resume?'],\n", + " 'environmental_awareness': ['tell me about sustainability practices',\n", + " 'how can I be more eco-friendly?',\n", + " 'what are some ways to save the planet?'],\n", + " 'data_structures_and_algorithms': ['algorithms every developer should know',\n", + " 'basic data structures for beginners',\n", + " 'complexity analysis of algorithms'],\n", + " 'chitchat': ['lovely weather today',\n", + " \"how's the weather today?\",\n", + " 'how are things going?',\n", + " 'the weather is horrendous',\n", + " \"let's go to the chippy\"],\n", + " 'daily_inspiration': ['share something uplifting',\n", + " 'give me an inspirational quote',\n", + " 'I need some inspiration for today'],\n", + " 'career_advice_in_tech': ['how to build a portfolio for software development',\n", + " 'navigating career growth in tech',\n", + " 'tips for landing your first tech job'],\n", + " 'cloud_computing': ['best practices for cloud security',\n", + " 'introduction to cloud storage options',\n", + " 'AWS vs Azure vs Google Cloud'],\n", + " 'language_syntax': ['explain the syntax of Python functions',\n", + " 'how do closures work in JavaScript?',\n", + " 'what are the new features in Java 15?'],\n", + " 'art_and_culture': [\"what's an interesting cultural tradition?\",\n", + " 'suggest some must-visit museums',\n", + " 'tell me about your favorite artist'],\n", + " 'hobbies_and_interests': [\"I'm looking for a new pastime\",\n", + " 'what are your interests?',\n", + " 'suggest me a hobby'],\n", + " 'mental_health_support': ['what are ways to improve mental health?',\n", + " 'how can I manage stress?',\n", + " 'share some self-care practices'],\n", + " 'gardening_and_horticulture': ['how do I start a vegetable garden?',\n", + " 'suggest some easy-care indoor plants',\n", + " 'what are some tips for sustainable gardening?'],\n", + " 'book_recommendations': ['I need a book recommendation',\n", + " \"what's your favorite book?\",\n", + " 'suggest a good book to read'],\n", + " 'development_tools': ['best Git clients for macOS',\n", + " 'using Docker in development',\n", + " 'recommendations for Python IDEs'],\n", + " 'debugging_tips': ['best tools for JavaScript debugging',\n", + " 'how do I debug segmentation faults in C++?',\n", + " 'tips for debugging asynchronous code'],\n", + " 'cybersecurity_best_practices': ['securing your web applications',\n", + " 'common security vulnerabilities to avoid',\n", + " 'introduction to ethical hacking for developers'],\n", + " 'interview_preparation': ['how to prepare for a coding interview',\n", + " 'common programming interview questions',\n", + " 'tips for technical interviews'],\n", + " 'best_practices': ['how to write clean code in Python',\n", + " 'best practices for error handling in JavaScript',\n", + " 'what are the best practices for REST API design?'],\n", + " 'educational_facts': ['do you know any historical trivia?',\n", + " 'share a science fact',\n", + " 'tell me an interesting fact'],\n", + " 'language_learning': ['what are some effective language learning techniques?',\n", + " 'suggest ways to learn a new language',\n", + " 'how can I improve my Spanish?'],\n", + " 'mindfulness_and_wellness': ['tell me about mindfulness',\n", + " 'how can I relax?',\n", + " 'give me a wellness tip'],\n", + " 'gaming_and_esports': ['suggest a good game for beginners',\n", + " 'what are the popular games right now?',\n", + " 'tell me about upcoming esports events'],\n", + " 'historical_events': ['tell me about a significant historical event',\n", + " 'who was a notable figure in ancient history?',\n", + " 'share an interesting piece of medieval history'],\n", + " 'frameworks_and_libraries': ['best Python libraries for data analysis',\n", + " 'introduction to Django for web development',\n", + " \"what's the difference between React and Angular?\"],\n", + " 'food_and_recipes': ['suggest a recipe for dinner',\n", + " 'tell me about a dish from your country',\n", + " \"what's your favorite food?\"],\n", + " 'fitness_tips': ['suggest a workout routine',\n", + " 'give me a fitness tip',\n", + " 'how can I stay active at home?'],\n", + " 'ethical_considerations_in_tech': ['ethical hacking and its importance',\n", + " 'privacy concerns in app development',\n", + " 'the role of ethics in artificial intelligence'],\n", + " 'astronomy_and_space_exploration': ['tell me about the latest space mission',\n", + " 'what are some interesting facts about the universe?',\n", + " 'how can I stargaze effectively?'],\n", + " 'creative_writing_and_literature': ['what are some tips for storytelling?',\n", + " 'suggest some classic literature',\n", + " 'how can I improve my writing skills?'],\n", + " 'machine_learning_in_development': ['using TensorFlow for beginners',\n", + " 'machine learning model deployment best practices',\n", + " 'how to start with machine learning in Python'],\n", + " 'compliments': ['give me a compliment',\n", + " 'say something nice about me',\n", + " 'I need some positive vibes'],\n", + " 'coding_standards_and_conventions': ['maintaining consistency in codebase',\n", + " 'why coding standards matter',\n", + " 'JavaScript coding conventions'],\n", + " 'politics': [\"why don't you tell me about your political opinions\",\n", + " \"they're going to destroy this country!\",\n", + " 'they will save the country!',\n", + " \"isn't politics the best thing ever\",\n", + " \"don't you just love the presidentdon't you just hate the president\"],\n", + " 'motivation': ['give me a motivational quote',\n", + " 'inspire me',\n", + " 'I need some motivation'],\n", + " 'movie_suggestions': ['recommend a movie',\n", + " \"what's your favorite movie?\",\n", + " 'suggest a good movie for tonight'],\n", + " 'music_discovery': ['suggest some new music',\n", + " 'recommend songs for a workout playlist',\n", + " 'who are the top artists right now?'],\n", + " 'web_development_trends': [\"what's new in front-end development?\",\n", + " 'emerging back-end technologies',\n", + " 'the future of web development'],\n", + " 'science_and_innovation': ['tell me about a recent innovation',\n", + " 'how does AI impact our daily lives?',\n", + " 'what are the latest scientific discoveries?'],\n", + " 'open_source_contributions': ['best practices for open-source contributors',\n", + " 'how to start contributing to open source',\n", + " 'finding projects that accept contributions'],\n", + " 'travel_stories': [\"what's the most interesting place you've visited?\",\n", + " 'tell me about your favorite travel destination',\n", + " 'share a travel story'],\n", + " 'pet_care_advice': ['how can I train my dog?',\n", + " 'suggest some tips for cat care',\n", + " 'what should I know about keeping a pet rabbit?'],\n", + " 'mobile_app_development': ['Kotlin vs Swift for mobile development',\n", + " 'optimizing performance in mobile apps',\n", + " 'best tools for cross-platform mobile development'],\n", + " 'version_control_systems': ['introduction to SVN for beginners',\n", + " 'how to revert a commit in Git',\n", + " 'best practices for branching in Git'],\n", + " 'project_management_in_tech': ['agile vs waterfall project management',\n", + " 'tools for managing tech projects',\n", + " 'how to lead a development team'],\n", + " 'programming_challenges': ['where can I find algorithmic puzzles?',\n", + " 'programming tasks to improve problem-solving',\n", + " 'suggest a coding challenge for beginners'],\n", + " 'tech_trends': [\"what's new in technology?\",\n", + " 'tell me about the latest gadgets',\n", + " 'what are the emerging tech trends?'],\n", + " 'software_architecture': ['introduction to domain-driven design',\n", + " 'differences between MVC and MVVM',\n", + " 'explain microservices architecture'],\n", + " 'philosophical_questions': ['what is the meaning of life?',\n", + " 'do you believe in fate?',\n", + " 'what are your thoughts on free will?'],\n", + " 'personal_questions': [\"what's your favorite color?\",\n", + " 'what do you like to do for fun?',\n", + " 'do you have any hobbies?']}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "routes_dict" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we transform these into a list of `Route` objects." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Route(name='jokes', utterances=['tell me a joke', 'make me laugh', 'know any good jokes?'], description=None, function_schema=None, llm=None, score_threshold=None)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "routes = [Route(name=route, utterances=utterances) for route, utterances in routes_dict.items()]\n", + "routes[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we reinitialize our `RouteLayer`:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "YyFKV8jMur0z", + "outputId": "29cf80f4-552c-47bb-fbf9-019f5dfdf00a" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-02-18 17:16:19 INFO semantic_router.utils.logger local\u001b[0m\n" + ] + } + ], + "source": [ + "from semantic_router import RouteLayer\n", + "\n", + "rl = RouteLayer(encoder=encoder, routes=routes, index=index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And test it again:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'jokes'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"say something to make me laugh\").name" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'jokes'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"tell me something amusing\").name" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'chitchat'" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rl(\"it's raining cats and dogs today\").name" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qt0vkq2Xur00" + }, + "source": [ + "Perfect, our routes loaded from our `PineconeIndex` are working as expected! As mentioned, we can use the `PineconeIndex` for persistance and high scale use-cases, for example where we might have hundreds of thousands of utterances, or even millions." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J0oD1dxIur00" + }, + "source": [ + "---" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "decision-layer", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/poetry.lock b/poetry.lock index ed489810..4781d5dc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3479,4 +3479,4 @@ pinecone = ["pinecone-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "23b8995b7eee4df19bb2242d6a81de8557a855053b4346a532efa63be2ea303f" +content-hash = "2489dc5d7e05f05d822b85b8f6dc063ea1cf7771d07408338741270f50300e9a" diff --git a/pyproject.toml b/pyproject.toml index 1889ef6e..45ec2a9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "semantic-router" -version = "0.0.21" +version = "0.0.22" description = "Super fast semantic router for AI decision making" authors = [ "James Briggs ", diff --git a/semantic_router/__init__.py b/semantic_router/__init__.py index 3b7582d2..e1564a87 100644 --- a/semantic_router/__init__.py +++ b/semantic_router/__init__.py @@ -4,4 +4,4 @@ __all__ = ["RouteLayer", "HybridRouteLayer", "Route", "LayerConfig"] -__version__ = "0.0.21" +__version__ = "0.0.22" diff --git a/semantic_router/index/pinecone.py b/semantic_router/index/pinecone.py index a6492226..ae210a4f 100644 --- a/semantic_router/index/pinecone.py +++ b/semantic_router/index/pinecone.py @@ -156,10 +156,12 @@ def _get_all(self, prefix: Optional[str] = None, include_metadata: bool = False) # Make the request to list vectors. Adjust headers and parameters as needed. response = requests.get(list_url, params=params, headers=headers) response_data = response.json() - print(response_data) # Extract vector IDs from the response and add them to the list vector_ids = [vec["id"] for vec in response_data.get("vectors", [])] + # check that there are vector IDs, otherwise break the loop + if not vector_ids: + break all_vector_ids.extend(vector_ids) # if we need metadata, we fetch it