From 80c0f95d7b4574047a670b967ed4102767b51d4f Mon Sep 17 00:00:00 2001
From: Gleb Otochkin
Date: Mon, 28 Oct 2024 10:15:03 -0400
Subject: [PATCH] feat: movie search sample app with Pinecone and PostgreSQL
 backend (#261)

---
 infrastructure/movie-search-app/.gitignore    |  14 +
 infrastructure/movie-search-app/Procfile      |   1 +
 infrastructure/movie-search-app/README.md     | 169 ++++++
 .../movie-search-app/connect_tcp.py           | 124 +++++
 infrastructure/movie-search-app/data_model.py |  51 ++
 .../movie-search-app/gemini_model.py          |  93 ++++
 .../movie-search-app/movie_search.py          | 504 ++++++++++++++++++
 .../movie-search-app/pinecone_model.py        | 108 ++++
 .../movie-search-app/requirements.txt         |   8 +
 9 files changed, 1072 insertions(+)
 create mode 100644 infrastructure/movie-search-app/.gitignore
 create mode 100644 infrastructure/movie-search-app/Procfile
 create mode 100644 infrastructure/movie-search-app/README.md
 create mode 100644 infrastructure/movie-search-app/connect_tcp.py
 create mode 100644 infrastructure/movie-search-app/data_model.py
 create mode 100644 infrastructure/movie-search-app/gemini_model.py
 create mode 100644 infrastructure/movie-search-app/movie_search.py
 create mode 100644 infrastructure/movie-search-app/pinecone_model.py
 create mode 100644 infrastructure/movie-search-app/requirements.txt

diff --git a/infrastructure/movie-search-app/.gitignore b/infrastructure/movie-search-app/.gitignore
new file mode 100644
index 0000000..b563789
--- /dev/null
+++ b/infrastructure/movie-search-app/.gitignore
@@ -0,0 +1,14 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Environment
+.env
+.venv
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# Misc
+.python-version
\ No newline at end of file
diff --git a/infrastructure/movie-search-app/Procfile b/infrastructure/movie-search-app/Procfile
new file mode 100644
index 0000000..f2ee4cb
--- /dev/null
+++ b/infrastructure/movie-search-app/Procfile
@@ -0,0 +1 @@
+web: gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 movie_search:me
\ No newline at end of file
diff --git a/infrastructure/movie-search-app/README.md b/infrastructure/movie-search-app/README.md
new file mode 100644
index 0000000..ea71319
--- /dev/null
+++ b/infrastructure/movie-search-app/README.md
@@ -0,0 +1,169 @@
+
+# Sample app for GenAI embeddings using Pinecone or a PostgreSQL-compatible database
+## Description
+- The demo shows a sample movie search chat assistant using either Pinecone or a PostgreSQL-compatible database as a backend.
+- In both cases Google AI Studio is used for conversations and embedding generation.
+
+### Architecture
+- The application can be deployed on a VM or any other environment supporting Python 3.11
+- It connects to a Pinecone environment using a Pinecone API token
+- It uses Google AI Studio to generate responses (model gemini-1.5-flash) and to generate embeddings (model text-embedding-004)
+
+## Requirements
+- A platform supporting Python 3.11 to deploy the application
+- Token for Google AI Studio (you can get one [here](https://ai.google.dev/gemini-api/docs/api-key))
+- Token for the Pinecone API (optional)
+- Project in Google Cloud with the APIs for all components enabled.
+
+
+## Deployment for Pinecone Backend
+
+The movie dataset and how to load it into the Pinecone environment are not discussed here.
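+
+For illustration only, a loading script could look like the sketch below. Everything in it is an assumption inferred from how the app reads the index (namespace `sandpaper`; metadata fields `title`, `summary`, `director`, `genre`, `actors`; vectors produced with `text-embedding-004`); the `movies.csv` input file and the `GEMINI_API_KEY` variable are hypothetical, so adapt the sketch to your dataset.
+
+```python
+import csv
+import os
+
+import google.generativeai as genai
+from pinecone import Pinecone
+
+genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
+index = pc.Index(name=os.environ.get("PINECONE_INDEX_NAME", "netflix-index-01"))
+
+with open("movies.csv") as f:  # hypothetical input file with the fields below
+    for i, row in enumerate(csv.DictReader(f)):
+        # Embed the summary with the same model the app uses at query time.
+        emb = genai.embed_content(
+            model="models/text-embedding-004",
+            content=row["summary"],
+            task_type="retrieval_document",
+        )["embedding"]
+        # The app reads matches and metadata from the "sandpaper" namespace.
+        index.upsert(
+            vectors=[{
+                "id": str(i),
+                "values": emb,
+                "metadata": {k: row[k] for k in ("title", "summary", "director", "genre", "actors")},
+            }],
+            namespace="sandpaper",
+        )
+```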
+
+### Prepare Virtual machine
+- Enable the required APIs in Google Cloud
+```
+gcloud services enable compute.googleapis.com
+```
+- Create a GCE VM in a Google Cloud project
+- Connect to the VM using SSH
+- Clone the software
+```
+git clone https://github.com/GoogleCloudPlatform/devrel-demos.git
+```
+- Prepare Python 3.11
+```
+sudo apt install -y python3.11-venv git
+python3 -m venv .venv
+source .venv/bin/activate
+pip install --upgrade pip
+```
+### Run the application
+- Change directory
+```
+cd devrel-demos/infrastructure/movie-search-app
+```
+- Install dependencies
+```
+pip install -r requirements.txt
+```
+- Set environment variables (Pinecone index name and application port)
+```
+export PINECONE_INDEX_NAME=netflix-index-01
+export PORT=8080
+```
+- Start the application from the command line
+```
+gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 movie_search:me
+```
+- Connect to the application interface using the VM host:port
+
+### Work with application
+- Click the backend name at the bottom of the app to choose a backend.
+- Put the Google AI API token and the Pinecone API token at the top (you need both to use the Pinecone backend).
+- Select Pinecone as a backend and confirm the choice.
+- Post your question in the input window at the bottom and click the arrow.
+
+Ask sample questions about the movies
+
+### Deploy the application to Cloud Run (optional)
+Optionally, you can deploy the application to Cloud Run; the Cloud Run deployment steps are shown in the AlloyDB section below.
+
+## Deployment with AlloyDB Backend
+You will need an AlloyDB database as a backend for the application.
+
+The following assumes all actions are performed in the same Google Cloud project.
+### Enable all required APIs using the gcloud command
+```
+gcloud services enable alloydb.googleapis.com \
+    compute.googleapis.com \
+    cloudresourcemanager.googleapis.com \
+    servicenetworking.googleapis.com \
+    vpcaccess.googleapis.com \
+    aiplatform.googleapis.com \
+    cloudbuild.googleapis.com \
+    artifactregistry.googleapis.com \
+    run.googleapis.com \
+    iam.googleapis.com
+```
+
+### Create AlloyDB cluster
+Please follow the instructions in the documentation to create an AlloyDB cluster and primary instance in the same project where the application is going to be deployed.
+
+Here is the [link to the documentation for AlloyDB](https://cloud.google.com/alloydb/docs/quickstart/create-and-connect)
+
+### Create a database in AlloyDB
+Create a database with the name movies and the user movies_owner. You can choose your own names for the database and the user; the application reads them from environment variables. Optionally, you can modify the application to use Secret Manager in Google Cloud as a more secure approach. A possible schema is sketched below.
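+
+The following DDL is a minimal sketch inferred from the queries in `movie_search.py` (a `movies_json` table with a JSON `metadata` document and a pgvector `embedding` column); the column names and the 768-dimension size (the output size of `text-embedding-004`) are assumptions, so adjust them to your setup.
+
+```sql
+CREATE USER movies_owner WITH PASSWORD 'DatabasePassword';
+CREATE DATABASE movies OWNER movies_owner;
+\c movies
+-- pgvector provides the vector type and the <=> distance operator used by the app
+CREATE EXTENSION IF NOT EXISTS vector;
+CREATE TABLE movies_json (
+    id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+    metadata JSON,           -- title, summary, director, genre, actors
+    embedding vector(768)    -- text-embedding-004 returns 768-dimensional vectors
+);
+ALTER TABLE movies_json OWNER TO movies_owner;
+```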
+
+### Migrate data from Pinecone to AlloyDB
+- Move the data (vectors and metadata) from Pinecone to AlloyDB; one possible approach is sketched below.
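+
+A one-off migration sketch, assuming a serverless Pinecone index (pod-based indexes do not support `Index.list()`) and the `movies_json` schema above; it reuses the app's `connect_tcp.py`, so set the `INSTANCE_HOST`/`DB_*` variables described later plus `PINECONE_API_KEY` and `PINECONE_INDEX_NAME` before running it.
+
+```python
+import json
+import os
+
+import sqlalchemy
+from pinecone import Pinecone
+
+from connect_tcp import connect_tcp_socket
+
+pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
+index = pc.Index(name=os.environ.get("PINECONE_INDEX_NAME", "netflix-index-01"))
+engine = connect_tcp_socket()
+
+stmt = sqlalchemy.text(
+    "INSERT INTO movies_json (metadata, embedding) "
+    "VALUES ((:metadata)::json, (:embedding)::vector)"
+)
+with engine.begin() as conn:
+    # list() yields pages of vector ids; fetch() returns the values and metadata.
+    for page in index.list(namespace="sandpaper"):
+        fetched = index.fetch(ids=list(page), namespace="sandpaper")
+        for vec in fetched["vectors"].values():
+            conn.execute(stmt, parameters={
+                "metadata": json.dumps(dict(vec["metadata"])),
+                "embedding": str(list(vec["values"])),
+            })
+```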
+
+### Enable virtual environment for Python
+You can use either your laptop or a virtual machine for deployment. Using a VM deployed in the same Google Cloud project simplifies deployment and network configuration. On Debian Linux you can enable the virtual environment in the shell using the following commands:
+```
+sudo apt-get update
+sudo apt install python3.11-venv git postgresql-client
+python3 -m venv venv
+source venv/bin/activate
+```
+
+### Clone the software
+Clone the software using git:
+```
+git clone https://github.com/GoogleCloudPlatform/devrel-demos.git
+```
+### Run the application
+- Change directory
+```
+cd devrel-demos/infrastructure/movie-search-app
+```
+- Install dependencies
+```
+pip install -r requirements.txt
+```
+- Set environment variables (database connection, Pinecone index name and application port)
+```
+export PINECONE_INDEX_NAME=netflix-index-01
+export PORT=8080
+export DB_USER=movies_owner
+export DB_PASS=DatabasePassword
+export DB_NAME=movies
+export INSTANCE_HOST=ALLOYDB_IP
+export DB_PORT=5432
+```
+- Start the application from the command line
+```
+gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 movie_search:me
+```
+- Connect to the application interface using the VM host:port
+
+### Deploy the application to Cloud Run
+Create a service account movie-search-identity. Granting it the Vertex AI User role is optional for now, since the application does not use Vertex AI yet.
+Build and deploy the application to the Cloud Run service.
+
+```
+gcloud alpha run deploy movie-search-app \
+    --source=./ \
+    --no-allow-unauthenticated \
+    --service-account movie-search-identity \
+    --region us-central1 \
+    --network=default \
+    --set-env-vars=DB_USER=movies_owner,DB_PASS=StrongPassword,DB_NAME=movies,INSTANCE_HOST=127.0.0.1,DB_PORT=5432 \
+    --quiet
+```
+### Work with application
+- Click the backend name at the bottom of the app to choose a backend.
+- Put the Google AI API token at the top (the Pinecone API token is needed only for the Pinecone backend).
+- Select PostgreSQL as a backend and confirm the choice.
+- Post your question in the input window at the bottom and click the arrow.
+
+Ask sample questions about the movies
+
+# TO DO
+- Add support for other models and providers
+
+# License
+Apache License Version 2.0;
+Copyright 2024 Google LLC
+
+
diff --git a/infrastructure/movie-search-app/connect_tcp.py b/infrastructure/movie-search-app/connect_tcp.py
new file mode 100644
index 0000000..a8bd454
--- /dev/null
+++ b/infrastructure/movie-search-app/connect_tcp.py
@@ -0,0 +1,124 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# TODO (https://github.com/GoogleCloudPlatform/python-docs-samples/issues/8253): remove old region tags
+# [START cloud_sql_postgres_sqlalchemy_connect_tcp]
+# [START cloud_sql_postgres_sqlalchemy_sslcerts]
+# [START cloud_sql_postgres_sqlalchemy_connect_tcp_sslcerts]
+import os
+import ssl
+import logging
+
+import sqlalchemy
+
+
+def connect_tcp_socket() -> sqlalchemy.engine.base.Engine:
+    """Initializes a TCP connection pool for a PostgreSQL instance."""
+    # Note: Saving credentials in environment variables is convenient, but not
+    # secure - consider a more secure solution such as
+    # Cloud Secret Manager (https://cloud.google.com/secret-manager) to help
+    # keep secrets safe.
+    if os.environ.get("INSTANCE_HOST"):
+        db_host = os.environ[
+            "INSTANCE_HOST"
+        ]  # e.g. '127.0.0.1' ('172.17.0.1' if deployed to GAE Flex)
+    else:
+        db_host = "127.0.0.1"
+        logging.warning("INSTANCE_HOST is not set, using default: %s", db_host)
+    if os.environ.get("DB_PORT"):
+        db_port = os.environ["DB_PORT"]  # e.g. '5432'
+    else:
+        db_port = "5432"
+        logging.warning("DB_PORT is not set, using default: %s", db_port)
+    if os.environ.get("DB_USER"):
+        db_user = os.environ["DB_USER"]  # e.g. 'my-db-user'
+    else:
+        db_user = "movies_owner"
+        logging.warning("DB_USER is not set, using default: %s", db_user)
+    if os.environ.get("DB_PASS"):
+        db_pass = os.environ["DB_PASS"]  # e.g. 'my-db-password'
+    else:
+        db_pass = "password"
+        logging.warning("DB_PASS is not set, using default: %s", db_pass)
+    if os.environ.get("DB_NAME"):
+        db_name = os.environ["DB_NAME"]  # e.g. 'my-database'
+    else:
+        db_name = "movies"
+        logging.warning("DB_NAME is not set, using default: %s", db_name)
+
+
+    # [END cloud_sql_postgres_sqlalchemy_connect_tcp]
+    connect_args = {}
+    # For deployments that connect directly to a PostgreSQL instance without
+    # using an auth proxy, configuring SSL certificates will ensure the
+    # connection is encrypted.
+    if os.environ.get("DB_ROOT_CERT"):
+        db_root_cert = os.environ["DB_ROOT_CERT"]  # e.g. '/path/to/my/server-ca.pem'
+        db_cert = os.environ["DB_CERT"]  # e.g. '/path/to/my/client-cert.pem'
+        db_key = os.environ["DB_KEY"]  # e.g. '/path/to/my/client-key.pem'
+
+        ssl_context = ssl.SSLContext()
+        ssl_context.verify_mode = ssl.CERT_REQUIRED
+        ssl_context.load_verify_locations(db_root_cert)
+        ssl_context.load_cert_chain(db_cert, db_key)
+        connect_args["ssl_context"] = ssl_context
+
+    # [START cloud_sql_postgres_sqlalchemy_connect_tcp]
+    pool = sqlalchemy.create_engine(
+        # Equivalent URL:
+        # postgresql+pg8000://<db_user>:<db_pass>@<db_host>:<db_port>/<db_name>
+        sqlalchemy.engine.url.URL.create(
+            drivername="postgresql+pg8000",
+            username=db_user,
+            password=db_pass,
+            host=db_host,
+            port=db_port,
+            database=db_name,
+        ),
+        # [END cloud_sql_postgres_sqlalchemy_connect_tcp]
+        connect_args=connect_args,
+        # [START cloud_sql_postgres_sqlalchemy_connect_tcp]
+        # [START_EXCLUDE]
+        # [START cloud_sql_postgres_sqlalchemy_limit]
+        # Pool size is the maximum number of permanent connections to keep.
+        pool_size=5,
+        # Temporarily exceeds the set pool_size if no connections are available.
+        max_overflow=2,
+        # The total number of concurrent connections for your application will be
+        # a total of pool_size and max_overflow.
+        # [END cloud_sql_postgres_sqlalchemy_limit]
+        # [START cloud_sql_postgres_sqlalchemy_backoff]
+        # SQLAlchemy automatically uses delays between failed connection attempts,
+        # but provides no arguments for configuration.
+        # [END cloud_sql_postgres_sqlalchemy_backoff]
+        # [START cloud_sql_postgres_sqlalchemy_timeout]
+        # 'pool_timeout' is the maximum number of seconds to wait when retrieving a
+        # new connection from the pool. After the specified amount of time, an
+        # exception will be thrown.
+        pool_timeout=30,  # 30 seconds
+        # [END cloud_sql_postgres_sqlalchemy_timeout]
+        # [START cloud_sql_postgres_sqlalchemy_lifetime]
+        # 'pool_recycle' is the maximum number of seconds a connection can persist.
+        # Connections that live longer than the specified amount of time will be
+        # re-established.
+        pool_recycle=1800,  # 30 minutes
+        # [END cloud_sql_postgres_sqlalchemy_lifetime]
+        # [END_EXCLUDE]
+    )
+    return pool
+
+
+# [END cloud_sql_postgres_sqlalchemy_connect_tcp_sslcerts]
+# [END cloud_sql_postgres_sqlalchemy_sslcerts]
+# [END cloud_sql_postgres_sqlalchemy_connect_tcp]
\ No newline at end of file
diff --git a/infrastructure/movie-search-app/data_model.py b/infrastructure/movie-search-app/data_model.py
new file mode 100644
index 0000000..79b140e
--- /dev/null
+++ b/infrastructure/movie-search-app/data_model.py
@@ -0,0 +1,51 @@
+# Copyright 2024 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import mesop as me
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Literal
+
+Role = Literal["user", "model"]
+
+# Data Model
+@dataclass(kw_only=True)
+class ChatMessage:
+    role: Role = "user"
+    content: str = ""
+    in_progress: bool = False
+
+class Models(Enum):
+    # Backend labels shown in the picker: the Gemini flow is backed by a
+    # PostgreSQL-compatible database, the Pinecone flow by a Pinecone index.
+    GEMINI_1_5_FLASH = "PostgreSQL"
+    PINECONE = "Pinecone"
+
+@dataclass
+class Conversation:
+    model: str = ""
+    messages: list[ChatMessage] = field(default_factory=list)
+
+@me.stateclass
+class State:
+    is_model_picker_dialog_open: bool = False
+    input: str = ""
+    conversations: list[Conversation] = field(default_factory=list)
+    models: list[str] = field(default_factory=list)
+    gemini_api_key: str = ""
+    pinecone_api_key: str = ""
+    location: str = ""
+    in_progress: bool = False
+
+@me.stateclass
+class ModelDialogState:
+    selected_models: list[str] = field(default_factory=list)
\ No newline at end of file
diff --git a/infrastructure/movie-search-app/gemini_model.py b/infrastructure/movie-search-app/gemini_model.py
new file mode 100644
index 0000000..080e5ab
--- /dev/null
+++ b/infrastructure/movie-search-app/gemini_model.py
@@ -0,0 +1,93 @@
+# Copyright 2024 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import google.generativeai as genai
+from typing import Iterable
+import logging
+import json
+from data_model import ChatMessage, State
+import mesop as me
+
+generation_config = {
+    "temperature": 1,
+    "top_p": 0.95,
+    "top_k": 64,
+    "max_output_tokens": 8192,
+}
+
+def configure_gemini():
+    state = me.state(State)
+    genai.configure(api_key=state.gemini_api_key)
+
+def classify_intent(input: str) -> str:
+    configure_gemini()
+    model = genai.GenerativeModel(
+        model_name="gemini-1.5-flash-latest",
+        generation_config=generation_config,
+        system_instruction=[intent_prompt],
+    )
+    json_resp = model.generate_content(input)
+    logging.info(f"INTENT: {json_resp}")
+    return json_resp.text
+
+def generate_embedding(input: str) -> dict:
+    # Returns the genai response dict; callers read result["embedding"].
+    result = genai.embed_content(
+        model="models/text-embedding-004",
+        content=input,
+        task_type="retrieval_document",
+        title="Embedding of single string")
+    return result
+
+
+def send_prompt_flash(input: str, history: list[ChatMessage], sys_instruction: list[str]) -> Iterable[str]:
+    configure_gemini()
+    model = genai.GenerativeModel(
+        model_name="gemini-1.5-flash-latest",
+        generation_config=generation_config,
+        system_instruction=sys_instruction,
+    )
+    chat_session = model.start_chat(
+        history=[
+            {"role": message.role, "parts": [message.content]} for message in history
+        ]
+    )
+    for chunk in chat_session.send_message(input, stream=True):
+        yield chunk.text
+
+intent_prompt = """
+Answer the following questions as a JSON string based solely on the provided chat history. Do not assume anything that the user did not explicitly say.
+
+    isOnTopic: true or false, indicating whether the most recent query is on topic.
+    shouldRecommendMovie: true or false, indicating whether the user has asked for a movie or show recommendation and has given enough information to make a recommendation. If it is a follow-up question related to a movie or to a previous recommendation then it is true.
+    shouldRecommendMovieReasoning: A string explaining what information to obtain to make a movie or show recommendation.
+    summary: If isOnTopic is true, output a summary of what the user is looking for.
+Examples
+
+    History: [{'role': 'user', 'content': "Hi"}]
+    Answer: {
+        "isOnTopic": true,
+        "shouldRecommendMovie": false,
+        "shouldRecommendMovieReasoning": "User has not mentioned what they are looking for.",
+        "summary": ""
+    }
+
+    History: [{'role': 'user', 'content': "Hi, I am looking for a movie about a spy changing faces."}]
+    Answer: {
+        "isOnTopic": true,
+        "shouldRecommendMovie": true,
+        "shouldRecommendMovieReasoning": "User is looking for a movie recommendation.",
+        "summary": "A movie about a spy changing faces."
+    }
+    Do not use markdown for the output, respond with only JSON
+    """
\ No newline at end of file
diff --git a/infrastructure/movie-search-app/movie_search.py b/infrastructure/movie-search-app/movie_search.py
new file mode 100644
index 0000000..ec05c18
--- /dev/null
+++ b/infrastructure/movie-search-app/movie_search.py
@@ -0,0 +1,504 @@
+# Copyright 2024 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import mesop as me
+from data_model import State, Models, ModelDialogState, Conversation, ChatMessage
+import gemini_model
+import pinecone_model
+import os
+import logging
+import json
+import sqlalchemy
+from connect_tcp import connect_tcp_socket
+from dataclasses import dataclass, field
+from typing import Literal
+
+Role = Literal["user", "model"]
+
+# Dialog
+@me.content_component
+def dialog(is_open: bool):
+    with me.box(
+        style=me.Style(
+            background="rgba(0,0,0,0.4)",
+            display="block" if is_open else "none",
+            height="100%",
+            overflow_x="auto",
+            overflow_y="auto",
+            position="fixed",
+            width="100%",
+            z_index=1000,
+        )
+    ):
+        with me.box(
+            style=me.Style(
+                align_items="center",
+                display="grid",
+                height="100vh",
+                justify_items="center",
+            )
+        ):
+            with me.box(
+                style=me.Style(
+                    background="#fff",
+                    border_radius=20,
+                    box_sizing="content-box",
+                    box_shadow=(
+                        "0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"
+                    ),
+                    margin=me.Margin.symmetric(vertical="0", horizontal="auto"),
+                    padding=me.Padding.all(20),
+                )
+            ):
+                me.slot()
+
+@me.content_component
+def dialog_actions():
+    with me.box(
+        style=me.Style(
+            display="flex", justify_content="end", margin=me.Margin(top=20)
+        )
+    ):
+        me.slot()
+
+# App
+_ROLE_USER = "user"
+_ROLE_ASSISTANT = "model"
+
+_COLOR_BACKGROUND = me.theme_var("background")
+_COLOR_CHAT_BUBBLE_YOU = me.theme_var("surface-container-low")
+_COLOR_CHAT_BUBBLE_BOT = me.theme_var("secondary-container")
+
+_DEFAULT_PADDING = me.Padding.all(20)
+_DEFAULT_BORDER_SIDE = me.BorderSide(
+    width="1px", style="solid", color=me.theme_var("secondary-fixed")
+)
+
+_STYLE_APP_CONTAINER = me.Style(
+    background=_COLOR_BACKGROUND,
+    display="flex",
+    flex_direction="column",
+    height="100%",
+    margin=me.Margin.symmetric(vertical=0, horizontal="auto"),
+    width="min(1024px, 100%)",
+    box_shadow=("0 3px 1px -2px #0003, 0 2px 2px #00000024, 0 1px 5px #0000001f"),
+    padding=me.Padding(top=20, left=20, right=20),
+)
+_STYLE_TITLE = me.Style(padding=me.Padding(left=10))
+
+_STYLE_CHAT_BOX = me.Style(
+    flex_grow=1,
+    overflow_y="scroll",
+    padding=_DEFAULT_PADDING,
+    margin=me.Margin(bottom=20),
+    border_radius="10px",
+    border=me.Border(
+        left=_DEFAULT_BORDER_SIDE,
+        right=_DEFAULT_BORDER_SIDE,
+        top=_DEFAULT_BORDER_SIDE,
+        bottom=_DEFAULT_BORDER_SIDE,
+    ),
+)
+_STYLE_CHAT_INPUT = me.Style(width="100%")
+_STYLE_CHAT_INPUT_BOX = me.Style(
+    padding=me.Padding(top=30), display="flex", flex_direction="row"
+)
+_STYLE_CHAT_BUTTON = me.Style(margin=me.Margin(top=8, left=8))
+_STYLE_CHAT_BUBBLE_NAME = me.Style(
+    font_weight="bold",
+    font_size="13px",
+    padding=me.Padding(left=15, right=15, bottom=5),
+)
+_STYLE_CHAT_BUBBLE_PLAINTEXT = me.Style(margin=me.Margin.symmetric(vertical=15))
+
+_LABEL_BUTTON = "send"
+_LABEL_BUTTON_IN_PROGRESS = "pending"
+_LABEL_INPUT = "Enter your prompt"
+
+def _make_style_chat_bubble_wrapper(role: Role) -> me.Style:
+    """Generates styles for chat bubble position.
+
+    Args:
+      role: Chat bubble alignment depends on the role
+    """
+    align_items = "end" if role == _ROLE_USER else "start"
+    return me.Style(
+        display="flex",
+        flex_direction="column",
+        align_items=align_items,
+    )
+
+def _make_chat_bubble_style(role: Role) -> me.Style:
+    """Generates styles for chat bubble.
+
+    Args:
+      role: Chat bubble background color depends on the role
+    """
+    background = (
+        _COLOR_CHAT_BUBBLE_YOU if role == _ROLE_USER else _COLOR_CHAT_BUBBLE_BOT
+    )
+    return me.Style(
+        width="80%",
+        font_size="16px",
+        line_height="1.5",
+        background=background,
+        border_radius="15px",
+        padding=me.Padding(right=15, left=15, bottom=3),
+        margin=me.Margin(bottom=10),
+        border=me.Border(
+            left=_DEFAULT_BORDER_SIDE,
+            right=_DEFAULT_BORDER_SIDE,
+            top=_DEFAULT_BORDER_SIDE,
+            bottom=_DEFAULT_BORDER_SIDE,
+        ),
+    )
+
+db = None
+logger = logging.getLogger()
+
+def init_connection_pool() -> sqlalchemy.engine.base.Engine:
+    """Sets up connection pool for the app."""
+    if os.environ.get("INSTANCE_HOST"):
+        db_host = os.environ[
+            'INSTANCE_HOST'
+        ]  # e.g. '127.0.0.1' ('172.17.0.1' if deployed to GAE Flex)
+    else:
+        db_host = "127.0.0.1"
+        logging.warning("INSTANCE_HOST is not set, using default: %s", db_host)
+
+    # use a TCP socket when INSTANCE_HOST (e.g. 127.0.0.1) is defined
+    if db_host:
+        return connect_tcp_socket()
+
+    # # use the connector when INSTANCE_CONNECTION_NAME (e.g. project:region:instance) is defined
+    # if os.environ.get("INSTANCE_CONNECTION_NAME"):
+    #     # Either a DB_USER or a DB_IAM_USER should be defined. If both are
+    #     # defined, DB_IAM_USER takes precedence.
+    #     return (
+    #         connect_with_connector_auto_iam_authn()
+    #         if os.environ.get("DB_IAM_USER")
+    #         else connect_with_connector()
+    #     )
+
+    # raise ValueError(
+    #     "Missing database connection type. Please define one of INSTANCE_HOST, INSTANCE_UNIX_SOCKET, or INSTANCE_CONNECTION_NAME"
+    # )
+
+def init_db() -> sqlalchemy.engine.base.Engine:
+    """Initiates connection to database and its structure."""
+    global db
+    if db is None:
+        db = init_connection_pool()
+
+
+def get_movies(db: sqlalchemy.engine.base.Engine, embeddings: str) -> list[dict]:
+    """Returns the movies closest to the supplied embedding."""
+    movies = []
+
+    stmt = sqlalchemy.text(
+        """
+        SELECT
+            mj.metadata->'title' as title,
+            mj.metadata->'summary' as summary,
+            mj.metadata->'director' as director,
+            mj.metadata->'actors' as actors,
+            (mj.embedding <=> (:embeddings)::vector) as distance
+        FROM
+            movies_json mj
+        ORDER BY
+            distance ASC
+        LIMIT 5;
+        """
+    )
+    try:
+        with db.connect() as conn:
+            app_movies = conn.execute(stmt, parameters={"embeddings": embeddings}).fetchall()
+    except Exception as e:
+        logger.exception(e)
+        return movies
+    for row in app_movies:
+        movies.append({"title": row[0], "summary": row[1], "director": row[2], "actors": row[3]})
+    return movies
+
+def change_model_option(e: me.CheckboxChangeEvent):
+    s = me.state(ModelDialogState)
+    if e.checked:
+        s.selected_models.append(e.key)
+    else:
+        s.selected_models.remove(e.key)
+
+def set_gemini_api_key(e: me.InputBlurEvent):
+    me.state(State).gemini_api_key = e.value
+
+def set_pinecone_api_key(e: me.InputBlurEvent):
+    me.state(State).pinecone_api_key = e.value
+
+def model_picker_dialog():
+    state = me.state(State)
+    with dialog(state.is_model_picker_dialog_open):
+        with me.box(style=me.Style(display="flex", flex_direction="column", gap=12)):
+            me.text("API keys")
+            me.input(
+                label="Gemini API Key",
+                value=state.gemini_api_key,
+                on_blur=set_gemini_api_key,
+            )
+            me.input(
+                label="Pinecone API Key",
+                value=state.pinecone_api_key,
+                on_blur=set_pinecone_api_key,
+            )
+            me.text("Pick a backend")
+            for model in Models:
+                if model.name.startswith("GEMINI"):
+                    disabled = not state.gemini_api_key
+                elif model.name.startswith("PINECONE"):
+                    disabled = not state.pinecone_api_key or not state.gemini_api_key
+                else:
+                    disabled = False
+                me.checkbox(
+                    key=model.value,
+                    label=model.value,
+                    checked=model.value in state.models,
+                    disabled=disabled,
+                    on_change=change_model_option,
+                    style=me.Style(
+                        display="flex",
+                        flex_direction="column",
+                        gap=4,
+                        padding=me.Padding(top=12),
+                    ),
+                )
+        with dialog_actions():
+            me.button("Cancel", on_click=close_model_picker_dialog)
+            me.button("Confirm", on_click=confirm_model_picker_dialog)
+
+def close_model_picker_dialog(e: me.ClickEvent):
+    state = me.state(State)
+    state.is_model_picker_dialog_open = False
+
+def confirm_model_picker_dialog(e: me.ClickEvent):
+    dialog_state = me.state(ModelDialogState)
+    state = me.state(State)
+    state.is_model_picker_dialog_open = False
+    state.models = dialog_state.selected_models
+
+
+
+ROOT_BOX_STYLE = me.Style(
+    background="#e7f2ff",
+    height="100%",
+    font_family="Inter",
+    display="flex",
+    flex_direction="column",
+)
+
+
+
+@me.page(
+    path="/",
+    stylesheets=[
+        "https://fonts.googleapis.com/css2?family=Inter:wght@100..900&display=swap"
+    ],
+    title="Movie Search Assistant",
+)
+def page():
+    bot_user = "model"
+    global db
+    # initialize the connection pool within the request context
+    if not db:
+        # initiate a connection pool to a Postgres database
+        db = init_connection_pool()
+    model_picker_dialog()
+
+    def toggle_theme(e: me.ClickEvent):
+        if me.theme_brightness() == "light":
+            me.set_theme_mode("dark")
+        else:
+            me.set_theme_mode("light")
+
+    def on_input_enter(e: me.InputEnterEvent):
+        state = me.state(State)
+        state.input = e.value
+        yield from send_prompt(e)
+
+    with me.box(style=_STYLE_APP_CONTAINER):
+        with me.content_button(
+            type="icon",
+            style=me.Style(position="absolute", right=4, top=8),
+            on_click=toggle_theme,
+        ):
+            me.icon("light_mode" if me.theme_brightness() == "dark" else "dark_mode")
+
+        title = "Movie Search Virtual Assistant"
+
+        if title:
+            me.text(title, type="headline-5", style=_STYLE_TITLE)
+
+        with me.box(style=_STYLE_CHAT_BOX):
+            state = me.state(State)
+            for conversation in state.conversations:
+                for message in conversation.messages:
+                    with me.box(style=_make_style_chat_bubble_wrapper(message.role)):
+                        if message.role == _ROLE_ASSISTANT:
+                            me.text(bot_user, style=_STYLE_CHAT_BUBBLE_NAME)
+                        with me.box(style=_make_chat_bubble_style(message.role)):
+                            if message.role == _ROLE_USER:
+                                me.text(message.content, style=_STYLE_CHAT_BUBBLE_PLAINTEXT)
+                            else:
+                                me.markdown(message.content)
+
+        with me.box(style=_STYLE_CHAT_INPUT_BOX):
+            with me.box(style=me.Style(flex_grow=1)):
+                me.input(
+                    label=_LABEL_INPUT,
+                    # Workaround: update key to clear input.
+                    key=f"input-{len(state.conversations)}",
+                    on_blur=on_blur,
+                    on_enter=on_input_enter,
+                    style=_STYLE_CHAT_INPUT,
+                )
+            with me.box(
+                style=me.Style(
+                    display="flex",
+                    padding=me.Padding(left=12, bottom=12),
+                    cursor="pointer",
+                ),
+                on_click=switch_model,
+            ):
+                me.text(
+                    "Backend:",
+                    style=me.Style(font_weight=500, padding=me.Padding(right=6)),
+                )
+                if state.models:
+                    me.text(", ".join(state.models))
+                else:
+                    me.text("(no backend selected)")
+            with me.content_button(
+                color="primary",
+                type="flat",
+                disabled=state.in_progress,
+                on_click=send_prompt,
+                style=_STYLE_CHAT_BUTTON,
+            ):
+                me.icon(
+                    _LABEL_BUTTON_IN_PROGRESS if state.in_progress else _LABEL_BUTTON
+                )
+
+
+def switch_model(e: me.ClickEvent):
+    state = me.state(State)
+    state.is_model_picker_dialog_open = True
+    dialog_state = me.state(ModelDialogState)
+    dialog_state.selected_models = state.models[:]
+
+
+def on_blur(e: me.InputBlurEvent):
+    state = me.state(State)
+    state.input = e.value
+
+
+def send_prompt(e: me.ClickEvent):
+    state = me.state(State)
+    if not state.conversations:
+        for model in state.models:
+            state.conversations.append(Conversation(model=model, messages=[]))
+    input = state.input
+    state.input = ""
+    yield
+
+    for conversation in state.conversations:
+        model = conversation.model
+        messages = conversation.messages
+        history = messages[:]
+        messages.append(ChatMessage(role="user", content=input))
+        messages.append(ChatMessage(role="model", in_progress=True))
+        yield
+
+        if model == Models.GEMINI_1_5_FLASH.value:
+            # Retry until the intent classifier returns valid JSON.
+            while True:
+                intent_str = gemini_model.classify_intent(input)
+                logging.info(f"INTENT: {intent_str}")
+                try:
+                    json_intent = json.loads(intent_str)
+                except json.JSONDecodeError as e:
+                    logging.warning(f"Error decoding JSON: {e}")
+                    continue
+                break
+
+            if json_intent["shouldRecommendMovie"] is True:
+                search_embedding = gemini_model.generate_embedding(json_intent["summary"])
+                movies_list = get_movies(db, str(search_embedding["embedding"]))
+                logging.info(f"MOVIES LIST: {movies_list}")
+                persona = "You are a friendly assistant helping to find a movie or show based on the client's request"
+                safeguards = "You should give information about the movie or show, year, main actors and any supplemental information. Do not invent any new movies or names; use only the list of shows defined in the context for the answer"
+                context = """
+                Based on the client request we have loaded a list of shows closely related to the search.
+                The list is in JSON format with values like {"title":"Sparring","summary":"some description","director":"somebody","genre": "Drama", "actors": "Mathieu Kassovitz, Souleymane M'Baye"}
+                Here is the list of shows:\n
+                """ + str(movies_list)
+                system_instruction = [persona, safeguards, context]
+            else:
+                persona = "You are a friendly assistant helping to find a movie or show based on the client's request"
+                safeguards = "You should give information about the movie or show, year, main actors and any supplemental information. Do not invent any new movies or names; use only the list of shows defined in the context for the answer"
+                system_instruction = [persona, safeguards]
+            llm_response = gemini_model.send_prompt_flash(input, history, system_instruction)
+
+        elif model == Models.PINECONE.value:
+            # Retry until the intent classifier returns valid JSON.
+            while True:
+                intent_str = pinecone_model.classify_intent(input)
+                logging.info(f"INTENT: {intent_str}")
+                try:
+                    json_intent = json.loads(intent_str)
+                except json.JSONDecodeError as e:
+                    logging.warning(f"Error decoding JSON: {e}")
+                    continue
+                break
+            if json_intent["shouldRecommendMovie"] is True:
+                search_embedding = pinecone_model.generate_embedding(json_intent["summary"])
+                movies_list = pinecone_model.get_movies(search_embedding["embedding"])
+                logging.info(f"MOVIES LIST: {movies_list}")
+                persona = "You are a friendly assistant helping to find a movie or show based on the client's request"
+                safeguards = "You should give information about the movie or show, year, main actors and any supplemental information. Do not invent any new movies or names; use only the list of shows defined in the context for the answer"
+                context = """
+                Based on the client request we have loaded a list of shows closely related to the search.
+                The list is in JSON format with values like {"title":"Sparring","summary":"some description","director":"somebody","genre": "Drama", "actors": "Mathieu Kassovitz, Souleymane M'Baye"}
+                Here is the list of shows:\n
+                """ + str(movies_list)
+                system_instruction = [persona, safeguards, context]
+            else:
+                persona = "You are a friendly assistant helping to find a movie or show based on the client's request"
+                safeguards = "You should give information about the movie or show, year, main actors and any supplemental information. Do not invent any new movies or names; use only the list of shows defined in the context for the answer"
+                system_instruction = [persona, safeguards]
+            llm_response = pinecone_model.send_prompt_flash(input, history, system_instruction)
+        else:
+            raise Exception("Unhandled model", model)
+
+        for chunk in llm_response:
+            messages[-1].content += chunk
+            yield
+        messages[-1].in_progress = False
+        yield
diff --git a/infrastructure/movie-search-app/pinecone_model.py b/infrastructure/movie-search-app/pinecone_model.py
new file mode 100644
index 0000000..d28d7d1
--- /dev/null
+++ b/infrastructure/movie-search-app/pinecone_model.py
@@ -0,0 +1,108 @@
+# Copyright 2024 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import google.generativeai as genai
+from typing import Iterable
+from pinecone import Pinecone
+import logging
+import os
+from data_model import ChatMessage, State
+import mesop as me
+
+generation_config = {
+    "temperature": 1,
+    "top_p": 0.95,
+    "top_k": 64,
+    "max_output_tokens": 8192,
+}
+
+def configure_gemini():
+    state = me.state(State)
+    genai.configure(api_key=state.gemini_api_key)
+
+def classify_intent(input: str) -> str:
+    configure_gemini()
+    model = genai.GenerativeModel(
+        model_name="gemini-1.5-flash-latest",
+        generation_config=generation_config,
+        system_instruction=[intent_prompt],
+    )
+    json_resp = model.generate_content(input)
+    logging.info(f"INTENT: {json_resp}")
+    return json_resp.text
+
+def generate_embedding(input: str) -> dict:
+    # Returns the genai response dict; callers read result["embedding"].
+    result = genai.embed_content(
+        model="models/text-embedding-004",
+        content=input,
+        task_type="retrieval_document",
+        title="Embedding of single string")
+    return result
+
+def get_movies(embedding: list[float]) -> list[dict]:
+    state = me.state(State)
+    PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME")
+    if PINECONE_INDEX_NAME is None:
+        PINECONE_INDEX_NAME = "netflix-index-01"
+        logging.warning("PINECONE_INDEX_NAME not set, using default: %s", PINECONE_INDEX_NAME)
+    pc = Pinecone(api_key=state.pinecone_api_key)
+    index = pc.Index(name=PINECONE_INDEX_NAME)
+    query_resp = index.query(vector=embedding, namespace="sandpaper", top_k=5)
+    movies_list = []
+    for match in query_resp.matches:
+        # The query returns only ids and scores, so fetch the metadata for each match.
+        meta = index.fetch(ids=[match['id']], namespace="sandpaper")["vectors"][match['id']]["metadata"]
+        movies_list.append({"title": meta["title"], "summary": meta["summary"], "director": meta["director"], "genre": meta["genre"], "actors": meta["actors"]})
+    return movies_list
+
+def send_prompt_flash(input: str, history: list[ChatMessage], sys_instruction: list[str]) -> Iterable[str]:
+    configure_gemini()
+    model = genai.GenerativeModel(
+        model_name="gemini-1.5-flash-latest",
+        generation_config=generation_config,
+        system_instruction=sys_instruction,
+    )
+    chat_session = model.start_chat(
+        history=[
+            {"role": message.role, "parts": [message.content]} for message in history
+        ]
+    )
+    for chunk in chat_session.send_message(input, stream=True):
+        yield chunk.text
+
+intent_prompt = """
+Answer the following questions as a JSON string based solely on the provided chat history. Do not assume anything that the user did not explicitly say.
+
+    isOnTopic: true or false, indicating whether the most recent query is on topic.
+    shouldRecommendMovie: true or false, indicating whether the user has asked for a movie or show recommendation and has given enough information to make a recommendation. If it is a follow-up question related to a movie or to a previous recommendation then it is true.
+    shouldRecommendMovieReasoning: A string explaining what information to obtain to make a movie or show recommendation.
+    summary: If isOnTopic is true, output a summary of what the user is looking for.
+Examples
+
+    History: [{'role': 'user', 'content': "Hi"}]
+    Answer: {
+        "isOnTopic": true,
+        "shouldRecommendMovie": false,
+        "shouldRecommendMovieReasoning": "User has not mentioned what they are looking for.",
+        "summary": ""
+    }
+
+    History: [{'role': 'user', 'content': "Hi, I am looking for a movie about a spy changing faces."}]
+    Answer: {
+        "isOnTopic": true,
+        "shouldRecommendMovie": true,
+        "shouldRecommendMovieReasoning": "User is looking for a movie recommendation.",
+        "summary": "A movie about a spy changing faces."
+    }
+    Do not use markdown for the output, respond with only JSON
+    """
\ No newline at end of file
diff --git a/infrastructure/movie-search-app/requirements.txt b/infrastructure/movie-search-app/requirements.txt
new file mode 100644
index 0000000..64fea70
--- /dev/null
+++ b/infrastructure/movie-search-app/requirements.txt
@@ -0,0 +1,8 @@
+mesop
+gunicorn
+anthropic
+google-generativeai
+pg8000
+SQLAlchemy
+pinecone[grpc]
+pandas
\ No newline at end of file