diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 05e6c60cf7..61c41f5e48 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -71,3 +71,4 @@
/generative-ai/open-models/serving @polong-lin @GoogleCloudPlatform/generative-ai-devrel
/generative-ai/open-models/serving/cloud_run_ollama_gemma2_rag_qa.ipynb @eliasecchig @GoogleCloudPlatform/generative-ai-devrel
/generative-ai/open-models/serving/vertex_ai_text_generation_inference_gemma.ipynb @alvarobartt @philschmid @pagezyhf @jeffboudier
+/generative-ai/gemini/use-cases/applying-llms-to-data/semantic-search-in-bigquery/stackoverflow_questions_semantic_search.ipynb @sethijaideep @GoogleCloudPlatform/generative-ai-devrel
diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt
index 35e42078c4..199219b230 100644
--- a/.github/actions/spelling/allow.txt
+++ b/.github/actions/spelling/allow.txt
@@ -426,6 +426,7 @@ ipynb
isa
itables
iterrows
+ivf
jegadesh
jetbrains
jsonify
diff --git a/gemini/prompts/prompt_optimizer/vapo_lib.py b/gemini/prompts/prompt_optimizer/vapo_lib.py
index 5d0b2aa53c..2365f801cc 100644
--- a/gemini/prompts/prompt_optimizer/vapo_lib.py
+++ b/gemini/prompts/prompt_optimizer/vapo_lib.py
@@ -13,6 +13,7 @@
# limitations under the License.
"""Utility functions and classes for the VAPO notebook."""
+import csv
import io
import json
import re
@@ -61,6 +62,78 @@ def is_run_target_required(eval_metric_types: list[str], source_model: str) -> b
_TARGET_KEY = "target"
+
+
+def load_file_from_gcs(dataset: str) -> str:
+ """Loads the file from GCS and returns it as a string."""
+ if dataset.startswith("gs://"):
+ with gfile.GFile(dataset, "r") as f:
+ return f.read()
+ else:
+ raise ValueError(
+ "Unsupported file location. Only GCS paths starting with 'gs://' are"
+ " supported."
+ )
+
+
+def parse_jsonl(data_str: str) -> list[dict[str, str]]:
+ """Parses the content of a JSONL file and returns a list of dictionaries."""
+ data = []
+ lines = data_str.splitlines()
+ for line in lines:
+ if line:
+ try:
+ data.append(json.loads(line))
+ except json.JSONDecodeError as e:
+ raise ValueError(
+ f"Error decoding JSON on line: {line}. Error: {e}"
+ ) from e
+ return data
+
+
+def parse_and_validate_csv(data_str: str) -> list[dict[str, str]]:
+ """Parses and validates the content of a CSV file and returns a list of dictionaries."""
+ data = []
+ csv_reader = csv.reader(io.StringIO(data_str))
+
+ # Extract and validate headers
+ try:
+ headers = next(csv_reader)
+ if not headers:
+ raise ValueError("The CSV file has an empty or invalid header row.")
+ except StopIteration as e:
+ raise ValueError("The CSV file is empty.") from e
+
+ # Validate and process rows
+ for row_number, row in enumerate(csv_reader, start=2):
+ if len(row) != len(headers):
+ raise ValueError(
+ f"Row {row_number} has an inconsistent number of fields. "
+ f"Expected {len(headers)} fields but found {len(row)}."
+ )
+ # Create dictionary for each row using headers as keys
+ item = dict(zip(headers, row))
+ data.append(item)
+
+ return data
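+
+
+# For reference, parse_and_validate_csv turns a CSV with header
+# "question,target" (column names are illustrative) into
+# [{"question": "...", "target": "..."}, ...].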
+
+
+def load_dataset(dataset: str) -> list[dict[str, str]]:
+ """Loads and parses the dataset based on its file type ('.jsonl' or '.csv')."""
+ # Load the file from GCS
+ data_str = load_file_from_gcs(dataset)
+
+ # Parse based on file type
+ if dataset.endswith(".jsonl"):
+ return parse_jsonl(data_str)
+
+ if dataset.endswith(".csv"):
+ return parse_and_validate_csv(data_str)
+
+ raise ValueError(
+ "Unsupported file type. Please provide a file with '.jsonl' or '.csv'"
+ " extension."
+ )
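+
+
+# Example usage (illustrative GCS paths):
+#   rows = load_dataset("gs://my-bucket/eval_data.jsonl")
+#   rows = load_dataset("gs://my-bucket/eval_data.csv")
+# Both return a list of dict rows keyed by field/column name.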
+
+
def validate_prompt_and_data(
template: str,
dataset_path: str,
@@ -68,10 +141,10 @@ def validate_prompt_and_data(
label_enforced: bool,
) -> None:
"""Validates the prompt template and the dataset."""
- placeholder_to_content = json.loads(placeholder_to_content)
- with gfile.GFile(dataset_path, "r") as f:
- data = [json.loads(line) for line in f.readlines()]
-
+ data = load_dataset(dataset_path)
+ placeholder_to_content_json = json.loads(placeholder_to_content)
+    template = re.sub(r"(?<!{){(?!{)", "{{", template)
@@ ... @@ def ...(filename: str) -> pd.DataFrame:
     if not gfile.exists(filename):
return pd.DataFrame()
with gfile.GFile(filename, "r") as f:
- data = json.load(f)
-
+ try:
+ data = json.load(f)
+ except json.JSONDecodeError:
+ return pd.DataFrame()
return pd.json_normalize(data)
@@ -227,6 +302,15 @@ class ProgressForm:
def __init__(self, params: dict[str, str]) -> None:
"""Initialize the progress form."""
+ self.instruction_progress_bar = None
+ self.instruction_display = None
+ self.instruction_best = None
+ self.instruction_score = None
+ self.demo_progress_bar = None
+ self.demo_display = None
+ self.demo_best = None
+ self.demo_score = None
+
self.job_state_display = display(
HTML("Job State: Not Started!"), display_id=True
)
@@ -262,7 +346,7 @@ def __init__(self, params: dict[str, str]) -> None:
# pylint: disable=too-many-arguments
def update_progress(
self,
- progress_bar: widgets.IntProgress,
+ progress_bar: widgets.IntProgress | None,
templates_file: str,
df: pd.DataFrame | None,
df_display: DisplayHandle,
diff --git a/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb b/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb
index 224565f968..940cd61a1c 100644
--- a/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb
+++ b/gemini/prompts/prompt_optimizer/vertex_ai_prompt_optimizer_ui.ipynb
@@ -113,9 +113,7 @@
"outputs": [],
"source": [
"SYSTEM_INSTRUCTION = \"Answer the following question. Let's think step by step.\\n\" # @param {type:\"string\"}\n",
- "PROMPT_TEMPLATE = (\n",
- " \"Question: {{question}}\\n\\nAnswer:{{target}}\" # @param {type:\"string\"}\n",
- ")"
+ "PROMPT_TEMPLATE = \"Question: {question}\\n\\nAnswer:{target}\" # @param {type:\"string\"}"
]
},
{
@@ -203,9 +201,9 @@
"# @markdown * Number of the demonstrations to include in each prompt.\n",
"\n",
"# @markdown **Model Configs**:
\n",
- "TARGET_MODEL_QPS = 3 # @param {type:\"integer\"}\n",
- "SOURCE_MODEL_QPS = 3 # @param {type:\"integer\"}\n",
- "EVAL_MODEL_QPS = 3 # @param {type:\"integer\"}\n",
+ "TARGET_MODEL_QPS = 3.0 # @param {type:\"number\"}\n",
+ "SOURCE_MODEL_QPS = 3.0 # @param {type:\"number\"}\n",
+ "EVAL_QPS = 3.0 # @param {type:\"number\"}\n",
"# @markdown * The QPS for calling the eval model, which is currently gemini-1.5-pro-001.\n",
"\n",
"# @markdown **Multi-metric Configs**:
\n",
@@ -280,15 +278,15 @@
"params = {\n",
" \"project\": PROJECT_ID,\n",
" \"num_steps\": NUM_INST_OPTIMIZATION_STEPS,\n",
- " \"prompt_template\": SYSTEM_INSTRUCTION,\n",
- " \"demo_and_query_template\": PROMPT_TEMPLATE,\n",
+ " \"system_instruction\": SYSTEM_INSTRUCTION,\n",
+ " \"prompt_template\": PROMPT_TEMPLATE,\n",
" \"target_model\": TARGET_MODEL,\n",
" \"target_model_qps\": TARGET_MODEL_QPS,\n",
" \"target_model_location\": LOCATION,\n",
" \"source_model\": SOURCE_MODEL,\n",
" \"source_model_qps\": SOURCE_MODEL_QPS,\n",
" \"source_model_location\": LOCATION,\n",
- " \"eval_model_qps\": EVAL_MODEL_QPS,\n",
+ " \"eval_qps\": EVAL_QPS,\n",
" \"eval_model_location\": LOCATION,\n",
" \"optimization_mode\": OPTIMIZATION_MODE,\n",
" \"num_demo_set_candidates\": NUM_DEMO_OPTIMIZATION_STEPS,\n",
@@ -346,7 +344,7 @@
"source": [
"from IPython.display import HTML, display\n",
"\n",
- "RESULT_PATH = \"gs://prompt_design_demo\" # @param {type:\"string\"}\n",
+ "RESULT_PATH = \"[OUTPUT_PATH]\" # @param {type:\"string\"}\n",
"# @markdown * Specify a GCS path that contains artifacts of a single or multiple VAPO runs.\n",
"\n",
"results_ui = vapo_lib.ResultsUI(RESULT_PATH)\n",
diff --git a/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb b/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb
new file mode 100644
index 0000000000..d8d88a57b0
--- /dev/null
+++ b/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb
@@ -0,0 +1,1112 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "3YcBnq20nC6r"
+ },
+ "outputs": [],
+ "source": [
+ "# Copyright 2024 Google LLC\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xU0F5ObiGgF4"
+ },
+ "source": [
+ "# Building a Multi-Agent RAG Application with LangGraph and Reasoning Engine\n",
+ "\n",
+    "<table align=\"left\">\n",
+    "  <td>\n",
+    "    <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb\">Run in Colab</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Freasoning-engine%2Ftutorial_langgraph_rag_agent.ipynb\">Run in Colab Enterprise</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb\">View on GitHub</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/reasoning-engine/tutorial_langgraph_rag_agent.ipynb\">Open in Vertex AI Workbench</a>\n",
+    "  </td>\n",
+    "</table>"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4sA9r45YMz_O"
+ },
+ "source": [
+ "| | |\n",
+ "|-|-|\n",
+ "|Author(s) | [Xiaolong Yang](https://github.com/shawn-yang-google) |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "GZft-jYpHmYv"
+ },
+ "source": [
+ "## Overview\n",
+ "\n",
+ "[Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) (LangChain on Vertex AI) is a managed service in Vertex AI that helps you to build and deploy an agent reasoning framework. It gives you the flexibility to choose how much reasoning you want to delegate to the LLM and how much you want to handle with customized code.\n",
+ "\n",
+ "RAG (Retrieval-Augmented Generation) is an AI framework that combines the strengths of traditional information retrieval systems (such as databases) with the capabilities of generative large language models (LLMs). \n",
+ "\n",
+ "[LangGraph](https://langchain-ai.github.io/langgraph/) is a library for building stateful, multi-actor applications with LLMs, used to create agent and multi-agent workflows.\n",
+ "\n",
+ "This notebook demonstrates how to build, deploy, and test a LangGraph + RAG application using [Reasoning Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/reasoning-engine/overview) in Vertex AI.\n",
+ "\n",
+ "\n",
+ "## Context\n",
+ "\n",
+ "In previous tutorials:\n",
+ "* [LangGraph application with Reasoning Engine](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_langgraph.ipynb?)\n",
+ "You have learned how to combine LangGraph's workflow orchestration with the scalability of Vertex AI, which enables you to build custom generative AI applications.\n",
+ "* [RAG application with Reasoning Engine and Cloud SQL for PostgreSQL](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_cloud_sql_pg_rag_agent.ipynb)\n",
+ "By combining this extra knowledge with its own language skills, the AI can write text that is more accurate, up-to-date, and relevant to your specific needs.\n",
+ "Your [LangChain](https://python.langchain.com/docs/get_started/introduction) agent uses an [Postgres Vector Store](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/tree/main) to perform a similary search and retrieve related data to ground the LLM response.\n",
+ "\n",
+ "## Objectives\n",
+ "\n",
+ "In this tutorial, you will learn how to build and deploy an agent (model, tools, and reasoning) using the Vertex AI SDK for Python and Cloud SQL for PostgreSQL LangGraph integration.\n",
+ "\n",
+ "We're using the `Multi Agent Collaboration` [approach](https://blog.langchain.dev/langgraph-multi-agent-workflows/). \n",
+ "This sample notebook could be adapted to use other multi-agent implementations described in the [link](https://blog.langchain.dev/langgraph-multi-agent-workflows/), such as the `Agent Supervisor` or other approaches. \n",
+ "\n",
+ "You will develop a LangGraph Application like: \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "QL58mPu9Hw7g"
+ },
+ "source": [
+ "## Before you begin\n",
+ "\n",
+ "1. In the Google Cloud console, on the project selector page, select or [create a Google Cloud project](https://cloud.google.com/resource-manager/docs/creating-managing-projects).\n",
+ "2. [Make sure that billing is enabled for your Google Cloud project](https://cloud.google.com/billing/docs/how-to/verify-billing-enabled#console).\n",
+ "3. Follow the instruction in [RAG application with Reasoning Engine and Cloud SQL for PostgreSQL](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_cloud_sql_pg_rag_agent.ipynb), set up Cloud SQL for PostgreSQL.\n",
+ "\n",
+ "### Required roles\n",
+ "\n",
+ "To get the permissions that you need to complete the tutorial, ask your administrator to grant you the [Owner](https://cloud.google.com/iam/docs/understanding-roles#owner) (`roles/owner`) IAM role on your project. For more information about granting roles, see [Manage access](https://cloud.google.com/iam/docs/granting-changing-revoking-access).\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-RYpMytsZ882"
+ },
+ "source": [
+ "### Install and import dependencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "w_94DKOCX5pG"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install --upgrade --user --quiet \\\n",
+ " \"google-cloud-aiplatform[reasoningengine,langchain]\"==1.60.0 \\\n",
+ " langchain-google-cloud-sql-pg==0.6.1 \\\n",
+ " cloud-sql-python-connector==1.9.0 \\\n",
+ " langchain-google-vertexai==1.0.4 \\\n",
+ " cloudpickle==3.0.0 \\\n",
+ " pydantic==2.7.4 \\\n",
+ " langgraph==0.0.51 \\\n",
+ " httpx==0.27.2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "R5Xep4W9lq-Z"
+ },
+ "source": [
+ "### Restart runtime\n",
+ "\n",
+ "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.\n",
+ "\n",
+ "The restart might take a minute or longer. After it's restarted, continue to the next step."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "XRvKdaPDTznN"
+ },
+ "outputs": [],
+ "source": [
+ "import IPython\n",
+ "\n",
+ "app = IPython.Application.instance()\n",
+ "app.kernel.do_shutdown(True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "bThFamq9351N"
+ },
+ "source": [
+ "### Import libraries\n",
+ "\n",
+ "Import the necessary Python libraries. These libraries provide the tools we need to interact with LangGraph, Vertex AI, and other components of our application."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "KNt0YeKaMz_Q"
+ },
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "from typing import Literal\n",
+ "import uuid\n",
+ "\n",
+ "from google.cloud import storage\n",
+ "from langchain_core.documents import Document\n",
+ "from langchain_core.messages import BaseMessage, HumanMessage\n",
+ "from langchain_core.tools import tool\n",
+ "from langchain_google_cloud_sql_pg import PostgresEngine, PostgresVectorStore\n",
+ "from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings\n",
+ "from langgraph.graph import END, MessageGraph\n",
+ "from langgraph.prebuilt import ToolNode\n",
+ "import vertexai\n",
+ "from vertexai.preview import reasoning_engines"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yPKXjZrFZuUZ"
+ },
+ "source": [
+ "### Authenticate to Google Cloud\n",
+ "\n",
+ "Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "id": "NyKGtVQjgx13"
+ },
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "\n",
+ "if \"google.colab\" in sys.modules:\n",
+ " from google.colab import auth\n",
+ "\n",
+ " auth.authenticate_user()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9aGBuLA7aQ6O"
+ },
+ "source": [
+ "### Define project information\n",
+ "\n",
+ "Initialize `gcloud` with your Project ID and resource location. At this time, only `us-central1` is supported."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DF4l8DTdWgPY"
+ },
+ "source": [
+ "### Set Google Cloud project information and initialize Vertex AI SDK\n",
+ "\n",
+ "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n",
+ "\n",
+ "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Nqwi-5ufWp_B"
+ },
+ "outputs": [],
+ "source": [
+ "PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}\n",
+ "LOCATION = \"us-central1\" # @param {type:\"string\"}\n",
+ "STAGING_BUCKET = \"gs://[your-staging-bucket]\" # @param {type:\"string\"}\n",
+ "\n",
+ "vertexai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET)\n",
+ "!gcloud config set project {PROJECT_ID}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "S_yG0kddIvr7"
+ },
+ "source": [
+ "## Set up Cloud SQL\n",
+ "\n",
+ "You should have already set up Cloud SQL in [RAG application with Reasoning Engine and Cloud SQL for PostgreSQL](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/reasoning-engine/tutorial_cloud_sql_pg_rag_agent.ipynb):\n",
+ "* Enable APIs.\n",
+ "* Create a Cloud SQL instance.\n",
+ "* Create a database.\n",
+ "* Initialize multiple vector store tables.\n",
+ "* Create a user.\n",
+ "\n",
+ "\n",
+ "In this Colab, we will create two new vector store tables: Book and Movie.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "XtiB5-LVVkv0"
+ },
+ "outputs": [],
+ "source": [
+ "REGION = \"us-central1\" # @param {type:\"string\"}\n",
+ "INSTANCE = \"langgraph-rag-instance\" # @param {type:\"string\"}\n",
+ "DATABASE = \"harry_potter_data\" # @param {type:\"string\"}\n",
+ "MOVIE_TABLE_NAME = \"my-movie\" # @param {type:\"string\"}\n",
+ "BOOK_TABLE_NAME = \"my-book\" # @param {type:\"string\"}\n",
+ "PASSWORD = input(\"Please provide a password to be used for 'postgres' database user: \")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "dqDjyLpS5zCm"
+ },
+ "source": [
+ "### Grant access to vector store table to IAM users"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "id": "24NjnjF95ySA"
+ },
+ "outputs": [],
+ "source": [
+ "engine = await PostgresEngine.afrom_instance(\n",
+ " PROJECT_ID, REGION, INSTANCE, DATABASE, user=\"postgres\", password=PASSWORD\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OaP1LRhPi0y7"
+ },
+ "source": [
+ "### Initialize multiple vector store tables\n",
+ "\n",
+ "The `PostgresEngine` has a helper method `init_vectorstore_table()` that can be used to create a table with the proper schema to store vector embeddings."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "id": "GGd89YWIi2qg"
+ },
+ "outputs": [],
+ "source": [
+ "for table_name in [MOVIE_TABLE_NAME, BOOK_TABLE_NAME]:\n",
+ " engine = await PostgresEngine.afrom_instance(\n",
+ " PROJECT_ID, REGION, INSTANCE, DATABASE, user=\"postgres\", password=PASSWORD\n",
+ " )\n",
+ "\n",
+ " await engine.ainit_vectorstore_table(\n",
+ " table_name=table_name,\n",
+ " vector_size=768, # Vector size for VertexAI model(textembedding-gecko@latest)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sQ1MI8ARi5Rr"
+ },
+ "source": [
+ "### Add embeddings to the vector store\n",
+ "\n",
+ "Load data from a CSV file to generate and insert embeddings to the vector store.\n",
+ "\n",
+ "We will use two datasets:\n",
+ "\n",
+ "* Harry Potter Movie\n",
+ " - Intro: https://www.kaggle.com/datasets/rounakbanik/the-movies-dataset\n",
+ " - Data: gs://github-repo/generative-ai/gemini/reasoning-engine/sample_data/harry_potter_movies.json\n",
+ "* Harry Potter Book\n",
+ " - Intro: https://www.kaggle.com/datasets/shubhammaindola/harry-potter-books\n",
+ " - Data: gs://github-repo/generative-ai/gemini/reasoning-engine/sample_data/harry_potter_books.json\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "id": "Kcawj2ILdNmN"
+ },
+ "outputs": [],
+ "source": [
+ "def get_docs_from_gcs(bucket_name, gcs_dir, blob_name):\n",
+ " \"\"\"Fetches a JSON file from GCS, deserializes it, and returns the data.\n",
+ "\n",
+ " Args:\n",
+ " bucket_name: Name of the GCS bucket (e.g., 'my-bucket').\n",
+ " gcs_dir: Directory within the bucket where the JSON file is located.\n",
+ " blob_name: Path and filename within the bucket\n",
+ " (e.g., 'my_data.json').\n",
+ "\n",
+ " Returns:\n",
+ " A Python object representing the Document, or None if the file\n",
+ " is not found or an error occurs.\n",
+ " \"\"\"\n",
+ "\n",
+ " storage_client = storage.Client()\n",
+ " bucket = storage_client.bucket(bucket_name)\n",
+ " blob = bucket.blob(blob_name)\n",
+ "\n",
+ " if not blob.exists():\n",
+ " print(f\"File not found: gs://{bucket_name}/{gcs_dir}/{blob_name}\")\n",
+ " return None\n",
+ "\n",
+ " try:\n",
+ " with blob.open(\"r\") as f:\n",
+ " json_docs = json.loads(f.read())\n",
+ " except json.JSONDecodeError:\n",
+ " print(f\"Error: Invalid JSON format in gs://{bucket_name}/{gcs_dir}/{blob_name}\")\n",
+ " return None\n",
+ "\n",
+ " docs = []\n",
+ " for json_doc in json_docs:\n",
+ " docs.append(Document(**(json_doc[\"kwargs\"])))\n",
+ "\n",
+ " return docs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Pg5k6FyykfzW"
+ },
+ "source": [
+ "#### Movies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "dkMjEXEmi4ro"
+ },
+ "outputs": [],
+ "source": [
+ "# Initialize the vector store for movies\n",
+ "vector_store = await PostgresVectorStore.create(\n",
+ " engine,\n",
+ " table_name=MOVIE_TABLE_NAME,\n",
+ " embedding_service=VertexAIEmbeddings(\n",
+ " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n",
+ " ),\n",
+ ")\n",
+ "docs = get_docs_from_gcs(\n",
+ " \"reasoning-engine-test-1-bucket\", \"harry_potter\", \"harry_potter_movies.json\"\n",
+ ")\n",
+ "# Add data to the vector store\n",
+ "ids = [str(uuid.uuid4()) for i in range(len(docs))]\n",
+ "await vector_store.aadd_documents(docs, ids=ids)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "GBdIVxqVkjT-"
+ },
+ "source": [
+ "#### Books"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "PWAElYbEkyzB"
+ },
+ "outputs": [],
+ "source": [
+ "# Initialize the vector store for books\n",
+ "vector_store = await PostgresVectorStore.create(\n",
+ " engine,\n",
+ " table_name=BOOK_TABLE_NAME,\n",
+ " embedding_service=VertexAIEmbeddings(\n",
+ " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n",
+ " ),\n",
+ ")\n",
+ "docs = get_docs_from_gcs(\n",
+ " \"reasoning-engine-test-1-bucket\", \"harry_potter\", \"harry_potter_books.json\"\n",
+ ")\n",
+ "# Add data to the vector store\n",
+ "ids = [str(uuid.uuid4()) for i in range(len(docs))]\n",
+ "await vector_store.aadd_documents(docs, ids=ids)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "XCra5kJVKyg5"
+ },
+ "source": [
+ "## Define the retriever tool\n",
+ "\n",
+ "Tools are interfaces that an agent, chain, or LLM can use to enable the Gemini model to interact with external systems, databases, document stores, and other APIs so that the model can get the most up-to-date information or take action with those systems.\n",
+ "\n",
+ "In this example, you'll define a function that will retrieve similar documents from the vector store using semantic search."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "id": "vLx7O_UdqDyr"
+ },
+ "outputs": [],
+ "source": [
+ "@tool\n",
+ "def movie_similarity_search(query: str) -> str:\n",
+ " \"\"\"\n",
+ " Perform a similarity search for movies based on the user's last message.\n",
+ "\n",
+ " Args:\n",
+ " query str: The current conversation state, where the last message contains the query.\n",
+ "\n",
+ " Returns:\n",
+ " str: A list of BaseMessage containing the search results.\n",
+ " \"\"\"\n",
+ " engine = PostgresEngine.from_instance(\n",
+ " PROJECT_ID,\n",
+ " REGION,\n",
+ " INSTANCE,\n",
+ " DATABASE,\n",
+ " quota_project=PROJECT_ID,\n",
+ " user=\"postgres\",\n",
+ " password=PASSWORD,\n",
+ " )\n",
+ "\n",
+ " vector_store = PostgresVectorStore.create_sync(\n",
+ " engine,\n",
+ " table_name=MOVIE_TABLE_NAME,\n",
+ " embedding_service=VertexAIEmbeddings(\n",
+ " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n",
+ " ),\n",
+ " )\n",
+ " retriever = vector_store.as_retriever()\n",
+ " return str([doc for doc in retriever.invoke(query)])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "id": "XVjf2fT_bIGa"
+ },
+ "outputs": [],
+ "source": [
+ "@tool\n",
+ "def book_similarity_search(query: str) -> str:\n",
+ " \"\"\"\n",
+ " Perform a similarity search for books based on the user's last message.\n",
+ "\n",
+ " Args:\n",
+ " state (List[BaseMessage]): The current conversation state, where the last message contains the query.\n",
+ "\n",
+ " Returns:\n",
+ " List[BaseMessage]: A list of BaseMessage containing the search results.\n",
+ " \"\"\"\n",
+ " engine = PostgresEngine.from_instance(\n",
+ " PROJECT_ID,\n",
+ " REGION,\n",
+ " INSTANCE,\n",
+ " DATABASE,\n",
+ " quota_project=PROJECT_ID,\n",
+ " # Uncomment to use built-in authentication instead of IAM authentication\n",
+ " user=\"postgres\",\n",
+ " password=PASSWORD,\n",
+ " )\n",
+ "\n",
+ " vector_store = PostgresVectorStore.create_sync(\n",
+ " engine,\n",
+ " table_name=BOOK_TABLE_NAME,\n",
+ " embedding_service=VertexAIEmbeddings(\n",
+ " model_name=\"textembedding-gecko@latest\", project=PROJECT_ID\n",
+ " ),\n",
+ " )\n",
+ " retriever = vector_store.as_retriever()\n",
+ " return str([doc for doc in retriever.invoke(query)])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BF8xqd84351O"
+ },
+ "source": [
+ "### Define router\n",
+ "\n",
+ "We're using the `Multi Agent Collaboration` [approach](https://blog.langchain.dev/langgraph-multi-agent-workflows/). \n",
+ "This sample notebook could be adapted to use other multi-agent implementations described in the [link](https://blog.langchain.dev/langgraph-multi-agent-workflows/), such as the `Agent Supervisor` or other approaches. \n",
+ "\n",
+ "Then, you'll define a router to control the flow of the conversation, determining which tool to use based on user input or the state of the interaction. Here we'll use a simple router setup, and you can customize the behavior of your router to handle multiple tools, custom logic, or multi-agent workflows.\n",
+ "\n",
+ "In this example, the router will invoke different nodes in the graph based on whether the user prompt contains the word 'book' or 'movie'."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "id": "7m9XRSn3351O"
+ },
+ "outputs": [],
+ "source": [
+ "def router(\n",
+ " state: list[BaseMessage],\n",
+ ") -> Literal[\"book_similarity_search\", \"movie_similarity_search\", \"__end__\"]:\n",
+ " if not state[0].content or len(state[1].tool_calls) == 0:\n",
+ " return \"__end__\"\n",
+ " if \"book\" in state[0].content:\n",
+ " return \"book_similarity_search\"\n",
+ " if \"movie\" in state[0].content:\n",
+ " return \"movie_similarity_search\"\n",
+ " return \"__end__\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "CFeOIbed351O"
+ },
+ "source": [
+ "## Define LangGraph application\n",
+ "\n",
+ "Now you'll bring everything together to define your LangGraph application as a custom template in Reasoning Engine.\n",
+ "\n",
+ "This application will use the tool and router that you just defined. LangGraph provides a powerful way to structure these interactions and leverage the capabilities of LLMs."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tZYtR6-zqudb"
+ },
+ "source": [
+ "#### Multi stage"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "id": "WWjFaLeW351O"
+ },
+ "outputs": [],
+ "source": [
+ "class MultiStageLangGraphApp:\n",
+ " def __init__(self, project: str, location: str) -> None:\n",
+ " self.project_id = project\n",
+ " self.location = location\n",
+ "\n",
+ " # The set_up method is used to define application initialization logic\n",
+ " def set_up(self) -> None:\n",
+ " model = ChatVertexAI(model=\"gemini-1.5-pro\")\n",
+ " builder = MessageGraph()\n",
+ "\n",
+ " # Checker node\n",
+ " def checker(state: list[BaseMessage]):\n",
+ " if not state[0].content:\n",
+ " return \"__end__\"\n",
+ " user_question = state[0].content\n",
+ " response = model.invoke(\n",
+ " [\n",
+ " HumanMessage(\n",
+ " content=(\n",
+ " f\"What is the type of the question? {user_question}\"\n",
+ " \"Think step by step, then answer one of the following:\"\n",
+ " \"* movie\"\n",
+ " \"* book\"\n",
+ " \"* no\"\n",
+ " )\n",
+ " )\n",
+ " ]\n",
+ " )\n",
+ " table_name = response.content.split(\"\")[1].split(\"\")[0]\n",
+ " # Multiturn requests alternate between user and model.\n",
+ " state[0].content = f\"query:{state[0].content},table_name:{table_name}\"\n",
+ "\n",
+ " builder.add_node(\"checker\", checker)\n",
+ " # Set entry point to checker node so it is reachable\n",
+ " builder.set_entry_point(\"checker\")\n",
+ "\n",
+ " # Tool node.\n",
+ " model_with_tools = model.bind_tools(\n",
+ " [book_similarity_search, movie_similarity_search]\n",
+ " )\n",
+ " builder.add_node(\"tools\", model_with_tools)\n",
+ " # Add edge from tools to checker so the flow is checker->tools->router...\n",
+ " builder.add_edge(\"checker\", \"tools\")\n",
+ "\n",
+ " # Summerize node.\n",
+ " # node\n",
+ " def summerizar(state: list[BaseMessage]):\n",
+ " question = state[0].content\n",
+ " related_docs = state[-1].content\n",
+ " response = model.invoke(\n",
+ " [\n",
+ " HumanMessage(\n",
+ " content=(\n",
+ " f\"\"\"\n",
+ " Use the docs: {related_docs} to answer question:{question}.\n",
+ " The answer format should be json dict.\n",
+ " \"\"\"\n",
+ " )\n",
+ " )\n",
+ " ]\n",
+ " )\n",
+ " # Multiturn requests alternate between user and model.\n",
+ " state.append(response)\n",
+ "\n",
+ " builder.add_node(\"summerizar_node\", summerizar)\n",
+ " builder.add_edge(\"summerizar_node\", END)\n",
+ " # Book retrieval node\n",
+ " book_node = ToolNode([book_similarity_search])\n",
+ " builder.add_node(\"book_similarity_search\", book_node)\n",
+ " builder.add_edge(\"book_similarity_search\", \"summerizar_node\")\n",
+ "\n",
+ " # Movie retrieval node\n",
+ " movie_node = ToolNode([movie_similarity_search])\n",
+ " builder.add_node(\"movie_similarity_search\", movie_node)\n",
+ " builder.add_edge(\"movie_similarity_search\", \"summerizar_node\")\n",
+ "\n",
+ " # Router to check condition.\n",
+ " builder.add_conditional_edges(\"tools\", router)\n",
+ "\n",
+ " self.runnable = builder.compile()\n",
+ "\n",
+ " # The query method will be used to send inputs to the agent\n",
+ " def query(self, message: str):\n",
+ " \"\"\"Query the application.\n",
+ "\n",
+ " Args:\n",
+ " message: The user message.\n",
+ "\n",
+ " Returns:\n",
+ " str: The LLM response.\n",
+ " \"\"\"\n",
+ " chat_history = self.runnable.invoke(HumanMessage(message))\n",
+ "\n",
+ " return chat_history[-1].content"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sEfQYtgSm9ol"
+ },
+ "source": [
+ "### Local test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {
+ "id": "IcWux9IskE-c"
+ },
+ "outputs": [],
+ "source": [
+ "agent = MultiStageLangGraphApp(project=PROJECT_ID, location=LOCATION)\n",
+ "agent.set_up()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0aaf11c1677a"
+ },
+ "source": [
+ "Expect a JSON format answer like \n",
+ "```json\n",
+ "{\"company\": [\"Warner Bros.\", \"Heyday Films\"]}\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "J9yUujSokJpQ"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ },
+ "text/plain": [
+ "```json\n",
+ "{\n",
+ " 'answer': 'Warner Bros and Heyday Films produce Harry Potter and the Deathly Hallows: Part 2.'\n",
+ "}\n",
+ "```"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "agent.query(message=\"Which company produces and distributes Harry Potter films\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "077f1396f641"
+ },
+ "source": [
+ "Expect a JSON format answer like \n",
+ "```json\n",
+ "{\n",
+ " \"answer\": [\"Daniel Radcliffe\", \"Darren Criss\"]\n",
+ "}\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Vn1wBUEyLGSG"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "```json\n",
+ "[\n",
+ " {\n",
+ " \"actor\": \"Darren Criss\",\n",
+ " \"movie\": \"A Very Potter Musical\"\n",
+ " },\n",
+ " {\n",
+ " \"actor\": \"Daniel Radcliffe\",\n",
+ " \"movie\": \"Harry Potter and the Deathly Hallows: Part 2\"\n",
+ " }\n",
+ "]\n",
+ "```\n"
+ ]
+ }
+ ],
+ "source": [
+ "agent.query(message=\"Who acts as Harry Potter\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "df468170bc6d"
+ },
+ "source": [
+ "Expect a JSON format answer like \n",
+ "```json\n",
+ "{\n",
+ " \"answer\": \"Harry Potter and the Chamber of Secrets.\"\n",
+ "}\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "EPGRJjdEb228"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ },
+ "text/plain": [
+ "'```json\n",
+ "{\n",
+ " \"book\": \"Harry Potter and the Chamber of Secrets\"\n",
+ "}\n",
+ "```'"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "agent.query(message=\"In which book Harry Potter drives car\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "EdvJRUWRNGHE"
+ },
+ "source": [
+ "## Building and deploying a LangGraph app on Reasoning Engine\n",
+ "\n",
+ "In the following sections, we'll walk through the process of building and deploying a LangGraph application using Reasoning Engine in Vertex AI."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ERxxgFTcI3DC"
+ },
+ "source": [
+ "## Deploy the service\n",
+ "\n",
+ "Now that you've specified a model, tools, and reasoning for your agent and tested it out, you're ready to deploy your agent as a remote service in Vertex AI!\n",
+ "\n",
+ "Here, you'll use the LangChain agent template provided in the Vertex AI SDK for Reasoning Engine, which brings together the model, tools, and reasoning that you've built up so far."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "k2nGSr2_JWcc"
+ },
+ "outputs": [],
+ "source": [
+ "remote_app = reasoning_engines.ReasoningEngine.create(\n",
+ " MultiStageLangGraphApp(project=PROJECT_ID, location=LOCATION),\n",
+ " requirements=[\n",
+ " \"google-cloud-aiplatform[reasoningengine,langchain]==1.60.0\",\n",
+ " \"langchain-google-cloud-sql-pg==0.6.1\",\n",
+ " \"cloud-sql-python-connector==1.9.0\",\n",
+ " \"langchain-google-vertexai==1.0.4\",\n",
+ " \"cloudpickle==3.0.0\",\n",
+ " \"pydantic==2.7.4\",\n",
+ " \"langgraph==0.0.51\",\n",
+ " \"httpx==0.27.2\",\n",
+ " ],\n",
+ " display_name=\"Reasoning Engine with LangGraph Rag Agent\",\n",
+ " description=\"This is a sample custom application in Reasoning Engine that uses LangGraph and sql pg rag\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TYqMpB16I4iH"
+ },
+ "source": [
+ "## Try it out\n",
+ "\n",
+ "Query the remote app directly or retrieve the application endpoint via the resource ID or display name. The endpoint can be used from any Python environment."
+ ]
+ },
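+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For example, you can reconnect to the deployed app from another environment by its resource name. This is a sketch; the `ENGINE_ID` below is a placeholder that you would replace with the ID printed when the app was created:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: reconnect by resource name (ENGINE_ID is a placeholder).\n",
+    "# remote_app = reasoning_engines.ReasoningEngine(\n",
+    "#     f\"projects/{PROJECT_ID}/locations/{LOCATION}/reasoningEngines/ENGINE_ID\"\n",
+    "# )"
+   ]
+  },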
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5A-5oNnQjePC"
+ },
+ "source": [
+ "### Ask question that can only be answered by the movie."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "P9-7ZcQugWkJ"
+ },
+ "outputs": [],
+ "source": [
+ "response = remote_app.query(message=\"Who acts as Harry Potter\")\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "UuACdm6zgaLm"
+ },
+ "outputs": [],
+ "source": [
+ "response = remote_app.query(\n",
+ " message=\"Which company produces and distributes Harry Potter film\"\n",
+ ")\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oXVavFQfi9Hz"
+ },
+ "source": [
+ "### Ask question that can only be answered by the book."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "NxiCmdMnalY-"
+ },
+ "outputs": [],
+ "source": [
+ "response = remote_app.query(message=\"In which book Harry Potter drives car\")\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "MrZ9IjnAI5v9"
+ },
+ "source": [
+ "## Clean up\n",
+ "\n",
+ "If you created a new project for this tutorial, delete the project. If you used an existing project and wish to keep it without the changes added in this tutorial, delete resources created for the tutorial."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tBc48ZHOJS6J"
+ },
+ "source": [
+ "### Deleting the project\n",
+ "\n",
+ "The easiest way to eliminate billing is to delete the project that you created for the tutorial.\n",
+ "\n",
+ "1. In the Google Cloud console, go to the [Manage resources](https://console.cloud.google.com/iam-admin/projects?_ga=2.235586881.1783688455.1719351858-1945987529.1719351858) page.\n",
+ "1. In the project list, select the project that you want to delete, and then click Delete.\n",
+ "1. In the dialog, type the project ID, and then click Shut down to delete the project.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Ed-BFtW-JPbI"
+ },
+ "source": [
+ "### Deleting tutorial resources\n",
+ "\n",
+ "Delete the reasoning engine instance(s) and Cloud SQL instance."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "LgNlHrxkb6c-"
+ },
+ "outputs": [],
+ "source": [
+ "# Delete the ReasoningEngine instance\n",
+ "remote_app.delete()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "goyrqS2_I8Hs"
+ },
+ "outputs": [],
+ "source": [
+ "# Or delete all Reasoning Engine apps\n",
+ "apps = reasoning_engines.ReasoningEngine.list()\n",
+ "for app in apps:\n",
+ " app.delete()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "odvj8aKpb3Wi"
+ },
+ "outputs": [],
+ "source": [
+ "# Delete the Cloud SQL instance\n",
+ "!gcloud sql instances delete {INSTANCE} \\\n",
+ " --project={PROJECT_ID}"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [
+ "-RYpMytsZ882",
+ "R5Xep4W9lq-Z",
+ "OaP1LRhPi0y7",
+ "GBdIVxqVkjT-"
+ ],
+ "name": "tutorial_langgraph_rag_agent.ipynb",
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/gemini/use-cases/applying-llms-to-data/semantic-search-in-bigquery/stackoverflow_questions_semantic_search.ipynb b/gemini/use-cases/applying-llms-to-data/semantic-search-in-bigquery/stackoverflow_questions_semantic_search.ipynb
new file mode 100644
index 0000000000..f0104b4f01
--- /dev/null
+++ b/gemini/use-cases/applying-llms-to-data/semantic-search-in-bigquery/stackoverflow_questions_semantic_search.ipynb
@@ -0,0 +1,518 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "gNz_7idNEdlE"
+ },
+ "outputs": [],
+ "source": [
+ "# Copyright 2024 Google LLC\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "IXbw-R0ZGiWf"
+ },
+ "source": [
+ "# Performing Semantic Search in BigQuery\n",
+ "\n",
+    "<table align=\"left\">\n",
+    "  <td>\n",
+    "    <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/applying-llms-to-data/semantic-search-in-bigquery/stackoverflow_questions_semantic_search.ipynb\">Open in Colab</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fuse-cases%2Fapplying-llms-to-data%2Fsemantic-search-in-bigquery%2Fstackoverflow_questions_semantic_search.ipynb\">Open in Colab Enterprise</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/use-cases/applying-llms-to-data/semantic-search-in-bigquery/stackoverflow_questions_semantic_search.ipynb\">Open in Vertex AI Workbench</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/applying-llms-to-data/semantic-search-in-bigquery/stackoverflow_questions_semantic_search.ipynb\">View on GitHub</a>\n",
+    "  </td>\n",
+    "</table>"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4qTZ1THsriwh"
+ },
+ "source": [
+ "| | |\n",
+ "|-|-|\n",
+ "|Author(s) | [Jaideep Sethi](https://github.com/sethijaideep) |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "_cGtn8TvG7SB"
+ },
+ "source": [
+ "## Overview\n",
+ "\n",
+ "The objective is to demonstrate how to perform semantic search in BigQuery using Vector Search, including:\n",
+ "\n",
+ "\n",
+ "* Completing setup steps for accessing Vertex AI from BigQuery\n",
+ "* Creating a remote model in BigQuery\n",
+ "* Generating text embedding using the remote model\n",
+ "* Creating a vector index to optimize the semantic search\n",
+ "* Performing semantic search using `VECTOR_SEARCH` function in BigQuery\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sf3ioTDBzuHR"
+ },
+ "source": [
+ "## About the dataset\n",
+ "\n",
+ "We are going to use Stack Overflow public dataset available in BigQuery. The data is an archive of Stack Overflow posts, votes, tags and badges.\n",
+ "\n",
+ "The dataset can be accessed [here](https://console.cloud.google.com/bigquery(cameo:product/stack-exchange/stack-overflow))."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "E8sI7ir1fVgI"
+ },
+ "source": [
+ "## Services and Costs\n",
+ "\n",
+ "This tutorial uses the following Google Cloud data analytics and ML services, they are billable components of Google Cloud:\n",
+ "\n",
+ "* BigQuery & BigQuery ML [(pricing)](https://cloud.google.com/bigquery/pricing)\n",
+ "* Vertex AI API [(pricing)](https://cloud.google.com/vertex-ai/pricing)\n",
+ "\n",
+ "Use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "P10XEmXlzeOF"
+ },
+ "source": [
+ "# Setup steps for accessing Vertex AI models from BigQuery"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "6xxnNycUFs8Z"
+ },
+ "source": [
+ "## Enable the Vertex AI and BigQuery Connection APIs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "52cf2e9fc5ec"
+ },
+ "outputs": [],
+ "source": [
+ "!gcloud services enable aiplatform.googleapis.com bigqueryconnection.googleapis.com"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DyOBjt1yFuKA"
+ },
+ "source": [
+ "## Create a Cloud resource connection\n",
+ "You can learn more about Cloud resource connection [here](https://cloud.google.com/bigquery/docs/create-cloud-resource-connection)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "65046bc876c6"
+ },
+ "outputs": [],
+ "source": [
+ "!bq mk --connection --location=us \\\n",
+ " --connection_type=CLOUD_RESOURCE vertex_conn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oo3okAPNF7QD"
+ },
+ "source": [
+ "## Grant the \"Vertex AI User\" role to the service account used by the Cloud resource connection\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "d443f951c280"
+ },
+ "outputs": [],
+ "source": [
+ "SERVICE_ACCT = !bq show --format=prettyjson --connection us.vertex_conn | grep \"serviceAccountId\" | cut -d '\"' -f 4\n",
+ "SERVICE_ACCT_EMAIL = SERVICE_ACCT[-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "a7d37cb9eb65"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "PROJECT_ID = os.environ[\"GOOGLE_CLOUD_PROJECT\"]\n",
+ "!gcloud projects add-iam-policy-binding --format=none $PROJECT_ID --member=serviceAccount:$SERVICE_ACCT_EMAIL --role=roles/aiplatform.user"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xCbjG3fb3def"
+ },
+ "source": [
+ "# Create the remote model in BigQuery ML"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "fm840uHo4kHP"
+ },
+ "source": [
+ "## Create a new dataset named `'bigquery_demo'`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "fdSiqoR04jeQ"
+ },
+ "outputs": [],
+ "source": [
+ "%%bigquery\n",
+ "CREATE SCHEMA\n",
+ " `bigquery_demo` OPTIONS (location = 'US');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "diwD_Bv0F_Sa"
+ },
+ "source": [
+ "## Create the remote model for Text Embedding in BigQuery ML\n",
+ "Text embeddings model converts textual data into numerical vectors.These vector representations are designed to capture the semantic meaning and context of the words they represent.To generate embeddings we are using `text-embedding-004` model, which is one of the text embedding models available on Vertex AI platform.\n",
+ "\n",
+ "You can learn more about Embeddings APIs [here](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings)\n",
+ "\n",
+ "Note: If you encounter a permission error while accessing or using the endpoint for the service account, please wait a minute and try again."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "KxAuo3YyGHp2"
+ },
+ "outputs": [],
+ "source": [
+ "%%bigquery\n",
+ "CREATE OR REPLACE MODEL `bigquery_demo.text_embedding_004`\n",
+ "REMOTE WITH CONNECTION `us.vertex_conn`\n",
+ "OPTIONS (endpoint = 'text-embedding-004')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "JcYVG4qNKAuY"
+ },
+ "source": [
+ "# Prepare the dataset for semantic search\n",
+ "Semantic search is a technology that interprets the meaning of words and phrases."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KCZ-nme8GjTN"
+ },
+ "source": [
+ "## Generate text embeddings for title and body associated with Stack Overflow questions\n",
+ "\n",
+ "For our use case we are going to use `title` and `body` fields from the Stack Overflow `posts_questions` table to generate text embeddings and perform semantic search using the `VECTOR_SEARCH` function.\n",
+ "\n",
+ "Note: To limit costs for this demo, we'll use the top 10,000 iOS-related posts."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "1VPgFMlBGi_5"
+ },
+ "outputs": [],
+ "source": [
+ "%%bigquery\n",
+ "CREATE OR REPLACE TABLE\n",
+ " `bigquery_demo.posts_questions_embedding` AS\n",
+ "SELECT\n",
+ " *\n",
+ "FROM\n",
+ " ML.GENERATE_EMBEDDING( MODEL `bigquery_demo.text_embedding_004`,\n",
+ " (\n",
+ " SELECT\n",
+ " id,\n",
+ " title,\n",
+ " body,\n",
+ " CONCAT (title, body ) AS CONTENT\n",
+ " FROM\n",
+ " `bigquery-public-data.stackoverflow.posts_questions`\n",
+ " WHERE\n",
+ " tags LIKE '%ios%'\n",
+ " ORDER BY\n",
+ " view_Count DESC\n",
+ " LIMIT\n",
+ " 10000 ),\n",
+ " STRUCT ( TRUE AS flatten_json_output,\n",
+ " 'SEMANTIC_SIMILARITY' AS task_type ) );"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "f324515ea4d6"
+ },
+ "source": [
+ "Let's now check the new table containing the embedding fields."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "8bcyGsNWHPeZ"
+ },
+ "outputs": [],
+ "source": [
+ "%%bigquery\n",
+ "SELECT * FROM `bigquery_demo.posts_questions_embedding` LIMIT 100;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ceUN_Mgm0vQa"
+ },
+ "source": [
+ "## Create Vector Index on the embeddings to help with efficient semantic search\n",
+ "A vector index is a data structure designed to let the `VECTOR_SEARCH` function perform a more efficient vector search of embeddings.You can learn more about vector index [here](https://cloud.google.com/bigquery/docs/vector-index)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Gi1WMtnp1Tvh"
+ },
+ "outputs": [],
+ "source": [
+ "%%bigquery\n",
+ " CREATE OR REPLACE VECTOR INDEX ix_posts_questions\n",
+ " ON\n",
+ " `bigquery_demo.posts_questions_embedding` (ml_generate_embedding_result) OPTIONS(index_type = 'IVF',\n",
+ " distance_type = 'COSINE',\n",
+ " ivf_options = '{\"num_lists\":500}');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "M4swkWSH08yE"
+ },
+ "source": [
+ "## Verify vector index creation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "v-MJ0Gtui_BZ"
+ },
+ "source": [
+ "Note: The vector index is populated asynchronously.You can check whether the index is ready to be used by querying the `INFORMATION_SCHEMA.VECTOR_INDEXES` view and verifying that the `coverage_percentage` column value is greater than 0 and the `last_refresh_time` column value isn't `NULL`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Znj6z0nstGBy"
+ },
+ "outputs": [],
+ "source": [
+ "%%bigquery\n",
+ "SELECT\n",
+ " table_name,\n",
+ " index_name,\n",
+ " index_status,\n",
+ " coverage_percentage,\n",
+ " last_refresh_time,\n",
+ " disable_reason\n",
+ "FROM\n",
+ " `bigquery_demo.INFORMATION_SCHEMA.VECTOR_INDEXES`;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Vj7gGTPy2qpQ"
+ },
+ "source": [
+ "# Perform semantic search\n",
+ "\n",
+ "Using text embeddings to perform similarity search on a new question"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "1SZ1OmVc2IN5"
+ },
+ "source": [
+ "## Match input question text to existing question's using vector search\n",
+ "Now let's perform a semantic search using the `VECTOR_SEARCH` function to find the top 5 closest results in our `posts_questions_embedding` table to a given question."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "N4pBoQPP2Y3k"
+ },
+ "outputs": [],
+ "source": [
+ "%%bigquery\n",
+ "SELECT\n",
+ " query.query as input_question,\n",
+ " base.id matching_question_id,\n",
+ " base.title as matching_question_title,\n",
+ " base.content as matching_question_content ,\n",
+ " distance,\n",
+ "FROM\n",
+ " VECTOR_SEARCH( TABLE `bigquery_demo.posts_questions_embedding`,\n",
+ " 'ml_generate_embedding_result',\n",
+ " (\n",
+ " SELECT\n",
+ " ml_generate_embedding_result,\n",
+ " content AS query\n",
+ " FROM\n",
+ " ML.GENERATE_EMBEDDING( MODEL `bigquery_demo.text_embedding_004`,\n",
+ " (\n",
+ " SELECT\n",
+ " 'Why does my iOS app crash with a low memory warning despite minimal memory usage?' AS content) ) ),\n",
+ " top_k => 5,\n",
+ " OPTIONS => '{\"fraction_lists_to_search\": 0.10}')\n",
+ "ORDER BY\n",
+ " distance ASC ;"
+ ]
+ },
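+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an optional sanity check, you can bypass the vector index and request an exact (brute-force) search via the `use_brute_force` option. This sketch reuses the same table, model, and question as above:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bigquery\n",
+    "SELECT\n",
+    "  query.query AS input_question,\n",
+    "  base.title AS matching_question_title,\n",
+    "  distance\n",
+    "FROM\n",
+    "  VECTOR_SEARCH( TABLE `bigquery_demo.posts_questions_embedding`,\n",
+    "    'ml_generate_embedding_result',\n",
+    "    (\n",
+    "      SELECT\n",
+    "        ml_generate_embedding_result,\n",
+    "        content AS query\n",
+    "      FROM\n",
+    "        ML.GENERATE_EMBEDDING( MODEL `bigquery_demo.text_embedding_004`,\n",
+    "          (\n",
+    "            SELECT\n",
+    "              'Why does my iOS app crash with a low memory warning despite minimal memory usage?' AS content) ) ),\n",
+    "    top_k => 5,\n",
+    "    OPTIONS => '{\"use_brute_force\": true}')\n",
+    "ORDER BY\n",
+    "  distance;"
+   ]
+  },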
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "835f45a9fdac"
+ },
+ "source": [
+ "Summary: The results demonstrate that `VECTOR_SEARCH` effectively identified the top 5 most similar questions.You can use this same approach to implement semantic search in BigQuery on any dataset."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "UB8VKTHJJlKx"
+ },
+ "source": [
+ "# Cleaning up\n",
+ "\n",
+ "To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.\n",
+ "\n",
+ "Otherwise, you can delete the individual resources you created in this tutorial by uncommenting the below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "nSF-ZhhPMjfs"
+ },
+ "outputs": [],
+ "source": [
+ "#\n",
+ "# !bq rm -r -f $PROJECT_ID:bigquery_demo\n",
+ "# !bq rm --connection --project_id=$PROJECT_ID --location=us vertex_conn\n",
+ "#"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "gFWBsvpbhGtE"
+ },
+ "source": [
+ "#Wrap up\n",
+ "\n",
+ "In this you have seen an example of how to integrate BQML with Vertex AI LLMs, how to generate embeddings with `ML.GENERATE_EMBEDDING` and perform semantic search using `VECTOR_SEARCH` in BigQuery.\n",
+ "\n",
+ "Check out our BigQuery ML documentation on [generating embeddings](https://cloud.google.com/bigquery/docs/generate-text-embedding) and [vector search](https://cloud.google.com/bigquery/docs/vector-search) to learn more about generative AI in BigQuery."
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "name": "stackoverflow_questions_semantic_search.ipynb",
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/vision/use-cases/video-thumbnail-generation/video_thumbnail_generation.ipynb b/vision/use-cases/video-thumbnail-generation/video_thumbnail_generation.ipynb
new file mode 100644
index 0000000000..6e83cbdab1
--- /dev/null
+++ b/vision/use-cases/video-thumbnail-generation/video_thumbnail_generation.ipynb
@@ -0,0 +1,689 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ijGzTHJJUCPY"
+ },
+ "outputs": [],
+ "source": [
+ "# Copyright 2024 Google LLC\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "VEqbX8OhE8y9"
+ },
+ "source": [
+ "# Video Thumbnail Generation using Gemini 1.5 Pro (API & Python SDK)\n",
+ "\n",
+ "\n",
+    "<table align=\"left\">\n",
+    "  <td>\n",
+    "    <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/vision/use-cases/video-thumbnail-generation/video_thumbnail_generation.ipynb\">Run in Colab</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fvision%2Fuse-cases%2Fvideo-thumbnail-generation%2Fvideo_thumbnail_generation.ipynb\">Run in Colab Enterprise</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://github.com/GoogleCloudPlatform/generative-ai/blob/main/vision/use-cases/video-thumbnail-generation/video_thumbnail_generation.ipynb\">View on GitHub</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/vision/use-cases/video-thumbnail-generation/video_thumbnail_generation.ipynb\">Open in Vertex AI Workbench</a>\n",
+    "  </td>\n",
+    "</table>"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "f0cc0f48513b"
+ },
+ "source": [
+ "| | |\n",
+ "|-|-|\n",
+ "|Author(s) | [Kartik Chaudhary](https://github.com/kartikgill)|"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DrkcqHrrwMAo"
+ },
+ "source": [
+ "## Objectives\n",
+ "\n",
+ "In this tutorial, you will learn how to extract meaningful thumbnail images from a video using Gemini 1.5 Pro (`gemini-1.5-pro`) model.\n",
+ "\n",
+ "You will complete the following tasks:\n",
+ "\n",
+ "- Install the Vertex AI SDK for Python\n",
+ "- Use the Vertex AI Gemini API to interact with the Gemini 1.5 Pro model\n",
+ " - Extract thumbnails for a Video along with captions using Gemini 1.5 Pro\n",
+ " - Use **[moviepy](https://zulko.github.io/moviepy/)** python library for frame extraction for a given timestamp\n",
+ " - Using a better prompt to improve results"
+ ]
+ },
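+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For orientation, frame extraction with moviepy boils down to a couple of calls. This is a minimal sketch; the file name `video.mp4` is a placeholder, and it assumes moviepy is already installed (it is installed in the next section):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal sketch (run after installing moviepy below); \"video.mp4\" is a placeholder.\n",
+    "# from moviepy.editor import VideoFileClip\n",
+    "#\n",
+    "# clip = VideoFileClip(\"video.mp4\")\n",
+    "# frame = clip.get_frame(t=12.5)  # RGB numpy array at t = 12.5 seconds\n",
+    "# clip.save_frame(\"thumbnail.png\", t=12.5)  # write the frame to disk"
+   ]
+  },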
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "C9nEPojogw-g"
+ },
+ "source": [
+ "### Costs\n",
+ "\n",
+ "This tutorial uses billable components of Google Cloud:\n",
+ "\n",
+ "- Vertex AI\n",
+ "\n",
+ "Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "r11Gu7qNgx1p"
+ },
+ "source": [
+ "## Getting Started\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "No17Cw5hgx12"
+ },
+ "source": [
+ "### Install libraries for Python\n",
+ "\n",
+ "- **[Vertex AI SDK](https://cloud.google.com/vertex-ai/docs/python-sdk/use-vertex-ai-python-sdk)**: to call the Vertex AI Gemini API.\n",
+ "- **[moviepy](https://zulko.github.io/moviepy/)**: A module for video editing."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "tFy3H3aPgx12"
+ },
+ "outputs": [],
+ "source": [
+ "! pip3 install --upgrade --user google-cloud-aiplatform\n",
+ "! pip3 install --upgrade --user moviepy"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "R5Xep4W9lq-Z"
+ },
+ "source": [
+ "### Restart current runtime\n",
+ "\n",
+ "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which will restart the current kernel."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "XRvKdaPDTznN"
+ },
+ "outputs": [],
+ "source": [
+ "# Restart kernel after installs so that your environment can access the new packages\n",
+ "import IPython\n",
+ "\n",
+ "app = IPython.Application.instance()\n",
+ "app.kernel.do_shutdown(True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SbmM4z7FOBpM"
+ },
+ "source": [
+ "\n",
+ "⚠️ The kernel is going to restart. Please wait until it is finished before continuing to the next step. ⚠️\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "dmWOrTJ3gx13"
+ },
+ "source": [
+ "### Authenticate your notebook environment (Colab only)\n",
+ "\n",
+ "If you are running this notebook on Google Colab, run the following cell to authenticate your environment. This step is not required if you are using [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "NyKGtVQjgx13"
+ },
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "\n",
+ "# Additional authentication is required for Google Colab\n",
+ "if \"google.colab\" in sys.modules:\n",
+ " # Authenticate user to Google Cloud\n",
+ " from google.colab import auth\n",
+ "\n",
+ " auth.authenticate_user()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DF4l8DTdWgPY"
+ },
+ "source": [
+ "### Set Google Cloud project information and initialize Vertex AI SDK\n",
+ "\n",
+ "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n",
+ "\n",
+ "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Nqwi-5ufWp_B"
+ },
+ "outputs": [],
+ "source": [
+ "PROJECT_ID = \"your-project-id\" # @param {type:\"string\"}\n",
+ "LOCATION = \"us-central1\" # @param {type:\"string\"}\n",
+ "\n",
+ "# Initialize Vertex AI\n",
+ "import vertexai\n",
+ "\n",
+ "vertexai.init(project=PROJECT_ID, location=LOCATION)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jXHfaVS66_01"
+ },
+ "source": [
+ "### Import libraries\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "lslYAvw37JGQ"
+ },
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import moviepy\n",
+ "from moviepy.editor import VideoFileClip\n",
+ "from vertexai.generative_models import GenerationConfig, GenerativeModel, Part"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4437b7608c8e"
+ },
+ "source": [
+ "## Using the Gemini 1.5 Pro model\n",
+ "\n",
+ "The Gemini 1.5 Pro (`gemini-1.5-pro`) model is a foundation model that performs well at a variety of multimodal tasks such as visual understanding, classification, summarization, and creating content from image, audio and video. It's adept at processing visual and text inputs such as photographs, documents, infographics, and screenshots.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BY1nfXrqRxVX"
+ },
+ "source": [
+ "### Load the Gemini 1.5 Pro model\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2998506fe6d1"
+ },
+ "outputs": [],
+ "source": [
+ "model = GenerativeModel(\"gemini-1.5-pro\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "f0b295e7dee5"
+ },
+ "source": [
+ "### Sample Video path from Google Cloud Storage\n",
+ "\n",
+ "#### [Click here to watch/download this video](https://cloud.google.com/vertex-ai/generative-ai/docs/prompt-gallery/samples/video_video_q_and_a_89?hl=en)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "d3df3e7686dd"
+ },
+ "outputs": [],
+ "source": [
+ "video_uri = \"gs://sample-videofile/sample_video_google_trips.webm\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "e332de1fb5df"
+ },
+ "source": [
+ "### Creating a local copy of the video for easy frame extraction"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "a1a71d47c5c1"
+ },
+ "outputs": [],
+ "source": [
+ "!gsutil cp {video_uri} sample_video.webm"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "125906be5cfa"
+ },
+ "source": [
+ "### Creating a MoviePy Clip Object (Helps in extracting frame at a given timestamp)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "931570104d40"
+ },
+ "outputs": [],
+ "source": [
+ "clip = VideoFileClip(\"sample_video.webm\")"
+ ]
+ },
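+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optionally, you can sanity-check frame extraction before calling Gemini: `get_frame` accepts a time in seconds or an `\"MM:SS\"`-style string and returns the frame as an RGB numpy array. The `\"0:01\"` timestamp below is just an illustrative example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check: extract and display a single frame.\n",
+    "# \"0:01\" is an arbitrary example timestamp; get_frame also accepts seconds.\n",
+    "frame = clip.get_frame(\"0:01\")\n",
+    "print(frame.shape)  # (height, width, 3) RGB array\n",
+    "plt.imshow(frame)\n",
+    "plt.axis(\"off\")\n",
+    "plt.show()"
+   ]
+  },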
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "fd2aea00f91f"
+ },
+ "source": [
+ "### Define a function to Call Gemini API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "bf07e42bf777"
+ },
+ "outputs": [],
+ "source": [
+ "def call_gemini(\n",
+ " prompt: str,\n",
+ " gcs_video_path: str,\n",
+ " model: vertexai.generative_models.GenerativeModel,\n",
+ ") -> str:\n",
+ " \"\"\"Call Gemini 1.5 Pro API with video and prompt.\"\"\"\n",
+ " # define fixed schema for Gemini outputs\n",
+ " response_schema = {\n",
+ " \"type\": \"array\",\n",
+ " \"items\": {\n",
+ " \"type\": \"object\",\n",
+ " \"properties\": {\n",
+ " \"timestamp\": {\n",
+ " \"type\": \"string\",\n",
+ " },\n",
+ " \"caption\": {\n",
+ " \"type\": \"string\",\n",
+ " },\n",
+ " },\n",
+ " \"required\": [\"timestamp\", \"caption\"],\n",
+ " },\n",
+ " }\n",
+ " # model configurations\n",
+ " generation_config = GenerationConfig(\n",
+ " temperature=1,\n",
+ " top_p=0.8,\n",
+ " max_output_tokens=8192,\n",
+ " response_schema=response_schema,\n",
+ " response_mime_type=\"application/json\",\n",
+ " )\n",
+ " # creating video input for API call\n",
+ " video_input = Part.from_uri(\n",
+ " mime_type=\"video/webm\",\n",
+ " uri=gcs_video_path,\n",
+ " )\n",
+ " # calling Gemini API\n",
+ " responses = model.generate_content(\n",
+ " [video_input, prompt],\n",
+ " generation_config=generation_config,\n",
+ " stream=False,\n",
+ " )\n",
+ " return responses.text"
+ ]
+ },
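+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The response schema above constrains Gemini to return a JSON array of objects with `timestamp` and `caption` keys. The cell below parses a small hypothetical sample (illustrative only; real values depend on the video and the model) to show the structure the rest of the notebook expects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical example of the schema-constrained output (illustrative only)\n",
+    "sample_response = (\n",
+    "    '[{\"timestamp\": \"0:05\", \"caption\": \"Opening scene\"},'\n",
+    "    ' {\"timestamp\": \"0:42\", \"caption\": \"Key moment\"}]'\n",
+    ")\n",
+    "for item in json.loads(sample_response):\n",
+    "    print(item[\"timestamp\"], \"->\", item[\"caption\"])"
+   ]
+  },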
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "8908704c327f"
+ },
+ "source": [
+ "### Defining a function to parse output and display results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "b27574e5c060"
+ },
+ "outputs": [],
+ "source": [
+ "def display_results(\n",
+ " response_text: str,\n",
+ " clip: moviepy.video.io.VideoFileClip.VideoFileClip,\n",
+ ") -> None:\n",
+ " \"\"\"Parse json output, extract thumbnail frames and display.\"\"\"\n",
+ " # loading json output object\n",
+ " json_response = json.loads(response_text)\n",
+ "\n",
+ " # Image plotting settings\n",
+ " fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 9))\n",
+ "\n",
+ " # extract frame for each timestamp and plot the images\n",
+ " counter = 0\n",
+ " for item in json_response:\n",
+ " timestamp = item[\"timestamp\"]\n",
+ " caption = item[\"caption\"]\n",
+ " frame = clip.get_frame(timestamp)\n",
+ " row, col = counter // 2, counter % 2\n",
+ " ax[row, col].imshow(frame)\n",
+ " ax[row, col].set_title(caption, fontdict={\"fontsize\": 9})\n",
+ " counter += 1\n",
+ "\n",
+ " fig.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "973247741f50"
+ },
+ "source": [
+ "# Case 1: Using a Simple Prompt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "39445140da28"
+ },
+ "source": [
+ "### Writing a basic prompt for thumbnail generation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "29bc908cd474"
+ },
+ "outputs": [],
+ "source": [
+ "basic_prompt = (\n",
+ " \"\"\"Generate 4 thumbnail images from the given video file with short captions.\"\"\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7ad3135b0247"
+ },
+ "source": [
+ "### calling Gemini API with our prompt and video"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "35ce6c942d21"
+ },
+ "outputs": [],
+ "source": [
+ "response_text = call_gemini(\n",
+ " prompt=basic_prompt,\n",
+ " gcs_video_path=video_uri,\n",
+ " model=model,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "916b62896ee6"
+ },
+ "source": [
+ "### showing JSON output from Gemini"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2c3e94043a97"
+ },
+ "outputs": [],
+ "source": [
+ "print(json.loads(response_text))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "a4a72354a934"
+ },
+ "source": [
+ "### displaying thumbnail results with captions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "b57da95884ee"
+ },
+ "outputs": [],
+ "source": [
+ "display_results(response_text, clip)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "903857971e6b"
+ },
+ "source": [
+ "# Case 2: Using an Advanced Prompt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "50d6d00f63bd"
+ },
+ "source": [
+ "### Writing an advanced prompt for better thumbnail generation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "19771c8d3bf3"
+ },
+ "outputs": [],
+ "source": [
+ "advanced_prompt = \"\"\"You are an expert in video content creation and content marketing.\n",
+ "You have the ability to find best thumbnails from a video and provide meaningful and short and catchy captions for them.\n",
+ "Your task is to find the best 4 thumbnails from a given video along with short, and meaningful captions that is good for marketing.\n",
+ "Consider the following rules while generating thubmnails:\n",
+ "\n",
+ "- Thumbnail should have clear focus on the key objects and people, less focus on background\n",
+ "- Thumbnail image should be high quality and bright, avoid blurry images\n",
+ "- Thumbnail image and caption together tell a story\n",
+ "- Thumbnail caption is good for marketing\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "a22dd4dc041c"
+ },
+ "source": [
+ "### calling Gemini API with advanced prompt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "32c86bd57475"
+ },
+ "outputs": [],
+ "source": [
+ "response_text_advanced = call_gemini(\n",
+ " prompt=advanced_prompt,\n",
+ " gcs_video_path=video_uri,\n",
+ " model=model,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0c67055675ae"
+ },
+ "source": [
+ "### showing JSON output string"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "91ce4125090e"
+ },
+ "outputs": [],
+ "source": [
+ "print(json.loads(response_text_advanced))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "11616f4f3f17"
+ },
+ "source": [
+ "### displaying final thumbnails with captions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "40e7a1c967fd"
+ },
+ "outputs": [],
+ "source": [
+ "display_results(response_text_advanced, clip)"
+ ]
+ },
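+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you want the thumbnails as image files rather than an inline plot, the sketch below saves each selected frame as a PNG with matplotlib's `imsave`. The `thumbnail_<n>.png` filenames are arbitrary examples."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save each selected frame as a PNG file (filenames are arbitrary examples)\n",
+    "for i, item in enumerate(json.loads(response_text_advanced)[:4]):\n",
+    "    frame = clip.get_frame(item[\"timestamp\"])\n",
+    "    plt.imsave(f\"thumbnail_{i}.png\", frame)\n",
+    "    print(f\"Saved thumbnail_{i}.png: {item['caption']}\")"
+   ]
+  },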
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4fc9eac1d116"
+ },
+ "source": [
+ "### Observations\n",
+ "\n",
+ "#### Better prompting shows the following effects on results\n",
+ "- Results have improved in quality\n",
+ "- Captions are more meaningful\n",
+ "- Thumbnail images and captions tell a better story"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "d0d201f5af81"
+ },
+ "source": [
+ "## Conclusion\n",
+ "\n",
+ "- We just saw that Gemini 1.5 Pro has multimodal capabilities, and can be used for video understanding.\n",
+ "- Results can be improved by better prompting with proper guidelines and expectations."
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "name": "video_thumbnail_generation.ipynb",
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}