From 9c14e564e6886ec39fe74fa3bec640110c134ec2 Mon Sep 17 00:00:00 2001
From: ethan
Date: Fri, 22 Mar 2024 19:24:56 -0700
Subject: [PATCH] add workflow diagram reset

---
 .ci/spellcheck/.pyspelling.wordlist.txt       |   1 +
 .../254-llm-chatbot/254-rag-chatbot.ipynb     | 126 +++++++-----------
 .../254-llm-chatbot/ov_embedding_model.py     |   8 +-
 notebooks/254-llm-chatbot/ov_rerank_model.py  |   8 +-
 4 files changed, 56 insertions(+), 87 deletions(-)

diff --git a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt
index ed59bb5610f..5e820c9c081 100644
--- a/.ci/spellcheck/.pyspelling.wordlist.txt
+++ b/.ci/spellcheck/.pyspelling.wordlist.txt
@@ -614,6 +614,7 @@ Rescaling
 ResNet
 resnet
 RetinaFace
+RetroMAE
 RGB
 Riffusion
 riffusion
diff --git a/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb b/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb
index b082b4443c1..ad7ffc9afba 100644
--- a/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb
+++ b/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb
@@ -18,7 +18,11 @@
     "- Download and convert the model from a public source using the [OpenVINO integration with Hugging Face Optimum](https://huggingface.co/blog/openvino).\n",
     "- Compress model weights to 4-bit or 8-bit data types using [NNCF](https://github.com/openvinotoolkit/nncf)\n",
     "- Create a RAG chain pipeline\n",
-    "- Run chat pipeline\n",
+    "- Run Q&A pipeline\n",
+    "\n",
+    "In this example, the customized RAG pipeline consists of the following components, where the embedding, rerank, and LLM models are deployed with OpenVINO to optimize their inference performance.\n",
+    "\n",
+    "![RAG](https://github.com/openvinotoolkit/openvino_notebooks/assets/91237924/0076f6c7-75e4-4c2e-9015-87b355e5ca28)\n",
     "\n",
     "\n",
     "#### Table of contents:\n",
@@ -104,7 +108,7 @@
     "* **bge-large-en-v1.5** [model card](https://huggingface.co/BAAI/bge-large-en-v1.5)\n",
     "* **bge-large-zh-v1.5** [model card](https://huggingface.co/BAAI/bge-large-zh-v1.5)\n",
     "\n",
-    "BGE embedding is a general Embedding Model. The model is pre-trained using retromae and train them on large-scale pair data using contrastive learning.\n",
+    "BGE embedding is a general embedding model. It is pre-trained using RetroMAE and then trained on large-scale pair data using contrastive learning.\n",
     "\n",
     "The available rerank model options are:\n",
     "\n",
@@ -940,38 +944,6 @@
     "print(f\"Embedding model will be loaded to {embedding_device.value} device for text embedding\")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "eddd5cad",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5ecea71a683047bd8aaa37b787e80f90",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Checkbox(value=True, description='Rerank')"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "run_rerank = widgets.Checkbox(\n",
-    "    value=True,\n",
-    "    description='Rerank',\n",
-    "    disabled=False,\n",
-    ")\n",
-    "\n",
-    "run_rerank"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "81b2644c",
    "metadata": {},
    "source": [
     "### Select device for rerank\n",
     "[back to top ⬆️](#Table-of-contents:)"
    ]
   },
@@ -981,35 +953,6 @@
-  {
-   "cell_type": "markdown",
-   "id": "5c32f230-a4c6-4e34-aca2-c1dd7e9b4567",
-   "metadata": {},
-   "source": [
-    "Let's load `skip magic` extension to skip rerank if `run_rerank` is not selected"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "id": "4169f47e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The skip_kernel_extension extension is already loaded. To reload it, use:\n",
-      "  %reload_ext skip_kernel_extension\n"
-     ]
-    }
-   ],
-   "source": [
-    "import sys\n",
-    "sys.path.append(\"../utils\")\n",
-    "%load_ext skip_kernel_extension"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 32,
@@ -1017,8 +960,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%skip not $run_rerank.value\n",
-    "\n",
     "rerank_device = widgets.Dropdown(\n",
     "    options=core.available_devices + [\"AUTO\"],\n",
     "    value=\"CPU\",\n",
     "    description='Device:',\n",
     "    disabled=False,\n",
     ")\n",
     "\n",
     "rerank_device"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7b7a76b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f\"Rerank model will be loaded to {rerank_device.value} device for text reranking\")"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -1123,10 +1074,12 @@
     "from ov_embedding_model import OVBgeEmbeddings\n",
     "\n",
     "encode_kwargs = {'normalize_embeddings': embedding_model_configuration[\"do_norm\"]}\n",
+    "embedding_model_kwargs = {\"device\": embedding_device.value}\n",
+    "\n",
     "embedding = OVBgeEmbeddings(\n",
     "    model_dir=embedding_model_id.value,\n",
+    "    model_kwargs=embedding_model_kwargs,\n",
     "    encode_kwargs=encode_kwargs,\n",
-    "    device=embedding_device.value\n",
     ")"
    ]
   },
@@ -1138,26 +1091,45 @@
     "### Load rerank model\n",
     "[back to top ⬆️](#Table-of-contents:)\n",
     "\n",
-    "Wrapper around a text rerank model for LangChain, used for reranking Top-k results from vector search. It's a optional model in RAG pipeline."
+    "Wrapper around a text rerank model for LangChain, used for reranking Top-k results from vector search. It's an optional model in the RAG pipeline.\n",
+    "\n",
+    ">**Note**: The rerank step can be skipped in RAG. Use the following checkbox to select whether to add it to the RAG pipeline."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b67b39f2-8394-45fb-9b2b-ea63e267a2d3",
+   "id": "1b3bee69",
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%skip not $run_rerank.value\n",
-    "\n",
-    "from ov_rerank_model import OVRanker\n",
+    "run_rerank = widgets.Checkbox(\n",
+    "    value=True,\n",
+    "    description='Rerank',\n",
+    "    disabled=False,\n",
+    ")\n",
     "\n",
-    "rerank_top_n = 3\n",
-    "reranker = OVRanker(\n",
-    "    model_dir=rerank_model_id.value,\n",
-    "    device=rerank_device.value,\n",
-    "    top_n=rerank_top_n\n",
-    ")"
+    "run_rerank"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b67b39f2-8394-45fb-9b2b-ea63e267a2d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if run_rerank.value:\n",
+    "    from ov_rerank_model import OVRanker\n",
+    "\n",
+    "    rerank_top_n = 3\n",
+    "    rerank_model_kwargs = {\"device\": rerank_device.value}\n",
+    "\n",
+    "    reranker = OVRanker(\n",
+    "        model_dir=rerank_model_id.value,\n",
+    "        model_kwargs=rerank_model_kwargs,\n",
+    "        top_n=rerank_top_n\n",
+    "    )"
+   ]
+  },
   {
diff --git a/notebooks/254-llm-chatbot/ov_embedding_model.py b/notebooks/254-llm-chatbot/ov_embedding_model.py
index e7bd6e22906..9505c725ff3 100644
--- a/notebooks/254-llm-chatbot/ov_embedding_model.py
+++ b/notebooks/254-llm-chatbot/ov_embedding_model.py
@@ -31,10 +31,8 @@ class OVBgeEmbeddings(BaseModel, Embeddings):
     """Tokenizer for embedding model."""
     model_dir: str
     """Path to store models."""
-    device: str = "CPU"
-    """Device for model deployment. """
-    ov_config: Dict[str, Any] = Field(default_factory=dict)
-    """OpenVINO configuration arguments to pass to the model."""
+    model_kwargs: Dict[str, Any]
+    """Keyword arguments passed to the model."""
     encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
     """Keyword arguments to pass when calling the `encode` method of the model."""
     query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
@@ -45,7 +43,7 @@ def __init__(self, **kwargs: Any):
         super().__init__(**kwargs)

         self.ov_model = OVModelForFeatureExtraction.from_pretrained(
-            self.model_dir, device=self.device, ov_config=self.ov_config)
+            self.model_dir, **self.model_kwargs)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)

         if "-zh" in self.model_dir:
diff --git a/notebooks/254-llm-chatbot/ov_rerank_model.py b/notebooks/254-llm-chatbot/ov_rerank_model.py
index ca666094e73..513ec868371 100644
--- a/notebooks/254-llm-chatbot/ov_rerank_model.py
+++ b/notebooks/254-llm-chatbot/ov_rerank_model.py
@@ -32,10 +32,8 @@ class OVRanker(BaseDocumentCompressor):
     """Tokenizer for embedding model."""
     model_dir: str
     """Path to store models."""
-    device: str = "CPU"
-    """Device for model deployment. """
-    ov_config: Dict[str, Any] = Field(default_factory=dict)
-    """OpenVINO configuration arguments to pass to the model."""
+    model_kwargs: Dict[str, Any]
+    """Keyword arguments passed to the model."""
     top_n: int = 4
     """return Top n texts."""

@@ -43,7 +41,7 @@ def __init__(self, **kwargs: Any):
         super().__init__(**kwargs)
         self.tokenizer = self._get_tokenizer()
         self.ov_model = OVModelForSequenceClassification.from_pretrained(
-            self.model_dir, device=self.device, ov_config=self.ov_config)
+            self.model_dir, **self.model_kwargs)

     def _load_vocab(self, vocab_file):
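
Usage note: below is a minimal sketch of how the refactored wrappers are constructed after this change. The class names, the `model_kwargs` field, and the `encode_kwargs`/`top_n` arguments come from the diffs above; the model directory paths and device choice are hypothetical placeholders.

    from ov_embedding_model import OVBgeEmbeddings
    from ov_rerank_model import OVRanker

    # model_kwargs is forwarded verbatim to from_pretrained, so any loader
    # option the old fields covered (e.g. device, ov_config) can go here.
    embedding = OVBgeEmbeddings(
        model_dir="bge-large-en-v1.5",   # placeholder: directory with the converted model
        model_kwargs={"device": "CPU"},  # placeholder device
        encode_kwargs={"normalize_embeddings": True},
    )

    reranker = OVRanker(
        model_dir="bge-reranker-large",  # placeholder: directory with the converted model
        model_kwargs={"device": "CPU"},  # placeholder device
        top_n=3,
    )

One consequence of this design is that options which used to be separate fields (`device`, `ov_config`) now travel through a single dict, so future loader options need no further wrapper changes.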