From 9c14e564e6886ec39fe74fa3bec640110c134ec2 Mon Sep 17 00:00:00 2001
From: ethan
Date: Fri, 22 Mar 2024 19:24:56 -0700
Subject: [PATCH] add workflow diagram reset

---
 .ci/spellcheck/.pyspelling.wordlist.txt       |   1 +
 .../254-llm-chatbot/254-rag-chatbot.ipynb     | 126 +++++++-----------
 .../254-llm-chatbot/ov_embedding_model.py     |   8 +-
 notebooks/254-llm-chatbot/ov_rerank_model.py  |   8 +-
 4 files changed, 56 insertions(+), 87 deletions(-)

diff --git a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt
index ed59bb5610f..5e820c9c081 100644
--- a/.ci/spellcheck/.pyspelling.wordlist.txt
+++ b/.ci/spellcheck/.pyspelling.wordlist.txt
@@ -614,6 +614,7 @@ Rescaling
 ResNet
 resnet
 RetinaFace
+RetroMAE
 RGB
 Riffusion
 riffusion
diff --git a/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb b/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb
index b082b4443c1..ad7ffc9afba 100644
--- a/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb
+++ b/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb
@@ -18,7 +18,11 @@
     "- Download and convert the model from a public source using the [OpenVINO integration with Hugging Face Optimum](https://huggingface.co/blog/openvino).\n",
     "- Compress model weights to 4-bit or 8-bit data types using [NNCF](https://github.com/openvinotoolkit/nncf)\n",
     "- Create a RAG chain pipeline\n",
-    "- Run chat pipeline\n",
+    "- Run Q&A pipeline\n",
+    "\n",
+    "In this example, the customized RAG pipeline consists of the following components, where the embedding, rerank, and LLM models are deployed with OpenVINO to optimize their inference performance.\n",
+    "\n",
+    "![RAG](https://github.com/openvinotoolkit/openvino_notebooks/assets/91237924/0076f6c7-75e4-4c2e-9015-87b355e5ca28)\n",
     "\n",
     "\n",
     "#### Table of contents:\n",
@@ -104,7 +108,7 @@
     "* **bge-large-en-v1.5** [model card](https://huggingface.co/BAAI/bge-large-en-v1.5)\n",
     "* **bge-large-zh-v1.5** [model card](https://huggingface.co/BAAI/bge-large-zh-v1.5)\n",
     "\n",
-    "BGE embedding is a general Embedding Model. The model is pre-trained using retromae and train them on large-scale pair data using contrastive learning.\n",
+    "BGE embedding is a general embedding model. It is pre-trained using RetroMAE and then trained on large-scale pair data using contrastive learning.\n",
     "\n",
     "The available rerank model options are:\n",
     "\n",
@@ -940,38 +944,6 @@
     "print(f\"Embedding model will be loaded to {embedding_device.value} device for text embedding\")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "eddd5cad",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5ecea71a683047bd8aaa37b787e80f90",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Checkbox(value=True, description='Rerank')"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "run_rerank = widgets.Checkbox(\n",
-    "    value=True,\n",
-    "    description='Rerank',\n",
-    "    disabled=False,\n",
-    ")\n",
-    "\n",
-    "run_rerank"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "81b2644c",
    "metadata": {},
    "source": [
     "### Select device for rerank\n",
     "[back to top ⬆️](#Table-of-contents:)"
    ]
   },
@@ -981,35 +953,6 @@
-  {
-   "cell_type": "markdown",
-   "id": "5c32f230-a4c6-4e34-aca2-c1dd7e9b4567",
-   "metadata": {},
-   "source": [
-    "Let's load `skip magic` extension to skip rerank if `run_rerank` is not selected"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "id": "4169f47e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The skip_kernel_extension extension is already loaded. To reload it, use:\n",
-      "  %reload_ext skip_kernel_extension\n"
-     ]
-    }
-   ],
-   "source": [
-    "import sys\n",
-    "sys.path.append(\"../utils\")\n",
-    "%load_ext skip_kernel_extension"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 32,
@@ -1017,8 +960,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%skip not $run_rerank.value\n",
-    "\n",
     "rerank_device = widgets.Dropdown(\n",
     "    options=core.available_devices + [\"AUTO\"],\n",
     "    value=\"CPU\",\n",
     "    description='Device:',\n",
     "    disabled=False,\n",
     ")\n",
     "\n",
     "rerank_device"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7b7a76b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f\"Rerank model will be loaded to {rerank_device.value} device for text reranking\")"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -1123,10 +1074,12 @@
     "from ov_embedding_model import OVBgeEmbeddings\n",
     "\n",
     "encode_kwargs = {'normalize_embeddings': embedding_model_configuration[\"do_norm\"]}\n",
+    "embedding_model_kwargs = {\"device\": embedding_device.value}\n",
+    "\n",
     "embedding = OVBgeEmbeddings(\n",
     "    model_dir=embedding_model_id.value,\n",
+    "    model_kwargs=embedding_model_kwargs,\n",
     "    encode_kwargs=encode_kwargs,\n",
-    "    device=embedding_device.value\n",
     ")"
    ]
   },
@@ -1138,26 +1091,45 @@
     "### Load rerank model\n",
     "[back to top ⬆️](#Table-of-contents:)\n",
     "\n",
-    "Wrapper around a text rerank model for LangChain, used for reranking Top-k results from vector search. It's a optional model in RAG pipeline."
+    "Wrapper around a text rerank model for LangChain, used for reranking Top-k results from vector search. It's an optional model in the RAG pipeline.\n",
+    "\n",
+    ">**Note**: The rerank step can be skipped in RAG. Use the following checkbox to select whether to add it to the RAG pipeline."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b67b39f2-8394-45fb-9b2b-ea63e267a2d3",
+   "id": "1b3bee69",
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%skip not $run_rerank.value\n",
-    "\n",
-    "from ov_rerank_model import OVRanker\n",
+    "run_rerank = widgets.Checkbox(\n",
+    "    value=True,\n",
+    "    description='Rerank',\n",
+    "    disabled=False,\n",
+    ")\n",
     "\n",
-    "rerank_top_n = 3\n",
-    "reranker = OVRanker(\n",
-    "    model_dir=rerank_model_id.value,\n",
-    "    device=rerank_device.value,\n",
-    "    top_n=rerank_top_n\n",
-    ")"
+    "run_rerank"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b67b39f2-8394-45fb-9b2b-ea63e267a2d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if run_rerank.value:\n",
+    "    from ov_rerank_model import OVRanker\n",
+    "\n",
+    "    rerank_top_n = 3\n",
+    "    rerank_model_kwargs = {\"device\": rerank_device.value}\n",
+    "\n",
+    "    reranker = OVRanker(\n",
+    "        model_dir=rerank_model_id.value,\n",
+    "        model_kwargs=rerank_model_kwargs,\n",
+    "        top_n=rerank_top_n\n",
+    "    )"
+   ]
+  },
   {
diff --git a/notebooks/254-llm-chatbot/ov_embedding_model.py b/notebooks/254-llm-chatbot/ov_embedding_model.py
index e7bd6e22906..9505c725ff3 100644
--- a/notebooks/254-llm-chatbot/ov_embedding_model.py
+++ b/notebooks/254-llm-chatbot/ov_embedding_model.py
@@ -31,10 +31,8 @@ class OVBgeEmbeddings(BaseModel, Embeddings):
     """Tokenizer for embedding model."""
     model_dir: str
     """Path to store models."""
-    device: str = "CPU"
-    """Device for model deployment. """
-    ov_config: Dict[str, Any] = Field(default_factory=dict)
-    """OpenVINO configuration arguments to pass to the model."""
+    model_kwargs: Dict[str, Any]
+    """Keyword arguments passed to the model."""
     encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
     """Keyword arguments to pass when calling the `encode` method of the model."""
     query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
@@ -45,7 +43,7 @@ def __init__(self, **kwargs: Any):
         super().__init__(**kwargs)

         self.ov_model = OVModelForFeatureExtraction.from_pretrained(
-            self.model_dir, device=self.device, ov_config=self.ov_config)
+            self.model_dir, **self.model_kwargs)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)

         if "-zh" in self.model_dir:
diff --git a/notebooks/254-llm-chatbot/ov_rerank_model.py b/notebooks/254-llm-chatbot/ov_rerank_model.py
index ca666094e73..513ec868371 100644
--- a/notebooks/254-llm-chatbot/ov_rerank_model.py
+++ b/notebooks/254-llm-chatbot/ov_rerank_model.py
@@ -32,10 +32,8 @@ class OVRanker(BaseDocumentCompressor):
     """Tokenizer for embedding model."""
     model_dir: str
     """Path to store models."""
-    device: str = "CPU"
-    """Device for model deployment. """
-    ov_config: Dict[str, Any] = Field(default_factory=dict)
-    """OpenVINO configuration arguments to pass to the model."""
+    model_kwargs: Dict[str, Any]
+    """Keyword arguments passed to the model."""
     top_n: int = 4
     """return Top n texts."""

@@ -43,7 +41,7 @@ def __init__(self, **kwargs: Any):
         super().__init__(**kwargs)
         self.tokenizer = self._get_tokenizer()
         self.ov_model = OVModelForSequenceClassification.from_pretrained(
-            self.model_dir, device=self.device, ov_config=self.ov_config)
+            self.model_dir, **self.model_kwargs)

     def _load_vocab(self, vocab_file):
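
Usage note: below is a minimal sketch of how the refactored wrappers are constructed after this change. The class names, the `model_kwargs` field, and the `encode_kwargs`/`top_n` arguments come from the diffs above; the model directory paths and device choice are hypothetical placeholders.

    from ov_embedding_model import OVBgeEmbeddings
    from ov_rerank_model import OVRanker

    # model_kwargs is forwarded verbatim to from_pretrained, so any loader
    # option the old fields covered (e.g. device, ov_config) can go here.
    embedding = OVBgeEmbeddings(
        model_dir="bge-large-en-v1.5",   # placeholder: directory with the converted model
        model_kwargs={"device": "CPU"},  # placeholder device
        encode_kwargs={"normalize_embeddings": True},
    )

    reranker = OVRanker(
        model_dir="bge-reranker-large",  # placeholder: directory with the converted model
        model_kwargs={"device": "CPU"},  # placeholder device
        top_n=3,
    )

One consequence of this design is that options which used to be separate fields (`device`, `ov_config`) now travel through a single dict, so future loader options need no further wrapper changes.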