[RAG] Update RAG with text examples (#1967)

Including: (1) update the RAG notebook with text examples; (2) change the PDF loader (PDFMinerLoader → PyPDFLoader) for better accuracy.
openvinotoolkit · Apr 26, 2024 · 1283dc0 · 1283dc0
1 parent 631607a
commit 1283dc0
Showing 1 changed file with 95 additions and 41 deletions.
diff --git a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb
@@ -59,10 +59,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 50,
    "id": "1f077b32-5d36-44b0-9041-407e996283a3",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[33mWARNING: Skipping openvino-dev as it is not installed.\u001b[0m\u001b[33m\n",
+      "\u001b[0m\u001b[33mWARNING: Skipping openvino as it is not installed.\u001b[0m\u001b[33m\n",
+      "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
    "source": [
     "%pip uninstall -q -y openvino-dev openvino openvino-nightly optimum optimum-intel\n",
     "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu\\\n",
@@ -72,7 +83,7 @@
     "\"accelerate\"\\\n",
     "\"openvino-nightly\"\\\n",
     "\"gradio\"\\\n",
-    "\"onnx\" \"einops\" \"transformers_stream_generator\" \"tiktoken\" \"transformers>=4.38.1\" \"bitsandbytes\" \"chromadb\" \"sentence_transformers\" \"langchain>=0.1.15\" \"langchainhub\" \"unstructured\" \"scikit-learn\" \"python-docx\" \"pdfminer.six\" "
+    "\"onnx\" \"einops\" \"transformers_stream_generator\" \"tiktoken\" \"transformers>=4.38.1\" \"bitsandbytes\" \"chromadb\" \"sentence_transformers\" \"langchain>=0.1.15\" \"langchainhub\" \"unstructured\" \"scikit-learn\" \"python-docx\" \"pypdf\" "
    ]
   },
   {
@@ -85,19 +96,36 @@
     "import shutil\n",
     "from pathlib import Path\n",
     "import requests\n",
+    "import io\n",
     "\n",
     "# fetch model configuration\n",
     "\n",
     "config_shared_path = Path(\"../../utils/llm_config.py\")\n",
     "config_dst_path = Path(\"llm_config.py\")\n",
+    "text_example_en_path = Path(\"text_example_en.pdf\")\n",
+    "text_example_cn_path = Path(\"text_example_cn.pdf\")\n",
+    "text_example_en = \"https://github.com/openvinotoolkit/openvino_notebooks/files/15039728/Platform.Brief_Intel.vPro.with.Intel.Core.Ultra_Final.pdf\"\n",
+    "text_example_cn = \"https://github.com/openvinotoolkit/openvino_notebooks/files/15039713/Platform.Brief_Intel.vPro.with.Intel.Core.Ultra_Final_CH.pdf\"\n",
     "\n",
     "if not config_dst_path.exists():\n",
     "    if config_shared_path.exists():\n",
     "        shutil.copy(config_shared_path, config_dst_path)\n",
     "    else:\n",
     "        r = requests.get(url=\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/llm_config.py\")\n",
     "        with open(\"llm_config.py\", \"w\") as f:\n",
-    "            f.write(r.text)"
+    "            f.write(r.text)\n",
+    "\n",
+    "if not text_example_en_path.exists():\n",
+    "    r = requests.get(url=text_example_en)\n",
+    "    content = io.BytesIO(r.content)\n",
+    "    with open(\"text_example_en.pdf\", \"wb\") as f:\n",
+    "        f.write(content.read())\n",
+    "\n",
+    "if not text_example_cn_path.exists():\n",
+    "    r = requests.get(url=text_example_cn)\n",
+    "    content = io.BytesIO(r.content)\n",
+    "    with open(\"text_example_cn.pdf\", \"wb\") as f:\n",
+    "        f.write(content.read())"
    ]
   },
   {
@@ -173,7 +201,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8aff9565ac6f4c0993359ef2df59734c",
+       "model_id": "ed92457ad3cf42b2a22ca224141fe738",
        "version_major": 2,
        "version_minor": 0
       },
@@ -214,12 +242,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "84cb9d80223e4cab856c3e5ea9d88ea0",
+       "model_id": "aad6acd73a9a40fbae5ec40195512a80",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "Dropdown(description='Model:', index=10, options=('tiny-llama-1b-chat', 'gemma-2b-it', 'red-pajama-3b-chat', '…"
+       "Dropdown(description='Model:', index=9, options=('tiny-llama-1b-chat', 'gemma-2b-it', 'red-pajama-3b-chat', 'g…"
       ]
      },
      "execution_count": 4,
@@ -250,7 +278,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Selected LLM model llama-3-8b-instruct\n"
+      "Selected LLM model neural-chat-7b-v3-1\n"
      ]
     }
    ],
@@ -308,7 +336,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "82f25885896749728c96d9f909f65a5a",
+       "model_id": "009a1f80d9e24964a084d5d31cfecccd",
        "version_major": 2,
        "version_minor": 0
       },
@@ -322,7 +350,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1731c06ddd1b4ad792516efb23b51f80",
+       "model_id": "f79b9054accd4501acad346d05ef3c6d",
        "version_major": 2,
        "version_minor": 0
       },
@@ -336,7 +364,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6a8bd8732be14625ba4474989c4d6545",
+       "model_id": "a1af239531b44c04a2cac15dacb629bd",
        "version_major": 2,
        "version_minor": 0
       },
@@ -522,7 +550,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Size of model with INT4 compressed weights is 5106.56 MB\n"
+      "Size of model with INT4 compressed weights is 5069.90 MB\n"
      ]
     }
    ],
@@ -562,7 +590,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7cdeb3e1ce7c464f84c81d118c3856b8",
+       "model_id": "7ff9bbb6909d40968b638df90683f360",
        "version_major": 2,
        "version_minor": 0
       },
@@ -648,7 +676,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "060d8facb29e4ab99ba5c5a51a36a0cd",
+       "model_id": "ff064eaa037440a7b22af61047422e9c",
        "version_major": 2,
        "version_minor": 0
       },
@@ -741,12 +769,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "31871e2ec744496ca8ccfad377310c5c",
+       "model_id": "1448cdadd58b41fa8116605d114285ef",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "Dropdown(description='Device:', options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='CPU')"
+       "Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU')"
       ]
      },
      "execution_count": 15,
@@ -803,12 +831,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1c269bd0ac424e8eb1a9c5738d1709d6",
+       "model_id": "321d8385fc0345498f07154637c09eb4",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "Dropdown(description='Device:', options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='CPU')"
+       "Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU')"
       ]
      },
      "execution_count": 17,
@@ -865,12 +893,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "889f3eb31f0d4cfaacefb453d58065df",
+       "model_id": "1f833b438e18488c9b9c85cb87786b26",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "Dropdown(description='Device:', options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='CPU')"
+       "Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU')"
       ]
      },
      "execution_count": 19,
@@ -941,16 +969,14 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2024-04-19 00:18:35.178975: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2024-04-19 00:18:35.181441: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2024-04-19 00:18:35.212245: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
-      "2024-04-19 00:18:35.212267: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
-      "2024-04-19 00:18:35.212295: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
-      "2024-04-19 00:18:35.219017: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2024-04-19 00:18:35.219908: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "2024-04-25 23:24:53.325545: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+      "2024-04-25 23:24:53.329247: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2024-04-25 23:24:53.374367: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+      "2024-04-25 23:24:53.374399: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+      "2024-04-25 23:24:53.374437: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+      "2024-04-25 23:24:53.385838: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
       "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2024-04-19 00:18:36.155999: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
-      "The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n",
+      "2024-04-25 23:24:54.133894: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
       "Compiling the model to CPU ...\n"
      ]
     },
@@ -1050,7 +1076,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b815fe3af56542cbb08c2d06e9e9dd26",
+       "model_id": "42271e02cc19435f89659d127241fb1a",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1096,20 +1122,26 @@
    "id": "f7f708db-8de1-4efd-94b2-fcabc48d52f4",
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The argument `trust_remote_code` is to be used along with export=True. It will be ignored.\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loading model from llama-3-8b-instruct/INT4_compressed_weights\n"
+      "Loading model from neural-chat-7b-v3-1/INT4_compressed_weights\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
-      "The argument `trust_remote_code` is to be used along with export=True. It will be ignored.\n",
-      "Compiling the model to CPU ...\n"
+      "Compiling the model to CPU ...\n",
+      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
      ]
     },
     {
@@ -1208,7 +1240,7 @@
     "from langchain.document_loaders import (\n",
     "    CSVLoader,\n",
     "    EverNoteLoader,\n",
-    "    PDFMinerLoader,\n",
+    "    PyPDFLoader,\n",
     "    TextLoader,\n",
     "    UnstructuredEPubLoader,\n",
     "    UnstructuredHTMLLoader,\n",
@@ -1255,11 +1287,30 @@
     "    \".html\": (UnstructuredHTMLLoader, {}),\n",
     "    \".md\": (UnstructuredMarkdownLoader, {}),\n",
     "    \".odt\": (UnstructuredODTLoader, {}),\n",
-    "    \".pdf\": (PDFMinerLoader, {}),\n",
+    "    \".pdf\": (PyPDFLoader, {}),\n",
     "    \".ppt\": (UnstructuredPowerPointLoader, {}),\n",
     "    \".pptx\": (UnstructuredPowerPointLoader, {}),\n",
     "    \".txt\": (TextLoader, {\"encoding\": \"utf8\"}),\n",
-    "}"
+    "}\n",
+    "\n",
+    "chinese_examples = [\n",
+    "    [\"英特尔®酷睿™ Ultra处理器可以降低多少功耗？\"],\n",
+    "    [\"相比英特尔之前的移动处理器产品，英特尔®酷睿™ Ultra处理器的AI推理性能提升了多少？\"],\n",
+    "    [\"英特尔博锐® Enterprise系统提供哪些功能？\"],\n",
+    "]\n",
+    "\n",
+    "english_examples = [\n",
+    "    [\"How much power consumption can Intel® Core™ Ultra Processors help save?\"],\n",
+    "    [\"Compared to Intel’s previous mobile processor, what is the advantage of Intel® Core™ Ultra Processors for Artificial Intelligence?\"],\n",
+    "    [\"What can Intel vPro® Enterprise systems offer?\"],\n",
+    "]\n",
+    "\n",
+    "if model_language.value == \"English\":\n",
+    "    text_example_path = \"text_example_en.pdf\"\n",
+    "else:\n",
+    "    text_example_path = \"text_example_cn.pdf\"\n",
+    "\n",
+    "examples = chinese_examples if (model_language.value == \"Chinese\") else english_examples"
    ]
   },
   {
@@ -1388,6 +1439,7 @@
     "\n",
     "    global combine_docs_chain\n",
     "    combine_docs_chain = create_stuff_documents_chain(llm, prompt)\n",
+    "\n",
     "    global rag_chain\n",
     "    rag_chain = create_retrieval_chain(retriever, combine_docs_chain)\n",
     "\n",
@@ -1488,6 +1540,7 @@
     "        with gr.Column(scale=1):\n",
     "            docs = gr.File(\n",
     "                label=\"Step 1: Load text files\",\n",
+    "                value=[text_example_path],\n",
     "                file_count=\"multiple\",\n",
     "                file_types=[\n",
     "                    \".csv\",\n",
@@ -1608,6 +1661,7 @@
     "                        submit = gr.Button(\"Submit\")\n",
     "                        stop = gr.Button(\"Stop\")\n",
     "                        clear = gr.Button(\"Clear\")\n",
+    "            gr.Examples(examples, inputs=msg, label=\"Click on any example and press the 'Submit' button\")\n",
     "            retriever_argument = gr.Accordion(\"Retriever Configuration\", open=True)\n",
     "            with retriever_argument:\n",
     "                with gr.Row():\n",
@@ -1685,11 +1739,11 @@
     "        queue=False,\n",
     "    )\n",
     "    clear.click(lambda: None, None, chatbot, queue=False)\n",
-    "    vector_search_top_k.change(\n",
+    "    vector_search_top_k.release(\n",
     "        update_retriever,\n",
     "        [vector_search_top_k, vector_rerank_top_n, do_rerank, search_method],\n",
     "    )\n",
-    "    vector_rerank_top_n.change(\n",
+    "    vector_rerank_top_n.release(\n",
     "        update_retriever,\n",
     "        [vector_search_top_k, vector_rerank_top_n, do_rerank, search_method],\n",
     "    )\n",
@@ -1726,7 +1780,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -1740,7 +1794,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.10.12"
   },
   "openvino_notebooks": {
    "imageUrl": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/304aa048-f10c-41c6-bb31-6d2bfdf49cf5",