From 03ae70fc0fcaeb5c06cf1d1122dcc098a0f80c21 Mon Sep 17 00:00:00 2001
From: Ethan Yang <ethan.yang@intel.com>
Date: Tue, 21 May 2024 15:17:04 +0800
Subject: [PATCH] update langchain dependency (#2030)

1. As of the [change in LangChain
0.2](https://python.langchain.com/v0.2/docs/versions/overview/#tldr),
`langchain-community` is no longer installed automatically with
`langchain-core` and `langchain`, so it is now listed explicitly.
2. Add an option in the RAG notebook to turn RAG mode on/off, so we can
demonstrate the results with and without RAG.
---
 .../llm-agent-langchain.ipynb                 |  2 +-
 .../llm-rag-langchain/llm-rag-langchain.ipynb | 65 +++++++++++--------
 2 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/notebooks/llm-agent-langchain/llm-agent-langchain.ipynb b/notebooks/llm-agent-langchain/llm-agent-langchain.ipynb
index 3da79f4308b..e14f56c0bbd 100644
--- a/notebooks/llm-agent-langchain/llm-agent-langchain.ipynb
+++ b/notebooks/llm-agent-langchain/llm-agent-langchain.ipynb
@@ -57,7 +57,7 @@
     "\"accelerate\"\\\n",
     "\"openvino-nightly\"\\\n",
     "\"gradio\"\\\n",
-    "\"transformers>=4.38.1\" \"langchain>=0.1.14\" \"wikipedia\""
+    "\"transformers>=4.38.1\" \"langchain>=0.2.0\" \"langchain-community>=0.2.0\" \"wikipedia\""
    ]
   },
   {
diff --git a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb
index ebb9ccbc8be..5905d264a40 100644
--- a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb
+++ b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb
@@ -57,7 +57,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 23,
    "id": "1f077b32-5d36-44b0-9041-407e996283a3",
    "metadata": {},
    "outputs": [
@@ -81,7 +81,7 @@
     "\"accelerate\"\\\n",
     "\"openvino-nightly\"\\\n",
     "\"gradio\"\\\n",
-    "\"onnx\" \"einops\" \"transformers_stream_generator\" \"tiktoken\" \"transformers>=4.38.1\" \"bitsandbytes\" \"chromadb\" \"sentence_transformers\" \"langchain>=0.1.15\" \"langchainhub\" \"unstructured\" \"scikit-learn\" \"python-docx\" \"pypdf\" "
+    "\"onnx\" \"einops\" \"transformers_stream_generator\" \"tiktoken\" \"transformers>=4.38.1\" \"bitsandbytes\" \"chromadb\" \"sentence_transformers\" \"langchain>=0.2.0\" \"langchain-community>=0.2.0\" \"langchainhub\" \"unstructured\" \"scikit-learn\" \"python-docx\" \"pypdf\" "
    ]
   },
   {
@@ -197,7 +197,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "19014cafd0614e31838477b41fb405ef",
+       "model_id": "29eb764154d94cd3a3be154936723743",
        "version_major": 2,
        "version_minor": 0
       },
@@ -238,7 +238,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e12d1bdee7894f82a015cb3e0a85c4f5",
+       "model_id": "3c9bf47dce91460ab6d46933e0eb3cb5",
        "version_major": 2,
        "version_minor": 0
       },
@@ -330,7 +330,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "57d17e276ae14a66b72326067dcfe694",
+       "model_id": "9e3540804aa44aff93ca56143dca1ec2",
        "version_major": 2,
        "version_minor": 0
       },
@@ -344,7 +344,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3eac17e4d1a44d3a899b191de049cab7",
+       "model_id": "3939bfbd676044089258c1a32eae4cdb",
        "version_major": 2,
        "version_minor": 0
       },
@@ -358,7 +358,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "dc5a4a193e3f46aebe68a212a8e88e52",
+       "model_id": "8fcda6bce8464527b9d74fe139f7e2e2",
        "version_major": 2,
        "version_minor": 0
       },
@@ -582,7 +582,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "12de29912eb44365971be56e128d3266",
+       "model_id": "5f4645b80aa446dda3c7f71ca671f7ad",
        "version_major": 2,
        "version_minor": 0
       },
@@ -668,7 +668,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3fc1e02144ae4bc788a21a9804cd4e3f",
+       "model_id": "67395acbcb1247dab14655b33428ec16",
        "version_major": 2,
        "version_minor": 0
       },
@@ -760,7 +760,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "9f6ce6f571c244e79e838c28abdc52bd",
+       "model_id": "3efcde86a1924f5ea78c2f127dd2a6d7",
        "version_major": 2,
        "version_minor": 0
       },
@@ -827,7 +827,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2fcb244b0ec04e50a313f40e96edc949",
+       "model_id": "9d4b92ba4b704450b4c76e36f9a99632",
        "version_major": 2,
        "version_minor": 0
       },
@@ -888,7 +888,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "dd67900c499d4e5bb7bf1aec67d8d47f",
+       "model_id": "098308c72b3947ddb6f9f16de3ae4d28",
        "version_major": 2,
        "version_minor": 0
       },
@@ -963,14 +963,14 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2024-04-28 21:05:33.318682: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2024-04-28 21:05:33.322370: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2024-04-28 21:05:33.366644: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
-      "2024-04-28 21:05:33.366676: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
-      "2024-04-28 21:05:33.366714: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
-      "2024-04-28 21:05:33.376052: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "2024-05-20 21:07:15.482275: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+      "2024-05-20 21:07:15.486309: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+      "2024-05-20 21:07:15.532928: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+      "2024-05-20 21:07:15.532964: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+      "2024-05-20 21:07:15.533011: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+      "2024-05-20 21:07:15.545396: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
       "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2024-04-28 21:05:34.068587: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
+      "2024-05-20 21:07:16.363308: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
       "Compiling the model to CPU ...\n"
      ]
     },
@@ -1069,7 +1069,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b9610591570744258f024a79b31a39b1",
+       "model_id": "52ccbda6c01348b7bb7122e2a23b0ead",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1133,8 +1133,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Compiling the model to CPU ...\n",
-      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
+      "Compiling the model to CPU ...\n"
      ]
     },
     {
@@ -1476,7 +1475,7 @@
     "    return \"\", history + [[message, \"\"]]\n",
     "\n",
     "\n",
-    "def bot(history, temperature, top_p, top_k, repetition_penalty, hide_full_prompt):\n",
+    "def bot(history, temperature, top_p, top_k, repetition_penalty, hide_full_prompt, do_rag):\n",
     "    \"\"\"\n",
     "    callback function for running chatbot on submit button click\n",
     "\n",
@@ -1488,6 +1487,7 @@
     "      top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability.\n",
     "      repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text.\n",
     "      hide_full_prompt: whether to show searching results in promopt.\n",
+    "      do_rag: whether do RAG when generating texts.\n",
     "\n",
     "    \"\"\"\n",
     "    streamer = TextIteratorStreamer(\n",
@@ -1508,7 +1508,10 @@
     "    if stop_tokens is not None:\n",
     "        llm.pipeline._forward_params[\"stopping_criteria\"] = StoppingCriteriaList(stop_tokens)\n",
     "\n",
-    "    t1 = Thread(target=rag_chain.invoke, args=({\"input\": history[-1][0]},))\n",
+    "    if do_rag:\n",
+    "        t1 = Thread(target=rag_chain.invoke, args=({\"input\": history[-1][0]},))\n",
+    "    else:\n",
+    "        t1 = Thread(target=llm.invoke, args=(history[-1][0],))\n",
     "    t1.start()\n",
     "\n",
     "    # Initialize an empty string to store the generated text\n",
@@ -1563,8 +1566,8 @@
     "\n",
     "                chunk_size = gr.Slider(\n",
     "                    label=\"Chunk size\",\n",
-    "                    value=700,\n",
-    "                    minimum=100,\n",
+    "                    value=300,\n",
+    "                    minimum=50,\n",
     "                    maximum=2000,\n",
     "                    step=50,\n",
     "                    interactive=True,\n",
@@ -1586,6 +1589,12 @@
     "                value=\"Vector Store is Not ready\",\n",
     "                interactive=False,\n",
     "            )\n",
+    "            do_rag = gr.Checkbox(\n",
+    "                value=True,\n",
+    "                label=\"RAG is ON\",\n",
+    "                interactive=True,\n",
+    "                info=\"Whether to do RAG for generation\",\n",
+    "            )\n",
     "            with gr.Accordion(\"Generation Configuration\", open=False):\n",
     "                with gr.Row():\n",
     "                    with gr.Column():\n",
@@ -1716,13 +1725,13 @@
     "    )\n",
     "    submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(\n",
     "        bot,\n",
-    "        [chatbot, temperature, top_p, top_k, repetition_penalty, hide_context],\n",
+    "        [chatbot, temperature, top_p, top_k, repetition_penalty, hide_context, do_rag],\n",
     "        chatbot,\n",
     "        queue=True,\n",
     "    )\n",
     "    submit_click_event = submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(\n",
     "        bot,\n",
-    "        [chatbot, temperature, top_p, top_k, repetition_penalty, hide_context],\n",
+    "        [chatbot, temperature, top_p, top_k, repetition_penalty, hide_context, do_rag],\n",
     "        chatbot,\n",
     "        queue=True,\n",
     "    )\n",