From 03ae70fc0fcaeb5c06cf1d1122dcc098a0f80c21 Mon Sep 17 00:00:00 2001 From: Ethan Yang Date: Tue, 21 May 2024 15:17:04 +0800 Subject: [PATCH] update langchain dependency (#2030) 1. as of the new [change in LangChain 0.2](https://python.langchain.com/v0.2/docs/versions/overview/#tldr), `langchain-community` will no longer be installed automatically with `langchain-core` and `langchain`. 2. add an option in the RAG notebook to turn RAG mode on/off, so we can demonstrate the results with/without RAG --- .../llm-agent-langchain.ipynb | 2 +- .../llm-rag-langchain/llm-rag-langchain.ipynb | 65 +++++++++++-------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/notebooks/llm-agent-langchain/llm-agent-langchain.ipynb b/notebooks/llm-agent-langchain/llm-agent-langchain.ipynb index 3da79f4308b..e14f56c0bbd 100644 --- a/notebooks/llm-agent-langchain/llm-agent-langchain.ipynb +++ b/notebooks/llm-agent-langchain/llm-agent-langchain.ipynb @@ -57,7 +57,7 @@ "\"accelerate\"\\\n", "\"openvino-nightly\"\\\n", "\"gradio\"\\\n", - "\"transformers>=4.38.1\" \"langchain>=0.1.14\" \"wikipedia\"" + "\"transformers>=4.38.1\" \"langchain>=0.2.0\" \"langchain-community>=0.2.0\" \"wikipedia\"" ] }, { diff --git a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb index ebb9ccbc8be..5905d264a40 100644 --- a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb +++ b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 23, "id": "1f077b32-5d36-44b0-9041-407e996283a3", "metadata": {}, "outputs": [ @@ -81,7 +81,7 @@ "\"accelerate\"\\\n", "\"openvino-nightly\"\\\n", "\"gradio\"\\\n", - "\"onnx\" \"einops\" \"transformers_stream_generator\" \"tiktoken\" \"transformers>=4.38.1\" \"bitsandbytes\" \"chromadb\" \"sentence_transformers\" \"langchain>=0.1.15\" \"langchainhub\" \"unstructured\" \"scikit-learn\" \"python-docx\" \"pypdf\" " + "\"onnx\" \"einops\" 
\"transformers_stream_generator\" \"tiktoken\" \"transformers>=4.38.1\" \"bitsandbytes\" \"chromadb\" \"sentence_transformers\" \"langchain>=0.2.0\" \"langchain-community>=0.2.0\" \"langchainhub\" \"unstructured\" \"scikit-learn\" \"python-docx\" \"pypdf\" " ] }, { @@ -197,7 +197,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "19014cafd0614e31838477b41fb405ef", + "model_id": "29eb764154d94cd3a3be154936723743", "version_major": 2, "version_minor": 0 }, @@ -238,7 +238,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e12d1bdee7894f82a015cb3e0a85c4f5", + "model_id": "3c9bf47dce91460ab6d46933e0eb3cb5", "version_major": 2, "version_minor": 0 }, @@ -330,7 +330,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "57d17e276ae14a66b72326067dcfe694", + "model_id": "9e3540804aa44aff93ca56143dca1ec2", "version_major": 2, "version_minor": 0 }, @@ -344,7 +344,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3eac17e4d1a44d3a899b191de049cab7", + "model_id": "3939bfbd676044089258c1a32eae4cdb", "version_major": 2, "version_minor": 0 }, @@ -358,7 +358,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "dc5a4a193e3f46aebe68a212a8e88e52", + "model_id": "8fcda6bce8464527b9d74fe139f7e2e2", "version_major": 2, "version_minor": 0 }, @@ -582,7 +582,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "12de29912eb44365971be56e128d3266", + "model_id": "5f4645b80aa446dda3c7f71ca671f7ad", "version_major": 2, "version_minor": 0 }, @@ -668,7 +668,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3fc1e02144ae4bc788a21a9804cd4e3f", + "model_id": "67395acbcb1247dab14655b33428ec16", "version_major": 2, "version_minor": 0 }, @@ -760,7 +760,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9f6ce6f571c244e79e838c28abdc52bd", + "model_id": "3efcde86a1924f5ea78c2f127dd2a6d7", 
"version_major": 2, "version_minor": 0 }, @@ -827,7 +827,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2fcb244b0ec04e50a313f40e96edc949", + "model_id": "9d4b92ba4b704450b4c76e36f9a99632", "version_major": 2, "version_minor": 0 }, @@ -888,7 +888,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "dd67900c499d4e5bb7bf1aec67d8d47f", + "model_id": "098308c72b3947ddb6f9f16de3ae4d28", "version_major": 2, "version_minor": 0 }, @@ -963,14 +963,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-04-28 21:05:33.318682: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-04-28 21:05:33.322370: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", - "2024-04-28 21:05:33.366644: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-04-28 21:05:33.366676: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-04-28 21:05:33.366714: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-04-28 21:05:33.376052: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-05-20 21:07:15.482275: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. 
You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-05-20 21:07:15.486309: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-05-20 21:07:15.532928: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-05-20 21:07:15.532964: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-05-20 21:07:15.533011: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-05-20 21:07:15.545396: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-04-28 21:05:34.068587: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "2024-05-20 21:07:16.363308: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "Compiling the model to CPU ...\n" ] }, @@ -1069,7 +1069,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b9610591570744258f024a79b31a39b1", + "model_id": "52ccbda6c01348b7bb7122e2a23b0ead", "version_major": 2, "version_minor": 0 }, @@ -1133,8 +1133,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Compiling the model to CPU ...\n", - "Setting 
`pad_token_id` to `eos_token_id`:2 for open-end generation.\n" + "Compiling the model to CPU ...\n" ] }, { @@ -1476,7 +1475,7 @@ " return \"\", history + [[message, \"\"]]\n", "\n", "\n", - "def bot(history, temperature, top_p, top_k, repetition_penalty, hide_full_prompt):\n", + "def bot(history, temperature, top_p, top_k, repetition_penalty, hide_full_prompt, do_rag):\n", " \"\"\"\n", " callback function for running chatbot on submit button click\n", "\n", @@ -1488,6 +1487,7 @@ " top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability.\n", " repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text.\n", " hide_full_prompt: whether to show searching results in promopt.\n", + " do_rag: whether do RAG when generating texts.\n", "\n", " \"\"\"\n", " streamer = TextIteratorStreamer(\n", @@ -1508,7 +1508,10 @@ " if stop_tokens is not None:\n", " llm.pipeline._forward_params[\"stopping_criteria\"] = StoppingCriteriaList(stop_tokens)\n", "\n", - " t1 = Thread(target=rag_chain.invoke, args=({\"input\": history[-1][0]},))\n", + " if do_rag:\n", + " t1 = Thread(target=rag_chain.invoke, args=({\"input\": history[-1][0]},))\n", + " else:\n", + " t1 = Thread(target=llm.invoke, args=(history[-1][0],))\n", " t1.start()\n", "\n", " # Initialize an empty string to store the generated text\n", @@ -1563,8 +1566,8 @@ "\n", " chunk_size = gr.Slider(\n", " label=\"Chunk size\",\n", - " value=700,\n", - " minimum=100,\n", + " value=300,\n", + " minimum=50,\n", " maximum=2000,\n", " step=50,\n", " interactive=True,\n", @@ -1586,6 +1589,12 @@ " value=\"Vector Store is Not ready\",\n", " interactive=False,\n", " )\n", + " do_rag = gr.Checkbox(\n", + " value=True,\n", + " label=\"RAG is ON\",\n", + " interactive=True,\n", + " info=\"Whether to do RAG for generation\",\n", + " )\n", " with gr.Accordion(\"Generation Configuration\", 
open=False):\n", " with gr.Row():\n", " with gr.Column():\n", @@ -1716,13 +1725,13 @@ " )\n", " submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(\n", " bot,\n", - " [chatbot, temperature, top_p, top_k, repetition_penalty, hide_context],\n", + " [chatbot, temperature, top_p, top_k, repetition_penalty, hide_context, do_rag],\n", " chatbot,\n", " queue=True,\n", " )\n", " submit_click_event = submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(\n", " bot,\n", - " [chatbot, temperature, top_p, top_k, repetition_penalty, hide_context],\n", + " [chatbot, temperature, top_p, top_k, repetition_penalty, hide_context, do_rag],\n", " chatbot,\n", " queue=True,\n", " )\n",