Skip to content

Commit

Permalink
Update Agent notebook (#2186)
Browse files Browse the repository at this point in the history
1. Replace the example tool with an offline version.
2. Change the quantization ratio from 0.8 to 0.72 for better accuracy on GPU.
  • Loading branch information
openvino-dev-samples authored Jul 11, 2024
1 parent 717f7a8 commit c20240f
Showing 1 changed file with 38 additions and 130 deletions.
168 changes: 38 additions & 130 deletions notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,75 +45,17 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"id": "47d43de7-9946-482d-84cb-222294c1cda8",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"\u001b[33mWARNING: typer 0.12.3 does not provide the extra 'all'\u001b[0m\u001b[33m\n",
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
]
Expand Down Expand Up @@ -141,32 +83,7 @@
"execution_count": 1,
"id": "2ec33075",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d15bea5cc94949328b44d9a2ed0efaa0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"openvino_log.png: 0.00B [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"PosixPath('/home/ethan/intel/openvino_notebooks/notebooks/llm-agent-qwen/openvino_log.png')"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"import requests\n",
"from PIL import Image\n",
Expand Down Expand Up @@ -212,28 +129,19 @@
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"import json\n",
"\n",
"def get_current_weather(city_name):\n",
" \"\"\"Get the current weather in a given city name\"\"\"\n",
" if not isinstance(city_name, str):\n",
" raise TypeError(\"City name must be a string\")\n",
" key_selection = {\n",
" \"current_condition\": [\n",
" \"temp_C\",\n",
" \"FeelsLikeC\",\n",
" \"humidity\",\n",
" \"weatherDesc\",\n",
" \"observation_time\",\n",
" ],\n",
" }\n",
" resp = requests.get(f\"https://wttr.in/{city_name}?format=j1\")\n",
" resp.raise_for_status()\n",
" resp = resp.json()\n",
" ret = {k: {_v: resp[k][0][_v] for _v in v} for k, v in key_selection.items()}\n",
"\n",
" return str(ret)"
"def get_current_weather(location, unit=\"fahrenheit\"):\n",
" \"\"\"Get the current weather in a given location\"\"\"\n",
" if \"tokyo\" in location.lower():\n",
" return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"})\n",
" elif \"san francisco\" in location.lower():\n",
" return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"})\n",
" elif \"paris\" in location.lower():\n",
" return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"})\n",
" else:\n",
" return json.dumps({\"location\": location, \"temperature\": \"unknown\"})"
]
},
{
Expand All @@ -246,24 +154,25 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 9,
"id": "5ea4ce13",
"metadata": {},
"outputs": [],
"source": [
"functions = [\n",
" {\n",
" \"name\": \"get_current_weather\",\n",
" \"description\": \"Get the current weather in a given city name\",\n",
" \"description\": \"Get the current weather in a given location\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"city_name\": {\n",
" \"location\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
" },\n",
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
" },\n",
" \"required\": [\"city_name\"],\n",
" \"required\": [\"location\"],\n",
" },\n",
" }\n",
"]"
Expand All @@ -288,7 +197,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 10,
"id": "981df8fe-cfcf-455a-919e-dda36f3b5dfb",
"metadata": {
"test_replace": {
Expand All @@ -303,7 +212,7 @@
"model_path = \"Qwen2-7B-Instruct-ov\"\n",
"\n",
"if not Path(model_path).exists():\n",
" !optimum-cli export openvino --model {model_id} --task text-generation-with-past --trust-remote-code --weight-format int4 {model_path}"
" !optimum-cli export openvino --model {model_id} --task text-generation-with-past --trust-remote-code --weight-format int4 --ratio 0.72 {model_path}"
]
},
{
Expand All @@ -326,7 +235,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "851fc266add14641961d0483cc65718c",
"model_id": "f981d582698c4400ac8997e9108c5558",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -413,7 +322,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "ee406254",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -464,7 +373,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 14,
"id": "4799540b-eee0-491f-a5b6-5bae68c22af9",
"metadata": {},
"outputs": [
Expand All @@ -475,17 +384,15 @@
"# User question:\n",
"[{'role': 'user', 'content': \"What's the weather like in San Francisco?\"}]\n",
"# Assistant Response 1:\n",
"{'role': 'assistant', 'content': '', 'function_call': {'name': 'get_current_weather', 'arguments': '{\"city_name\": \"San Francisco, CA\"}'}}\n",
"[{'role': 'assistant', 'content': '', 'function_call': {'name': 'get_current_weather', 'arguments': '{\"location\": \"San Francisco, CA\"}'}}]\n",
"# Function Response:\n",
"{'current_condition': {'temp_C': '18', 'FeelsLikeC': '18', 'humidity': '72', 'weatherDesc': [{'value': 'Clear'}], 'observation_time': '07:58 AM'}}\n",
"{\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"}\n",
"# Assistant Response 2:\n",
"{'role': 'assistant', 'content': 'The current weather in San Francisco is clear with a temperature of 18°C and a humidity level of 72%. The time of this observation was 07:58 AM.'}\n"
"[{'role': 'assistant', 'content': 'The current weather in San Francisco is 72 degrees Fahrenheit.'}]\n"
]
}
],
"source": [
"import json\n",
"\n",
"print(\"# User question:\")\n",
"messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco?\"}]\n",
"print(messages)\n",
Expand All @@ -494,14 +401,14 @@
"responses = []\n",
"\n",
"# Step 1: Role `user` sending the request\n",
"for responses in llm.chat(\n",
"responses = llm.chat(\n",
" messages=messages,\n",
" functions=functions,\n",
" stream=False,\n",
"):\n",
" print(responses)\n",
")\n",
"print(responses)\n",
"\n",
"messages.append(responses)\n",
"messages.extend(responses)\n",
"\n",
"# Step 2: check if the model wanted to call a function, and call the function if needed\n",
"last_response = messages[-1]\n",
Expand All @@ -513,7 +420,7 @@
" function_to_call = available_functions[function_name]\n",
" function_args = json.loads(last_response[\"function_call\"][\"arguments\"])\n",
" function_response = function_to_call(\n",
" city_name=function_args.get(\"city_name\"),\n",
" location=function_args.get(\"location\"),\n",
" )\n",
" print(\"# Function Response:\")\n",
" print(function_response)\n",
Expand All @@ -529,12 +436,12 @@
"\n",
" print(\"# Assistant Response 2:\")\n",
" # Step 4: Consolidate the observation from function into final response\n",
" for responses in llm.chat(\n",
" responses = llm.chat(\n",
" messages=messages,\n",
" functions=functions,\n",
" stream=False,\n",
" ): # get a new response from the model where it can see the function response\n",
" print(responses)"
" )\n",
" print(responses)"
]
},
{
Expand Down Expand Up @@ -580,6 +487,7 @@
"source": [
"import urllib.parse\n",
"import json5\n",
"import requests\n",
"from qwen_agent.tools.base import BaseTool, register_tool\n",
"\n",
"\n",
Expand Down

0 comments on commit c20240f

Please sign in to comment.