diff --git a/.ci/skipped_notebooks.yml b/.ci/skipped_notebooks.yml
index 3b2a94ff544..6e9b9ae9d61 100644
--- a/.ci/skipped_notebooks.yml
+++ b/.ci/skipped_notebooks.yml
@@ -585,10 +585,3 @@
     - os:
       - macos-12
       - windows-2019
-- notebook: notebooks/whisper-asr-genai/whisper-asr-genai.ipynb
-  skips:
-    - python:
-      - '3.8'
-      - '3.9'
-    - os:
-      - macos-12
diff --git a/notebooks/whisper-asr-genai/README.md b/notebooks/whisper-asr-genai/README.md
index 3812a60fe5b..1ea38b3abb4 100644
--- a/notebooks/whisper-asr-genai/README.md
+++ b/notebooks/whisper-asr-genai/README.md
@@ -14,7 +14,10 @@ The tutorial consists of following steps:
 3. Convert the model using OpenVINO Integration with HuggingFace Optimum.
 4. Run the model using Generate API.
 5. Compare the performance of PyTorch and the OpenVINO model.
-6. Launch an interactive demo for speech recognition
+6. Quantize the OpenVINO model with NNCF.
+7. Check the quantized model's result on the demo video.
+8. Compare the model size, performance, and accuracy of the original and quantized models.
+9. Launch an interactive demo for speech recognition.
 
 ## Installation Instructions
 
diff --git a/notebooks/whisper-asr-genai/whisper-asr-genai.ipynb b/notebooks/whisper-asr-genai/whisper-asr-genai.ipynb
index 5ca282320c4..24517286c8c 100644
--- a/notebooks/whisper-asr-genai/whisper-asr-genai.ipynb
+++ b/notebooks/whisper-asr-genai/whisper-asr-genai.ipynb
@@ -64,7 +64,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 1,
    "id": "bb9fc7f3-cea0-4adf-9ee6-4a3d15931db7",
    "metadata": {
     "ExecuteTime": {
@@ -79,6 +79,22 @@
      "text": [
       "Note: you may need to restart the kernel to use updated packages.\n",
       "Note: you may need to restart the kernel to use updated packages.\n",
+      "Looking in indexes: https://pypi.org/simple, https://storage.openvinotoolkit.org/simple/wheels/nightly\n",
+      "Requirement already satisfied: openvino in /home/labuser/work/notebook/genai_whisper/lib/python3.10/site-packages (2024.5.0.dev20240919)\n",
+      "Collecting openvino\n",
+      "  Using cached https://storage.openvinotoolkit.org/wheels/nightly/openvino/openvino-2024.5.0.dev20240927-16853-cp310-cp310-manylinux2014_x86_64.whl (43.1 MB)\n",
+      "Requirement already satisfied: openvino-tokenizers in /home/labuser/work/notebook/genai_whisper/lib/python3.10/site-packages (2024.5.0.0)\n",
+      "Requirement already satisfied: openvino-genai in /home/labuser/work/notebook/genai_whisper/lib/python3.10/site-packages (2024.5.0.0)\n",
+      "Requirement already satisfied: packaging in /home/labuser/work/notebook/genai_whisper/lib/python3.10/site-packages (from openvino) (24.1)\n",
+      "Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /home/labuser/work/notebook/genai_whisper/lib/python3.10/site-packages (from openvino) (1.26.4)\n",
+      "Requirement already satisfied: openvino-telemetry>=2023.2.1 in /home/labuser/work/notebook/genai_whisper/lib/python3.10/site-packages (from openvino) (2024.1.0)\n",
+      "Installing collected packages: openvino\n",
+      "  Attempting uninstall: openvino\n",
+      "    Found existing installation: openvino 2024.5.0.dev20240919\n",
+      "    Uninstalling openvino-2024.5.0.dev20240919:\n",
+      "      Successfully uninstalled openvino-2024.5.0.dev20240919\n",
+      "Successfully installed openvino-2024.5.0.dev20240927\n",
+      "Note: you may need to restart the kernel to use updated packages.\n",
       "Note: you may need to restart the kernel to use updated packages.\n",
       "Note: you may need to restart the kernel to use updated packages.\n"
      ]
@@ -87,8 +103,8 @@
    "source": [
     "%pip install -q \"transformers>=4.35\" 
\"torch>=2.3\" \"torchvision>=0.18.1\" \"onnx>=1.16.1\" --extra-index-url https://download.pytorch.org/whl/cpu\n", "%pip install -q \"git+https://github.com/huggingface/optimum-intel.git\"\n", - "%pip install --pre -U openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly\n", - "%pip install -q datasets \"gradio>=4.0\" \"librosa\" \"soundfile\"\n", + "%pip install -q --pre -U \"openvino\" \"openvino-tokenizers\" \"openvino-genai\" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly\n", + "%pip install -q datasets \"gradio>=4.0\" \"soundfile>=0.12\" \"librosa\" \"python-ffmpeg<=1.0.16\"\n", "%pip install -q \"nncf>=2.13.0\" \"jiwer\"" ] }, @@ -139,7 +155,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2f3c906b0aae4f5c92d8d51be2ac7abb", + "model_id": "c5035bf98145426fbc8f05edc8d7924a", "version_major": 2, "version_minor": 0 }, @@ -192,7 +208,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f06648d4e8df483889894f39e1683a3c", + "model_id": "3209303207294a53b9b41ac90241982c", "version_major": 2, "version_minor": 0 }, @@ -218,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 4, "id": "e5382431-497e-4688-b4ec-8958a92163e7", "metadata": { "ExecuteTime": { @@ -258,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 5, "id": "620020a6", "metadata": {}, "outputs": [ @@ -275,7 +291,7 @@ "PosixPath('/home/labuser/work/notebook/openvino_notebooks/notebooks/whisper-asr-genai/data/librispeech_asr_demo_validation_short.wav')" ] }, - "execution_count": 29, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -295,7 +311,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 6, "id": "61558185", "metadata": {}, "outputs": [], @@ -317,7 +333,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 7, "id": "218836e9", "metadata": {}, "outputs": [ @@ -338,6 +354,14 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/labuser/work/notebook/genai_whisper/lib/python3.10/site-packages/transformers/models/whisper/generation_whisper.py:496: FutureWarning: The input name `inputs` is deprecated. 
Please make sure to use `input_features` instead.\n", + " warnings.warn(\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -370,22 +394,22 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 8, "id": "9c55f94d", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ca8a0245d45f44daa925d9e6cca30386", + "model_id": "88e7bfa5930f4d4ab70361207e3011ef", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Dropdown(description='Dataset language:', options=('german', 'dutch', 'french', 'spanish', 'italian', 'portugu…" + "Dropdown(description='Dataset language:', index=4, options=('japanese', 'dutch', 'french', 'spanish', 'italian…" ] }, - "execution_count": 32, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -393,13 +417,13 @@ "source": [ "import ipywidgets as widgets\n", "\n", - "languages = {\"german\": \"<|de|>\", \"dutch\": \"<|da|>\", \"french\": \"<|fr|>\", \"spanish\": \"<|es|>\", \"italian\": \"<|it|>\", \"portuguese\": \"<|pt|>\", \"polish\": \"<|pl|>\"}\n", + "languages = {\"japanese\": \"ja_jp\", \"dutch\": \"da_dk\", \"french\": \"fr_fr\", \"spanish\": \"ca_es\", \"italian\": \"it_it\", \"portuguese\": \"pt_br\", \"polish\": \"pl_pl\"}\n", "\n", "SAMPLE_LANG = None\n", "if model_type.value == \"Multilingual models\":\n", " SAMPLE_LANG = widgets.Dropdown(\n", " options=languages.keys(),\n", - " value=\"german\",\n", + " value=\"italian\",\n", " description=\"Dataset language:\",\n", " disabled=False,\n", " )\n", @@ -409,52 +433,23 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 9, "id": "515bec62", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "116dddc67cd64780b793bc063108e346", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Resolving data files: 0%| | 0/48 [00:00\n", - " \n", + " \n", " Your browser does not support the audio element.\n", " \n", " " @@ -479,9 +474,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Reference: denken sie soeben weilten meine gedanken bei ihnen in adelaide und ich wünschte mir sie herzaubern zu können nun der zauber ist gelungen lachte münchhausen da bin ich und was mich herführt\n", + "Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale.\n", "\n", - "Result: Think, my thoughts just now remained with you in Adelaide, and I wished to be able to enchant you. — Well, the enchantment has been made, laughed Munchhausen, there I am, and what leads me here?\n" + "Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class.\n" ] } ], @@ -490,7 +485,7 @@ " sample = copy.deepcopy(mls_example[\"audio\"])\n", "\n", " display(ipd.Audio(sample[\"array\"], rate=sample[\"sampling_rate\"]))\n", - " print(f\"Reference: {mls_example['transcript']}\")\n", + " print(f\"Reference: {mls_example['raw_transcription']}\")\n", "\n", " pt_result = pipe_pt(sample, generate_kwargs={\"task\": \"translate\"})\n", " print(f\"\\nResult: {pt_result['text']}\")" @@ -518,10 +513,17 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 11, "id": "36f756d5", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:nncf:NNCF initialized successfully. 
Supported frameworks detected: torch, onnx, openvino\n" + ] + }, { "data": { "text/markdown": [ @@ -537,7 +539,7 @@ { "data": { "text/markdown": [ - "`optimum-cli export openvino --model openai/whisper-large-v2 --library transformers --task automatic-speech-recognition-with-past --framework pt whisper-large-v2`" + "`optimum-cli export openvino --model openai/whisper-tiny --library transformers --task automatic-speech-recognition-with-past --framework pt whisper-tiny`" ], "text/plain": [ "" @@ -605,14 +607,14 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 12, "id": "49665de3", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "946f69f6f5f74f619906740ecfe12595", + "model_id": "85a51314434140b0b86368a2160bb2ff", "version_major": 2, "version_minor": 0 }, @@ -620,7 +622,7 @@ "Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU')" ] }, - "execution_count": 36, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -628,14 +630,14 @@ "source": [ "from notebook_utils import device_widget\n", "\n", - "device = device_widget(default=\"CPU\")\n", + "device = device_widget(default=\"CPU\", exclude=[\"NPU\"])\n", "\n", "device" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 13, "id": "e2896f4c", "metadata": {}, "outputs": [], @@ -655,7 +657,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 14, "id": "a8ad6087", "metadata": {}, "outputs": [ @@ -704,7 +706,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 15, "id": "4d821d5e", "metadata": {}, "outputs": [ @@ -713,7 +715,7 @@ "text/html": [ "\n", " \n", " " @@ -729,20 +731,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "Reference: denken sie soeben weilten meine gedanken bei ihnen in adelaide und ich wünschte mir sie herzaubern zu können nun der zauber ist gelungen lachte münchhausen da bin ich und was mich herführt\n", + "Reference: Il blog è uno strumento che si prefigge di incoraggiare la collaborazione e sviluppare l'apprendimento degli studenti ben oltre la giornata scolastica normale.\n", "\n", - "Result: Think, my thoughts just now remained with you in Adelaide, and I wished to be able to enchant you. 
— Well, the enchantment has been made, laughed Munchhausen, there I am, and what leads me here?\n" + "Result: The blog is our tool that is prefilled to encourage collaboration and develop the learning of the students and to attract a normal school class.\n" ] } ], "source": [ + "languages_genai = {\n", + " \"japanese\": \"<|ja|>\",\n", + " \"dutch\": \"<|da|>\",\n", + " \"french\": \"<|fr|>\",\n", + " \"spanish\": \"<|es|>\",\n", + " \"italian\": \"<|it|>\",\n", + " \"portuguese\": \"<|pt|>\",\n", + " \"polish\": \"<|pl|>\",\n", + "}\n", + "\n", "if model_type.value == \"Multilingual models\":\n", " sample = copy.deepcopy(mls_example[\"audio\"])\n", "\n", - " genai_result_ml = ov_pipe.generate(sample[\"array\"], max_new_tokens=100, task=\"translate\", language=languages[SAMPLE_LANG.value])\n", + " genai_result_ml = ov_pipe.generate(sample[\"array\"], max_new_tokens=100, task=\"translate\", language=languages_genai[SAMPLE_LANG.value])\n", "\n", " display(ipd.Audio(sample[\"array\"], rate=sample[\"sampling_rate\"]))\n", - " print(f\"Reference: {mls_example['transcript']}\")\n", + " print(f\"Reference: {mls_example['raw_transcription']}\")\n", " print(f\"\\nResult: {genai_result_ml}\")" ] }, @@ -758,7 +770,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 16, "id": "6ddafe5c-3238-40d3-b8ed-9d50c73f0d8a", "metadata": { "ExecuteTime": { @@ -789,7 +801,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 17, "id": "94025726-5c09-42b8-9046-9fbbe73afc47", "metadata": { "ExecuteTime": { @@ -801,7 +813,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "548f6426c3a04d0494ffaa42cb2e371a", + "model_id": "e3457f845611449794cd97a9c91c03fe", "version_major": 2, "version_minor": 0 }, @@ -815,7 +827,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6ecf7f3fb6d541cb969c9018749db7a8", + "model_id": "4df786e07f43403a96622ac0048fd7a8", "version_major": 2, "version_minor": 0 }, @@ -834,7 +846,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 18, "id": "31a08241-497e-4fd9-9ca9-d59c2602b8d4", "metadata": { "ExecuteTime": { @@ -847,9 +859,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Mean torch openai/whisper-large-v2 generation time: 6.086s\n", - "Mean openvino openai/whisper-large-v2 generation time: 3.980s\n", - "Performance openai/whisper-large-v2 openvino speedup: 1.529\n" + "Mean torch openai/whisper-tiny generation time: 0.291s\n", + "Mean openvino openai/whisper-tiny generation time: 0.159s\n", + "Performance openai/whisper-tiny openvino speedup: 1.832\n" ] } ], @@ -882,14 +894,14 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 19, "id": "00597544", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0ccae748f8a644e2a7de82e0f2f74e78", + "model_id": "76d00c1fe75f4b4aa8ecbfa18e80b295", "version_major": 2, "version_minor": 0 }, @@ -897,7 +909,7 @@ "Checkbox(value=True, description='Quantization')" ] }, - "execution_count": 43, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -912,19 +924,10 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 20, "id": "ead4ab0b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The skip_kernel_extension extension is already loaded. 
To reload it, use:\n", - " %reload_ext skip_kernel_extension\n" - ] - } - ], + "outputs": [], "source": [ "# Fetch `skip_kernel_extension` module\n", "import requests\n", @@ -964,7 +967,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 21, "id": "c9c4ee71", "metadata": {}, "outputs": [ @@ -998,7 +1001,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 22, "id": "97e0015d", "metadata": {}, "outputs": [], @@ -1049,7 +1052,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "id": "233c1436", "metadata": {}, "outputs": [], @@ -1074,7 +1077,7 @@ "\n", "\n", "CALIBRATION_DATASET_SIZE = 30\n", - "quantized_model_path = Path(f\"{model_path}_quantized\")\n", + "quantized_model_path = Path(f\"{model_path}-quantized\")\n", "\n", "\n", "def quantize(ov_model: OVModelForSpeechSeq2Seq, calibration_dataset_size: int):\n", @@ -1089,7 +1092,7 @@ " subset_size=len(encoder_calibration_data),\n", " model_type=nncf.ModelType.TRANSFORMER,\n", " # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search\n", - " advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.50)\n", + " advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.80)\n", " )\n", " ov.save_model(quantized_encoder, quantized_model_path / \"openvino_encoder_model.xml\")\n", " del quantized_encoder\n", @@ -1148,7 +1151,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 24, "id": "fae6280a", "metadata": {}, "outputs": [ @@ -1174,7 +1177,7 @@ "output_type": "stream", "text": [ "Original : Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.\n", - "Quantized: Mr Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.\n" + "Quantized: Mr Quilter is the apostle of the middle classes and we are glad to welcome his gospel.\n" ] } ], @@ -1207,14 +1210,14 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 25, "id": "e61446fa", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b7e553f342b74de5ab3271c8f1d63756", + "model_id": "0662e84788804369927b0648a2be3b53", "version_major": 2, "version_minor": 0 }, @@ -1228,7 +1231,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cdb75aff26144b00b65a70f664712337", + "model_id": "5ae11e7aea904de89ee5bcd4bd96c65c", "version_major": 2, "version_minor": 0 }, @@ -1240,19 +1243,12 @@ "output_type": "display_data" }, { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[49], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mget_ipython\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_cell_magic\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mskip\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnot $to_quantize.value\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mimport 
time\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mfrom contextlib import contextmanager\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mfrom jiwer import wer, wer_standardize\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mTEST_DATASET_SIZE = 50\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mdef calculate_transcription_time_and_accuracy(ov_model, test_samples):\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m whole_infer_times = []\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m ground_truths = []\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m predictions = []\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m for data_item in tqdm(test_samples, desc=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mMeasuring performance and accuracy\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m):\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m start_time = time.perf_counter()\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m transcription = ov_model.generate(data_item[\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43maudio\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m][\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43marray\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m])\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m end_time = time.perf_counter()\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m whole_infer_times.append(end_time - start_time)\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m ground_truths.append(data_item[\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtext\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m])\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m predictions.append(transcription.texts[0])\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m word_accuracy = (1 - wer(ground_truths, predictions, reference_transform=wer_standardize,\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m hypothesis_transform=wer_standardize)) * 100\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m mean_whole_infer_time = sum(whole_infer_times)\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m return word_accuracy, mean_whole_infer_time\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mtest_dataset = load_dataset(\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mopenslr/librispeech_asr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mclean\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m, split=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtest\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m, streaming=True, 
trust_remote_code=True)\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mtest_dataset = test_dataset.shuffle(seed=42).take(TEST_DATASET_SIZE)\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mtest_samples = [sample for sample in test_dataset]\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43maccuracy_original, times_original = calculate_transcription_time_and_accuracy(ov_pipe, test_samples)\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43maccuracy_quantized, times_quantized = calculate_transcription_time_and_accuracy(ov_quantized_pipe, test_samples)\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mprint(f\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mWhole pipeline performance speedup: \u001b[39;49m\u001b[38;5;124;43m{\u001b[39;49m\u001b[38;5;124;43mtimes_original / times_quantized:.3f}\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m)\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mprint(f\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mWhisper transcription word accuracy. Original model: \u001b[39;49m\u001b[38;5;132;43;01m{accuracy_original:.2f}\u001b[39;49;00m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43m. Quantized model: \u001b[39;49m\u001b[38;5;132;43;01m{accuracy_quantized:.2f}\u001b[39;49;00m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m)\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mprint(f\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mAccuracy drop: \u001b[39;49m\u001b[38;5;124;43m{\u001b[39;49m\u001b[38;5;124;43maccuracy_original - accuracy_quantized:.2f}\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m)\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/work/notebook/genai_whisper/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2541\u001b[0m, in \u001b[0;36mInteractiveShell.run_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m 2539\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuiltin_trap:\n\u001b[1;32m 2540\u001b[0m args \u001b[38;5;241m=\u001b[39m (magic_arg_s, cell)\n\u001b[0;32m-> 2541\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2543\u001b[0m \u001b[38;5;66;03m# The code below prevents the output from being displayed\u001b[39;00m\n\u001b[1;32m 2544\u001b[0m \u001b[38;5;66;03m# when using magics with decorator @output_can_be_silenced\u001b[39;00m\n\u001b[1;32m 2545\u001b[0m \u001b[38;5;66;03m# when the last Python token in the expression is a ';'.\u001b[39;00m\n\u001b[1;32m 2546\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(fn, magic\u001b[38;5;241m.\u001b[39mMAGIC_OUTPUT_CAN_BE_SILENCED, \u001b[38;5;28;01mFalse\u001b[39;00m):\n", - "File \u001b[0;32m~/work/notebook/openvino_notebooks/notebooks/whisper-asr-genai/skip_kernel_extension.py:9\u001b[0m, in 
\u001b[0;36mskip\u001b[0;34m(line, cell)\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28meval\u001b[39m(line):\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[43mget_ipython\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mex\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcell\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/work/notebook/genai_whisper/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2878\u001b[0m, in \u001b[0;36mInteractiveShell.ex\u001b[0;34m(self, cmd)\u001b[0m\n\u001b[1;32m 2876\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Execute a normal python statement in user namespace.\"\"\"\u001b[39;00m\n\u001b[1;32m 2877\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuiltin_trap:\n\u001b[0;32m-> 2878\u001b[0m \u001b[43mexec\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcmd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muser_global_ns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muser_ns\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m:34\u001b[0m\n", - "File \u001b[0;32m:17\u001b[0m, in \u001b[0;36mcalculate_transcription_time_and_accuracy\u001b[0;34m(ov_model, test_samples)\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "name": "stdout", + "output_type": "stream", + "text": [ + "Whole pipeline performance speedup: 1.339\n", + "Whisper transcription word accuracy. Original model: 82.88%. Quantized model: 84.13%.\n", + "Accuracy drop: -1.25%.\n" ] } ], @@ -1319,36 +1315,7 @@ }, "is_executing": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running on local URL: http://127.0.0.1:7860\n", - "\n", - "To create a public link, set `share=True` in `launch()`.\n" - ] - }, - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Keyboard interruption in main thread... closing server.\n" - ] - } - ], + "outputs": [], "source": [ "import requests\n", "\n", @@ -1372,24 +1339,6 @@ "# demo.launch(server_name='your server name', server_port='server port in int')\n", "# Read more in the docs: https://gradio.app/docs/" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "63bf84b0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "openai/whisper-tiny\n" - ] - } - ], - "source": [ - "print(model_id.value)" - ] } ], "metadata": {