From 4026937e497459716be46266d5dd47d62ba32c0a Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Thu, 19 Sep 2024 14:02:31 +0200 Subject: [PATCH 01/10] Update notebooks with new properties import --- notebooks/auto-device/auto-device.ipynb | 14 +- .../clip-language-saliency-map.ipynb | 3 +- .../cross-lingual-books-alignment.ipynb | 9 +- .../async_pipeline.py | 5 +- .../dolly-2-instruction-following.ipynb | 8 +- notebooks/florence2/gradio_helper.py | 1 - notebooks/florence2/ov_florence2_helper.py | 1 - notebooks/gpu-device/gpu-device.ipynb | 32 ++-- .../grounded-segment-anything.ipynb | 1 - notebooks/hello-npu/hello-npu.ipynb | 24 +-- notebooks/internvl2/gradio_helper.py | 1 - notebooks/internvl2/internvl2_helper.py | 5 - ...tent-consistency-models-optimum-demo.ipynb | 4 +- .../llm-agent-functioncall-qwen.ipynb | 13 +- .../llm-agent-rag-llamaindex.ipynb | 7 +- .../llm-agent-react-langchain.ipynb | 12 +- notebooks/llm-chatbot/gradio_helper_genai.py | 4 +- notebooks/llm-chatbot/llm-chatbot.ipynb | 7 +- .../llm-rag-langchain/llm-rag-langchain.ipynb | 7 +- .../llm-rag-llamaindex.ipynb | 7 +- notebooks/openvino-api/openvino-api.ipynb | 5 +- .../paddle-to-openvino-classification.ipynb | 5 +- notebooks/pixart/pixart.ipynb | 2 - .../pose-estimation.ipynb | 5 +- ...orch-post-training-quantization-nncf.ipynb | 5 +- .../pytorch-quantization-aware-training.ipynb | 5 +- ...quantization-sparsity-aware-training.ipynb | 5 +- .../pytorch-onnx-to-openvino.ipynb | 5 +- notebooks/qwen2-audio/gradio_helper.py | 1 - .../qwen2-audio/ov_qwen2_audio_helper.py | 1 - notebooks/qwen2-vl/gradio_helper.py | 3 - notebooks/qwen2-vl/ov_qwen2_vl.py | 2 - .../stable-diffusion-v2-optimum-demo.ipynb | 4 +- notebooks/stable-fast-3d/Untitled.ipynb | 150 ++++++++++++++++++ .../stable-video-diffusion.ipynb | 2 - ...nsorflow-quantization-aware-training.ipynb | 5 +- .../vision-monodepth/vision-monodepth.ipynb | 5 +- 37 files changed, 300 insertions(+), 75 deletions(-) create mode 100644 notebooks/stable-fast-3d/Untitled.ipynb diff --git a/notebooks/auto-device/auto-device.ipynb b/notebooks/auto-device/auto-device.ipynb index 18ab90edee5..a5d17ab0896 100644 --- a/notebooks/auto-device/auto-device.ipynb +++ b/notebooks/auto-device/auto-device.ipynb @@ -187,8 +187,11 @@ } ], "source": [ + "import openvino.properties.log as log\n", + "\n", + "\n", "# Set LOG_LEVEL to LOG_INFO.\n", - "core.set_property(\"AUTO\", {\"LOG_LEVEL\": \"LOG_INFO\"})\n", + "core.set_property(\"AUTO\", {log.level: log.Level.INFO})\n", "\n", "# Load the model onto the target device.\n", "compiled_model = core.compile_model(ov_model)\n", @@ -249,7 +252,7 @@ ], "source": [ "# Set LOG_LEVEL to LOG_NONE.\n", - "core.set_property(\"AUTO\", {\"LOG_LEVEL\": \"LOG_NONE\"})\n", + "core.set_property(\"AUTO\", {log.level: log.Level.NO})\n", "\n", "compiled_model = core.compile_model(model=ov_model, device_name=\"AUTO\")\n", "\n", @@ -611,12 +614,15 @@ } ], "source": [ + "import openvino.properties.hint as hints\n", + "\n", + "\n", "THROUGHPUT_hint_context = InferContext(metrics_update_interval, metrics_update_num)\n", "\n", "print(\"Compiling Model for AUTO device with THROUGHPUT hint\")\n", "sys.stdout.flush()\n", "\n", - "compiled_model = core.compile_model(model=ov_model, config={\"PERFORMANCE_HINT\": \"THROUGHPUT\"})\n", + "compiled_model = core.compile_model(model=ov_model, config={hints.performance_mode: hints.PerformanceMode.THROUGHPUT})\n", "\n", "infer_queue = ov.AsyncInferQueue(compiled_model, 0) # Setting to 0 will query optimal number by default.\n", 
"infer_queue.set_callback(completion_callback)\n", @@ -680,7 +686,7 @@ "print(\"Compiling Model for AUTO Device with LATENCY hint\")\n", "sys.stdout.flush()\n", "\n", - "compiled_model = core.compile_model(model=ov_model, config={\"PERFORMANCE_HINT\": \"LATENCY\"})\n", + "compiled_model = core.compile_model(model=ov_model, config={hints.performance_mode: hints.PerformanceMode.LATENCY})\n", "\n", "# Setting to 0 will query optimal number by default.\n", "infer_queue = ov.AsyncInferQueue(compiled_model, 0)\n", diff --git a/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb b/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb index 63206625bc0..088b5ec4e1e 100644 --- a/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb +++ b/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb @@ -759,6 +759,7 @@ "outputs": [], "source": [ "from typing import Dict, Any\n", + "import openvino.properties.hint as hints\n", "\n", "\n", "image_model = core.read_model(image_model_path)\n", @@ -766,7 +767,7 @@ "image_model = core.compile_model(\n", " model=image_model,\n", " device_name=device.value,\n", - " config={\"PERFORMANCE_HINT\": \"THROUGHPUT\"},\n", + " config={hints.performance_mode: hints.PerformanceMode.THROUGHPUT},\n", ")" ] }, diff --git a/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb b/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb index f2e1b5565af..622d75cdaa3 100644 --- a/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb +++ b/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb @@ -1105,11 +1105,13 @@ "source": [ "from typing import Any\n", "\n", + "import openvino.properties.hint as hints\n", + "\n", "\n", "compiled_throughput_hint = core.compile_model(\n", " ov_model,\n", " device_name=device.value,\n", - " config={\"PERFORMANCE_HINT\": \"THROUGHPUT\"},\n", + " config={hints.performance_mode: hints.PerformanceMode.THROUGHPUT},\n", ")" ] }, @@ -1323,7 +1325,10 @@ } ], "source": [ - "cpu_name = core.get_property(\"CPU\", \"FULL_DEVICE_NAME\")\n", + "import openvino.properties as props\n", + "\n", + "\n", + "cpu_name = core.get_property(\"CPU\", props.device.full_name)\n", "\n", "plot = sns.barplot(benchmark_dataframe, errorbar=\"sd\")\n", "plot.set(ylabel=\"Sentences Per Second\", title=f\"Sentence Embeddings Benchmark\\n{cpu_name}\")\n", diff --git a/notebooks/ct-segmentation-quantize/async_pipeline.py b/notebooks/ct-segmentation-quantize/async_pipeline.py index 2758a3deff0..89b23eccdad 100644 --- a/notebooks/ct-segmentation-quantize/async_pipeline.py +++ b/notebooks/ct-segmentation-quantize/async_pipeline.py @@ -24,6 +24,7 @@ import cv2 +import openvino.properties as props from custom_segmentation import Model @@ -169,7 +170,7 @@ def __init__(self, ie, model, plugin_config, device="CPU", max_num_requests=0): cache_path.mkdir(exist_ok=True) # Enable model caching for GPU devices if "GPU" in device and "GPU" in ie.available_devices: - ie.set_property(device_name="GPU", properties={"CACHE_DIR": str(cache_path)}) + ie.set_property(device_name="GPU", properties={props.cache_dir: str(cache_path)}) self.model = model self.logger = logging.getLogger() @@ -177,7 +178,7 @@ def __init__(self, ie, model, plugin_config, device="CPU", max_num_requests=0): self.logger.info("Loading network to {} plugin...".format(device)) self.exec_net = ie.compile_model(self.model.net, device, plugin_config) if max_num_requests == 0: - 
max_num_requests = self.exec_net.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS") + 1 + max_num_requests = self.exec_net.get_property(props.optimal_number_of_infer_requests) + 1 self.requests = [self.exec_net.create_infer_request() for _ in range(max_num_requests)] self.empty_requests = deque(self.requests) self.completed_request_results = {} diff --git a/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb b/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb index 2a75c221ef9..55c69ef0842 100644 --- a/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb +++ b/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb @@ -564,9 +564,15 @@ ], "source": [ "from pathlib import Path\n", + "\n", "from transformers import AutoTokenizer\n", "from optimum.intel.openvino import OVModelForCausalLM\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", "if model_to_run.value == \"INT4\":\n", " model_dir = int4_model_dir\n", "elif model_to_run.value == \"INT8\":\n", @@ -579,7 +585,7 @@ "\n", "current_device = device.value\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", "\n", "ov_model = OVModelForCausalLM.from_pretrained(model_dir, device=current_device, ov_config=ov_config)" ] diff --git a/notebooks/florence2/gradio_helper.py b/notebooks/florence2/gradio_helper.py index dc8e212270e..76e0b5484ae 100644 --- a/notebooks/florence2/gradio_helper.py +++ b/notebooks/florence2/gradio_helper.py @@ -63,7 +63,6 @@ def plot_bbox(image, data): def draw_polygons(image, prediction, fill_mask=False): - draw = ImageDraw.Draw(image) scale = 1 for polygons, label in zip(prediction["polygons"], prediction["labels"]): diff --git a/notebooks/florence2/ov_florence2_helper.py b/notebooks/florence2/ov_florence2_helper.py index f1209a3f4ae..01a3cc8ef70 100644 --- a/notebooks/florence2/ov_florence2_helper.py +++ b/notebooks/florence2/ov_florence2_helper.py @@ -353,7 +353,6 @@ def __init__(self, model_dir, device, ov_config=None) -> None: self.language_model = OVFlorence2LangModel(model_dir, self.config.text_config, device, ov_config) def generate(self, input_ids, inputs_embeds=None, pixel_values=None, **kwargs): - if inputs_embeds is None: # 1. Extra the input embeddings if input_ids is not None: diff --git a/notebooks/gpu-device/gpu-device.ipynb b/notebooks/gpu-device/gpu-device.ipynb index 56da2089a39..756f9d04144 100644 --- a/notebooks/gpu-device/gpu-device.ipynb +++ b/notebooks/gpu-device/gpu-device.ipynb @@ -256,9 +256,12 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "device = \"GPU\"\n", "\n", - "core.get_property(device, \"FULL_DEVICE_NAME\")" + "core.get_property(device, props.device.full_name)" ] }, { @@ -267,7 +270,7 @@ "id": "aac3129a-129f-49aa-aba0-71ae1e892ada", "metadata": {}, "source": [ - "Each device also has a specific property called `SUPPORTED_PROPERTIES`, that enables viewing all the available properties in the device. We can check the value for each property by simply looping through the dictionary returned by `core.get_property(\"GPU\", \"SUPPORTED_PROPERTIES\")` and then querying for that property." 
+ "Each device also has a specific property called `SUPPORTED_PROPERTIES`, that enables viewing all the available properties in the device. We can check the value for each property by simply looping through the dictionary returned by `core.get_property(\"GPU\", props.supported_properties)` and then querying for that property." ] }, { @@ -321,7 +324,7 @@ ], "source": [ "print(f\"{device} SUPPORTED_PROPERTIES:\\n\")\n", - "supported_properties = core.get_property(device, \"SUPPORTED_PROPERTIES\")\n", + "supported_properties = core.get_property(device, props.supported_properties)\n", "indent = len(max(supported_properties, key=len))\n", "\n", "for property_key in supported_properties:\n", @@ -677,7 +680,7 @@ "core = ov.Core()\n", "\n", "# Set cache folder\n", - "core.set_property({\"CACHE_DIR\": cache_folder})\n", + "core.set_property({props.cache_dir: cache_folder})\n", "\n", "# Compile the model as before\n", "model = core.read_model(model=model_path)\n", @@ -717,7 +720,7 @@ "source": [ "start = time.time()\n", "core = ov.Core()\n", - "core.set_property({\"CACHE_DIR\": \"cache\"})\n", + "core.set_property({props.cache_dir: \"cache\"})\n", "model = core.read_model(model=model_path)\n", "compiled_model = core.compile_model(model, device)\n", "print(f\"Cache enabled - compile time: {time.time() - start}s\")\n", @@ -765,7 +768,7 @@ "id": "7077b662-22f3-4c52-9c80-e5ac1309c482", "metadata": {}, "source": [ - "To use the \"LATENCY\" performance hint, add `{\"PERFORMANCE_HINT\": \"LATENCY\"}` when compiling the model as shown below. For GPUs, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." + "To use the \"LATENCY\" performance hint, add `{hints.performance_mode: hints.PerformanceMode.LATENCY}` when compiling the model as shown below. For GPUs, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." ] }, { @@ -780,7 +783,10 @@ }, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {\"PERFORMANCE_HINT\": \"LATENCY\"})" + "import openvino.properties.hint as hints\n", + "\n", + "\n", + "compiled_model = core.compile_model(model, device, {hints.performance_mode: hints.PerformanceMode.LATENCY})" ] }, { @@ -789,7 +795,7 @@ "id": "06589f38-ce35-457f-8395-a4a3f6327ea0", "metadata": {}, "source": [ - "To use the \"THROUGHPUT\" performance hint, add `{\"PERFORMANCE_HINT\": \"THROUGHPUT\"}` when compiling the model. For GPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." + "To use the \"THROUGHPUT\" performance hint, add `{hints.performance_mode: hints.PerformanceMode.THROUGHPUT}` when compiling the model. For GPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." ] }, { @@ -804,7 +810,7 @@ }, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {\"PERFORMANCE_HINT\": \"THROUGHPUT\"})" + "compiled_model = core.compile_model(model, device, {hints.performance_mode: hints.PerformanceMode.THROUGHPUT})" ] }, { @@ -836,7 +842,9 @@ "Note that we always need to explicitly specify the device list for MULTI to work, otherwise MULTI does not know which devices are available for inference. 
However, this is not the only way to use multiple devices in OpenVINO. There is another performance hint called \"CUMULATIVE_THROUGHPUT\" that works similar to MULTI, except it uses the devices automatically selected by AUTO. This way, we do not need to manually specify devices to use. Below is an example showing how to use \"CUMULATIVE_THROUGHPUT\", equivalent to the MULTI one:\n", "\n", "`\n", - "compiled_model = core.compile_model(model=model, device_name=\"AUTO\", config={\"PERFORMANCE_HINT\": \"CUMULATIVE_THROUGHPUT\"})\n", + "\n", + "\n", + "compiled_model = core.compile_model(model=model, device_name=\"AUTO\", config={hints.performance_mode: hints.PerformanceMode.CUMULATIVE_THROUGHPUT})\n", "`\n", "\n", "> **Important**: **The “THROUGHPUT”, “MULTI”, and “CUMULATIVE_THROUGHPUT” modes are only applicable to asynchronous inferencing pipelines. The example at the end of this article shows how to set up an asynchronous pipeline that takes advantage of parallelism to increase throughput.** To learn more, see [Asynchronous Inferencing](https://docs.openvino.ai/2024/documentation/openvino-extensibility/openvino-plugin-library/asynch-inference-request.html) in OpenVINO as well as the [Asynchronous Inference notebook](../async-api/async-api.ipynb)." @@ -1584,7 +1592,7 @@ "# Read model and compile it on GPU in THROUGHPUT mode\n", "model = core.read_model(model=model_path)\n", "device_name = \"GPU\"\n", - "compiled_model = core.compile_model(model=model, device_name=device_name, config={\"PERFORMANCE_HINT\": \"THROUGHPUT\"})\n", + "compiled_model = core.compile_model(model=model, device_name=device_name, config={hints.performance_mode: hints.PerformanceMode.THROUGHPUT})\n", "\n", "# Get the input and output nodes\n", "input_layer = compiled_model.input(0)\n", @@ -1996,7 +2004,7 @@ " )\n", " cv2.putText(\n", " frame,\n", - " f\"hint {compiled_model.get_property('PERFORMANCE_HINT')}\",\n", + " f\"hint {compiled_model.get_property(hints.performance_mode)}\",\n", " (5, 60),\n", " cv2.FONT_ITALIC,\n", " 0.6,\n", diff --git a/notebooks/grounded-segment-anything/grounded-segment-anything.ipynb b/notebooks/grounded-segment-anything/grounded-segment-anything.ipynb index d20b33771db..d2be2507f8e 100644 --- a/notebooks/grounded-segment-anything/grounded-segment-anything.ipynb +++ b/notebooks/grounded-segment-anything/grounded-segment-anything.ipynb @@ -966,7 +966,6 @@ "outputs": [], "source": [ "def draw_mask(mask, draw, random_color=False):\n", - "\n", " if random_color:\n", " color = (\n", " np.random.randint(0, 255),\n", diff --git a/notebooks/hello-npu/hello-npu.ipynb b/notebooks/hello-npu/hello-npu.ipynb index db4640b7791..3bea3d27c51 100644 --- a/notebooks/hello-npu/hello-npu.ipynb +++ b/notebooks/hello-npu/hello-npu.ipynb @@ -196,9 +196,12 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "device = \"NPU\"\n", "\n", - "core.get_property(device, \"FULL_DEVICE_NAME\")" + "core.get_property(device, props.device.full_name)" ] }, { @@ -206,7 +209,7 @@ "id": "70889c34-74f8-4a7a-b23d-166311c7c02d", "metadata": {}, "source": [ - "Each device also has a specific property called ```SUPPORTED_PROPERTIES```, that enables viewing all the available properties in the device. We can check the value for each property by simply looping through the dictionary returned by ```core.get_property(\"NPU\", \"SUPPORTED_PROPERTIES\")``` and then querying for that property." 
+ "Each device also has a specific property called ```SUPPORTED_PROPERTIES```, that enables viewing all the available properties in the device. We can check the value for each property by simply looping through the dictionary returned by ```core.get_property(\"NPU\", props.supported_properties)``` and then querying for that property." ] }, { @@ -217,7 +220,7 @@ "outputs": [], "source": [ "print(f\"{device} SUPPORTED_PROPERTIES:\\n\")\n", - "supported_properties = core.get_property(device, \"SUPPORTED_PROPERTIES\")\n", + "supported_properties = core.get_property(device, props.supported_properties)\n", "indent = len(max(supported_properties, key=len))\n", "\n", "for property_key in supported_properties:\n", @@ -527,7 +530,7 @@ "core = ov.Core()\n", "\n", "# Set cache folder\n", - "core.set_property({\"CACHE_DIR\": cache_folder})\n", + "core.set_property({props.cache_dir: cache_folder})\n", "\n", "# Compile the model\n", "model = core.read_model(model=model_path)\n", @@ -538,7 +541,7 @@ "core = ov.Core()\n", "\n", "# Set cache folder\n", - "core.set_property({\"CACHE_DIR\": cache_folder})\n", + "core.set_property({props.cache_dir: cache_folder})\n", "\n", "# Compile the model as before\n", "model = core.read_model(model=model_path)\n", @@ -606,7 +609,7 @@ "id": "1cccd1b5-4d5a-41f3-8d8a-4ee0bc235a9e", "metadata": {}, "source": [ - "To use the \"LATENCY\" performance hint, add `{\"PERFORMANCE_HINT\": \"LATENCY\"}` when compiling the model as shown below. For NPU, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." + "To use the \"LATENCY\" performance hint, add `{hints.performance_mode: hints.PerformanceMode.LATENCY}` when compiling the model as shown below. For NPU, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." ] }, { @@ -616,7 +619,10 @@ "metadata": {}, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {\"PERFORMANCE_HINT\": \"LATENCY\"})" + "import openvino.properties.hint as hints\n", + "\n", + "\n", + "compiled_model = core.compile_model(model, device, {hints.performance_mode: hints.PerformanceMode.LATENCY})" ] }, { @@ -624,7 +630,7 @@ "id": "7ca1f3d8-202c-4a98-85bc-b66110120dfb", "metadata": {}, "source": [ - "To use the \"THROUGHPUT\" performance hint, add `{\"PERFORMANCE_HINT\": \"THROUGHPUT\"}` when compiling the model. For NPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." + "To use the \"THROUGHPUT\" performance hint, add `{hints.performance_mode: hints.PerformanceMode.THROUGHPUT}` when compiling the model. For NPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." 
] }, { @@ -634,7 +640,7 @@ "metadata": {}, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {\"PERFORMANCE_HINT\": \"THROUGHPUT\"})" + "compiled_model = core.compile_model(model, device, {hints.performance_mode: hints.PerformanceMode.THROUGHPUT})" ] }, { diff --git a/notebooks/internvl2/gradio_helper.py b/notebooks/internvl2/gradio_helper.py index e95307761a2..24414fe23a4 100644 --- a/notebooks/internvl2/gradio_helper.py +++ b/notebooks/internvl2/gradio_helper.py @@ -377,7 +377,6 @@ def bot( max_new_tokens, max_input_tiles, ): - streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) generation_config = { "num_beams": 1, diff --git a/notebooks/internvl2/internvl2_helper.py b/notebooks/internvl2/internvl2_helper.py index 96c72b5aeab..9ec48083177 100644 --- a/notebooks/internvl2/internvl2_helper.py +++ b/notebooks/internvl2/internvl2_helper.py @@ -254,7 +254,6 @@ def convert_internvl2_model(model_id, output_dir, quantization_config): print("✅ Input embedding model successfully converted") if not image_embed_path.exists(): - print("⌛ Convert Image embedding model") model.forward = model.extract_feature @@ -535,7 +534,6 @@ def __call__(self, *args, **kwargs): class OVInternVLChatModel: - def __init__(self, model_dir: Path, device: str): config = AutoConfig.from_pretrained(model_dir, trust_remote_code=True) image_size = config.force_image_size or config.vision_config.image_size @@ -564,7 +562,6 @@ def forward( past_key_values: Optional[List[torch.FloatTensor]] = None, use_cache: Optional[bool] = None, ) -> Union[Tuple, CausalLMOutputWithPast]: - image_flags = image_flags.squeeze(-1) input_embeds = self.language_model.embed_tokens(input_ids) @@ -678,7 +675,6 @@ def chat( IMG_CONTEXT_TOKEN="", verbose=False, ): - from conversation import get_conv_template if history is None and pixel_values is not None and "" not in question: @@ -740,7 +736,6 @@ def generate( return_dict: Optional[bool] = None, **generate_kwargs, ) -> torch.LongTensor: - assert self.img_context_token_id is not None if pixel_values is not None: if visual_features is not None: diff --git a/notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb b/notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb index 50f25a1d4bf..41b2904a6dd 100644 --- a/notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb +++ b/notebooks/latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb @@ -116,12 +116,14 @@ ], "source": [ "import openvino as ov\n", + "import openvino.properties as props\n", + "\n", "\n", "core = ov.Core()\n", "devices = core.available_devices\n", "\n", "for device in devices:\n", - " device_name = core.get_property(device, \"FULL_DEVICE_NAME\")\n", + " device_name = core.get_property(device, props.device.full_name)\n", " print(f\"{device}: {device_name}\")" ] }, diff --git a/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb b/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb index 5a694cda366..82a3b3156c6 100644 --- a/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb +++ b/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb @@ -282,7 +282,12 @@ "source": [ "from qwen_agent.llm import get_chat_model\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "import openvino.properties as props\n", 
+ "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", "llm_cfg = {\n", " \"ov_model_dir\": model_path,\n", " \"model_type\": \"openvino\",\n", @@ -313,9 +318,9 @@ "ov_config = {\n", " \"KV_CACHE_PRECISION\": \"u8\",\n", " \"DYNAMIC_QUANTIZATION_GROUP_SIZE\": \"32\",\n", - " \"PERFORMANCE_HINT\": \"LATENCY\",\n", - " \"NUM_STREAMS\": \"1\",\n", - " \"CACHE_DIR\": \"\",\n", + " hints.performance_mode: hints.PerformanceMode.LATENCY,\n", + " streams.num: \"1\",\n", + " props.cache_dir: \"\",\n", "}" ] }, diff --git a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb index 36456482115..93da34400d7 100644 --- a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb +++ b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb @@ -301,7 +301,12 @@ "source": [ "from llama_index.llms.openvino import OpenVINOLLM\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", "\n", "\n", "def phi_completion_to_prompt(completion):\n", diff --git a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb index 41b68752c99..a4505688a1f 100644 --- a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb +++ b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb @@ -380,6 +380,10 @@ "from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n", "from transformers.generation.stopping_criteria import StoppingCriteriaList, StoppingCriteria\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", "\n", "class StopSequenceCriteria(StoppingCriteria):\n", " \"\"\"\n", @@ -403,7 +407,7 @@ " return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences)\n", "\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", "stop_tokens = [\"Observation:\"]\n", "\n", "ov_llm = HuggingFacePipeline.from_model_id(\n", @@ -442,9 +446,9 @@ "ov_config = {\n", " \"KV_CACHE_PRECISION\": \"u8\",\n", " \"DYNAMIC_QUANTIZATION_GROUP_SIZE\": \"32\",\n", - " \"PERFORMANCE_HINT\": \"LATENCY\",\n", - " \"NUM_STREAMS\": \"1\",\n", - " \"CACHE_DIR\": \"\",\n", + " hints.performance_mode: hints.PerformanceMode.LATENCY,\n", + " streams.num: \"1\",\n", + " props.cache_dir: \"\",\n", "}" ] }, diff --git a/notebooks/llm-chatbot/gradio_helper_genai.py b/notebooks/llm-chatbot/gradio_helper_genai.py index 07cdaad3e69..011e15fcc60 100644 --- a/notebooks/llm-chatbot/gradio_helper_genai.py +++ b/notebooks/llm-chatbot/gradio_helper_genai.py @@ -58,7 +58,9 @@ def get_system_prompt(model_language): return ( DEFAULT_SYSTEM_PROMPT_CHINESE if (model_language == "Chinese") - else DEFAULT_SYSTEM_PROMPT_JAPANESE if (model_language == "Japanese") else DEFAULT_SYSTEM_PROMPT + else DEFAULT_SYSTEM_PROMPT_JAPANESE + 
if (model_language == "Japanese") + else DEFAULT_SYSTEM_PROMPT ) diff --git a/notebooks/llm-chatbot/llm-chatbot.ipynb b/notebooks/llm-chatbot/llm-chatbot.ipynb index 6d53e7200df..4b41df4c0bb 100644 --- a/notebooks/llm-chatbot/llm-chatbot.ipynb +++ b/notebooks/llm-chatbot/llm-chatbot.ipynb @@ -928,6 +928,11 @@ "from transformers import AutoConfig, AutoTokenizer\n", "from optimum.intel.openvino import OVModelForCausalLM\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", "if model_to_run.value == \"INT4\":\n", " model_dir = int4_model_dir\n", "elif model_to_run.value == \"INT8\":\n", @@ -936,7 +941,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", "\n", "if \"GPU\" in device.value and \"qwen2-7b-instruct\" in model_id.value:\n", " ov_config[\"GPU_ENABLE_SDPA_OPTIMIZATION\"] = \"NO\"\n", diff --git a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb index cd90aaf5a2f..a3c7059801d 100644 --- a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb +++ b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb @@ -1293,6 +1293,11 @@ "source": [ "from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", "if model_to_run.value == \"INT4\":\n", " model_dir = int4_model_dir\n", "elif model_to_run.value == \"INT8\":\n", @@ -1301,7 +1306,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", "\n", "if \"GPU\" in llm_device.value and \"qwen2-7b-instruct\" in llm_model_id.value:\n", " ov_config[\"GPU_ENABLE_SDPA_OPTIMIZATION\"] = \"NO\"\n", diff --git a/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb b/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb index 524639e4dfd..18ddab2c2cc 100644 --- a/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb +++ b/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb @@ -1271,6 +1271,11 @@ "source": [ "from llama_index.llms.openvino import OpenVINOLLM\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "import openvino.properties.streams as streams\n", + "\n", + "\n", "if model_to_run.value == \"INT4\":\n", " model_dir = int4_model_dir\n", "elif model_to_run.value == \"INT8\":\n", @@ -1279,7 +1284,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", "\n", "stop_tokens = llm_model_configuration.get(\"stop_tokens\")\n", "completion_to_prompt = llm_model_configuration.get(\"completion_to_prompt\")\n", diff --git a/notebooks/openvino-api/openvino-api.ipynb b/notebooks/openvino-api/openvino-api.ipynb 
index 2d0a205885f..de3558451f1 100644 --- a/notebooks/openvino-api/openvino-api.ipynb +++ b/notebooks/openvino-api/openvino-api.ipynb @@ -114,10 +114,13 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "devices = core.available_devices\n", "\n", "for device in devices:\n", - " device_name = core.get_property(device, \"FULL_DEVICE_NAME\")\n", + " device_name = core.get_property(device, props.device.full_name)\n", " print(f\"{device}: {device_name}\")" ] }, diff --git a/notebooks/paddle-to-openvino/paddle-to-openvino-classification.ipynb b/notebooks/paddle-to-openvino/paddle-to-openvino-classification.ipynb index e625c3e4722..ce5e7d74c0a 100644 --- a/notebooks/paddle-to-openvino/paddle-to-openvino-classification.ipynb +++ b/notebooks/paddle-to-openvino/paddle-to-openvino-classification.ipynb @@ -537,12 +537,15 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "# Show device information\n", "core = ov.Core()\n", "devices = core.available_devices\n", "\n", "for device_name in devices:\n", - " device_full_name = core.get_property(device_name, \"FULL_DEVICE_NAME\")\n", + " device_full_name = core.get_property(device_name, props.device.full_name)\n", " print(f\"{device_name}: {device_full_name}\")" ] }, diff --git a/notebooks/pixart/pixart.ipynb b/notebooks/pixart/pixart.ipynb index a9972fac7c4..71408c28740 100644 --- a/notebooks/pixart/pixart.ipynb +++ b/notebooks/pixart/pixart.ipynb @@ -247,7 +247,6 @@ " self.transformer = transformer\n", "\n", " def forward(self, hidden_states=None, timestep=None, encoder_hidden_states=None, encoder_attention_mask=None, resolution=None, aspect_ratio=None):\n", - "\n", " return self.transformer.forward(\n", " hidden_states,\n", " timestep=timestep,\n", @@ -289,7 +288,6 @@ "outputs": [], "source": [ "class VAEDecoderWrapper(torch.nn.Module):\n", - "\n", " def __init__(self, vae):\n", " super().__init__()\n", " self.vae = vae\n", diff --git a/notebooks/pose-estimation-webcam/pose-estimation.ipynb b/notebooks/pose-estimation-webcam/pose-estimation.ipynb index 15fb4502c88..7deee9fb68f 100644 --- a/notebooks/pose-estimation-webcam/pose-estimation.ipynb +++ b/notebooks/pose-estimation-webcam/pose-estimation.ipynb @@ -155,12 +155,15 @@ "metadata": {}, "outputs": [], "source": [ + "import openvino.properties.hint as hints\n", + "\n", + "\n", "# Initialize OpenVINO Runtime\n", "core = ov.Core()\n", "# Read the network from a file.\n", "model = core.read_model(model_path)\n", "# Let the AUTO device decide where to load the model (you can use CPU, GPU as well).\n", - "compiled_model = core.compile_model(model=model, device_name=device.value, config={\"PERFORMANCE_HINT\": \"LATENCY\"})\n", + "compiled_model = core.compile_model(model=model, device_name=device.value, config={hints.performance_mode: hints.PerformanceMode.LATENCY})\n", "\n", "# Get the input and output names of nodes.\n", "input_layer = compiled_model.input(0)\n", diff --git a/notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb b/notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb index 28fbfb28b3b..eb5ccbe456b 100644 --- a/notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb +++ b/notebooks/pytorch-post-training-quantization-nncf/pytorch-post-training-quantization-nncf.ipynb @@ -957,11 +957,14 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "core = ov.Core()\n", "devices = 
core.available_devices\n", "\n", "for device_name in devices:\n", - " device_full_name = core.get_property(device_name, \"FULL_DEVICE_NAME\")\n", + " device_full_name = core.get_property(device_name, props.device.full_name)\n", " print(f\"{device_name}: {device_full_name}\")" ] } diff --git a/notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb b/notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb index 59e074197a5..2d5acc6477d 100644 --- a/notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb +++ b/notebooks/pytorch-quantization-aware-training/pytorch-quantization-aware-training.ipynb @@ -1108,8 +1108,11 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "core = ov.Core()\n", - "core.get_property(device.value, \"FULL_DEVICE_NAME\")" + "core.get_property(device.value, props.device.full_name)" ] } ], diff --git a/notebooks/pytorch-quantization-sparsity-aware-training/pytorch-quantization-sparsity-aware-training.ipynb b/notebooks/pytorch-quantization-sparsity-aware-training/pytorch-quantization-sparsity-aware-training.ipynb index f5e3ad999a4..8d32cd866cb 100644 --- a/notebooks/pytorch-quantization-sparsity-aware-training/pytorch-quantization-sparsity-aware-training.ipynb +++ b/notebooks/pytorch-quantization-sparsity-aware-training/pytorch-quantization-sparsity-aware-training.ipynb @@ -707,7 +707,10 @@ }, "outputs": [], "source": [ - "core.get_property(device.value, \"FULL_DEVICE_NAME\")" + "import openvino.properties as props\n", + "\n", + "\n", + "core.get_property(device.value, props.device.full_name)" ] } ], diff --git a/notebooks/pytorch-to-openvino/pytorch-onnx-to-openvino.ipynb b/notebooks/pytorch-to-openvino/pytorch-onnx-to-openvino.ipynb index c8da17ee224..64bd4a1efe5 100644 --- a/notebooks/pytorch-to-openvino/pytorch-onnx-to-openvino.ipynb +++ b/notebooks/pytorch-to-openvino/pytorch-onnx-to-openvino.ipynb @@ -843,9 +843,12 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "devices = core.available_devices\n", "for device in devices:\n", - " device_name = core.get_property(device, \"FULL_DEVICE_NAME\")\n", + " device_name = core.get_property(device, props.device.full_name)\n", " print(f\"{device}: {device_name}\")" ] }, diff --git a/notebooks/qwen2-audio/gradio_helper.py b/notebooks/qwen2-audio/gradio_helper.py index 1e942e9f741..9ca1999859d 100644 --- a/notebooks/qwen2-audio/gradio_helper.py +++ b/notebooks/qwen2-audio/gradio_helper.py @@ -6,7 +6,6 @@ def make_demo(model, processor): - def add_text(chatbot, task_history, input): text_content = input.text content = [] diff --git a/notebooks/qwen2-audio/ov_qwen2_audio_helper.py b/notebooks/qwen2-audio/ov_qwen2_audio_helper.py index 2ad7a816ada..7eb353940b0 100644 --- a/notebooks/qwen2-audio/ov_qwen2_audio_helper.py +++ b/notebooks/qwen2-audio/ov_qwen2_audio_helper.py @@ -676,7 +676,6 @@ def forward( use_cache: bool = True, return_dict: bool = True, ) -> Union[Tuple, Qwen2AudioCausalLMOutputWithPast]: - if input_features is not None: input_features = input_features feature_attention_mask = feature_attention_mask diff --git a/notebooks/qwen2-vl/gradio_helper.py b/notebooks/qwen2-vl/gradio_helper.py index ee940ac33d9..1e93738a101 100644 --- a/notebooks/qwen2-vl/gradio_helper.py +++ b/notebooks/qwen2-vl/gradio_helper.py @@ -71,7 +71,6 @@ def transform_messages(original_messages): def make_demo(model, processor): def call_local_model(model, processor, messages): - messages = 
transform_messages(messages) text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) @@ -92,7 +91,6 @@ def call_local_model(model, processor, messages): yield generated_text def create_predict_fn(): - def predict(_chatbot, task_history): chat_query = _chatbot[-1][0] query = task_history[-1][0] @@ -131,7 +129,6 @@ def predict(_chatbot, task_history): return predict def create_regenerate_fn(): - def regenerate(_chatbot, task_history): if not task_history: return _chatbot diff --git a/notebooks/qwen2-vl/ov_qwen2_vl.py b/notebooks/qwen2-vl/ov_qwen2_vl.py index de578cdb5ee..e944c3b8284 100644 --- a/notebooks/qwen2-vl/ov_qwen2_vl.py +++ b/notebooks/qwen2-vl/ov_qwen2_vl.py @@ -252,7 +252,6 @@ def convert_qwen2vl_model(model_id, output_dir, quantization_config): print("✅ Input embedding model successfully converted") if not image_embed_path.exists() or not image_embed_merger_path.exists(): - print("⌛ Convert Image embedding model") vision_embed_tokens = model.visual @@ -269,7 +268,6 @@ def image_embed_forward(self, hidden_states: torch.Tensor, attention_mask: torch return self.merger(hidden_states) def sdpa_attn_forward(self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, rotary_pos_emb: torch.Tensor = None) -> torch.Tensor: - from transformers.models.qwen2_vl.modeling_qwen2_vl import apply_rotary_pos_emb_vision seq_length = hidden_states.shape[0] diff --git a/notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb b/notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb index 8bb0265a9a5..d40345164c4 100644 --- a/notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb +++ b/notebooks/stable-diffusion-v2/stable-diffusion-v2-optimum-demo.ipynb @@ -105,12 +105,14 @@ ], "source": [ "import openvino as ov\n", + "import openvino.properties as props\n", + "\n", "\n", "core = ov.Core()\n", "devices = core.available_devices\n", "\n", "for device in devices:\n", - " device_name = core.get_property(device, \"FULL_DEVICE_NAME\")\n", + " device_name = core.get_property(device, props.device.full_name)\n", " print(f\"{device}: {device_name}\")" ] }, diff --git a/notebooks/stable-fast-3d/Untitled.ipynb b/notebooks/stable-fast-3d/Untitled.ipynb new file mode 100644 index 00000000000..5cd333a486a --- /dev/null +++ b/notebooks/stable-fast-3d/Untitled.ipynb @@ -0,0 +1,150 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "b302116b-8f62-402a-ae59-dc1a2ab79536", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Cloning into 'TripoSR'...\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "if not Path(\"TripoSR\").exists():\n", + " exit_code = os.system(\"git clone https://huggingface.co/spaces/stabilityai/TripoSR\")\n", + "\n", + " if exit_code != 0:\n", + " raise Exception(\"Failed to clone repository!\")\n", + "\n", + "sys.path.append(\"TripoSR\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "904d6fff-6dcf-4028-8516-8e4fda3d7749", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Cloning into 'TripoSR'...\n", + "fatal: unable to access 'https://huggingface.co/spaces/stabilityai/TripoSR/': Could not resolve proxy: proxy-dmz.intel.com\n" + ] + }, + { + "ename": "Exception", + "evalue": "Failed to clone repository!", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[17], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m exit_code \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39msystem(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgit clone https://huggingface.co/spaces/stabilityai/TripoSR\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exit_code \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFailed to clone repository!\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "\u001b[0;31mException\u001b[0m: Failed to clone repository!" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "\n", + "\n", + "exit_code = os.system(\"git clone https://huggingface.co/spaces/stabilityai/TripoSR\")\n", + "\n", + "if exit_code != 0:\n", + " raise Exception(\"Failed to clone repository!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "06a9ff20-9abc-44f2-9556-632eb4031ec7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Cloning into 'TripoSR'...\n", + "fatal: unable to access 'https://huggingface.co/spaces/stabilityai/TripoSR/': Could not resolve proxy: proxy-dmz.intel.com\n" + ] + }, + { + "data": { + "text/plain": [ + "32768" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.system(\"git clone https://huggingface.co/spaces/stabilityai/TripoSR\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fa1fc652-135c-40a9-855a-932966f1d1af", + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'trimesh'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtsr\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msystem\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TSR\n", + "File \u001b[0;32m/mnt/c/Users/amokrov/PycharmProjects/default/my_openvino_notebooks/notebooks/stable-fast-3d/TripoSR/tsr/system.py:10\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctional\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mF\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtrimesh\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01meinops\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m rearrange\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mhuggingface_hub\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m hf_hub_download\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'trimesh'" + ] + } + ], + "source": [ + "from tsr.system import TSR" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "id": "6555adaa-b4f5-4d16-8d9c-e853b5fbf414", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/stable-video-diffusion/stable-video-diffusion.ipynb b/notebooks/stable-video-diffusion/stable-video-diffusion.ipynb index 91ce767b3ef..43e92786ca9 100644 --- a/notebooks/stable-video-diffusion/stable-video-diffusion.ipynb +++ b/notebooks/stable-video-diffusion/stable-video-diffusion.ipynb @@ -813,7 +813,6 @@ " ks = ks[0] + 1, ks[1]\n", "\n", " if (ks[1] % 2) == 0:\n", - "\n", " ks = ks[0], ks[1] + 1\n", "\n", " input = _gaussian_blur2d(input, ks, sigmas)\n", @@ -877,7 +876,6 @@ " x = (torch.arange(window_size, device=sigma.device, dtype=sigma.dtype) - window_size // 2).expand(batch_size, -1)\n", "\n", " if window_size % 2 == 0:\n", - "\n", " x = x + 0.5\n", "\n", " gauss = torch.exp(-x.pow(2.0) / (2 * sigma.pow(2.0)))\n", diff --git a/notebooks/tensorflow-quantization-aware-training/tensorflow-quantization-aware-training.ipynb b/notebooks/tensorflow-quantization-aware-training/tensorflow-quantization-aware-training.ipynb index d4f0ed79417..e918367756b 100644 --- a/notebooks/tensorflow-quantization-aware-training/tensorflow-quantization-aware-training.ipynb +++ b/notebooks/tensorflow-quantization-aware-training/tensorflow-quantization-aware-training.ipynb @@ -690,8 +690,11 @@ } ], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "core = ov.Core()\n", - "core.get_property(device.value, \"FULL_DEVICE_NAME\")" + "core.get_property(device.value, props.device.full_name)" ] } ], diff --git a/notebooks/vision-monodepth/vision-monodepth.ipynb b/notebooks/vision-monodepth/vision-monodepth.ipynb index 5710746ca09..b8b26f56bba 100644 --- a/notebooks/vision-monodepth/vision-monodepth.ipynb +++ b/notebooks/vision-monodepth/vision-monodepth.ipynb @@ -297,12 +297,15 @@ }, "outputs": [], "source": [ + "import openvino.properties as props\n", + "\n", + "\n", "# Create cache folder\n", "cache_folder = Path(\"cache\")\n", "cache_folder.mkdir(exist_ok=True)\n", "\n", "core = ov.Core()\n", - "core.set_property({\"CACHE_DIR\": cache_folder})\n", + "core.set_property({props.cache_dir: cache_folder})\n", "model = core.read_model(model_xml_path)\n", "compiled_model = core.compile_model(model=model, device_name=device.value)\n", "\n", From 96a8f4c3670f03ab489dd5f69580f9a3ee64ffac Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Thu, 19 Sep 2024 14:03:06 +0200 Subject: [PATCH 02/10] Update notebooks with new properties import --- .../notebooks/phi3_chatbot_demo.ipynb | 10 +++++++--- .../notebooks/phi3_rag_on_client.ipynb | 9 ++++++--- supplementary_materials/qwen2/chat.py | 6 +++++- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb b/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb index 9d118e82cfe..d2cd1f4b830 100644 --- a/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb +++ b/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb @@ -130,13 +130,17 @@ "metadata": {}, "outputs": [], "source": [ + 
"import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "\n", + "\n", "# Load kwargs\n", "load_kwargs = {\n", " \"device\": device,\n", " \"ov_config\": {\n", - " \"PERFORMANCE_HINT\": \"LATENCY\",\n", - " \"INFERENCE_PRECISION_HINT\": precision,\n", - " \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", + " hints.performance_mode: hints.PerformanceMode.LATENCY,\n", + " hints.inference_precision: precision,\n", + " props.cache_dir: os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", " },\n", " \"compile\": False,\n", " \"quantization_config\": quantization_config,\n", diff --git a/supplementary_materials/notebooks/phi3_rag_on_client.ipynb b/supplementary_materials/notebooks/phi3_rag_on_client.ipynb index 9b21571b947..fbc1f8ae6d6 100644 --- a/supplementary_materials/notebooks/phi3_rag_on_client.ipynb +++ b/supplementary_materials/notebooks/phi3_rag_on_client.ipynb @@ -280,6 +280,9 @@ "from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig\n", "from functools import wraps\n", "\n", + "import openvino.properties as props\n", + "import openvino.properties.hint as hints\n", + "\n", "\n", "model_name = \"microsoft/Phi-3-mini-4k-instruct\"\n", "save_name = model_name.split(\"/\")[-1] + \"_openvino\"\n", @@ -295,9 +298,9 @@ "load_kwargs = {\n", " \"device\": device,\n", " \"ov_config\": {\n", - " \"PERFORMANCE_HINT\": \"LATENCY\",\n", - " \"INFERENCE_PRECISION_HINT\": precision,\n", - " \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", + " hints.performance_mode: hints.PerformanceMode.LATENCY,\n", + " hints.inference_precision: precision,\n", + " props.cache_dir: os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", " },\n", " \"quantization_config\": quantization_config,\n", " \"trust_remote_code\": True,\n", diff --git a/supplementary_materials/qwen2/chat.py b/supplementary_materials/qwen2/chat.py index eb963524a29..d26b82472af 100644 --- a/supplementary_materials/qwen2/chat.py +++ b/supplementary_materials/qwen2/chat.py @@ -5,6 +5,10 @@ from optimum.intel.openvino import OVModelForCausalLM from transformers import AutoTokenizer, AutoConfig, TextIteratorStreamer, StoppingCriteriaList, StoppingCriteria +import openvino.properties as props +import openvino.properties.hint as hints +import openvino.properties.streams as streams + class StopOnTokens(StoppingCriteria): def __init__(self, token_ids): @@ -26,7 +30,7 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa args = parser.parse_args() model_dir = args.model_path - ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""} + ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: "1", props.cache_dir: ""} tokenizer = AutoTokenizer.from_pretrained(model_dir) print("====Compiling model====") From db01b2faea82d4cc45d510929380b2c2ea02cdfb Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Thu, 19 Sep 2024 14:07:36 +0200 Subject: [PATCH 03/10] Remove test file --- notebooks/stable-fast-3d/Untitled.ipynb | 150 ------------------------ 1 file changed, 150 deletions(-) delete mode 100644 notebooks/stable-fast-3d/Untitled.ipynb diff --git a/notebooks/stable-fast-3d/Untitled.ipynb b/notebooks/stable-fast-3d/Untitled.ipynb deleted file mode 100644 index 5cd333a486a..00000000000 --- a/notebooks/stable-fast-3d/Untitled.ipynb +++ /dev/null @@ 
-1,150 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "b302116b-8f62-402a-ae59-dc1a2ab79536", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Cloning into 'TripoSR'...\n" - ] - } - ], - "source": [ - "import os\n", - "import sys\n", - "from pathlib import Path\n", - "\n", - "if not Path(\"TripoSR\").exists():\n", - " exit_code = os.system(\"git clone https://huggingface.co/spaces/stabilityai/TripoSR\")\n", - "\n", - " if exit_code != 0:\n", - " raise Exception(\"Failed to clone repository!\")\n", - "\n", - "sys.path.append(\"TripoSR\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "904d6fff-6dcf-4028-8516-8e4fda3d7749", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Cloning into 'TripoSR'...\n", - "fatal: unable to access 'https://huggingface.co/spaces/stabilityai/TripoSR/': Could not resolve proxy: proxy-dmz.intel.com\n" - ] - }, - { - "ename": "Exception", - "evalue": "Failed to clone repository!", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[17], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m exit_code \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39msystem(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgit clone https://huggingface.co/spaces/stabilityai/TripoSR\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exit_code \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFailed to clone repository!\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "\u001b[0;31mException\u001b[0m: Failed to clone repository!" 
- ] - } - ], - "source": [ - "import os\n", - "import sys\n", - "\n", - "\n", - "exit_code = os.system(\"git clone https://huggingface.co/spaces/stabilityai/TripoSR\")\n", - "\n", - "if exit_code != 0:\n", - " raise Exception(\"Failed to clone repository!\")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "06a9ff20-9abc-44f2-9556-632eb4031ec7", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Cloning into 'TripoSR'...\n", - "fatal: unable to access 'https://huggingface.co/spaces/stabilityai/TripoSR/': Could not resolve proxy: proxy-dmz.intel.com\n" - ] - }, - { - "data": { - "text/plain": [ - "32768" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os.system(\"git clone https://huggingface.co/spaces/stabilityai/TripoSR\")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "fa1fc652-135c-40a9-855a-932966f1d1af", - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'trimesh'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtsr\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msystem\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TSR\n", - "File \u001b[0;32m/mnt/c/Users/amokrov/PycharmProjects/default/my_openvino_notebooks/notebooks/stable-fast-3d/TripoSR/tsr/system.py:10\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctional\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mF\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtrimesh\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01meinops\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m rearrange\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mhuggingface_hub\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m hf_hub_download\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'trimesh'" - ] - } - ], - "source": [ - "from tsr.system import TSR" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6555adaa-b4f5-4d16-8d9c-e853b5fbf414", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 3c342488f7a8df8420eca7d4b587f00b4a81874c Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Thu, 19 Sep 2024 14:13:39 +0200 Subject: [PATCH 04/10] Spellchecking --- .ci/spellcheck/.pyspelling.wordlist.txt | 1 + 1 file changed, 1 insertion(+) diff --git 
a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt index d4f3e90ed7a..123e9b2349e 100644 --- a/.ci/spellcheck/.pyspelling.wordlist.txt +++ b/.ci/spellcheck/.pyspelling.wordlist.txt @@ -617,6 +617,7 @@ perceptron Patil PEFT perceiver +PerformanceMode performant PersonaGPT PGI From 347643adfc5c1c15cfef5f4aaaa6b7b66cbcee34 Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Thu, 19 Sep 2024 15:49:52 +0200 Subject: [PATCH 05/10] Code check --- notebooks/llm-chatbot/gradio_helper_genai.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/notebooks/llm-chatbot/gradio_helper_genai.py b/notebooks/llm-chatbot/gradio_helper_genai.py index 011e15fcc60..07cdaad3e69 100644 --- a/notebooks/llm-chatbot/gradio_helper_genai.py +++ b/notebooks/llm-chatbot/gradio_helper_genai.py @@ -58,9 +58,7 @@ def get_system_prompt(model_language): return ( DEFAULT_SYSTEM_PROMPT_CHINESE if (model_language == "Chinese") - else DEFAULT_SYSTEM_PROMPT_JAPANESE - if (model_language == "Japanese") - else DEFAULT_SYSTEM_PROMPT + else DEFAULT_SYSTEM_PROMPT_JAPANESE if (model_language == "Japanese") else DEFAULT_SYSTEM_PROMPT ) From f835eb576dd2c1293fd613d8101a75d09bea40c9 Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Thu, 19 Sep 2024 16:56:34 +0200 Subject: [PATCH 06/10] Fix num streams --- .../dolly-2-instruction-following.ipynb | 2 +- .../llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb | 4 ++-- notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb | 2 +- notebooks/llm-agent-react/llm-agent-react-langchain.ipynb | 4 ++-- notebooks/llm-chatbot/llm-chatbot.ipynb | 2 +- notebooks/llm-rag-langchain/llm-rag-langchain.ipynb | 2 +- notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb | 2 +- supplementary_materials/qwen2/chat.py | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb b/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb index 55c69ef0842..8f297d93be5 100644 --- a/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb +++ b/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb @@ -585,7 +585,7 @@ "\n", "current_device = device.value\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", "\n", "ov_model = OVModelForCausalLM.from_pretrained(model_dir, device=current_device, ov_config=ov_config)" ] diff --git a/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb b/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb index 82a3b3156c6..5783c34cd88 100644 --- a/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb +++ b/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb @@ -287,7 +287,7 @@ "import openvino.properties.streams as streams\n", "\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", "llm_cfg = {\n", " \"ov_model_dir\": model_path,\n", " \"model_type\": \"openvino\",\n", @@ -319,7 +319,7 @@ " \"KV_CACHE_PRECISION\": \"u8\",\n", " \"DYNAMIC_QUANTIZATION_GROUP_SIZE\": \"32\",\n", " hints.performance_mode: 
hints.PerformanceMode.LATENCY,\n", - " streams.num: \"1\",\n", + " streams.num: streams.Num(1),\n", " props.cache_dir: \"\",\n", "}" ] diff --git a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb index 93da34400d7..92dbb39c864 100644 --- a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb +++ b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb @@ -306,7 +306,7 @@ "import openvino.properties.streams as streams\n", "\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", "\n", "\n", "def phi_completion_to_prompt(completion):\n", diff --git a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb index a4505688a1f..f40db4af033 100644 --- a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb +++ b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb @@ -407,7 +407,7 @@ " return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences)\n", "\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", "stop_tokens = [\"Observation:\"]\n", "\n", "ov_llm = HuggingFacePipeline.from_model_id(\n", @@ -447,7 +447,7 @@ " \"KV_CACHE_PRECISION\": \"u8\",\n", " \"DYNAMIC_QUANTIZATION_GROUP_SIZE\": \"32\",\n", " hints.performance_mode: hints.PerformanceMode.LATENCY,\n", - " streams.num: \"1\",\n", + " streams.num: streams.Num(1),\n", " props.cache_dir: \"\",\n", "}" ] diff --git a/notebooks/llm-chatbot/llm-chatbot.ipynb b/notebooks/llm-chatbot/llm-chatbot.ipynb index 4b41df4c0bb..d7962690de3 100644 --- a/notebooks/llm-chatbot/llm-chatbot.ipynb +++ b/notebooks/llm-chatbot/llm-chatbot.ipynb @@ -941,7 +941,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", "\n", "if \"GPU\" in device.value and \"qwen2-7b-instruct\" in model_id.value:\n", " ov_config[\"GPU_ENABLE_SDPA_OPTIMIZATION\"] = \"NO\"\n", diff --git a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb index a3c7059801d..ab638d4be66 100644 --- a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb +++ b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb @@ -1306,7 +1306,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", "\n", "if \"GPU\" in llm_device.value and \"qwen2-7b-instruct\" in llm_model_id.value:\n", " ov_config[\"GPU_ENABLE_SDPA_OPTIMIZATION\"] = \"NO\"\n", diff --git a/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb b/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb index 18ddab2c2cc..0bcee402b08 100644 --- 
a/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb +++ b/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb @@ -1284,7 +1284,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: \"1\", props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", "\n", "stop_tokens = llm_model_configuration.get(\"stop_tokens\")\n", "completion_to_prompt = llm_model_configuration.get(\"completion_to_prompt\")\n", diff --git a/supplementary_materials/qwen2/chat.py b/supplementary_materials/qwen2/chat.py index d26b82472af..901bdab1d1f 100644 --- a/supplementary_materials/qwen2/chat.py +++ b/supplementary_materials/qwen2/chat.py @@ -30,7 +30,7 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa args = parser.parse_args() model_dir = args.model_path - ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: "1", props.cache_dir: ""} + ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: ""} tokenizer = AutoTokenizer.from_pretrained(model_dir) print("====Compiling model====") From c3d1f8d0c59a0ae1d050c15f893cd3f48ab65a89 Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Thu, 19 Sep 2024 18:12:11 +0200 Subject: [PATCH 07/10] Fix errors --- notebooks/auto-device/auto-device.ipynb | 8 ++++---- .../clip-language-saliency-map.ipynb | 2 +- .../cross-lingual-books-alignment.ipynb | 2 +- .../ct-segmentation-quantize/async_pipeline.py | 2 +- .../dolly-2-instruction-following.ipynb | 2 +- notebooks/gpu-device/gpu-device.ipynb | 16 ++++++++-------- notebooks/hello-npu/hello-npu.ipynb | 12 ++++++------ .../llm-agent-functioncall-qwen.ipynb | 8 ++++---- .../llm-agent-rag-llamaindex.ipynb | 2 +- .../llm-agent-react-langchain.ipynb | 8 ++++---- notebooks/llm-chatbot/llm-chatbot.ipynb | 2 +- .../llm-rag-langchain/llm-rag-langchain.ipynb | 2 +- .../llm-rag-llamaindex/llm-rag-llamaindex.ipynb | 2 +- .../pose-estimation-webcam/pose-estimation.ipynb | 2 +- .../vision-monodepth/vision-monodepth.ipynb | 2 +- .../notebooks/phi3_chatbot_demo.ipynb | 4 ++-- .../notebooks/phi3_rag_on_client.ipynb | 4 ++-- supplementary_materials/qwen2/chat.py | 2 +- 18 files changed, 41 insertions(+), 41 deletions(-) diff --git a/notebooks/auto-device/auto-device.ipynb b/notebooks/auto-device/auto-device.ipynb index a5d17ab0896..7482c9f2fb8 100644 --- a/notebooks/auto-device/auto-device.ipynb +++ b/notebooks/auto-device/auto-device.ipynb @@ -191,7 +191,7 @@ "\n", "\n", "# Set LOG_LEVEL to LOG_INFO.\n", - "core.set_property(\"AUTO\", {log.level: log.Level.INFO})\n", + "core.set_property(\"AUTO\", {log.level(): log.Level.INFO})\n", "\n", "# Load the model onto the target device.\n", "compiled_model = core.compile_model(ov_model)\n", @@ -252,7 +252,7 @@ ], "source": [ "# Set LOG_LEVEL to LOG_NONE.\n", - "core.set_property(\"AUTO\", {log.level: log.Level.NO})\n", + "core.set_property(\"AUTO\", {log.level(): log.Level.NO})\n", "\n", "compiled_model = core.compile_model(model=ov_model, device_name=\"AUTO\")\n", "\n", @@ -622,7 +622,7 @@ "print(\"Compiling Model for AUTO device with THROUGHPUT hint\")\n", "sys.stdout.flush()\n", "\n", - "compiled_model = core.compile_model(model=ov_model, config={hints.performance_mode: hints.PerformanceMode.THROUGHPUT})\n", + "compiled_model = core.compile_model(model=ov_model, 
config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT})\n", "\n", "infer_queue = ov.AsyncInferQueue(compiled_model, 0) # Setting to 0 will query optimal number by default.\n", "infer_queue.set_callback(completion_callback)\n", @@ -686,7 +686,7 @@ "print(\"Compiling Model for AUTO Device with LATENCY hint\")\n", "sys.stdout.flush()\n", "\n", - "compiled_model = core.compile_model(model=ov_model, config={hints.performance_mode: hints.PerformanceMode.LATENCY})\n", + "compiled_model = core.compile_model(model=ov_model, config={hints.performance_mode(): hints.PerformanceMode.LATENCY})\n", "\n", "# Setting to 0 will query optimal number by default.\n", "infer_queue = ov.AsyncInferQueue(compiled_model, 0)\n", diff --git a/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb b/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb index 088b5ec4e1e..2b8c9138b5a 100644 --- a/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb +++ b/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb @@ -767,7 +767,7 @@ "image_model = core.compile_model(\n", " model=image_model,\n", " device_name=device.value,\n", - " config={hints.performance_mode: hints.PerformanceMode.THROUGHPUT},\n", + " config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT},\n", ")" ] }, diff --git a/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb b/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb index 622d75cdaa3..8f8408cda5f 100644 --- a/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb +++ b/notebooks/cross-lingual-books-alignment/cross-lingual-books-alignment.ipynb @@ -1111,7 +1111,7 @@ "compiled_throughput_hint = core.compile_model(\n", " ov_model,\n", " device_name=device.value,\n", - " config={hints.performance_mode: hints.PerformanceMode.THROUGHPUT},\n", + " config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT},\n", ")" ] }, diff --git a/notebooks/ct-segmentation-quantize/async_pipeline.py b/notebooks/ct-segmentation-quantize/async_pipeline.py index 89b23eccdad..ee7f31f1d1d 100644 --- a/notebooks/ct-segmentation-quantize/async_pipeline.py +++ b/notebooks/ct-segmentation-quantize/async_pipeline.py @@ -170,7 +170,7 @@ def __init__(self, ie, model, plugin_config, device="CPU", max_num_requests=0): cache_path.mkdir(exist_ok=True) # Enable model caching for GPU devices if "GPU" in device and "GPU" in ie.available_devices: - ie.set_property(device_name="GPU", properties={props.cache_dir: str(cache_path)}) + ie.set_property(device_name="GPU", properties={props.cache_dir(): str(cache_path)}) self.model = model self.logger = logging.getLogger() diff --git a/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb b/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb index 8f297d93be5..3e2efebb991 100644 --- a/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb +++ b/notebooks/dolly-2-instruction-following/dolly-2-instruction-following.ipynb @@ -585,7 +585,7 @@ "\n", "current_device = device.value\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "ov_model = OVModelForCausalLM.from_pretrained(model_dir, device=current_device, ov_config=ov_config)" ] diff --git 
a/notebooks/gpu-device/gpu-device.ipynb b/notebooks/gpu-device/gpu-device.ipynb index 756f9d04144..5a27a471a38 100644 --- a/notebooks/gpu-device/gpu-device.ipynb +++ b/notebooks/gpu-device/gpu-device.ipynb @@ -680,7 +680,7 @@ "core = ov.Core()\n", "\n", "# Set cache folder\n", - "core.set_property({props.cache_dir: cache_folder})\n", + "core.set_property({props.cache_dir(): cache_folder})\n", "\n", "# Compile the model as before\n", "model = core.read_model(model=model_path)\n", @@ -720,7 +720,7 @@ "source": [ "start = time.time()\n", "core = ov.Core()\n", - "core.set_property({props.cache_dir: \"cache\"})\n", + "core.set_property({props.cache_dir(): \"cache\"})\n", "model = core.read_model(model=model_path)\n", "compiled_model = core.compile_model(model, device)\n", "print(f\"Cache enabled - compile time: {time.time() - start}s\")\n", @@ -768,7 +768,7 @@ "id": "7077b662-22f3-4c52-9c80-e5ac1309c482", "metadata": {}, "source": [ - "To use the \"LATENCY\" performance hint, add `{hints.performance_mode: hints.PerformanceMode.LATENCY}` when compiling the model as shown below. For GPUs, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." + "To use the \"LATENCY\" performance hint, add `{hints.performance_mode(): hints.PerformanceMode.LATENCY}` when compiling the model as shown below. For GPUs, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." ] }, { @@ -786,7 +786,7 @@ "import openvino.properties.hint as hints\n", "\n", "\n", - "compiled_model = core.compile_model(model, device, {hints.performance_mode: hints.PerformanceMode.LATENCY})" + "compiled_model = core.compile_model(model, device, {hints.performance_mode(): hints.PerformanceMode.LATENCY})" ] }, { @@ -795,7 +795,7 @@ "id": "06589f38-ce35-457f-8395-a4a3f6327ea0", "metadata": {}, "source": [ - "To use the \"THROUGHPUT\" performance hint, add `{hints.performance_mode: hints.PerformanceMode.THROUGHPUT}` when compiling the model. For GPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." + "To use the \"THROUGHPUT\" performance hint, add `{hints.performance_mode(): hints.PerformanceMode.THROUGHPUT}` when compiling the model. For GPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." ] }, { @@ -810,7 +810,7 @@ }, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {hints.performance_mode: hints.PerformanceMode.THROUGHPUT})" + "compiled_model = core.compile_model(model, device, {hints.performance_mode(): hints.PerformanceMode.THROUGHPUT})" ] }, { @@ -844,7 +844,7 @@ "`\n", "\n", "\n", - "compiled_model = core.compile_model(model=model, device_name=\"AUTO\", config={hints.performance_mode: hints.PerformanceMode.CUMULATIVE_THROUGHPUT})\n", + "compiled_model = core.compile_model(model=model, device_name=\"AUTO\", config={hints.performance_mode(): hints.PerformanceMode.CUMULATIVE_THROUGHPUT})\n", "`\n", "\n", "> **Important**: **The “THROUGHPUT”, “MULTI”, and “CUMULATIVE_THROUGHPUT” modes are only applicable to asynchronous inferencing pipelines. 
The example at the end of this article shows how to set up an asynchronous pipeline that takes advantage of parallelism to increase throughput.** To learn more, see [Asynchronous Inferencing](https://docs.openvino.ai/2024/documentation/openvino-extensibility/openvino-plugin-library/asynch-inference-request.html) in OpenVINO as well as the [Asynchronous Inference notebook](../async-api/async-api.ipynb)." @@ -1592,7 +1592,7 @@ "# Read model and compile it on GPU in THROUGHPUT mode\n", "model = core.read_model(model=model_path)\n", "device_name = \"GPU\"\n", - "compiled_model = core.compile_model(model=model, device_name=device_name, config={hints.performance_mode: hints.PerformanceMode.THROUGHPUT})\n", + "compiled_model = core.compile_model(model=model, device_name=device_name, config={hints.performance_mode(): hints.PerformanceMode.THROUGHPUT})\n", "\n", "# Get the input and output nodes\n", "input_layer = compiled_model.input(0)\n", diff --git a/notebooks/hello-npu/hello-npu.ipynb b/notebooks/hello-npu/hello-npu.ipynb index 3bea3d27c51..cc2062e77b4 100644 --- a/notebooks/hello-npu/hello-npu.ipynb +++ b/notebooks/hello-npu/hello-npu.ipynb @@ -530,7 +530,7 @@ "core = ov.Core()\n", "\n", "# Set cache folder\n", - "core.set_property({props.cache_dir: cache_folder})\n", + "core.set_property({props.cache_dir(): cache_folder})\n", "\n", "# Compile the model\n", "model = core.read_model(model=model_path)\n", @@ -541,7 +541,7 @@ "core = ov.Core()\n", "\n", "# Set cache folder\n", - "core.set_property({props.cache_dir: cache_folder})\n", + "core.set_property({props.cache_dir(): cache_folder})\n", "\n", "# Compile the model as before\n", "model = core.read_model(model=model_path)\n", @@ -609,7 +609,7 @@ "id": "1cccd1b5-4d5a-41f3-8d8a-4ee0bc235a9e", "metadata": {}, "source": [ - "To use the \"LATENCY\" performance hint, add `{hints.performance_mode: hints.PerformanceMode.LATENCY}` when compiling the model as shown below. For NPU, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." + "To use the \"LATENCY\" performance hint, add `{hints.performance_mode(): hints.PerformanceMode.LATENCY}` when compiling the model as shown below. For NPU, this automatically minimizes the batch size and number of parallel streams such that all of the compute resources can focus on completing a single inference as fast as possible." ] }, { @@ -622,7 +622,7 @@ "import openvino.properties.hint as hints\n", "\n", "\n", - "compiled_model = core.compile_model(model, device, {hints.performance_mode: hints.PerformanceMode.LATENCY})" + "compiled_model = core.compile_model(model, device, {hints.performance_mode(): hints.PerformanceMode.LATENCY})" ] }, { @@ -630,7 +630,7 @@ "id": "7ca1f3d8-202c-4a98-85bc-b66110120dfb", "metadata": {}, "source": [ - "To use the \"THROUGHPUT\" performance hint, add `{hints.performance_mode: hints.PerformanceMode.THROUGHPUT}` when compiling the model. For NPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." + "To use the \"THROUGHPUT\" performance hint, add `{hints.performance_mode(): hints.PerformanceMode.THROUGHPUT}` when compiling the model. For NPUs, this creates multiple processing streams to efficiently utilize all the execution cores and optimizes the batch size to fill the available memory." 
] }, { @@ -640,7 +640,7 @@ "metadata": {}, "outputs": [], "source": [ - "compiled_model = core.compile_model(model, device, {hints.performance_mode: hints.PerformanceMode.THROUGHPUT})" + "compiled_model = core.compile_model(model, device, {hints.performance_mode(): hints.PerformanceMode.THROUGHPUT})" ] }, { diff --git a/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb b/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb index 5783c34cd88..7c7e4e9fdd6 100644 --- a/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb +++ b/notebooks/llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb @@ -287,7 +287,7 @@ "import openvino.properties.streams as streams\n", "\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "llm_cfg = {\n", "    \"ov_model_dir\": model_path,\n", "    \"model_type\": \"openvino\",\n", @@ -318,9 +318,9 @@ "ov_config = {\n", "    \"KV_CACHE_PRECISION\": \"u8\",\n", "    \"DYNAMIC_QUANTIZATION_GROUP_SIZE\": \"32\",\n", - "    hints.performance_mode: hints.PerformanceMode.LATENCY,\n", - "    streams.num: streams.Num(1),\n", - "    props.cache_dir: \"\",\n", + "    hints.performance_mode(): hints.PerformanceMode.LATENCY,\n", + "    streams.num(): \"1\",\n", + "    props.cache_dir(): \"\",\n", "}" ] }, diff --git a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb index 92dbb39c864..89d930bbd03 100644 --- a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb +++ b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb @@ -306,7 +306,7 @@ "import openvino.properties.streams as streams\n", "\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "\n", "def phi_completion_to_prompt(completion):\n", diff --git a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb index f40db4af033..e60dc38f816 100644 --- a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb +++ b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb @@ -407,7 +407,7 @@ "        return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences)\n", "\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "stop_tokens = [\"Observation:\"]\n", "\n", "ov_llm = HuggingFacePipeline.from_model_id(\n", @@ -446,9 +446,9 @@ "ov_config = {\n", "    \"KV_CACHE_PRECISION\": \"u8\",\n", "    \"DYNAMIC_QUANTIZATION_GROUP_SIZE\": \"32\",\n", - "    hints.performance_mode: hints.PerformanceMode.LATENCY,\n", - "    streams.num: streams.Num(1),\n", - "    props.cache_dir: \"\",\n", + "    hints.performance_mode(): hints.PerformanceMode.LATENCY,\n", + "    streams.num(): \"1\",\n", + "    props.cache_dir(): \"\",\n", "}" ] }, diff --git a/notebooks/llm-chatbot/llm-chatbot.ipynb b/notebooks/llm-chatbot/llm-chatbot.ipynb index d7962690de3..0acfaf39044 100644 --- a/notebooks/llm-chatbot/llm-chatbot.ipynb +++ b/notebooks/llm-chatbot/llm-chatbot.ipynb @@ -941,7 +941,7
@@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "if \"GPU\" in device.value and \"qwen2-7b-instruct\" in model_id.value:\n", " ov_config[\"GPU_ENABLE_SDPA_OPTIMIZATION\"] = \"NO\"\n", diff --git a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb index ab638d4be66..527834c7b7d 100644 --- a/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb +++ b/notebooks/llm-rag-langchain/llm-rag-langchain.ipynb @@ -1306,7 +1306,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "if \"GPU\" in llm_device.value and \"qwen2-7b-instruct\" in llm_model_id.value:\n", " ov_config[\"GPU_ENABLE_SDPA_OPTIMIZATION\"] = \"NO\"\n", diff --git a/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb b/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb index 0bcee402b08..4e46df09a1f 100644 --- a/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb +++ b/notebooks/llm-rag-llamaindex/llm-rag-llamaindex.ipynb @@ -1284,7 +1284,7 @@ " model_dir = fp16_model_dir\n", "print(f\"Loading model from {model_dir}\")\n", "\n", - "ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: \"\"}\n", + "ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): \"1\", props.cache_dir(): \"\"}\n", "\n", "stop_tokens = llm_model_configuration.get(\"stop_tokens\")\n", "completion_to_prompt = llm_model_configuration.get(\"completion_to_prompt\")\n", diff --git a/notebooks/pose-estimation-webcam/pose-estimation.ipynb b/notebooks/pose-estimation-webcam/pose-estimation.ipynb index 7deee9fb68f..66213ac6118 100644 --- a/notebooks/pose-estimation-webcam/pose-estimation.ipynb +++ b/notebooks/pose-estimation-webcam/pose-estimation.ipynb @@ -163,7 +163,7 @@ "# Read the network from a file.\n", "model = core.read_model(model_path)\n", "# Let the AUTO device decide where to load the model (you can use CPU, GPU as well).\n", - "compiled_model = core.compile_model(model=model, device_name=device.value, config={hints.performance_mode: hints.PerformanceMode.LATENCY})\n", + "compiled_model = core.compile_model(model=model, device_name=device.value, config={hints.performance_mode(): hints.PerformanceMode.LATENCY})\n", "\n", "# Get the input and output names of nodes.\n", "input_layer = compiled_model.input(0)\n", diff --git a/notebooks/vision-monodepth/vision-monodepth.ipynb b/notebooks/vision-monodepth/vision-monodepth.ipynb index b8b26f56bba..280e81ef03f 100644 --- a/notebooks/vision-monodepth/vision-monodepth.ipynb +++ b/notebooks/vision-monodepth/vision-monodepth.ipynb @@ -305,7 +305,7 @@ "cache_folder.mkdir(exist_ok=True)\n", "\n", "core = ov.Core()\n", - "core.set_property({props.cache_dir: cache_folder})\n", + "core.set_property({props.cache_dir(): cache_folder})\n", "model = core.read_model(model_xml_path)\n", "compiled_model = core.compile_model(model=model, device_name=device.value)\n", "\n", diff --git 
a/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb b/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb index d2cd1f4b830..9fc218a7d91 100644 --- a/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb +++ b/supplementary_materials/notebooks/phi3_chatbot_demo.ipynb @@ -138,9 +138,9 @@ "load_kwargs = {\n", " \"device\": device,\n", " \"ov_config\": {\n", - " hints.performance_mode: hints.PerformanceMode.LATENCY,\n", + " hints.performance_mode(): hints.PerformanceMode.LATENCY,\n", " hints.inference_precision: precision,\n", - " props.cache_dir: os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", + " props.cache_dir(): os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", " },\n", " \"compile\": False,\n", " \"quantization_config\": quantization_config,\n", diff --git a/supplementary_materials/notebooks/phi3_rag_on_client.ipynb b/supplementary_materials/notebooks/phi3_rag_on_client.ipynb index fbc1f8ae6d6..7a5682ac98a 100644 --- a/supplementary_materials/notebooks/phi3_rag_on_client.ipynb +++ b/supplementary_materials/notebooks/phi3_rag_on_client.ipynb @@ -298,9 +298,9 @@ "load_kwargs = {\n", " \"device\": device,\n", " \"ov_config\": {\n", - " hints.performance_mode: hints.PerformanceMode.LATENCY,\n", + " hints.performance_mode(): hints.PerformanceMode.LATENCY,\n", " hints.inference_precision: precision,\n", - " props.cache_dir: os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", + " props.cache_dir(): os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n", " },\n", " \"quantization_config\": quantization_config,\n", " \"trust_remote_code\": True,\n", diff --git a/supplementary_materials/qwen2/chat.py b/supplementary_materials/qwen2/chat.py index 901bdab1d1f..b426a901cd2 100644 --- a/supplementary_materials/qwen2/chat.py +++ b/supplementary_materials/qwen2/chat.py @@ -30,7 +30,7 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa args = parser.parse_args() model_dir = args.model_path - ov_config = {hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: streams.Num(1), props.cache_dir: ""} + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} tokenizer = AutoTokenizer.from_pretrained(model_dir) print("====Compiling model====") From 4e9dcb72af0aba5791fad87a9384683d5752102c Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Thu, 19 Sep 2024 19:58:13 +0200 Subject: [PATCH 08/10] Fix llm-agent notebooks --- notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb | 1 + notebooks/llm-agent-react/llm-agent-react-langchain.ipynb | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb index 89d930bbd03..ce8d5893b7c 100644 --- a/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb +++ b/notebooks/llm-agent-react/llm-agent-rag-llamaindex.ipynb @@ -90,6 +90,7 @@ " \"--extra-index-url\",\n", " \"https://download.pytorch.org/whl/cpu\",\n", " \"llama-index\",\n", + " \"llama-index-llms-huggingface==0.3.3\", # pin to keep compatibility due to https://github.com/run-llama/llama_index/commit/f037de8d0471b37f9c4069ebef5dfb329633d2c6\n", " \"llama-index-readers-file\",\n", " \"llama-index-llms-openvino>=0.2.2\",\n", " \"llama-index-embeddings-openvino>=0.2.0\",\n", diff --git 
a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb index e60dc38f816..d92e39294f0 100644 --- a/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb +++ b/notebooks/llm-agent-react/llm-agent-react-langchain.ipynb @@ -621,10 +621,11 @@ "source": [ "from langchain_community.tools import WikipediaQueryRun\n", "from langchain_community.utilities import WikipediaAPIWrapper\n", - "from langchain_core.pydantic_v1 import BaseModel, Field\n", "from langchain_core.callbacks import CallbackManagerForToolRun\n", "from typing import Optional\n", "\n", + "from pydantic import BaseModel, Field\n", + "\n", "\n", "class WikipediaQueryRunWrapper(WikipediaQueryRun):\n", " def _run(\n", From b1396bd88d0b935203e49e1769b5075bc540aab5 Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Thu, 19 Sep 2024 21:16:09 +0200 Subject: [PATCH 09/10] Skip python3.8 for llm-agent-react/llm-agent-react-langchain.ipynb --- .ci/skipped_notebooks.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.ci/skipped_notebooks.yml b/.ci/skipped_notebooks.yml index ddde19d8561..6974caacd8e 100644 --- a/.ci/skipped_notebooks.yml +++ b/.ci/skipped_notebooks.yml @@ -586,4 +586,8 @@ - python: - '3.8' - os: - - macos-12 \ No newline at end of file + - macos-12 +- notebook: notebooks/llm-agent-react/llm-agent-react-langchain.ipynb + skips: + - python: + - '3.8' \ No newline at end of file From 2331df3a67b5abf7b35a889517cdae158181356c Mon Sep 17 00:00:00 2001 From: Aleksandr Mokrov Date: Fri, 20 Sep 2024 13:01:56 +0200 Subject: [PATCH 10/10] Restrict numpy version <2 to fix error --- notebooks/auto-device/auto-device.ipynb | 2 +- .../clip-language-saliency-map/clip-language-saliency-map.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/auto-device/auto-device.ipynb b/notebooks/auto-device/auto-device.ipynb index 7482c9f2fb8..5f4a42bd954 100644 --- a/notebooks/auto-device/auto-device.ipynb +++ b/notebooks/auto-device/auto-device.ipynb @@ -70,7 +70,7 @@ "import platform\n", "\n", "# Install required packages\n", - "%pip install -q \"openvino>=2023.1.0\" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu\n", + "%pip install -q \"openvino>=2023.1.0\" \"numpy<2\" Pillow torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu\n", "\n", "if platform.system() != \"Windows\":\n", " %pip install -q \"matplotlib>=3.4\"\n", diff --git a/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb b/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb index 2b8c9138b5a..b3d0d07c022 100644 --- a/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb +++ b/notebooks/clip-language-saliency-map/clip-language-saliency-map.ipynb @@ -90,7 +90,7 @@ "source": [ "# Install requirements\n", "%pip install -q \"openvino>=2023.1.0\"\n", - "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers \"torch>=2.1\" \"gradio>=4.19\"" + "%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu transformers \"numpy<2\" \"torch>=2.1\" \"gradio>=4.19\"" ] }, {