From 2fc5a4c8dc51d57c638bcea61b159d72e95dcb29 Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Fri, 5 Jul 2024 09:33:36 +0400 Subject: [PATCH] update vlm models for transformers 4.42 (#2172) --- .../kosmos2-multimodal-large-language-model.ipynb | 1 + .../llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb | 1 + .../videollava-multimodal-chatbot.ipynb | 1 + .../llava-next-multimodal-chatbot.ipynb | 1 + .../mobilevlm-language-assistant.ipynb | 1 + .../nano-llava-multimodal-chatbot.ipynb | 5 ++--- 6 files changed, 7 insertions(+), 3 deletions(-) diff --git a/notebooks/kosmos2-multimodal-large-language-model/kosmos2-multimodal-large-language-model.ipynb b/notebooks/kosmos2-multimodal-large-language-model/kosmos2-multimodal-large-language-model.ipynb index b00b60ddfb5..6b3e6b8a02a 100644 --- a/notebooks/kosmos2-multimodal-large-language-model/kosmos2-multimodal-large-language-model.ipynb +++ b/notebooks/kosmos2-multimodal-large-language-model/kosmos2-multimodal-large-language-model.ipynb @@ -766,6 +766,7 @@ " out_features=model.text_model.config.vocab_size,\n", " bias=False,\n", " )\n", + " self._supports_cache_class = False\n", "\n", " def get_input_embeddings(self) -> nn.Module:\n", " return self.model.embed_tokens\n", diff --git a/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb b/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb index 7d824ba1cbe..eae217c5461 100644 --- a/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb +++ b/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb @@ -929,6 +929,7 @@ " self.main_input_name = \"input_ids\"\n", " self.device = torch.device(\"cpu\")\n", " self.num_pkv = 2\n", + " self._supports_cache_class = False\n", "\n", " def can_generate(self):\n", " \"\"\"Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.\"\"\"\n", diff --git a/notebooks/llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb b/notebooks/llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb index 14b7566aa55..9a0e375478e 100644 --- a/notebooks/llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb +++ b/notebooks/llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb @@ -793,6 +793,7 @@ " self.main_input_name = \"input_ids\"\n", " self.device = torch.device(\"cpu\")\n", " self.num_pkv = 2\n", + " self._supports_cache_class = False\n", "\n", " def can_generate(self):\n", " \"\"\"Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.\"\"\"\n", diff --git a/notebooks/llava-next-multimodal-chatbot/llava-next-multimodal-chatbot.ipynb b/notebooks/llava-next-multimodal-chatbot/llava-next-multimodal-chatbot.ipynb index e9f679a6b9f..2c96e32aa2b 100644 --- a/notebooks/llava-next-multimodal-chatbot/llava-next-multimodal-chatbot.ipynb +++ b/notebooks/llava-next-multimodal-chatbot/llava-next-multimodal-chatbot.ipynb @@ -887,6 +887,7 @@ " self.image_newline = torch.zeros(self.config.text_config.hidden_size, dtype=torch.float32)\n", " self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1\n", " self.past_len = 0\n", + " self._supports_cache_class = False\n", "\n", " def can_generate(self):\n", " \"\"\"Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.\"\"\"\n", diff --git a/notebooks/mobilevlm-language-assistant/mobilevlm-language-assistant.ipynb b/notebooks/mobilevlm-language-assistant/mobilevlm-language-assistant.ipynb index a18e8336c50..ddc57d5888b 100644 --- a/notebooks/mobilevlm-language-assistant/mobilevlm-language-assistant.ipynb +++ b/notebooks/mobilevlm-language-assistant/mobilevlm-language-assistant.ipynb @@ -464,6 +464,7 @@ " self.key_value_output_names = [key for key in self.output_names if \"present\" in key]\n", " stage2 = core.compile_model(self.stage2, device)\n", " self.request = stage2.create_infer_request()\n", + " self._supports_cache_class = False\n", "\n", " def can_generate(self):\n", " \"\"\"Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.\"\"\"\n", diff --git a/notebooks/nano-llava-multimodal-chatbot/nano-llava-multimodal-chatbot.ipynb b/notebooks/nano-llava-multimodal-chatbot/nano-llava-multimodal-chatbot.ipynb index 00196abc8a5..74a1aab1b4b 100644 --- a/notebooks/nano-llava-multimodal-chatbot/nano-llava-multimodal-chatbot.ipynb +++ b/notebooks/nano-llava-multimodal-chatbot/nano-llava-multimodal-chatbot.ipynb @@ -657,6 +657,7 @@ " self.device = torch.device(\"cpu\")\n", " self.num_pkv = 2\n", " self.image_processor = ImageProcessor()\n", + " self._supports_cache_class = False\n", "\n", " def can_generate(self):\n", " \"\"\"Returns True to validate the check that the model using `GenerationMixin.generate()` can indeed generate.\"\"\"\n", @@ -923,9 +924,7 @@ "\n", "\n", "### Select device\n", - "[back to top ⬆️](#Table-of-contents:)\n", - "\n", - "### Select device" + "[back to top ⬆️](#Table-of-contents:)" ] }, {