move on transformers 4.45
eaidova committed Sep 26, 2024
1 parent 581262a commit cdad697
Showing 3 changed files with 303 additions and 175 deletions.
450 changes: 288 additions & 162 deletions notebooks/mllama-3.2/mllama-3.2.ipynb

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions notebooks/mllama-3.2/ov_mllama_compression.py
@@ -97,17 +97,17 @@ def compress(
         shutil.rmtree(saving_path)
         shutil.rmtree(saving_path.with_suffix(".bin"))
     else:
-        print(f"Compressed model already exists and can be found in {saving_path}")
+        print(f"Compressed model already exists and can be found in {saving_path}")
         return saving_path
     nncf_dataset = None
     if awq or lora or gptq or scale_estimation:
-        print("Dataset preparation started")
+        print("Dataset preparation started")
         dataset = prepare_dataset_llm(model_dir, dataset_size)
         nncf_dataset = Dataset(dataset)
         gc.collect()
-        print("Dataset preparation finished")
+        print("Dataset preparation finished")
 
-    print("Model compression started")
+    print("Model compression started")
     print(
         f"Compression parameters:\n\talgorithm - {algo}\n\tgroup size - {group_size}\n\tratio - {ratio}\n\tawq - {awq}\n\tscale estimation - {scale_estimation}\n\tlora correction - {lora}\n\tgptq - {gptq}\n\tall_layers - {all_layers}"
     )
@@ -130,6 +130,6 @@ def compress(
     del lm_model
     gc.collect()
 
-    print(f"Model compression finished. Compressed model can be found in {saving_path}")
+    print(f"Model compression finished. Compressed model can be found in {saving_path}")
 
     return saving_path
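
Note: the hunks above show only fragments of compress(). As a point of reference, a hypothetical invocation is sketched below; the keyword names are taken from the "Compression parameters" printout, while the actual signature, defaults, and types are not visible in this diff.

    # Hypothetical usage sketch for compress(); keyword names mirror the
    # "Compression parameters" printout above, not a verified signature.
    from pathlib import Path

    from ov_mllama_compression import compress

    compressed_path = compress(
        model_dir=Path("mllama-3.2-ov"),  # assumed directory with the converted IRs
        algo="int4",                      # weight-compression algorithm
        group_size=64,                    # quantization group size
        ratio=1.0,                        # share of weights compressed to 4 bit
        awq=True,                         # activation-aware weight quantization
        scale_estimation=True,            # data-driven refinement of quantization scales
        lora=False,                       # LoRA-based error correction
        gptq=False,                       # GPTQ weight rounding
        all_layers=False,                 # compress all layers, including embeddings
        dataset_size=64,                  # calibration samples for data-aware modes
    )
    print(f"Compressed model stored at {compressed_path}")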
18 changes: 10 additions & 8 deletions notebooks/mllama-3.2/ov_mllama_helper.py
@@ -261,18 +261,19 @@ def convert_mllama(model_id, out_dir):

     requires_conversion = not all([img_encoder_path.exists(), lang_model_path.exists()])
     if not requires_conversion:
-        print(f"model already converted and can be found in {out_dir}")
+        print(f"✅ Model already converted and can be found in {out_dir}")
         return
-    print("Load original model")
+    print("Load original model")
     model = MllamaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16)
     model.eval()
     model.config.save_pretrained(out_dir)
     model.generation_config.save_pretrained(out_dir)
+    __make_16bit_traceable(model)
     processor = AutoProcessor.from_pretrained(model_id)
     processor.save_pretrained(out_dir)
 
     if not img_encoder_path.exists():
-        print("Convert vision model...")
+        print("Convert vision model...")
 
         class VisionEncoder(torch.nn.Module):
             def __init__(self, model):
@@ -281,7 +282,7 @@ def __init__(self, model):

             def forward(self, pixel_values, aspect_ratio_ids, aspect_ratio_mask):
                 bsz = pixel_values.shape[0]
-                cross_attention_states = self.model.vision_model(pixel_values, aspect_ratio_ids, aspect_ratio_mask)
+                cross_attention_states = self.model.vision_model(pixel_values, aspect_ratio_ids, aspect_ratio_mask)[0]
                 cross_attention_states = self.model.multi_modal_projector(cross_attention_states).reshape(
                     -1, cross_attention_states.shape[-2], self.model.hidden_size
                 )
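
Note: the [0] added above matches transformers 4.45, where the vision tower returns a model-output object rather than a bare tensor, and its first element is the hidden-state tensor the multi-modal projector expects. A standalone illustration of that indexing convention (toy shapes, unrelated to the real model):

    import torch
    from transformers.modeling_outputs import BaseModelOutput

    # transformers output objects support integer indexing: out[0] returns the
    # first tensor field, i.e. the last hidden state for encoder-style models
    out = BaseModelOutput(last_hidden_state=torch.zeros(1, 4, 8))
    assert out[0] is out.last_hidden_state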
@@ -303,7 +304,6 @@ def forward(self, pixel_values, aspect_ratio_ids, aspect_ratio_mask):

         image_encoder = VisionEncoder(model)
         image_encoder.eval()
-        __make_16bit_traceable(image_encoder)
 
         with torch.no_grad():
             ov_model = ov.convert_model(
@@ -329,9 +329,10 @@ def forward(self, pixel_values, aspect_ratio_ids, aspect_ratio_mask):
         del image_encoder
         gc.collect()
 
-        print("Vision model successfully converted")
+        print("Vision model successfully converted")
 
     if not lang_model_path.exists():
+        print("⌛ Convert language model...")
 
         def lm_forward_wrapper(
             self,
@@ -459,8 +460,6 @@ def cross_attn_forward(

         example_input["past_key_values"] = past_key_values
         example_input["cross_attn_key_values"] = cross_attn_key_values
-
-        __make_16bit_traceable(model.language_model)
         model.language_model.eval()
 
         with torch.no_grad():
@@ -481,6 +480,9 @@ def cross_attn_forward(
         cleanup_torchscript_cache()
         del model
         gc.collect()
+        print("✅ Language model successfully converted")
+    print(f"✅ Model successfully converted and can be found in {out_dir}")
 
 
 core = ov.Core()
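
Note: a minimal sketch of how the converted IRs might be loaded afterwards. The file names below are hypothetical stand-ins; only the img_encoder_path and lang_model_path variables, not their values, are visible in this diff.

    from pathlib import Path

    import openvino as ov

    core = ov.Core()
    out_dir = Path("mllama-3.2-ov")                             # assumed conversion output directory
    img_encoder_path = out_dir / "openvino_vision_encoder.xml"  # hypothetical file name
    lang_model_path = out_dir / "openvino_language_model.xml"   # hypothetical file name

    # read_model parses the IR; compile_model prepares it for a device ("CPU", "GPU", ...)
    vision_encoder = core.compile_model(core.read_model(img_encoder_path), "CPU")
    language_model = core.compile_model(core.read_model(lang_model_path), "CPU")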
