move on transformers 4.45
eaidova committed Sep 26, 2024
1 parent 581262a commit cdad697
Showing 3 changed files with 303 additions and 175 deletions.
450 changes: 288 additions & 162 deletions notebooks/mllama-3.2/mllama-3.2.ipynb

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions notebooks/mllama-3.2/ov_mllama_compression.py
@@ -97,17 +97,17 @@ def compress(
         shutil.rmtree(saving_path)
         shutil.rmtree(saving_path.with_suffix(".bin"))
     else:
-        print(f"Compressed model already exists and can be found in {saving_path}")
+        print(f"Compressed model already exists and can be found in {saving_path}")
         return saving_path
     nncf_dataset = None
     if awq or lora or gptq or scale_estimation:
-        print("Dataset preparation started")
+        print("Dataset preparation started")
         dataset = prepare_dataset_llm(model_dir, dataset_size)
         nncf_dataset = Dataset(dataset)
         gc.collect()
-        print("Dataset preparation finished")
+        print("Dataset preparation finished")
 
-    print("Model compression started")
+    print("Model compression started")
     print(
         f"Compression parameters:\n\talgorithm - {algo}\n\tgroup size - {group_size}\n\tratio - {ratio}\n\tawq - {awq}\n\tscale estimation - {scale_estimation}\n\tlora correction - {lora}\n\tgptq - {gptq}\n\tall_layers - {all_layers}"
     )
@@ -130,6 +130,6 @@ def compress(
     del lm_model
     gc.collect()
 
-    print(f"Model compression finished. Compressed model can be found in {saving_path}")
+    print(f"Model compression finished. Compressed model can be found in {saving_path}")
 
     return saving_path
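
Note: the hunks above show only fragments of compress(). As a point of reference, a hypothetical invocation is sketched below; the keyword names are taken from the "Compression parameters" printout, while the actual signature, defaults, and types are not visible in this diff.

    # Hypothetical usage sketch for compress(); keyword names mirror the
    # "Compression parameters" printout above, not a verified signature.
    from pathlib import Path

    from ov_mllama_compression import compress

    compressed_path = compress(
        model_dir=Path("mllama-3.2-ov"),  # assumed directory with the converted IRs
        algo="int4",                      # weight-compression algorithm
        group_size=64,                    # quantization group size
        ratio=1.0,                        # share of weights compressed to 4 bit
        awq=True,                         # activation-aware weight quantization
        scale_estimation=True,            # data-driven refinement of quantization scales
        lora=False,                       # LoRA-based error correction
        gptq=False,                       # GPTQ weight rounding
        all_layers=False,                 # compress all layers, including embeddings
        dataset_size=64,                  # calibration samples for data-aware modes
    )
    print(f"Compressed model stored at {compressed_path}")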
18 changes: 10 additions & 8 deletions notebooks/mllama-3.2/ov_mllama_helper.py
@@ -261,18 +261,19 @@ def convert_mllama(model_id, out_dir):

     requires_conversion = not all([img_encoder_path.exists(), lang_model_path.exists()])
     if not requires_conversion:
-        print(f"model already converted and can be found in {out_dir}")
+        print(f"✅ Model already converted and can be found in {out_dir}")
         return
-    print("Load original model")
+    print("Load original model")
     model = MllamaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16)
     model.eval()
     model.config.save_pretrained(out_dir)
     model.generation_config.save_pretrained(out_dir)
+    __make_16bit_traceable(model)
     processor = AutoProcessor.from_pretrained(model_id)
     processor.save_pretrained(out_dir)
 
     if not img_encoder_path.exists():
-        print("Convert vision model...")
+        print("Convert vision model...")
 
         class VisionEncoder(torch.nn.Module):
             def __init__(self, model):
@@ -281,7 +282,7 @@ def __init__(self, model):

             def forward(self, pixel_values, aspect_ratio_ids, aspect_ratio_mask):
                 bsz = pixel_values.shape[0]
-                cross_attention_states = self.model.vision_model(pixel_values, aspect_ratio_ids, aspect_ratio_mask)
+                cross_attention_states = self.model.vision_model(pixel_values, aspect_ratio_ids, aspect_ratio_mask)[0]
                 cross_attention_states = self.model.multi_modal_projector(cross_attention_states).reshape(
                     -1, cross_attention_states.shape[-2], self.model.hidden_size
                 )
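
Note: the [0] added above matches transformers 4.45, where the vision tower returns a model-output object rather than a bare tensor, and its first element is the hidden-state tensor the multi-modal projector expects. A standalone illustration of that indexing convention (toy shapes, unrelated to the real model):

    import torch
    from transformers.modeling_outputs import BaseModelOutput

    # transformers output objects support integer indexing: out[0] returns the
    # first tensor field, i.e. the last hidden state for encoder-style models
    out = BaseModelOutput(last_hidden_state=torch.zeros(1, 4, 8))
    assert out[0] is out.last_hidden_state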
@@ -303,7 +304,6 @@ def forward(self, pixel_values, aspect_ratio_ids, aspect_ratio_mask):

         image_encoder = VisionEncoder(model)
         image_encoder.eval()
-        __make_16bit_traceable(image_encoder)
 
         with torch.no_grad():
             ov_model = ov.convert_model(
@@ -329,9 +329,10 @@ def forward(self, pixel_values, aspect_ratio_ids, aspect_ratio_mask):
         del image_encoder
         gc.collect()
 
-        print("Vision model successfully converted")
+        print("Vision model successfully converted")
 
     if not lang_model_path.exists():
+        print("⌛ Convert language model...")
 
         def lm_forward_wrapper(
             self,
@@ -459,8 +460,6 @@ def cross_attn_forward(

         example_input["past_key_values"] = past_key_values
         example_input["cross_attn_key_values"] = cross_attn_key_values
-
-        __make_16bit_traceable(model.language_model)
         model.language_model.eval()
 
         with torch.no_grad():
@@ -481,6 +480,9 @@ def cross_attn_forward(
         cleanup_torchscript_cache()
         del model
         gc.collect()
+        print("✅ Language model successfully converted")
+    print(f"✅ Model successfully converted and can be found in {out_dir}")
 
 
 core = ov.Core()
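
Note: a minimal sketch of how the converted IRs might be loaded afterwards. The file names below are hypothetical stand-ins; only the img_encoder_path and lang_model_path variables, not their values, are visible in this diff.

    from pathlib import Path

    import openvino as ov

    core = ov.Core()
    out_dir = Path("mllama-3.2-ov")                             # assumed conversion output directory
    img_encoder_path = out_dir / "openvino_vision_encoder.xml"  # hypothetical file name
    lang_model_path = out_dir / "openvino_language_model.xml"   # hypothetical file name

    # read_model parses the IR; compile_model prepares it for a device ("CPU", "GPU", ...)
    vision_encoder = core.compile_model(core.read_model(img_encoder_path), "CPU")
    language_model = core.compile_model(core.read_model(lang_model_path), "CPU")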
