Skip to content

Commit

Permalink
Transition to OV 2024.4 (#914)
Browse files Browse the repository at this point in the history
* Add GroupNormalization to ignored scope; tweak minicpm test

* Check tests on OV nightly

* Make ignored scope non-strict

* Update to openvino 2024.4.1 dev

* Minor reorder

* Style
  • Loading branch information
nikita-savelyevv authored Sep 30, 2024
1 parent 19c3983 commit d20554e
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 4 deletions.
16 changes: 14 additions & 2 deletions optimum/intel/openvino/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,13 +415,20 @@ def _quantize_ovbasemodel(
if calibration_dataset is None:
raise ValueError("Calibration dataset is required to run quantization.")

# TODO: remove after update to NNCF 2.14
model_type = nncf.ModelType(quantization_config.model_type)
ignored_scope = quantization_config.get_ignored_scope_instance()
if model_type == nncf.ModelType.TRANSFORMER:
ignored_scope.types += ["GroupNormalization"]
ignored_scope.validate = False

# Actual model quantization
quantized_model = nncf.quantize(
self.model.model,
calibration_dataset,
subset_size=quantization_config.num_samples,
- ignored_scope=quantization_config.get_ignored_scope_instance(),
- model_type=nncf.ModelType(quantization_config.model_type),
+ ignored_scope=ignored_scope,
+ model_type=model_type,
preset=nncf.QuantizationPreset.PERFORMANCE if quantization_config.sym else nncf.QuantizationPreset.MIXED,
fast_bias_correction=quantization_config.fast_bias_correction,
advanced_parameters=nncf.AdvancedQuantizationParameters(
Expand Down Expand Up @@ -916,6 +923,11 @@ def _hybrid_quantization(

ptq_ignored_scope = quantization_config.get_ignored_scope_instance()
ptq_ignored_scope.names += ops_to_compress

# TODO: remove after update to NNCF 2.14
ptq_ignored_scope.types += ["GroupNormalization"]
ptq_ignored_scope.validate = False

subset_size = quantization_config.num_samples if quantization_config.num_samples else 200
quantized_model = nncf.quantize(
model=compressed_model,
Expand Down
6 changes: 5 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@

EXTRAS_REQUIRE = {
"neural-compressor": ["neural-compressor[pt]>3.0", "accelerate"],
- "openvino": ["openvino>=2023.3,<2024.4", "nncf>=2.11.0", "openvino-tokenizers[transformers]<2024.4"],
+ "openvino": [
+ "openvino==2024.4.1.dev20240926",
+ "nncf>=2.11.0",
+ "openvino-tokenizers[transformers]==2024.4.1.0.dev20240926",
+ ],
"nncf": ["nncf>=2.11.0"],
"ipex": ["intel-extension-for-pytorch", "transformers>=4.39,<4.45"],
"diffusers": ["diffusers"],
Expand Down
3 changes: 2 additions & 1 deletion tests/openvino/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,8 @@ def test_compare_to_transformers(self, model_arch):
transformers_outputs = transformers_model(**tokens)

# Compare tensor outputs
- self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, equal_nan=True, atol=1e-4))
+ atol = 1e-3 if model_arch == "minicpm" else 1e-4
+ self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, equal_nan=True, atol=atol))

# Qwen tokenizer does not support padding

Expand Down

0 comments on commit d20554e

Please sign in to comment.