forked from quic/efficient-transformers
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: FP16 clipping applied to INT types (quic#79)
* fix: FP16 clipping applied to int types - Create FP16Clip transform which applies correct logic for FP16 clipping - Transforms now return flag indicating transformation - FP16 clipping should be applied only for FLOAT types - Added tests for FP16Clip transform Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Update comment Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Fix gather index while running in ORT - Simplify test_causal_lm_models Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Re-enable asserts in test_causal_lm_models Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * No need of checking the onnx path Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Fix onnx path expected with suffix "_clipped_fp16" Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Pass external_data dir while clipping FP16 Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Fix export API tests Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Fix compile API tests Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * compile API test catch no onnx file found case Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * fix: Use correct suffix for unclipped onnx Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Remove unneeded config.json Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * ONNXTransform take `onnx_base_dir` param Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Fix output type and typos Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Added KVCache & CustomOps transforms Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * Removed "transforms.py" Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com> * removed assert statement from infer.py as it's already there in tests Signed-off-by: Onkar Chougule <quic_ochougul@quicinc.com> --------- Signed-off-by: Ilango Rajagopal <quic_irajagop@quicinc.com>
Signed-off-by: Onkar Chougule <quic_ochougul@quicinc.com> Co-authored-by: Onkar Chougule <quic_ochougul@quicinc.com>
- Loading branch information
Showing
15 changed files
with
191 additions
and
166 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# ----------------------------------------------------------------------------- | ||
# | ||
# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
# | ||
# ---------------------------------------------------------------------------- | ||
|
||
import os | ||
import shutil | ||
|
||
import numpy as np | ||
import onnx | ||
import pytest | ||
|
||
from QEfficient.base.onnx_transforms import FP16Clip | ||
|
||
|
||
@pytest.fixture
def external_path():
    """Yield a scratch directory for ONNX external-data files.

    A uniquely named temporary directory is created for every test instead of
    a fixed ``tmp_external_data`` folder in the current working directory.
    This way concurrent test runs cannot collide, and a pre-existing directory
    that happens to carry that name is never deleted during teardown.

    Yields:
        str: Path of the temporary directory. The directory and everything
        written into it are removed once the requesting test has finished.
    """
    import tempfile  # local import: only this fixture needs it

    with tempfile.TemporaryDirectory(prefix="tmp_external_data_") as external_dir:
        yield external_dir
|
||
|
||
def test_fp16clip_transform():
    """Verify FP16Clip on in-graph constants.

    The model holds a float initializer of 65505.0, a float ``Constant`` node
    of -1e7 and an int64 initializer of 2147483647. After the transform the two
    float values are expected at +/-65504.0 while the int64 value is expected
    to be unchanged.
    """
    model_text = """
    <
        ir_version: 8,
        opset_import: ["" : 17]
    >
    test_fp16clip (float [n, 32] x) => (float [n, 32] y)
    <
        float val1 = {65505.0},
        int64[1] slice_ends = {2147483647},
        float zero = {0.0}
    >
    {
        mask = Greater(x, zero)
        val2 = Constant<value = float {-1e7}>()
        masked = Where(mask, val1, val2)
        slice_starts = Constant<value = int64[1] {0}>()
        y = Slice(masked, slice_starts, slice_ends)
    }
    """
    model = onnx.parser.parse_model(model_text)
    onnx.checker.check_model(model, True, True, True)

    clipped_model, was_transformed = FP16Clip.apply(model)
    assert was_transformed

    to_array = onnx.numpy_helper.to_array
    initializers = clipped_model.graph.initializer
    # initializer[0] is ``val1``; initializer[1] is the int64 ``slice_ends``.
    assert to_array(initializers[0]) == 65504.0
    assert to_array(initializers[1]) == 2147483647
    # node[1] is the ``val2`` Constant; its first attribute carries the tensor.
    val2_tensor = clipped_model.graph.node[1].attribute[0].t
    assert to_array(val2_tensor) == -65504.0
|
||
|
||
def test_fp16clip_transform_external(external_path):
    """Verify FP16Clip on an initializer stored as external data.

    The ``min_val`` initializer points at a weight file holding a float32
    value of -1e10. The model and the weight file are written into
    ``external_path``, and the transform is applied with that directory as its
    second argument. The first initializer is then expected to equal -65504.0.
    """
    weight_name = "fp32_min.weight"
    model_template = """
    <
        ir_version: 8,
        opset_import: ["" : 17]
    >
    test_fp16clip (float [n, 32] x) => (float [n, 32] y)
    <
        float min_val = [ "location": "<external_weight_file>" ],
        float zero = {0.0}
    >
    {
        mask = Greater(x, zero)
        y = Where(mask, x, min_val)
    }
    """
    model = onnx.parser.parse_model(
        model_template.replace("<external_weight_file>", weight_name)
    )

    # Write the ONNX file and the external weight it refers to side by side.
    model_file = os.path.join(external_path, "test_fp16_clip_external.onnx")
    weight_file = os.path.join(external_path, weight_name)
    onnx.save(model, model_file)
    np.array(-1e10, dtype="float32").tofile(weight_file)

    # Check from the file path so the external data is resolved from disk.
    onnx.checker.check_model(model_file, True, True, True)

    clipped_model, was_transformed = FP16Clip.apply(model, external_path)
    assert was_transformed

    min_val = onnx.numpy_helper.to_array(clipped_model.graph.initializer[0])
    assert min_val == -65504.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.