Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PDPD FE]: Refactored PaddlePaddle quantization #26347

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/frontends/paddle/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
#

add_subdirectory(src)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../core/dev_api)

add_subdirectory(src)
if(ENABLE_TESTS)
add_subdirectory(tests)
endif()
16 changes: 0 additions & 16 deletions src/frontends/paddle/src/frontend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include "default_opset.hpp"
#include "framework.pb.h"
#include "input_model.hpp"
#include "internal/pass/transform_fakequantize.hpp"
#include "internal/pass/transform_if.hpp"
#include "internal/pass/transform_tensorarray.hpp"
#include "internal/pass/transform_while.hpp"
Expand Down Expand Up @@ -355,18 +354,6 @@ void FrontEnd::try_remove_internal_ops(const std::vector<std::shared_ptr<Model>>
}
}

void FrontEnd::fuse_fakequantize_ops(const std::vector<std::shared_ptr<Model>>& models) const {
for (auto& model : models) {
ov::pass::Manager manager("Frontend:Paddle:fuse_fakequantize_ops");
manager.register_pass<ov::frontend::paddle::pass::TransformFakeQuantize>();
manager.run_passes(model);
}
if (models.size() > 0) {
// revalidate as child models are transformed after parent models.
models[0]->validate_nodes_and_infer_types();
}
}

bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
// Last boolean flag in `variants` (if presented) is reserved for FE configuration
size_t extra_variants_num = variants.size() > 0 && variants[variants.size() - 1].is<bool>() ? 1 : 0;
Expand Down Expand Up @@ -478,7 +465,6 @@ std::shared_ptr<ov::Model> FrontEnd::convert(const InputModel::Ptr& model) const
return paddle::make_ng_node(nodes_dict, op_place, m_op_translators);
});

fuse_fakequantize_ops(f);
try_remove_internal_ops(f);
normalize(f[0]);
return f[0];
Expand All @@ -494,7 +480,6 @@ void FrontEnd::convert(const std::shared_ptr<ov::Model>& partiallyConverted) con
result->validate_and_infer_types();
}

fuse_fakequantize_ops({partiallyConverted});
try_remove_internal_ops({partiallyConverted});
normalize(partiallyConverted);
}
Expand Down Expand Up @@ -527,7 +512,6 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const InputModel::Ptr& mo
return named_outputs;
});

fuse_fakequantize_ops(f);
try_remove_internal_ops(f);
normalize(f[0]);
return f[0];
Expand Down
126 changes: 0 additions & 126 deletions src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp

This file was deleted.

This file was deleted.

111 changes: 50 additions & 61 deletions src/frontends/paddle/src/op/dequantize_linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,80 +3,69 @@
//

#include "default_opset.hpp"
#include "openvino/core/validation_util.hpp"
#include "openvino/frontend/paddle/node_context.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/fake_quantize.hpp"

namespace ov {
namespace frontend {
namespace paddle {
namespace op {
NamedOutputs dequantize_linear(const NodeContext& node) {
// extract the INPUTS
const auto x = node.get_input("X");
const auto scale = node.get_input("Scale");
const auto zero_point = node.get_input("ZeroPoint");

// assert shape of scale and zero_point
const auto& scale_shape = scale.get_partial_shape();
PADDLE_OP_CHECK(node, scale.get_partial_shape().rank().is_static(), "dequantize_linear scale rank must be static.");
const auto& scale_shape_length = scale.get_partial_shape().rank().get_length();

if (scale_shape_length == 1) {
PADDLE_OP_CHECK(node,
scale.get_partial_shape() == zero_point.get_partial_shape(),
"dequantize_linear shape of scale and zero_point doesn't match.");
} else if (scale_shape_length == 2) {
PADDLE_OP_CHECK(node,
scale.get_partial_shape()[1] == zero_point.get_partial_shape()[0],
"dequantize_linear shape of scale and zero_point doesn't match.");
namespace {
// Returns the op's "ZeroPoint" input when present; otherwise a default
// zero constant (i32, shape {1}) so the dequantization formula reduces to
// x * scale. The caller converts it to the tensor's element type.
ov::Output<ov::Node> get_zero_point(const NodeContext& node) {
    if (node.has_input("ZeroPoint")) {
        return node.get_input("ZeroPoint");
    } else {
        // NOTE(review): a stray `PADDLE_OP_CHECK(node, false, ...)` left over
        // from the previous implementation made this fallback branch always
        // throw; removed so the default zero point is actually usable.
        return std::make_shared<default_opset::Constant>(ov::element::i32, ov::Shape{1}, 0);
    }
}

const auto bit_length = node.get_attribute<int32_t>("bit_length");
const auto range = (1 << (bit_length - 1)) - 1;
const auto range_node = std::make_shared<default_opset::Constant>(element::f32, Shape{1}, (1.0 / range));
const auto real_scale = std::make_shared<default_opset::Multiply>(scale, range_node);

auto q_node = std::make_shared<default_opset::Convert>(x, element::f32);
// extract the ATTRIBUTES and explaination for quant_axis:
// / [-1] --- per-tensor, scale is always 1-D
// quant_axis - [0 or 1] --- per-channel, scale may be 1-D or 2-D, needing to reshape for input shape.
// \ [others] --- unsupported
auto quant_axis = node.get_attribute<int32_t>("quant_axis");
std::vector<int32_t> quant_axis_range{-1, 0, 1};
PADDLE_OP_CHECK(node,
std::any_of(quant_axis_range.begin(),
quant_axis_range.end(),
[&quant_axis](int32_t value) {
return quant_axis == value;
}),
"dequantize_linear quant_axis is NOT in the range of [-1, 0, 1].");
if (quant_axis == -1) {
const auto zp_node = std::make_shared<default_opset::Convert>(zero_point, element::f32);
const auto out_node =
std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Subtract>(q_node, zp_node),
real_scale);
return node.default_single_output_mapping({out_node}, {"Y"});
} else {
// But for per-channel scenario, the shape of scale is NOT stable.
// Sometimes scale is 1-D and sometimes scale is 2-D. But the last dim(e.g. s[len-1]) really makes sense.
// Let's prepare a pattern to reshape operation according to the scale shape.
std::vector<size_t> reshape_pattern(x.get_partial_shape().rank().get_length(), 1);
reshape_pattern.at(quant_axis) = scale_shape[scale_shape_length - 1].get_length();
const auto reshape_node =
std::make_shared<default_opset::Constant>(element::i32, Shape{reshape_pattern.size()}, reshape_pattern);
const auto reshape_scale = std::make_shared<default_opset::Reshape>(real_scale, reshape_node, true);
const auto zp_node = std::make_shared<default_opset::Convert>(
std::make_shared<default_opset::Reshape>(zero_point, reshape_node, true),
element::f32);
const auto out_node =
std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Subtract>(q_node, zp_node),
reshape_scale);
return node.default_single_output_mapping({out_node}, {"Y"});
// Reshapes a 1-D scale/zero-point tensor to rank(x) with its length placed on
// `axis` (all other dims 1), so element-wise ops broadcast per channel.
// A rank-0 (scalar) input already broadcasts against anything and is returned as-is.
ov::Output<ov::Node> reshape_for_broadcast(const ov::Output<ov::Node>& input, int64_t axis, const ov::Shape& x_shape) {
    if (input.get_partial_shape().rank().get_length() == 0) {
        return input;
    }

    // e.g. axis=1 with rank-4 x -> {1, C, 1, 1}
    ov::Shape unsqueezed(x_shape.size(), 1);
    unsqueezed[axis] = input.get_shape()[0];

    const auto pattern =
        std::make_shared<default_opset::Constant>(ov::element::i64, ov::Shape{unsqueezed.size()}, unsqueezed);
    return std::make_shared<default_opset::Reshape>(input, pattern, true);
}

} // namespace

// Translates Paddle's dequantize_linear into OpenVINO ops using the formula
// (x - zero_point) * scale, computed in f32.
NamedOutputs dequantize_linear(const NodeContext& node) {
    // Inputs: X (quantized tensor), Scale, and an optional ZeroPoint.
    auto data = node.get_input("X");
    auto scale_in = node.get_input("Scale");
    auto zp_in = get_zero_point(node);
    auto axis = node.get_attribute<int64_t>("axis", 1);

    const auto& data_pshape = data.get_partial_shape();
    PADDLE_OP_CHECK(node, data_pshape.rank().is_static(), "Rank of input tensor must be static");
    // Accept negative axes by normalizing against the input rank.
    axis = ov::util::normalize_axis(axis, data_pshape.rank().get_length());

    const auto& in_type = data.get_element_type();
    const auto& out_type = ov::element::f32;

    // Unsqueeze scale / zero point so they broadcast along the quantization axis.
    scale_in = reshape_for_broadcast(scale_in, axis, data_pshape.get_shape());
    zp_in = reshape_for_broadcast(zp_in, axis, data_pshape.get_shape());

    auto zp = std::make_shared<default_opset::Convert>(zp_in, in_type);
    auto scale = std::make_shared<default_opset::Convert>(scale_in, out_type);

    // Dequantization formula: (x - zero_point) * scale
    auto data_f = std::make_shared<default_opset::Convert>(data, out_type);
    auto zp_f = std::make_shared<default_opset::Convert>(zp, out_type);
    auto shifted = std::make_shared<default_opset::Subtract>(data_f, zp_f);
    auto result = std::make_shared<default_opset::Multiply>(shifted, scale);

    return node.default_single_output_mapping({result}, {"Y"});
}

} // namespace op
} // namespace paddle
} // namespace frontend
} // namespace ov
} // namespace ov
Loading
Loading