Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PDPD FE]: Refactored PaddlePaddle quantization #26347

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/frontends/paddle/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
#

add_subdirectory(src)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../core/dev_api)

add_subdirectory(src)
if(ENABLE_TESTS)
add_subdirectory(tests)
endif()
16 changes: 0 additions & 16 deletions src/frontends/paddle/src/frontend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include "default_opset.hpp"
#include "framework.pb.h"
#include "input_model.hpp"
#include "internal/pass/transform_fakequantize.hpp"
#include "internal/pass/transform_if.hpp"
#include "internal/pass/transform_tensorarray.hpp"
#include "internal/pass/transform_while.hpp"
Expand Down Expand Up @@ -355,18 +354,6 @@ void FrontEnd::try_remove_internal_ops(const std::vector<std::shared_ptr<Model>>
}
}

void FrontEnd::fuse_fakequantize_ops(const std::vector<std::shared_ptr<Model>>& models) const {
for (auto& model : models) {
ov::pass::Manager manager("Frontend:Paddle:fuse_fakequantize_ops");
manager.register_pass<ov::frontend::paddle::pass::TransformFakeQuantize>();
manager.run_passes(model);
}
if (models.size() > 0) {
// revalidate as child models are transformed after parent models.
models[0]->validate_nodes_and_infer_types();
}
}

bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
// Last boolean flag in `variants` (if presented) is reserved for FE configuration
size_t extra_variants_num = variants.size() > 0 && variants[variants.size() - 1].is<bool>() ? 1 : 0;
Expand Down Expand Up @@ -478,7 +465,6 @@ std::shared_ptr<ov::Model> FrontEnd::convert(const InputModel::Ptr& model) const
return paddle::make_ng_node(nodes_dict, op_place, m_op_translators);
});

fuse_fakequantize_ops(f);
try_remove_internal_ops(f);
normalize(f[0]);
return f[0];
Expand All @@ -494,7 +480,6 @@ void FrontEnd::convert(const std::shared_ptr<ov::Model>& partiallyConverted) con
result->validate_and_infer_types();
}

fuse_fakequantize_ops({partiallyConverted});
try_remove_internal_ops({partiallyConverted});
normalize(partiallyConverted);
}
Expand Down Expand Up @@ -527,7 +512,6 @@ std::shared_ptr<ov::Model> FrontEnd::convert_partially(const InputModel::Ptr& mo
return named_outputs;
});

fuse_fakequantize_ops(f);
try_remove_internal_ops(f);
normalize(f[0]);
return f[0];
Expand Down
126 changes: 0 additions & 126 deletions src/frontends/paddle/src/internal/pass/transform_fakequantize.cpp

This file was deleted.

This file was deleted.

111 changes: 50 additions & 61 deletions src/frontends/paddle/src/op/dequantize_linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,80 +3,69 @@
//

#include "default_opset.hpp"
#include "openvino/core/validation_util.hpp"
#include "openvino/frontend/paddle/node_context.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/fake_quantize.hpp"

namespace ov {
namespace frontend {
namespace paddle {
namespace op {
NamedOutputs dequantize_linear(const NodeContext& node) {
// extract the INPUTS
const auto x = node.get_input("X");
const auto scale = node.get_input("Scale");
const auto zero_point = node.get_input("ZeroPoint");

// assert shape of scale and zero_point
const auto& scale_shape = scale.get_partial_shape();
PADDLE_OP_CHECK(node, scale.get_partial_shape().rank().is_static(), "dequantize_linear scale rank must be static.");
const auto& scale_shape_length = scale.get_partial_shape().rank().get_length();

if (scale_shape_length == 1) {
PADDLE_OP_CHECK(node,
scale.get_partial_shape() == zero_point.get_partial_shape(),
"dequantize_linear shape of scale and zero_point doesn't match.");
} else if (scale_shape_length == 2) {
PADDLE_OP_CHECK(node,
scale.get_partial_shape()[1] == zero_point.get_partial_shape()[0],
"dequantize_linear shape of scale and zero_point doesn't match.");
namespace {
// Returns the op's "ZeroPoint" input when present; otherwise a default
// zero constant (i32, shape {1}) so the dequantization formula reduces to
// x * scale. The caller converts it to the tensor's element type.
ov::Output<ov::Node> get_zero_point(const NodeContext& node) {
    if (node.has_input("ZeroPoint")) {
        return node.get_input("ZeroPoint");
    } else {
        // NOTE(review): a stray `PADDLE_OP_CHECK(node, false, ...)` left over
        // from the previous implementation made this fallback branch always
        // throw; removed so the default zero point is actually usable.
        return std::make_shared<default_opset::Constant>(ov::element::i32, ov::Shape{1}, 0);
    }
}

const auto bit_length = node.get_attribute<int32_t>("bit_length");
const auto range = (1 << (bit_length - 1)) - 1;
const auto range_node = std::make_shared<default_opset::Constant>(element::f32, Shape{1}, (1.0 / range));
const auto real_scale = std::make_shared<default_opset::Multiply>(scale, range_node);

auto q_node = std::make_shared<default_opset::Convert>(x, element::f32);
// extract the ATTRIBUTES and explaination for quant_axis:
// / [-1] --- per-tensor, scale is always 1-D
// quant_axis - [0 or 1] --- per-channel, scale may be 1-D or 2-D, needing to reshape for input shape.
// \ [others] --- unsupported
auto quant_axis = node.get_attribute<int32_t>("quant_axis");
std::vector<int32_t> quant_axis_range{-1, 0, 1};
PADDLE_OP_CHECK(node,
std::any_of(quant_axis_range.begin(),
quant_axis_range.end(),
[&quant_axis](int32_t value) {
return quant_axis == value;
}),
"dequantize_linear quant_axis is NOT in the range of [-1, 0, 1].");
if (quant_axis == -1) {
const auto zp_node = std::make_shared<default_opset::Convert>(zero_point, element::f32);
const auto out_node =
std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Subtract>(q_node, zp_node),
real_scale);
return node.default_single_output_mapping({out_node}, {"Y"});
} else {
// But for per-channel scenario, the shape of scale is NOT stable.
// Sometimes scale is 1-D and sometimes scale is 2-D. But the last dim(e.g. s[len-1]) really makes sense.
// Let's prepare a pattern to reshape operation according to the scale shape.
std::vector<size_t> reshape_pattern(x.get_partial_shape().rank().get_length(), 1);
reshape_pattern.at(quant_axis) = scale_shape[scale_shape_length - 1].get_length();
const auto reshape_node =
std::make_shared<default_opset::Constant>(element::i32, Shape{reshape_pattern.size()}, reshape_pattern);
const auto reshape_scale = std::make_shared<default_opset::Reshape>(real_scale, reshape_node, true);
const auto zp_node = std::make_shared<default_opset::Convert>(
std::make_shared<default_opset::Reshape>(zero_point, reshape_node, true),
element::f32);
const auto out_node =
std::make_shared<default_opset::Multiply>(std::make_shared<default_opset::Subtract>(q_node, zp_node),
reshape_scale);
return node.default_single_output_mapping({out_node}, {"Y"});
// Reshapes a 1-D scale/zero-point tensor to rank(x) with its length placed on
// `axis` (all other dims 1), so element-wise ops broadcast per channel.
// A rank-0 (scalar) input already broadcasts against anything and is returned as-is.
ov::Output<ov::Node> reshape_for_broadcast(const ov::Output<ov::Node>& input, int64_t axis, const ov::Shape& x_shape) {
    if (input.get_partial_shape().rank().get_length() == 0) {
        return input;
    }

    // e.g. axis=1 with rank-4 x -> {1, C, 1, 1}
    ov::Shape unsqueezed(x_shape.size(), 1);
    unsqueezed[axis] = input.get_shape()[0];

    const auto pattern =
        std::make_shared<default_opset::Constant>(ov::element::i64, ov::Shape{unsqueezed.size()}, unsqueezed);
    return std::make_shared<default_opset::Reshape>(input, pattern, true);
}

} // namespace

// Translates Paddle's dequantize_linear into OpenVINO ops using the formula
// (x - zero_point) * scale, computed in f32.
NamedOutputs dequantize_linear(const NodeContext& node) {
    // Inputs: X (quantized tensor), Scale, and an optional ZeroPoint.
    auto data = node.get_input("X");
    auto scale_in = node.get_input("Scale");
    auto zp_in = get_zero_point(node);
    auto axis = node.get_attribute<int64_t>("axis", 1);

    const auto& data_pshape = data.get_partial_shape();
    PADDLE_OP_CHECK(node, data_pshape.rank().is_static(), "Rank of input tensor must be static");
    // Accept negative axes by normalizing against the input rank.
    axis = ov::util::normalize_axis(axis, data_pshape.rank().get_length());

    const auto& in_type = data.get_element_type();
    const auto& out_type = ov::element::f32;

    // Unsqueeze scale / zero point so they broadcast along the quantization axis.
    scale_in = reshape_for_broadcast(scale_in, axis, data_pshape.get_shape());
    zp_in = reshape_for_broadcast(zp_in, axis, data_pshape.get_shape());

    auto zp = std::make_shared<default_opset::Convert>(zp_in, in_type);
    auto scale = std::make_shared<default_opset::Convert>(scale_in, out_type);

    // Dequantization formula: (x - zero_point) * scale
    auto data_f = std::make_shared<default_opset::Convert>(data, out_type);
    auto zp_f = std::make_shared<default_opset::Convert>(zp, out_type);
    auto shifted = std::make_shared<default_opset::Subtract>(data_f, zp_f);
    auto result = std::make_shared<default_opset::Multiply>(shifted, scale);

    return node.default_single_output_mapping({result}, {"Y"});
}

} // namespace op
} // namespace paddle
} // namespace frontend
} // namespace ov
} // namespace ov
Loading
Loading