Skip to content

Commit

Permalink
[CPU] [LPT] CPU limitation (openvinotoolkit#22522)
Browse files Browse the repository at this point in the history
* [CPU] [LPT] CPU limitation

* tests

* comments fixes

* tests fix

* tests refactoring
  • Loading branch information
eshoguli authored Jan 31, 2024
1 parent db4b33c commit 2e6d061
Show file tree
Hide file tree
Showing 12 changed files with 101 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,7 @@ namespace precision_set {
LP_TRANSFORMATIONS_API const std::vector<element::Type>& get_int8_support();
LP_TRANSFORMATIONS_API const std::vector<element::Type>& get_int8_int16_int32_support();
} // namespace precision_set
// Supported FakeQuantize quantization level counts.
// A value of 2^N corresponds to full-range N-bit quantization; the
// "narrow_range" variants (2^N - 1) exclude one level, e.g. for symmetric ranges.
enum levels : size_t {
    int4 = 16,
    int4_narrow_range = 15,
    int8 = 256,
    int8_narrow_range = 255,
    int16 = 65536,
    int16_narrow_range = 65535,
    // 2^32 does not fit into a 32-bit size_t (ARM/ia32 builds); per the
    // original note this value is never used on those platforms.
    int32 = size_t(4294967296),
    int32_narrow_range = 4294967295
};

class LP_TRANSFORMATIONS_API DataPrecision {
public:
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <vector>
#include <memory>
#include <unordered_set>

// one place to include all Low Precision Transformations from ov::pass::low_precision
#include "low_precision/rt_info/intervals_alignment_attribute.hpp"
Expand Down Expand Up @@ -59,7 +60,7 @@ class ov::pass::low_precision::TypeRelaxedReplacer : public ov::pass::GraphRewri
TypeRelaxedReplacer();
};

class ov::pass::low_precision::LowPrecision : public ov::pass::ModelPass {
class LP_TRANSFORMATIONS_API ov::pass::low_precision::LowPrecision : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("LowPrecision", "0");
LowPrecision(
Expand All @@ -68,7 +69,9 @@ class ov::pass::low_precision::LowPrecision : public ov::pass::ModelPass {
const LayerTransformation::Params = LayerTransformation::Params());
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;

static bool isFunctionQuantized(const std::shared_ptr<const ov::Model>& model);
static bool isFunctionQuantized(
const std::shared_ptr<const ov::Model>& model,
const std::set<levels>& supported_levels = all_levels);
static bool isFQLevelsPresent(const std::shared_ptr<const ov::Model>& model, const std::set<size_t>& levels);

template <typename T, class... Args>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include <memory>
#include <ostream>
#include <set>
#include <unordered_set>
#include <vector>

Expand All @@ -15,6 +16,24 @@ namespace ov {
namespace pass {
namespace low_precision {

// Supported FakeQuantize quantization level counts.
// A value of 2^N corresponds to full-range N-bit quantization; the
// "narrow_range" variants (2^N - 1) exclude one level, e.g. for symmetric ranges.
enum levels : size_t {
    int4 = 16,
    int4_narrow_range = 15,
    int8 = 256,
    int8_narrow_range = 255,
    int16 = 65536,
    int16_narrow_range = 65535,
    // 2^32 does not fit into a 32-bit size_t (ARM/ia32 builds); per the
    // original note this value is never used on those platforms.
    int32 = size_t(4294967296),
    int32_narrow_range = 4294967295
};

// All quantization level counts known to LPT; used as the default set of
// supported levels by the level-validation helpers.
// 'inline const' (instead of a plain 'static' non-const) gives a single,
// immutable definition shared by every translation unit including this header,
// rather than one mutable copy per TU.
inline const std::set<levels> all_levels = {
    levels::int4, levels::int4_narrow_range,
    levels::int8, levels::int8_narrow_range,
    levels::int16, levels::int16_narrow_range,
    levels::int32, levels::int32_narrow_range
};

class LP_TRANSFORMATIONS_API QuantizationDetails {
public:
QuantizationDetails();
Expand Down Expand Up @@ -50,7 +69,9 @@ class LP_TRANSFORMATIONS_API QuantizationDetails {

bool empty() const noexcept;

static bool isSupportedLevel(const size_t level);
static bool isSupportedLevel(
const size_t level,
const std::set<levels>& supported_levels = all_levels);

const size_t levels;
const std::vector<float> inputLowValues;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,9 @@ bool ov::pass::low_precision::LowPrecision::run_on_model(const std::shared_ptr<o
return false;
}

bool ov::pass::low_precision::LowPrecision::isFunctionQuantized(const std::shared_ptr<const ov::Model>& model) {
bool ov::pass::low_precision::LowPrecision::isFunctionQuantized(
const std::shared_ptr<const ov::Model>& model,
const std::set<levels>& supported_levels) {
std::set<std::shared_ptr<ov::Node>> handledNodes;
std::deque<std::shared_ptr<ov::Node>> nodes;
for (const auto& result : model->get_results()) {
Expand All @@ -316,7 +318,7 @@ bool ov::pass::low_precision::LowPrecision::isFunctionQuantized(const std::share

if (const auto fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(parent)) {
if (QuantizationDetails::outputLayoutIsSupported(fakeQuantize, true) &&
QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels())) {
QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels(), supported_levels)) {
return true;
}
} else if (const auto multiSubGraph = ov::as_type_ptr<ov::op::util::MultiSubGraphOp>(parent)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,15 +170,10 @@ bool QuantizationDetails::empty() const noexcept {
return (levels == 0ul) && inputLowValues.empty() && inputHighValues.empty() && outputLowValues.empty() && outputHighValues.empty();
}

bool QuantizationDetails::isSupportedLevel(const size_t level) {
using ov::pass::low_precision::levels;
static const std::unordered_set<size_t> supported_levels = {
levels::int4, levels::int4_narrow_range,
levels::int8, levels::int8_narrow_range,
levels::int16, levels::int16_narrow_range,
levels::int32, levels::int32_narrow_range
};
return supported_levels.find(level) != supported_levels.end();
bool QuantizationDetails::isSupportedLevel(
const size_t quantization_level,
const std::set<ov::pass::low_precision::levels>& supported_levels) {
return supported_levels.find(static_cast<ov::pass::low_precision::levels>(quantization_level)) != supported_levels.end();
}

} // namespace low_precision
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,28 +210,24 @@ bool Transformations::fuse_type_to_convert(const std::shared_ptr<ov::Node>& node
}

void Transformations::UpToLpt() {
using namespace ov::pass::low_precision;
static const std::set<levels>& supported_fq_levels = {
levels::int4,
levels::int4_narrow_range,
levels::int8,
levels::int8_narrow_range
};

const bool useLpt = enableLpt &&
ov::pass::low_precision::LowPrecision::isFunctionQuantized(model) &&
LowPrecision::isFunctionQuantized(model, supported_fq_levels) &&
CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Lpt);

auto defaultPrecisions = useLpt ? ov::pass::low_precision::precision_set::get_int8_support() : std::vector<ov::element::Type>{};
bool hasINT16orINT32Levels = false;

if (useLpt) {
CPU_LPT_SCOPE(LowPrecisionTransformations_Part1);
hasINT16orINT32Levels = ov::pass::low_precision::LowPrecision::isFQLevelsPresent(
model,
{ov::pass::low_precision::levels::int16, ov::pass::low_precision::levels::int16_narrow_range,
ov::pass::low_precision::levels::int32, ov::pass::low_precision::levels::int32_narrow_range});
if (hasINT16orINT32Levels) {
defaultPrecisions = ov::pass::low_precision::precision_set::get_int8_int16_int32_support();
}
}
const auto defaultPrecisions = useLpt ? precision_set::get_int8_support() : std::vector<ov::element::Type>{};

PreLpt(defaultPrecisions, isLegacyApi);

if (useLpt)
Lpt(hasINT16orINT32Levels, defaultPrecisions);
Lpt(defaultPrecisions);
}

void Transformations::CpuSpecificOpSet(void) {
Expand Down Expand Up @@ -512,7 +508,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
manager.run_passes(model);
}

void Transformations::Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions) {
void Transformations::Lpt(const std::vector<ov::element::Type>& defaultPrecisions) {
CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Lpt);

using namespace ov::pass::low_precision;
Expand Down Expand Up @@ -571,18 +567,11 @@ void Transformations::Lpt(const bool hasINT16orINT32Levels, const std::vector<ov
QuantizationGranularityRestriction::create<ov::opset1::ConvolutionBackpropData>({0})
});

// for GNA networks reference execution
bool updatePrecision = true;
if (hasINT16orINT32Levels) {
updatePrecision = false;
supportedPrecisions = std::vector<PrecisionsRestriction>({});
}

ov::pass::Manager lptManager;
CPU_REGISTER_PASS_COMMON(lptManager, LowPrecision,
supportedPrecisions,
quantizationRestrictions,
LayerTransformation::Params(updatePrecision, ov::element::f32, defaultPrecisions));
LayerTransformation::Params(true, ov::element::f32, defaultPrecisions));

CPU_SET_CALLBACK_COMMON(lptManager, [](const_node_ptr& node) -> bool {
return ov::is_type<ov::opset1::Multiply>(node) &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class Transformations {

void PreLpt(const std::vector<ov::element::Type>& defaultPrecisions, const bool isLegacyApi);

void Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions);
void Lpt(const std::vector<ov::element::Type>& defaultPrecisions);

void MainSnippets(void);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,16 @@ const std::vector<ReshapeTransformationParam> params = {
"Reshape",
"f32"
},

// int16 is not supported: no dequantization after Reshape: Reshape => Output
{
{ 1, 3, 32 },
{ 1, 3, 4, 8 },
{ 65536ul, ov::Shape{ 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
"Reshape",
"f32",
{ "Reshape", "Output" }
},
};

INSTANTIATE_TEST_SUITE_P(smoke_LPT, ReshapeTransformation,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class ReshapeTransformationParam {
ov::builder::subgraph::FakeQuantizeOnData fakeQuantize;
std::string layerType;
std::string expectedKernelType;
std::vector<std::string> executionOrder;
};

typedef std::tuple<
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ void ReshapeTransformation::run() {
LayerTransformation::run();

const auto params = std::get<3>(GetParam());

EXPECT_TRUE(check_execution_order(params.executionOrder));

auto actualPrecision = get_runtime_precision_by_type(params.layerType);
const auto expectedPrecision = params.expectedKernelType;
if ((expectedPrecision == "FP32") && (actualPrecision == "FP16")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ class LayerTransformation : virtual public ov::test::SubgraphBaseTest {
// get runtime precision by operation friendly name which can be fused
std::string get_runtime_precision_by_fused_name(const std::string& layerName);

// check that the operation sequence in the execution graph matches orderedOpsTypes
// orderedOpsTypes may contain only the operations of interest (fewer than exist in the execution graph)
bool check_execution_order(const std::vector<std::string>& orderedOpsTypes);

std::map<std::string, ov::Node::RTMap> get_runtime_info();

void init_input_shapes(const ov::PartialShape& shape);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,39 @@ std::string LayerTransformation::get_runtime_precision_by_fused_name(const std::
return find_node_by_runtime_precision(compiledModel, is_node_f);
}

// Checks that the operation types listed in 'orderedOpsTypes' appear as a
// contiguous subsequence (in execution order) of the compiled model's runtime
// graph. An empty list trivially passes. Once the first listed type matches,
// every subsequent runtime op must match the next listed type — the sequence
// must be contiguous in the execution graph.
bool LayerTransformation::check_execution_order(const std::vector<std::string>& orderedOpsTypes) {
    if (orderedOpsTypes.empty()) {
        return true;
    }

    // Index of the next expected type; non-zero means matching has started.
    size_t comparisonIndex = 0;
    const std::shared_ptr<const ov::Model>& execFunction = compiledModel.get_runtime_model();
    for (const auto& op : execFunction->get_ordered_ops()) {
        const auto& rtInfo = op->get_rt_info();
        const auto& typeIt = rtInfo.find("layerType");
        OPENVINO_ASSERT(typeIt != rtInfo.end(), "layerType is not found");

        const auto layerType = typeIt->second.as<std::string>();
        if (orderedOpsTypes[comparisonIndex] == layerType) {
            // if comparisonIndex == 0 then start comparison
            // if comparisonIndex != 0 then comparison has been started, check next operation type in sequence
            comparisonIndex++;

            if (comparisonIndex >= orderedOpsTypes.size()) {
                // all operation types in sequence were checked, comparison is ended
                return true;
            }
        } else if (comparisonIndex != 0) {
            // if comparison has been started and operation type is not equal then exit
            return false;
        }
    }

    // Reached only when the listed sequence was never fully matched (a complete
    // match returns inside the loop), so comparisonIndex < size here and this
    // always yields false; kept as a defensive equality check.
    return comparisonIndex == orderedOpsTypes.size();
}

std::map<std::string, ov::Node::RTMap> LayerTransformation::get_runtime_info() {
const ov::CompiledModel& execNet = compiledModel;
const std::shared_ptr<const ov::Model>& function = execNet.get_runtime_model();
Expand Down

0 comments on commit 2e6d061

Please sign in to comment.