Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
vladimir-paramuzov committed Sep 4, 2024
1 parent 4a34a04 commit ad6103f
Show file tree
Hide file tree
Showing 17 changed files with 56 additions and 164 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ class ProgramBuilder final {

void add_primitive(const ov::Node& op, std::shared_ptr<cldnn::primitive> prim, std::vector<std::string> aliases = {});

bool requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) const;
bool is_inner_program() const { return m_is_inner_program; }
bool is_query_mode() { return queryMode; }

Expand Down
113 changes: 50 additions & 63 deletions src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_padding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,53 @@
using namespace cldnn;
using namespace ov::intel_gpu;

namespace {

// Merges the explicit begin/end paddings of a convolution-like primitive with the
// padding already attached to the input layout (current_pad), producing a padding
// large enough to satisfy both.
//
// pad_begin / pad_end are indexed from the outermost visible spatial axis, with x
// as the LAST element (… z, y, x), while padding lower/upper sizes are ordered
// [batch, feature, (z,) (y,) x]. Negative explicit paddings are clamped to zero,
// because layout padding cannot be negative.
//
// T is any indexable sequence of integral pad values (e.g. std::vector<std::ptrdiff_t>).
template<typename T>
padding convert_paddings(const padding& current_pad, const T& pad_begin, const T& pad_end, size_t spatial_rank) {
    // Per-axis begin paddings; 0 when the axis is absent, clamped to >= 0.
    tensor::value_type pb_z = std::max<std::ptrdiff_t>(pad_begin.size() >= 3 ? pad_begin[pad_begin.size() - 3] : 0, 0);
    tensor::value_type pb_y = std::max<std::ptrdiff_t>(pad_begin.size() >= 2 ? pad_begin[pad_begin.size() - 2] : 0, 0);
    tensor::value_type pb_x = std::max<std::ptrdiff_t>(pad_begin.size() >= 1 ? pad_begin[pad_begin.size() - 1] : 0, 0);

    // Per-axis end paddings, same layout and clamping as above.
    tensor::value_type pe_z = std::max<std::ptrdiff_t>(pad_end.size() >= 3 ? pad_end[pad_end.size() - 3] : 0, 0);
    tensor::value_type pe_y = std::max<std::ptrdiff_t>(pad_end.size() >= 2 ? pad_end[pad_end.size() - 2] : 0, 0);
    tensor::value_type pe_x = std::max<std::ptrdiff_t>(pad_end.size() >= 1 ? pad_end[pad_end.size() - 1] : 0, 0);

    const auto& lower_sizes = current_pad._lower_size;
    const auto& upper_sizes = current_pad._upper_size;

    std::vector<int32_t> needed_lpad, needed_upad;
    // Batch and feature axes never get explicit spatial padding — keep existing values.
    needed_lpad.push_back(lower_sizes[0]);
    needed_lpad.push_back(lower_sizes[1]);

    needed_upad.push_back(upper_sizes[0]);
    needed_upad.push_back(upper_sizes[1]);
    // For each spatial axis, take the max of the required explicit padding and the
    // padding the layout already carries.
    if (spatial_rank == 3) {
        needed_lpad.push_back(std::max(pb_z, lower_sizes[2]));
        needed_lpad.push_back(std::max(pb_y, lower_sizes[3]));
        needed_lpad.push_back(std::max(pb_x, lower_sizes[4]));

        needed_upad.push_back(std::max(pe_z, upper_sizes[2]));
        needed_upad.push_back(std::max(pe_y, upper_sizes[3]));
        needed_upad.push_back(std::max(pe_x, upper_sizes[4]));
    } else if (spatial_rank == 2) {
        needed_lpad.push_back(std::max(pb_y, lower_sizes[2]));
        needed_lpad.push_back(std::max(pb_x, lower_sizes[3]));

        needed_upad.push_back(std::max(pe_y, upper_sizes[2]));
        needed_upad.push_back(std::max(pe_x, upper_sizes[3]));
    } else {
        needed_lpad.push_back(std::max(pb_x, lower_sizes[2]));
        // Fix: upper padding must use the END padding pe_x, not pb_x (typo carried
        // over from the pre-refactor code; all other branches pair pe_* with upad).
        needed_upad.push_back(std::max(pe_x, upper_sizes[2]));
    }

    padding needed_padding(needed_lpad, needed_upad);

    return needed_padding;
}

} // namespace

void prepare_padding::run(program& p) {
if (output_size_handling_enabled) {
// Prepare upper padding for primitives that support output_size parameter.
Expand Down Expand Up @@ -65,43 +112,7 @@ void prepare_padding::run(program& p) {
auto padding_begin = prim->padding_begin;
auto padding_end = prim->padding_end;

tensor::value_type pb_z = std::max<std::ptrdiff_t>(padding_begin.size() >= 3 ? padding_begin[padding_begin.size() - 3] : 0, 0);
tensor::value_type pb_y = std::max<std::ptrdiff_t>(padding_begin.size() >= 2 ? padding_begin[padding_begin.size() - 2] : 0, 0);
tensor::value_type pb_x = std::max<std::ptrdiff_t>(padding_begin.size() >= 1 ? padding_begin[padding_begin.size() - 1] : 0, 0);

tensor::value_type pe_z = std::max<std::ptrdiff_t>(padding_end.size() >= 3 ? padding_end[padding_end.size() - 3] : 0, 0);
tensor::value_type pe_y = std::max<std::ptrdiff_t>(padding_end.size() >= 2 ? padding_end[padding_end.size() - 2] : 0, 0);
tensor::value_type pe_x = std::max<std::ptrdiff_t>(padding_end.size() >= 1 ? padding_end[padding_end.size() - 1] : 0, 0);

const auto& lower_sizes = in_layout.data_padding._lower_size;
const auto& upper_sizes = in_layout.data_padding._upper_size;

std::vector<int32_t> needed_lpad, needed_upad;
needed_lpad.push_back(lower_sizes[0]);
needed_lpad.push_back(lower_sizes[1]);

needed_upad.push_back(upper_sizes[0]);
needed_upad.push_back(upper_sizes[1]);
if (spatial_rank == 3) {
needed_lpad.push_back(std::max(pb_z, lower_sizes[2]));
needed_lpad.push_back(std::max(pb_y, lower_sizes[3]));
needed_lpad.push_back(std::max(pb_x, lower_sizes[4]));

needed_upad.push_back(std::max(pe_z, upper_sizes[2]));
needed_upad.push_back(std::max(pe_y, upper_sizes[3]));
needed_upad.push_back(std::max(pe_x, upper_sizes[4]));
} else if (spatial_rank == 2) {
needed_lpad.push_back(std::max(pb_y, lower_sizes[2]));
needed_lpad.push_back(std::max(pb_x, lower_sizes[3]));

needed_upad.push_back(std::max(pe_y, upper_sizes[2]));
needed_upad.push_back(std::max(pe_x, upper_sizes[3]));
} {
needed_lpad.push_back(std::max(pb_x, lower_sizes[2]));
needed_upad.push_back(std::max(pb_x, upper_sizes[2]));
}

padding needed_padding(needed_lpad, needed_upad);
auto needed_padding = convert_paddings(in_layout.data_padding, padding_begin, padding_end, spatial_rank);

add_required_padding(prim_node, needed_padding);
} else if (node->is_type<deconvolution>()) {
Expand Down Expand Up @@ -130,33 +141,9 @@ void prepare_padding::run(program& p) {
auto padding_begin = prim->pads_begin;
auto padding_end = prim->pads_end;

tensor::value_type pb_z = std::max<std::ptrdiff_t>(padding_begin.size() >= 3 ? padding_begin[padding_begin.size() - 3] : 0, 0);
tensor::value_type pb_y = std::max<std::ptrdiff_t>(padding_begin.size() >= 2 ? padding_begin[padding_begin.size() - 2] : 0, 0);
tensor::value_type pb_x = std::max<std::ptrdiff_t>(padding_begin.size() >= 1 ? padding_begin[padding_begin.size() - 1] : 0, 0);

tensor::value_type pe_z = std::max<std::ptrdiff_t>(padding_end.size() >= 3 ? padding_end[padding_end.size() - 3] : 0, 0);
tensor::value_type pe_y = std::max<std::ptrdiff_t>(padding_end.size() >= 2 ? padding_end[padding_end.size() - 2] : 0, 0);
tensor::value_type pe_x = std::max<std::ptrdiff_t>(padding_end.size() >= 1 ? padding_end[padding_end.size() - 1] : 0, 0);

tensor pad_l = tensor(0);
tensor pad_u = tensor(0);
pad_l.spatial[0] = pb_x;
pad_l.spatial[1] = pb_y;
pad_l.spatial[2] = pb_z;

pad_u.spatial[0] = pe_x;
pad_u.spatial[1] = pe_y;
pad_u.spatial[2] = pe_z;

auto in_layout = prim_node.get_input_layout();

const auto& actual_lpad = in_layout.data_padding.lower_size();
const auto& actual_upad = in_layout.data_padding.upper_size();

auto needed_lpad = tensor::max(pad_l, actual_lpad);
auto needed_upad = tensor::max(pad_u, actual_upad);

padding needed_padding(needed_lpad.sizes(), needed_upad.sizes());
const auto spatial_rank = in_layout.get_spatial_rank();
auto needed_padding = convert_paddings(in_layout.data_padding, padding_begin, padding_end, spatial_rank);

add_required_padding(prim_node, needed_padding);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ kernel_selector::dim_tensor<T> convert_dim_vector(const tensor& t) {
static_cast<T>(sizes[5])};
}


inline kernel_selector::DimTensor<uint32_t> convert_vec_to_dim_tensor(const std::vector<int32_t>& p, size_t out_rank, int32_t default_value) {
auto sizes = p;
auto format = cldnn::format::get_default_format(out_rank);
Expand Down
29 changes: 4 additions & 25 deletions src/plugins/intel_gpu/src/plugin/ops/gather tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,11 @@ static void CreateGatherTreeOp(ProgramBuilder& p, const std::shared_ptr<ov::op::
auto inputs = p.GetInputInfo(op);
std::string layerName = layer_type_name_ID(op);

std::vector<cldnn::input_info> reordered_inputs;
reordered_inputs.resize(inputs.size());

for (size_t portIndex = 0; portIndex < inputs.size(); portIndex++) {
auto inputDataType = cldnn::element_type_to_data_type(op->get_input_element_type(portIndex));
if (inputDataType == cldnn::data_types::i64) {
// GPU primitive does not support i64 inputs,
// so we need additional reorders to convert them to i32
auto reorderPrimName = inputs[portIndex].pid + "_" + op->get_friendly_name() + ProgramBuilder::m_preProcessTag;
auto targetFormat = cldnn::format::get_default_format(op->get_input_shape(portIndex).size());
auto preprocessPrim = cldnn::reorder(reorderPrimName,
inputs[portIndex],
targetFormat,
cldnn::data_types::i32);
p.add_primitive(*op, preprocessPrim);
reordered_inputs[portIndex] = cldnn::input_info(reorderPrimName);
} else {
reordered_inputs[portIndex] = inputs[portIndex];
}
}

auto gatherTreePrim = cldnn::gather_tree(layerName,
reordered_inputs[0],
reordered_inputs[1],
reordered_inputs[2],
reordered_inputs[3]);
inputs[0],
inputs[1],
inputs[2],
inputs[3]);

p.add_primitive(*op, gatherTreePrim);
}
Expand Down
25 changes: 0 additions & 25 deletions src/plugins/intel_gpu/src/plugin/ops/gather.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ void CreateGatherOpBase(ProgramBuilder& p, const std::shared_ptr<T>& op, const i
ov::Shape out_shape = is_static ? op->get_output_shape(0) : ov::Shape{};

// Update output_shape in case of scalar indice
bool need_reshape = false;
auto out_shape_original = out_shape;

// WA for NMS->Gather construction. NMS fills part of the output blob by the -1 if these values
Expand All @@ -55,9 +54,6 @@ void CreateGatherOpBase(ProgramBuilder& p, const std::shared_ptr<T>& op, const i

// Set layer name for Gather
auto reshapeName = layerName + "";
if (need_reshape) {
layerName = layerName + "_reshape_output";
}

// Check if Gather could be converted to other primitive
const auto input_shape = op->get_input_partial_shape(0);
Expand Down Expand Up @@ -140,27 +136,6 @@ void CreateGatherOpBase(ProgramBuilder& p, const std::shared_ptr<T>& op, const i
p.add_primitive(*op, gatherPrim);
}
}

// Add reorder and reshape for scalar indice
if (need_reshape) {
auto input = inputs[0];
input.pid = layerName;

auto targetFormat = cldnn::format::get_default_format(out_shape_original.size());
if (targetFormat.value != cldnn::format::get_default_format(out_shape.size()).value) {
auto reorderName = layerName + "_cldnn_in_reorder";
auto targetDatatype = cldnn::element_type_to_data_type(op->get_input_element_type(0));
auto reorderPrim = cldnn::reorder(reorderName,
input,
targetFormat,
targetDatatype);
p.add_primitive(*op, reorderPrim);
input.pid = reorderName;
}

auto reshapePrim = cldnn::reshape(reshapeName, input, tensor_from_dims(out_shape_original));
p.add_primitive(*op, reshapePrim);
}
}

static void CreateGatherOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v1::Gather>& op) {
Expand Down
31 changes: 0 additions & 31 deletions src/plugins/intel_gpu/src/plugin/program_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,37 +315,6 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::pr
m_topology->add_primitive(prim);
}

bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) const {
if (op->is_dynamic()) {
return true;
}

if (ov::is_type<ov::op::v5::Loop>(op)) {
const auto body_function = std::static_pointer_cast<ov::op::v5::Loop>(op)->get_function();
if (body_function->is_dynamic())
return true;
}
// When input node has dynamic shape with 4 dimension, this function return false
// because op.is_dynamic() which only checks input shapes return false.
// So, in the case of input data, we need to check output shape.
for (size_t i = 0; i < op->get_output_size(); i++) {
if (op->get_output_partial_shape(i).is_dynamic())
return true;
}

for (size_t i = 0; i < op->get_output_size(); i++) {
if (op->get_output_partial_shape(i).size() > 6)
return true;
}

for (size_t i = 0; i < op->get_input_size(); i++) {
if (op->get_input_partial_shape(i).size() > 6)
return true;
}

return false;
}

int64_t ProgramBuilder::get_parameter_index(const std::shared_ptr<ov::op::v0::Parameter>& parameter) const {
return m_model->get_parameter_index(parameter);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,6 @@ TEST_P(fc_fp16_eltwise_prod_unfused_dynamic, basic) {
);

bool is_dynamic = true;
cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
tolerance = 1e-2f;
execute(p, false, is_dynamic);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ TEST(kernels_cache, reuse_kernels_property) {
reorder("output", input_info("sum"), {{3, 2}, data_types::f16, format::bfyx}));

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::hint::enable_kernels_reuse(true));
auto prog = program::build_program(engine, topology, config, false, false);
auto& cache = prog->get_kernels_cache();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,6 @@ TEST(post_optimize_weights, onednn_group_conv_weights_reorder_test) {

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

if (engine.get_device_info().supports_immad) {
ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::onednn };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,6 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_reshape_unsqueeze) {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

Expand Down Expand Up @@ -899,7 +898,6 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_reshape_squeeze_crop_axis) {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

Expand Down Expand Up @@ -981,7 +979,6 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_split_lengths) {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

Expand Down Expand Up @@ -1063,7 +1060,6 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_mvn) {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ TEST(test_select_preferred_formats, fsv2_fallback_to_byxf) {
topology.add(convolution("conv1", input_info("reorder"), "weights", "", 2, {1, 1}, {1, 1}, {2, 1}, {0, 1}, true));

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::onednn };
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv1", impl} }));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4671,7 +4671,6 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio
reorder("out", input_info("conv"), format::bfyx, data_types::f32));

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
ov::intel_gpu::ImplementationDesc conv_impl;
if (engine.get_device_info().supports_immad) {
conv_impl = { format::b_fs_yx_fsv16, "", impl_types::onednn };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ class dynamic_quantization_gpu_tests: public ::testing::Test {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));

ov::intel_gpu::ImplementationDesc dyn_quan_impl_desc = { format::bfyx, "dynamic_quantize_gpu_ref", impl_types::ocl };
Expand All @@ -87,7 +86,6 @@ class dynamic_quantization_gpu_tests: public ::testing::Test {
);

auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));

network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ void generic_eltwise_int_test(cldnn::format test_input_fmt,
int input2_max_val) {
static_assert(std::is_integral<T>::value, "T must be an integral type");
static_assert(std::is_integral<TOut>::value, "TOut must be an integral type");

tests::random_generator rg(GET_SUITE_NAME);

VVVVF<T> input1_rnd = rg.generate_random_4d<T>(input_b, input_f, input_y, input_x, input1_min_val, input1_max_val);
Expand Down Expand Up @@ -299,7 +299,7 @@ void generic_eltwise_int_test(cldnn::format test_input_fmt,
bool test_is_correct = true;
VF<TOut> output_cpu_vec = flatten_4d<TOut>(test_input_fmt, output_cpu);
for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
const TOut cpu_val = output_cpu_vec[i];
const TOut cpu_val = output_cpu_vec[i];
const TOut gpu_val = output_ptr[i];
if (cpu_val != gpu_val) {
test_is_correct = false;
Expand Down Expand Up @@ -1680,7 +1680,6 @@ TEST(eltwise_gpu_f32, dynamic_padding) {
0.5f, 2.5f });

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
// config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);
network.set_input_data("input1", input1);
Expand Down
Loading

0 comments on commit ad6103f

Please sign in to comment.