Fix memory access violations in the CPU float16 min and max operators (#22135)

### Description

Fixes the logic for getting the number of elements for the input and
output spans in the `MinMaxMLFloat16` method. It was incorrectly using
the total number of elements in the output rather than the number of
elements in the current span, which happened to work with 1D inputs but
breaks with 2D inputs.

This meant that as the `BroadcastLooper` iterated over spans, each call
to `MinMaxMLFloat16` started further forward in the input and output
buffers than intended and read and wrote progressively further past
their ends, causing the ASan error in #21558 and, in larger examples,
occasional segfaults.
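
For concreteness: broadcasting a `{3, 3}` input against a `{3, 1}` input
produces nine output elements processed as three spans of three. Using the
total output count as the per-span count makes every span read and write
nine elements from its current offset, running past the end of the buffers
from the second span onward. A minimal standalone sketch of the difference
(plain float arrays stand in for the onnxruntime broadcast machinery, which
is elided here):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Output of broadcasting a {3,3} tensor against a {3,1} tensor:
  // nine elements total, processed as three spans of three.
  constexpr std::size_t kSpanSize = 3;
  constexpr std::size_t kNumSpans = 3;
  std::vector<float> output(kSpanSize * kNumSpans, 0.0f);

  for (std::size_t span = 0; span < kNumSpans; ++span) {
    float* span_start = output.data() + span * kSpanSize;

    // Buggy: using the total output size (9) as the per-span count
    // would touch span_start[0..8], past the buffer end once span > 0.
    // std::size_t num_elements = output.size();

    // Fixed: use the size of the current span only.
    std::size_t num_elements = kSpanSize;

    for (std::size_t i = 0; i < num_elements; ++i) {
      span_start[i] = 1.0f;  // stays in bounds for every span
    }
  }

  std::cout << "wrote " << output.size() << " elements safely\n";
  return 0;
}
```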

### Motivation and Context

Fixes #21558.

Further testing showed that this issue didn't just cause ASan errors in
tests: it also causes segfaults with larger inputs.
adamreeve authored Sep 20, 2024
1 parent b0ef1f3 commit f3cbe76
Showing 2 changed files with 101 additions and 3 deletions.
6 changes: 3 additions & 3 deletions onnxruntime/core/providers/cpu/math/element_wise_ops.cc
@@ -748,7 +748,7 @@ static Status MinMaxMLFloat16(const OpKernel& inst, OpKernelContext* context) {

ProcessBroadcastSpanFuncs funcs{
[](BroadcastHelper& per_iter_bh) {
-      auto num_elements = per_iter_bh.NumOutputElements();
+      auto num_elements = per_iter_bh.EigenInput1<MLFloat16>().rows();

const auto* input_1 = reinterpret_cast<const Eigen::half*>(per_iter_bh.EigenInput1<MLFloat16>().data());
ConstEigenVectorArrayMap<Eigen::half> input_1_vec_map(input_1, num_elements);
@@ -763,7 +763,7 @@ static Status MinMaxMLFloat16(const OpKernel& inst, OpKernelContext* context) {
}
},
[](BroadcastHelper& per_iter_bh) {
-      auto num_elements = per_iter_bh.NumOutputElements();
+      auto num_elements = per_iter_bh.EigenInput0<MLFloat16>().rows();

const auto* input_0 = reinterpret_cast<const Eigen::half*>(per_iter_bh.EigenInput0<MLFloat16>().data());
ConstEigenVectorArrayMap<Eigen::half> input_0_vec_map(input_0, num_elements);
@@ -778,7 +778,7 @@ static Status MinMaxMLFloat16(const OpKernel& inst, OpKernelContext* context) {
}
},
[](BroadcastHelper& per_iter_bh) {
-      auto num_elements = per_iter_bh.NumOutputElements();
+      auto num_elements = per_iter_bh.EigenInput0<MLFloat16>().rows();

const auto* input_0 = reinterpret_cast<const Eigen::half*>(per_iter_bh.EigenInput0<MLFloat16>().data());
ConstEigenVectorArrayMap<Eigen::half> input_0_vec_map(input_0, num_elements);
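
For context on why the element count matters: the code above maps each
span's raw `MLFloat16` data as Eigen half arrays, and the count passed to
each map determines exactly how far Eigen reads and writes. A minimal
sketch of that mapping pattern using plain Eigen types (the buffers and
span size here are illustrative, not onnxruntime's actual aliases such as
`ConstEigenVectorArrayMap`):

```cpp
#include <Eigen/Core>
#include <iostream>

int main() {
  // Stand-ins for one broadcast span's input and output buffers.
  Eigen::half a[3] = {Eigen::half(1.0f), Eigen::half(-0.5f), Eigen::half(2.0f)};
  Eigen::half b[3] = {Eigen::half(0.0f), Eigen::half(0.0f), Eigen::half(1.0f)};
  Eigen::half out[3];

  // The count must be the current span's size; a larger value would make
  // these maps read and write past the ends of the buffers.
  const Eigen::Index num_elements = 3;

  Eigen::Map<const Eigen::Array<Eigen::half, Eigen::Dynamic, 1>> a_map(a, num_elements);
  Eigen::Map<const Eigen::Array<Eigen::half, Eigen::Dynamic, 1>> b_map(b, num_elements);
  Eigen::Map<Eigen::Array<Eigen::half, Eigen::Dynamic, 1>> out_map(out, num_elements);

  out_map = a_map.min(b_map);  // elementwise minimum over the span

  for (Eigen::Index i = 0; i < num_elements; ++i) {
    std::cout << static_cast<float>(out[i]) << ' ';
  }
  std::cout << '\n';  // prints: 0 -0.5 1
}
```
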
98 changes: 98 additions & 0 deletions onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
@@ -1787,6 +1787,54 @@ TEST(MathOpTest, Min_12_MLFloat16_Scalar1) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Min_12_MLFloat16_MatrixVector) {
OpTester test("Min", 12);
test.AddInput<MLFloat16>("data_0", {3, 3},
MakeMLFloat16({1.0f, 1.0f, 1.0f,
-0.5f, 0.0f, -2.0f,
0.5f, 0.0f, 2.0f}));
test.AddInput<MLFloat16>("data_1", {3, 1},
MakeMLFloat16({0.0f, -1.0f, 1.0f}));
test.AddOutput<MLFloat16>("min", {3, 3},
MakeMLFloat16({0.0f, 0.0f, 0.0f,
-1.0f, -1.0f, -2.0f,
0.5f, 0.0f, 1.0f}));
if (nullptr != DefaultCpuExecutionProvider()) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultCpuExecutionProvider());
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
if (nullptr != DefaultCudaExecutionProvider()) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultCudaExecutionProvider());
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
}

TEST(MathOpTest, Min_12_MLFloat16_VectorMatrix) {
OpTester test("Min", 12);
test.AddInput<MLFloat16>("data_0", {3, 1},
MakeMLFloat16({0.0f, -1.0f, 1.0f}));
test.AddInput<MLFloat16>("data_1", {3, 4},
MakeMLFloat16({1.0f, 1.0f, 1.0f, -1.0f,
-0.5f, 0.0f, -2.0f, -1.25f,
0.5f, 0.0f, 2.0f, 1.5f}));
test.AddOutput<MLFloat16>("min", {3, 4},
MakeMLFloat16({0.0f, 0.0f, 0.0f, -1.0f,
-1.0f, -1.0f, -2.0f, -1.25f,
0.5f, 0.0f, 1.0f, 1.0f}));
if (nullptr != DefaultCpuExecutionProvider()) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultCpuExecutionProvider());
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
if (nullptr != DefaultCudaExecutionProvider()) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultCudaExecutionProvider());
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
}

TEST(MathOpTest, Max_6) {
OpTester test("Max", 6);
std::vector<int64_t> dims{3, 3};
@@ -2137,6 +2185,56 @@ TEST(MathOpTest, Max_12_MLFloat16_Scalar1) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Max_12_MLFloat16_MatrixVector) {
OpTester test("Max", 12);
test.AddInput<MLFloat16>("data_0", {4, 3},
MakeMLFloat16({1.0f, 1.0f, 1.0f,
-0.5f, 0.0f, -2.0f,
0.0f, 0.5f, 0.75f,
0.5f, 0.0f, 2.0f}));
test.AddInput<MLFloat16>("data_1", {4, 1},
MakeMLFloat16({0.0f, -1.0f, 0.5f, 1.0f}));
test.AddOutput<MLFloat16>("max", {4, 3},
MakeMLFloat16({1.0f, 1.0f, 1.0f,
-0.5f, 0.0f, -1.0f,
0.5f, 0.5f, 0.75f,
1.0f, 1.0f, 2.0f}));
if (nullptr != DefaultCpuExecutionProvider()) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultCpuExecutionProvider());
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
if (nullptr != DefaultCudaExecutionProvider()) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultCudaExecutionProvider());
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
}

TEST(MathOpTest, Max_12_MLFloat16_VectorMatrix) {
OpTester test("Max", 12);
test.AddInput<MLFloat16>("data_0", {3, 1},
MakeMLFloat16({0.0f, -1.0f, 1.0f}));
test.AddInput<MLFloat16>("data_1", {3, 3},
MakeMLFloat16({1.0f, 1.0f, 1.0f,
-0.5f, 0.0f, -2.0f,
0.5f, 0.0f, 2.0f}));
test.AddOutput<MLFloat16>("max", {3, 3},
MakeMLFloat16({1.0f, 1.0f, 1.0f,
-0.5f, 0.0f, -1.0f,
1.0f, 1.0f, 2.0f}));
if (nullptr != DefaultCpuExecutionProvider()) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultCpuExecutionProvider());
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
if (nullptr != DefaultCudaExecutionProvider()) {
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
execution_providers.push_back(DefaultCudaExecutionProvider());
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
}
}

TEST(MathOpTest, Not) {
OpTester test("Not");
std::vector<int64_t> dims{2};
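
The new tests rely on a `MakeMLFloat16` helper defined earlier in this test
file. Its exact definition isn't shown in this diff; a minimal sketch of
such a helper, assuming only that `MLFloat16` is constructible from `float`
(the stand-in type below is hypothetical, for self-containment):

```cpp
#include <algorithm>
#include <initializer_list>
#include <iterator>
#include <vector>

// Hypothetical stand-in for onnxruntime's MLFloat16 so this sketch is
// self-contained; the real type stores an IEEE half-precision bit pattern.
struct MLFloat16 {
  explicit MLFloat16(float f) : value(f) {}
  float value;
};

// Sketch of a MakeMLFloat16-style helper: converts a list of floats to a
// vector of MLFloat16 values for use with OpTester::AddInput/AddOutput.
std::vector<MLFloat16> MakeMLFloat16(std::initializer_list<float> input) {
  std::vector<MLFloat16> output;
  output.reserve(input.size());
  std::transform(input.begin(), input.end(), std::back_inserter(output),
                 [](float f) { return MLFloat16(f); });
  return output;
}
```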
