# [NVIDIA] FP16 Test Fixes #674

Merged (3 commits) on Jul 6, 2023.

Changes from all commits:
In the graph transformer, an f16 inference-precision request on a device without half-precision support now falls back to f32 instead of throwing:

```diff
@@ -45,7 +45,7 @@ void GraphTransformer::transform(const CUDA::Device& device,
                                  const Configuration& config) const {
     auto inference_precision = config.get_inference_precision();
     if (inference_precision == ov::element::f16 && !isHalfSupported(device)) {
-        throw_ov_exception("Inference precision f16 is not supported by device!");
+        inference_precision = ov::element::f32;
     }
     auto upscale_precision = [&]() -> bool {
         return !isHalfSupported(device) || inference_precision == ov::element::f32;
```
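For context, a minimal sketch of how a caller requests f16 inference precision through the OpenVINO 2.0 API (the model path is a placeholder, not from this PR); with this change the NVIDIA plugin compiles such a model in f32 on devices that lack half support rather than failing:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;

    // "model.xml" is a placeholder for any OpenVINO IR model.
    const auto model = core.read_model("model.xml");

    // Request f16 inference precision. With this PR, the NVIDIA plugin
    // falls back to f32 on devices without half-precision support instead
    // of throwing at compile time.
    auto compiled = core.compile_model(model, "NVIDIA",
                                       ov::hint::inference_precision(ov::element::f16));

    auto request = compiled.create_infer_request();
    return 0;
}
```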
In the basic inference-request tests, `Wait()` on a cancelled request is now expected to complete without throwing:

```diff
@@ -385,7 +385,7 @@ TEST_F(InferenceRequestBasicTest, AsyncParameterResultCancel) {
     fillBlobs(inferRequest, inputsInfo, 1);
     ASSERT_NO_THROW(inferRequest.StartAsync());
     ASSERT_NO_THROW(inferRequest.Cancel());
-    ASSERT_THROW(inferRequest.Wait(5000), std::exception);
+    ASSERT_NO_THROW(inferRequest.Wait(5000));
 }

 TEST_F(smoke_InferenceRequestTest, PerformanceCounters) {
```
In the GRU cell tests, constant weights and biases are now drawn from u(-sqrt(k), sqrt(k)) with k = 1 / hidden_size, matching PyTorch's `torch.nn.GRUCell` initialization; the seed base is fixed at `SEED_FIRST`; and the separate `FP16CUDNNGRUCellTest` class with its hard-coded 0.07 threshold is removed in favor of a precision-dependent threshold in the base test:

```diff
@@ -13,32 +13,38 @@

 namespace LayerTestsDefinitions {

+constexpr int SEED_FIRST = 10;
+constexpr float THRESHOLD_FP16 = 0.05f;
+
 class CUDNNGRUCellTest : public UnsymmetricalComparer<GRUCellTest> {
 protected:
     void SetUp() override {
         GRUCellTest::SetUp();

-        constexpr float up_to = 1.5f;
-        constexpr float start_from = -1.5f;
+        const auto hiddenSize = std::get<2>(this->GetParam());
+
+        // All the weights and biases are initialized from u(-sqrt(k), sqrt(k)), where k = 1 / hidden_size
+        // https://pytorch.org/docs/stable/generated/torch.nn.GRUCell.html
+        const auto k_root = std::sqrt(1.0f / static_cast<float>(hiddenSize));
+
+        const float up_to = k_root;
+        const float start_from = -k_root;

-        int seed = 1;
         const auto& ops = function->get_ordered_ops();
+        int seed = SEED_FIRST;
         for (const auto& op : ops) {
             if (std::dynamic_pointer_cast<ngraph::opset1::Constant>(op)) {
                 const auto constant = ngraph::builder::makeConstant(
-                    op->get_element_type(), op->get_shape(), std::vector<float>{}, true, up_to, start_from, seed++);
+                    op->get_element_type(), op->get_shape(), std::vector<float>{}, true, up_to, start_from, seed);
                 function->replace_node(op, constant);
+                ++seed;
             }
         }
-    }
-};
-
-// this class sets lesser precision because of test failures on some hardware, e.g. RTX2080
-class FP16CUDNNGRUCellTest : public CUDNNGRUCellTest {
-protected:
-    void SetUp() override {
-        CUDNNGRUCellTest::SetUp();
-        threshold = 0.07f;
+
+        const auto& netPrecision = std::get<InferenceEngine::Precision>(this->GetParam());
+        if (netPrecision == InferenceEngine::Precision::FP16) {
+            this->threshold = THRESHOLD_FP16;
+        }
     }
 };
@@ -47,11 +53,6 @@ TEST_P(CUDNNGRUCellTest, CompareWithRefs) {
     Run();
 }

-TEST_P(FP16CUDNNGRUCellTest, CompareWithRefs) {
-    SKIP_IF_CURRENT_TEST_IS_DISABLED()
-    Run();
-}
-
 namespace {

 const bool should_decompose = false;
@@ -103,7 +104,7 @@ INSTANTIATE_TEST_CASE_P(smoke_GRUCellCommon_02_FP32,
                         GRUCellTest::getTestCaseName);

 INSTANTIATE_TEST_CASE_P(smoke_GRUCellCommon_02_FP16,
-                        FP16CUDNNGRUCellTest,
+                        CUDNNGRUCellTest,
                         ::testing::Combine(::testing::Values(should_decompose),
                                            ::testing::Values(smoke_batch_02),
                                            ::testing::ValuesIn(smoke_hidden_sizes_02),
```
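The distribution both the GRU and LSTM tests now mirror can be illustrated standalone. A minimal sketch with a hypothetical helper (not part of the PR), assuming plain `std::mt19937` sampling:

```cpp
#include <cmath>
#include <random>
#include <vector>

// Hypothetical helper (not part of the PR) showing the initialization the
// tests mimic: values drawn from U(-sqrt(k), sqrt(k)), k = 1 / hidden_size,
// as documented for torch.nn.GRUCell / torch.nn.LSTMCell.
std::vector<float> make_uniform_sqrt_k(std::size_t count, std::size_t hidden_size, int seed) {
    const float k_root = std::sqrt(1.0f / static_cast<float>(hidden_size));
    std::mt19937 gen(seed);
    std::uniform_real_distribution<float> dist(-k_root, k_root);

    std::vector<float> values(count);
    for (auto& v : values) {
        v = dist(gen);  // every value lies in (-sqrt(k), sqrt(k))
    }
    return values;
}
```

Narrowing the range this way (the old tests used fixed bounds like ±1.5 or ±5.0) keeps intermediate activations in a regime where FP16 rounding error stays within the comparison thresholds.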
The LSTM cell tests get the same treatment, with a slightly larger FP16 threshold of 0.06:

```diff
@@ -15,15 +15,25 @@

 namespace LayerTestsDefinitions {

+constexpr int SEED_FIRST = 10;
+constexpr float THRESHOLD_FP16 = 0.06f;
+
 class CUDNNLSTMCellTest : public LSTMCellTest {
 public:
     void SetUp() override {
         LSTMCellTest::SetUp();
-        constexpr float up_to = 5.0f;
-        constexpr float start_from = -5.0f;
+
+        const auto hiddenSize = std::get<2>(this->GetParam());
+
+        // All the weights and biases are initialized from u(-sqrt(k), sqrt(k)), where k = 1 / hidden_size
+        // https://pytorch.org/docs/stable/generated/torch.nn.LSTMCell.html
+        const auto k_root = std::sqrt(1.0f / static_cast<float>(hiddenSize));
+
+        const float up_to = k_root;
+        const float start_from = -k_root;
+
         const auto& ops = function->get_ordered_ops();
-        int seed = 1;
+        int seed = SEED_FIRST;
         for (const auto& op : ops) {
             if (std::dynamic_pointer_cast<ngraph::opset1::Constant>(op)) {
                 const auto constant = ngraph::builder::makeConstant(
@@ -32,6 +42,11 @@ class CUDNNLSTMCellTest : public LSTMCellTest {
                 ++seed;
             }
         }
+
+        const auto& netPrecision = std::get<InferenceEngine::Precision>(this->GetParam());
+        if (netPrecision == InferenceEngine::Precision::FP16) {
+            this->threshold = THRESHOLD_FP16;
+        }
     }
 };
```
`modules/nvidia_plugin/tests/functional/skip_tests_config.cpp` now disables the f16 import/export tests on devices without half-precision support:

```diff
@@ -4,6 +4,7 @@

 #include "functional_test_utils/skip_tests_config.hpp"

+#include <cuda/runtime.hpp>
 #include <string>
 #include <vector>

@@ -138,5 +139,13 @@ std::vector<std::string> disabledTestPatterns() {
     retVector.emplace_back(R"(.*ReferenceTopKTest.*topk_max_sort_none)");
     retVector.emplace_back(R"(.*ReferenceTopKTest.*topk_min_sort_none)");
 #endif
+
+    if (!CUDA::isHalfSupported(CUDA::Device{})) {
+        retVector.emplace_back(
+            R"(.*OVExecGraphImportExportTest.*importExportedFunctionParameterResultOnly.*targetDevice=NVIDIA_elementType=f16.*)");
+        retVector.emplace_back(
+            R"(.*OVExecGraphImportExportTest.*importExportedIENetworkParameterResultOnly.*targetDevice=NVIDIA_elementType=f16.*)");
+    }
+
     return retVector;
 }
```
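`CUDA::isHalfSupported` comes from the plugin's `cuda/runtime.hpp` wrapper. A rough sketch of the kind of check such a wrapper performs (an assumption, not the plugin's actual code): native FP16 arithmetic on CUDA GPUs requires compute capability 5.3 or higher.

```cpp
#include <cuda_runtime_api.h>

// Sketch of a half-precision capability check. Assumption: the real
// CUDA::isHalfSupported performs an equivalent compute-capability query.
// Native FP16 arithmetic requires compute capability >= 5.3.
bool is_half_supported(int device_id = 0) {
    cudaDeviceProp props{};
    if (cudaGetDeviceProperties(&props, device_id) != cudaSuccess) {
        return false;  // treat a failed query as "no half support"
    }
    return props.major > 5 || (props.major == 5 && props.minor >= 3);
}
```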
In the CUDA graph-transformer tests, the device is now default-constructed, the include order is tidied, and the f16 comparison is skipped on devices without half support:

```diff
@@ -4,11 +4,12 @@

 #include <gtest/gtest.h>

+#include <cuda/runtime.hpp>
 #include <memory>
-#include "openvino/opsets/opset10.hpp"
-#include "transformer/cuda_graph_transformer.hpp"

 #include "common_test_utils/ngraph_test_utils.hpp"
+#include "openvino/opsets/opset10.hpp"
+#include "transformer/cuda_graph_transformer.hpp"

 using namespace testing;

@@ -23,12 +24,16 @@ TEST(TransformationTests, cuda_transformations_f16) {
         model = std::make_shared<ov::Model>(ov::NodeVector{divide}, ov::ParameterVector{data});

         // Run transformation
-        const CUDA::Device device{0};
+        const CUDA::Device device{};
         const auto config = ov::nvidia_gpu::Configuration(ov::AnyMap{ov::hint::inference_precision(ov::element::f16)});
         ov::nvidia_gpu::GraphTransformer().transform(device, model, config);

         // Check that after applying transformation all runtime info attributes was correctly propagated
         ASSERT_NO_THROW(check_rt_info(model));
+
+        if (!CUDA::isHalfSupported(device)) {
+            GTEST_SKIP() << "f16 precision isn't fully supported on the device";
+        }
     }

     {
```
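Note that `GTEST_SKIP()` returns from the enclosing function, so placing it after the `check_rt_info` assertion still runs the transformation and rt_info checks, and skips only the remaining reference comparison. A minimal illustration (hypothetical test, not from this PR):

```cpp
#include <gtest/gtest.h>

// Hypothetical test illustrating GTEST_SKIP(): the macro records the test as
// skipped and returns from the enclosing function immediately.
TEST(SkipExample, RemainingAssertionsAreNotRun) {
    const bool half_supported = false;  // stand-in for a real capability query
    if (!half_supported) {
        GTEST_SKIP() << "f16 not supported on this device";
    }
    FAIL() << "never reached when the device lacks f16 support";
}
```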