# [NVIDIA] FP16 Test Fixes #674

Merged (3 commits) on Jul 6, 2023.

Changes from all commits:
In the graph transformer, an f16 inference-precision request on a device without half-precision support now falls back to f32 instead of throwing:

```diff
@@ -45,7 +45,7 @@ void GraphTransformer::transform(const CUDA::Device& device,
                                  const Configuration& config) const {
     auto inference_precision = config.get_inference_precision();
     if (inference_precision == ov::element::f16 && !isHalfSupported(device)) {
-        throw_ov_exception("Inference precision f16 is not supported by device!");
+        inference_precision = ov::element::f32;
     }
     auto upscale_precision = [&]() -> bool {
         return !isHalfSupported(device) || inference_precision == ov::element::f32;
```
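For context, a minimal sketch of how a caller requests f16 inference precision through the OpenVINO 2.0 API (the model path is a placeholder, not from this PR); with this change the NVIDIA plugin compiles such a model in f32 on devices that lack half support rather than failing:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;

    // "model.xml" is a placeholder for any OpenVINO IR model.
    const auto model = core.read_model("model.xml");

    // Request f16 inference precision. With this PR, the NVIDIA plugin
    // falls back to f32 on devices without half-precision support instead
    // of throwing at compile time.
    auto compiled = core.compile_model(model, "NVIDIA",
                                       ov::hint::inference_precision(ov::element::f16));

    auto request = compiled.create_infer_request();
    return 0;
}
```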
In the basic inference-request tests, `Wait()` on a cancelled request is now expected to complete without throwing:

```diff
@@ -385,7 +385,7 @@ TEST_F(InferenceRequestBasicTest, AsyncParameterResultCancel) {
     fillBlobs(inferRequest, inputsInfo, 1);
     ASSERT_NO_THROW(inferRequest.StartAsync());
     ASSERT_NO_THROW(inferRequest.Cancel());
-    ASSERT_THROW(inferRequest.Wait(5000), std::exception);
+    ASSERT_NO_THROW(inferRequest.Wait(5000));
 }

 TEST_F(smoke_InferenceRequestTest, PerformanceCounters) {
```
In the GRU cell tests, constant weights and biases are now drawn from u(-sqrt(k), sqrt(k)) with k = 1 / hidden_size, matching PyTorch's `torch.nn.GRUCell` initialization; the seed base is fixed at `SEED_FIRST`; and the separate `FP16CUDNNGRUCellTest` class with its hard-coded 0.07 threshold is removed in favor of a precision-dependent threshold in the base test:

```diff
@@ -13,32 +13,38 @@

 namespace LayerTestsDefinitions {

+constexpr int SEED_FIRST = 10;
+constexpr float THRESHOLD_FP16 = 0.05f;
+
 class CUDNNGRUCellTest : public UnsymmetricalComparer<GRUCellTest> {
 protected:
     void SetUp() override {
         GRUCellTest::SetUp();

-        constexpr float up_to = 1.5f;
-        constexpr float start_from = -1.5f;
+        const auto hiddenSize = std::get<2>(this->GetParam());
+
+        // All the weights and biases are initialized from u(-sqrt(k), sqrt(k)), where k = 1 / hidden_size
+        // https://pytorch.org/docs/stable/generated/torch.nn.GRUCell.html
+        const auto k_root = std::sqrt(1.0f / static_cast<float>(hiddenSize));
+
+        const float up_to = k_root;
+        const float start_from = -k_root;

-        int seed = 1;
         const auto& ops = function->get_ordered_ops();
+        int seed = SEED_FIRST;
         for (const auto& op : ops) {
             if (std::dynamic_pointer_cast<ngraph::opset1::Constant>(op)) {
                 const auto constant = ngraph::builder::makeConstant(
-                    op->get_element_type(), op->get_shape(), std::vector<float>{}, true, up_to, start_from, seed++);
+                    op->get_element_type(), op->get_shape(), std::vector<float>{}, true, up_to, start_from, seed);
                 function->replace_node(op, constant);
+                ++seed;
             }
         }
-    }
-};
-
-// this class sets lesser precision because of test failures on some hardware, e.g. RTX2080
-class FP16CUDNNGRUCellTest : public CUDNNGRUCellTest {
-protected:
-    void SetUp() override {
-        CUDNNGRUCellTest::SetUp();
-        threshold = 0.07f;
+
+        const auto& netPrecision = std::get<InferenceEngine::Precision>(this->GetParam());
+        if (netPrecision == InferenceEngine::Precision::FP16) {
+            this->threshold = THRESHOLD_FP16;
+        }
     }
 };
@@ -47,11 +53,6 @@ TEST_P(CUDNNGRUCellTest, CompareWithRefs) {
     Run();
 }

-TEST_P(FP16CUDNNGRUCellTest, CompareWithRefs) {
-    SKIP_IF_CURRENT_TEST_IS_DISABLED()
-    Run();
-}
-
 namespace {

 const bool should_decompose = false;
@@ -103,7 +104,7 @@ INSTANTIATE_TEST_CASE_P(smoke_GRUCellCommon_02_FP32,
                         GRUCellTest::getTestCaseName);

 INSTANTIATE_TEST_CASE_P(smoke_GRUCellCommon_02_FP16,
-                        FP16CUDNNGRUCellTest,
+                        CUDNNGRUCellTest,
                         ::testing::Combine(::testing::Values(should_decompose),
                                            ::testing::Values(smoke_batch_02),
                                            ::testing::ValuesIn(smoke_hidden_sizes_02),
```
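The distribution both the GRU and LSTM tests now mirror can be illustrated standalone. A minimal sketch with a hypothetical helper (not part of the PR), assuming plain `std::mt19937` sampling:

```cpp
#include <cmath>
#include <random>
#include <vector>

// Hypothetical helper (not part of the PR) showing the initialization the
// tests mimic: values drawn from U(-sqrt(k), sqrt(k)), k = 1 / hidden_size,
// as documented for torch.nn.GRUCell / torch.nn.LSTMCell.
std::vector<float> make_uniform_sqrt_k(std::size_t count, std::size_t hidden_size, int seed) {
    const float k_root = std::sqrt(1.0f / static_cast<float>(hidden_size));
    std::mt19937 gen(seed);
    std::uniform_real_distribution<float> dist(-k_root, k_root);

    std::vector<float> values(count);
    for (auto& v : values) {
        v = dist(gen);  // every value lies in (-sqrt(k), sqrt(k))
    }
    return values;
}
```

Narrowing the range this way (the old tests used fixed bounds like ±1.5 or ±5.0) keeps intermediate activations in a regime where FP16 rounding error stays within the comparison thresholds.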
The LSTM cell tests get the same treatment, with a slightly larger FP16 threshold of 0.06:

```diff
@@ -15,15 +15,25 @@

 namespace LayerTestsDefinitions {

+constexpr int SEED_FIRST = 10;
+constexpr float THRESHOLD_FP16 = 0.06f;
+
 class CUDNNLSTMCellTest : public LSTMCellTest {
 public:
     void SetUp() override {
         LSTMCellTest::SetUp();
-        constexpr float up_to = 5.0f;
-        constexpr float start_from = -5.0f;
+
+        const auto hiddenSize = std::get<2>(this->GetParam());
+
+        // All the weights and biases are initialized from u(-sqrt(k), sqrt(k)), where k = 1 / hidden_size
+        // https://pytorch.org/docs/stable/generated/torch.nn.LSTMCell.html
+        const auto k_root = std::sqrt(1.0f / static_cast<float>(hiddenSize));
+
+        const float up_to = k_root;
+        const float start_from = -k_root;
+
         const auto& ops = function->get_ordered_ops();
-        int seed = 1;
+        int seed = SEED_FIRST;
         for (const auto& op : ops) {
             if (std::dynamic_pointer_cast<ngraph::opset1::Constant>(op)) {
                 const auto constant = ngraph::builder::makeConstant(
@@ -32,6 +42,11 @@ class CUDNNLSTMCellTest : public LSTMCellTest {
                 ++seed;
             }
         }
+
+        const auto& netPrecision = std::get<InferenceEngine::Precision>(this->GetParam());
+        if (netPrecision == InferenceEngine::Precision::FP16) {
+            this->threshold = THRESHOLD_FP16;
+        }
     }
 };
```
`modules/nvidia_plugin/tests/functional/skip_tests_config.cpp` now disables the f16 import/export tests on devices without half-precision support:

```diff
@@ -4,6 +4,7 @@

 #include "functional_test_utils/skip_tests_config.hpp"

+#include <cuda/runtime.hpp>
 #include <string>
 #include <vector>

@@ -138,5 +139,13 @@ std::vector<std::string> disabledTestPatterns() {
     retVector.emplace_back(R"(.*ReferenceTopKTest.*topk_max_sort_none)");
     retVector.emplace_back(R"(.*ReferenceTopKTest.*topk_min_sort_none)");
 #endif
+
+    if (!CUDA::isHalfSupported(CUDA::Device{})) {
+        retVector.emplace_back(
+            R"(.*OVExecGraphImportExportTest.*importExportedFunctionParameterResultOnly.*targetDevice=NVIDIA_elementType=f16.*)");
+        retVector.emplace_back(
+            R"(.*OVExecGraphImportExportTest.*importExportedIENetworkParameterResultOnly.*targetDevice=NVIDIA_elementType=f16.*)");
+    }
+
     return retVector;
 }
```
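`CUDA::isHalfSupported` comes from the plugin's `cuda/runtime.hpp` wrapper. A rough sketch of the kind of check such a wrapper performs (an assumption, not the plugin's actual code): native FP16 arithmetic on CUDA GPUs requires compute capability 5.3 or higher.

```cpp
#include <cuda_runtime_api.h>

// Sketch of a half-precision capability check. Assumption: the real
// CUDA::isHalfSupported performs an equivalent compute-capability query.
// Native FP16 arithmetic requires compute capability >= 5.3.
bool is_half_supported(int device_id = 0) {
    cudaDeviceProp props{};
    if (cudaGetDeviceProperties(&props, device_id) != cudaSuccess) {
        return false;  // treat a failed query as "no half support"
    }
    return props.major > 5 || (props.major == 5 && props.minor >= 3);
}
```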
In the CUDA graph-transformer tests, the device is now default-constructed, the include order is tidied, and the f16 comparison is skipped on devices without half support:

```diff
@@ -4,11 +4,12 @@

 #include <gtest/gtest.h>

+#include <cuda/runtime.hpp>
 #include <memory>
-#include "openvino/opsets/opset10.hpp"
-#include "transformer/cuda_graph_transformer.hpp"

 #include "common_test_utils/ngraph_test_utils.hpp"
+#include "openvino/opsets/opset10.hpp"
+#include "transformer/cuda_graph_transformer.hpp"

 using namespace testing;

@@ -23,12 +24,16 @@ TEST(TransformationTests, cuda_transformations_f16) {
         model = std::make_shared<ov::Model>(ov::NodeVector{divide}, ov::ParameterVector{data});

         // Run transformation
-        const CUDA::Device device{0};
+        const CUDA::Device device{};
         const auto config = ov::nvidia_gpu::Configuration(ov::AnyMap{ov::hint::inference_precision(ov::element::f16)});
         ov::nvidia_gpu::GraphTransformer().transform(device, model, config);

         // Check that after applying transformation all runtime info attributes was correctly propagated
         ASSERT_NO_THROW(check_rt_info(model));
+
+        if (!CUDA::isHalfSupported(device)) {
+            GTEST_SKIP() << "f16 precision isn't fully supported on the device";
+        }
     }

     {
```
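Note that `GTEST_SKIP()` returns from the enclosing function, so placing it after the `check_rt_info` assertion still runs the transformation and rt_info checks, and skips only the remaining reference comparison. A minimal illustration (hypothetical test, not from this PR):

```cpp
#include <gtest/gtest.h>

// Hypothetical test illustrating GTEST_SKIP(): the macro records the test as
// skipped and returns from the enclosing function immediately.
TEST(SkipExample, RemainingAssertionsAreNotRun) {
    const bool half_supported = false;  // stand-in for a real capability query
    if (!half_supported) {
        GTEST_SKIP() << "f16 not supported on this device";
    }
    FAIL() << "never reached when the device lacks f16 support";
}
```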