From 3ed00bdf7900800b5636058ffc8fe23ac342051c Mon Sep 17 00:00:00 2001
From: Yaroslav Torziuk
Date: Mon, 26 Jun 2023 20:28:43 +0200
Subject: [PATCH] Add tests which cover CudaGraphTopologyRunner functionality

---
 .../unit/cuda_graph_topology_runner_test.cpp | 127 ++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 modules/nvidia_plugin/tests/unit/cuda_graph_topology_runner_test.cpp

diff --git a/modules/nvidia_plugin/tests/unit/cuda_graph_topology_runner_test.cpp b/modules/nvidia_plugin/tests/unit/cuda_graph_topology_runner_test.cpp
new file mode 100644
index 0000000000..f676506787
--- /dev/null
+++ b/modules/nvidia_plugin/tests/unit/cuda_graph_topology_runner_test.cpp
@@ -0,0 +1,127 @@
+// Copyright (C) 2020-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <cuda_graph_topology_runner.hpp>
+#include <cuda_profiler.hpp>
+#include <memory_manager/cuda_device_mem_block.hpp>
+#include <openvino/runtime/tensor.hpp>
+#include <ops/parameter.hpp>
+#include <ops/result.hpp>
+
+#include "test_networks.hpp"
+
+using namespace ov::nvidia_gpu;
+using namespace testing;
+
+class CudaGraphTopologyRunnerTest : public Test {
+protected:
+    static std::map<std::string, std::size_t> PopulateInputIndeces(std::shared_ptr<ov::Model> model) {
+        std::map<std::string, std::size_t> inputIndeces;
+        for (const auto& parameter : model->get_parameters()) {
+            const auto& parameter_index = model->get_parameter_index(parameter);
+            inputIndeces.emplace(ParameterOp::GetInputTensorName(*parameter), parameter_index);
+        }
+        return inputIndeces;
+    }
+
+    static std::map<std::string, std::size_t> PopulateOutputIndeces(std::shared_ptr<ov::Model> model) {
+        std::map<std::string, std::size_t> outputIndeces;
+        for (auto& result : model->get_results()) {
+            const auto& result_index = model->get_result_index(result->input_value(0));
+            for (const auto& outputName : ResultOp::GetOutputTensorName(*result)) {
+                outputIndeces.emplace(outputName, result_index);
+            }
+        }
+        return outputIndeces;
+    }
+
+    static std::vector<std::shared_ptr<ov::Tensor>> PopulateTensors(const std::vector<ov::Output<ov::Node>>& nodes) {
+        std::vector<std::shared_ptr<ov::Tensor>> ret;
+        for (const auto& node : nodes)
+            ret.push_back(std::make_shared<ov::Tensor>(node.get_element_type(), node.get_shape()));
+        return ret;
+    }
+
+    std::shared_ptr<ov::Model> model_ {CreateMatMulTestNetwork()};
+    CreationContext creationContext_ {{}, false};
+    ThreadContext threadContext_ {{}};
+    CancellationToken cancellationToken_ {};
+    CudaGraphContext cudaGraphContext_ {};
+    CudaGraphTopologyRunner runner_ {creationContext_, model_};
+    Profiler profiler_ {false, runner_.GetSubGraph()};
+    std::vector<std::shared_ptr<ov::Tensor>> inputTensors_ {PopulateTensors(model_->inputs())};
+    std::vector<std::shared_ptr<ov::Tensor>> outputTensors_ {PopulateTensors(model_->outputs())};
+    std::map<std::string, std::size_t> inputIndeces_ {PopulateInputIndeces(model_)};
+    std::map<std::string, std::size_t> outputIndeces_ {PopulateOutputIndeces(model_)};
+    InferenceRequestContext inferRequestContext_{inputTensors_,
+                                                 inputIndeces_,
+                                                 outputTensors_,
+                                                 outputIndeces_,
+                                                 threadContext_,
+                                                 cancellationToken_,
+                                                 profiler_,
+                                                 cudaGraphContext_,
+                                                 false};
+    DeviceMemBlock deviceMemBlock_ {runner_.GetSubGraph().memoryManager().mutableTensorsMemoryModel()};
+};
+
+TEST_F(CudaGraphTopologyRunnerTest, InstantiateGraphExec) {
+    runner_.UpdateContext(inferRequestContext_, deviceMemBlock_);
+    EXPECT_TRUE(inferRequestContext_.getCudaGraphContext().graphExec.has_value());
+}
+
+TEST_F(CudaGraphTopologyRunnerTest, BasicRun) {
+    EXPECT_NO_THROW(runner_.UpdateContext(inferRequestContext_, deviceMemBlock_));
+    EXPECT_NO_THROW(runner_.Run(inferRequestContext_, deviceMemBlock_));
+}
+
+TEST_F(CudaGraphTopologyRunnerTest, CheckGraphExecIsInstantiatedOnce) {
+    runner_.UpdateContext(inferRequestContext_, deviceMemBlock_);
+    CUDA::GraphExec* exec = &inferRequestContext_.getCudaGraphContext().graphExec.value();
+    runner_.UpdateContext(inferRequestContext_, deviceMemBlock_);
+    EXPECT_EQ(&inferRequestContext_.getCudaGraphContext().graphExec.value(), exec);
+}
+
+TEST_F(CudaGraphTopologyRunnerTest, CheckMemcpyNodesArePopulated) {
+    runner_.UpdateContext(inferRequestContext_, deviceMemBlock_);
+    EXPECT_GT(inferRequestContext_.getCudaGraphContext().parameterNodes.size(), 0);
+    EXPECT_GT(inferRequestContext_.getCudaGraphContext().resultNodes.size(), 0);
+}
+
+TEST_F(CudaGraphTopologyRunnerTest, CheckMemcpyNodesAreUpdated) {
+    runner_.UpdateContext(inferRequestContext_, deviceMemBlock_);
+    auto paramNodes = cudaGraphContext_.parameterNodes;
+    auto resultNodes = cudaGraphContext_.resultNodes;
+    std::vector<std::shared_ptr<ov::Tensor>> inputTensors {PopulateTensors(model_->inputs())};
+    std::vector<std::shared_ptr<ov::Tensor>> outputTensors {PopulateTensors(model_->outputs())};
+    InferenceRequestContext inferRequestContext{inputTensors,
+                                                inputIndeces_,
+                                                outputTensors,
+                                                outputIndeces_,
+                                                threadContext_,
+                                                cancellationToken_,
+                                                profiler_,
+                                                cudaGraphContext_,
+                                                false};
+    runner_.UpdateContext(inferRequestContext, deviceMemBlock_);
+    EXPECT_NE(cudaGraphContext_.parameterNodes, paramNodes);
+    EXPECT_NE(cudaGraphContext_.resultNodes, resultNodes);
+}
+
+TEST_F(CudaGraphTopologyRunnerTest, CheckMemcpyNodesAreNotUpdatedIfPointersUnchanged) {
+    runner_.UpdateContext(inferRequestContext_, deviceMemBlock_);
+    auto paramNodes = cudaGraphContext_.parameterNodes;
+    auto resultNodes = cudaGraphContext_.resultNodes;
+    InferenceRequestContext inferRequestContext{inputTensors_,
+                                                inputIndeces_,
+                                                outputTensors_,
+                                                outputIndeces_,
+                                                threadContext_,
+                                                cancellationToken_,
+                                                profiler_,
+                                                cudaGraphContext_,
+                                                false};
+    runner_.UpdateContext(inferRequestContext, deviceMemBlock_);
+    EXPECT_EQ(cudaGraphContext_.parameterNodes, paramNodes);
+    EXPECT_EQ(cudaGraphContext_.resultNodes, resultNodes);
+}
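
Note for reviewers (not part of the patch): the behaviours these tests pin down mirror the usual CUDA Graphs flow of capture, instantiate once, patch memcpy node parameters in place, then launch. The standalone sketch below illustrates that flow with the raw CUDA runtime API rather than the plugin's CUDA:: wrappers; the single device-to-device copy, the buffer sizes, and the CUDA 11.x five-argument cudaGraphInstantiate call are assumptions of this illustration, not code from the change.

// capture_update_launch.cu -- minimal CUDA Graphs sketch (assumes CUDA 11.1+).
#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

int main() {
    constexpr size_t count = 1024;
    void *src = nullptr, *dst = nullptr, *newSrc = nullptr;
    cudaMalloc(&src, count);
    cudaMalloc(&dst, count);
    cudaMalloc(&newSrc, count);

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // 1. Capture the work (here a single D2D copy) into a graph.
    cudaGraph_t graph;
    cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal);
    cudaMemcpyAsync(dst, src, count, cudaMemcpyDeviceToDevice, stream);
    cudaStreamEndCapture(stream, &graph);

    // 2. Instantiate once; building the executable graph is the expensive step,
    //    which is why the tests expect graphExec to be created a single time.
    cudaGraphExec_t graphExec;
    cudaGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0);

    // 3. When an input pointer changes between launches, patch the memcpy node
    //    inside the instantiated graph instead of re-instantiating it -- the
    //    property CheckMemcpyNodesAreUpdated / ...NotUpdatedIfPointersUnchanged
    //    verify at the plugin level.
    size_t numNodes = 0;
    cudaGraphGetNodes(graph, nullptr, &numNodes);
    std::vector<cudaGraphNode_t> nodes(numNodes);
    cudaGraphGetNodes(graph, nodes.data(), &numNodes);
    cudaGraphExecMemcpyNodeSetParams1D(graphExec, nodes[0], dst, newSrc, count,
                                       cudaMemcpyDeviceToDevice);

    // 4. Launch the (updated) executable graph.
    cudaGraphLaunch(graphExec, stream);
    cudaStreamSynchronize(stream);
    std::printf("graph launched with %zu node(s)\n", numNodes);

    cudaGraphExecDestroy(graphExec);
    cudaGraphDestroy(graph);
    cudaStreamDestroy(stream);
    cudaFree(src);
    cudaFree(dst);
    cudaFree(newSrc);
    return 0;
}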