diff --git a/cmssw/MLProf/ONNXRuntimeModule/plugins/ONNXPluginRuntime.cpp b/cmssw/MLProf/ONNXRuntimeModule/plugins/ONNXPluginRuntime.cpp
index e1b973a..cc4f804 100644
--- a/cmssw/MLProf/ONNXRuntimeModule/plugins/ONNXPluginRuntime.cpp
+++ b/cmssw/MLProf/ONNXRuntimeModule/plugins/ONNXPluginRuntime.cpp
@@ -1,38 +1,39 @@
 /*
- * Example plugin to demonstrate the direct multi-threaded inference with ONNX Runtime.
+ * Example plugin to demonstrate the direct multi-threaded inference with ONNX
+ * Runtime.
  */
 
 #include <chrono>
 #include <fstream>
+#include <list>
 #include <memory>
 #include <random>
 #include <stdexcept>
 #include <vector>
-#include <list>
 
 #include "FWCore/Framework/interface/Event.h"
 #include "FWCore/Framework/interface/Frameworkfwd.h"
 #include "FWCore/Framework/interface/MakerMacros.h"
 #include "FWCore/Framework/interface/stream/EDAnalyzer.h"
 #include "FWCore/ParameterSet/interface/ParameterSet.h"
-
-#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
-
 #include "MLProf/Utils/interface/utils.h"
+#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
 
 using namespace cms::Ort;
 
-class ONNXRuntimePlugin : public edm::stream::EDAnalyzer<edm::GlobalCache<ONNXRuntime>> {
-public:
+class ONNXRuntimePlugin
+    : public edm::stream::EDAnalyzer<edm::GlobalCache<ONNXRuntime>> {
+ public:
   explicit ONNXRuntimePlugin(const edm::ParameterSet &, const ONNXRuntime *);
-  static void fillDescriptions(edm::ConfigurationDescriptions&);
+  static void fillDescriptions(edm::ConfigurationDescriptions &);
 
-  static std::unique_ptr<ONNXRuntime> initializeGlobalCache(const edm::ParameterSet &);
+  static std::unique_ptr<ONNXRuntime> initializeGlobalCache(
+      const edm::ParameterSet &);
   static void globalEndJob(const ONNXRuntime *);
 
-private:
+ private:
   void beginJob();
-  void analyze(const edm::Event&, const edm::EventSetup&);
+  void analyze(const edm::Event &, const edm::EventSetup &);
   void endJob();
 
   inline float drawNormal() { return normalPdf_(rndGen_); }
@@ -56,12 +57,13 @@ class ONNXRuntimePlugin : public edm::stream::EDAnalyzer<edm::GlobalCache<ONNXRu
   std::normal_distribution<float> normalPdf_;
 
   std::vector<std::vector<int64_t>> input_shapes_;
-  FloatArrays data_;  // each stream hosts its own data
+  FloatArrays inputArrays_;  // each stream hosts its own data
 };
 
-
-void ONNXRuntimePlugin::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  // defining this function will lead to a *_cfi file being generated when compiling
+void ONNXRuntimePlugin::fillDescriptions(
+    edm::ConfigurationDescriptions &descriptions) {
+  // defining this function will lead to a *_cfi file being generated when
+  // compiling
   edm::ParameterSetDescription desc;
   // the path to the file containing the graph
   desc.add<std::string>("graphPath");
@@ -76,22 +78,23 @@ void ONNXRuntimePlugin::fillDescriptions(edm::ConfigurationDescriptions& descrip
   // the rank (number of dimensions) of each input tensor
   desc.add<std::vector<int>>("inputRanks");
   // flat list of sizes of each dimension of each input tensor
-  // (for a graph with a 1D and a 2D input tensor, this would be a vector of three values)
+  // (for a graph with a 1D and a 2D input tensor, this would be a vector of
+  // three values)
   desc.add<std::vector<int>>("flatInputSizes");
   // batch sizes to test
   desc.add<int>("batchSize");
   // the number of calls to the graph to measure the runtime
   desc.add<int>("nCalls");
-  // desc.add<edm::FileInPath>("model_path", edm::FileInPath("MLProf/ONNXRuntimeModule/data/model.onnx"));
-  // desc.add<std::vector<std::string>>("input_names", std::vector<std::string>({"my_input"}));
 
   descriptions.addWithDefaultLabel(desc);
 }
 
-
-ONNXRuntimePlugin::ONNXRuntimePlugin(const edm::ParameterSet &iConfig, const ONNXRuntime *cache)
-    : inputTensorNames_(iConfig.getParameter<std::vector<std::string>>("inputTensorNames")),
-      outputTensorNames_(iConfig.getParameter<std::vector<std::string>>("outputTensorNames")),
+ONNXRuntimePlugin::ONNXRuntimePlugin(const edm::ParameterSet &iConfig,
+                                     const ONNXRuntime *cache)
+    : inputTensorNames_(
+          iConfig.getParameter<std::vector<std::string>>("inputTensorNames")),
+      outputTensorNames_(
+          iConfig.getParameter<std::vector<std::string>>("outputTensorNames")),
       outputFile_(iConfig.getParameter<std::string>("outputFile")),
       inputTypeStr_(iConfig.getParameter<std::string>("inputType")),
       inputRanks_(iConfig.getParameter<std::vector<int>>("inputRanks")),
@@ -101,32 +104,37 @@ ONNXRuntimePlugin::ONNXRuntimePlugin(const edm::ParameterSet &iConfig, const ONN
       nInputs_(inputTensorNames_.size()),
       nPreCalls_(10),
       rndGen_(rnd_()),
-      normalPdf_(0.0, 1.0)
-  {
+      normalPdf_(0.0, 1.0) {
 
   // the number of input ranks must match the number of input tensors
   if ((int)inputRanks_.size() != nInputs_) {
-    throw cms::Exception("InvalidInputRanks") << "number of input ranks must match number of input tensors";
+    throw cms::Exception("InvalidInputRanks")
+        << "number of input ranks must match number of input tensors";
   }
   // the input must be at least 1 dimensional
   for (auto rank : inputRanks_) {
     if (rank < 1) {
-      throw cms::Exception("InvalidRank") << "only ranks above 0 are supported, got " << rank;
+      throw cms::Exception("InvalidRank")
+          << "only ranks above 0 are supported, got " << rank;
     }
   }
   // the sum of ranks must match the number of flat input sizes
-  if (std::accumulate(inputRanks_.begin(), inputRanks_.end(), 0) != (int)flatInputSizes_.size()) {
+  if (std::accumulate(inputRanks_.begin(), inputRanks_.end(), 0) !=
+      (int)flatInputSizes_.size()) {
     throw cms::Exception("InvalidFlatInputSizes")
-        << "sum of input ranks must match number of flat input sizes, got " << flatInputSizes_.size();
+        << "sum of input ranks must match number of flat input sizes, got "
+        << flatInputSizes_.size();
   }
   // batch size must be positive
   if (batchSize_ < 1) {
-    throw cms::Exception("InvalidBatchSize") << "batch sizes must be positive, got " << batchSize_;
+    throw cms::Exception("InvalidBatchSize")
+        << "batch sizes must be positive, got " << batchSize_;
   }
   // input sizes must be positive
   for (auto size : flatInputSizes_) {
     if (size < 1) {
-      throw cms::Exception("InvalidInputSize") << "input sizes must be positive, got " << size;
+      throw cms::Exception("InvalidInputSize")
+          << "input sizes must be positive, got " << size;
     }
   }
   // check the input type
@@ -138,39 +146,47 @@ ONNXRuntimePlugin::ONNXRuntimePlugin(const edm::ParameterSet &iConfig, const ONN
     inputType_ = mlprof::InputType::Zeros;
   } else {
     throw cms::Exception("InvalidInputType")
-        << "input type must be either 'incremental', 'zeros' or 'random', got " << inputTypeStr_;
+        << "input type must be either 'incremental', 'zeros' or 'random', got "
+        << inputTypeStr_;
   }
 
   // initialize the input_shapes array with inputRanks_ and flatInputSizes_
   int i = 0;
   for (auto rank : inputRanks_) {
-    std::vector<int64_t> input_shape(flatInputSizes_.begin() + i, flatInputSizes_.begin() + i + rank);
+    std::vector<int64_t> input_shape(flatInputSizes_.begin() + i,
+                                     flatInputSizes_.begin() + i + rank);
     input_shape.insert(input_shape.begin(), batchSize_);
     input_shapes_.push_back(input_shape);
     i += rank;
  }
   // initialize the input data arrays
-  // note there is only one element in the FloatArrays type (i.e. vector<vector<float>>) variable
+  // note there is only one element in the FloatArrays type (i.e.
+  // vector<vector<float>>) variable
   for (int i = 0; i < nInputs_; i++) {
-    data_.emplace_back(flatInputSizes_[i] * batchSize_, 0);
+    inputArrays_.emplace_back(batchSize_ * flatInputSizes_[i], 0);
   }
 }
 
-
-std::unique_ptr<ONNXRuntime> ONNXRuntimePlugin::initializeGlobalCache(const edm::ParameterSet &iConfig) {
-  return std::make_unique<ONNXRuntime>(edm::FileInPath(iConfig.getParameter<std::string>("graphPath")).fullPath());
+std::unique_ptr<ONNXRuntime> ONNXRuntimePlugin::initializeGlobalCache(
+    const edm::ParameterSet &iConfig) {
+  return std::make_unique<ONNXRuntime>(
+      edm::FileInPath(iConfig.getParameter<std::string>("graphPath"))
+          .fullPath());
 }
 
 void ONNXRuntimePlugin::globalEndJob(const ONNXRuntime *cache) {}
 
-void ONNXRuntimePlugin::analyze(const edm::Event &iEvent, const edm::EventSetup &iSetup) {
+void ONNXRuntimePlugin::analyze(const edm::Event &iEvent,
+                                const edm::EventSetup &iSetup) {
   for (int i = 0; i < nInputs_; i++) {
-    std::vector<float> &group_data = data_[i];
+    std::vector<float> &group_data = inputArrays_[i];
     // fill the input
     for (int i = 0; i < (int)group_data.size(); i++) {
-      group_data[i] = inputType_ == mlprof::InputType::Incremental ? float(i) :
-                      inputType_ == mlprof::InputType::Zeros ? float(0) :
-                      drawNormal();
+      group_data[i] =
+          inputType_ == mlprof::InputType::Incremental
+              ? float(i)
+              : (inputType_ == mlprof::InputType::Zeros ? float(0)
+                                                        : drawNormal());
     }
   }
 
@@ -179,38 +195,23 @@ void ONNXRuntimePlugin::analyze(const edm::Event &iEvent, const edm::EventSetup
 
   // pre calls to "warm up"
   for (int r = 0; r < nPreCalls_; r++) {
-    outputs = globalCache()->run(inputTensorNames_, data_, input_shapes_, outputTensorNames_, batchSize_);
-    // std::cout << "nprerun" << r << std::endl;
+    outputs = globalCache()->run(inputTensorNames_, inputArrays_, input_shapes_,
+                                 outputTensorNames_, batchSize_);
   }
 
   // actual calls to measure runtimes
   std::vector<float> runtimes;
   for (int r = 0; r < nCalls_; r++) {
     auto start = std::chrono::high_resolution_clock::now();
-    outputs = globalCache()->run(inputTensorNames_, data_, input_shapes_, outputTensorNames_, batchSize_);
+    outputs = globalCache()->run(inputTensorNames_, inputArrays_, input_shapes_,
+                                 outputTensorNames_, batchSize_);
     auto end = std::chrono::high_resolution_clock::now();
     std::chrono::duration<float> runtime_in_seconds = (end - start);
-    // std::cout << "nrun" << r << std::endl;
-    // std::cout << "runtime in seconds" << runtime_in_seconds.count() << std::endl;
     runtimes.push_back(runtime_in_seconds.count() * 1000);
   }
 
-  // // print the input and output data
-  // std::cout << "input data -> ";
-  // for ( const auto &input_tensor : data_ ){
-  //   for ( const auto &value : input_tensor ) std::cout << value << ' ';
-  //   std::cout << std::endl;
-  // }
-  // std::cout << std::endl << "output data -> ";
-  // for (auto &output_tensor: outputs) {
-  //   for ( const auto &value : output_tensor ) std::cout << value << ' ';
-  //   std::cout << std::endl;
-  // }
-  // std::cout << std::endl;
-
   // save them
   mlprof::writeRuntimes(outputFile_, batchSize_, runtimes);
 }
 
-
 DEFINE_FWK_MODULE(ONNXRuntimePlugin);
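Note on configuration (illustration only, not part of the patch): since fillDescriptions() is defined, compiling the module auto-generates a *_cfi config fragment. A hand-written cmsRun fragment for this plugin could look like the sketch below; the module label and every parameter value are hypothetical examples, chosen only to satisfy the validation rules in the constructor (here one rank-2 input tensor, hence exactly two entries in flatInputSizes).

import FWCore.ParameterSet.Config as cms

# hypothetical example values; "ONNXRuntimePlugin" is the plugin type
# registered via DEFINE_FWK_MODULE above
onnxRuntimePlugin = cms.EDAnalyzer(
    "ONNXRuntimePlugin",
    graphPath=cms.string("MLProf/ONNXRuntimeModule/data/model.onnx"),
    inputTensorNames=cms.vstring("input_0"),
    outputTensorNames=cms.vstring("output_0"),
    outputFile=cms.string("runtimes.csv"),
    inputType=cms.string("random"),    # "incremental", "zeros" or "random"
    inputRanks=cms.vint32(2),          # one input tensor of rank 2
    flatInputSizes=cms.vint32(10, 4),  # its two dimensions, batch axis excluded
    batchSize=cms.int32(1),
    nCalls=cms.int32(100),
)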
diff --git a/mlprof/tasks/parameters.py b/mlprof/tasks/parameters.py
index 5ae7ae0..88fb66a 100644
--- a/mlprof/tasks/parameters.py
+++ b/mlprof/tasks/parameters.py
@@ -185,8 +185,8 @@ def store_parts(self):
 #     )
 #     model_labels = luigi.Parameter(
 #         default=law.NO_STR,
-#         description="when set, use this label in plots; when empty, the 'network_name' field in the model json data is "
-#         "used when existing, and full_model_name otherwise; default: empty",
+#         description="when set, use this label in plots; when empty, the 'network_name' field in the "
+#         "model json data is used when existing, and full_model_name otherwise; default: empty",
 #     )
 #     def __init__(self, *args, **kwargs):
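Note on the reworded model_labels description (sketch only, not part of the patch): the fallback order it documents could be expressed as below; the helper name and its arguments are hypothetical, and the task's law.NO_STR sentinel handling is simplified to plain truthiness.

def resolve_model_label(model_label, model_data, full_model_name):
    # an explicitly set label wins
    if model_label:
        return model_label
    # otherwise use the optional "network_name" field in the model json data,
    # falling back to the full model name if it is absent
    return model_data.get("network_name") or full_model_name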