diff --git a/cmssw/MLProf/ONNXRuntimeModule/plugins/BuildFile.xml b/cmssw/MLProf/ONNXRuntimeModule/plugins/BuildFile.xml deleted file mode 100644 index d807b98..0000000 --- a/cmssw/MLProf/ONNXRuntimeModule/plugins/BuildFile.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/cmssw/MLProf/ONNXRuntimeModule/test/onnx_runtime_template_cfg.py b/cmssw/MLProf/ONNXRuntimeModule/test/onnx_runtime_template_cfg.py deleted file mode 100644 index 1724a65..0000000 --- a/cmssw/MLProf/ONNXRuntimeModule/test/onnx_runtime_template_cfg.py +++ /dev/null @@ -1,64 +0,0 @@ -# coding: utf-8 - -import FWCore.ParameterSet.Config as cms -from FWCore.ParameterSet.VarParsing import VarParsing - -# setup minimal options -options = VarParsing("python") -options.register( - "batchSizes", - [1], - VarParsing.multiplicity.list, - VarParsing.varType.int, - "Batch sizes to be tested", -) -options.register( - "csvFile", - "results.csv", - VarParsing.multiplicity.singleton, - VarParsing.varType.string, - "The path of the csv file to save results", -) -options.parseArguments() - - -# define the process to run -process = cms.Process("MLPROF") - -# minimal configuration -process.load("FWCore.MessageService.MessageLogger_cfi") -process.MessageLogger.cerr.FwkReport.reportEvery = 1 -process.maxEvents = cms.untracked.PSet( - input=cms.untracked.int32(__N_EVENTS__), # noqa -) -process.source = cms.Source( - "PoolSource", - fileNames=cms.untracked.vstring(*__INPUT_FILES__), # noqa -) - -# process options -process.options = cms.untracked.PSet( - allowUnscheduled=cms.untracked.bool(True), - wantSummary=cms.untracked.bool(False), -) - -# setup options for multithreaded -process.options.numberOfThreads=cms.untracked.uint32(1) -process.options.numberOfStreams=cms.untracked.uint32(0) -process.options.numberOfConcurrentLuminosityBlocks=cms.untracked.uint32(1) - - -# setup MyPlugin by loading the auto-generated cfi (see MyPlugin.fillDescriptions) -process.load("MLProf.ONNXRuntimeModule.onnxRuntimePlugin_cfi") -process.onnxRuntimePlugin.graphPath = cms.string("__GRAPH_PATH__") -process.onnxRuntimePlugin.inputTensorNames = cms.vstring(__INPUT_TENSOR_NAMES__) # noqa -process.onnxRuntimePlugin.outputTensorNames = cms.vstring(__OUTPUT_TENSOR_NAMES__) # noqa -process.onnxRuntimePlugin.outputFile = cms.string(options.csvFile) -process.onnxRuntimePlugin.inputType = cms.string("__INPUT_TYPE__") -process.onnxRuntimePlugin.inputRanks = cms.vint32(__INPUT_RANKS__) # noqa -process.onnxRuntimePlugin.flatInputSizes = cms.vint32(__FLAT_INPUT_SIZES__) # noqa -process.onnxRuntimePlugin.batchSize = cms.int32(options.batchSizes[0]) -process.onnxRuntimePlugin.nCalls = cms.int32(__N_CALLS__) # noqa - -# define what to run in the path -process.p = cms.Path(process.onnxRuntimePlugin) diff --git a/cmssw/MLProf/RuntimeMeasurement/plugins/BuildFile.xml b/cmssw/MLProf/RuntimeMeasurement/plugins/BuildFile.xml index 5fe6592..14e5d4a 100644 --- a/cmssw/MLProf/RuntimeMeasurement/plugins/BuildFile.xml +++ b/cmssw/MLProf/RuntimeMeasurement/plugins/BuildFile.xml @@ -1,6 +1,21 @@ - - - - + + + + - + + + + + + + + + + + + + + + + diff --git a/cmssw/MLProf/ONNXRuntimeModule/plugins/ONNXPluginRuntime.cpp b/cmssw/MLProf/RuntimeMeasurement/plugins/ONNXInference.cc similarity index 67% rename from cmssw/MLProf/ONNXRuntimeModule/plugins/ONNXPluginRuntime.cpp rename to cmssw/MLProf/RuntimeMeasurement/plugins/ONNXInference.cc index cc4f804..61c0720 100644 --- a/cmssw/MLProf/ONNXRuntimeModule/plugins/ONNXPluginRuntime.cpp +++ 
b/cmssw/MLProf/RuntimeMeasurement/plugins/ONNXInference.cc @@ -1,6 +1,5 @@ /* - * Example plugin to demonstrate the direct multi-threaded inference with ONNX - * Runtime. + * Plugin to measure the inference runtime of an onnx model. */ #include @@ -16,24 +15,24 @@ #include "FWCore/Framework/interface/MakerMacros.h" #include "FWCore/Framework/interface/stream/EDAnalyzer.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "MLProf/Utils/interface/utils.h" #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h" +#include "MLProf/Utils/interface/utils.h" + using namespace cms::Ort; -class ONNXRuntimePlugin - : public edm::stream::EDAnalyzer> { - public: - explicit ONNXRuntimePlugin(const edm::ParameterSet &, const ONNXRuntime *); - static void fillDescriptions(edm::ConfigurationDescriptions &); +class ONNXInference : public edm::stream::EDAnalyzer> { +public: + explicit ONNXInference(const edm::ParameterSet&, const ONNXRuntime*); + ~ONNXInference(){}; - static std::unique_ptr initializeGlobalCache( - const edm::ParameterSet &); - static void globalEndJob(const ONNXRuntime *); + static void fillDescriptions(edm::ConfigurationDescriptions&); + static std::unique_ptr initializeGlobalCache(const edm::ParameterSet&); + static void globalEndJob(const ONNXRuntime*); - private: +private: void beginJob(); - void analyze(const edm::Event &, const edm::EventSetup &); + void analyze(const edm::Event&, const edm::EventSetup&); void endJob(); inline float drawNormal() { return normalPdf_(rndGen_); } @@ -60,8 +59,7 @@ class ONNXRuntimePlugin FloatArrays inputArrays_; // each stream hosts its own data }; -void ONNXRuntimePlugin::fillDescriptions( - edm::ConfigurationDescriptions &descriptions) { +void ONNXInference::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { // defining this function will lead to a *_cfi file being generated when // compiling edm::ParameterSetDescription desc; @@ -89,12 +87,9 @@ void ONNXRuntimePlugin::fillDescriptions( descriptions.addWithDefaultLabel(desc); } -ONNXRuntimePlugin::ONNXRuntimePlugin(const edm::ParameterSet &iConfig, - const ONNXRuntime *cache) - : inputTensorNames_( - iConfig.getParameter>("inputTensorNames")), - outputTensorNames_( - iConfig.getParameter>("outputTensorNames")), +ONNXInference::ONNXInference(const edm::ParameterSet& iConfig, const ONNXRuntime* cache) + : inputTensorNames_(iConfig.getParameter>("inputTensorNames")), + outputTensorNames_(iConfig.getParameter>("outputTensorNames")), outputFile_(iConfig.getParameter("outputFile")), inputTypeStr_(iConfig.getParameter("inputType")), inputRanks_(iConfig.getParameter>("inputRanks")), @@ -107,34 +102,28 @@ ONNXRuntimePlugin::ONNXRuntimePlugin(const edm::ParameterSet &iConfig, normalPdf_(0.0, 1.0) { // the number of input ranks must match the number of input tensors if ((int)inputRanks_.size() != nInputs_) { - throw cms::Exception("InvalidInputRanks") - << "number of input ranks must match number of input tensors"; + throw cms::Exception("InvalidInputRanks") << "number of input ranks must match number of input tensors"; } // the input must be at least 1 dimensional for (auto rank : inputRanks_) { if (rank < 1) { - throw cms::Exception("InvalidRank") - << "only ranks above 0 are supported, got " << rank; + throw cms::Exception("InvalidRank") << "only ranks above 0 are supported, got " << rank; } } // the sum of ranks must match the number of flat input sizes - if (std::accumulate(inputRanks_.begin(), inputRanks_.end(), 0) != - (int)flatInputSizes_.size()) { + if 
(std::accumulate(inputRanks_.begin(), inputRanks_.end(), 0) != (int)flatInputSizes_.size()) { throw cms::Exception("InvalidFlatInputSizes") - << "sum of input ranks must match number of flat input sizes, got " - << flatInputSizes_.size(); + << "sum of input ranks must match number of flat input sizes, got " << flatInputSizes_.size(); } // batch size must be positive if (batchSize_ < 1) { - throw cms::Exception("InvalidBatchSize") - << "batch sizes must be positive, got " << batchSize_; + throw cms::Exception("InvalidBatchSize") << "batch size must be positive, got " << batchSize_; } // input sizes must be positive for (auto size : flatInputSizes_) { if (size < 1) { - throw cms::Exception("InvalidInputSize") - << "input sizes must be positive, got " << size; + throw cms::Exception("InvalidInputSize") << "input sizes must be positive, got " << size; } } // check the input type @@ -146,15 +135,13 @@ ONNXRuntimePlugin::ONNXRuntimePlugin(const edm::ParameterSet &iConfig, inputType_ = mlprof::InputType::Zeros; } else { throw cms::Exception("InvalidInputType") - << "input type must be either 'incremental', 'zeros' or 'random', got " - << inputTypeStr_; + << "input type must be either 'incremental', 'zeros' or 'random', got " << inputTypeStr_; } // initialize the input_shapes array with inputRanks_ and flatInputSizes_ int i = 0; for (auto rank : inputRanks_) { - std::vector input_shape(flatInputSizes_.begin() + i, - flatInputSizes_.begin() + i + rank); + std::vector input_shape(flatInputSizes_.begin() + i, flatInputSizes_.begin() + i + rank); input_shape.insert(input_shape.begin(), batchSize_); input_shapes_.push_back(input_shape); i += rank; @@ -167,26 +154,20 @@ ONNXRuntimePlugin::ONNXRuntimePlugin(const edm::ParameterSet &iConfig, } } -std::unique_ptr ONNXRuntimePlugin::initializeGlobalCache( - const edm::ParameterSet &iConfig) { - return std::make_unique( - edm::FileInPath(iConfig.getParameter("graphPath")) - .fullPath()); +std::unique_ptr ONNXInference::initializeGlobalCache(const edm::ParameterSet& iConfig) { + return std::make_unique(edm::FileInPath(iConfig.getParameter("graphPath")).fullPath()); } -void ONNXRuntimePlugin::globalEndJob(const ONNXRuntime *cache) {} +void ONNXInference::globalEndJob(const ONNXRuntime* cache) {} -void ONNXRuntimePlugin::analyze(const edm::Event &iEvent, - const edm::EventSetup &iSetup) { +void ONNXInference::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { for (int i = 0; i < nInputs_; i++) { - std::vector &group_data = inputArrays_[i]; + std::vector& group_data = inputArrays_[i]; // fill the input for (int i = 0; i < (int)group_data.size(); i++) { - group_data[i] = - inputType_ == mlprof::InputType::Incremental - ? float(i) - : (inputType_ == mlprof::InputType::Zeros ? float(0) - : drawNormal()); + group_data[i] = inputType_ == mlprof::InputType::Incremental + ? float(i) + : (inputType_ == mlprof::InputType::Zeros ? 
float(0) : drawNormal()); } } @@ -195,16 +176,14 @@ void ONNXRuntimePlugin::analyze(const edm::Event &iEvent, // pre calls to "warm up" for (int r = 0; r < nPreCalls_; r++) { - outputs = globalCache()->run(inputTensorNames_, inputArrays_, input_shapes_, - outputTensorNames_, batchSize_); + outputs = globalCache()->run(inputTensorNames_, inputArrays_, input_shapes_, outputTensorNames_, batchSize_); } // actual calls to measure runtimes std::vector runtimes; for (int r = 0; r < nCalls_; r++) { auto start = std::chrono::high_resolution_clock::now(); - outputs = globalCache()->run(inputTensorNames_, inputArrays_, input_shapes_, - outputTensorNames_, batchSize_); + outputs = globalCache()->run(inputTensorNames_, inputArrays_, input_shapes_, outputTensorNames_, batchSize_); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration runtime_in_seconds = (end - start); runtimes.push_back(runtime_in_seconds.count() * 1000); @@ -214,4 +193,4 @@ void ONNXRuntimePlugin::analyze(const edm::Event &iEvent, mlprof::writeRuntimes(outputFile_, batchSize_, runtimes); } -DEFINE_FWK_MODULE(ONNXRuntimePlugin); +DEFINE_FWK_MODULE(ONNXInference); diff --git a/cmssw/MLProf/RuntimeMeasurement/plugins/TFRuntime.cpp b/cmssw/MLProf/RuntimeMeasurement/plugins/TFInference.cc similarity index 67% rename from cmssw/MLProf/RuntimeMeasurement/plugins/TFRuntime.cpp rename to cmssw/MLProf/RuntimeMeasurement/plugins/TFInference.cc index e0f2425..dbd56fa 100644 --- a/cmssw/MLProf/RuntimeMeasurement/plugins/TFRuntime.cpp +++ b/cmssw/MLProf/RuntimeMeasurement/plugins/TFInference.cc @@ -1,5 +1,5 @@ /* - * Plugin to measure the runtime of a tensorflow graph. + * Plugin to measure the inference runtime of a tensorflow model. */ #include @@ -19,13 +19,12 @@ #include "MLProf/Utils/interface/utils.h" -class TFRuntime : public edm::stream::EDAnalyzer> { +class TFInference : public edm::stream::EDAnalyzer> { public: - explicit TFRuntime(const edm::ParameterSet&, const tensorflow::SessionCache*); - ~TFRuntime(){}; + explicit TFInference(const edm::ParameterSet&, const tensorflow::SessionCache*); + ~TFInference(){}; static void fillDescriptions(edm::ConfigurationDescriptions&); - static std::unique_ptr initializeGlobalCache(const edm::ParameterSet&); static void globalEndJob(const tensorflow::SessionCache*); @@ -44,7 +43,7 @@ class TFRuntime : public edm::stream::EDAnalyzer inputRanks_; std::vector flatInputSizes_; - std::vector batchSizes_; + int batchSize_; int nCalls_; // other members @@ -57,16 +56,16 @@ class TFRuntime : public edm::stream::EDAnalyzer TFRuntime::initializeGlobalCache(const edm::ParameterSet& params) { +std::unique_ptr TFInference::initializeGlobalCache(const edm::ParameterSet& params) { std::string graphPath = edm::FileInPath(params.getParameter("graphPath")).fullPath(); // cpu-only for now tensorflow::Options options{tensorflow::Backend::cpu}; return std::make_unique(graphPath, options); } -void TFRuntime::globalEndJob(const tensorflow::SessionCache* cache) {} +void TFInference::globalEndJob(const tensorflow::SessionCache* cache) {} -void TFRuntime::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { +void TFInference::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; // the path to the file containing the graph @@ -85,21 +84,21 @@ void TFRuntime::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { // (for a graph with a 1D and a 2D input tensor, this would be a vector of three values) desc.add>("flatInputSizes"); // batch 
sizes to test - desc.add>("batchSizes"); + desc.add("batchSize"); // the number of calls to the graph to measure the runtime desc.add("nCalls"); descriptions.addWithDefaultLabel(desc); } -TFRuntime::TFRuntime(const edm::ParameterSet& config, const tensorflow::SessionCache* cache) +TFInference::TFInference(const edm::ParameterSet& config, const tensorflow::SessionCache* cache) : inputTensorNames_(config.getParameter>("inputTensorNames")), outputTensorNames_(config.getParameter>("outputTensorNames")), outputFile_(config.getParameter("outputFile")), inputTypeStr_(config.getParameter("inputType")), inputRanks_(config.getParameter>("inputRanks")), flatInputSizes_(config.getParameter>("flatInputSizes")), - batchSizes_(config.getParameter>("batchSizes")), + batchSize_(config.getParameter("batchSize")), nCalls_(config.getParameter("nCalls")), nInputs_(inputTensorNames_.size()), nPreCalls_(10), @@ -122,10 +121,8 @@ TFRuntime::TFRuntime(const edm::ParameterSet& config, const tensorflow::SessionC << "sum of input ranks must match number of flat input sizes, got " << flatInputSizes_.size(); } // batch size must be positive - for (auto batchSize : batchSizes_) { - if (batchSize < 1) { - throw cms::Exception("InvalidBatchSize") << "batch sizes must be positive, got " << batchSize; - } + if (batchSize_ < 1) { + throw cms::Exception("InvalidBatchSize") << "batch size must be positive, got " << batchSize_; } // input sizes must be positive for (auto size : flatInputSizes_) { @@ -146,11 +143,11 @@ TFRuntime::TFRuntime(const edm::ParameterSet& config, const tensorflow::SessionC } } -void TFRuntime::beginJob() {} +void TFInference::beginJob() {} -void TFRuntime::endJob() {} +void TFInference::endJob() {} -tensorflow::Tensor TFRuntime::createInputTensor(int rank, std::vector shape) { +tensorflow::Tensor TFInference::createInputTensor(int rank, std::vector shape) { // convert the shape to a tf shape tensorflow::TensorShape tShape; for (auto dim : shape) { @@ -163,50 +160,48 @@ tensorflow::Tensor TFRuntime::createInputTensor(int rank, std::vector shape // fill it float* data = tensor.flat().data(); for (int i = 0; i < tensor.NumElements(); i++, data++) { - *data = inputType_ == mlprof::InputType::Incremental ? float(i) : - inputType_ == mlprof::InputType::Zeros ? float(0) : - drawNormal(); + *data = inputType_ == mlprof::InputType::Incremental ? float(i) + : inputType_ == mlprof::InputType::Zeros ? 
float(0) + : drawNormal(); } return tensor; } -void TFRuntime::analyze(const edm::Event& event, const edm::EventSetup& setup) { - for (int batchSize : batchSizes_) { - // prepare inputs - std::vector> inputs; - int sizeOffset = 0; - for (int i = 0; i < nInputs_; i++) { - // build the shape - std::vector shape = {batchSize}; - for (int j = 0; j < inputRanks_[i]; j++, sizeOffset++) { - shape.push_back(flatInputSizes_[sizeOffset]); - } - // create and save it - inputs.push_back({inputTensorNames_[i], createInputTensor(inputRanks_[i], shape)}); +void TFInference::analyze(const edm::Event& event, const edm::EventSetup& setup) { + // prepare inputs + std::vector> inputs; + int sizeOffset = 0; + for (int i = 0; i < nInputs_; i++) { + // build the shape + std::vector shape = {batchSize_}; + for (int j = 0; j < inputRanks_[i]; j++, sizeOffset++) { + shape.push_back(flatInputSizes_[sizeOffset]); } + // create and save it + inputs.push_back({inputTensorNames_[i], createInputTensor(inputRanks_[i], shape)}); + } - // prepare output vectors - std::vector outputs; - - // pre calls to "warm up" - for (int r = 0; r < nPreCalls_; r++) { - tensorflow::run(session_, inputs, outputTensorNames_, &outputs); - } + // prepare output vectors + std::vector outputs; - // actual calls to measure runtimes - std::vector runtimes; - for (int r = 0; r < nCalls_; r++) { - auto start = std::chrono::high_resolution_clock::now(); - tensorflow::run(session_, inputs, outputTensorNames_, &outputs); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration runtime_in_seconds = (end - start); - runtimes.push_back(runtime_in_seconds.count() * 1000); - } + // pre calls to "warm up" + for (int r = 0; r < nPreCalls_; r++) { + tensorflow::run(session_, inputs, outputTensorNames_, &outputs); + } - // save them - mlprof::writeRuntimes(outputFile_, batchSize, runtimes); + // actual calls to measure runtimes + std::vector runtimes; + for (int r = 0; r < nCalls_; r++) { + auto start = std::chrono::high_resolution_clock::now(); + tensorflow::run(session_, inputs, outputTensorNames_, &outputs); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration runtime_in_seconds = (end - start); + runtimes.push_back(runtime_in_seconds.count() * 1000); } + + // save them + mlprof::writeRuntimes(outputFile_, batchSize_, runtimes); } -DEFINE_FWK_MODULE(TFRuntime); +DEFINE_FWK_MODULE(TFInference); diff --git a/cmssw/MLProf/RuntimeMeasurement/test/onnx_runtime_template_cfg.py b/cmssw/MLProf/RuntimeMeasurement/test/onnx_runtime_template_cfg.py new file mode 100644 index 0000000..5e5133e --- /dev/null +++ b/cmssw/MLProf/RuntimeMeasurement/test/onnx_runtime_template_cfg.py @@ -0,0 +1,63 @@ +# coding: utf-8 + +import FWCore.ParameterSet.Config as cms +from FWCore.ParameterSet.VarParsing import VarParsing + +# setup minimal options +options = VarParsing("python") +options.register( + "batchSize", + 1, + VarParsing.multiplicity.singleton, + VarParsing.varType.int, + "Batch sizes to be tested", +) +options.register( + "csvFile", + "results.csv", + VarParsing.multiplicity.singleton, + VarParsing.varType.string, + "The path of the csv file to save results", +) +options.parseArguments() + + +# define the process to run +process = cms.Process("MLPROF") + +# minimal configuration +process.load("FWCore.MessageService.MessageLogger_cfi") +process.MessageLogger.cerr.FwkReport.reportEvery = 1 +process.maxEvents = cms.untracked.PSet( + input=cms.untracked.int32(__N_EVENTS__), # noqa +) +process.source = cms.Source( + 
"PoolSource", + fileNames=cms.untracked.vstring(*__INPUT_FILES__), # noqa +) + +# process options +process.options = cms.untracked.PSet( + allowUnscheduled=cms.untracked.bool(True), + wantSummary=cms.untracked.bool(False), +) + +# multi-threading options +process.options.numberOfThreads = cms.untracked.uint32(1) +process.options.numberOfStreams = cms.untracked.uint32(0) +process.options.numberOfConcurrentLuminosityBlocks = cms.untracked.uint32(1) + +# setup the plugin +process.load("MLProf.RuntimeMeasurement.onnxInference_cfi") +process.onnxInference.graphPath = cms.string("__GRAPH_PATH__") +process.onnxInference.inputTensorNames = cms.vstring(__INPUT_TENSOR_NAMES__) # noqa +process.onnxInference.outputTensorNames = cms.vstring(__OUTPUT_TENSOR_NAMES__) # noqa +process.onnxInference.outputFile = cms.string(options.csvFile) +process.onnxInference.inputType = cms.string("__INPUT_TYPE__") +process.onnxInference.inputRanks = cms.vint32(__INPUT_RANKS__) # noqa +process.onnxInference.flatInputSizes = cms.vint32(__FLAT_INPUT_SIZES__) # noqa +process.onnxInference.batchSize = cms.int32(options.batchSize) +process.onnxInference.nCalls = cms.int32(__N_CALLS__) # noqa + +# define what to run in the path +process.p = cms.Path(process.onnxInference) diff --git a/cmssw/MLProf/RuntimeMeasurement/test/tf_runtime_template_cfg.py b/cmssw/MLProf/RuntimeMeasurement/test/tf_runtime_template_cfg.py index def47f7..8729df1 100644 --- a/cmssw/MLProf/RuntimeMeasurement/test/tf_runtime_template_cfg.py +++ b/cmssw/MLProf/RuntimeMeasurement/test/tf_runtime_template_cfg.py @@ -6,9 +6,9 @@ # setup minimal options options = VarParsing("python") options.register( - "batchSizes", - [1], - VarParsing.multiplicity.list, + "batchSize", + 1, + VarParsing.multiplicity.singleton, VarParsing.varType.int, "Batch sizes to be tested", ) @@ -42,17 +42,22 @@ wantSummary=cms.untracked.bool(False), ) +# multi-threading options +process.options.numberOfThreads = cms.untracked.uint32(1) +process.options.numberOfStreams = cms.untracked.uint32(0) +process.options.numberOfConcurrentLuminosityBlocks = cms.untracked.uint32(1) + # setup the plugin -process.load("MLProf.RuntimeMeasurement.tfRuntime_cfi") -process.tfRuntime.graphPath = cms.string("__GRAPH_PATH__") -process.tfRuntime.inputTensorNames = cms.vstring(__INPUT_TENSOR_NAMES__) # noqa -process.tfRuntime.outputTensorNames = cms.vstring(__OUTPUT_TENSOR_NAMES__) # noqa -process.tfRuntime.outputFile = cms.string(options.csvFile) -process.tfRuntime.inputType = cms.string("__INPUT_TYPE__") -process.tfRuntime.inputRanks = cms.vint32(__INPUT_RANKS__) # noqa -process.tfRuntime.flatInputSizes = cms.vint32(__FLAT_INPUT_SIZES__) # noqa -process.tfRuntime.batchSizes = cms.vint32(list(options.batchSizes)) -process.tfRuntime.nCalls = cms.int32(__N_CALLS__) # noqa +process.load("MLProf.RuntimeMeasurement.tfInference_cfi") +process.tfInference.graphPath = cms.string("__GRAPH_PATH__") +process.tfInference.inputTensorNames = cms.vstring(__INPUT_TENSOR_NAMES__) # noqa +process.tfInference.outputTensorNames = cms.vstring(__OUTPUT_TENSOR_NAMES__) # noqa +process.tfInference.outputFile = cms.string(options.csvFile) +process.tfInference.inputType = cms.string("__INPUT_TYPE__") +process.tfInference.inputRanks = cms.vint32(__INPUT_RANKS__) # noqa +process.tfInference.flatInputSizes = cms.vint32(__FLAT_INPUT_SIZES__) # noqa +process.tfInference.batchSize = cms.int32(options.batchSize) +process.tfInference.nCalls = cms.int32(__N_CALLS__) # noqa # define what to run in the path -process.p = 
cms.Path(process.tfRuntime)
+process.p = cms.Path(process.tfInference)
diff --git a/cmssw/MLProf/Utils/BuildFile.xml b/cmssw/MLProf/Utils/BuildFile.xml
new file mode 100644
index 0000000..5d7e337
--- /dev/null
+++ b/cmssw/MLProf/Utils/BuildFile.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/cmssw/MLProf/Utils/interface/utils.h b/cmssw/MLProf/Utils/interface/utils.h
index 1cdf162..747fe93 100644
--- a/cmssw/MLProf/Utils/interface/utils.h
+++ b/cmssw/MLProf/Utils/interface/utils.h
@@ -1,3 +1,6 @@
+#ifndef MLPROF_UTILS_H
+#define MLPROF_UTILS_H
+
 /*
  * Helper functions.
  */
@@ -10,18 +13,20 @@
 
 namespace mlprof {
 
-enum InputType {
-  Incremental,
-  Random,
-  Zeros,
-};
+  enum InputType {
+    Incremental,
+    Random,
+    Zeros,
+  };
 
-void writeRuntimes(const std::string& path, float batchSize, std::vector runtimes) {
-  std::ofstream file(path, std::ios::out | std::ios::app);
-  for (int i = 0; i < (int)runtimes.size(); i++) {
-    file << batchSize << "," << runtimes[i] << std::endl;
+  void writeRuntimes(const std::string& path, float batchSize, std::vector runtimes) {
+    std::ofstream file(path, std::ios::out | std::ios::app);
+    for (int i = 0; i < (int)runtimes.size(); i++) {
+      file << batchSize << "," << runtimes[i] << std::endl;
+    }
+    file.close();
   }
-  file.close();
-}
 
 }  // namespace mlprof
+
+#endif  // MLPROF_UTILS_H
diff --git a/cmssw/install_sandbox.sh b/cmssw/install_sandbox.sh
index e80b33b..1503e4e 100644
--- a/cmssw/install_sandbox.sh
+++ b/cmssw/install_sandbox.sh
@@ -1,5 +1,8 @@
 #!/bin/bash
 
+# This script is executed in the $CMSSW_BASE/src directory after the initial "cmsenv" command and
+# before "scram b" is called.
+
 rm -rf MLProf
 cp -r "${MLP_BASE}/cmssw/MLProf" .
 rm -rf MLProf/*/test
diff --git a/examples/cnn/model.json b/examples/cnn/model.json
deleted file mode 100644
index 3c3e3f2..0000000
--- a/examples/cnn/model.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "file": "conv_2d_inputs.pb",
-    "inputs": [
-        {
-            "name": "input_0_input",
-            "shape": [28,28,1]
-        }
-    ],
-    "outputs": [
-        {
-            "name": "Identity"
-        }
-    ],
-    "network_name": "cnn",
-    "inference_engine": "tf"
-}
diff --git a/examples/dnn_2_inputs/model.json b/examples/dnn_2_inputs/model.json
deleted file mode 100644
index d504b07..0000000
--- a/examples/dnn_2_inputs/model.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-    "file": "dnn_2_inputs.pb",
-    "inputs": [
-        {
-            "name": "input_0",
-            "shape": [392]
-        },
-        {
-            "name": "input_1",
-            "shape": [392]
-        }
-    ],
-    "outputs": [
-        {
-            "name": "Identity"
-        }
-    ],
-    "network_name": "dnn_2_inputs",
-    "inference_engine": "tf"
-}
diff --git a/examples/simple_dnn/model.json b/examples/simple_dnn/model.json
deleted file mode 100644
index d424054..0000000
--- a/examples/simple_dnn/model.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "file": "simple_dnn.pb",
-    "inputs": [
-        {
-            "name": "input_0",
-            "shape": [784]
-        }
-    ],
-    "outputs": [
-        {
-            "name": "Identity"
-        }
-    ],
-    "network_name": "dnn",
-    "inference_engine": "tf"
-}
diff --git a/mlprof/tasks/parameters.py b/mlprof/tasks/parameters.py
index 9c800e4..f3ffa29 100644
--- a/mlprof/tasks/parameters.py
+++ b/mlprof/tasks/parameters.py
@@ -18,7 +18,7 @@
     def __init__(self, model_file: str, name, label, **kwargs):
         super().__init__(**kwargs)
 
-        self.model_file = model_file
+        self.model_file = os.path.abspath(os.path.expandvars(os.path.expanduser(model_file)))
         self.name = name
         self.label = label
 
@@ -37,9 +37,8 @@ def full_name(self):
             return self.name
 
         # create a hash
-        model_file = os.path.expandvars(os.path.expanduser(self.model_file))
-        name = os.path.splitext(os.path.basename(model_file))[0]
-        return f"{name}{law.util.create_hash(model_file)}"
+        name = os.path.splitext(os.path.basename(self.model_file))[0]
+        return f"{name}{law.util.create_hash(self.model_file)}"
 
     @property
     def full_model_label(self):
@@ -61,8 +60,8 @@ class CMSSWParameters(BaseTask):
     """
 
     cmssw_version = luigi.Parameter(
-        default="CMSSW_13_3_1",
-        description="CMSSW version; default: CMSSW_13_3_1",
+        default="CMSSW_13_3_3",
+        description="CMSSW version; default: CMSSW_13_3_3",
     )
     scram_arch = luigi.Parameter(
         default="slc7_amd64_gcc12",
@@ -129,9 +128,9 @@ class ModelParameters(BaseTask):
     """
 
     model_file = luigi.Parameter(
-        default="$MLP_BASE/examples/simple_dnn/model.json",
+        default="$MLP_BASE/examples/simple_dnn/model_tf.json",
         description="json file containing information of model to be tested; "
-        "default: $MLP_BASE/examples/simple_dnn/model.json",
+        "default: $MLP_BASE/examples/simple_dnn/model_tf.json",
     )
     model_name = luigi.Parameter(
         default=law.NO_STR,
@@ -144,6 +143,15 @@
         "used when existing, and full_name otherwise; default: empty",
     )
 
+    @classmethod
+    def modify_param_values(cls, params) -> dict:
+        params = super().modify_param_values(params)
+
+        if params.get("model_file"):
+            params["model_file"] = os.path.abspath(os.path.expandvars(os.path.expanduser(params["model_file"])))
+
+        return params
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
diff --git a/mlprof/tasks/runtime.py b/mlprof/tasks/runtime.py
index 4658695..8245b38 100644
--- a/mlprof/tasks/runtime.py
+++ b/mlprof/tasks/runtime.py
@@ -45,7 +45,8 @@ def run(self):
 
         # resolve the graph path relative to the model file
         graph_path = os.path.expandvars(os.path.expanduser(model_data["file"]))
-        graph_path = os.path.join(os.path.dirname(self.model_file), graph_path)
+        model_file = os.path.expandvars(os.path.expanduser(self.model_file))
+        graph_path = os.path.join(os.path.dirname(model_file), graph_path)
 
         # determine input files
         if self.input_file:
@@ -81,7 +82,7 @@
         if model_data["inference_engine"] == "tf":
             template = "$MLP_BASE/cmssw/MLProf/RuntimeMeasurement/test/tf_runtime_template_cfg.py"
         elif model_data["inference_engine"] == "onnx":
-            template = "$MLP_BASE/cmssw/MLProf/ONNXRuntimeModule/test/onnx_runtime_template_cfg.py"
+            template = "$MLP_BASE/cmssw/MLProf/RuntimeMeasurement/test/onnx_runtime_template_cfg.py"
         else:
             raise Exception("The only inference_engine supported are 'tf' and 'onnx'")
 
@@ -111,13 +112,13 @@
     def requires(self):
         return CreateRuntimeConfig.req(self)
 
     def output(self):
-        return self.local_target(f"runtime_bs_{self.batch_size}.csv")
+        return self.local_target(f"runtime_bs{self.batch_size}.csv")
 
     def build_command(self):
         return [
             "cmsRun",
             self.input().path,
-            f"batchSizes={self.batch_size}",
+            f"batchSize={self.batch_size}",
             f"csvFile={self.output().path}",
         ]
@@ -131,7 +132,7 @@ def requires(self):
         return [
         ]
 
     def output(self):
-        return self.local_target(f"runtimes_bs_{self.batch_sizes_repr}.csv")
+        return self.local_target(f"runtimes_bs{self.batch_sizes_repr}.csv")
 
     def run(self):
         # merge files
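For reference, each CSV produced by the measurement step contains one "batch_size,runtime_ms" row per measured call (written by mlprof::writeRuntimes in utils.h), and MeasureRuntime writes one such file per batch size (runtime_bs<N>.csv). A minimal, illustrative sketch of how these rows could be summarized outside of the law tasks; the script name and file pattern are hypothetical and not part of this PR:

# coding: utf-8
# summarize_runtimes.py (hypothetical helper, not part of this PR)
# Reads "batch_size,runtime_ms" rows as written by mlprof::writeRuntimes and
# prints the mean and standard deviation of the runtime per batch size.

import csv
import glob
import statistics
from collections import defaultdict

runtimes = defaultdict(list)  # batch size -> list of runtimes in ms

# assumed file pattern, matching the MeasureRuntime outputs ("runtime_bs<N>.csv")
for path in glob.glob("runtime_bs*.csv"):
    with open(path, newline="") as f:
        for row in csv.reader(f):
            if len(row) == 2:
                batch_size, runtime_ms = row
                runtimes[int(float(batch_size))].append(float(runtime_ms))

for batch_size, values in sorted(runtimes.items()):
    mean = statistics.mean(values)
    std = statistics.pstdev(values) if len(values) > 1 else 0.0
    print(f"batch size {batch_size}: {mean:.3f} +- {std:.3f} ms ({len(values)} calls)")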