Merge pull request #3 from uhh-cms/aot_support
Add AOT support
riga authored Apr 26, 2024
2 parents 54ba404 + b93db49 commit 82913de
Showing 122 changed files with 21,479 additions and 533 deletions.
4 changes: 2 additions & 2 deletions .flake8
@@ -1,10 +1,10 @@
 [flake8]
 
-# line length of 100 is recommended, but set it to a forgiving value
+# line length
 max-line-length = 120
 
 # codes of errors to ignore
-ignore = E128, E306, E402, E722, E731, W504
+ignore = E128, E306, E402, E702, E722, E731, W504
 
 # enforce double quotes
 inline-quotes = double
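For context, the newly ignored E702 is pycodestyle's "multiple statements on one line (semicolon)" check; a minimal Python illustration (not from this repo) of code it would otherwise flag:

# E702: two statements joined by a semicolon on one line
import os; print(os.getcwd())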
2 changes: 2 additions & 0 deletions .gitattributes
@@ -6,3 +6,5 @@
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pb.txt filter=lfs diff=lfs merge=lfs -text
 *.pbtxt filter=lfs diff=lfs merge=lfs -text
+*.index filter=lfs diff=lfs merge=lfs -text
+*.data-* filter=lfs diff=lfs merge=lfs -text
27 changes: 0 additions & 27 deletions .github/workflows/lint.yml

This file was deleted.

48 changes: 48 additions & 0 deletions .github/workflows/lint_and_test.yml
@@ -0,0 +1,48 @@
+name: Lint and test
+
+on:
+  workflow_dispatch:
+  push:
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout ⬇️
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: Setup python 🐍
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+
+      - name: Install dependencies ☕️
+        run: |
+          pip install -U pip setuptools wheel
+          pip install -r sandboxes/dev.txt
+
+      - name: Lint 🔍
+        run: flake8 mlprof
+
+  typecheck:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout ⬇️
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: Setup python 🐍
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+
+      - name: Install dependencies ☕️
+        run: |
+          pip install -U pip setuptools wheel
+          pip install -r sandboxes/dev.txt
+
+      - name: Typecheck 🥊
+        run: mypy mlprof
4 changes: 2 additions & 2 deletions .gitignore
@@ -12,10 +12,9 @@
 *.npz
 *.h5
 *.hdf5
-*.json
-*.yaml
 *.out
+*.parquet
 *.vscode
 .coverage
 coverage*.xml
 __pycache__
@@ -28,3 +27,4 @@ software
 data
 .data
 .law
+.python-version
27 changes: 16 additions & 11 deletions cmssw/MLProf/RuntimeMeasurement/plugins/ONNXInference.cc
@@ -31,9 +31,9 @@ class ONNXInference : public edm::stream::EDAnalyzer<edm::GlobalCache<ONNXRuntim
   static void globalEndJob(const ONNXRuntime*);
 
 private:
-  void beginJob();
+  void beginJob(){};
   void analyze(const edm::Event&, const edm::EventSetup&);
-  void endJob();
+  void endJob(){};
 
   inline float drawNormal() { return normalPdf_(rndGen_); }
 
@@ -76,10 +76,9 @@ void ONNXInference::fillDescriptions(edm::ConfigurationDescriptions& description
   // the rank (number of dimensions) of each input tensor
   desc.add<std::vector<int>>("inputRanks");
   // flat list of sizes of each dimension of each input tensor
-  // (for a graph with a 1D and a 2D input tensor, this would be a vector of
-  // three values)
+  // (for a graph with a 1D and a 2D input tensor, this would be a vector of three values)
   desc.add<std::vector<int>>("flatInputSizes");
-  // batch sizes to test
+  // batch size to test
   desc.add<int>("batchSize");
   // the number of calls to the graph to measure the runtime
   desc.add<int>("nCalls");
@@ -133,9 +132,10 @@ ONNXInference::ONNXInference(const edm::ParameterSet& iConfig, const ONNXRuntime
     inputType_ = mlprof::InputType::Random;
   } else if (inputTypeStr_ == "zeros") {
     inputType_ = mlprof::InputType::Zeros;
+  } else if (inputTypeStr_ == "ones") {
+    inputType_ = mlprof::InputType::Ones;
   } else {
-    throw cms::Exception("InvalidInputType")
-        << "input type must be either 'incremental', 'zeros' or 'random', got " << inputTypeStr_;
+    throw cms::Exception("InvalidInputType") << "input type unknown: " << inputTypeStr_;
   }
 
   // initialize the input_shapes array with inputRanks_ and flatInputSizes_
@@ -147,10 +147,12 @@
     i += rank;
   }
   // initialize the input data arrays
-  // note there is only one element in the FloatArrays type (i.e.
-  // vector<vector<float>>) variable
+  // note there is only one element in the FloatArrays type (i.e. vector<vector<float>>) variable
   for (int i = 0; i < nInputs_; i++) {
-    inputArrays_.emplace_back(batchSize_ * flatInputSizes_[i], 0);
+    // multiply the size of all dimensions in an input
+    int full_size_input = std::accumulate(begin(input_shapes_[i]), end(input_shapes_[i]), 1, std::multiplies<int>());
+    // initialize inputArrays_ with 0s at first
+    inputArrays_.emplace_back(full_size_input, 0);
   }
 }
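The unpacking logic above, sketched in Python for clarity; this assumes (as the std::accumulate call suggests) that each entry of input_shapes_ already contains the batch dimension:

# minimal sketch mirroring the constructor logic; names follow the C++ members
from math import prod

def build_input_shapes(input_ranks, flat_input_sizes, batch_size):
    shapes, i = [], 0
    for rank in input_ranks:
        # each input consumes `rank` entries of the flat size list
        shapes.append([batch_size] + flat_input_sizes[i:i + rank])
        i += rank
    return shapes

shapes = build_input_shapes([1, 2], [25, 10, 4], batch_size=8)
# -> [[8, 25], [8, 10, 4]]
full_sizes = [prod(s) for s in shapes]  # -> [200, 320], one buffer length per input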

@@ -167,7 +169,7 @@ void ONNXInference::analyze(const edm::Event& iEvent, const edm::EventSetup& iSe
     for (int i = 0; i < (int)group_data.size(); i++) {
       group_data[i] = inputType_ == mlprof::InputType::Incremental
                           ? float(i)
-                          : (inputType_ == mlprof::InputType::Zeros ? float(0) : drawNormal());
+                          : float(inputType_ == mlprof::InputType::Zeros ? 0 : drawNormal());
     }
   }
 
@@ -183,7 +185,7 @@ void ONNXInference::analyze(const edm::Event& iEvent, const edm::EventSetup& iSe
   std::vector<float> runtimes;
   for (int r = 0; r < nCalls_; r++) {
     auto start = std::chrono::high_resolution_clock::now();
+
+    // inference
     outputs = globalCache()->run(inputTensorNames_, inputArrays_, input_shapes_, outputTensorNames_, batchSize_);
+
     auto end = std::chrono::high_resolution_clock::now();
     std::chrono::duration<float> runtime_in_seconds = (end - start);
     runtimes.push_back(runtime_in_seconds.count() * 1000);
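The timing pattern itself is framework independent; a minimal Python equivalent, with time.perf_counter standing in for std::chrono::high_resolution_clock and run_inference as a placeholder for the model call:

import time

def measure_runtimes_ms(run_inference, n_calls):
    # time n_calls invocations and return per-call runtimes in milliseconds
    runtimes = []
    for _ in range(n_calls):
        start = time.perf_counter()
        run_inference()  # inference
        runtimes.append((time.perf_counter() - start) * 1000.0)
    return runtimes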
20 changes: 10 additions & 10 deletions cmssw/MLProf/RuntimeMeasurement/plugins/TFInference.cc
@@ -29,9 +29,9 @@ class TFInference : public edm::stream::EDAnalyzer<edm::GlobalCache<tensorflow::
   static void globalEndJob(const tensorflow::SessionCache*);
 
 private:
-  void beginJob();
+  void beginJob(){};
   void analyze(const edm::Event&, const edm::EventSetup&);
-  void endJob();
+  void endJob(){};
 
   inline float drawNormal() { return normalPdf_(rndGen_); }
   tensorflow::Tensor createInputTensor(int rank, std::vector<int> shape);
@@ -83,7 +83,7 @@ void TFInference::fillDescriptions(edm::ConfigurationDescriptions& descriptions)
   // flat list of sizes of each dimension of each input tensor
   // (for a graph with a 1D and a 2D input tensor, this would be a vector of three values)
   desc.add<std::vector<int>>("flatInputSizes");
-  // batch sizes to test
+  // batch size to test
   desc.add<int>("batchSize");
   // the number of calls to the graph to measure the runtime
   desc.add<int>("nCalls");
@@ -137,16 +137,13 @@ TFInference::TFInference(const edm::ParameterSet& config, const tensorflow::Sess
     inputType_ = mlprof::InputType::Random;
   } else if (inputTypeStr_ == "zeros") {
     inputType_ = mlprof::InputType::Zeros;
+  } else if (inputTypeStr_ == "ones") {
+    inputType_ = mlprof::InputType::Ones;
   } else {
-    throw cms::Exception("InvalidInputType")
-        << "input type must be either 'incremental', 'zeros' or 'random', got " << inputTypeStr_;
+    throw cms::Exception("InvalidInputType") << "input type unknown: " << inputTypeStr_;
   }
 }
 
-void TFInference::beginJob() {}
-
-void TFInference::endJob() {}
-
 tensorflow::Tensor TFInference::createInputTensor(int rank, std::vector<int> shape) {
   // convert the shape to a tf shape
   tensorflow::TensorShape tShape;
@@ -162,7 +159,7 @@ tensorflow::Tensor TFInference::createInputTensor(int rank, std::vector<int> sha
   for (int i = 0; i < tensor.NumElements(); i++, data++) {
     *data = inputType_ == mlprof::InputType::Incremental
                 ? float(i)
-                : (inputType_ == mlprof::InputType::Zeros ? float(0) : drawNormal());
+                : float(inputType_ == mlprof::InputType::Zeros ? 0 : drawNormal());
   }
 
   return tensor;
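The four fill modes (this PR adds "ones" alongside the existing incremental, zeros and random) in a short numpy sketch; this is illustrative and not the plugin's code:

import numpy as np

def make_input(shape, mode, rng=None):
    # re-implementation of the createInputTensor fill modes for illustration
    if mode == "incremental":
        return np.arange(np.prod(shape), dtype=np.float32).reshape(shape)
    if mode == "zeros":
        return np.zeros(shape, dtype=np.float32)
    if mode == "ones":
        return np.ones(shape, dtype=np.float32)
    if mode == "random":
        rng = rng or np.random.default_rng()
        # standard normal draws, matching drawNormal()
        return rng.standard_normal(shape).astype(np.float32)
    raise ValueError(f"input type unknown: {mode}")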
@@ -194,7 +191,7 @@ void TFInference::analyze(const edm::Event& event, const edm::EventSetup& setup)
   std::vector<float> runtimes;
   for (int r = 0; r < nCalls_; r++) {
     auto start = std::chrono::high_resolution_clock::now();
+
+    // inference
     tensorflow::run(session_, inputs, outputTensorNames_, &outputs);
+
     auto end = std::chrono::high_resolution_clock::now();
     std::chrono::duration<float> runtime_in_seconds = (end - start);
     runtimes.push_back(runtime_in_seconds.count() * 1000);
12 changes: 12 additions & 0 deletions cmssw/MLProf/RuntimeMeasurement/plugins/aot/BuildFile.xml
@@ -0,0 +1,12 @@
+<library name="MLProfRuntimeMeasurementTFAOTInference" file="TFAOTInference.cc">
+    <use name="FWCore/Framework"/>
+    <use name="FWCore/PluginManager"/>
+    <use name="FWCore/ParameterSet"/>
+
+    <use name="PhysicsTools/TensorFlowAOT"/>
+    <use name="MLProf/Utils"/>
+
+    <use name="tfaot-model-mlprof-test"/>
+
+    <flags EDM_PLUGIN="1"/>
+</library>
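By analogy with the ONNX and TF plugins above, a cmsRun fragment for the new AOT plugin might look as follows; the plugin name is taken from the file attribute in the BuildFile, but every parameter here is a guess and not confirmed by this diff:

# hypothetical fragment; parameters guessed by analogy with TFInference
import FWCore.ParameterSet.Config as cms

tfaotInference = cms.EDAnalyzer("TFAOTInference",
    batchSize=cms.int32(1),  # assumed
    nCalls=cms.int32(100),   # assumed
)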