Skip to content

Commit

Permalink
small code corrections, including label plot with single network, do plots with correct unit, add zeros input type, add top right custom label
Browse files Browse the repository at this point in the history
  • Loading branch information
nprouvost committed Jan 15, 2024
1 parent baced1a commit 8bf0846
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 22 deletions.
14 changes: 8 additions & 6 deletions cmssw/MLProf/RuntimeMeasurement/plugins/TFRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,6 @@ TFRuntime::TFRuntime(const edm::ParameterSet& config, const tensorflow::SessionC
if (rank < 1) {
throw cms::Exception("InvalidRank") << "only ranks above 0 are supported, got " << rank;
}
if (rank > 3) {
throw cms::Exception("InvalidRank") << "only ranks up to 3 are supported, got " << rank;
}
}
// the sum of ranks must match the number of flat input sizes
if (std::accumulate(inputRanks_.begin(), inputRanks_.end(), 0) != (int)flatInputSizes_.size()) {
Expand All @@ -141,9 +138,11 @@ TFRuntime::TFRuntime(const edm::ParameterSet& config, const tensorflow::SessionC
inputType_ = mlprof::InputType::Incremental;
} else if (inputTypeStr_ == "random") {
inputType_ = mlprof::InputType::Random;
} else if (inputTypeStr_ == "zeros") {
inputType_ = mlprof::InputType::Zeros;
} else {
throw cms::Exception("InvalidInputType")
<< "input type must be either 'incremental' or 'random', got " << inputTypeStr_;
<< "input type must be either 'incremental', 'zeros' or 'random', got " << inputTypeStr_;
}
}

Expand All @@ -164,7 +163,9 @@ tensorflow::Tensor TFRuntime::createInputTensor(int rank, std::vector<int> shape
// fill it
float* data = tensor.flat<float>().data();
for (int i = 0; i < tensor.NumElements(); i++, data++) {
*data = inputType_ == mlprof::InputType::Incremental ? float(i) : drawNormal();
*data = inputType_ == mlprof::InputType::Incremental ? float(i) :
inputType_ == mlprof::InputType::Zeros ? float(0) :
drawNormal();
}

return tensor;
Expand Down Expand Up @@ -199,7 +200,8 @@ void TFRuntime::analyze(const edm::Event& event, const edm::EventSetup& setup) {
auto start = std::chrono::high_resolution_clock::now();
tensorflow::run(session_, inputs, outputTensorNames_, &outputs);
auto end = std::chrono::high_resolution_clock::now();
runtimes.push_back((end - start).count() * 1000);
std::chrono::duration<float> runtime_in_seconds = (end - start);
runtimes.push_back(runtime_in_seconds.count() * 1000);
}

// save them
Expand Down
1 change: 1 addition & 0 deletions cmssw/MLProf/Utils/interface/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ namespace mlprof {
// strategies for filling the network input tensors before a runtime measurement
enum InputType {
// fill each element with its flat index (0, 1, 2, ...)
Incremental,
// fill each element with a random draw (via drawNormal() in createInputTensor)
Random,
// fill every element with zero
Zeros,
};

void writeRuntimes(const std::string& path, float batchSize, std::vector<float> runtimes) {
Expand Down
10 changes: 7 additions & 3 deletions mlprof/plotting/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,12 @@ def plot_batch_size_several_measurements(different_batch_sizes, input_paths, out
import matplotlib.pyplot as plt
import mplhep as hep

measurements_paths_strs = ["_".join(measurement) for measurement in measurements]
measurements_labels_strs = [", ".join(measurement) for measurement in measurements]
if type(measurements[0]) == str:
measurements_paths_strs = measurements
measurements_labels_strs = measurements
else:
measurements_paths_strs = ["_".join(measurement) for measurement in measurements]
measurements_labels_strs = [", ".join(measurement) for measurement in measurements]
# get the values to be plotted
plotting_values = {}
for i, input_path in enumerate(input_paths):
Expand Down Expand Up @@ -146,7 +150,7 @@ def plot_batch_size_several_measurements(different_batch_sizes, input_paths, out

# choose text to add on the top left of the figure
hep.cms.text(text="MLProf", loc=0) # hep.cms.text(text="Simulation, Network test", loc=0)

hep.cms.lumitext(text=customization_dict["top_right_label"])
# save plot
fig.savefig(output_path, bbox_inches="tight")
plt.close()
14 changes: 10 additions & 4 deletions mlprof/tasks/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class RuntimeParameters(BaseTask):
)
input_type = luigi.Parameter(
default="random",
description="either 'random', 'incremental', or a path to a root file; default: random",
description="either 'random', 'incremental', 'zeros', or a path to a root file; default: random",
)
n_events = luigi.IntParameter(
default=1,
Expand All @@ -68,11 +68,11 @@ def __init__(self, *args, **kwargs):

# verify the input type
self.input_file = None
if self.input_type not in ("random", "incremental"):
if self.input_type not in ("random", "incremental", "zeros"):
self.input_file = os.path.abspath(os.path.expandvars(os.path.expanduser(self.input_type)))
if not os.path.exists(self.input_file):
raise ValueError(
f"input type '{self.input_type}' is neither 'random' nor 'incremental' nor a path to an existing "
f"input type '{self.input_type}' is neither 'random' nor 'incremental' nor 'zeros' nor a path to an existing "
f"root file",
)

Expand Down Expand Up @@ -144,7 +144,13 @@ class CustomPlotParameters(BaseTask):
default=True,
description="plot the errors as error bands instead of error bars; default: True",
)
top_right_label = luigi.Parameter(
default="",
description="stick a label over the top right corner of the plot",
)

@property
def custom_plot_params(self):
    """
    Dictionary of all custom plotting options selected through task parameters,
    passed through to the plotting functions.
    """
    # keep one entry per CustomPlotParameters luigi parameter
    return {
        "log_y": self.log_y,
        "bs_normalized": self.bs_normalized,
        "filling": self.filling,
        "top_right_label": self.top_right_label,
    }
32 changes: 23 additions & 9 deletions mlprof/tasks/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

import os
import itertools

import luigi
import law
Expand Down Expand Up @@ -275,11 +276,13 @@ def run(self):
)


class PlotRuntimesMultipleParams(RuntimeParameters,
CMSSWParameters,
BatchSizesParameters,
PlotTask,
CustomPlotParameters):
class PlotRuntimesMultipleParams(
RuntimeParameters,
CMSSWParameters,
BatchSizesParameters,
PlotTask,
CustomPlotParameters,
):
"""
Task to plot the results from the runtime measurements for several parameters, e.g. networks
or cmssw versions, depending on the batch sizes
Expand All @@ -290,28 +293,31 @@ class PlotRuntimesMultipleParams(RuntimeParameters,

model_files = law.CSVParameter(
description="comma-separated list of json files containing information of models to be tested",
default=None,
)

cmssw_versions = law.CSVParameter(
cls=luigi.Parameter,
default=("CMSSW_12_2_4", "CMSSW_12_2_2"),
default=None,
description="comma-separated list of CMSSW versions; default: ('CMSSW_12_2_4','CMSSW_12_2_2')",
brace_expand=True,
)

# create params_to_write if model_files or cmssw_versions is None? -> gets difficult with itertools product if only one param is changed

def requires(self):
    """
    Require one MergeRuntimes task per (model file, cmssw version) combination.
    """
    # fall back to the single-value parameters when the tuple parameters are undefined
    self.fill_undefined_param_values()
    # note: itertools is imported at module level; no function-local import needed
    return [
        MergeRuntimes.req(self, model_file=model_file, cmssw_version=cmssw_version)
        for model_file, cmssw_version in itertools.product(self.model_files, self.cmssw_versions)
    ]

def output(self):
    """
    Target of the plot file, encoding all parameter combinations and batch sizes in its name.
    """
    self.fill_undefined_param_values()
    combos = self.factorize_params()
    combos_repr = "_".join("_".join(combo) for combo in combos)
    filename = f"runtime_plot_params_{combos_repr}_different_batch_sizes_{self.batch_sizes_repr}.pdf"
    return self.local_target(filename)  # noqa

def factorize_params(self):
import itertools
# get additional parameters plotting
network_names = []
for model_file in self.model_files:
Expand All @@ -322,13 +328,21 @@ def factorize_params(self):
all_params = list(itertools.product(network_names, self.cmssw_versions))
return all_params

def fill_undefined_param_values(self):
    """
    Fill undefined tuple parameters with the corresponding single-measurement values.

    When ``model_files`` or ``cmssw_versions`` were not given on the command line,
    fall back to the single ``model_file`` / ``cmssw_version`` parameter so the
    multi-parameter task also works with one value per axis.
    """
    if self.model_files is None:
        # wrap in a 1-tuple; tuple(str) would split the path into single characters
        self.model_files = (self.model_file,)

    if self.cmssw_versions is None:
        # same: build a 1-tuple instead of iterating over the version string
        self.cmssw_versions = (self.cmssw_version,)

@view_output_plots
def run(self):

# prepare the output directory
output = self.output()
output.parent.touch()

self.fill_undefined_param_values()

input_paths = [inp.path for inp in self.input()]
print(input_paths)
all_params = self.factorize_params()
Expand Down

0 comments on commit 8bf0846

Please sign in to comment.