linting, polishing

cms-ml · Oct 15, 2023 · bf230b9 · bf230b9
1 parent 643b026
commit bf230b9
Show file tree

Hide file tree

Showing 4 changed files with 85 additions and 73 deletions.
diff --git a/README.md b/README.md
@@ -45,7 +45,7 @@ flowchart TD
     B --> |merge the results for different batch sizes| C[MergeRuntimes]
     C --> D[PlotRuntimes]
     C --> E[PlotRuntimesMultipleCMSSW]
-    C --> F[PlotRuntimesSeveralNetworks]
+    C --> F[PlotRuntimesMultipleNetworks]
 ```
 
 A task is run with the command ```law run``` followed by the name of the task.
@@ -109,7 +109,7 @@ flowchart TD
     B --> |merge the results for different batch sizes| C[MergeRuntimes]
     C --> D[PlotRuntimes]
     C --> E[PlotRuntimesMultipleCMSSW]
-    C --> F[PlotRuntimesSeveralNetworks]
+    C --> F[PlotRuntimesMultipleNetworks]
 ```
 
 It is composed of four major types of tasks:
@@ -123,7 +123,7 @@ single batch size and outputs a .csv file with the results of the timing measure
 3. [MergeRuntimes](#mergeruntimes): This task merges the .csv output files with the required multiple batch sizes
 from the [MeasureRuntime](#measureruntime) tasks to obtain a single .csv file containing the information to plot.
 
-4. [PlotRuntimes](#plotruntimes), [PlotRuntimesSeveralNetworks](#plotruntimesseveralnetworks),
+4. [PlotRuntimes](#plotruntimes), [PlotRuntimesMultipleNetworks](#plotruntimesmultiplenetworks),
 [PlotRuntimesMultipleCMSSW](#plotruntimesmultiplecmssw): These tasks create the plots with the values stored in the
 .csv file from [MergeRuntimes](#mergeruntimes).
 
@@ -159,7 +159,7 @@ The format of the file to give to MLProf is the following:
         },
         ...
     ],
-    "network_name": "{name_of_the_network_for_the_legend_of_the_plots_and_the_name_of_the_output_pdf_of_PlotRuntimesSeveralNetworks}"
+    "network_name": "{name_of_the_network_for_the_legend_of_the_plots_and_the_name_of_the_output_pdf_of_PlotRuntimesMultipleNetworks}"
 }
 ```
 There are already a few examples of these configs with working paths for the networks in the "examples" folder.
@@ -343,7 +343,7 @@ plotted data point is given by ```events * repetitions```.
 - scram-arch: str. The SCRAM architecture used for the inference. default: ```slc7_amd64_gcc10```
 
 ## Output:
-- ```runtime_plot_different_batchsizes_{batch_size_1}_{batch_size_2}_{...}.pdf```: The plot of the runtime measurement
+- ```runtime_plot_different_batch_sizes_{batch_size_1}_{batch_size_2}_{...}.pdf```: The plot of the runtime measurement
 against the different batch sizes given.
 
 ## Example:
@@ -359,7 +359,7 @@ law run PlotRuntimes --version test_simple_dnn \
                      --bs-normalized True
 ```
 
-# PlotRuntimesSeveralNetworks
+# PlotRuntimesMultipleNetworks
 
 This task plots the results of the runtime measurement against the given batch sizes for several models.
 The model-files argument is required and replaces the module-file argument. The points are
@@ -401,19 +401,19 @@ plotted data point is given by ```events * repetitions```.
 - scram-arch: str. The SCRAM architecture used for the inference. default: ```slc7_amd64_gcc10```
 
 ## Output:
-- ```runtime_plot_networks_{network_name_1}_{network_name_2}_{...}_different_batchsizes_{batch_size_1}_{batch_size_2}_{...}.pdf```: The plot of the runtime measurement
+- ```runtime_plot_networks_{network_name_1}_{network_name_2}_{...}_different_batch_sizes_{batch_size_1}_{batch_size_2}_{...}.pdf```: The plot of the runtime measurement
 against the different batch sizes given.
 
 ## Example:
 
 ```shell
-law run PlotRuntimesSeveralNetworks --version test_several_networks \
-                                    --model-files $MLP_BASE/examples/model1/model.json,$MLP_BASE/examples/cnn/model_cnn.json\
-                                    --repetitions 500 \
-                                    --cmssw-version CMSSW_12_2_4 \
-                                    --batch-sizes 1,2,4,8,16,32,64,128,256,512,1024 \
-                                    --log-y False \
-                                    --bs-normalized True
+law run PlotRuntimesMultipleNetworks --version test_several_networks \
+                                     --model-files $MLP_BASE/examples/model1/model.json,$MLP_BASE/examples/cnn/model_cnn.json\
+                                     --repetitions 500 \
+                                     --cmssw-version CMSSW_12_2_4 \
+                                     --batch-sizes 1,2,4,8,16,32,64,128,256,512,1024 \
+                                     --log-y False \
+                                     --bs-normalized True
 ```
 
 # PlotRuntimesMultipleCMSSW
@@ -460,7 +460,7 @@ plotted data point is given by ```events * repetitions```.
 - scram-arch: str. The SCRAM architecture used for the inference. default: ```slc7_amd64_gcc10```
 
 ## Output:
-- ```runtime_plot__multiple_cmssw_{cmssw_version_1}_{cmssw_version_2}_{...}_different_batchsizes_{batch_size_1}_{batch_size_2}_{...}.pdf```: The plot of the runtime measurement
+- ```runtime_plot_multiple_cmssw_{cmssw_version_1}_{cmssw_version_2}_{...}_different_batch_sizes_{batch_size_1}_{batch_size_2}_{...}.pdf```: The plot of the runtime measurement
 against the different batch sizes given.
 
 ## Example:

diff --git a/mlprof/plotting/plotter.py b/mlprof/plotting/plotter.py
@@ -1,67 +1,67 @@
 # coding: utf-8
 
-colors = {"mpl_standard": ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
-                           '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'],
-          "custom_edgecolor": ['#CC4F1B', '#1B2ACC', '#3F7F4C'],
-          "custom_facecolor": ['#FF9848', '#089FFF', '#7EFF99'],
+colors = {"mpl_standard": ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b",
+                           "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"],
+          "custom_edgecolor": ["#CC4F1B", "#1B2ACC", "#3F7F4C"],
+          "custom_facecolor": ["#FF9848", "#089FFF", "#7EFF99"],
           }
 
+
 def open_csv_file(path, columns):
     import pandas as pd
     pd_dataset = pd.read_csv(path, delimiter=",", names=columns)
 
     # or with chunking?
-    '''
+    """
     tp = pd.read_csv(path, delimiter=",", names=columns, iterator=True, chunksize=2000)
     df = pd.concat(tp, ignore_index=True)
-    '''
+    """
     return pd_dataset
 
 
-def calculate_medians_and_errors_per_batch_size(different_batchsizes, path):
-    '''
-    Calculate and plot the medians and errors of the runtime per batchsize
+def calculate_medians_and_errors_per_batch_size(different_batch_sizes, path):
+    """
+    Calculate and plot the medians and errors of the runtime per batch size
 
     Args:
-    different_batchsizes: list(int). The list of different batch sizes to be plotted
+    different_batch_sizes: list(int). The list of different batch sizes to be plotted
     path: str. The path to the csv file containing the results of the measurement.
-    '''
-    import pandas as pd
+    """
     import numpy as np
 
     # open the csv file
     pd_dataset = open_csv_file(path, ["batch_size", "runtimes"])
 
     # create the arrays to be plotted with median values and up and down errors
-    medians = np.empty(len(different_batchsizes))
-    err_down = np.empty(len(different_batchsizes))
-    err_up = np.empty(len(different_batchsizes))
-
-    for i, batchsize in enumerate(different_batchsizes):
-        runtimes_per_batchsize = pd_dataset.loc[pd_dataset["batch_size"] == batchsize, "runtimes"]
-        # mean = np.mean(runtimes_per_batchsize)
-        median = np.percentile(runtimes_per_batchsize, 50)
+    medians = np.empty(len(different_batch_sizes))
+    err_down = np.empty(len(different_batch_sizes))
+    err_up = np.empty(len(different_batch_sizes))
+
+    for i, batch_size in enumerate(different_batch_sizes):
+        runtimes_per_batch_size = pd_dataset.loc[pd_dataset["batch_size"] == batch_size, "runtimes"]
+        # mean = np.mean(runtimes_per_batch_size)
+        median = np.percentile(runtimes_per_batch_size, 50)
         medians[i] = median
-        err_down[i] = abs(np.percentile(runtimes_per_batchsize, 16) - median)
-        err_up[i] = abs(np.percentile(runtimes_per_batchsize, 84) - median)
+        err_down[i] = abs(np.percentile(runtimes_per_batch_size, 16) - median)
+        err_up[i] = abs(np.percentile(runtimes_per_batch_size, 84) - median)
     return medians, err_down, err_up
 
 
 def apply_individual_customizations(customization_dict, fig, ax):
-    '''
+    """
     Apply the remaining customization parameters from the command line
 
     Args:
     customization_dict: dict. The dictionary containing the customization parameters
     fig, ax: the matplotlib object to handle figure and axis.
-    '''
+    """
     import matplotlib.pyplot as plt
     if customization_dict["log_y"]:
         plt.yscale("log")
 
 
 def fill_plot(x, y, yerr_d, yerr_u, filling, color):
-    '''
+    """
     Fill the plots with the measured values and their errors
 
     Args:
@@ -71,11 +71,11 @@ def fill_plot(x, y, yerr_d, yerr_u, filling, color):
     yerr_u: array(float). error up on the y-axis
     filling: bool. customization parameter to decide if the errors will be represented as error bars or bands
     color: the colors to use for the plotted values
-    '''
+    """
     import matplotlib.pyplot as plt
     import numpy as np
     if filling:
-        p1 = plt.plot(x, y, '-', color=color)
+        p1 = plt.plot(x, y, "-", color=color)
         plt.fill_between(x, y - yerr_d, y + yerr_u, alpha=0.5, facecolor=color)
         p2 = plt.fill(np.NaN, np.NaN, alpha=0.5, color=color)
         legend = (p1[0], p2[0])
@@ -88,29 +88,29 @@ def fill_plot(x, y, yerr_d, yerr_u, filling, color):
     return legend
 
 
-def plot_batchsize_several_measurements(different_batchsizes, input_paths, output_path, measurements,
+def plot_batch_size_several_measurements(different_batch_sizes, input_paths, output_path, measurements,
                                         customization_dict):
-    '''
+    """
     General plotting function for runtime plots
 
     Args:
-    different_batchsizes: list(int). The batch sizes to be used for the x-axis of the plot.
+    different_batch_sizes: list(int). The batch sizes to be used for the x-axis of the plot.
     input_paths: list(str). The paths of the csv files containing the measurement results.
     output_path: str. The path to be used for saving the plot.
     measurements: list(str). The labels of the plot.
     customization_dict: dict. The dictionary containing the customization parameters.
-    '''
+    """
     import matplotlib.pyplot as plt
     import mplhep as hep
 
     # get the values to be plotted
     plotting_values = {}
     for i, input_path in enumerate(input_paths):
-        medians, err_down, err_up = calculate_medians_and_errors_per_batch_size(different_batchsizes, input_path)
+        medians, err_down, err_up = calculate_medians_and_errors_per_batch_size(different_batch_sizes, input_path)
         if customization_dict["bs_normalized"]:
-            medians = medians / different_batchsizes
-            err_down = err_down / different_batchsizes
-            err_up = err_up / different_batchsizes
+            medians = medians / different_batch_sizes
+            err_down = err_down / different_batch_sizes
+            err_up = err_up / different_batch_sizes
         plotting_values[measurements[i]] = {"medians": medians, "err_down": err_down, "err_up": err_up}
 
     # set style and add CMS logo
@@ -120,8 +120,8 @@ def plot_batchsize_several_measurements(different_batchsizes, input_paths, outpu
     fig, ax = plt.subplots(1, 1)
     to_legend = []
     for i, input_path in enumerate(input_paths):
-        color = next(ax._get_lines.prop_cycler)['color']
-        legend = fill_plot(different_batchsizes, plotting_values[measurements[i]]["medians"],
+        color = next(ax._get_lines.prop_cycler)["color"]
+        legend = fill_plot(different_batch_sizes, plotting_values[measurements[i]]["medians"],
                   plotting_values[measurements[i]]["err_down"],
                   plotting_values[measurements[i]]["err_up"], customization_dict["filling"],
                   color)  # colors["mpl_standard"][i])
@@ -133,15 +133,18 @@ def plot_batchsize_several_measurements(different_batchsizes, input_paths, outpu
     plt.xscale("log")
     apply_individual_customizations(customization_dict, fig, ax)
     plt.xlabel("Batch size")
-    plt.ylabel("Runtime / batch size [ms]")
+    if customization_dict["bs_normalized"]:
+        plt.ylabel("Runtime / batch size [ms]")
+    else:
+        plt.ylabel("runtime [ms]")
     plt.ylim(bottom=0)
-    ax.xaxis.set_major_locator(plt.MaxNLocator(len(different_batchsizes)))
+    ax.xaxis.set_major_locator(plt.MaxNLocator(len(different_batch_sizes)))
     ax.xaxis.set_minor_locator(plt.NullLocator())
-    plt.xticks(different_batchsizes, different_batchsizes)
+    plt.xticks(different_batch_sizes, different_batch_sizes)
 
     # choose text to add on the top left of the figure
     hep.cms.text(text="MLProf", loc=0)  # hep.cms.text(text="Simulation, Network test", loc=0)
 
-    #save plot
-    fig.savefig(output_path, bbox_inches='tight')
+    # save plot
+    fig.savefig(output_path, bbox_inches="tight")
     plt.close()
diff --git a/mlprof/tasks/parameters.py b/mlprof/tasks/parameters.py
@@ -122,7 +122,8 @@ class BatchSizesParameters(BaseTask):
     def batch_sizes_repr(self):
         return "_".join(map(str, self.batch_sizes))
 
-class PlotCustomParameters(BaseTask):
+
+class CustomPlotParameters(BaseTask):
     """
     Parameters for customization of plotting
     """
@@ -142,4 +143,4 @@ class PlotCustomParameters(BaseTask):
 
     @property
     def custom_plot_params(self):
-        return {"log_y":self.log_y, "bs_normalized":self.bs_normalized, "filling":self.filling}
+        return {"log_y": self.log_y, "bs_normalized": self.bs_normalized, "filling": self.filling}
diff --git a/mlprof/tasks/runtime.py b/mlprof/tasks/runtime.py
@@ -8,9 +8,9 @@
 import law
 
 from mlprof.tasks.base import CommandTask, PlotTask, view_output_plots
-from mlprof.tasks.parameters import RuntimeParameters, CMSSWParameters, BatchSizesParameters, PlotCustomParameters
+from mlprof.tasks.parameters import RuntimeParameters, CMSSWParameters, BatchSizesParameters, CustomPlotParameters
 from mlprof.tasks.sandboxes import CMSSWSandboxTask
-from mlprof.plotting.plotter import plot_batchsize_several_measurements
+from mlprof.plotting.plotter import plot_batch_size_several_measurements
 
 
 class CreateRuntimeConfig(RuntimeParameters, CMSSWParameters):
@@ -139,7 +139,7 @@ def run(self):
         )
 
 
-class PlotRuntimes(RuntimeParameters, CMSSWParameters, BatchSizesParameters, PlotTask, PlotCustomParameters):
+class PlotRuntimes(RuntimeParameters, CMSSWParameters, BatchSizesParameters, PlotTask, CustomPlotParameters):
     """
     Task to plot the results from the runtime measurements depending on the batch sizes given as parameters,
     default are 1, 2 and 4.
@@ -151,7 +151,7 @@ def requires(self):
         return MergeRuntimes.req(self)
 
     def output(self):
-        return self.local_target(f"runtime_plot_different_batchsizes_{self.batch_sizes_repr}.pdf")
+        return self.local_target(f"runtime_plot_different_batch_sizes_{self.batch_sizes_repr}.pdf")
 
     @view_output_plots
     def run(self):
@@ -164,11 +164,16 @@ def run(self):
         network_name = model_data["network_name"]
 
         # create the plot
-        plot_batchsize_several_measurements(self.batch_sizes, [self.input().path], output.path, [network_name], self.custom_plot_params)
+        plot_batch_size_several_measurements(self.batch_sizes, [self.input().path], output.path, [network_name],
+                                            self.custom_plot_params)
         print("plot saved")
 
 
-class PlotRuntimesSeveralNetworks(RuntimeParameters, CMSSWParameters, BatchSizesParameters, PlotTask, PlotCustomParameters):
+class PlotRuntimesMultipleNetworks(RuntimeParameters,
+                                  CMSSWParameters,
+                                  BatchSizesParameters,
+                                  PlotTask,
+                                  CustomPlotParameters):
     """
     Task to plot the results from the runtime measurements for several networks, depending on the batch sizes
     given as parameters, default are 1, 2 and 4.
@@ -177,7 +182,7 @@ class PlotRuntimesSeveralNetworks(RuntimeParameters, CMSSWParameters, BatchSizes
     sandbox = "bash::$MLP_BASE/sandboxes/plotting.sh"
 
     model_files = law.CSVParameter(
-        description="comma-separated list of json files containing information of models to be tested"
+        description="comma-separated list of json files containing information of models to be tested",
     )
 
     def requires(self):
@@ -189,7 +194,7 @@ def output(self):
             model_data = law.LocalFileTarget(model_file).load(formatter="json")
             network_names += [model_data["network_name"]]
         network_names_repr = "_".join(network_names)
-        return self.local_target(f"runtime_plot_networks_{network_names_repr}_different_batchsizes_{self.batch_sizes_repr}.pdf")
+        return self.local_target(f"runtime_plot_networks_{network_names_repr}_different_batch_sizes_{self.batch_sizes_repr}.pdf")  # noqa
 
     @view_output_plots
     def run(self):
@@ -205,12 +210,16 @@ def run(self):
             network_names += [model_data["network_name"]]
         for input_task in self.input():
             input_paths += [input_task.path]
-        plot_batchsize_several_measurements(self.batch_sizes, input_paths,
+        plot_batch_size_several_measurements(self.batch_sizes, input_paths,
                                         output.path, network_names, self.custom_plot_params)
         print("plot saved")
 
 
-class PlotRuntimesMultipleCMSSW(RuntimeParameters, CMSSWParameters, BatchSizesParameters, PlotTask, PlotCustomParameters):
+class PlotRuntimesMultipleCMSSW(RuntimeParameters,
+                                CMSSWParameters,
+                                BatchSizesParameters,
+                                PlotTask,
+                                CustomPlotParameters):
     """
     Task to plot the results from the runtime measurements for inferences performed in multiple cmssw versions,
     depending on the batch sizes given as parameters, default are 1, 2 and 4.
@@ -220,7 +229,7 @@ class PlotRuntimesMultipleCMSSW(RuntimeParameters, CMSSWParameters, BatchSizesPa
 
     cmssw_versions = law.CSVParameter(
         cls=luigi.Parameter,
-        default=("CMSSW_12_2_4","CMSSW_12_2_2"),
+        default=("CMSSW_12_2_4", "CMSSW_12_2_2"),
         description="comma-separated list of CMSSW versions; default: ('CMSSW_12_2_4','CMSSW_12_2_2')",
         brace_expand=True,
     )
@@ -230,7 +239,7 @@ def requires(self):
 
     def output(self):
         cmssw_versions_repr = "_".join(self.cmssw_versions)
-        return self.local_target(f"runtime_plot__multiple_cmssw_{cmssw_versions_repr}_different_batchsizes_{self.batch_sizes_repr}.pdf")
+        return self.local_target(f"runtime_plot_multiple_cmssw_{cmssw_versions_repr}_different_batch_sizes_{self.batch_sizes_repr}.pdf")  # noqa
 
     @view_output_plots
     def run(self):
@@ -242,7 +251,6 @@ def run(self):
         input_paths = []
         for input_task in self.input():
             input_paths += [input_task.path]
-        from IPython import embed; embed()
-        plot_batchsize_several_measurements(self.batch_sizes, input_paths,
+        plot_batch_size_several_measurements(self.batch_sizes, input_paths,
                                         output.path, self.cmssw_versions, self.custom_plot_params)
         print("plot saved")