Allow passing numpy array to TH1::Fill()

* Allow passing numpy array to TH1::FillN * Adapt tutorials to make use of FillN(np.array()) instead of Fill() inside for loop * prefer assertAlmostEqual in unittest instead of bare assert * Pythonize Fill() rather than FillN() * bugfix for tStudent tutorial (make weights an array)
mdessole · Nov 27, 2024 · ad3272b · ad3272b
1 parent 1c5ac40
commit ad3272b
Show file tree

Hide file tree

Showing 9 changed files with 111 additions and 21 deletions.
diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_th1.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_th1.py
@@ -130,6 +130,38 @@ def _imul(self, c):
     self.Scale(c)
     return self
 
+# Fill with numpy array
+
+def _FillWithNumpyArray(self, *args):
+    """
+    Fill histogram with numpy array.
+    Parameters:
+    - self: histogram
+    - args: arguments to FillN
+            If the first argument is numpy.ndarray:
+            - fills the histogram with this array
+            - optional second argument is weights array,
+              if not provided, weights of 1 are used
+            Otherwise:
+            - Arguments are passed directly to the original FillN method
+    Returns:
+    - Result of FillN if numpy case is detected, otherwise result of Fill
+    Raises:
+    - ValueError: If weights length doesn't match data length
+    """
+    import numpy as np
+
+    if args and isinstance(args[0], np.ndarray):
+        data = args[0]
+        weights = np.ones(len(data)) if len(args) < 2 or args[1] is None else args[1]
+        if len(weights) != len(data):
+            raise ValueError(
+                f"Length mismatch: data length ({len(data)}) != weights length ({len(weights)})"
+            )
+        return self.FillN(len(data), data, weights)
+    else:
+        return self._Fill(*args)
+
 
 @pythonization('TH1')
 def pythonize_th1(klass):
@@ -138,3 +170,7 @@ def pythonize_th1(klass):
 
     # Support hist *= scalar
     klass.__imul__ = _imul
+
+    # Support hist.Fill(numpy_array) and hist.Fill(numpy_array, numpy_array)
+    klass._Fill = klass.Fill
+    klass.Fill = _FillWithNumpyArray
diff --git a/bindings/pyroot/pythonizations/test/CMakeLists.txt b/bindings/pyroot/pythonizations/test/CMakeLists.txt
@@ -51,6 +51,7 @@ ROOT_ADD_PYUNITTEST(pyroot_pyz_ttree_branch ttree_branch.py PYTHON_DEPS numpy)
 
 # TH1 and subclasses pythonizations
 ROOT_ADD_PYUNITTEST(pyroot_pyz_th1_operators th1_operators.py)
+ROOT_ADD_PYUNITTEST(pyroot_pyz_th1_fillN th1_fillN.py PYTHON_DEPS numpy)
 ROOT_ADD_PYUNITTEST(pyroot_pyz_th2 th2.py)
 
 # TGraph, TGraph2D and error subclasses pythonizations

diff --git a/bindings/pyroot/pythonizations/test/th1_fillN.py b/bindings/pyroot/pythonizations/test/th1_fillN.py
@@ -0,0 +1,41 @@
+import unittest
+
+import ROOT
+
+
+class FillWithNumpyArray(unittest.TestCase):
+    """
+    Test for the FillN method of TH1 and subclasses, which fills
+    the histogram with a numpy array.
+    """
+
+    # Tests
+    def test_fill(self):
+        import numpy as np
+        # Create sample data
+        data = np.array([1., 2, 2, 3, 3, 3, 4, 4, 5])
+        # Create histograms
+        nbins = 5
+        min_val = 0
+        max_val = 10
+        verbose_hist = ROOT.TH1F("verbose_hist", "verbose_hist", nbins, min_val, max_val)
+        simple_hist = ROOT.TH1F("simple_hist", "simple_hist", nbins, min_val, max_val)
+        # Fill histograms
+        verbose_hist.FillN(len(data), data, np.ones(len(data)))
+        simple_hist.Fill(data)
+        # Test if the histograms have the same content
+        for i in range(nbins):
+            self.assertAlmostEqual(verbose_hist.GetBinContent(i), simple_hist.GetBinContent(i))
+        # Test filling with weights
+        weights = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
+        verbose_hist.FillN(len(data), data, weights)
+        simple_hist.Fill(data, weights)
+        for i in range(nbins):
+            self.assertAlmostEqual(verbose_hist.GetBinContent(i), simple_hist.GetBinContent(i))
+        # Test filling with weights with a different length
+        weights = np.array([0.1, 0.2, 0.3, 0.4])
+        with self.assertRaises(ValueError):
+            simple_hist.Fill(data, weights)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tutorials/dataframe/df017_vecOpsHEP.py b/tutorials/dataframe/df017_vecOpsHEP.py
@@ -17,6 +17,7 @@
 ## \authors Danilo Piparo (CERN), Andre Vieira Silva
 
 import ROOT
+import numpy as np
 
 filename = ROOT.gROOT.GetTutorialDir().Data() + "/dataframe/df017_vecOpsHEP.root"
 treename = "myDataset"
@@ -26,9 +27,15 @@ def WithPyROOT(filename):
     f = ROOT.TFile(filename)
     h = ROOT.TH1F("pt", "With PyROOT", 16, 0, 4)
     for event in f[treename]:
-        for E, px, py in zip(event.E, event.px, event.py):
-            if (E > 100):
-               h.Fill(sqrt(px*px + py*py))
+        h.Fill(
+            np.array(
+                [
+                    sqrt(px * px + py * py)
+                    for E, px, py in zip(event.E, event.px, event.py)
+                    if E > 100
+                ]
+            )
+        )
     h.DrawCopy()
 
 def WithRDataFrameVecOpsJit(treename, filename):

diff --git a/tutorials/fit/fitConvolution.py b/tutorials/fit/fitConvolution.py
@@ -10,17 +10,21 @@
 ## \author Jonas Rembser, Aurelie Flandi (C++ version)
 
 import ROOT
+import numpy as np
 
 # Construction of histogram to fit.
 h_ExpGauss = ROOT.TH1F("h_ExpGauss", "Exponential convoluted by Gaussian", 100, 0.0, 5.0)
-for i in range(1000000):
-    # Gives a alpha of -0.3 in the exp.
-    x = ROOT.gRandom.Exp(1.0 / 0.3)
-    x += ROOT.gRandom.Gaus(0.0, 3.0)
-    # Probability density function of the addition of two variables is the
-    # convolution of two density functions.
-    h_ExpGauss.Fill(x)
-
+h_ExpGauss.Fill(
+    np.array(
+        [
+            # Gives a alpha of -0.3 in the exp.
+            # Probability density function of the addition of two variables is the
+            # convolution of two density functions.
+            ROOT.gRandom.Exp(1.0 / 0.3) + ROOT.gRandom.Gaus(0.0, 3.0)
+            for _ in range(1000000)
+        ]
+    )
+)
 f_conv = ROOT.TF1Convolution("expo", "gaus", -1, 6, True)
 f_conv.SetRange(-1.0, 6.0)
 f_conv.SetNofPointsFFT(1000)

diff --git a/tutorials/hist/twoscales.py b/tutorials/hist/twoscales.py
@@ -17,15 +17,15 @@
 #include "TRandom.h"
 
 import ROOT
+import numpy as np
 
 c1 = ROOT.TCanvas("c1","hists with different scales",600,400)
 
 ROOT.gStyle.SetOptStat(False)
 
 h1 = ROOT.TH1F("h1","my histogram",100,-3,3)
 
-for i in range(10000) :
-    h1.Fill(ROOT.gRandom.Gaus(0,1))
+h1.Fill(np.array([ROOT.gRandom.Gaus(0, 1) for _ in range(10000)]))
 
 h1.Draw()
 c1.Update()

diff --git a/tutorials/math/tStudent.py b/tutorials/math/tStudent.py
@@ -18,6 +18,7 @@
 
 from ROOT import TH1D, TF1, TCanvas, kRed, kBlue
 import ROOT
+import numpy as np
 
 
 # This is the way to force load of MathMore in Cling
@@ -33,9 +34,10 @@
 # Create the histogram and fill it with the quantiles
 quant = TH1D("quant", "", 9, 0, 0.9)
 
-for i in range(1, 10):
-    quant.Fill((i-0.5)/10.0, ROOT.Math.tdistribution_quantile(0.1 * i,
-                                                              3.0))
+quant.Fill(
+    np.array([(i - 0.5) / 10.0 for i in range(1, 10)]),
+    np.array([ROOT.Math.tdistribution_quantile(0.1 * i, 3.0) for i in range(1, 10)]),
+)
 
 # For each quantile fill with the pdf
 xx = []

diff --git a/tutorials/roofit/rf102_dataimport.py b/tutorials/roofit/rf102_dataimport.py
@@ -13,15 +13,15 @@
 
 import ROOT
 from array import array
+import numpy as np
 
 
 def makeTH1(trnd):
 
     # Create ROOT ROOT.TH1 filled with a Gaussian distribution
 
     hh = ROOT.TH1D("hh", "hh", 25, -10, 10)
-    for i in range(100):
-        hh.Fill(trnd.Gaus(0, 3))
+    hh.Fill(np.array([trnd.Gaus(0, 3) for _ in range(100)]))
     return hh
 
 

diff --git a/tutorials/roofit/rf401_importttreethx.py b/tutorials/roofit/rf401_importttreethx.py
@@ -14,15 +14,14 @@
 
 import ROOT
 from array import array
+import numpy as np
 
 
 def makeTH1(trnd, name, mean, sigma):
     """Create ROOT TH1 filled with a Gaussian distribution."""
 
     hh = ROOT.TH1D(name, name, 100, -10, 10)
-    for i in range(1000):
-        hh.Fill(trnd.Gaus(mean, sigma))
-
+    hh.Fill(np.array([trnd.Gaus(mean, sigma) for _ in range(1000)]))
     return hh