From b0b6814c332e52b874a5442bdb3a84c215258635 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Sat, 11 Jul 2020 15:16:47 -0500 Subject: [PATCH] Bug-fixes for the PyHEP tutorial. (#48) * Bug-fixes for the PyHEP tutorial. * Make interpretations visible. * Introduce 'np1' on TH1 and 'np2' on TH2 and do not let non-1d histograms inherit from TH1. * More no_inherit rules. * Missed one. * Fixes #47. * Switch from 'np', 'bh' properties to 'to_numpy()', 'to_boost()' methods. * Fix 'flow=False' behavior for TH2 and TH3. * Added 'to_hist()'. --- tests/test_0001-source-class.py | 52 +++++++++++++++++++++++ tests/test_0046-histograms-bh-hist.py | 42 ++++++++++-------- uproot4/__init__.py | 16 +++++++ uproot4/_util.py | 8 ++++ uproot4/behaviors/TH1.py | 37 +++++++++++++--- uproot4/behaviors/TH2.py | 61 ++++++++++++++++++++++++--- uproot4/behaviors/TH2Poly.py | 13 ++++-- uproot4/behaviors/TH3.py | 52 ++++++++++++++++++++--- uproot4/behaviors/TProfile.py | 35 ++++++++++++--- uproot4/behaviors/TProfile2D.py | 12 ++++-- uproot4/behaviors/TProfile3D.py | 12 ++++-- uproot4/deserialization.py | 5 +++ uproot4/extras.py | 2 + uproot4/reading.py | 11 ++++- uproot4/source/http.py | 8 ++-- 15 files changed, 306 insertions(+), 60 deletions(-) diff --git a/tests/test_0001-source-class.py b/tests/test_0001-source-class.py index df570062c..1d9e1c658 100644 --- a/tests/test_0001-source-class.py +++ b/tests/test_0001-source-class.py @@ -111,6 +111,39 @@ def test_http(): assert [x.raw_data.tostring() for x in chunks] == [one, two, three] +def colons_and_ports(): + assert uproot4._util.file_object_path_split("https://example.com:443") == ( + "https://example.com:443", + None, + ) + assert uproot4._util.file_object_path_split( + "https://example.com:443/something" + ) == ("https://example.com:443/something", None) + assert uproot4._util.file_object_path_split( + "https://example.com:443/something:else" + ) == ("https://example.com:443/something", "else") + + +@pytest.mark.network +def test_http_port(): + source = uproot4.source.http.HTTPSource( + "https://example.com:443", timeout=10, num_fallback_workers=0 + ) + with source as tmp: + chunks = tmp.chunks([(0, 100), (50, 55), (200, 400)]) + one, two, three = [chunk.raw_data.tostring() for chunk in chunks] + assert len(one) == 100 + assert len(two) == 5 + assert len(three) == 200 + + source = uproot4.source.http.MultithreadedHTTPSource( + "https://example.com:443", num_workers=0, timeout=10 + ) + with source as tmp: + chunks = tmp.chunks([(0, 100), (50, 55), (200, 400)]) + assert [x.raw_data.tostring() for x in chunks] == [one, two, three] + + @pytest.mark.network def test_http_size(): with uproot4.source.http.HTTPSource( @@ -128,6 +161,25 @@ def test_http_size(): assert size1 == size2 +@pytest.mark.network +def test_http_size_port(): + with uproot4.source.http.HTTPSource( + "https://scikit-hep.org:443/uproot/examples/Zmumu.root", + timeout=10, + num_fallback_workers=0, + ) as source: + size1 = source.num_bytes + + with uproot4.source.http.MultithreadedHTTPSource( + "https://scikit-hep.org:443/uproot/examples/Zmumu.root", + num_workers=0, + timeout=10, + ) as source: + size2 = source.num_bytes + + assert size1 == size2 + + @pytest.mark.network def test_http_fail(): source = uproot4.source.http.HTTPSource( diff --git a/tests/test_0046-histograms-bh-hist.py b/tests/test_0046-histograms-bh-hist.py index c92d27a64..0d7881bd1 100644 --- a/tests/test_0046-histograms-bh-hist.py +++ b/tests/test_0046-histograms-bh-hist.py @@ -12,7 +12,7 @@ def test_numpy_1d(): with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f: - values, edges = f["hpx"].np + values, edges = f["hpx"].to_numpy() assert values.tolist() == [ 2.0, 2.0, @@ -229,7 +229,7 @@ def test_numpy_1d(): def test_numpy_2d(): with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f: - values, xedges, yedges = f["hpxpy"].np + values, xedges, yedges = f["hpxpy"].to_numpy() assert values.tolist() == [ [ @@ -2183,7 +2183,7 @@ def test_numpy_profile(): with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f: obj = f["hprof"] - assert obj.np[1].tolist() == [ + assert obj.to_numpy(errors=True)[1].tolist() == [ -numpy.inf, -4.0, -3.92, @@ -2289,7 +2289,7 @@ def test_numpy_profile(): numpy.inf, ] - assert obj.np[0][0].tolist() == [ + assert obj.to_numpy(errors=True)[0][0].tolist() == [ 17.99833583831787, 17.05295467376709, 16.96826426188151, @@ -2394,7 +2394,7 @@ def test_numpy_profile(): 17.8403746287028, ] - assert obj.np[0][1].tolist() == [ + assert obj.to_numpy(errors=True)[0][1].tolist() == [ 0.2425426377130359, 0.7421210342302459, 0.4940066334987832, @@ -2499,9 +2499,9 @@ def test_numpy_profile(): 0.16817919583370047, ] - obj._members["fErrorMode"] = uproot4.behaviors.TProfile._kERRORSPREAD - - assert obj.np[0][1].tolist() == [ + assert obj.to_numpy( + errors=True, error_mode=uproot4.behaviors.TProfile._kERRORSPREAD + )[0][1].tolist() == [ 0.34300708770751953, 1.0495176315307617, 0.8556445884959498, @@ -2606,9 +2606,9 @@ def test_numpy_profile(): 0.29129491196004526, ] - obj._members["fErrorMode"] = uproot4.behaviors.TProfile._kERRORSPREADI - - assert obj.np[0][1].tolist() == [ + assert obj.to_numpy( + errors=True, error_mode=uproot4.behaviors.TProfile._kERRORSPREADI + )[0][1].tolist() == [ 0.2425426377130359, 0.7421210342302459, 0.4940066334987832, @@ -2713,9 +2713,9 @@ def test_numpy_profile(): 0.16817919583370047, ] - obj._members["fErrorMode"] = uproot4.behaviors.TProfile._kERRORSPREADG - - assert obj.np[0][1].tolist() == [ + assert obj.to_numpy( + errors=True, error_mode=uproot4.behaviors.TProfile._kERRORSPREADG + )[0][1].tolist() == [ 0.7071067811865475, 0.7071067811865475, 0.5773502691896258, @@ -2826,16 +2826,22 @@ def test_numpy_profile(): def test_boost_1d(): boost_histogram = pytest.importorskip("boost_histogram") with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f: - f["hpx"].bh + f["hpx"].to_boost() def test_boost_2d(): boost_histogram = pytest.importorskip("boost_histogram") with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f: - f["hpxpy"].bh + f["hpxpy"].to_boost() + + +def test_hist_1d(): + boost_histogram = pytest.importorskip("boost_histogram") + with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f: + f["hpx"].to_hist() -def test_boost_profile(): +def test_hist_2d(): boost_histogram = pytest.importorskip("boost_histogram") with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f: - f["hprof"].bh + f["hpxpy"].to_hist() diff --git a/uproot4/__init__.py b/uproot4/__init__.py index 1e64b46c3..5091ca938 100644 --- a/uproot4/__init__.py +++ b/uproot4/__init__.py @@ -59,6 +59,22 @@ import uproot4.interpretation import uproot4.interpretation.library +from uproot4.interpretation.numerical import AsDtype +from uproot4.interpretation.numerical import AsArray +from uproot4.interpretation.numerical import AsDouble32 +from uproot4.interpretation.numerical import AsFloat16 +from uproot4.interpretation.numerical import AsSTLBits +from uproot4.interpretation.jagged import AsJagged +from uproot4.interpretation.strings import AsStrings +from uproot4.interpretation.objects import AsObjects +from uproot4.interpretation.objects import AsStridedObjects +from uproot4.containers import AsString +from uproot4.containers import AsPointer +from uproot4.containers import AsArray +from uproot4.containers import AsDynamic +from uproot4.containers import AsVector +from uproot4.containers import AsSet +from uproot4.containers import AsMap default_library = "ak" diff --git a/uproot4/_util.py b/uproot4/_util.py index 8387435bf..880ab79cb 100644 --- a/uproot4/_util.py +++ b/uproot4/_util.py @@ -147,6 +147,7 @@ def regularize_path(path): _windows_drive_letter_ending = re.compile(r".*\b[A-Za-z]$") _windows_absolute_path_pattern = re.compile(r"^[A-Za-z]:\\") _windows_absolute_path_pattern_slash = re.compile(r"^/[A-Za-z]:\\") +_might_be_port = re.compile(r"^[0-9].*") def file_object_path_split(path): @@ -158,6 +159,13 @@ def file_object_path_split(path): return path, None else: file_path, object_path = path[:index], path[index + 1 :] + + if ( + _might_be_port.match(object_path) is not None + and urlparse(file_path).path == "" + ): + return path, None + file_path = file_path.rstrip() object_path = object_path.lstrip() diff --git a/uproot4/behaviors/TH1.py b/uproot4/behaviors/TH1.py index 6c0c3be7c..a3d81ae52 100644 --- a/uproot4/behaviors/TH1.py +++ b/uproot4/behaviors/TH1.py @@ -32,6 +32,8 @@ def _boost_axis(axis): fXbins = axis.member("fXbins", none_if_missing=True) metadata = axis.all_members + metadata["name"] = metadata.pop("fName") + metadata["title"] = metadata.pop("fTitle") metadata.pop("fXbins", None) metadata.pop("fLabels", None) @@ -68,8 +70,6 @@ def values(self): return numpy.array(values, dtype=values.dtype.newbyteorder("=")) def values_errors(self): - # this should work equally well for TH2 and TH3 - values = self.values() errors = numpy.zeros(values.shape, dtype=numpy.float64) @@ -84,12 +84,30 @@ def values_errors(self): return values, errors - @property - def np(self): - return self.values(), self.edges(0) + def to_numpy(self, flow=True, dd=False, errors=False): + if errors: + values, errs = self.values_errors() + else: + values, errs = self.values(), None + + xedges = self.edges(0) + if not flow: + values = values[1:-1] + if errors: + errs = errs[1:-1] + xedges = xedges[1:-1] + + if errors: + values_errors = values, errs + else: + values_errors = values + + if dd: + return values_errors, (xedges,) + else: + return values_errors, xedges - @property - def bh(self): + def to_boost(self): boost_histogram = uproot4.extras.boost_histogram() values = self.values() @@ -108,6 +126,8 @@ def bh(self): out = boost_histogram.Histogram(xaxis, storage=storage) metadata = self.all_members + metadata["name"] = metadata.pop("fName") + metadata["title"] = metadata.pop("fTitle") metadata.pop("fXaxis", None) metadata.pop("fYaxis", None) metadata.pop("fZaxis", None) @@ -127,3 +147,6 @@ def bh(self): view[:] = values return out + + def to_hist(self): + return uproot4.extras.hist().Hist(self.to_boost()) diff --git a/uproot4/behaviors/TH2.py b/uproot4/behaviors/TH2.py index afce1da08..34d8da8bd 100644 --- a/uproot4/behaviors/TH2.py +++ b/uproot4/behaviors/TH2.py @@ -9,6 +9,8 @@ class TH2(object): + no_inherit = (uproot4.behaviors.TH1.TH1,) + def edges(self, axis): if axis == 0 or axis == "x": return uproot4.behaviors.TH1._edges(self.member("fXaxis")) @@ -25,14 +27,47 @@ def values(self): yaxis_fNbins = self.member("fYaxis").member("fNbins") return values.reshape(xaxis_fNbins + 2, yaxis_fNbins + 2) - # values_errors "inherited" from TH1 + def values_errors(self): + values = self.values() + errors = numpy.zeros(values.shape, dtype=numpy.float64) - @property - def np(self): - return self.values(), self.edges(0), self.edges(1) + sumw2 = self.member("fSumw2", none_if_missing=True) + if sumw2 is not None and len(sumw2) == self.member("fNcells"): + sumw2 = sumw2.reshape(values.shape) + positive = sumw2 > 0 + errors[positive] = numpy.sqrt(sumw2[positive]) + else: + positive = values > 0 + errors[positive] = numpy.sqrt(values[positive]) + + return values, errors - @property - def bh(self): + def to_numpy(self, flow=True, dd=False, errors=False): + if errors: + values, errs = self.values_errors() + else: + values, errs = self.values(), None + + xedges = self.edges(0) + yedges = self.edges(1) + if not flow: + values = values[1:-1, 1:-1] + if errors: + errs = errs[1:-1, 1:-1] + xedges = xedges[1:-1] + yedges = yedges[1:-1] + + if errors: + values_errors = values, errs + else: + values_errors = values + + if dd: + return values_errors, (xedges, yedges) + else: + return values_errors, xedges, yedges + + def to_boost(self): boost_histogram = uproot4.extras.boost_histogram() values = self.values() @@ -53,6 +88,17 @@ def bh(self): yaxis = uproot4.behaviors.TH1._boost_axis(self.member("fYaxis")) out = boost_histogram.Histogram(xaxis, yaxis, storage=storage) + metadata = self.all_members + metadata["name"] = metadata.pop("fName") + metadata["title"] = metadata.pop("fTitle") + metadata.pop("fXaxis", None) + metadata.pop("fYaxis", None) + metadata.pop("fZaxis", None) + metadata.pop("fContour", None) + metadata.pop("fSumw2", None) + metadata.pop("fBuffer", None) + out.metadata = metadata + if isinstance(xaxis, boost_histogram.axis.StrCategory): values = values[1:, :] if isinstance(yaxis, boost_histogram.axis.StrCategory): @@ -66,3 +112,6 @@ def bh(self): view[:] = values return out + + def to_hist(self): + return uproot4.extras.hist().Hist(self.to_boost()) diff --git a/uproot4/behaviors/TH2Poly.py b/uproot4/behaviors/TH2Poly.py index 3067ab5ca..0bfea6593 100644 --- a/uproot4/behaviors/TH2Poly.py +++ b/uproot4/behaviors/TH2Poly.py @@ -2,8 +2,12 @@ from __future__ import absolute_import +import uproot4.behaviors.TH2 + class TH2Poly(object): + no_inherit = (uproot4.behaviors.TH2.TH2,) + def edges(self, axis): raise NotImplementedError(repr(self)) @@ -13,10 +17,11 @@ def values(self): def values_errors(self, error_mode=0): raise NotImplementedError(repr(self)) - @property - def np(self): + def to_numpy(self, flow=True, dd=False, errors=False): raise NotImplementedError(repr(self)) - @property - def bh(self): + def to_boost(self): raise NotImplementedError(repr(self)) + + def to_hist(self): + return uproot4.extras.hist().Hist(self.to_boost()) diff --git a/uproot4/behaviors/TH3.py b/uproot4/behaviors/TH3.py index 1d63a0e15..cd0edea05 100644 --- a/uproot4/behaviors/TH3.py +++ b/uproot4/behaviors/TH3.py @@ -9,6 +9,8 @@ class TH3(object): + no_inherit = (uproot4.behaviors.TH1.TH1,) + def edges(self, axis): if axis == 0 or axis == "x": return uproot4.behaviors.TH1._edges(self.member("fXaxis")) @@ -28,14 +30,49 @@ def values(self): zaxis_fNbins = self.member("fZaxis").member("fNbins") return values.reshape(xaxis_fNbins + 2, yaxis_fNbins + 2, zaxis_fNbins + 2) - # values_errors "inherited" from TH1 + def values_errors(self): + values = self.values() + errors = numpy.zeros(values.shape, dtype=numpy.float64) + + sumw2 = self.member("fSumw2", none_if_missing=True) + if sumw2 is not None and len(sumw2) == self.member("fNcells"): + sumw2 = sumw2.reshape(values.shape) + positive = sumw2 > 0 + errors[positive] = numpy.sqrt(sumw2[positive]) + else: + positive = values > 0 + errors[positive] = numpy.sqrt(values[positive]) + + return values, errors + + def to_numpy(self, flow=True, dd=False, errors=False): + if errors: + values, errs = self.values_errors() + else: + values, errs = self.values(), None + + xedges = self.edges(0) + yedges = self.edges(1) + zedges = self.edges(2) + if not flow: + values = values[1:-1, 1:-1, 1:-1] + if errors: + errs = errs[1:-1, 1:-1, 1:-1] + xedges = xedges[1:-1] + yedges = yedges[1:-1] + zedges = zedges[1:-1] + + if errors: + values_errors = values, errs + else: + values_errors = values - @property - def np(self): - return self.values(), (self.edges(0), self.edges(1), self.edges(2)) + if dd: + return values_errors, (xedges, yedges, zedges) + else: + return values_errors, xedges, yedges, zedges - @property - def bh(self): + def to_boost(self): boost_histogram = uproot4.extras.boost_histogram() values = self.values() @@ -72,3 +109,6 @@ def bh(self): view[:] = values return out + + def to_hist(self): + return uproot4.extras.hist().Hist(self.to_boost()) diff --git a/uproot4/behaviors/TProfile.py b/uproot4/behaviors/TProfile.py index fb1bcde4e..d31631a27 100644 --- a/uproot4/behaviors/TProfile.py +++ b/uproot4/behaviors/TProfile.py @@ -15,6 +15,8 @@ class TProfile(object): + no_inherit = (uproot4.behaviors.TH1.TH1,) + def edges(self, axis=0): if axis == 0 or axis == "x": return uproot4.behaviors.TH1._edges(self.member("fXaxis")) @@ -133,12 +135,30 @@ def values_errors(self, error_mode=0): out[nonzero] = root_eprim[nonzero] / numpy.sqrt(root_neff[nonzero]) return root_contsum, out - @property - def np(self): - return self.values_errors(self.member("fErrorMode")), self.edges(0) + def to_numpy(self, flow=True, dd=False, errors=False, error_mode=0): + if errors: + values, errs = self.values_errors(error_mode=error_mode) + else: + values, errs = self.values(), None + + xedges = self.edges(0) + if not flow: + values = values[1:-1] + if errors: + errs = errs[1:-1] + xedges = xedges[1:-1] + + if errors: + values_errors = values, errs + else: + values_errors = values - @property - def bh(self): + if dd: + return values_errors, (xedges,) + else: + return values_errors, xedges + + def to_boost(self): boost_histogram = uproot4.extras.boost_histogram() storage = boost_histogram.storage.WeightedMean() @@ -159,4 +179,7 @@ def bh(self): view.value = values view.sum_of_weighted_deltas_squared - return out + raise NotImplementedError(repr(self)) + + def to_hist(self): + return uproot4.extras.hist().Hist(self.to_boost()) diff --git a/uproot4/behaviors/TProfile2D.py b/uproot4/behaviors/TProfile2D.py index fb2464283..b4c0cd990 100644 --- a/uproot4/behaviors/TProfile2D.py +++ b/uproot4/behaviors/TProfile2D.py @@ -3,9 +3,12 @@ from __future__ import absolute_import import uproot4.behaviors.TH1 +import uproot4.behaviors.TH2 class TProfile2D(object): + no_inherit = (uproot4.behaviors.TH2.TH2,) + def edges(self, axis): if axis == 0 or axis == "x": return uproot4.behaviors.TH1._edges(self.member("fXaxis")) @@ -20,10 +23,11 @@ def values(self): def values_errors(self, error_mode=0): raise NotImplementedError(repr(self)) - @property - def np(self): + def to_numpy(self, flow=True, dd=False, errors=False, error_mode=0): raise NotImplementedError(repr(self)) - @property - def bh(self): + def to_boost(self): raise NotImplementedError(repr(self)) + + def to_hist(self): + return uproot4.extras.hist().Hist(self.to_boost()) diff --git a/uproot4/behaviors/TProfile3D.py b/uproot4/behaviors/TProfile3D.py index df95d6243..894971a38 100644 --- a/uproot4/behaviors/TProfile3D.py +++ b/uproot4/behaviors/TProfile3D.py @@ -3,9 +3,12 @@ from __future__ import absolute_import import uproot4.behaviors.TH1 +import uproot4.behaviors.TH3 class TProfile3D(object): + no_inherit = (uproot4.behaviors.TH3.TH3,) + def edges(self, axis): if axis == 0 or axis == "x": return uproot4.behaviors.TH1._edges(self.member("fXaxis")) @@ -22,10 +25,11 @@ def values(self): def values_errors(self, error_mode=0): raise NotImplementedError(repr(self)) - @property - def np(self): + def to_numpy(self, flow=True, dd=False, errors=False, error_mode=0): raise NotImplementedError(repr(self)) - @property - def bh(self): + def to_boost(self): raise NotImplementedError(repr(self)) + + def to_hist(self): + return uproot4.extras.hist().Hist(self.to_boost()) diff --git a/uproot4/deserialization.py b/uproot4/deserialization.py index ec0040fbb..72498b90d 100644 --- a/uproot4/deserialization.py +++ b/uproot4/deserialization.py @@ -55,6 +55,11 @@ def c(name, version=None): out.__module__ = "" behaviors = tuple(_yield_all_behaviors(out, c)) + exclude = tuple( + bad for cls in behaviors if hasattr(cls, "no_inherit") for bad in cls.no_inherit + ) + behaviors = tuple(cls for cls in behaviors if cls not in exclude) + if len(behaviors) != 0: out = uproot4._util.new_class(out.__name__, behaviors + (out,), {}) out.__module__ = "" diff --git a/uproot4/extras.py b/uproot4/extras.py index 23c1f967f..147eb1815 100644 --- a/uproot4/extras.py +++ b/uproot4/extras.py @@ -128,6 +128,8 @@ def lzma(): ) else: return lzma + else: + return lzma def lz4_block(): diff --git a/uproot4/reading.py b/uproot4/reading.py index b9511a04c..bbb156dfb 100644 --- a/uproot4/reading.py +++ b/uproot4/reading.py @@ -32,12 +32,21 @@ from uproot4._util import no_filter -def open(path, object_cache=100, array_cache="100 MB", custom_classes=None, **options): +def open( + path, + parse_object=True, + object_cache=100, + array_cache="100 MB", + custom_classes=None, + **options +): """ Args: path (str or Path): Path or URL to open, which may include a colon separating a file path from an object-within-ROOT path, like `"root://server/path/to/file.root : internal_directory/my_ttree"`. + parse_object (bool): If False, interpret the `path` purely as a file + path (no colon-delimited object path). object_cache (None, MutableMapping, or int): Cache of objects drawn from ROOT directories (e.g histograms, TTrees, other directories); if None, do not use a cache; if an int, create a new cache of this diff --git a/uproot4/source/http.py b/uproot4/source/http.py index d2a9c02a9..33a0255a2 100644 --- a/uproot4/source/http.py +++ b/uproot4/source/http.py @@ -35,18 +35,18 @@ def make_connection(parsed_url, timeout): if parsed_url.scheme == "https": if uproot4._util.py2: return HTTPSConnection( - parsed_url.netloc, parsed_url.port, None, None, False, timeout + parsed_url.hostname, parsed_url.port, None, None, False, timeout ) else: return HTTPSConnection( - parsed_url.netloc, parsed_url.port, None, None, timeout + parsed_url.hostname, parsed_url.port, None, None, timeout ) elif parsed_url.scheme == "http": if uproot4._util.py2: - return HTTPConnection(parsed_url.netloc, parsed_url.port, False, timeout) + return HTTPConnection(parsed_url.hostname, parsed_url.port, False, timeout) else: - return HTTPConnection(parsed_url.netloc, parsed_url.port, timeout) + return HTTPConnection(parsed_url.hostname, parsed_url.port, timeout) else: raise ValueError(