Skip to content

Commit

Permalink
Bug-fixes for the PyHEP tutorial. (#48)
Browse files Browse the repository at this point in the history
* Bug-fixes for the PyHEP tutorial.

* Make interpretations visible.

* Introduce 'np1' on TH1 and 'np2' on TH2 and do not let non-1d histograms inherit from TH1.

* More no_inherit rules.

* Missed one.

* Fixes #47.

* Switch from 'np', 'bh' properties to 'to_numpy()', 'to_boost()' methods.

* Fix 'flow=False' behavior for TH2 and TH3.

* Added 'to_hist()'.
  • Loading branch information
jpivarski authored Jul 11, 2020
1 parent 77c5404 commit b0b6814
Show file tree
Hide file tree
Showing 15 changed files with 306 additions and 60 deletions.
52 changes: 52 additions & 0 deletions tests/test_0001-source-class.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,39 @@ def test_http():
assert [x.raw_data.tostring() for x in chunks] == [one, two, three]


def colons_and_ports():
    """Check how ``file_object_path_split`` treats colons in URLs with a port.

    A colon that introduces a port number must stay part of the file path;
    only a colon that follows the URL's path component separates the file
    path from the object path.
    """
    split = uproot4._util.file_object_path_split

    # Host and port only: the whole string is the file path.
    assert split("https://example.com:443") == ("https://example.com:443", None)

    # Port followed by a URL path: still no object path.
    assert split("https://example.com:443/something") == (
        "https://example.com:443/something",
        None,
    )

    # A colon after the URL path does introduce an object path.
    assert split("https://example.com:443/something:else") == (
        "https://example.com:443/something",
        "else",
    )


# pytest only collects functions whose names start with "test", so without
# this alias the assertions above would never run. The original name is kept
# so that any existing reference to it still resolves.
test_colons_and_ports = colons_and_ports


@pytest.mark.network
def test_http_port():
    """Read byte ranges from a URL with an explicit port through both HTTP sources.

    The single-request source and the multithreaded source must return the
    same bytes for the same three ranges.
    """
    single = uproot4.source.http.HTTPSource(
        "https://example.com:443", num_fallback_workers=0, timeout=10
    )
    with single as opened:
        fetched = opened.chunks([(0, 100), (50, 55), (200, 400)])
        one, two, three = (piece.raw_data.tostring() for piece in fetched)
        # Each payload is as long as the range that requested it.
        assert len(one) == 100
        assert len(two) == 5
        assert len(three) == 200

    threaded = uproot4.source.http.MultithreadedHTTPSource(
        "https://example.com:443", timeout=10, num_workers=0
    )
    with threaded as opened:
        fetched = opened.chunks([(0, 100), (50, 55), (200, 400)])
        payloads = [piece.raw_data.tostring() for piece in fetched]
        assert payloads == [one, two, three]


@pytest.mark.network
def test_http_size():
with uproot4.source.http.HTTPSource(
Expand All @@ -128,6 +161,25 @@ def test_http_size():
assert size1 == size2


@pytest.mark.network
def test_http_size_port():
    """Both HTTP sources report the same ``num_bytes`` for a URL with a port."""
    single = uproot4.source.http.HTTPSource(
        "https://scikit-hep.org:443/uproot/examples/Zmumu.root",
        num_fallback_workers=0,
        timeout=10,
    )
    with single as source:
        size1 = source.num_bytes

    threaded = uproot4.source.http.MultithreadedHTTPSource(
        "https://scikit-hep.org:443/uproot/examples/Zmumu.root",
        timeout=10,
        num_workers=0,
    )
    with threaded as source:
        size2 = source.num_bytes

    assert size1 == size2


@pytest.mark.network
def test_http_fail():
source = uproot4.source.http.HTTPSource(
Expand Down
42 changes: 24 additions & 18 deletions tests/test_0046-histograms-bh-hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

def test_numpy_1d():
with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f:
values, edges = f["hpx"].np
values, edges = f["hpx"].to_numpy()
assert values.tolist() == [
2.0,
2.0,
Expand Down Expand Up @@ -229,7 +229,7 @@ def test_numpy_1d():

def test_numpy_2d():
with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f:
values, xedges, yedges = f["hpxpy"].np
values, xedges, yedges = f["hpxpy"].to_numpy()

assert values.tolist() == [
[
Expand Down Expand Up @@ -2183,7 +2183,7 @@ def test_numpy_profile():
with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f:
obj = f["hprof"]

assert obj.np[1].tolist() == [
assert obj.to_numpy(errors=True)[1].tolist() == [
-numpy.inf,
-4.0,
-3.92,
Expand Down Expand Up @@ -2289,7 +2289,7 @@ def test_numpy_profile():
numpy.inf,
]

assert obj.np[0][0].tolist() == [
assert obj.to_numpy(errors=True)[0][0].tolist() == [
17.99833583831787,
17.05295467376709,
16.96826426188151,
Expand Down Expand Up @@ -2394,7 +2394,7 @@ def test_numpy_profile():
17.8403746287028,
]

assert obj.np[0][1].tolist() == [
assert obj.to_numpy(errors=True)[0][1].tolist() == [
0.2425426377130359,
0.7421210342302459,
0.4940066334987832,
Expand Down Expand Up @@ -2499,9 +2499,9 @@ def test_numpy_profile():
0.16817919583370047,
]

obj._members["fErrorMode"] = uproot4.behaviors.TProfile._kERRORSPREAD

assert obj.np[0][1].tolist() == [
assert obj.to_numpy(
errors=True, error_mode=uproot4.behaviors.TProfile._kERRORSPREAD
)[0][1].tolist() == [
0.34300708770751953,
1.0495176315307617,
0.8556445884959498,
Expand Down Expand Up @@ -2606,9 +2606,9 @@ def test_numpy_profile():
0.29129491196004526,
]

obj._members["fErrorMode"] = uproot4.behaviors.TProfile._kERRORSPREADI

assert obj.np[0][1].tolist() == [
assert obj.to_numpy(
errors=True, error_mode=uproot4.behaviors.TProfile._kERRORSPREADI
)[0][1].tolist() == [
0.2425426377130359,
0.7421210342302459,
0.4940066334987832,
Expand Down Expand Up @@ -2713,9 +2713,9 @@ def test_numpy_profile():
0.16817919583370047,
]

obj._members["fErrorMode"] = uproot4.behaviors.TProfile._kERRORSPREADG

assert obj.np[0][1].tolist() == [
assert obj.to_numpy(
errors=True, error_mode=uproot4.behaviors.TProfile._kERRORSPREADG
)[0][1].tolist() == [
0.7071067811865475,
0.7071067811865475,
0.5773502691896258,
Expand Down Expand Up @@ -2826,16 +2826,22 @@ def test_numpy_profile():
def test_boost_1d():
boost_histogram = pytest.importorskip("boost_histogram")
with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f:
f["hpx"].bh
f["hpx"].to_boost()


def test_boost_2d():
boost_histogram = pytest.importorskip("boost_histogram")
with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f:
f["hpxpy"].bh
f["hpxpy"].to_boost()


def test_hist_1d():
    """Smoke test: a 1D histogram converts with ``to_hist()`` without raising."""
    # to_hist() wraps the to_boost() result in hist.Hist, so both optional
    # packages are needed; skip (rather than fail) when either is missing.
    pytest.importorskip("boost_histogram")
    pytest.importorskip("hist")
    with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f:
        f["hpx"].to_hist()


def test_boost_profile():
def test_hist_2d():
boost_histogram = pytest.importorskip("boost_histogram")
with uproot4.open(skhep_testdata.data_path("uproot-hepdata-example.root")) as f:
f["hprof"].bh
f["hpxpy"].to_hist()
16 changes: 16 additions & 0 deletions uproot4/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,22 @@

import uproot4.interpretation
import uproot4.interpretation.library
# Re-export the interpretation and container classes at package level so they
# are reachable as attributes of this package (e.g. ``uproot4.AsDtype``).
from uproot4.interpretation.numerical import AsDtype
from uproot4.interpretation.numerical import AsArray
from uproot4.interpretation.numerical import AsDouble32
from uproot4.interpretation.numerical import AsFloat16
from uproot4.interpretation.numerical import AsSTLBits
from uproot4.interpretation.jagged import AsJagged
from uproot4.interpretation.strings import AsStrings
from uproot4.interpretation.objects import AsObjects
from uproot4.interpretation.objects import AsStridedObjects
from uproot4.containers import AsString
from uproot4.containers import AsPointer
# NOTE(review): AsArray is also imported from uproot4.interpretation.numerical
# earlier in this list; this later import rebinds the name, so the
# package-level AsArray refers to the uproot4.containers class only.
# TODO confirm which one is meant to be exposed.
from uproot4.containers import AsArray
from uproot4.containers import AsDynamic
from uproot4.containers import AsVector
from uproot4.containers import AsSet
from uproot4.containers import AsMap

default_library = "ak"

Expand Down
8 changes: 8 additions & 0 deletions uproot4/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def regularize_path(path):
# Text that ends in a single ASCII letter starting at a word boundary
# (judging by the name, a trailing Windows drive letter such as "C").
_windows_drive_letter_ending = re.compile(r".*\b[A-Za-z]$")
# Text that starts with an ASCII letter, a colon and a backslash.
_windows_absolute_path_pattern = re.compile(r"^[A-Za-z]:\\")
# Same as the pattern before it, but with a leading forward slash.
_windows_absolute_path_pattern_slash = re.compile(r"^/[A-Za-z]:\\")
# Text that starts with a digit; file_object_path_split applies it to the
# candidate object path to recognise a URL port number.
_might_be_port = re.compile(r"^[0-9].*")


def file_object_path_split(path):
Expand All @@ -158,6 +159,13 @@ def file_object_path_split(path):
return path, None
else:
file_path, object_path = path[:index], path[index + 1 :]

if (
_might_be_port.match(object_path) is not None
and urlparse(file_path).path == ""
):
return path, None

file_path = file_path.rstrip()
object_path = object_path.lstrip()

Expand Down
37 changes: 30 additions & 7 deletions uproot4/behaviors/TH1.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def _boost_axis(axis):
fXbins = axis.member("fXbins", none_if_missing=True)

metadata = axis.all_members
metadata["name"] = metadata.pop("fName")
metadata["title"] = metadata.pop("fTitle")
metadata.pop("fXbins", None)
metadata.pop("fLabels", None)

Expand Down Expand Up @@ -68,8 +70,6 @@ def values(self):
return numpy.array(values, dtype=values.dtype.newbyteorder("="))

def values_errors(self):
# this should work equally well for TH2 and TH3

values = self.values()
errors = numpy.zeros(values.shape, dtype=numpy.float64)

Expand All @@ -84,12 +84,30 @@ def values_errors(self):

return values, errors

@property
def np(self):
return self.values(), self.edges(0)
def to_numpy(self, flow=True, dd=False, errors=False):
    """Return the histogram contents and x edges as a 2-tuple.

    Args:
        flow: If false, the first and last entries of the values, the
            errors and the edges are dropped (presumably the underflow and
            overflow bins).
        dd: If true, the edges are returned wrapped in a 1-tuple
            ``(xedges,)``; otherwise ``xedges`` is returned directly.
        errors: If true, the first item of the result is a
            ``(values, errors)`` pair from ``values_errors()``; otherwise
            it is just ``values()``.

    Returns:
        ``(contents, xedges)``, or ``(contents, (xedges,))`` when ``dd``
        is true.
    """
    content, uncertainties = (
        self.values_errors() if errors else (self.values(), None)
    )
    edges_x = self.edges(0)

    if not flow:
        inner = slice(1, -1)
        content = content[inner]
        if errors:
            uncertainties = uncertainties[inner]
        edges_x = edges_x[inner]

    payload = (content, uncertainties) if errors else content
    if dd:
        return payload, (edges_x,)
    return payload, edges_x

@property
def bh(self):
def to_boost(self):
boost_histogram = uproot4.extras.boost_histogram()

values = self.values()
Expand All @@ -108,6 +126,8 @@ def bh(self):
out = boost_histogram.Histogram(xaxis, storage=storage)

metadata = self.all_members
metadata["name"] = metadata.pop("fName")
metadata["title"] = metadata.pop("fTitle")
metadata.pop("fXaxis", None)
metadata.pop("fYaxis", None)
metadata.pop("fZaxis", None)
Expand All @@ -127,3 +147,6 @@ def bh(self):
view[:] = values

return out

def to_hist(self):
    """Return this histogram as a ``Hist`` built from the ``to_boost()`` result.

    ``uproot4.extras.hist()`` supplies the object whose ``Hist`` attribute is
    called (presumably the optional ``hist`` package).
    """
    hist_provider = uproot4.extras.hist()
    return hist_provider.Hist(self.to_boost())
61 changes: 55 additions & 6 deletions uproot4/behaviors/TH2.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@


class TH2(object):
no_inherit = (uproot4.behaviors.TH1.TH1,)

def edges(self, axis):
if axis == 0 or axis == "x":
return uproot4.behaviors.TH1._edges(self.member("fXaxis"))
Expand All @@ -25,14 +27,47 @@ def values(self):
yaxis_fNbins = self.member("fYaxis").member("fNbins")
return values.reshape(xaxis_fNbins + 2, yaxis_fNbins + 2)

# values_errors "inherited" from TH1
def values_errors(self):
values = self.values()
errors = numpy.zeros(values.shape, dtype=numpy.float64)

@property
def np(self):
return self.values(), self.edges(0), self.edges(1)
sumw2 = self.member("fSumw2", none_if_missing=True)
if sumw2 is not None and len(sumw2) == self.member("fNcells"):
sumw2 = sumw2.reshape(values.shape)
positive = sumw2 > 0
errors[positive] = numpy.sqrt(sumw2[positive])
else:
positive = values > 0
errors[positive] = numpy.sqrt(values[positive])

return values, errors

@property
def bh(self):
def to_numpy(self, flow=True, dd=False, errors=False):
    """Return the histogram contents together with its x and y edges.

    Args:
        flow: If false, the first and last entries along both axes of the
            values and errors, and of each edge array, are dropped
            (presumably the underflow and overflow bins).
        dd: If true, the edges are returned grouped as ``(xedges, yedges)``
            in a 2-tuple result; otherwise the result is the 3-tuple
            ``(contents, xedges, yedges)``.
        errors: If true, ``contents`` is a ``(values, errors)`` pair from
            ``values_errors()``; otherwise it is just ``values()``.

    Returns:
        ``(contents, xedges, yedges)``, or ``(contents, (xedges, yedges))``
        when ``dd`` is true.
    """
    content, uncertainties = (
        self.values_errors() if errors else (self.values(), None)
    )
    edges_x = self.edges(0)
    edges_y = self.edges(1)

    if not flow:
        inner = slice(1, -1)
        content = content[inner, inner]
        if errors:
            uncertainties = uncertainties[inner, inner]
        edges_x = edges_x[inner]
        edges_y = edges_y[inner]

    payload = (content, uncertainties) if errors else content
    if dd:
        return payload, (edges_x, edges_y)
    return payload, edges_x, edges_y

def to_boost(self):
boost_histogram = uproot4.extras.boost_histogram()

values = self.values()
Expand All @@ -53,6 +88,17 @@ def bh(self):
yaxis = uproot4.behaviors.TH1._boost_axis(self.member("fYaxis"))
out = boost_histogram.Histogram(xaxis, yaxis, storage=storage)

metadata = self.all_members
metadata["name"] = metadata.pop("fName")
metadata["title"] = metadata.pop("fTitle")
metadata.pop("fXaxis", None)
metadata.pop("fYaxis", None)
metadata.pop("fZaxis", None)
metadata.pop("fContour", None)
metadata.pop("fSumw2", None)
metadata.pop("fBuffer", None)
out.metadata = metadata

if isinstance(xaxis, boost_histogram.axis.StrCategory):
values = values[1:, :]
if isinstance(yaxis, boost_histogram.axis.StrCategory):
Expand All @@ -66,3 +112,6 @@ def bh(self):
view[:] = values

return out

def to_hist(self):
    """Return this histogram as a ``Hist`` built from the ``to_boost()`` result.

    ``uproot4.extras.hist()`` supplies the object whose ``Hist`` attribute is
    called (presumably the optional ``hist`` package).
    """
    hist_provider = uproot4.extras.hist()
    return hist_provider.Hist(self.to_boost())
Loading

0 comments on commit b0b6814

Please sign in to comment.