Skip to content

Commit

Permalink
Removed the colon-parsing and replaced it with dicts. (#81)
Browse files Browse the repository at this point in the history
* [skip ci] Removed the colon-parsing and replaced it with dicts. Tests will fail because they still assume you can use colons.

* All tests should now pass.

* Added tests for the file/tree selection features.

* Implemented conditional colon-parsing and full error message as described here: #79 (comment)

* Fixed flake8 error.
  • Loading branch information
jpivarski authored Aug 27, 2020
1 parent ccf790f commit ee60836
Show file tree
Hide file tree
Showing 13 changed files with 483 additions and 247 deletions.
4 changes: 2 additions & 2 deletions tests/test_0016-interpretations.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_recovery(mini):
# flat array to recover:
filename = skhep_testdata.data_path("uproot-issue21.root")
with uproot4.open(
"file:" + filename + " : nllscan/mH", minimal_ttree_metadata=mini
{"file:" + filename: "nllscan/mH"}, minimal_ttree_metadata=mini
) as branch:
basket = branch.basket(0)
assert basket.data.view(">f8").tolist()[:10] == [
Expand All @@ -107,7 +107,7 @@ def test_recovery(mini):
# uproot-from-geant4.root Details: numgood, TrackedRays: Event phi
filename = skhep_testdata.data_path("uproot-issue327.root")
with uproot4.open(
"file:" + filename + " : DstTree/fTracks.fCharge", minimal_ttree_metadata=mini
{"file:" + filename: "DstTree/fTracks.fCharge"}, minimal_ttree_metadata=mini
) as branch:
basket = branch.basket(0)
assert basket.data.view("i1")[:10].tolist() == [
Expand Down
33 changes: 15 additions & 18 deletions tests/test_0043-iterate-function.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,13 @@ def test_iterate_report_2():


def test_function_iterate():
files = (
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
+ ":sample"
files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
expect = 0
for arrays, report in uproot4.iterate(files, "i8", report=True, library="np"):
for arrays, report in uproot4.iterate(
{files: "sample"}, "i8", report=True, library="np"
):
assert arrays["i8"][:5].tolist() == [-15, -14, -13, -12, -11]
assert report.global_entry_start == expect
assert report.global_entry_stop == expect + len(arrays["i8"])
Expand All @@ -175,14 +174,13 @@ def test_function_iterate():

def test_function_iterate_pandas():
pandas = pytest.importorskip("pandas")
files = (
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
+ ":sample"
files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
expect = 0
for arrays, report in uproot4.iterate(files, "i8", report=True, library="pd"):
for arrays, report in uproot4.iterate(
{files: "sample"}, "i8", report=True, library="pd"
):
assert arrays["i8"].values[:5].tolist() == [-15, -14, -13, -12, -11]
assert arrays.index.values[0] == expect
assert report.global_entry_start == expect
Expand All @@ -192,13 +190,12 @@ def test_function_iterate_pandas():

def test_function_iterate_pandas_2():
pandas = pytest.importorskip("pandas")
files = (
skhep_testdata.data_path("uproot-HZZ.root").replace(
"HZZ", "HZZ-{uncompressed,zlib,lz4}"
)
+ ":events"
files = skhep_testdata.data_path("uproot-HZZ.root").replace(
"HZZ", "HZZ-{uncompressed,zlib,lz4}"
)
expect = 0
for arrays, report in uproot4.iterate(files, "Muon_Px", report=True, library="pd"):
for arrays, report in uproot4.iterate(
{files: "events"}, "Muon_Px", report=True, library="pd"
):
assert arrays["Muon_Px"].index.values[0] == (expect, 0)
expect += report.tree.num_entries
27 changes: 9 additions & 18 deletions tests/test_0044-concatenate-function.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,40 +10,31 @@


def test_concatenate_numpy():
files = (
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
+ ":sample"
files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
arrays = uproot4.concatenate(files, ["i8", "f8"], library="np")
arrays = uproot4.concatenate({files: "sample"}, ["i8", "f8"], library="np")
assert len(arrays["i8"]) == 420
assert len(arrays["f8"]) == 420


def test_concatenate_awkward():
awkward1 = pytest.importorskip("awkward1")
files = (
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
+ ":sample"
files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
arrays = uproot4.concatenate(files, ["i8", "f8"], library="ak")
arrays = uproot4.concatenate({files: "sample"}, ["i8", "f8"], library="ak")
assert isinstance(arrays, awkward1.Array)
assert set(awkward1.keys(arrays)) == set(["i8", "f8"])
assert len(arrays) == 420


def test_concatenate_pandas():
pandas = pytest.importorskip("pandas")
files = (
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
+ ":sample"
files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
arrays = uproot4.concatenate(files, ["i8", "f8"], library="pd")
arrays = uproot4.concatenate({files: "sample"}, ["i8", "f8"], library="pd")
assert isinstance(arrays, pandas.DataFrame)
assert set(arrays.columns.tolist()) == set(["i8", "f8"])
assert len(arrays) == 420
21 changes: 8 additions & 13 deletions tests/test_0045-lazy-arrays-1.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_branch_pluralization():
assert False

for i, arrays in enumerate(
uproot4.iterate(skhep_testdata.data_path("uproot-Zmumu.root") + ":events/px1")
uproot4.iterate({skhep_testdata.data_path("uproot-Zmumu.root"): "events/px1"})
):
if i == 0:
assert arrays["px1"][:5].tolist() == [
Expand Down Expand Up @@ -103,14 +103,11 @@ def test_branch_pluralization():

def test_awkward():
awkward1 = pytest.importorskip("awkward1")
files = (
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
+ ":sample"
files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
cache = {}
array = uproot4.lazy(files, array_cache=cache)
array = uproot4.lazy({files: "sample"}, array_cache=cache)
assert len(cache) == 0

assert awkward1.to_list(array[:5, "i4"]) == [-15, -14, -13, -12, -11]
Expand Down Expand Up @@ -146,10 +143,8 @@ def test_awkward():

def test_awkward_pluralization():
awkward1 = pytest.importorskip("awkward1")
files = (
skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
+ ":sample/i4"
files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
"6.20.04", "*"
)
assert awkward1.to_list(uproot4.lazy(files)[:5, "i4"]) == [-15, -14, -13, -12, -11]
array = uproot4.lazy({files: "sample"})
assert awkward1.to_list(array[:5, "i4"]) == [-15, -14, -13, -12, -11]
4 changes: 3 additions & 1 deletion tests/test_0066-fix-http-fallback-freeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

@pytest.mark.network
def test():
with uproot4.open("http://scikit-hep.org/uproot/examples/HZZ.root:events") as t:
with uproot4.open(
{"http://scikit-hep.org/uproot/examples/HZZ.root": "events"}
) as t:
t["MET_px"].array()
t["MET_py"].array()
File renamed without changes.
140 changes: 140 additions & 0 deletions tests/test_0081-dont-parse-colons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE

from __future__ import absolute_import

import pytest
import skhep_testdata

import uproot4


def test_open():
    """Check what ``uproot4.open`` returns for each kind of file argument.

    The test expects a ``ReadOnlyDirectory`` for a bare path, a ``TTree`` for
    a one-item dict mapping the path to ``"WtLoop_nominal"``, and a
    ``ValueError`` for a list.
    """
    path = skhep_testdata.data_path("uproot-issue63.root")

    # Open inside ``with`` blocks so the file handles are released as soon as
    # each check is done instead of lingering until garbage collection.
    with uproot4.open(path) as directory:
        assert isinstance(directory, uproot4.reading.ReadOnlyDirectory)

    with uproot4.open({path: "WtLoop_nominal"}) as tree:
        assert isinstance(tree, uproot4.behaviors.TTree.TTree)

    # A list of paths must be rejected.
    with pytest.raises(ValueError):
        uproot4.open([path])


def test_lazy():
    """Exercise the file specifications that ``uproot4.lazy`` rejects and accepts."""
    path = skhep_testdata.data_path("uproot-issue63.root")

    # A bare path (no object-within-ROOT path) is expected to raise.
    with pytest.raises(ValueError):
        uproot4.lazy(path)

    # An object path of "blah" is expected to raise even with allow_missing.
    with pytest.raises(ValueError):
        uproot4.lazy({path: "blah"}, allow_missing=True)

    uproot4.lazy({path: "WtLoop_nominal"})

    # Two trees from the same file. They are passed as separate one-item
    # dicts: a single dict literal with the same path as both keys would keep
    # only the last value, so "WtLoop_nominal" would never reach lazy().
    uproot4.lazy([{path: "WtLoop_nominal"}, {path: "WtLoop_Fake_nominal"}])

    uproot4.lazy([{path: "WtLoop_nominal"}])

    # Same checks with a glob pattern as the dict key.
    uproot4.lazy({path + "*": "WtLoop_nominal"})
    uproot4.lazy([{path + "*": "WtLoop_nominal"}])


def test_concatenate():
    """Exercise the file specifications accepted by ``uproot4.concatenate``."""
    issue63 = skhep_testdata.data_path("uproot-issue63.root")

    # A bare path to this file is expected to raise.
    with pytest.raises(ValueError):
        uproot4.concatenate(issue63)

    # With allow_missing=True, an object path of "blah" yields an empty result.
    nothing = uproot4.concatenate({issue63: "blah"}, allow_missing=True)
    assert len(nothing) == 0

    sample = skhep_testdata.data_path("uproot-sample-6.16.00-uncompressed.root")
    files = sample.replace("6.16.00", "*")

    # Glob string, dict, list of strings, list of dicts: none should raise.
    for spec in (files, {files: "sample"}, [files], [{files: "sample"}]):
        uproot4.concatenate(spec, "Ai8")


def test_iterate():
    """Exercise the file specifications accepted by ``uproot4.iterate``."""
    issue63 = skhep_testdata.data_path("uproot-issue63.root")

    # A bare path to this file is expected to raise once iteration starts.
    with pytest.raises(ValueError):
        for _ in uproot4.iterate(issue63):
            pass

    # With allow_missing=True, an object path of "blah" yields no batches.
    batches = list(uproot4.iterate({issue63: "blah"}, allow_missing=True))
    assert len(batches) == 0

    sample = skhep_testdata.data_path("uproot-sample-6.16.00-uncompressed.root")
    files = sample.replace("6.16.00", "*")

    # Glob string, dict, list of strings, list of dicts: each must iterate to
    # completion without raising.
    for spec in (files, {files: "sample"}, [files], [{files: "sample"}]):
        for _ in uproot4.iterate(spec, "Ai8"):
            pass


def test_open_colon():
    """Check when a ``":object"`` suffix on the file argument is parsed.

    The test expects the suffix to be honoured for a plain string, and a
    ``FileNotFoundError`` when the same text is given as a ``pathlib.Path``
    or as a dict key.
    """
    # Skip only this test when pathlib is unavailable; a module-level
    # importorskip would skip every test in the file, including those that
    # never touch pathlib.
    pathlib = pytest.importorskip("pathlib")

    path = skhep_testdata.data_path("uproot-issue63.root")
    colon_path = path + ":WtLoop_nominal"

    # Open inside ``with`` so the file handle is released after the check.
    with uproot4.open(colon_path) as tree:
        assert isinstance(tree, uproot4.behaviors.TTree.TTree)

    with pytest.raises(FileNotFoundError):
        uproot4.open(pathlib.Path(colon_path))

    with pytest.raises(FileNotFoundError):
        uproot4.open({colon_path: None})


def test_lazy_colon():
    """Check that ``uproot4.lazy`` accepts ``"file:object"`` strings.

    Covers a single string and a list of two strings naming different trees
    in the same file; neither call should raise.
    """
    path = skhep_testdata.data_path("uproot-issue63.root")
    nominal = path + ":WtLoop_nominal"
    fake_nominal = path + ":WtLoop_Fake_nominal"

    uproot4.lazy(nominal)
    uproot4.lazy([nominal, fake_nominal])
46 changes: 46 additions & 0 deletions uproot4/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,52 @@ def file_path_to_source_class(file_path, options):
raise ValueError("URI scheme not recognized: {0}".format(file_path))


if isinstance(__builtins__, dict):
if "FileNotFoundError" in __builtins__:
_FileNotFoundError = __builtins__["FileNotFoundError"]
else:
_FileNotFoundError = __builtins__["IOError"]
else:
if hasattr(__builtins__, "FileNotFoundError"):
_FileNotFoundError = __builtins__.FileNotFoundError
else:
_FileNotFoundError = __builtins__.IOError


def _file_not_found(files, message=None):
    """
    Builds (but does not raise) the exception for a file that was not found.

    Args:
        files: The file specification that failed; its ``repr`` is embedded
            in the error message.
        message (None or str): Optional detail, shown in parentheses after
            "file not found".

    Returns:
        A ``_FileNotFoundError`` instance whose message also lists the
        accepted ways of specifying files.
    """
    detail = "" if message is None else " (" + message + ")"

    # Braces that must appear literally in the output are doubled because the
    # template goes through str.format.
    template = """file not found{0}
{1}
Files may be specified as:
* str/bytes: relative or absolute filesystem path or URL, without any colons
other than Windows drive letter or URL schema.
Examples: "rel/file.root", "C:\\abs\\file.root", "http://where/what.root"
* str/bytes: same with an object-within-ROOT path, separated by a colon.
Example: "rel/file.root:tdirectory/ttree"
* pathlib.Path: always interpreted as a filesystem path or URL only (no
object-within-ROOT path), regardless of whether there are any colons.
Examples: Path("rel:/file.root"), Path("/abs/path:stuff.root")
Functions that accept many files (uproot4.iterate, etc.) also allow:
* glob syntax in str/bytes and pathlib.Path.
Examples: Path("rel/*.root"), "/abs/*.root:tdirectory/ttree"
* dict: keys are filesystem paths, values are objects-within-ROOT paths.
Example: {{"/data_v1/*.root": "ttree_v1", "/data_v2/*.root": "ttree_v2"}}
* already-open TTree objects.
* iterables of the above.
"""
    return _FileNotFoundError(template.format(detail, repr(files)))


def memory_size(data, error_message=None):
"""
Regularizes strings like '## kB' and plain integer number of bytes to
Expand Down
Loading

0 comments on commit ee60836

Please sign in to comment.