From 1ac0cd5683bbc4469aadbaddac21fc0cac9264cb Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 2 Jul 2020 18:21:25 -0500 Subject: [PATCH] Last data type/interpretation features before moving on to iteration. (#35) * All TTrees in scikit-hep-testdata can be _listed_ except issue475b.root:Event/Sim/SimHeader. * Now _all_ TTrees in scikit-hep-testdata can be listed. * Fix flake8. * To read the TH2D file, we need TBranchObject. * More stability. * Renamed stl_container/STLContainer -> container/Container. * Fix Cursor origin for entry_start != 0. * Include type hint in AsDynamic. * Adding missing offsets; it's not really necessary to look at TIOFeatures, though one could. * Names in zipped arrays are split at _ . / only. --- tests/test_0018-array-fetching-interface.py | 216 +++++++++---------- tests/test_0029-more-string-types.py | 8 +- tests/test_0031-test-stl-containers.py | 8 +- tests/test_0035-datatype-generality.py | 55 +++++ uproot4/__init__.py | 25 ++- uproot4/_util.py | 6 +- uproot4/behaviors/TBranch.py | 51 +++-- uproot4/const.py | 2 +- uproot4/{stl_containers.py => containers.py} | 116 +++++++--- uproot4/deserialization.py | 29 +-- uproot4/interpretation/identify.py | 205 +++++++++--------- uproot4/interpretation/jagged.py | 19 +- uproot4/interpretation/library.py | 4 +- uproot4/interpretation/objects.py | 28 ++- uproot4/model.py | 28 ++- uproot4/models/TArray.py | 10 +- uproot4/models/TAtt.py | 20 +- uproot4/models/TBasket.py | 12 ++ uproot4/models/TBranch.py | 25 +++ uproot4/models/TNamed.py | 12 +- uproot4/models/TString.py | 2 +- uproot4/reading.py | 23 +- uproot4/source/cursor.py | 7 +- uproot4/streamers.py | 32 +-- 24 files changed, 599 insertions(+), 344 deletions(-) create mode 100644 tests/test_0035-datatype-generality.py rename uproot4/{stl_containers.py => containers.py} (89%) diff --git a/tests/test_0018-array-fetching-interface.py b/tests/test_0018-array-fetching-interface.py index b48ecd5b2..6016e59f0 100644 --- a/tests/test_0018-array-fetching-interface.py +++ b/tests/test_0018-array-fetching-interface.py @@ -339,152 +339,152 @@ def test_arrays(): how="zip", ) assert result.tolist() == [ - {"i4": -15, "f4": -14.899999618530273, "A": []}, - {"i4": -14, "f4": -13.899999618530273, "A": [{"i4": -15, "f8": -15.0}]}, + {"i4": -15, "f4": -14.899999618530273, "jagged0": []}, + {"i4": -14, "f4": -13.899999618530273, "jagged0": [{"Ai4": -15, "Af8": -15.0}]}, { "i4": -13, "f4": -12.899999618530273, - "A": [{"i4": -15, "f8": -15.0}, {"i4": -13, "f8": -13.9}], + "jagged0": [{"Ai4": -15, "Af8": -15.0}, {"Ai4": -13, "Af8": -13.9}], }, { "i4": -12, "f4": -11.899999618530273, - "A": [ - {"i4": -15, "f8": -15.0}, - {"i4": -13, "f8": -13.9}, - {"i4": -11, "f8": -12.8}, + "jagged0": [ + {"Ai4": -15, "Af8": -15.0}, + {"Ai4": -13, "Af8": -13.9}, + {"Ai4": -11, "Af8": -12.8}, ], }, { "i4": -11, "f4": -10.899999618530273, - "A": [ - {"i4": -15, "f8": -15.0}, - {"i4": -13, "f8": -13.9}, - {"i4": -11, "f8": -12.8}, - {"i4": -9, "f8": -11.7}, + "jagged0": [ + {"Ai4": -15, "Af8": -15.0}, + {"Ai4": -13, "Af8": -13.9}, + {"Ai4": -11, "Af8": -12.8}, + {"Ai4": -9, "Af8": -11.7}, ], }, - {"i4": -10, "f4": -9.899999618530273, "A": []}, - {"i4": -9, "f4": -8.899999618530273, "A": [{"i4": -10, "f8": -10.0}]}, + {"i4": -10, "f4": -9.899999618530273, "jagged0": []}, + {"i4": -9, "f4": -8.899999618530273, "jagged0": [{"Ai4": -10, "Af8": -10.0}]}, { "i4": -8, "f4": -7.900000095367432, - "A": [{"i4": -10, "f8": -10.0}, {"i4": -8, "f8": -8.9}], + "jagged0": [{"Ai4": -10, "Af8": -10.0}, {"Ai4": -8, "Af8": -8.9}], }, { "i4": -7, "f4": -6.900000095367432, - "A": [ - {"i4": -10, "f8": -10.0}, - {"i4": -8, "f8": -8.9}, - {"i4": -6, "f8": -7.8}, + "jagged0": [ + {"Ai4": -10, "Af8": -10.0}, + {"Ai4": -8, "Af8": -8.9}, + {"Ai4": -6, "Af8": -7.8}, ], }, { "i4": -6, "f4": -5.900000095367432, - "A": [ - {"i4": -10, "f8": -10.0}, - {"i4": -8, "f8": -8.9}, - {"i4": -6, "f8": -7.8}, - {"i4": -4, "f8": -6.7}, + "jagged0": [ + {"Ai4": -10, "Af8": -10.0}, + {"Ai4": -8, "Af8": -8.9}, + {"Ai4": -6, "Af8": -7.8}, + {"Ai4": -4, "Af8": -6.7}, ], }, - {"i4": -5, "f4": -4.900000095367432, "A": []}, - {"i4": -4, "f4": -3.9000000953674316, "A": [{"i4": -5, "f8": -5.0}]}, + {"i4": -5, "f4": -4.900000095367432, "jagged0": []}, + {"i4": -4, "f4": -3.9000000953674316, "jagged0": [{"Ai4": -5, "Af8": -5.0}]}, { "i4": -3, "f4": -2.9000000953674316, - "A": [{"i4": -5, "f8": -5.0}, {"i4": -3, "f8": -3.9}], + "jagged0": [{"Ai4": -5, "Af8": -5.0}, {"Ai4": -3, "Af8": -3.9}], }, { "i4": -2, "f4": -1.899999976158142, - "A": [ - {"i4": -5, "f8": -5.0}, - {"i4": -3, "f8": -3.9}, - {"i4": -1, "f8": -2.8}, + "jagged0": [ + {"Ai4": -5, "Af8": -5.0}, + {"Ai4": -3, "Af8": -3.9}, + {"Ai4": -1, "Af8": -2.8}, ], }, { "i4": -1, "f4": -0.8999999761581421, - "A": [ - {"i4": -5, "f8": -5.0}, - {"i4": -3, "f8": -3.9}, - {"i4": -1, "f8": -2.8}, - {"i4": 1, "f8": -1.7}, + "jagged0": [ + {"Ai4": -5, "Af8": -5.0}, + {"Ai4": -3, "Af8": -3.9}, + {"Ai4": -1, "Af8": -2.8}, + {"Ai4": 1, "Af8": -1.7}, ], }, - {"i4": 0, "f4": 0.10000000149011612, "A": []}, - {"i4": 1, "f4": 1.100000023841858, "A": [{"i4": 0, "f8": 0.0}]}, + {"i4": 0, "f4": 0.10000000149011612, "jagged0": []}, + {"i4": 1, "f4": 1.100000023841858, "jagged0": [{"Ai4": 0, "Af8": 0.0}]}, { "i4": 2, "f4": 2.0999999046325684, - "A": [{"i4": 0, "f8": 0.0}, {"i4": 2, "f8": 1.1}], + "jagged0": [{"Ai4": 0, "Af8": 0.0}, {"Ai4": 2, "Af8": 1.1}], }, { "i4": 3, "f4": 3.0999999046325684, - "A": [{"i4": 0, "f8": 0.0}, {"i4": 2, "f8": 1.1}, {"i4": 4, "f8": 2.2}], + "jagged0": [{"Ai4": 0, "Af8": 0.0}, {"Ai4": 2, "Af8": 1.1}, {"Ai4": 4, "Af8": 2.2}], }, { "i4": 4, "f4": 4.099999904632568, - "A": [ - {"i4": 0, "f8": 0.0}, - {"i4": 2, "f8": 1.1}, - {"i4": 4, "f8": 2.2}, - {"i4": 6, "f8": 3.3}, + "jagged0": [ + {"Ai4": 0, "Af8": 0.0}, + {"Ai4": 2, "Af8": 1.1}, + {"Ai4": 4, "Af8": 2.2}, + {"Ai4": 6, "Af8": 3.3}, ], }, - {"i4": 5, "f4": 5.099999904632568, "A": []}, - {"i4": 6, "f4": 6.099999904632568, "A": [{"i4": 5, "f8": 5.0}]}, + {"i4": 5, "f4": 5.099999904632568, "jagged0": []}, + {"i4": 6, "f4": 6.099999904632568, "jagged0": [{"Ai4": 5, "Af8": 5.0}]}, { "i4": 7, "f4": 7.099999904632568, - "A": [{"i4": 5, "f8": 5.0}, {"i4": 7, "f8": 6.1}], + "jagged0": [{"Ai4": 5, "Af8": 5.0}, {"Ai4": 7, "Af8": 6.1}], }, { "i4": 8, "f4": 8.100000381469727, - "A": [{"i4": 5, "f8": 5.0}, {"i4": 7, "f8": 6.1}, {"i4": 9, "f8": 7.2}], + "jagged0": [{"Ai4": 5, "Af8": 5.0}, {"Ai4": 7, "Af8": 6.1}, {"Ai4": 9, "Af8": 7.2}], }, { "i4": 9, "f4": 9.100000381469727, - "A": [ - {"i4": 5, "f8": 5.0}, - {"i4": 7, "f8": 6.1}, - {"i4": 9, "f8": 7.2}, - {"i4": 11, "f8": 8.3}, + "jagged0": [ + {"Ai4": 5, "Af8": 5.0}, + {"Ai4": 7, "Af8": 6.1}, + {"Ai4": 9, "Af8": 7.2}, + {"Ai4": 11, "Af8": 8.3}, ], }, - {"i4": 10, "f4": 10.100000381469727, "A": []}, - {"i4": 11, "f4": 11.100000381469727, "A": [{"i4": 10, "f8": 10.0}]}, + {"i4": 10, "f4": 10.100000381469727, "jagged0": []}, + {"i4": 11, "f4": 11.100000381469727, "jagged0": [{"Ai4": 10, "Af8": 10.0}]}, { "i4": 12, "f4": 12.100000381469727, - "A": [{"i4": 10, "f8": 10.0}, {"i4": 12, "f8": 11.1}], + "jagged0": [{"Ai4": 10, "Af8": 10.0}, {"Ai4": 12, "Af8": 11.1}], }, { "i4": 13, "f4": 13.100000381469727, - "A": [ - {"i4": 10, "f8": 10.0}, - {"i4": 12, "f8": 11.1}, - {"i4": 14, "f8": 12.2}, + "jagged0": [ + {"Ai4": 10, "Af8": 10.0}, + {"Ai4": 12, "Af8": 11.1}, + {"Ai4": 14, "Af8": 12.2}, ], }, { "i4": 14, "f4": 14.100000381469727, - "A": [ - {"i4": 10, "f8": 10.0}, - {"i4": 12, "f8": 11.1}, - {"i4": 14, "f8": 12.2}, - {"i4": 16, "f8": 13.3}, + "jagged0": [ + {"Ai4": 10, "Af8": 10.0}, + {"Ai4": 12, "Af8": 11.1}, + {"Ai4": 14, "Af8": 12.2}, + {"Ai4": 16, "Af8": 13.3}, ], }, ] @@ -496,100 +496,100 @@ def test_arrays(): how="zip", ) assert result.tolist() == [ - {"i4": -10, "f4": -9.899999618530273, "A": []}, - {"i4": -9, "f4": -8.899999618530273, "A": [{"i4": -10, "f8": -10.0}]}, + {"i4": -10, "f4": -9.899999618530273, "jagged0": []}, + {"i4": -9, "f4": -8.899999618530273, "jagged0": [{"Ai4": -10, "Af8": -10.0}]}, { "i4": -8, "f4": -7.900000095367432, - "A": [{"i4": -10, "f8": -10.0}, {"i4": -8, "f8": -8.9}], + "jagged0": [{"Ai4": -10, "Af8": -10.0}, {"Ai4": -8, "Af8": -8.9}], }, { "i4": -7, "f4": -6.900000095367432, - "A": [ - {"i4": -10, "f8": -10.0}, - {"i4": -8, "f8": -8.9}, - {"i4": -6, "f8": -7.8}, + "jagged0": [ + {"Ai4": -10, "Af8": -10.0}, + {"Ai4": -8, "Af8": -8.9}, + {"Ai4": -6, "Af8": -7.8}, ], }, { "i4": -6, "f4": -5.900000095367432, - "A": [ - {"i4": -10, "f8": -10.0}, - {"i4": -8, "f8": -8.9}, - {"i4": -6, "f8": -7.8}, - {"i4": -4, "f8": -6.7}, + "jagged0": [ + {"Ai4": -10, "Af8": -10.0}, + {"Ai4": -8, "Af8": -8.9}, + {"Ai4": -6, "Af8": -7.8}, + {"Ai4": -4, "Af8": -6.7}, ], }, - {"i4": -5, "f4": -4.900000095367432, "A": []}, - {"i4": -4, "f4": -3.9000000953674316, "A": [{"i4": -5, "f8": -5.0}]}, + {"i4": -5, "f4": -4.900000095367432, "jagged0": []}, + {"i4": -4, "f4": -3.9000000953674316, "jagged0": [{"Ai4": -5, "Af8": -5.0}]}, { "i4": -3, "f4": -2.9000000953674316, - "A": [{"i4": -5, "f8": -5.0}, {"i4": -3, "f8": -3.9}], + "jagged0": [{"Ai4": -5, "Af8": -5.0}, {"Ai4": -3, "Af8": -3.9}], }, { "i4": -2, "f4": -1.899999976158142, - "A": [ - {"i4": -5, "f8": -5.0}, - {"i4": -3, "f8": -3.9}, - {"i4": -1, "f8": -2.8}, + "jagged0": [ + {"Ai4": -5, "Af8": -5.0}, + {"Ai4": -3, "Af8": -3.9}, + {"Ai4": -1, "Af8": -2.8}, ], }, { "i4": -1, "f4": -0.8999999761581421, - "A": [ - {"i4": -5, "f8": -5.0}, - {"i4": -3, "f8": -3.9}, - {"i4": -1, "f8": -2.8}, - {"i4": 1, "f8": -1.7}, + "jagged0": [ + {"Ai4": -5, "Af8": -5.0}, + {"Ai4": -3, "Af8": -3.9}, + {"Ai4": -1, "Af8": -2.8}, + {"Ai4": 1, "Af8": -1.7}, ], }, - {"i4": 0, "f4": 0.10000000149011612, "A": []}, - {"i4": 1, "f4": 1.100000023841858, "A": [{"i4": 0, "f8": 0.0}]}, + {"i4": 0, "f4": 0.10000000149011612, "jagged0": []}, + {"i4": 1, "f4": 1.100000023841858, "jagged0": [{"Ai4": 0, "Af8": 0.0}]}, { "i4": 2, "f4": 2.0999999046325684, - "A": [{"i4": 0, "f8": 0.0}, {"i4": 2, "f8": 1.1}], + "jagged0": [{"Ai4": 0, "Af8": 0.0}, {"Ai4": 2, "Af8": 1.1}], }, { "i4": 3, "f4": 3.0999999046325684, - "A": [{"i4": 0, "f8": 0.0}, {"i4": 2, "f8": 1.1}, {"i4": 4, "f8": 2.2}], + "jagged0": [{"Ai4": 0, "Af8": 0.0}, {"Ai4": 2, "Af8": 1.1}, {"Ai4": 4, "Af8": 2.2}], }, { "i4": 4, "f4": 4.099999904632568, - "A": [ - {"i4": 0, "f8": 0.0}, - {"i4": 2, "f8": 1.1}, - {"i4": 4, "f8": 2.2}, - {"i4": 6, "f8": 3.3}, + "jagged0": [ + {"Ai4": 0, "Af8": 0.0}, + {"Ai4": 2, "Af8": 1.1}, + {"Ai4": 4, "Af8": 2.2}, + {"Ai4": 6, "Af8": 3.3}, ], }, - {"i4": 5, "f4": 5.099999904632568, "A": []}, - {"i4": 6, "f4": 6.099999904632568, "A": [{"i4": 5, "f8": 5.0}]}, + {"i4": 5, "f4": 5.099999904632568, "jagged0": []}, + {"i4": 6, "f4": 6.099999904632568, "jagged0": [{"Ai4": 5, "Af8": 5.0}]}, { "i4": 7, "f4": 7.099999904632568, - "A": [{"i4": 5, "f8": 5.0}, {"i4": 7, "f8": 6.1}], + "jagged0": [{"Ai4": 5, "Af8": 5.0}, {"Ai4": 7, "Af8": 6.1}], }, { "i4": 8, "f4": 8.100000381469727, - "A": [{"i4": 5, "f8": 5.0}, {"i4": 7, "f8": 6.1}, {"i4": 9, "f8": 7.2}], + "jagged0": [{"Ai4": 5, "Af8": 5.0}, {"Ai4": 7, "Af8": 6.1}, {"Ai4": 9, "Af8": 7.2}], }, { "i4": 9, "f4": 9.100000381469727, - "A": [ - {"i4": 5, "f8": 5.0}, - {"i4": 7, "f8": 6.1}, - {"i4": 9, "f8": 7.2}, - {"i4": 11, "f8": 8.3}, + "jagged0": [ + {"Ai4": 5, "Af8": 5.0}, + {"Ai4": 7, "Af8": 6.1}, + {"Ai4": 9, "Af8": 7.2}, + {"Ai4": 11, "Af8": 8.3}, ], }, ] diff --git a/tests/test_0029-more-string-types.py b/tests/test_0029-more-string-types.py index 0067aaf9f..06e23c9f3 100644 --- a/tests/test_0029-more-string-types.py +++ b/tests/test_0029-more-string-types.py @@ -11,10 +11,10 @@ import uproot4 from uproot4.interpretation.identify import parse_typename -from uproot4.stl_containers import AsString -from uproot4.stl_containers import AsVector -from uproot4.stl_containers import AsSet -from uproot4.stl_containers import AsMap +from uproot4.containers import AsString +from uproot4.containers import AsVector +from uproot4.containers import AsSet +from uproot4.containers import AsMap def test_parse_typename(): diff --git a/tests/test_0031-test-stl-containers.py b/tests/test_0031-test-stl-containers.py index f932c1158..83cb77f6d 100644 --- a/tests/test_0031-test-stl-containers.py +++ b/tests/test_0031-test-stl-containers.py @@ -13,10 +13,10 @@ from uproot4.interpretation.numerical import AsDtype from uproot4.interpretation.jagged import AsJagged from uproot4.interpretation.objects import AsObjects -from uproot4.stl_containers import AsString -from uproot4.stl_containers import AsVector -from uproot4.stl_containers import AsSet -from uproot4.stl_containers import AsMap +from uproot4.containers import AsString +from uproot4.containers import AsVector +from uproot4.containers import AsSet +from uproot4.containers import AsMap def test_typename(): diff --git a/tests/test_0035-datatype-generality.py b/tests/test_0035-datatype-generality.py new file mode 100644 index 000000000..b9afc9069 --- /dev/null +++ b/tests/test_0035-datatype-generality.py @@ -0,0 +1,55 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE + +from __future__ import absolute_import + +import numpy +import pytest +import skhep_testdata + +import uproot4 + + +def test_TH2_in_ttree(): + with uproot4.open(skhep_testdata.data_path("uproot-issue-tbranch-of-th2.root"))[ + "g4SimHits/tree" + ] as tree: + assert ( + tree["histogram"].array(library="np")[0].member("fXaxis").member("fName") + == "xaxis" + ) + + +def test_iofeatures_offsets(): + with uproot4.open(skhep_testdata.data_path("uproot-small-dy-withoffsets.root"))[ + "tree/Muon_pt" + ] as withoffsets: + muonpt1 = withoffsets.array(library="np", entry_start=10, entry_stop=20) + assert [x.tolist() for x in muonpt1] == [ + [51.685970306396484], + [35.227813720703125, 23.073759078979492, 32.921417236328125], + [8.922308921813965, 4.368383407592773], + [], + [], + [], + [], + [51.9132194519043], + [31.930095672607422], + [], + ] + + with uproot4.open(skhep_testdata.data_path("uproot-small-dy-nooffsets.root"))[ + "tree/Muon_pt" + ] as nooffsets: + muonpt2 = nooffsets.array(library="np", entry_start=10, entry_stop=20) + assert [x.tolist() for x in muonpt2] == [ + [51.685970306396484], + [35.227813720703125, 23.073759078979492, 32.921417236328125], + [8.922308921813965, 4.368383407592773], + [], + [], + [], + [], + [51.9132194519043], + [31.930095672607422], + [], + ] diff --git a/uproot4/__init__.py b/uproot4/__init__.py index 31bbbbde6..ea29f887c 100644 --- a/uproot4/__init__.py +++ b/uproot4/__init__.py @@ -34,17 +34,7 @@ from uproot4.model import classname_encode from uproot4.model import has_class_named from uproot4.model import class_named -from uproot4.model import bootstrap_classnames -from uproot4.model import bootstrap_classes - -from uproot4.stl_containers import STLVector -from uproot4.stl_containers import STLSet -from uproot4.stl_containers import STLMap - -import uproot4.interpretation -import uproot4.interpretation.library - -default_library = "ak" +from uproot4.model import reset_classes import uproot4.models.TObject import uproot4.models.TString @@ -61,11 +51,20 @@ import uproot4.models.TBranch import uproot4.models.TLeaf import uproot4.models.TBasket +import uproot4.models.RNTuple + +from uproot4.containers import STLVector +from uproot4.containers import STLSet +from uproot4.containers import STLMap + +import uproot4.interpretation +import uproot4.interpretation.library + +default_library = "ak" + from uproot4.behaviors.TTree import TTree from uproot4.behaviors.TBranch import TBranch -import uproot4.models.RNTuple - import pkgutil import uproot4.behaviors diff --git a/uproot4/_util.py b/uproot4/_util.py index 8a7ee75eb..864366eb9 100644 --- a/uproot4/_util.py +++ b/uproot4/_util.py @@ -271,7 +271,9 @@ def awkward_form(model, file, header=False, tobject_header=True): model = model.newbyteorder("=") if model not in _primitive_awkward_form: - if model == numpy.dtype(numpy.int8): + if model == numpy.dtype(numpy.bool_) or model == numpy.dtype(numpy.bool): + _primitive_awkward_form[model] = awkward1.forms.Form.fromjson('"bool"') + elif model == numpy.dtype(numpy.int8): _primitive_awkward_form[model] = awkward1.forms.Form.fromjson('"int8"') elif model == numpy.dtype(numpy.uint8): _primitive_awkward_form[model] = awkward1.forms.Form.fromjson('"uint8"') @@ -302,7 +304,7 @@ def awkward_form(model, file, header=False, tobject_header=True): '"float64"' ) else: - raise AssertionError(model) + raise AssertionError("{0}: {1}".format(repr(model), type(model))) return _primitive_awkward_form[model] diff --git a/uproot4/behaviors/TBranch.py b/uproot4/behaviors/TBranch.py index 51af03413..c5cc98111 100644 --- a/uproot4/behaviors/TBranch.py +++ b/uproot4/behaviors/TBranch.py @@ -23,7 +23,7 @@ import uproot4.source.cursor import uproot4.streamers -import uproot4.stl_containers +import uproot4.containers import uproot4.interpretation import uproot4.interpretation.numerical import uproot4.interpretation.jagged @@ -903,8 +903,6 @@ def postprocess(self, chunk, cursor, context): self._lookup = {} self._interpretation = None - self._count_branch = None - self._count_leaf = None self._typename = None self._streamer = None self._context = dict(context) @@ -1086,15 +1084,18 @@ def interpretation(self): @property def count_branch(self): - if self._count_branch is None: - raise NotImplementedError - return self._count_branch + leaf = self.count_leaf + if leaf is None: + return None + else: + return leaf.parent @property def count_leaf(self): - if self._count_leaf is None: - raise NotImplementedError - return self._count_leaf + leaves = self.member("fLeaves") + if len(leaves) != 1: + return None + return leaves[0].member("fLeafCount") @property def num_entries(self): @@ -1106,10 +1107,12 @@ def num_baskets(self): def __repr__(self): if len(self) == 0: - return "".format(repr(self.name), id(self)) + return "<{0} {1} at 0x{2:012x}>".format( + self.classname, repr(self.name), id(self) + ) else: - return "".format( - repr(self.name), len(self), id(self) + return "<{0} {1} ({2} subbranches) at 0x{3:012x}>".format( + self.classname, repr(self.name), len(self), id(self) ) def basket_chunk_bytes(self, basket_num): @@ -1203,6 +1206,30 @@ def entries_to_ranges_or_baskets(self, entry_start, entry_stop): start = stop return out + def debug( + self, + entry, + skip_bytes=None, + limit_bytes=None, + dtype=None, + offset=0, + stream=sys.stdout, + ): + interpretation = uproot4.interpretation.jagged.AsJagged( + uproot4.interpretation.numerical.AsDtype("u1") + ) + data = self.array( + interpretation, entry_start=entry, entry_stop=entry + 1, library="np" + )[0] + chunk = uproot4.source.chunk.Chunk.wrap(self._file.source, data) + if skip_bytes is None: + cursor = uproot4.source.cursor.Cursor(0) + else: + cursor = uproot4.source.cursor.Cursor(skip_bytes) + cursor.debug( + chunk, limit_bytes=limit_bytes, dtype=dtype, offset=offset, stream=stream + ) + def array( self, interpretation=None, diff --git a/uproot4/const.py b/uproot4/const.py index b22c4fad9..4cb3c18e3 100644 --- a/uproot4/const.py +++ b/uproot4/const.py @@ -102,4 +102,4 @@ ############# IOFeatures -kGenerateOffsetMap = 1 +kGenerateOffsetMap = numpy.uint8(1) diff --git a/uproot4/stl_containers.py b/uproot4/containers.py similarity index 89% rename from uproot4/stl_containers.py rename to uproot4/containers.py index 7f802d97d..86c770940 100644 --- a/uproot4/stl_containers.py +++ b/uproot4/containers.py @@ -60,7 +60,7 @@ def _read_nested(model, length, chunk, cursor, context, file, parent, header=Tru else: values = numpy.empty(length, dtype=_stl_object_type) - if isinstance(model, AsSTLContainer): + if isinstance(model, AsContainer): for i in range(length): values[i] = model.read( chunk, cursor, context, file, parent, header=header @@ -118,7 +118,7 @@ def _str_with_ellipsis(tostring, length, lbracket, rbracket, limit): return lbracket + "".join(left) + "..., " + "".join(right) + rbracket -class AsSTLContainer(object): +class AsContainer(object): @property def header(self): return self._header @@ -158,7 +158,7 @@ def __ne__(self, other): return not self == other -class STLContainer(object): +class Container(object): def __ne__(self, other): return not self == other @@ -166,7 +166,7 @@ def tolist(self): raise AssertionError -class AsFIXME(AsSTLContainer): +class AsFIXME(AsContainer): def __init__(self, message): self.message = message @@ -185,7 +185,7 @@ def typename(self): return "unknown" def awkward_form(self, file, header=False, tobject_header=True): - raise uproot4.deserialization.CannotBeAwkward(self.message) + raise uproot4.interpretation.objects.CannotBeAwkward(self.message) def read(self, chunk, cursor, context, file, parent, header=True): raise uproot4.deserialization.DeserializationError( @@ -199,7 +199,7 @@ def __eq__(self, other): return False -class AsString(AsSTLContainer): +class AsString(AsContainer): def __init__(self, header, length_bytes="1-5", typename=None): self.header = header if length_bytes in ("1-5", "4"): @@ -284,7 +284,7 @@ def __eq__(self, other): ) -class AsPointer(AsSTLContainer): +class AsPointer(AsContainer): def __init__(self, pointee): self._pointee = pointee @@ -311,7 +311,7 @@ def typename(self): return _content_typename(self._pointee) + "*" def awkward_form(self, file, header=False, tobject_header=True): - raise uproot4.deserialization.CannotBeAwkward("arbitrary pointer") + raise uproot4.interpretation.objects.CannotBeAwkward("arbitrary pointer") def read(self, chunk, cursor, context, file, parent, header=True): return uproot4.deserialization.read_object_any( @@ -325,7 +325,7 @@ def __eq__(self, other): return False -class AsArray(AsSTLContainer): +class AsArray(AsContainer): def __init__(self, header, values): self._header = header self._values = values @@ -335,7 +335,11 @@ def values(self): return self._values def __repr__(self): - return "AsArray({0}, {1})".format(self.header, repr(self._values)) + if isinstance(self._values, type): + values = self._values.__name__ + else: + values = repr(self._values) + return "AsArray({0}, {1})".format(self.header, values) @property def cache_key(self): @@ -365,14 +369,64 @@ def read(self, chunk, cursor, context, file, parent, header=True): else: out = [] while cursor.index < chunk.stop: - out.append(self._values.read(chunk, cursor, context, file, self)) + out.append(self._values.read(chunk, cursor, context, file, parent)) return numpy.array(out, dtype=numpy.dtype(numpy.object)) -class AsVector(AsSTLContainer): +class AsDynamic(AsContainer): + def __init__(self, model=None): + self._model = model + + @property + def model(self): + return self._model + + def __repr__(self): + if self._model is None: + model = "" + elif isinstance(self._model, type): + model = "model=" + self._model.__name__ + else: + model = "model=" + repr(self._model) + return "AsDynamic({0})".format(model) + + @property + def cache_key(self): + if self._model is None: + return "AsDynamic(None)" + else: + return "AsDynamic({0})".format(_content_cache_key(self._model)) + + @property + def typename(self): + if self._model is None: + return "void*" + else: + return _content_typename(self._values) + "*" + + def awkward_form(self, file, header=False, tobject_header=True): + import awkward1 + + if self._model is None: + raise uproot4.interpretation.objects.CannotBeAwkward("dynamic type") + else: + return awkward1.forms.ListOffsetForm( + "i32", + uproot4._util.awkward_form(self._model, file, header, tobject_header), + parameters={"uproot": {"as": "array", "header": self._header}}, + ) + + def read(self, chunk, cursor, context, file, parent, header=True): + classname = cursor.string(chunk, context) + cursor.skip(1) + cls = file.class_named(classname) + return cls.read(chunk, cursor, context, file, parent) + + +class AsVector(AsContainer): def __init__(self, header, values): self.header = header - if isinstance(values, AsSTLContainer): + if isinstance(values, AsContainer): self._values = values elif isinstance(values, type) and issubclass( values, (uproot4.model.Model, uproot4.model.DispatchByVersion) @@ -423,7 +477,9 @@ def read(self, chunk, cursor, context, file, parent, header=True): length = cursor.field(chunk, _stl_container_size, context) - values = _read_nested(self._values, length, chunk, cursor, context, file, self) + values = _read_nested( + self._values, length, chunk, cursor, context, file, parent + ) out = STLVector(values) if self._header and header: @@ -458,7 +514,7 @@ def __eq__(self, other): return False -class STLVector(STLContainer, Sequence): +class STLVector(Container, Sequence): def __init__(self, values): if isinstance(values, types.GeneratorType): values = numpy.asarray(list(values)) @@ -505,15 +561,14 @@ def __eq__(self, other): def tolist(self): return [ - x.tolist() if isinstance(x, (STLContainer, numpy.ndarray)) else x - for x in self + x.tolist() if isinstance(x, (Container, numpy.ndarray)) else x for x in self ] -class AsSet(AsSTLContainer): +class AsSet(AsContainer): def __init__(self, header, keys): self.header = header - if isinstance(keys, AsSTLContainer): + if isinstance(keys, AsContainer): self._keys = keys elif isinstance(keys, type) and issubclass( keys, (uproot4.model.Model, uproot4.model.DispatchByVersion) @@ -565,7 +620,7 @@ def read(self, chunk, cursor, context, file, parent, header=True): length = cursor.field(chunk, _stl_container_size, context) - keys = _read_nested(self._keys, length, chunk, cursor, context, file, self) + keys = _read_nested(self._keys, length, chunk, cursor, context, file, parent) out = STLSet(keys) if self._header and header: @@ -598,7 +653,7 @@ def __eq__(self, other): return False -class STLSet(STLContainer, Set): +class STLSet(Container, Set): def __init__(self, keys): if isinstance(keys, types.GeneratorType): keys = numpy.asarray(list(keys)) @@ -657,28 +712,27 @@ def __eq__(self, other): def tolist(self): return set( - x.tolist() if isinstance(x, (STLContainer, numpy.ndarray)) else x - for x in self + x.tolist() if isinstance(x, (Container, numpy.ndarray)) else x for x in self ) def _has_nested_header(obj): - if isinstance(obj, AsSTLContainer): + if isinstance(obj, AsContainer): return obj.header else: return False -class AsMap(AsSTLContainer): +class AsMap(AsContainer): def __init__(self, header, keys, values): self.header = header - if isinstance(keys, AsSTLContainer): + if isinstance(keys, AsContainer): self._keys = keys else: self._keys = numpy.dtype(keys) - if isinstance(values, AsSTLContainer): + if isinstance(values, AsContainer): self._values = values elif isinstance(values, type) and issubclass( values, (uproot4.model.Model, uproot4.model.DispatchByVersion) @@ -757,13 +811,13 @@ def read(self, chunk, cursor, context, file, parent, header=True): if _has_nested_header(self._keys) and header: cursor.skip(6) keys = _read_nested( - self._keys, length, chunk, cursor, context, file, self, header=False + self._keys, length, chunk, cursor, context, file, parent, header=False ) if _has_nested_header(self._values) and header: cursor.skip(6) values = _read_nested( - self._values, length, chunk, cursor, context, file, self, header=False + self._values, length, chunk, cursor, context, file, parent, header=False ) out = STLMap(keys, values) @@ -811,7 +865,7 @@ def __eq__(self, other): return False -class STLMap(STLContainer, Mapping): +class STLMap(Container, Mapping): @classmethod def from_mapping(cls, mapping): return STLMap(mapping.keys(), mapping.values()) @@ -941,7 +995,7 @@ def tolist(self): out = {} for i in range(len(self)): x = self._values[i] - if isinstance(x, (STLContainer, numpy.ndarray)): + if isinstance(x, (Container, numpy.ndarray)): out[self._keys[i]] = x.tolist() else: out[self._keys[i]] = x diff --git a/uproot4/deserialization.py b/uproot4/deserialization.py index 7ff7d8fa6..cbf2324d8 100644 --- a/uproot4/deserialization.py +++ b/uproot4/deserialization.py @@ -74,7 +74,7 @@ def __str__(self): obj.instance_version, type(obj).__module__, type(obj).__name__, - obj.num_bytes, + "?" if obj.num_bytes is None else obj.num_bytes, ) ) indent = indent + " " @@ -197,7 +197,7 @@ def numbytes_check( if num_bytes is not None: observed = stop_cursor.displacement(start_cursor) if observed != num_bytes: - raise uproot4.deserialization.DeserializationError( + raise DeserializationError( """expected {0} bytes but cursor moved by {1} bytes (through {2})""".format( num_bytes, observed, classname ), @@ -208,23 +208,6 @@ def numbytes_check( ) -# _map_string_string_format1 = struct.Struct(">I") -# def map_long_int(chunk, cursor, context): -# cursor.skip(12) -# size = cursor.field(chunk, _map_string_string_format1, context) -# keys = cursor.array(chunk, size, numpy.dtype(">i8"), context) -# values = cursor.array(chunk, size, numpy.dtype(">i4"), context) -# return dict(zip(keys, values)) -# scope["map_long_int"] = map_long_int - -# def set_long(chunk, cursor, context): -# cursor.skip(6) -# size = cursor.field(chunk, _map_string_string_format1, context) -# values = cursor.array(chunk, size, numpy.dtype(">i8"), context) -# return set(values) -# scope["set_long"] = set_long - - _read_object_any_format1 = struct.Struct(">I") @@ -301,12 +284,16 @@ def read_object_any(chunk, cursor, context, file, parent, as_class=None): in_file = "" else: in_file = "\n\nin file {0}".format(file.file_path) - raise ValueError( + raise DeserializationError( """invalid class-tag reference: {0} Known references: {1}{2}""".format( ref, ", ".join(str(x) for x in cursor.refs), in_file - ) + ), + chunk, + cursor, + context, + file.file_path, ) cls = cursor.refs[ref] # reference class diff --git a/uproot4/interpretation/identify.py b/uproot4/interpretation/identify.py index b574a7ac0..f4a0d3a01 100644 --- a/uproot4/interpretation/identify.py +++ b/uproot4/interpretation/identify.py @@ -11,7 +11,7 @@ import uproot4.interpretation.numerical import uproot4.interpretation.strings import uproot4.interpretation.objects -import uproot4.stl_containers +import uproot4.containers import uproot4.streamers import uproot4._util @@ -177,18 +177,25 @@ def _leaf_to_dtype(leaf): _tokenize_typename_pattern = re.compile( - r"(\b([A-Za-z_][A-Za-z_0-9]*)(\s*::\s*[A-Za-z_][A-Za-z_0-9]*)*\b(\s*\*)*|<|>|,)" + r"(\b([A-Za-z_0-9]+)(\s*::\s*[A-Za-z_][A-Za-z_0-9]*)*\b(\s*\*)*|<|>|,)" ) _simplify_token_1 = re.compile(r"\s*\*") _simplify_token_2 = re.compile(r"\s*::\s*") +_simplify_token_3 = re.compile(r"\s*<\s*") +_simplify_token_4 = re.compile(r"\s*>\s*") def _simplify_token(token, is_token=True): if is_token: - return _simplify_token_2.sub("::", _simplify_token_1.sub("*", token.group(0))) + text = token.group(0) else: - return _simplify_token_2.sub("::", _simplify_token_1.sub("*", token)) + text = token + text = _simplify_token_1.sub("*", text) + text = _simplify_token_2.sub("::", text) + text = _simplify_token_3.sub("<", text) + text = _simplify_token_4.sub(">", text) + return text def _parse_error(pos, typename, file): @@ -237,7 +244,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype("?"))'.format(header), + 'uproot4.containers.AsArray({0}, numpy.dtype("?"))'.format(header), quote, ), ) @@ -245,7 +252,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype("?"))'.format(header), + 'uproot4.containers.AsArray({0}, numpy.dtype("?"))'.format(header), quote, ), ) @@ -263,7 +270,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype("u1"))'.format(header), + 'uproot4.containers.AsArray({0}, numpy.dtype("u1"))'.format(header), quote, ), ) @@ -275,7 +282,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 2, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype("u1"))'.format(header), + 'uproot4.containers.AsArray({0}, numpy.dtype("u1"))'.format(header), quote, ), ) @@ -295,9 +302,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">i2"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">i2"))'.format(header), quote, ), ) @@ -305,9 +310,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">i2"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">i2"))'.format(header), quote, ), ) @@ -315,9 +318,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">u2"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">u2"))'.format(header), quote, ), ) @@ -329,9 +330,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 2, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">u2"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">u2"))'.format(header), quote, ), ) @@ -349,9 +348,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">i4"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">i4"))'.format(header), quote, ), ) @@ -359,9 +356,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">i4"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">i4"))'.format(header), quote, ), ) @@ -369,9 +364,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">u4"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">u4"))'.format(header), quote, ), ) @@ -383,9 +376,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 2, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">u4"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">u4"))'.format(header), quote, ), ) @@ -407,9 +398,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">i8"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">i8"))'.format(header), quote, ), ) @@ -417,9 +406,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">i8"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">i8"))'.format(header), quote, ), ) @@ -427,9 +414,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">i8"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">i8"))'.format(header), quote, ), ) @@ -437,9 +422,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">u8"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">u8"))'.format(header), quote, ), ) @@ -447,9 +430,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">u8"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">u8"))'.format(header), quote, ), ) @@ -461,9 +442,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 2, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">u8"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">u8"))'.format(header), quote, ), ) @@ -477,9 +456,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">f4"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">f4"))'.format(header), quote, ), ) @@ -487,9 +464,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">f4"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">f4"))'.format(header), quote, ), ) @@ -503,9 +478,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">f8"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">f8"))'.format(header), quote, ), ) @@ -513,9 +486,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsArray({0}, numpy.dtype(">f8"))'.format( - header - ), + 'uproot4.containers.AsArray({0}, numpy.dtype(">f8"))'.format(header), quote, ), ) @@ -524,7 +495,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsFIXME("Float16_t in another context")', quote + 'uproot4.containers.AsFIXME("Float16_t in another context")', quote ), ) @@ -532,8 +503,8 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - "uproot4.stl_containers.AsArray({0}, " - 'uproot4.stl_containers.AsFIXME("Float16_t in array"))'.format(header), + "uproot4.containers.AsArray({0}, " + 'uproot4.containers.AsFIXME("Float16_t in array"))'.format(header), quote, ), ) @@ -542,7 +513,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - 'uproot4.stl_containers.AsFIXME("Double32_t in another context")', quote + 'uproot4.containers.AsFIXME("Double32_t in another context")', quote ), ) @@ -550,8 +521,8 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - "uproot4.stl_containers.AsArray({0}, " - 'uproot4.stl_containers.AsFIXME("Double32_t in array ' + "uproot4.containers.AsArray({0}, " + 'uproot4.containers.AsFIXME("Double32_t in array ' '(note: Event.root fClosestDistance has an example)"))'.format(header), quote, ), @@ -561,21 +532,21 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - "uproot4.stl_containers.AsString({0})".format(header), quote + "uproot4.containers.AsString({0})".format(header), quote ), ) elif tokens[i].group(0) == "TString": return ( i + 1, _parse_maybe_quote( - "uproot4.stl_containers.AsString(False, typename='TString')", quote + "uproot4.containers.AsString(False, typename='TString')", quote ), ) elif _simplify_token(tokens[i]) == "char*": return ( i + 1, _parse_maybe_quote( - "uproot4.stl_containers.AsString(False, length_bytes='4', typename='char*')", + "uproot4.containers.AsString(False, length_bytes='4', typename='char*')", quote, ), ) @@ -587,7 +558,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 2, _parse_maybe_quote( - "uproot4.stl_containers.AsString(False, length_bytes='4', typename='char*')", + "uproot4.containers.AsString(False, length_bytes='4', typename='char*')", quote, ), ) @@ -603,7 +574,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 4, _parse_maybe_quote( - 'uproot4.stl_containers.AsFIXME("std::bitset<{0}>")'.format(num_bits), + 'uproot4.containers.AsFIXME("std::bitset<{0}>")'.format(num_bits), quote, ), ) @@ -617,10 +588,10 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): if quote: return ( i + 1, - "uproot4.stl_containers.AsVector({0}, {1})".format(header, values), + "uproot4.containers.AsVector({0}, {1})".format(header, values), ) else: - return i + 1, uproot4.stl_containers.AsVector(header, values) + return i + 1, uproot4.containers.AsVector(header, values) elif tokens[i].group(0) == "set" or _simplify_token(tokens[i]) == "std::set": _parse_expect("<", tokens, i + 1, typename, file) @@ -629,9 +600,9 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): ) _parse_expect(">", tokens, i, typename, file) if quote: - return i + 1, "uproot4.stl_containers.AsSet({0}, {1})".format(header, keys) + return i + 1, "uproot4.containers.AsSet({0}, {1})".format(header, keys) else: - return i + 1, uproot4.stl_containers.AsSet(header, keys) + return i + 1, uproot4.containers.AsSet(header, keys) elif tokens[i].group(0) == "map" or _simplify_token(tokens[i]) == "std::map": _parse_expect("<", tokens, i + 1, typename, file) @@ -646,23 +617,20 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): if quote: return ( i + 1, - "uproot4.stl_containers.AsMap({0}, {1}, {2})".format( - header, keys, values - ), + "uproot4.containers.AsMap({0}, {1}, {2})".format(header, keys, values), ) else: - return i + 1, uproot4.stl_containers.AsMap(header, keys, values) + return i + 1, uproot4.containers.AsMap(header, keys, values) else: start, stop = tokens[i].span() if has2 and tokens[i + 1].group(0) == "<": i, keys = _parse_node( - tokens, i + 1, typename, file, quote, inner_header, inner_header + tokens, i + 2, typename, file, quote, inner_header, inner_header ) - _parse_expect(">", tokens, i + 1, typename, file) - stop = tokens[i + 1].span()[1] - i += 1 + _parse_expect(">", tokens, i, typename, file) + stop = tokens[i].span()[1] classname = _simplify_token(typename[start:stop], is_token=False) @@ -674,15 +642,15 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): if quote: cls = "c({0})".format(repr(classname)) for x in range(pointers): - cls = "uproot4.stl_containers.AsPointer({0})".format(cls) + cls = "uproot4.containers.AsPointer({0})".format(cls) elif file is None: cls = uproot4.classes[classname] for x in range(pointers): - cls = uproot4.stl_containers.AsPointer(cls) + cls = uproot4.containers.AsPointer(cls) else: cls = file.class_named(classname) for x in range(pointers): - cls = uproot4.stl_containers.AsPointer(cls) + cls = uproot4.containers.AsPointer(cls) return i + 1, cls @@ -705,7 +673,7 @@ def parse_typename( or _simplify_token(tokens[0]) == "std::string" ) ): - i, out = 1, _parse_maybe_quote("uproot4.stl_containers.AsString(False)", quote) + i, out = 1, _parse_maybe_quote("uproot4.containers.AsString(False)", quote) else: i, out = _parse_node( @@ -962,7 +930,37 @@ def _float16_or_double32(branch, context, leaf, is_float16, dims): return uproot4.interpretation.numerical.AsFloat16(low, high, num_bits, dims) -def interpretation_of(branch, context): +def interpretation_of(branch, context, simplify=True): + if branch.classname == "TBranchObject": + if branch.top_level and branch.has_member("fClassName"): + model_cls = parse_typename( + branch.member("fClassName"), + file=branch.file, + outer_header=True, + inner_header=False, + string_header=False, + ) + return uproot4.interpretation.objects.AsObjects( + uproot4.containers.AsDynamic(model_cls), branch + ) + + if branch.streamer is not None: + model_cls = parse_typename( + branch.streamer.typename, + file=branch.file, + outer_header=True, + inner_header=False, + string_header=True, + ) + + return uproot4.interpretation.objects.AsObjects( + uproot4.containers.AsDynamic(model_cls), branch + ) + + return uproot4.interpretation.objects.AsObjects( + uproot4.containers.AsDynamic(), branch + ) + dims, is_jagged = _from_leaves(branch, context) try: @@ -1057,9 +1055,11 @@ def interpretation_of(branch, context): string_header=False, ) - return uproot4.interpretation.objects.AsObjects( - model_cls, branch - ).simplify() + out = uproot4.interpretation.objects.AsObjects(model_cls, branch) + if simplify: + return out.simplify() + else: + return out if branch.streamer is not None: model_cls = parse_typename( @@ -1069,9 +1069,22 @@ def interpretation_of(branch, context): inner_header=False, string_header=True, ) - return uproot4.interpretation.objects.AsObjects( - model_cls, branch - ).simplify() + + # kObjectp/kAnyp (as opposed to kObjectP/kAnyP) are stored inline + if isinstance( + model_cls, uproot4.containers.AsPointer + ) and branch.streamer.member("fType") in ( + uproot4.const.kObjectp, + uproot4.const.kAnyp, + ): + while isinstance(model_cls, uproot4.containers.AsPointer): + model_cls = model_cls.pointee + + out = uproot4.interpretation.objects.AsObjects(model_cls, branch) + if simplify: + return out.simplify() + else: + return out raise UnknownInterpretation( "none of the rules matched", branch.file.file_path, branch.object_path, diff --git a/uproot4/interpretation/jagged.py b/uproot4/interpretation/jagged.py index 8e06e7375..fb37596dc 100644 --- a/uproot4/interpretation/jagged.py +++ b/uproot4/interpretation/jagged.py @@ -142,10 +142,25 @@ def basket_array(self, data, byte_offsets, basket, branch, context, cursor_offse cursor_offset=cursor_offset, ) - assert basket.byte_offsets is not None + if byte_offsets is None: + counts = basket.counts + if counts is None: + raise uproot4.deserialization.DeserializationError( + "missing offsets (and missing count branch) for jagged array", + None, + None, + context, + branch.file.file_path, + ) + else: + itemsize = self._content.from_dtype.itemsize + numpy.multiply(counts, itemsize, out=counts) + byte_offsets = numpy.empty(len(counts) + 1, dtype=numpy.int32) + byte_offsets[0] = 0 + numpy.cumsum(counts, out=byte_offsets[1:]) if self._header_bytes == 0: - offsets = fast_divide(basket.byte_offsets, self._content.itemsize) + offsets = fast_divide(byte_offsets, self._content.itemsize) content = self._content.basket_array( data, None, basket, branch, context, cursor_offset ) diff --git a/uproot4/interpretation/library.py b/uproot4/interpretation/library.py index 4b5fd9dde..c4be071fc 100644 --- a/uproot4/interpretation/library.py +++ b/uproot4/interpretation/library.py @@ -10,7 +10,7 @@ import uproot4.interpretation.jagged import uproot4.interpretation.strings import uproot4.interpretation.objects -import uproot4.stl_containers +import uproot4.containers class Library(object): @@ -372,7 +372,7 @@ def group(self, arrays, expression_context, how): cut = len(jagged[0]) for name in jagged: cut = min(cut, len(name)) - while cut > 0 and name[:cut] != jagged[0][:cut]: + while cut > 0 and (name[:cut] != jagged[0][:cut] or name[cut - 1] not in ("_", ".", "/")): cut -= 1 if cut == 0: break diff --git a/uproot4/interpretation/objects.py b/uproot4/interpretation/objects.py index ba646e4ee..b618b6577 100644 --- a/uproot4/interpretation/objects.py +++ b/uproot4/interpretation/objects.py @@ -8,7 +8,7 @@ import uproot4.interpretation.strings import uproot4.interpretation.jagged import uproot4.interpretation.numerical -import uproot4.stl_containers +import uproot4.containers import uproot4.model import uproot4.source.chunk import uproot4.source.cursor @@ -63,16 +63,15 @@ def cursor_offset(self): def __len__(self): return len(self._byte_offsets) - 1 - def chunk(self, entry_num): - byte_start = self._byte_offsets[entry_num] - byte_stop = self._byte_offsets[entry_num + 1] - data = self._byte_content[byte_start:byte_stop] - return uproot4.source.chunk.Chunk.wrap(self._branch.file.source, data) - def __getitem__(self, where): if uproot4._util.isint(where): - chunk = self.chunk(where) - cursor = uproot4.source.cursor.Cursor(0, origin=-self._cursor_offset) + byte_start = self._byte_offsets[where] + byte_stop = self._byte_offsets[where + 1] + data = self._byte_content[byte_start:byte_stop] + chunk = uproot4.source.chunk.Chunk.wrap(self._branch.file.source, data) + cursor = uproot4.source.cursor.Cursor( + 0, origin=-(byte_start + self._cursor_offset) + ) return self._model.read( chunk, cursor, self._context, self._branch.file, self._branch ) @@ -133,12 +132,12 @@ def numpy_dtype(self): @property def cache_key(self): - content_key = uproot4.stl_containers._content_cache_key(self._model) + content_key = uproot4.containers._content_cache_key(self._model) return "{0}({1})".format(type(self).__name__, content_key) @property def typename(self): - if isinstance(self._model, uproot4.stl_containers.AsSTLContainer): + if isinstance(self._model, uproot4.containers.AsContainer): return self._model.typename else: return uproot4.model.classname_decode(self._model.__name__)[0] @@ -258,7 +257,7 @@ def simplify(self): except CannotBeStrided: pass - if isinstance(self._model, uproot4.stl_containers.AsString): + if isinstance(self._model, uproot4.containers.AsString): header_bytes = 0 if self._model.header: header_bytes = 6 @@ -270,12 +269,11 @@ def simplify(self): ) if isinstance( - self._model, - (uproot4.stl_containers.AsArray, uproot4.stl_containers.AsVector), + self._model, (uproot4.containers.AsArray, uproot4.containers.AsVector), ): if not self._model.header: header_bytes = 0 - elif isinstance(self._model, uproot4.stl_containers.AsArray): + elif isinstance(self._model, uproot4.containers.AsArray): header_bytes = 1 else: header_bytes = 10 diff --git a/uproot4/model.py b/uproot4/model.py index d88fcb418..ffdf69c36 100644 --- a/uproot4/model.py +++ b/uproot4/model.py @@ -11,7 +11,6 @@ import uproot4._util import uproot4.interpretation.objects - bootstrap_classnames = [ "TStreamerInfo", "TStreamerElement", @@ -46,6 +45,33 @@ def bootstrap_classes(): return custom_classes +def reset_classes(): + if uproot4._util.py2: + reload = __builtins__["reload"] + else: + from importlib import reload + + uproot4.classes = {} + uproot4.unknown_classes = {} + + reload(uproot4.streamers) + reload(uproot4.models.TObject) + reload(uproot4.models.TString) + reload(uproot4.models.TArray) + reload(uproot4.models.TNamed) + reload(uproot4.models.TList) + reload(uproot4.models.THashList) + reload(uproot4.models.TObjArray) + reload(uproot4.models.TObjString) + reload(uproot4.models.TAtt) + reload(uproot4.models.TRef) + reload(uproot4.models.TTree) + reload(uproot4.models.TBranch) + reload(uproot4.models.TLeaf) + reload(uproot4.models.TBasket) + reload(uproot4.models.RNTuple) + + class Model(object): class_streamer = None diff --git a/uproot4/models/TArray.py b/uproot4/models/TArray.py index 741c9c17f..075a94876 100644 --- a/uproot4/models/TArray.py +++ b/uproot4/models/TArray.py @@ -42,7 +42,13 @@ def __len__(self): def __repr__(self): return "<{0} {1} at 0x{2:012x}>".format( uproot4.model.classname_pretty(self.classname, self.class_version), - str(self._data), + numpy.array2string( + self._data, + max_line_width=numpy.inf, + separator=", ", + formatter={"float": lambda x: "%g" % x}, + threshold=6, + ), id(self), ) @@ -55,7 +61,7 @@ def awkward_form(cls, file, header=False, tobject_header=True): return awkward1.forms.ListOffsetForm( "i32", - uproot4._util.awkward_form(cls.dtype), + uproot4._util.awkward_form(cls.dtype, file, header, tobject_header), parameters={"uproot": {"as": "TArray"}}, ) diff --git a/uproot4/models/TAtt.py b/uproot4/models/TAtt.py index 8104503c4..c886fb9a8 100644 --- a/uproot4/models/TAtt.py +++ b/uproot4/models/TAtt.py @@ -42,7 +42,9 @@ def awkward_form(cls, file, header=False, tobject_header=True): contents = {} if header: - contents["@num_bytes"] = uproot4._util.awkward_form(numpy.dtype("u4")) + contents["@num_bytes"] = uproot4._util.awkward_form( + numpy.dtype("u4"), file, header, tobject_header + ) contents["@instance_version"] = uproot4._util.awkward_form( numpy.dtype("u2"), file, header, tobject_header ) @@ -94,7 +96,9 @@ def awkward_form(cls, file, header=False, tobject_header=True): contents = {} if header: - contents["@num_bytes"] = uproot4._util.awkward_form(numpy.dtype("u4")) + contents["@num_bytes"] = uproot4._util.awkward_form( + numpy.dtype("u4"), file, header, tobject_header + ) contents["@instance_version"] = uproot4._util.awkward_form( numpy.dtype("u2"), file, header, tobject_header ) @@ -147,7 +151,9 @@ def awkward_form(cls, file, header=False, tobject_header=True): contents = {} if header: - contents["@num_bytes"] = uproot4._util.awkward_form(numpy.dtype("u4")) + contents["@num_bytes"] = uproot4._util.awkward_form( + numpy.dtype("u4"), file, header, tobject_header + ) contents["@instance_version"] = uproot4._util.awkward_form( numpy.dtype("u2"), file, header, tobject_header ) @@ -193,7 +199,9 @@ def awkward_form(cls, file, header=False, tobject_header=True): contents = {} if header: - contents["@num_bytes"] = uproot4._util.awkward_form(numpy.dtype("u4")) + contents["@num_bytes"] = uproot4._util.awkward_form( + numpy.dtype("u4"), file, header, tobject_header + ) contents["@instance_version"] = uproot4._util.awkward_form( numpy.dtype("u2"), file, header, tobject_header ) @@ -245,7 +253,9 @@ def awkward_form(cls, file, header=False, tobject_header=True): contents = {} if header: - contents["@num_bytes"] = uproot4._util.awkward_form(numpy.dtype("u4")) + contents["@num_bytes"] = uproot4._util.awkward_form( + numpy.dtype("u4"), file, header, tobject_header + ) contents["@instance_version"] = uproot4._util.awkward_form( numpy.dtype("u2"), file, header, tobject_header ) diff --git a/uproot4/models/TBasket.py b/uproot4/models/TBasket.py index f36035d1c..fecc0b106 100644 --- a/uproot4/models/TBasket.py +++ b/uproot4/models/TBasket.py @@ -10,6 +10,7 @@ import uproot4.deserialization import uproot4.compression import uproot4.behaviors.TBranch +import uproot4.const _tbasket_format1 = struct.Struct(">ihiIhh") @@ -159,6 +160,17 @@ def data(self): def byte_offsets(self): return self._byte_offsets + @property + def counts(self): + count_branch = self._parent.count_branch + if count_branch is not None: + entry_offsets = count_branch.entry_offsets + entry_start = entry_offsets[self._basket_num] + entry_stop = entry_offsets[self._basket_num + 1] + return count_branch.array( + entry_start=entry_start, entry_stop=entry_stop, library="np" + ) + def array(self, interpretation=None): if interpretation is None: interpretation = self._parent.interpretation diff --git a/uproot4/models/TBranch.py b/uproot4/models/TBranch.py index aba055616..e509df59c 100644 --- a/uproot4/models/TBranch.py +++ b/uproot4/models/TBranch.py @@ -622,5 +622,30 @@ class Model_TBranchElement(uproot4.model.DispatchByVersion): } +class Model_TBranchObject_v1( + uproot4.behaviors.TBranch.TBranch, uproot4.model.VersionedModel +): + def read_members(self, chunk, cursor, context): + self._bases.append( + self.class_named("TBranch", 13).read( + chunk, cursor, context, self._file, self._parent + ) + ) + self._members["fClassName"] = self.class_named("TString").read( + chunk, cursor, context, self._file, self + ) + + base_names_versions = [("TBranch", 13)] + member_names = ["fClassName"] + class_flags = {} + + +class Model_TBranchObject(uproot4.model.DispatchByVersion): + known_versions = { + 1: Model_TBranchObject_v1, + } + + uproot4.classes["TBranch"] = Model_TBranch uproot4.classes["TBranchElement"] = Model_TBranchElement +uproot4.classes["TBranchObject"] = Model_TBranchObject diff --git a/uproot4/models/TNamed.py b/uproot4/models/TNamed.py index 4c0b1cf12..4d5eea6d6 100644 --- a/uproot4/models/TNamed.py +++ b/uproot4/models/TNamed.py @@ -6,7 +6,7 @@ import uproot4.model import uproot4.models.TObject -import uproot4.stl_containers +import uproot4.containers class Model_TNamed(uproot4.model.Model): @@ -34,14 +34,16 @@ def awkward_form(cls, file, header=False, tobject_header=True): contents = {} if header: - contents["@num_bytes"] = uproot4._util.awkward_form(numpy.dtype("u4")) + contents["@num_bytes"] = uproot4._util.awkward_form( + numpy.dtype("u4"), file, header, tobject_header + ) contents["@instance_version"] = uproot4._util.awkward_form( - numpy.dtype("u2") + numpy.dtype("u2"), file, header, tobject_header ) - contents["fName"] = uproot4.stl_containers.AsString( + contents["fName"] = uproot4.containers.AsString( False, typename="TString" ).awkward_form(file, header, tobject_header) - contents["fTitle"] = uproot4.stl_containers.AsString( + contents["fTitle"] = uproot4.containers.AsString( False, typename="TString" ).awkward_form(file, header, tobject_header) return awkward1.forms.RecordForm(contents, parameters={"__record__": "TNamed"},) diff --git a/uproot4/models/TString.py b/uproot4/models/TString.py index 694c7cd5a..24d1013df 100644 --- a/uproot4/models/TString.py +++ b/uproot4/models/TString.py @@ -35,7 +35,7 @@ def tojson(self): @classmethod def awkward_form(cls, file, header=False, tobject_header=True): - return uproot4.stl_containers.AsString(False, typename="TString").awkward_form( + return uproot4.containers.AsString(False, typename="TString").awkward_form( file, header, tobject_header ) diff --git a/uproot4/reading.py b/uproot4/reading.py index d6d455123..62bd3ae17 100644 --- a/uproot4/reading.py +++ b/uproot4/reading.py @@ -475,7 +475,26 @@ def class_named(self, classname, version=None): if version is not None and issubclass(cls, uproot4.model.DispatchByVersion): if not uproot4._util.isint(version): - version = self.streamer_named(classname, version).class_version + streamer = self.streamer_named(classname, version) + if streamer is not None: + version = streamer.class_version + elif version == "max" and len(cls.known_versions) != 0: + version = max(cls.known_versions) + elif version == "min" and len(cls.known_versions) != 0: + version = min(cls.known_versions) + else: + unknown_cls = uproot4.unknown_classes.get(classname) + if unknown_cls is None: + unknown_cls = uproot4._util.new_class( + uproot4.model.classname_encode( + classname, version, unknown=True + ), + (uproot4.model.UnknownClassVersion,), + {}, + ) + uproot4.unknown_classes[classname] = unknown_cls + return unknown_cls + versioned_cls = cls.class_of_version(version) if versioned_cls is None: cls = cls.new_class(self, version) @@ -844,7 +863,7 @@ def get(self): if breadcrumbs is None or all( breadcrumb_cls.classname in uproot4.model.bootstrap_classnames - or isinstance(breadcrumb_cls, uproot4.stl_containers.AsSTLContainer) + or isinstance(breadcrumb_cls, uproot4.containers.AsContainer) or getattr(breadcrumb_cls.class_streamer, "file_uuid", None) == self._file.uuid for breadcrumb_cls in breadcrumbs diff --git a/uproot4/source/cursor.py b/uproot4/source/cursor.py index c546d65b7..1fcc4d332 100644 --- a/uproot4/source/cursor.py +++ b/uproot4/source/cursor.py @@ -343,7 +343,12 @@ def classname(self, chunk, context, move=True): if move: self._index += local_stop - out = remainder[: local_stop - 1].tostring() + out = remainder[: local_stop - 1] + if hasattr(out, "tobytes"): + out = out.tobytes() + else: + out = out.tostring() + if uproot4._util.py2: return out else: diff --git a/uproot4/streamers.py b/uproot4/streamers.py index 89e1c1c46..624d33b46 100644 --- a/uproot4/streamers.py +++ b/uproot4/streamers.py @@ -235,7 +235,7 @@ def class_code(self): fields = [] formats = [] dtypes = [] - stl_containers = [] + containers = [] base_names_versions = [] member_names = [] class_flags = {} @@ -251,7 +251,7 @@ def class_code(self): fields, formats, dtypes, - stl_containers, + containers, base_names_versions, member_names, class_flags, @@ -284,7 +284,7 @@ def class_code(self): for i, dt in enumerate(dtypes): class_data.append(" _dtype{0} = {1}".format(i, dt)) - for i, stl in enumerate(stl_containers): + for i, stl in enumerate(containers): class_data.append(" _stl_container{0} = {1}".format(i, stl)) class_data.append( @@ -436,7 +436,7 @@ def class_code( fields, formats, dtypes, - stl_containers, + containers, base_names_versions, member_names, class_flags, @@ -458,7 +458,7 @@ def class_code( ) awkward_form.append( - " raise uproot4.deserialization.CannotBeAwkward(" + " raise uproot4.interpretation.objects.CannotBeAwkward(" "'not implemented: class members defined by {0} of type {1} in member " "{2} of class {3}')".format( type(self).__name__, self.typename, self.name, streamerinfo.name @@ -512,7 +512,7 @@ def class_code( fields, formats, dtypes, - stl_containers, + containers, base_names_versions, member_names, class_flags, @@ -570,7 +570,7 @@ def class_code( fields, formats, dtypes, - stl_containers, + containers, base_names_versions, member_names, class_flags, @@ -697,7 +697,7 @@ def class_code( fields, formats, dtypes, - stl_containers, + containers, base_names_versions, member_names, class_flags, @@ -845,7 +845,7 @@ def class_code( fields, formats, dtypes, - stl_containers, + containers, base_names_versions, member_names, class_flags, @@ -930,7 +930,7 @@ def class_code( fields, formats, dtypes, - stl_containers, + containers, base_names_versions, member_names, class_flags, @@ -945,22 +945,22 @@ def class_code( read_members.append( " self._members[{0}] = self._stl_container{1}.read(" "chunk, cursor, context, self._file, self)" - "".format(repr(self.name), len(stl_containers)) + "".format(repr(self.name), len(containers)) ) strided_interpretation.append( " members.append(({0}, cls._stl_container{1}." "strided_interpretation(file, header, tobject_header)))".format( - repr(self.name), len(stl_containers) + repr(self.name), len(containers) ) ) awkward_form.append( " contents[{0}] = cls._stl_container{1}.awkward_form(file, " - "header, tobject_header)".format(repr(self.name), len(stl_containers)) + "header, tobject_header)".format(repr(self.name), len(containers)) ) - stl_containers.append(stl_container) + containers.append(stl_container) member_names.append(self.name) @@ -990,7 +990,7 @@ def class_code( fields, formats, dtypes, - stl_containers, + containers, base_names_versions, member_names, class_flags, @@ -1078,7 +1078,7 @@ def class_code( fields, formats, dtypes, - stl_containers, + containers, base_names_versions, member_names, class_flags,