Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

factorize interpretation #106

Open
wants to merge 60 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
1ce019b
factorize interpretation
aminnj Sep 15, 2021
fadd2aa
make methods more specific
aminnj Sep 15, 2021
e608734
for ci
aminnj Sep 15, 2021
e17bd83
TDirectory support (#107)
aminnj Sep 16, 2021
ef584bc
Bump version number
tamasgal Sep 16, 2021
0939b27
fix
aminnj Sep 16, 2021
a90904f
[skip ci] bump version
aminnj Sep 16, 2021
2a9e4ca
add more basic types (#110)
aminnj Sep 17, 2021
80d819b
iterate on baskets + an optimization (#111)
aminnj Sep 17, 2021
1d06a98
optimize TBasketKey unpacking (#115)
aminnj Sep 27, 2021
d2f639c
default byte for fIOFeatures (#117)
aminnj Sep 27, 2021
1a81634
bump
Moelf Sep 27, 2021
514187d
basket cluster iteration utilities (#118)
aminnj Sep 28, 2021
4a9c9b3
match DataFrames behavior for `show` (#121)
aminnj Oct 1, 2021
fe7edd9
add interface function (#124)
aminnj Oct 2, 2021
6c48138
lower bound for PrettyTables (#125)
aminnj Oct 3, 2021
4f6e62d
optimize and return VoV (#122)
aminnj Oct 4, 2021
2334368
add broadcasting fusion (#126)
Moelf Oct 4, 2021
20ddaa7
Update Project.toml
Moelf Oct 4, 2021
776eee7
avoid intermediate array for lz4/zlib (#128)
aminnj Oct 5, 2021
961b1d1
test zstd compression (#129)
aminnj Oct 5, 2021
fbd798c
Update Project.toml
Moelf Oct 5, 2021
4a284bf
custom html repr (#132)
aminnj Oct 7, 2021
0d1ed21
add lazy Chain/Vcat for Tree (#131)
Moelf Oct 8, 2021
0409b7c
remove unused
Moelf Oct 8, 2021
8c2c452
fix compat
Moelf Oct 8, 2021
4d526ee
remove ill-defined method
Moelf Oct 8, 2021
ef0bae8
performance bug fix from Polyester
Moelf Oct 8, 2021
934b8dc
remove Polyester (#134)
Moelf Oct 12, 2021
3cf9cd5
add LazyTree(path) methods
Moelf Oct 21, 2021
972e71d
use LibDeflate for zlib (#137)
aminnj Nov 27, 2021
29e206f
Update Project.toml
Moelf Nov 27, 2021
fe44d2b
use safe api for LibDeflate (#138)
aminnj Nov 28, 2021
47ba3bf
Update ci.yml
Moelf Dec 1, 2021
2d028ce
Fix gitlab ci (#141)
tamasgal Jan 20, 2022
d41491d
Improve thread handling in tests (#144)
tamasgal Feb 7, 2022
c309eb7
faster vcat (#152)
Moelf Feb 23, 2022
39eb35b
Bump version number
tamasgal Feb 24, 2022
cc39f3e
fix stacktrace too long (#149)
Moelf Feb 25, 2022
d35aa9c
reduce number of read during `ROOTFile` and basket unpacking, use mor…
Moelf Mar 2, 2022
e60d17f
bump version
Moelf Mar 2, 2022
f6a0d4a
improve LV and add a debug for compression type
Moelf Mar 11, 2022
00e7815
fix split branches (#155)
Moelf Mar 15, 2022
f6339cc
bump version
Moelf Mar 15, 2022
e822e5d
fix windows `id -u` (#158)
Moelf Apr 5, 2022
03614f4
Update Project.toml
Moelf Apr 5, 2022
4df552e
Fix URLs (#159)
tamasgal Apr 5, 2022
b05997f
add TLeafS (#161)
Moelf Apr 7, 2022
e5746a0
Change the URL of deploydocs to the new one
tamasgal Apr 8, 2022
494f191
Fix error when file is not existent (#164)
tamasgal Apr 25, 2022
353b5b0
Bump version
tamasgal Apr 25, 2022
9b42eb8
C-style arrays (#166)
tamasgal Apr 29, 2022
5a70053
Update Project.toml
Moelf Apr 29, 2022
44e1351
Normalize branch name (#156)
Moelf May 19, 2022
23d87ed
xrootd read support (#150)
Moelf Jun 1, 2022
247f65e
JOSS Paper (#135)
tamasgal Jun 3, 2022
54aea8b
add DOI
Moelf Jun 3, 2022
9358bd5
Update README.md
Moelf Jun 3, 2022
fe28a44
Add JOSS badge
tamasgal Jun 3, 2022
d9fd45a
Merge branch 'master' into factorize-interpretation
Moelf Jun 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions src/custom.jl
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ function interped_data(rawdata, rawoffsets, ::Type{Vector{LVF64}}, ::Type{Offset
offset .+= 1
VectorOfVectors(real_data, offset)
end
function interped_data(rawdata, rawoffsets, ::Type{LVF64}, ::Type{J}) where {T, J <: JaggType}
function interped_data(rawdata, rawoffsets, ::Type{LVF64}, ::Type{Nojagg})
# even with rawoffsets, we know each TLV is destined to be 64 bytes
[
reinterpret(LVF64, x) for x in Base.Iterators.partition(rawdata, 64)
Expand All @@ -114,7 +114,10 @@ end
# Deserialize one `_KM3NETDAQHit` from `io`: four values are read in stream order
# (Int32, UInt8, Int32, UInt8) and handed positionally to the constructor `T`.
function readtype(io::IO, T::Type{_KM3NETDAQHit})
    # NOTE(review): the local names below are guesses by analogy with the
    # triggered-hit reader in this file — confirm against the struct definition.
    dom_id = readtype(io, Int32)
    channel_id = read(io, UInt8)
    # This Int32 is read with plain `read` (not `readtype`), exactly as before.
    tdc = read(io, Int32)
    tot = read(io, UInt8)
    return T(dom_id, channel_id, tdc, tot)
end
function interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQHit}}, ::Type{J}) where {T, J <: UnROOT.JaggType}
# `Nojagg` interpretation for branches of `Vector{_KM3NETDAQHit}`: the raw bytes and
# offsets are forwarded unchanged to `UnROOT.splitup` with `skipbytes=10`.
function interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQHit}}, ::Type{Nojagg})
    return UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQHit; skipbytes=10)
end
# `Offsetjagg` interpretation for branches of `Vector{_KM3NETDAQHit}`: the raw bytes
# and offsets are forwarded unchanged to `UnROOT.splitup` with `skipbytes=10`.
function interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQHit}}, ::Type{Offsetjagg})
    return UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQHit; skipbytes=10)
end

Expand All @@ -137,7 +140,10 @@ function readtype(io::IO, T::Type{_KM3NETDAQTriggeredHit})
T(dom_id, channel_id, tdc, tot, trigger_mask)
end

function UnROOT.interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQTriggeredHit}}, ::Type{J}) where {T, J <: UnROOT.JaggType}
# `Nojagg` interpretation for branches of `Vector{_KM3NETDAQTriggeredHit}`: the raw
# bytes and offsets are forwarded unchanged to `UnROOT.splitup` with `skipbytes=10`.
function UnROOT.interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQTriggeredHit}}, ::Type{Nojagg})
    return UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQTriggeredHit; skipbytes=10)
end
# `Offsetjagg` interpretation for branches of `Vector{_KM3NETDAQTriggeredHit}`: the raw
# bytes and offsets are forwarded unchanged to `UnROOT.splitup` with `skipbytes=10`.
function UnROOT.interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQTriggeredHit}}, ::Type{Offsetjagg})
    return UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQTriggeredHit; skipbytes=10)
end

Expand Down Expand Up @@ -169,6 +175,6 @@ function readtype(io::IO, T::Type{_KM3NETDAQEventHeader})
T(detector_id, run, frame_index, UTC_seconds, UTC_16nanosecondcycles, trigger_counter, trigger_mask, overlays)
end

function UnROOT.interped_data(rawdata, rawoffsets, ::Type{_KM3NETDAQEventHeader}, ::Type{J}) where {T, J <: UnROOT.JaggType}
# `Nojagg` interpretation for `_KM3NETDAQEventHeader` branches: the raw bytes and
# offsets are forwarded unchanged to `UnROOT.splitup` with `jagged=false`.
function UnROOT.interped_data(rawdata, rawoffsets, ::Type{_KM3NETDAQEventHeader}, ::Type{Nojagg})
    return UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQEventHeader; jagged=false)
end
116 changes: 59 additions & 57 deletions src/root.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,66 +215,68 @@ function interped_data(rawdata, rawoffsets, ::Type{Bool}, ::Type{Nojagg})
# specialized case to get Vector{Bool} instead of BitVector
return map(ntoh,reinterpret(Bool, rawdata))
end
function interped_data(rawdata, rawoffsets, ::Type{T}, ::Type{J}) where {T, J<:JaggType}
# there are two possibility, one is the leaf is just normal leaf but the title has "[...]" in it
# magic offsets, seems to be common for a lot of types, see auto.py in uproot3
# only needs when the jaggedness comes from TLeafElements, not needed when
# the jaggedness comes from having "[]" in TLeaf's title
# the other is where we need to auto detector T bsaed on class name
# we want the fundamental type as `reinterpret` will create vector
if J === Nojagg
return ntoh.(reinterpret(T, rawdata))
elseif J === Offsetjaggjagg # the branch is doubly jagged
jagg_offset = 10
subT = eltype(eltype(T))
out = VectorOfVectors(T(), Int32[1])
@views for i in 1:(length(rawoffsets)-1)
flat = rawdata[(rawoffsets[i]+1+jagg_offset:rawoffsets[i+1])]
row = VectorOfVectors{subT}()
cursor = 1
while cursor < length(flat)
n = ntoh(reinterpret(Int32, flat[cursor:cursor+sizeof(Int32)-1])[1])
cursor += sizeof(Int32)
b = ntoh.(reinterpret(subT, flat[cursor:cursor+n*sizeof(subT)-1]))
cursor += n*sizeof(subT)
push!(row, b)
end
push!(out, row)
end
return out
else # the branch is singly jagged
# for each "event", the index range is `offsets[i] + jagg_offset + 1` to `offsets[i+1]`
# this is why we need to append `rawoffsets` in the `readbranchraw()` call
# when you use this range to index `rawdata`, you will get raw bytes belong to each event
# Say your real data is Int32 and you see 8 bytes after indexing, then this event has [num1, num2] as real data
_size = sizeof(eltype(T))
if J === Offsetjagg
jagg_offset = 10
dp = 0 # book keeping for copy_to!
lr = length(rawoffsets)
offset = Vector{Int32}(undef, lr)
offset[1] = 0
@views @inbounds for i in 1:lr-1
start = rawoffsets[i]+jagg_offset+1
stop = rawoffsets[i+1]
l = stop-start+1
if l > 0
unsafe_copyto!(rawdata, dp+1, rawdata, start, l)
dp += l
offset[i+1] = offset[i] + l
else
# when we have an empty [] in jagged basket
offset[i+1] = offset[i]
end
end
resize!(rawdata, dp)
# Generic non-jagged interpretation: view the raw bytes as elements of `T` and apply
# `ntoh` to every element. `rawoffsets` is accepted but not used here.
function interped_data(rawdata, rawoffsets, ::Type{T}, ::Type{Nojagg}) where {T}
    typed = reinterpret(T, rawdata)
    return ntoh.(typed)
end
# There are two possibilities: one is that the leaf is just a normal leaf but its title has "[...]" in it.
# Magic offsets seem to be common for a lot of types, see auto.py in uproot3;
# they are only needed when the jaggedness comes from TLeafElements, and not needed when
# the jaggedness comes from having "[]" in the TLeaf's title.
# The other is where we need to auto-detect T based on the class name;
# we want the fundamental type, as `reinterpret` will create a vector.
# Jagged interpretation for the `Nooffsetjagg` case.
#
# The raw bytes are reinterpreted as `T` and passed through `ntoh`. `rawoffsets` is
# rewritten IN PLACE from byte offsets to element offsets (integer-divided by
# `sizeof(eltype(T))`, then shifted by one) and reused as the offsets of the returned
# `VectorOfVectors`, which is built with `ArraysOfArrays.no_consistency_checks`.
function interped_data(rawdata, rawoffsets, ::Type{T}, ::Type{Nooffsetjagg}) where {T}
    elsize = sizeof(eltype(T))
    decoded = ntoh.(reinterpret(T, rawdata))
    # mutates the caller's `rawoffsets`, exactly like the `.=` form
    @. rawoffsets = (rawoffsets ÷ elsize) + 1
    return VectorOfVectors(decoded, rawoffsets, ArraysOfArrays.no_consistency_checks)
end
# Singly-jagged interpretation for the `Offsetjagg` case, where every entry's payload
# is preceded by `jagg_offset` (10) bytes that must be skipped.
#
# For each "event" `i`, the raw byte range is `rawoffsets[i] + jagg_offset + 1` to
# `rawoffsets[i+1]`; this is why `rawoffsets` has to be appended in the
# `readbranchraw()` call. Indexing `rawdata` with that range gives the raw bytes that
# belong to the event: e.g. if the real data is Int32 and the range covers 8 bytes,
# the event holds `[num1, num2]`.
#
# The payloads are compacted to the front of `rawdata` in place, so `rawdata` is
# mutated and resized. `rawoffsets` is only read. The result is a `VectorOfVectors`
# over the `ntoh`-converted values, built with `ArraysOfArrays.no_consistency_checks`.
function interped_data(rawdata, rawoffsets, ::Type{T}, ::Type{Offsetjagg}) where T
    _size = sizeof(eltype(T))
    jagg_offset = 10
    dp = 0 # number of payload bytes already compacted to the front of `rawdata`
    lr = length(rawoffsets)
    # byte offsets of the compacted payloads; converted to element offsets below
    offset = Vector{Int32}(undef, lr)
    offset[1] = 0
    @views @inbounds for i in 1:lr-1
        start = rawoffsets[i]+jagg_offset+1
        stop = rawoffsets[i+1]
        l = stop-start+1
        if l > 0
            # destination never lies after the source, so data is only moved forward
            unsafe_copyto!(rawdata, dp+1, rawdata, start, l)
            dp += l
            offset[i+1] = offset[i] + l
        else
            # when we have an empty [] in jagged basket
            # (`offset` must stay the freshly built vector here: rebinding it to
            # `rawoffsets` would drop the accumulated offsets and overwrite
            # `rawoffsets[i+1]`, which later iterations still read)
            offset[i+1] = offset[i]
        end
    end
    resize!(rawdata, dp)
    real_data = ntoh.(reinterpret(T, rawdata))
    # bytes -> 1-based element offsets
    offset .= (offset .÷ _size) .+ 1
    return VectorOfVectors(real_data, offset, ArraysOfArrays.no_consistency_checks)
end
# Doubly-jagged interpretation for the `Offsetjaggjagg` case.
#
# For each entry `i`, the bytes `rawoffsets[i] + 1 + jagg_offset` to `rawoffsets[i+1]`
# are walked as a sequence of records: an Int32 count `n` (converted with `ntoh`)
# followed by `n` elements of `subT`. Every record becomes one inner vector of the
# entry's row, and every row is pushed onto the returned `VectorOfVectors`.
function interped_data(rawdata, rawoffsets, ::Type{T}, ::Type{Offsetjaggjagg}) where T
    jagg_offset = 10
    subT = eltype(eltype(T))
    out = VectorOfVectors(T(), Int32[1])
    @views for i in 1:(length(rawoffsets)-1)
        flat = rawdata[(rawoffsets[i]+1+jagg_offset:rawoffsets[i+1])]
        row = VectorOfVectors{subT}()
        cursor = 1
        while cursor < length(flat)
            # length prefix of the next inner vector
            n = ntoh(reinterpret(Int32, flat[cursor:cursor+sizeof(Int32)-1])[1])
            cursor += sizeof(Int32)
            # the `n` elements that follow the prefix
            b = ntoh.(reinterpret(subT, flat[cursor:cursor+n*sizeof(subT)-1]))
            cursor += n*sizeof(subT)
            push!(row, b)
        end
        # No early return here: the singly-jagged epilogue (`real_data`/`offset`/
        # `_size`) does not belong to this method, and those names are not defined in it.
        push!(out, row)
    end
    return out
end

function _normalize_ftype(fType)
Expand Down
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ end
@testset "Branch filtering" begin
# Branch selection behavior: if not regex, require exact name match
treebranches = ["Muon_pt", "Muon_eta", "Muon_phi", "Muon_charge", "Muon_ptErr",
"Muon_", "_pt", "Muon.pt"]
"Muon_", "_pt", "Muon.pt"]
# Predicate builders for branch selection: a plain string yields an exact-equality
# test (`isequal`), a `Regex` yields an `occursin` test against that pattern.
function _m(s::AbstractString)
    return isequal(s)
end
function _m(r::Regex)
    return Base.Fix1(occursin, r)
end
# Union, as a `Set`, of the entries of `treebranches` matched by each element of
# `selected`.
function filter_branches(selected)
    matched = mapreduce(∪, selected) do pattern
        filter(_m(pattern), treebranches)
    end
    return Set(matched)
end
Expand Down