From daac667707a459ef0e18b6e211817b12c1447153 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> Date: Sun, 17 Dec 2023 14:20:10 -0500 Subject: [PATCH] A predicate function can be used to load a specific array or group. (#45) * add predicate for loading * add tests for predicate * bump version --- Project.toml | 2 +- src/loading.jl | 24 ++++++++++------- test/test_simple-usage.jl | 54 ++++++++++++++++++++++++++------------- 3 files changed, 52 insertions(+), 28 deletions(-) diff --git a/Project.toml b/Project.toml index cd1b955..f05d55a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SmallZarrGroups" uuid = "d423b6e5-1c84-4ae2-8d2d-b903aee15ac7" authors = ["nhz2 "] -version = "0.8.2" +version = "0.8.3" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/loading.jl b/src/loading.jl index d1cec30..1417d4f 100644 --- a/src/loading.jl +++ b/src/loading.jl @@ -1,6 +1,6 @@ # loading a storage tree from a directory or zip file. -function load_dir(dirpath::AbstractString)::ZGroup +function load_dir(dirpath::AbstractString; predicate=Returns(true))::ZGroup reader = if isdir(dirpath) DirectoryReader(dirpath) elseif isfile(dirpath) @@ -8,7 +8,7 @@ function load_dir(dirpath::AbstractString)::ZGroup else throw(ArgumentError("loading directory $(repr(dirpath)): No such file or directory")) end - load_dir(reader) + load_dir(reader; predicate) end """ @@ -18,13 +18,13 @@ end Load data in a file `filename` or a `data` vector in ZipStore format. """ -function load_zip(filename::AbstractString)::ZGroup +function load_zip(filename::AbstractString; predicate=Returns(true))::ZGroup reader = ZarrZipReader(read(filename)) - load_dir(reader) + load_dir(reader; predicate) end -function load_zip(data::Vector{UInt8})::ZGroup +function load_zip(data::Vector{UInt8}; predicate=Returns(true))::ZGroup reader = ZarrZipReader(data) - load_dir(reader) + load_dir(reader; predicate) end @@ -38,11 +38,17 @@ function try_add_attrs!(zthing::Union{ZGroup, ZArray}, reader::AbstractReader, k end end -function load_dir(reader::AbstractReader)::ZGroup +function load_dir(reader::AbstractReader; predicate=Returns(true))::ZGroup output = ZGroup() keynames = key_names(reader) - splitkeys = map(x->split(x,'/';keepempty=false), keynames) - keyname_dict::Dict{String, Int} = Dict{String, Int}(zip(keynames,eachindex(keynames))) + splitkeys = Vector{SubString{String}}[] + keyname_dict = Dict{String, Int}() + for (key_idx, keyname) in enumerate(keynames) + if predicate(keyname) + push!(splitkeys, split(keyname,'/';keepempty=false)) + keyname_dict[keyname] = key_idx + end + end try_add_attrs!(output, reader, keyname_dict, "") for splitkey in sort(splitkeys) if length(splitkey) < 2 diff --git a/test/test_simple-usage.jl b/test/test_simple-usage.jl index 0bc96f7..2ea673f 100644 --- a/test/test_simple-usage.jl +++ b/test/test_simple-usage.jl @@ -185,8 +185,7 @@ end """ end - -@testset "saving and loading a directory" begin +function example_group()::ZGroup g = ZGroup() data1 = rand(10,20) g["testarray1"] = data1 @@ -198,21 +197,26 @@ end g["testgroup1"] = ZGroup() g["testgroup1"]["testarray1"] = data3 attrs(g["testgroup1/testarray1"])["foo"] = "bar3" + g +end + +@testset "saving and loading a directory" begin + g = example_group() mktempdir() do path # Note this will delete pre existing data at dirpath # if path ends in ".zip" the data will be saved in a zip file instead. SmallZarrGroups.save_dir(path,g) gload = SmallZarrGroups.load_dir(path) - @test gload["testarray1"] == data1 + @test gload["testarray1"] == g["testarray1"] @test attrs(gload["testarray1"]) == OrderedDict([ "foo" => "bar1", ]) - @test gload["testarray2"] == data2 + @test gload["testarray2"] == g["testarray2"] @test attrs(gload["testarray2"]) == OrderedDict([]) @test attrs(gload) == OrderedDict([ "qaz" => "baz", ]) - @test gload["testgroup1/testarray1"] == data3 + @test gload["testgroup1/testarray1"] == g["testgroup1/testarray1"] @test attrs(gload["testgroup1/testarray1"]) == OrderedDict([ "foo" => "bar3", ]) @@ -227,16 +231,7 @@ end end @testset "saving and loading a zip file" begin - g = ZGroup() - data1 = rand(10,20) - g["testarray1"] = data1 - attrs(g["testarray1"])["foo"] = "bar1" - data2 = rand(Int,20) - g["testarray2"] = data2 - data3 = rand(UInt8,20) - g["testgroup1"] = ZGroup() - g["testgroup1"]["testarray1"] = data3 - attrs(g["testgroup1/testarray1"])["foo"] = "bar3" + g = example_group() mktempdir() do path # Note this will delete pre existing data at "path/test.zip". # This zip file can be read by zarr-python. @@ -251,17 +246,40 @@ end # `7z a -tzip archive.zarr.zip archive.zarr/.` # " gload = SmallZarrGroups.load_zip(joinpath(path,"test.zip")) - @test collect(gload["testarray1"]) == data1 + @test gload["testarray1"] == g["testarray1"] @test attrs(gload["testarray1"]) == OrderedDict([ "foo" => "bar1", ]) - @test gload["testarray2"] == data2 + @test gload["testarray2"] == g["testarray2"] @test attrs(gload["testarray2"]) == OrderedDict([]) + @test attrs(gload) == OrderedDict([ + "qaz" => "baz", + ]) + @test gload["testgroup1/testarray1"] == g["testgroup1/testarray1"] + @test attrs(gload["testgroup1/testarray1"]) == OrderedDict([ + "foo" => "bar3", + ]) + end +end + +@testset "loading with predicate" begin + g = example_group() + mktempdir() do path + SmallZarrGroups.save_zip(joinpath(path,"test.zip"), g) + # A predicate function can be used to load a specific array or group + # The predicate function filters keys in the underlying store. + gload = SmallZarrGroups.load_zip( + joinpath(path,"test.zip"); + predicate = startswith("testgroup1/testarray1/"), + ) + @test collect(keys(gload)) == ["testgroup1"] + # Higher level groups may have their attributes ignored. @test attrs(gload) == OrderedDict([]) - @test gload["testgroup1/testarray1"] == data3 + @test collect(keys(gload["testgroup1"])) == ["testarray1"] @test attrs(gload["testgroup1/testarray1"]) == OrderedDict([ "foo" => "bar3", ]) + @test gload["testgroup1/testarray1"] == collect(g["testgroup1/testarray1"]) end end