diff --git a/CHANGELOG.md b/CHANGELOG.md
index 065c8dab..5cfbf21c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# 2.16.0
+
+ - Add `load_function` keyword argument to `collect_results` to customize how each result file is loaded before it is converted to a dataframe row
+
 # 2.15.0
 
  - Add `wload_kwargs` to `produce_or_load` to allow passing kwargs to `wload`
diff --git a/Project.toml b/Project.toml
index d806621b..c50f70f5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "DrWatson"
 uuid = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
 repo = "https://github.com/JuliaDynamics/DrWatson.jl.git"
-version = "2.15.0"
+version = "2.16.0"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
diff --git a/src/result_collection.jl b/src/result_collection.jl
index 42ce4791..c75fcf62 100644
--- a/src/result_collection.jl
+++ b/src/result_collection.jl
@@ -50,6 +50,7 @@ See also [`collect_results`](@ref).
 * `black_list = [:gitcommit, :gitpatch, :script]`: List of keys not to include from result-file.
 * `special_list = []`: List of additional (derived) key-value pairs
   to put in `df` as explained below.
+* `load_function = wload`: Function used to load each result file. It is called with the path of the file and its return value is converted to a row of `df`. The existing results collection stored at `filename` is always read with `wload`. You may want to specify a custom load function if, for example, you store results as a struct and want the fields of the struct to form the columns of the dataframe. A struct is saved to file as a one-element dictionary, so with the default the dataframe would only have a single column. To work around this, convert the struct to a dictionary by specifying `load_function = (filename) -> struct2dict(wload(filename)["mykey"])`. This way `collect_results` receives a `Dict` whose keys are the fields of the struct.
 
 `special_list` is a `Vector` where each entry
 is a derived quantity to be included in `df`. There are two types of entries.
@@ -90,6 +91,7 @@ function collect_results!(filename, folder;
     newfile = false, # keyword only for defining collect_results without !
     rinclude = [r""],
     rexclude = [r"^\b$"],
+    load_function = wload,
     kwargs...)
 
     @assert all(eltype(r) <: Regex for r in (rinclude, rexclude)) "Elements of `rinclude` and `rexclude` must be Regex expressions."
@@ -170,7 +172,7 @@ function collect_results!(filename, folder;
         mtimes[file] = mtime_file
 
         fpath = rpath === nothing ? file : joinpath(rpath, file)
-        df_new = to_data_row(FileIO.query(fpath); kwargs...)
+        df_new = to_data_row(FileIO.query(fpath); load_function=load_function, kwargs...)
         #add filename
         df_new[!, :path] .= file
         if replace_entry
@@ -231,18 +233,22 @@ is_valid_file(file, valid_filetypes) =
     any(endswith(file, v) for v in valid_filetypes)
 
 # Use wload per default when nothing else is available
-function to_data_row(file::File; kwargs...)
+function to_data_row(file::File; load_function=wload, kwargs...)
     fpath = filename(file)
     @debug "Opening $(filename(file)) with fallback wload."
-    return to_data_row(wload(fpath), fpath; kwargs...)
+    return to_data_row(load_function(fpath), fpath; kwargs...)
 end
 # Specialize for JLD2 files, can do much faster mmapped access
-function to_data_row(file::File{format"JLD2"}; kwargs...)
+function to_data_row(file::File{format"JLD2"}; load_function=(filename) -> JLD2.jldopen(filename, "r"), kwargs...)
     fpath = filename(file)
     @debug "Opening $(filename(file)) with jldopen."
-    JLD2.jldopen(filename(file), "r") do data
-        return to_data_row(data, fpath; kwargs...)
-    end
+    data = load_function(fpath)
+    try
+        return to_data_row(data, fpath; kwargs...)
+    finally
+        # The default `load_function` returns an open JLD2 file handle; close it once the row is built.
+        data isa JLD2.JLDFile && close(data)
+    end
 end
 
 function to_data_row(data, file; white_list = collect(keys(data)),
diff --git a/test/update_results_tests.jl b/test/update_results_tests.jl
index e7849908..aad2bd26 100644
--- a/test/update_results_tests.jl
+++ b/test/update_results_tests.jl
@@ -64,6 +64,22 @@
 cres_relpath = collect_results!(relpathname, folder; rpath = projectdir())
 @info all(startswith.(cres[!,"path"], "data"))
 
+struct dummy
+    a::Float64
+    b::Int64
+    c::Matrix{Float64}
+end
+_dummy_matrix = rand(3,3)
+_dummy = dummy(1.0, 1, _dummy_matrix)
+wsave(datadir("dummy.jld2"), "dummy", _dummy)
+
+actual_dataframe = collect_results(datadir(), rinclude=[r"dummy.jld2"], load_function=(filename) -> struct2dict(wload(filename)["dummy"]))
+_dataframe_vector = Vector{Union{Missing, Matrix{Float64}}}(undef, 1)
+_dataframe_vector[1] = _dummy_matrix
+expected_dataframe = DataFrame(a = 1.0, b = 1, c = _dataframe_vector, path = datadir("dummy.jld2"))
+
+@test actual_dataframe == expected_dataframe
+
 ###############################################################################
 #                        Trailing slash in foldername                         #
 ###############################################################################