Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement load_function kwarg for collect_results! #424

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 2.16.0

- Add `load_function` keyword argument to `collect_results` to customize how data is loaded from file before being converted to a dataframe by `collect_results`

# 2.15.0

- Add `wload_kwargs` to `produce_or_load` to allow passing kwargs to `wload`
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DrWatson"
uuid = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
repo = "https://github.com/JuliaDynamics/DrWatson.jl.git"
version = "2.15.0"
version = "2.16.0"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand Down
17 changes: 9 additions & 8 deletions src/result_collection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ See also [`collect_results`](@ref).
* `black_list = [:gitcommit, :gitpatch, :script]`: List of keys not to include from result-file.
* `special_list = []`: List of additional (derived) key-value pairs
to put in `df` as explained below.
* `load_function = wload`: Function used to load data from file; defaults to `wload`. A custom load function is useful, for example, when results are stored as a struct and the fields of the struct should form the columns of the dataframe. Such a struct is saved to file as a one-element dictionary, so without further processing the dataframe would only have a single column. To work around this, convert the struct to a dictionary by specifying `load_function = (filename) -> struct2dict(wload(filename)["mykey"])`. This way, `collect_results` receives a `Dict` whose keys are the fields of the struct.

`special_list` is a `Vector` where each entry
is a derived quantity to be included in `df`. There are two types of entries.
Expand Down Expand Up @@ -90,6 +91,7 @@ function collect_results!(filename, folder;
newfile = false, # keyword only for defining collect_results without !
rinclude = [r""],
rexclude = [r"^\b$"],
load_function = wload,
kwargs...)

@assert all(eltype(r) <: Regex for r in (rinclude, rexclude)) "Elements of `rinclude` and `rexclude` must be Regex expressions."
Expand All @@ -100,7 +102,7 @@ function collect_results!(filename, folder;
mtimes = Dict{String,Float64}()
else
verbose && @info "Loading existing result collection..."
data = wload(filename)
data = load_function(filename)
df = data["df"]
# Check if we have pre-recorded mtimes (if not this could be because of an old results database).
if "mtime" ∈ keys(data)
Expand Down Expand Up @@ -170,7 +172,7 @@ function collect_results!(filename, folder;
mtimes[file] = mtime_file

fpath = rpath === nothing ? file : joinpath(rpath, file)
df_new = to_data_row(FileIO.query(fpath); kwargs...)
df_new = to_data_row(FileIO.query(fpath); load_function=load_function, kwargs...)
#add filename
df_new[!, :path] .= file
if replace_entry
Expand Down Expand Up @@ -231,18 +233,17 @@ is_valid_file(file, valid_filetypes) =
any(endswith(file, v) for v in valid_filetypes)

# Use `load_function` (`wload` by default) when no format-specific method is available
function to_data_row(file::File; load_function=wload, kwargs...)
    # Generic method: load the file through `load_function` (`wload` unless the
    # caller overrides it) and forward the loaded object, together with the
    # file path, to the data-level `to_data_row` method. Remaining keyword
    # arguments are passed through untouched.
    @debug "Opening $(filename(file)) with fallback wload."
    fpath = filename(file)
    loaded = load_function(fpath)
    return to_data_row(loaded, fpath; kwargs...)
end
# Specialize for JLD2 files, can do much faster mmapped access
function to_data_row(file::File{format"JLD2"};
        load_function = (path) -> JLD2.jldopen(path, "r"), kwargs...)
    # JLD2-specific method. By default the file is opened read-only with
    # `JLD2.jldopen`; a caller-supplied `load_function` may instead return any
    # object accepted by the data-level `to_data_row(data, file; ...)` method.
    # Remaining keyword arguments are forwarded unchanged.
    fpath = filename(file)
    @debug "Opening $(filename(file)) with jldopen."
    data = load_function(fpath)
    try
        return to_data_row(data, fpath; kwargs...)
    finally
        # An open JLD2 handle must be released once the row has been built,
        # otherwise one file handle leaks per collected file. Objects returned
        # by a custom `load_function` that are not JLD2 handles need no cleanup.
        data isa JLD2.JLDFile && close(data)
    end
end
function to_data_row(data, file;
white_list = collect(keys(data)),
Expand Down
16 changes: 16 additions & 0 deletions test/update_results_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,22 @@ cres_relpath = collect_results!(relpathname, folder;
rpath = projectdir())
@info all(startswith.(cres[!,"path"], "data"))

# Custom `load_function` test: a struct is saved under the single key "dummy";
# the load function unpacks it with `struct2dict` so that every field of the
# struct becomes its own dataframe column.
struct dummy
    a::Float64
    b::Int64
    c::Matrix{Float64}
end
_dummy_matrix = rand(3, 3)
_dummy = dummy(1.0, 1, _dummy_matrix)
wsave(datadir("dummy.jld2"), "dummy", _dummy)

actual_dataframe = collect_results(
    datadir();
    rinclude = [r"dummy.jld2"],
    load_function = (filename) -> struct2dict(wload(filename)["dummy"]),
)
# One-element column whose element type also admits `missing`.
_dataframe_vector = Union{Missing, Matrix{Float64}}[_dummy_matrix]
expected_dataframe = DataFrame(
    a = 1.0,
    b = 1,
    c = _dataframe_vector,
    path = datadir("dummy.jld2"),
)

@test actual_dataframe == expected_dataframe

###############################################################################
# Trailing slash in foldername #
###############################################################################
Expand Down
Loading