Skip to content

Commit

Permalink
Merge branch 'JuliaDynamics:main' into Issue-#399
Browse files Browse the repository at this point in the history
  • Loading branch information
franzi448 authored Aug 27, 2024
2 parents 4e2659e + ac0fe7f commit 187df81
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 9 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 2.16.0

- Add `load_function` keyword argument to `collect_results` to customize how data is loaded from file before being converted to a dataframe by `collect_results`

# 2.15.0

- Add `wload_kwargs` to `produce_or_load` to allow passing kwargs to `wload`
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DrWatson"
uuid = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
repo = "https://github.com/JuliaDynamics/DrWatson.jl.git"
version = "2.15.0"
version = "2.16.0"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand Down
17 changes: 9 additions & 8 deletions src/result_collection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ See also [`collect_results`](@ref).
* `black_list = [:gitcommit, :gitpatch, :script]`: List of keys not to include from result-file.
* `special_list = []`: List of additional (derived) key-value pairs
to put in `df` as explained below.
* `load_function = wload`: Function used to load each result file. Defaults to `wload`. A custom load function is useful, for example, if you store results as a struct and want the fields of the struct to form the columns of the dataframe. Because the struct is saved to file as a one-element dictionary, the dataframe would otherwise have only a single column. To work around this, convert the struct to a dictionary by specifying `load_function = (filename) -> struct2dict(wload(filename)["mykey"])`. This way `collect_results` will receive a `Dict` whose keys are the fields of the struct.
`special_list` is a `Vector` where each entry
is a derived quantity to be included in `df`. There are two types of entries.
Expand Down Expand Up @@ -92,6 +93,7 @@ function collect_results!(filename, folder;
newfile = false, # keyword only for defining collect_results without !
rinclude = [r""],
rexclude = [r"^\b$"],
load_function = wload,
kwargs...)

@assert all(eltype(r) <: Regex for r in (rinclude, rexclude)) "Elements of `rinclude` and `rexclude` must be Regex expressions."
Expand All @@ -102,7 +104,7 @@ function collect_results!(filename, folder;
mtimes = Dict{String,Float64}()
else
verbose && @info "Loading existing result collection..."
data = wload(filename)
data = load_function(filename)
df = data["df"]
# Check if we have pre-recorded mtimes (if not this could be because of an old results database).
if "mtime" ∈ keys(data)
Expand Down Expand Up @@ -172,7 +174,7 @@ function collect_results!(filename, folder;
mtimes[file] = mtime_file

fpath = rpath === nothing ? file : joinpath(rpath, file)
df_new = to_data_row(FileIO.query(fpath); kwargs...)
df_new = to_data_row(FileIO.query(fpath); load_function=load_function, kwargs...)
#add filename
df_new[!, :path] .= file
if replace_entry
Expand Down Expand Up @@ -233,18 +235,17 @@ is_valid_file(file, valid_filetypes) =
any(endswith(file, v) for v in valid_filetypes)

# Generic fallback for file formats without a more specific method:
# read the file with `load_function` (`wload` unless the caller overrides it)
# and forward the loaded data, the file path, and all remaining keyword
# arguments to the `to_data_row(data, file; ...)` method.
function to_data_row(file::File; load_function=wload, kwargs...)
    fpath = filename(file)
    # Name the loader actually used; it is only `wload` when not overridden.
    @debug "Opening $(fpath) with load function $(load_function)."
    return to_data_row(load_function(fpath), fpath; kwargs...)
end
# Specialize for JLD2 files, can do much faster mmapped access.
# By default the file is opened read-only with `JLD2.jldopen`; a caller may
# pass its own `load_function` taking the file path instead. The loaded
# object, the file path, and all remaining keyword arguments are forwarded to
# the `to_data_row(data, file; ...)` method.
function to_data_row(file::File{format"JLD2"};
        load_function=(filename) -> JLD2.jldopen(filename, "r"), kwargs...)
    fpath = filename(file)
    @debug "Opening $(filename(file)) with jldopen."
    data = load_function(fpath)
    try
        return to_data_row(data, fpath; kwargs...)
    finally
        # The default loader returns an open JLD2 file handle that must be
        # released even if building the row throws. Anything else returned by
        # a custom loader is left untouched.
        # NOTE(review): assumes custom loaders return plain data that needs
        # no closing - confirm.
        data isa JLD2.JLDFile && close(data)
    end
end
function to_data_row(data, file;
white_list = collect(keys(data)),
Expand Down
16 changes: 16 additions & 0 deletions test/update_results_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,22 @@ cres_relpath = collect_results!(relpathname, folder;
rpath = projectdir())
@info all(startswith.(cres[!,"path"], "data"))

# Results stored as a struct: a custom `load_function` unpacks the struct so
# that each of its fields becomes a column of the collected dataframe.
struct dummy
    a::Float64
    b::Int64
    c::Matrix{Float64}
end
_dummy_matrix = rand(3, 3)
_dummy = dummy(1.0, 1, _dummy_matrix)
wsave(datadir("dummy.jld2"), "dummy", _dummy)

actual_dataframe = collect_results(
    datadir();
    rinclude = [r"dummy.jld2"],
    load_function = filename -> struct2dict(wload(filename)["dummy"]),
)
# Typed array literal: a one-element vector with the same element type as
# the explicitly allocated vector it replaces.
_dataframe_vector = Union{Missing, Matrix{Float64}}[_dummy_matrix]
expected_dataframe = DataFrame(
    a = 1.0,
    b = 1,
    c = _dataframe_vector,
    path = datadir("dummy.jld2"),
)

@test actual_dataframe == expected_dataframe

###############################################################################
# Trailing slash in foldername #
###############################################################################
Expand Down

0 comments on commit 187df81

Please sign in to comment.