Skip to content

Commit

Permalink
Speed up loading Results (multiprocessing + ignore summary files)
Browse files (browse the repository at this point in the history)
  • Loading branch information
burggraaff committed Mar 19, 2024
1 parent f1e497d commit 5c31ff0
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
18 changes: 11 additions & 7 deletions fpcup/io.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Functions for file input and output.
"""
from functools import cache
from functools import cache, partial
from multiprocessing import Pool
from os import makedirs
from pathlib import Path
Expand All
@@ -19,7 +19,9 @@
from .constants import CRS_AMERSFOORT
from .model import Result, Summary

# Constants
_SAMPLE_LENGTH = 10
_THRESHOLD_PARALLEL_LOADING = 1000

def save_ensemble_results(results: Iterable[Result], savefolder: PathOrStr, *,
progressbar=True, leave_progressbar=True) -> None:
Expand Down
Expand Up
@@ -95,8 +97,10 @@ def load_ensemble_summary_from_folder(folder: PathOrStr, *,
return summary


_load_ensemble_result_simple = partial(Result.from_file, run_id=None, include_summary=False)
def load_ensemble_results_from_folder(folder: PathOrStr, run_ids: Optional[Iterable[PathOrStr]]=None, *,
extension=".wout", sample=False, progressbar=True, leave_progressbar=True) -> list[Result]:
extension=".wout", sample=False,
progressbar=True, leave_progressbar=True) -> list[Result]:
"""
Load the result files in a given folder.
By default, load all files in the folder. If `run_ids` is specified, load only those files.
Expand Down
Expand Up
@@ -125,10 +129,10 @@ def load_ensemble_results_from_folder(folder: PathOrStr, run_ids: Optional[Itera

# Load the files with an optional progressbar
filenames = tqdm(filenames, total=n_results, desc="Loading outputs", unit="files", disable=not progressbar, leave=leave_progressbar)
# if n_results < 1000:
results = [Result.from_file(filename) for filename in filenames]
# else:
# with Pool() as p:
# results = list(p.imap_unordered(Result.from_file, filenames, chunksize=100))
if n_results < _THRESHOLD_PARALLEL_LOADING:
results = list(map(_load_ensemble_result_simple, filenames))
else:
with Pool() as p:
results = list(p.imap_unordered(_load_ensemble_result_simple, filenames, chunksize=25))

return results
2 changes: 1 addition & 1 deletion fpcup/model.py
Original file line number Diff line number Diff line change
Expand Up
@@ -313,7 +313,7 @@ def from_model(cls, model: Engine, run_data: RunData, **kwargs):

@classmethod
def from_file(cls, filename: PathOrStr, *,
run_id: Optional[str]=None, include_summary=True, **kwargs):
run_id: Optional[str]=None, include_summary=False, **kwargs):
"""
Load an output file.
If a run_id is not provided, use the filename stem.
Expand Down

0 comments on commit 5c31ff0

Please sign in to comment.