From 304f953ff149969e1356519b2de010677b50febc Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 28 Jul 2023 09:31:34 +0200 Subject: [PATCH 1/6] Customizable figure size (#222) Currently, there is no easy way to change figure sizes when using high-level visualization functions such as `plot_problem`. Generally, I'd prefer controlling plotting through matplotlib's `rcParams`, but this change provides at least some backwards-compatible means of changing figure sizes at a higher level by, e.g. `petab.visualize.plotting.DEFAULT_FIGSIZE = (8, 4)`. --- petab/visualize/plotting.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/petab/visualize/plotting.py b/petab/visualize/plotting.py index 51149272..ab40c92e 100644 --- a/petab/visualize/plotting.py +++ b/petab/visualize/plotting.py @@ -20,6 +20,8 @@ IdsList = List[str] NumList = List[int] +# The default figure size +DEFAULT_FIGSIZE = (20, 15) # also for type hints # TODO: split into dataplot and subplot level dicts? @@ -262,7 +264,7 @@ class Figure: Contains information regarding how data should be visualized. """ def __init__(self, subplots: Optional[List[Subplot]] = None, - size: Tuple = (20, 15), + size: Tuple = DEFAULT_FIGSIZE, title: Optional[Tuple] = None): """ Constructor. From 49c672a576958568d38a91ffb2b8ae849420602e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 28 Jul 2023 18:35:06 +0200 Subject: [PATCH 2/6] Handle missing nominalValue in Problem.get_x_nominal (#223) It doesn't seem to be quite clear if the `nominalValue` column needs to be present if all parameters are estimated. However, since it's allowed to leave `nominalValue` empty, which is treated as NaN, it seems to make sense to treat a missing `nominalValue` column as all-NaN. See also https://github.com/ICB-DCM/pyPESTO/issues/1104 --- petab/problem.py | 7 ++++++- tests/test_petab.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/petab/problem.py b/petab/problem.py index ae877422..7897f9ae 100644 --- a/petab/problem.py +++ b/petab/problem.py @@ -3,6 +3,7 @@ import os import tempfile +from math import nan from pathlib import Path, PurePosixPath from typing import Dict, Iterable, List, Optional, Union, TYPE_CHECKING from urllib.parse import unquote, urlparse, urlunparse @@ -660,7 +661,11 @@ def get_x_nominal(self, free: bool = True, fixed: bool = True, ------- The parameter nominal values. 
""" - v = list(self.parameter_df[NOMINAL_VALUE]) + if NOMINAL_VALUE in self.parameter_df: + v = list(self.parameter_df[NOMINAL_VALUE]) + else: + v = [nan] * len(self.parameter_df) + if scaled: v = list(parameters.map_scale( v, self.parameter_df[PARAMETER_SCALE])) diff --git a/tests/test_petab.py b/tests/test_petab.py index 1b774aea..9d692188 100644 --- a/tests/test_petab.py +++ b/tests/test_petab.py @@ -557,6 +557,11 @@ def test_parameter_properties(petab_problem): # pylint: disable=W0621 assert petab_problem.x_nominal_free_scaled == [7, np.log(8)] assert petab_problem.x_nominal_fixed_scaled == [np.log10(9)] + # Check that a missing nominalValues column is handled correctly + del petab_problem.parameter_df[NOMINAL_VALUE] + assert len(petab_problem.x_nominal) == 3 + assert np.isnan(petab_problem.x_nominal).all() + def test_to_float_if_float(): to_float_if_float = petab.core.to_float_if_float From 488e82981c8e2572c0fcf3b6870a438be442f7f5 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 31 Aug 2023 15:18:03 +0200 Subject: [PATCH 3/6] Fix pandas 2.1.0 FutureWarnings (#226) Fixes #224 and a bunch of other pandas 2.1.0 FutureWarnings. --- .flake8 | 4 ++++ petab/calculate.py | 2 +- petab/simplify.py | 4 ++-- petab/visualize/data_overview.py | 2 ++ petab/visualize/lint.py | 2 ++ tests/test_conditions.py | 1 + tests/test_lint.py | 2 +- tests/test_petab.py | 1 + 8 files changed, 14 insertions(+), 4 deletions(-) diff --git a/.flake8 b/.flake8 index fd7cda9d..50513b8a 100644 --- a/.flake8 +++ b/.flake8 @@ -1,8 +1,12 @@ [flake8] extend-ignore = + # whitespace before ":", conflicts with black + E203 F403 F405 + # line too long, conflicts with black in rare cases + E501 exclude = build, dist, diff --git a/petab/calculate.py b/petab/calculate.py index abfeb40d..adf3d2ba 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -83,7 +83,7 @@ def calculate_residuals_for_table( # create residual df as copy of measurement df, change column residual_df = measurement_df.copy(deep=True).rename( columns={MEASUREMENT: RESIDUAL}) - + residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64") # matching columns compared_cols = set(MEASUREMENT_DF_COLS) compared_cols -= {MEASUREMENT} diff --git a/petab/simplify.py b/petab/simplify.py index 6ad4af1d..37e89879 100644 --- a/petab/simplify.py +++ b/petab/simplify.py @@ -73,11 +73,11 @@ def condition_parameters_to_parameter_table(problem: Problem): continue series = problem.condition_df[parameter_id] - value = petab.to_float_if_float(series[0]) + value = petab.to_float_if_float(series.iloc[0]) # same value for all conditions and no parametric overrides (str)? 
if isinstance(value, float) and len(series.unique()) == 1: - replacements[parameter_id] = series[0] + replacements[parameter_id] = series.iloc[0] if not replacements: return diff --git a/petab/visualize/data_overview.py b/petab/visualize/data_overview.py index 44e7c8ec..ec0ee9ea 100644 --- a/petab/visualize/data_overview.py +++ b/petab/visualize/data_overview.py @@ -58,6 +58,8 @@ def get_data_per_observable(measurement_df: pd.DataFrame) -> pd.DataFrame: """ my_measurements = measurement_df.copy() + my_measurements[PREEQUILIBRATION_CONDITION_ID] = my_measurements[PREEQUILIBRATION_CONDITION_ID].astype("object") + index = [SIMULATION_CONDITION_ID] if PREEQUILIBRATION_CONDITION_ID in my_measurements: my_measurements[PREEQUILIBRATION_CONDITION_ID].fillna('', inplace=True) diff --git a/petab/visualize/lint.py b/petab/visualize/lint.py index f9f78c9a..413d0c89 100644 --- a/petab/visualize/lint.py +++ b/petab/visualize/lint.py @@ -124,6 +124,8 @@ def set_default(column: str, value): if column not in vis_df: vis_df[column] = value elif value is not None: + if isinstance(value, str): + vis_df[column] = vis_df[column].astype('object') vis_df[column].fillna(value, inplace=True) set_default(C.PLOT_NAME, "") diff --git a/tests/test_conditions.py b/tests/test_conditions.py index b507abf6..0e252319 100644 --- a/tests/test_conditions.py +++ b/tests/test_conditions.py @@ -26,6 +26,7 @@ def test_get_parametric_overrides(): assert conditions.get_parametric_overrides(condition_df) == [] + condition_df['fixedParameter1'] = condition_df['fixedParameter1'].astype("object") condition_df.loc[0, 'fixedParameter1'] = 'parameterId' assert conditions.get_parametric_overrides(condition_df) == ['parameterId'] diff --git a/tests/test_lint.py b/tests/test_lint.py index 0d738813..7bbf4c83 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -469,7 +469,7 @@ def test_check_parameter_df(): LOWER_BOUND: [1e-5, 1e-6, 1e-7], UPPER_BOUND: [1e5, 1e6, 1e7] }).set_index(PARAMETER_ID) - + parameter_df[NOMINAL_VALUE] = parameter_df[NOMINAL_VALUE].astype("object") lint.check_parameter_df(df=parameter_df) # NOMINAL_VALUE empty, for non-estimated parameter diff --git a/tests/test_petab.py b/tests/test_petab.py index 9d692188..8c4913ab 100644 --- a/tests/test_petab.py +++ b/tests/test_petab.py @@ -24,6 +24,7 @@ def condition_df_2_conditions(): 'fixedParameter1': [1.0, 2.0] }) condition_df.set_index('conditionId', inplace=True) + condition_df.fixedParameter1 = condition_df.fixedParameter1.astype("object") return condition_df From 95aead8bb4ef48049595c91656666560e84a3ab1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 31 Aug 2023 16:15:45 +0200 Subject: [PATCH 4/6] Add pre-commit-config, run black, isort, ... 
(#225) Relevant changes are all in .pre-commit-config.yaml, the rest is auto-formatting by isort/black --- .coveragerc | 2 +- .pre-commit-config.yaml | 28 + README.md | 4 +- doc/conf.py | 64 +- .../example_Fujita/Fujita_measurementData.tsv | 2 +- .../Fujita_parameters_scaling.tsv | 22 +- .../visuSpecs/Fujita_visuSpec_empty.tsv | 1 - .../Isensee_experimentalCondition.tsv | 2 +- .../Isensee_measurementData.tsv | 2 +- doc/example/example_visualization.ipynb | 128 ++- .../example_visualization_with_visspec.ipynb | 66 +- ...xample_visualization_without_visspec.ipynb | 79 +- doc/md2rst.py | 13 +- petab/C.py | 268 +++--- petab/__init__.py | 6 +- petab/calculate.py | 182 ++-- petab/composite_problem.py | 38 +- petab/conditions.py | 35 +- petab/core.py | 237 ++++-- petab/lint.py | 535 +++++++----- petab/mapping.py | 36 +- petab/measurements.py | 182 ++-- petab/models/__init__.py | 4 +- petab/models/model.py | 13 +- petab/models/pysb_model.py | 68 +- petab/models/sbml_model.py | 66 +- petab/observables.py | 100 ++- petab/parameter_mapping.py | 432 ++++++---- petab/parameters.py | 272 +++--- petab/petablint.py | 123 ++- petab/problem.py | 502 ++++++----- petab/sampling.py | 77 +- petab/sbml.py | 164 ++-- petab/simplify.py | 31 +- petab/simulate.py | 83 +- petab/version.py | 2 +- petab/visualize/__init__.py | 33 +- petab/visualize/cli.py | 59 +- petab/visualize/data_overview.py | 41 +- petab/visualize/helper_functions.py | 25 +- petab/visualize/lint.py | 96 ++- petab/visualize/plot_data_and_simulation.py | 129 +-- petab/visualize/plot_residuals.py | 148 ++-- petab/visualize/plotter.py | 489 ++++++----- petab/visualize/plotting.py | 574 +++++++------ petab/yaml.py | 118 +-- pyproject.toml | 3 + setup.py | 17 +- tests/test_calculate.py | 532 ++++++++---- tests/test_combine.py | 90 +- tests/test_conditions.py | 95 ++- tests/test_deprecated.py | 32 +- tests/test_lint.py | 558 +++++++----- tests/test_mapping.py | 30 +- tests/test_measurements.py | 142 ++-- tests/test_model_pysb.py | 50 +- tests/test_observables.py | 153 ++-- tests/test_parameter_mapping.py | 791 +++++++++++------- tests/test_parameters.py | 219 +++-- tests/test_petab.py | 719 +++++++++------- tests/test_sbml.py | 110 ++- tests/test_simplify.py | 102 +-- tests/test_simulate.py | 114 +-- tests/test_visualization.py | 539 +++++++----- tests/test_visualization_data_overview.py | 12 +- tests/test_yaml.py | 52 +- 66 files changed, 6006 insertions(+), 3935 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.coveragerc b/.coveragerc index c48222a0..94e3328d 100644 --- a/.coveragerc +++ b/.coveragerc @@ -9,5 +9,5 @@ exclude_lines = raise NotImplementedError if __name__ == .__main__.: ignore_errors = True -omit = +omit = tests/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..89cf753b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + name: isort (python) + args: ["--profile", "black", "--filter-files", "--line-length", "79"] +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-added-large-files + - id: check-merge-conflict + - id: check-yaml + args: [--allow-multiple-documents] + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/psf/black + rev: 23.7.0 + hooks: + - id: black-jupyter + # It is recommended to 
specify the latest version of Python + # supported by your project here, or alternatively use + # pre-commit's default_language_version, see + # https://pre-commit.com/#top_level-default_language_version + language_version: python3.11 + args: ["--line-length", "79"] diff --git a/README.md b/README.md index 17591810..043f21ad 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ The PEtab library is available on [pypi](https://pypi.org/project/petab/) and the easiest way to install it is running pip3 install petab - + It will require Python>=3.9 to run. (We are following the [numpy Python support policy](https://numpy.org/neps/nep-0029-deprecation_policy.html)). @@ -40,7 +40,7 @@ be: `petablint` - [`petab.create_parameter_df`](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.parameters.html#petab.parameters.create_parameter_df) - to create the parameter table, once you have set up the model, + to create the parameter table, once you have set up the model, condition table, observable table and measurement table - [`petab.create_combine_archive`](https://petab.readthedocs.io/projects/libpetab-python/en/latest/build/_autosummary/petab.core.html#petab.core.create_combine_archive) diff --git a/doc/conf.py b/doc/conf.py index 1d7875b6..7e3932ae 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,21 +15,21 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath("..")) # -- Project information ----------------------------------------------------- -project = 'libpetab-python' -copyright = '2018, the PEtab developers' -author = 'PEtab developers' +project = "libpetab-python" +copyright = "2018, the PEtab developers" +author = "PEtab developers" # The full version, including alpha/beta/rc tags -release = 'latest' +release = "latest" # -- Custom pre-build -------------------------------------------------------- -subprocess.run(['python', 'md2rst.py']) +subprocess.run(["python", "md2rst.py"]) # -- General configuration --------------------------------------------------- @@ -37,49 +37,49 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.napoleon', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.intersphinx', - 'sphinx.ext.viewcode', - 'sphinx_markdown_tables', - 'myst_nb', + "sphinx.ext.napoleon", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinx.ext.viewcode", + "sphinx_markdown_tables", + "myst_nb", ] intersphinx_mapping = { - 'pandas': ('https://pandas.pydata.org/docs/', None), - 'numpy': ('https://numpy.org/devdocs/', None), - 'sympy': ('https://docs.sympy.org/latest/', None), - 'python': ('https://docs.python.org/3', None), + "pandas": ("https://pandas.pydata.org/docs/", None), + "numpy": ("https://numpy.org/devdocs/", None), + "sympy": ("https://docs.sympy.org/latest/", None), + "python": ("https://docs.python.org/3", None), } # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
exclude_patterns = [ - 'build/doctrees', - 'build/html', - '**.ipynb_checkpoints', - 'logo/LICENSE.md', + "build/doctrees", + "build/html", + "**.ipynb_checkpoints", + "logo/LICENSE.md", ] -master_doc = 'index' +master_doc = "index" autosummary_generate = True autodoc_default_options = { "members": None, - "imported-members": ['petab'], + "imported-members": ["petab"], "inherited-members": None, "private-members": None, "show-inheritance": None, } # For some reason causes sphinx import errors otherwise -autodoc_mock_imports = ['yaml'] +autodoc_mock_imports = ["yaml"] # myst_nb options # https://myst-nb.readthedocs.io/en/latest/configuration.html @@ -87,8 +87,8 @@ source_suffix = { - '.rst': 'restructuredtext', - '.txt': 'restructuredtext', + ".rst": "restructuredtext", + ".txt": "restructuredtext", } # ignore numpy warnings @@ -100,12 +100,12 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] html_context = { "display_github": True, @@ -115,15 +115,15 @@ "conf_py_path": "/doc", } -html_logo = 'logo/PEtab.png' +html_logo = "logo/PEtab.png" def skip_some_objects(app, what, name, obj, skip, options): """Exclude some objects from the documentation""" - if getattr(obj, '__module__', None) == 'collections': + if getattr(obj, "__module__", None) == "collections": return True def setup(app): """Sphinx setup""" - app.connect('autodoc-skip-member', skip_some_objects) + app.connect("autodoc-skip-member", skip_some_objects) diff --git a/doc/example/example_Fujita/Fujita_measurementData.tsv b/doc/example/example_Fujita/Fujita_measurementData.tsv index 50b372f9..b294bcfa 100644 --- a/doc/example/example_Fujita/Fujita_measurementData.tsv +++ b/doc/example/example_Fujita/Fujita_measurementData.tsv @@ -142,4 +142,4 @@ pS6_tot model1_data6 0.07993 300 scaling_pS6_tot 0.01 model1_data6_pS6_tot pS6_tot model1_data6 0.42868 600 scaling_pS6_tot 0.02 model1_data6_pS6_tot pS6_tot model1_data6 0.60508 900 scaling_pS6_tot 0.03 model1_data6_pS6_tot pS6_tot model1_data6 0.62088 1800 scaling_pS6_tot 0.035 model1_data6_pS6_tot -pS6_tot model1_data6 0.32084 3600 scaling_pS6_tot 0.07 model1_data6_pS6_tot \ No newline at end of file +pS6_tot model1_data6 0.32084 3600 scaling_pS6_tot 0.07 model1_data6_pS6_tot diff --git a/doc/example/example_Fujita/Fujita_parameters_scaling.tsv b/doc/example/example_Fujita/Fujita_parameters_scaling.tsv index 2254fbf8..1eb01fd6 100644 --- a/doc/example/example_Fujita/Fujita_parameters_scaling.tsv +++ b/doc/example/example_Fujita/Fujita_parameters_scaling.tsv @@ -7,14 +7,14 @@ reaction_1_k1 reaction_{1,k1} log10 1E-08 100000000 0.003690766129111 1 paramete reaction_1_k2 reaction_{1,k2} log10 1E-08 100000000 0.002301175486005 1 laplace 1000;20 laplace 1000;20 reaction_2_k1 reaction_{2,k1} log10 1E-08 100000000 0.000936500808211 1 logLaplace 2;1 logLaplace 2;1 reaction_2_k2 reaction_{2,k2} log10 1E-08 100000000 60965.2066642586 1 parameterScaleLaplace 2;1 parameterScaleLaplace 2;1 -reaction_3_k1 reaction_{3,k1} log10 1E-08 100000000 0.433225051651771 1 -reaction_4_k1 reaction_{4,k1} log10 1E-08 100000000 0.030155177423024 1 -reaction_5_k1 reaction_{5,k1} log10 1E-08 100000000 
3.27310803801897E-06 1 -reaction_5_k2 reaction_{5,k2} log10 1E-08 100000000 0.000398546299782 1 -reaction_6_k1 reaction_{6,k1} log10 1E-08 100000000 5.46319692934546E-06 1 -reaction_7_k1 reaction_{7,k1} log10 1E-08 100000000 0.011803208311735 1 -reaction_8_k1 reaction_{8,k1} log10 1E-08 100000000 0.000944761775113 1 -reaction_9_k1 reaction_{9,k1} log10 1E-08 100000000 0.028510798479438 1 -scaling_pAkt_tot scaling_{pAkt}_tot log10 1E-08 100000000 41.377103160384 1 -scaling_pEGFR_tot scaling_{pEGFR}_tot log10 1E-08 100000000 5.64785460492811E-08 1 -scaling_pS6_tot scaling_{pS6}_tot log10 1E-08 100000000 78521.9513232784 1 +reaction_3_k1 reaction_{3,k1} log10 1E-08 100000000 0.433225051651771 1 +reaction_4_k1 reaction_{4,k1} log10 1E-08 100000000 0.030155177423024 1 +reaction_5_k1 reaction_{5,k1} log10 1E-08 100000000 3.27310803801897E-06 1 +reaction_5_k2 reaction_{5,k2} log10 1E-08 100000000 0.000398546299782 1 +reaction_6_k1 reaction_{6,k1} log10 1E-08 100000000 5.46319692934546E-06 1 +reaction_7_k1 reaction_{7,k1} log10 1E-08 100000000 0.011803208311735 1 +reaction_8_k1 reaction_{8,k1} log10 1E-08 100000000 0.000944761775113 1 +reaction_9_k1 reaction_{9,k1} log10 1E-08 100000000 0.028510798479438 1 +scaling_pAkt_tot scaling_{pAkt}_tot log10 1E-08 100000000 41.377103160384 1 +scaling_pEGFR_tot scaling_{pEGFR}_tot log10 1E-08 100000000 5.64785460492811E-08 1 +scaling_pS6_tot scaling_{pS6}_tot log10 1E-08 100000000 78521.9513232784 1 diff --git a/doc/example/example_Fujita/visuSpecs/Fujita_visuSpec_empty.tsv b/doc/example/example_Fujita/visuSpecs/Fujita_visuSpec_empty.tsv index 8b137891..e69de29b 100644 --- a/doc/example/example_Fujita/visuSpecs/Fujita_visuSpec_empty.tsv +++ b/doc/example/example_Fujita/visuSpecs/Fujita_visuSpec_empty.tsv @@ -1 +0,0 @@ - diff --git a/doc/example/example_Isensee/Isensee_experimentalCondition.tsv b/doc/example/example_Isensee/Isensee_experimentalCondition.tsv index 59ae9d09..eb7b0488 100644 --- a/doc/example/example_Isensee/Isensee_experimentalCondition.tsv +++ b/doc/example/example_Isensee/Isensee_experimentalCondition.tsv @@ -119,4 +119,4 @@ JI09_160126_Drg449_444_CycNuc__Fsk10_and_IBMX100 Fsk(10)/IBMX(100) 0 10 0 100 0 JI09_160126_Drg449_444_CycNuc__Sp8_Br_cAMPS_AM10 Sp8-Br-cAMPS-AM(10) 0 0 0 0 0 0 0 10 0 0 0 0 0 0 0 0 JI09_160201_Drg453-452_CycNuc__ctrl ctrl 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 JI09_160201_Drg453-452_CycNuc__Fsk Fsk(10) 0 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM Sp8-Br-cAMPS-AM(10) 0 0 0 0 0 0 0 10 0 0 0 0 0 0 0 0 \ No newline at end of file +JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM Sp8-Br-cAMPS-AM(10) 0 0 0 0 0 0 0 10 0 0 0 0 0 0 0 0 diff --git a/doc/example/example_Isensee/Isensee_measurementData.tsv b/doc/example/example_Isensee/Isensee_measurementData.tsv index 7fb716be..34c906a8 100644 --- a/doc/example/example_Isensee/Isensee_measurementData.tsv +++ b/doc/example/example_Isensee/Isensee_measurementData.tsv @@ -685,4 +685,4 @@ Calpha_Microscopy control JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM 1711.76 Calpha_Microscopy control JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM 1970.37755061332 15 s_Calpha_global;b_Calpha_global;rel_open;xi_rel_open s_Calpha_global;rho_Calpha_Microscopy lin normal JI09_160201_Drg453_452_CycNuc JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM Drg453 Calpha_Microscopy control JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM 1950.86994136962 30 s_Calpha_global;b_Calpha_global;rel_open;xi_rel_open s_Calpha_global;rho_Calpha_Microscopy lin normal 
JI09_160201_Drg453_452_CycNuc JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM Drg453 Calpha_Microscopy control JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM 2016.8833683752 60 s_Calpha_global;b_Calpha_global;rel_open;xi_rel_open s_Calpha_global;rho_Calpha_Microscopy lin normal JI09_160201_Drg453_452_CycNuc JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM Drg453 -Calpha_Microscopy control JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM 1802.11729015102 120 s_Calpha_global;b_Calpha_global;rel_open;xi_rel_open s_Calpha_global;rho_Calpha_Microscopy lin normal JI09_160201_Drg453_452_CycNuc JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM Drg453 \ No newline at end of file +Calpha_Microscopy control JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM 1802.11729015102 120 s_Calpha_global;b_Calpha_global;rel_open;xi_rel_open s_Calpha_global;rho_Calpha_Microscopy lin normal JI09_160201_Drg453_452_CycNuc JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM Drg453 diff --git a/doc/example/example_visualization.ipynb b/doc/example/example_visualization.ipynb index 7a55bf48..113de75a 100644 --- a/doc/example/example_visualization.ipynb +++ b/doc/example/example_visualization.ipynb @@ -52,8 +52,12 @@ "metadata": {}, "outputs": [], "source": [ - "ax = plot_with_vis_spec(visualization_file_path, condition_file_path,\n", - " data_file_path, simulation_file_path)" + "ax = plot_with_vis_spec(\n", + " visualization_file_path,\n", + " condition_file_path,\n", + " data_file_path,\n", + " simulation_file_path,\n", + ")" ] }, { @@ -69,7 +73,9 @@ "metadata": {}, "outputs": [], "source": [ - "ax_without_sim = plot_with_vis_spec(visualization_file_path, condition_file_path, data_file_path)" + "ax_without_sim = plot_with_vis_spec(\n", + " visualization_file_path, condition_file_path, data_file_path\n", + ")" ] }, { @@ -85,8 +91,11 @@ "metadata": {}, "outputs": [], "source": [ - "ax = plot_with_vis_spec(visualization_file_path, condition_file_path,\n", - " simulations_df = simulation_file_path)" + "ax = plot_with_vis_spec(\n", + " visualization_file_path,\n", + " condition_file_path,\n", + " simulations_df=simulation_file_path,\n", + ")" ] }, { @@ -102,9 +111,15 @@ "metadata": {}, "outputs": [], "source": [ - "visualization_file_scatterplots = folder + \"Isensee_visualizationSpecification_scatterplot.tsv\"\n", - "ax = plot_with_vis_spec(visualization_file_scatterplots, condition_file_path,\n", - " data_file_path, simulation_file_path)" + "visualization_file_scatterplots = (\n", + " folder + \"Isensee_visualizationSpecification_scatterplot.tsv\"\n", + ")\n", + "ax = plot_with_vis_spec(\n", + " visualization_file_scatterplots,\n", + " condition_file_path,\n", + " data_file_path,\n", + " simulation_file_path,\n", + ")" ] }, { @@ -121,13 +136,20 @@ "metadata": {}, "outputs": [], "source": [ - "datasets = [['JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_ctrl',\n", - " 'JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_Fsk'],\n", - " ['JI09_160201_Drg453-452_CycNuc__ctrl',\n", - " 'JI09_160201_Drg453-452_CycNuc__Fsk',\n", - " 'JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM']]\n", - "ax_without_sim = plot_without_vis_spec(condition_file_path, datasets, 'dataset',\n", - " data_file_path)" + "datasets = [\n", + " [\n", + " \"JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_ctrl\",\n", + " \"JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_Fsk\",\n", + " ],\n", + " [\n", + " \"JI09_160201_Drg453-452_CycNuc__ctrl\",\n", + " \"JI09_160201_Drg453-452_CycNuc__Fsk\",\n", + " \"JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM\",\n", + " ],\n", + "]\n", + "ax_without_sim = 
plot_without_vis_spec(\n", + " condition_file_path, datasets, \"dataset\", data_file_path\n", + ")" ] }, { @@ -153,18 +175,26 @@ "data_file_Fujita = \"example_Fujita/Fujita_measurementData.tsv\"\n", "condition_file_Fujita = \"example_Fujita/Fujita_experimentalCondition.tsv\"\n", "\n", - "# Plot 4 axes objects, plotting \n", + "# Plot 4 axes objects, plotting\n", "# - in the first window all observables of the simulation condition 'model1_data1'\n", "# - in the second window all observables of the simulation conditions 'model1_data2', 'model1_data3'\n", "# - in the third window all observables of the simulation conditions 'model1_data4', 'model1_data5'\n", "# - in the fourth window all observables of the simulation condition 'model1_data6'\n", "\n", - "sim_cond_id_list = [['model1_data1'], ['model1_data2', 'model1_data3'],\n", - " ['model1_data4', 'model1_data5'], ['model1_data6']]\n", + "sim_cond_id_list = [\n", + " [\"model1_data1\"],\n", + " [\"model1_data2\", \"model1_data3\"],\n", + " [\"model1_data4\", \"model1_data5\"],\n", + " [\"model1_data6\"],\n", + "]\n", "\n", - "ax = plot_without_vis_spec(condition_file_Fujita, sim_cond_id_list,\n", - " 'simulation', data_file_Fujita,\n", - " plotted_noise='provided')" + "ax = plot_without_vis_spec(\n", + " condition_file_Fujita,\n", + " sim_cond_id_list,\n", + " \"simulation\",\n", + " data_file_Fujita,\n", + " plotted_noise=\"provided\",\n", + ")" ] }, { @@ -178,12 +208,16 @@ "# - in the second window the observable 'pEGFR_tot' for all simulation conditions\n", "# - in the third window the observable 'pAkt_tot' for all simulation conditions\n", "\n", - "observable_id_list = [['pS6_tot'], ['pEGFR_tot'], ['pAkt_tot']]\n", + "observable_id_list = [[\"pS6_tot\"], [\"pEGFR_tot\"], [\"pAkt_tot\"]]\n", "\n", "\n", - "ax = plot_without_vis_spec(condition_file_Fujita, observable_id_list,\n", - " 'observable', data_file_Fujita,\n", - " plotted_noise='provided')" + "ax = plot_without_vis_spec(\n", + " condition_file_Fujita,\n", + " observable_id_list,\n", + " \"observable\",\n", + " data_file_Fujita,\n", + " plotted_noise=\"provided\",\n", + ")" ] }, { @@ -198,12 +232,16 @@ "# - in the third window the observable 'pAkt_tot' for all simulation conditions\n", "# while using the noise values which are saved in the PEtab files\n", "\n", - "observable_id_list = [['pS6_tot'], ['pEGFR_tot']]\n", + "observable_id_list = [[\"pS6_tot\"], [\"pEGFR_tot\"]]\n", "\n", "\n", - "ax = plot_without_vis_spec(condition_file_Fujita, observable_id_list,\n", - " 'observable', data_file_Fujita,\n", - " plotted_noise='provided')" + "ax = plot_without_vis_spec(\n", + " condition_file_Fujita,\n", + " observable_id_list,\n", + " \"observable\",\n", + " data_file_Fujita,\n", + " plotted_noise=\"provided\",\n", + ")" ] }, { @@ -221,12 +259,20 @@ "source": [ "simu_file_Fujita = \"example_Fujita/Fujita_simulatedData.tsv\"\n", "\n", - "sim_cond_id_list = [['model1_data1'], ['model1_data2', 'model1_data3'],\n", - " ['model1_data4', 'model1_data5'], ['model1_data6']]\n", + "sim_cond_id_list = [\n", + " [\"model1_data1\"],\n", + " [\"model1_data2\", \"model1_data3\"],\n", + " [\"model1_data4\", \"model1_data5\"],\n", + " [\"model1_data6\"],\n", + "]\n", "\n", - "ax = plot_without_vis_spec(condition_file_Fujita, sim_cond_id_list,\n", - " 'simulation', simulations_df=simu_file_Fujita,\n", - " plotted_noise='provided')" + "ax = plot_without_vis_spec(\n", + " condition_file_Fujita,\n", + " sim_cond_id_list,\n", + " \"simulation\",\n", + " simulations_df=simu_file_Fujita,\n", + " 
plotted_noise=\"provided\",\n", + ")" ] }, { @@ -235,11 +281,15 @@ "metadata": {}, "outputs": [], "source": [ - "observable_id_list = [['pS6_tot'], ['pEGFR_tot'], ['pAkt_tot']]\n", + "observable_id_list = [[\"pS6_tot\"], [\"pEGFR_tot\"], [\"pAkt_tot\"]]\n", "\n", - "ax = plot_without_vis_spec(condition_file_Fujita, observable_id_list,\n", - " 'observable', simulations_df=simu_file_Fujita,\n", - " plotted_noise='provided')" + "ax = plot_without_vis_spec(\n", + " condition_file_Fujita,\n", + " observable_id_list,\n", + " \"observable\",\n", + " simulations_df=simu_file_Fujita,\n", + " plotted_noise=\"provided\",\n", + ")" ] } ], @@ -264,4 +314,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/doc/example/example_visualization_with_visspec.ipynb b/doc/example/example_visualization_with_visspec.ipynb index 0e745fc4..403fca1c 100644 --- a/doc/example/example_visualization_with_visspec.ipynb +++ b/doc/example/example_visualization_with_visspec.ipynb @@ -42,10 +42,12 @@ "source": [ "visualization_file_path = folder + \"/visuSpecs/Fujita_visuSpec_empty.tsv\"\n", "\n", - "pp = petab.Problem.from_files(measurement_file=data_file_path,\n", - " condition_file=condition_file_path,\n", - " observable_files=observables_file_path,\n", - " visualization_files=visualization_file_path)\n", + "pp = petab.Problem.from_files(\n", + " measurement_file=data_file_path,\n", + " condition_file=condition_file_path,\n", + " observable_files=observables_file_path,\n", + " visualization_files=visualization_file_path,\n", + ")\n", "petab.visualize.plot_problem(petab_problem=pp);" ] }, @@ -71,10 +73,12 @@ "source": [ "visualization_file_path = folder + \"/visuSpecs/Fujita_visuSpec_mandatory.tsv\"\n", "\n", - "pp = petab.Problem.from_files(measurement_file=data_file_path,\n", - " condition_file=condition_file_path,\n", - " observable_files=observables_file_path,\n", - " visualization_files=visualization_file_path)\n", + "pp = petab.Problem.from_files(\n", + " measurement_file=data_file_path,\n", + " condition_file=condition_file_path,\n", + " observable_files=observables_file_path,\n", + " visualization_files=visualization_file_path,\n", + ")\n", "\n", "petab.visualize.plot_problem(petab_problem=pp);" ] @@ -98,10 +102,12 @@ "source": [ "visualization_file_path = folder + \"/visuSpecs/Fujita_visuSpec_1.tsv\"\n", "\n", - "pp = petab.Problem.from_files(measurement_file=data_file_path,\n", - " condition_file=condition_file_path,\n", - " observable_files=observables_file_path,\n", - " visualization_files=visualization_file_path)\n", + "pp = petab.Problem.from_files(\n", + " measurement_file=data_file_path,\n", + " condition_file=condition_file_path,\n", + " observable_files=observables_file_path,\n", + " visualization_files=visualization_file_path,\n", + ")\n", "\n", "petab.visualize.plot_problem(petab_problem=pp);" ] @@ -133,10 +139,12 @@ "source": [ "visualization_file_path = folder + \"/visuSpecs/Fujita_visuSpec_2.tsv\"\n", "\n", - "pp = petab.Problem.from_files(measurement_file=data_file_path,\n", - " condition_file=condition_file_path,\n", - " observable_files=observables_file_path,\n", - " visualization_files=visualization_file_path)\n", + "pp = petab.Problem.from_files(\n", + " measurement_file=data_file_path,\n", + " condition_file=condition_file_path,\n", + " observable_files=observables_file_path,\n", + " visualization_files=visualization_file_path,\n", + ")\n", "\n", "petab.visualize.plot_problem(petab_problem=pp);" ] @@ -163,12 +171,16 @@ "metadata": {}, "outputs": [], 
"source": [ - "visualization_file_path = folder + \"/visuSpecs/Fujita_visuSpec_individual_datasets.tsv\"\n", + "visualization_file_path = (\n", + " folder + \"/visuSpecs/Fujita_visuSpec_individual_datasets.tsv\"\n", + ")\n", "\n", - "pp = petab.Problem.from_files(measurement_file=data_file_path,\n", - " condition_file=condition_file_path,\n", - " observable_files=observables_file_path,\n", - " visualization_files=visualization_file_path)\n", + "pp = petab.Problem.from_files(\n", + " measurement_file=data_file_path,\n", + " condition_file=condition_file_path,\n", + " observable_files=observables_file_path,\n", + " visualization_files=visualization_file_path,\n", + ")\n", "\n", "petab.visualize.plot_problem(petab_problem=pp);" ] @@ -183,10 +195,12 @@ "source": [ "visualization_file_path = folder + \"/visuSpecs/Fujita_visuSpec_datasetIds.tsv\"\n", "\n", - "pp = petab.Problem.from_files(measurement_file=data_file_path,\n", - " condition_file=condition_file_path,\n", - " observable_files=observables_file_path,\n", - " visualization_files=visualization_file_path)\n", + "pp = petab.Problem.from_files(\n", + " measurement_file=data_file_path,\n", + " condition_file=condition_file_path,\n", + " observable_files=observables_file_path,\n", + " visualization_files=visualization_file_path,\n", + ")\n", "\n", "petab.visualize.plot_problem(petab_problem=pp);" ] @@ -224,4 +238,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/doc/example/example_visualization_without_visspec.ipynb b/doc/example/example_visualization_without_visspec.ipynb index 6200b928..5bbbbeb0 100644 --- a/doc/example/example_visualization_without_visspec.ipynb +++ b/doc/example/example_visualization_without_visspec.ipynb @@ -46,9 +46,11 @@ "outputs": [], "source": [ "# load PEtab problem from files\n", - "pp = petab.Problem.from_files(measurement_file=data_file_path,\n", - " condition_file=condition_file_path,\n", - " observable_files=observables_file_path)\n", + "pp = petab.Problem.from_files(\n", + " measurement_file=data_file_path,\n", + " condition_file=condition_file_path,\n", + " observable_files=observables_file_path,\n", + ")\n", "# Alternatively, from yaml file\n", "# pp = petab.Problem.from_yaml(folder + \"Fujita.yaml\")\n", "\n", @@ -71,8 +73,9 @@ "metadata": {}, "outputs": [], "source": [ - "ax = petab.visualize.plot_problem(petab_problem=pp, \n", - " grouping_list = [['pEGFR_tot'], ['pAkt_tot', 'pS6_tot']])" + "ax = petab.visualize.plot_problem(\n", + " petab_problem=pp, grouping_list=[[\"pEGFR_tot\"], [\"pAkt_tot\", \"pS6_tot\"]]\n", + ")" ] }, { @@ -88,11 +91,15 @@ "metadata": {}, "outputs": [], "source": [ - "ax = petab.visualize.plot_problem(petab_problem=pp, \n", - " grouping_list = [['model1_data1'], \n", - " ['model1_data2', 'model1_data3'], \n", - " ['model1_data4', 'model1_data5', 'model1_data6']],\n", - " group_by='simulation')" + "ax = petab.visualize.plot_problem(\n", + " petab_problem=pp,\n", + " grouping_list=[\n", + " [\"model1_data1\"],\n", + " [\"model1_data2\", \"model1_data3\"],\n", + " [\"model1_data4\", \"model1_data5\", \"model1_data6\"],\n", + " ],\n", + " group_by=\"simulation\",\n", + ")" ] }, { @@ -108,26 +115,36 @@ "metadata": {}, "outputs": [], "source": [ - "ax = petab.visualize.plot_problem(petab_problem=pp, \n", - " grouping_list = [['model1_data1_pEGFR_tot', \n", - " 'model1_data2_pEGFR_tot', \n", - " 'model1_data3_pEGFR_tot',\n", - " 'model1_data4_pEGFR_tot', \n", - " 'model1_data5_pEGFR_tot', \n", - " 'model1_data6_pEGFR_tot'], \n", - " 
['model1_data1_pAkt_tot', \n", - " 'model1_data2_pAkt_tot',\n", - " 'model1_data3_pAkt_tot', \n", - " 'model1_data4_pAkt_tot',\n", - " 'model1_data5_pAkt_tot',\n", - " 'model1_data6_pAkt_tot'], \n", - " ['model1_data1_pS6_tot',\n", - " 'model1_data2_pS6_tot',\n", - " 'model1_data3_pS6_tot',\n", - " 'model1_data4_pS6_tot', \n", - " 'model1_data5_pS6_tot', \n", - " 'model1_data6_pS6_tot']],\n", - " group_by='dataset')" + "ax = petab.visualize.plot_problem(\n", + " petab_problem=pp,\n", + " grouping_list=[\n", + " [\n", + " \"model1_data1_pEGFR_tot\",\n", + " \"model1_data2_pEGFR_tot\",\n", + " \"model1_data3_pEGFR_tot\",\n", + " \"model1_data4_pEGFR_tot\",\n", + " \"model1_data5_pEGFR_tot\",\n", + " \"model1_data6_pEGFR_tot\",\n", + " ],\n", + " [\n", + " \"model1_data1_pAkt_tot\",\n", + " \"model1_data2_pAkt_tot\",\n", + " \"model1_data3_pAkt_tot\",\n", + " \"model1_data4_pAkt_tot\",\n", + " \"model1_data5_pAkt_tot\",\n", + " \"model1_data6_pAkt_tot\",\n", + " ],\n", + " [\n", + " \"model1_data1_pS6_tot\",\n", + " \"model1_data2_pS6_tot\",\n", + " \"model1_data3_pS6_tot\",\n", + " \"model1_data4_pS6_tot\",\n", + " \"model1_data5_pS6_tot\",\n", + " \"model1_data6_pS6_tot\",\n", + " ],\n", + " ],\n", + " group_by=\"dataset\",\n", + ")" ] }, { @@ -168,4 +185,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/doc/md2rst.py b/doc/md2rst.py index 9ae0919d..77389017 100644 --- a/doc/md2rst.py +++ b/doc/md2rst.py @@ -1,6 +1,7 @@ -import m2r2 import re +import m2r2 + def read(fname): """Read a file.""" @@ -13,8 +14,8 @@ def absolute_links(txt): raw_base = f"(https://raw.githubusercontent.com/{repo}/master/" embedded_base = f"(https://github.com/{repo}/tree/master/" # iterate over links - for var in re.findall(r'\[.*?\]\((?!http).*?\)', txt): - if re.match(r'.*?.(png|svg)\)', var): + for var in re.findall(r"\[.*?\]\((?!http).*?\)", txt): + if re.match(r".*?.(png|svg)\)", var): # link to raw file rep = var.replace("(", raw_base) else: @@ -27,10 +28,10 @@ def absolute_links(txt): def md2rst(source: str, target: str): txt = absolute_links(read(source)) txt = m2r2.convert(txt) - with open(target, 'w') as f: + with open(target, "w") as f: f.write(txt) -if __name__ == '__main__': +if __name__ == "__main__": # parse readme - md2rst('../README.md', '_static/README.rst') + md2rst("../README.md", "_static/README.rst") diff --git a/petab/C.py b/petab/C.py index 9d6a49c3..2e3616ee 100644 --- a/petab/C.py +++ b/petab/C.py @@ -5,143 +5,172 @@ import math as _math - # MEASUREMENTS #: -OBSERVABLE_ID = 'observableId' +OBSERVABLE_ID = "observableId" #: -PREEQUILIBRATION_CONDITION_ID = 'preequilibrationConditionId' +PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId" #: -SIMULATION_CONDITION_ID = 'simulationConditionId' +SIMULATION_CONDITION_ID = "simulationConditionId" #: -MEASUREMENT = 'measurement' +MEASUREMENT = "measurement" #: -TIME = 'time' +TIME = "time" #: Time value that indicates steady-state measurements TIME_STEADY_STATE = _math.inf #: -OBSERVABLE_PARAMETERS = 'observableParameters' +OBSERVABLE_PARAMETERS = "observableParameters" #: -NOISE_PARAMETERS = 'noiseParameters' +NOISE_PARAMETERS = "noiseParameters" #: -DATASET_ID = 'datasetId' +DATASET_ID = "datasetId" #: -REPLICATE_ID = 'replicateId' +REPLICATE_ID = "replicateId" #: Mandatory columns of measurement table MEASUREMENT_DF_REQUIRED_COLS = [ - OBSERVABLE_ID, SIMULATION_CONDITION_ID, MEASUREMENT, TIME] + OBSERVABLE_ID, + SIMULATION_CONDITION_ID, + MEASUREMENT, + TIME, +] #: Optional columns of 
measurement table MEASUREMENT_DF_OPTIONAL_COLS = [ - PREEQUILIBRATION_CONDITION_ID, OBSERVABLE_PARAMETERS, + PREEQUILIBRATION_CONDITION_ID, + OBSERVABLE_PARAMETERS, NOISE_PARAMETERS, - DATASET_ID, REPLICATE_ID] + DATASET_ID, + REPLICATE_ID, +] #: Measurement table columns MEASUREMENT_DF_COLS = [ - MEASUREMENT_DF_REQUIRED_COLS[0], MEASUREMENT_DF_OPTIONAL_COLS[0], - *MEASUREMENT_DF_REQUIRED_COLS[1:], *MEASUREMENT_DF_OPTIONAL_COLS[1:]] + MEASUREMENT_DF_REQUIRED_COLS[0], + MEASUREMENT_DF_OPTIONAL_COLS[0], + *MEASUREMENT_DF_REQUIRED_COLS[1:], + *MEASUREMENT_DF_OPTIONAL_COLS[1:], +] # PARAMETERS #: -PARAMETER_ID = 'parameterId' +PARAMETER_ID = "parameterId" #: -PARAMETER_NAME = 'parameterName' +PARAMETER_NAME = "parameterName" #: -PARAMETER_SCALE = 'parameterScale' +PARAMETER_SCALE = "parameterScale" #: -LOWER_BOUND = 'lowerBound' +LOWER_BOUND = "lowerBound" #: -UPPER_BOUND = 'upperBound' +UPPER_BOUND = "upperBound" #: -NOMINAL_VALUE = 'nominalValue' +NOMINAL_VALUE = "nominalValue" #: -ESTIMATE = 'estimate' +ESTIMATE = "estimate" #: -INITIALIZATION_PRIOR_TYPE = 'initializationPriorType' +INITIALIZATION_PRIOR_TYPE = "initializationPriorType" #: -INITIALIZATION_PRIOR_PARAMETERS = 'initializationPriorParameters' +INITIALIZATION_PRIOR_PARAMETERS = "initializationPriorParameters" #: -OBJECTIVE_PRIOR_TYPE = 'objectivePriorType' +OBJECTIVE_PRIOR_TYPE = "objectivePriorType" #: -OBJECTIVE_PRIOR_PARAMETERS = 'objectivePriorParameters' +OBJECTIVE_PRIOR_PARAMETERS = "objectivePriorParameters" #: Mandatory columns of parameter table PARAMETER_DF_REQUIRED_COLS = [ - PARAMETER_ID, PARAMETER_SCALE, LOWER_BOUND, UPPER_BOUND, ESTIMATE] + PARAMETER_ID, + PARAMETER_SCALE, + LOWER_BOUND, + UPPER_BOUND, + ESTIMATE, +] #: Optional columns of parameter table PARAMETER_DF_OPTIONAL_COLS = [ - PARAMETER_NAME, NOMINAL_VALUE, - INITIALIZATION_PRIOR_TYPE, INITIALIZATION_PRIOR_PARAMETERS, - OBJECTIVE_PRIOR_TYPE, OBJECTIVE_PRIOR_PARAMETERS] + PARAMETER_NAME, + NOMINAL_VALUE, + INITIALIZATION_PRIOR_TYPE, + INITIALIZATION_PRIOR_PARAMETERS, + OBJECTIVE_PRIOR_TYPE, + OBJECTIVE_PRIOR_PARAMETERS, +] #: Parameter table columns PARAMETER_DF_COLS = [ - PARAMETER_DF_REQUIRED_COLS[0], PARAMETER_DF_OPTIONAL_COLS[0], - *PARAMETER_DF_REQUIRED_COLS[1:], *PARAMETER_DF_OPTIONAL_COLS[1:]] + PARAMETER_DF_REQUIRED_COLS[0], + PARAMETER_DF_OPTIONAL_COLS[0], + *PARAMETER_DF_REQUIRED_COLS[1:], + *PARAMETER_DF_OPTIONAL_COLS[1:], +] #: -INITIALIZATION = 'initialization' +INITIALIZATION = "initialization" #: -OBJECTIVE = 'objective' +OBJECTIVE = "objective" # CONDITIONS #: -CONDITION_ID = 'conditionId' +CONDITION_ID = "conditionId" #: -CONDITION_NAME = 'conditionName' +CONDITION_NAME = "conditionName" # OBSERVABLES #: -OBSERVABLE_NAME = 'observableName' +OBSERVABLE_NAME = "observableName" #: -OBSERVABLE_FORMULA = 'observableFormula' +OBSERVABLE_FORMULA = "observableFormula" #: -NOISE_FORMULA = 'noiseFormula' +NOISE_FORMULA = "noiseFormula" #: -OBSERVABLE_TRANSFORMATION = 'observableTransformation' +OBSERVABLE_TRANSFORMATION = "observableTransformation" #: -NOISE_DISTRIBUTION = 'noiseDistribution' +NOISE_DISTRIBUTION = "noiseDistribution" #: Mandatory columns of observables table OBSERVABLE_DF_REQUIRED_COLS = [ - OBSERVABLE_ID, OBSERVABLE_FORMULA, NOISE_FORMULA] + OBSERVABLE_ID, + OBSERVABLE_FORMULA, + NOISE_FORMULA, +] #: Optional columns of observables table OBSERVABLE_DF_OPTIONAL_COLS = [ - OBSERVABLE_NAME, OBSERVABLE_TRANSFORMATION, NOISE_DISTRIBUTION] + OBSERVABLE_NAME, + OBSERVABLE_TRANSFORMATION, + NOISE_DISTRIBUTION, +] #: Observables table 
columns OBSERVABLE_DF_COLS = [ - *OBSERVABLE_DF_REQUIRED_COLS, *OBSERVABLE_DF_OPTIONAL_COLS] + *OBSERVABLE_DF_REQUIRED_COLS, + *OBSERVABLE_DF_OPTIONAL_COLS, +] # TRANSFORMATIONS #: -LIN = 'lin' +LIN = "lin" #: -LOG = 'log' +LOG = "log" #: -LOG10 = 'log10' +LOG10 = "log10" #: Supported observable transformations OBSERVABLE_TRANSFORMATIONS = [LIN, LOG, LOG10] @@ -149,26 +178,33 @@ # NOISE MODELS #: -UNIFORM = 'uniform' +UNIFORM = "uniform" #: -PARAMETER_SCALE_UNIFORM = 'parameterScaleUniform' +PARAMETER_SCALE_UNIFORM = "parameterScaleUniform" #: -NORMAL = 'normal' +NORMAL = "normal" #: -PARAMETER_SCALE_NORMAL = 'parameterScaleNormal' +PARAMETER_SCALE_NORMAL = "parameterScaleNormal" #: -LAPLACE = 'laplace' +LAPLACE = "laplace" #: -PARAMETER_SCALE_LAPLACE = 'parameterScaleLaplace' +PARAMETER_SCALE_LAPLACE = "parameterScaleLaplace" #: -LOG_NORMAL = 'logNormal' +LOG_NORMAL = "logNormal" #: -LOG_LAPLACE = 'logLaplace' +LOG_LAPLACE = "logLaplace" #: Supported prior types PRIOR_TYPES = [ - UNIFORM, NORMAL, LAPLACE, LOG_NORMAL, LOG_LAPLACE, - PARAMETER_SCALE_UNIFORM, PARAMETER_SCALE_NORMAL, PARAMETER_SCALE_LAPLACE] + UNIFORM, + NORMAL, + LAPLACE, + LOG_NORMAL, + LOG_LAPLACE, + PARAMETER_SCALE_UNIFORM, + PARAMETER_SCALE_NORMAL, + PARAMETER_SCALE_LAPLACE, +] #: Supported noise distributions NOISE_MODELS = [NORMAL, LAPLACE] @@ -177,60 +213,86 @@ # VISUALIZATION #: -PLOT_ID = 'plotId' +PLOT_ID = "plotId" #: -PLOT_NAME = 'plotName' +PLOT_NAME = "plotName" #: -PLOT_TYPE_SIMULATION = 'plotTypeSimulation' +PLOT_TYPE_SIMULATION = "plotTypeSimulation" #: -PLOT_TYPE_DATA = 'plotTypeData' +PLOT_TYPE_DATA = "plotTypeData" #: -X_VALUES = 'xValues' +X_VALUES = "xValues" #: -X_OFFSET = 'xOffset' +X_OFFSET = "xOffset" #: -X_LABEL = 'xLabel' +X_LABEL = "xLabel" #: -X_SCALE = 'xScale' +X_SCALE = "xScale" #: -Y_VALUES = 'yValues' +Y_VALUES = "yValues" #: -Y_OFFSET = 'yOffset' +Y_OFFSET = "yOffset" #: -Y_LABEL = 'yLabel' +Y_LABEL = "yLabel" #: -Y_SCALE = 'yScale' +Y_SCALE = "yScale" #: -LEGEND_ENTRY = 'legendEntry' +LEGEND_ENTRY = "legendEntry" #: Mandatory columns of visualization table VISUALIZATION_DF_REQUIRED_COLS = [PLOT_ID] #: Optional columns of visualization table VISUALIZATION_DF_OPTIONAL_COLS = [ - PLOT_NAME, PLOT_TYPE_SIMULATION, PLOT_TYPE_DATA, X_VALUES, X_OFFSET, - X_LABEL, X_SCALE, Y_VALUES, Y_OFFSET, Y_LABEL, Y_SCALE, LEGEND_ENTRY, - DATASET_ID] + PLOT_NAME, + PLOT_TYPE_SIMULATION, + PLOT_TYPE_DATA, + X_VALUES, + X_OFFSET, + X_LABEL, + X_SCALE, + Y_VALUES, + Y_OFFSET, + Y_LABEL, + Y_SCALE, + LEGEND_ENTRY, + DATASET_ID, +] #: Visualization table columns VISUALIZATION_DF_COLS = [ - *VISUALIZATION_DF_REQUIRED_COLS, *VISUALIZATION_DF_OPTIONAL_COLS] + *VISUALIZATION_DF_REQUIRED_COLS, + *VISUALIZATION_DF_OPTIONAL_COLS, +] #: Visualization table columns that contain subplot specifications VISUALIZATION_DF_SUBPLOT_LEVEL_COLS = [ - PLOT_ID, PLOT_NAME, PLOT_TYPE_SIMULATION, PLOT_TYPE_DATA, - X_LABEL, X_SCALE, Y_LABEL, Y_SCALE] + PLOT_ID, + PLOT_NAME, + PLOT_TYPE_SIMULATION, + PLOT_TYPE_DATA, + X_LABEL, + X_SCALE, + Y_LABEL, + Y_SCALE, +] #: Visualization table columns that contain single plot specifications VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS = [ - X_VALUES, X_OFFSET, Y_VALUES, Y_OFFSET, LEGEND_ENTRY, DATASET_ID] + X_VALUES, + X_OFFSET, + Y_VALUES, + Y_OFFSET, + LEGEND_ENTRY, + DATASET_ID, +] #: -LINE_PLOT = 'LinePlot' +LINE_PLOT = "LinePlot" #: -BAR_PLOT = 'BarPlot' +BAR_PLOT = "BarPlot" #: -SCATTER_PLOT = 'ScatterPlot' +SCATTER_PLOT = "ScatterPlot" #: Supported plot types PLOT_TYPES_SIMULATION = [LINE_PLOT, 
BAR_PLOT, SCATTER_PLOT] @@ -242,62 +304,62 @@ #: -MEAN_AND_SD = 'MeanAndSD' +MEAN_AND_SD = "MeanAndSD" #: -MEAN_AND_SEM = 'MeanAndSEM' +MEAN_AND_SEM = "MeanAndSEM" #: -REPLICATE = 'replicate' +REPLICATE = "replicate" #: -PROVIDED = 'provided' +PROVIDED = "provided" #: Supported settings for handling replicates PLOT_TYPES_DATA = [MEAN_AND_SD, MEAN_AND_SEM, REPLICATE, PROVIDED] # YAML #: -FORMAT_VERSION = 'format_version' +FORMAT_VERSION = "format_version" #: -PARAMETER_FILE = 'parameter_file' +PARAMETER_FILE = "parameter_file" #: -PROBLEMS = 'problems' +PROBLEMS = "problems" #: -SBML_FILES = 'sbml_files' +SBML_FILES = "sbml_files" #: -MODEL_FILES = 'model_files' +MODEL_FILES = "model_files" #: -MODEL_LOCATION = 'location' +MODEL_LOCATION = "location" #: -MODEL_LANGUAGE = 'language' +MODEL_LANGUAGE = "language" #: -CONDITION_FILES = 'condition_files' +CONDITION_FILES = "condition_files" #: -MEASUREMENT_FILES = 'measurement_files' +MEASUREMENT_FILES = "measurement_files" #: -OBSERVABLE_FILES = 'observable_files' +OBSERVABLE_FILES = "observable_files" #: -VISUALIZATION_FILES = 'visualization_files' +VISUALIZATION_FILES = "visualization_files" #: -MAPPING_FILES = 'mapping_files' +MAPPING_FILES = "mapping_files" #: -EXTENSIONS = 'extensions' +EXTENSIONS = "extensions" # MAPPING #: -PETAB_ENTITY_ID = 'petabEntityId' +PETAB_ENTITY_ID = "petabEntityId" #: -MODEL_ENTITY_ID = 'modelEntityId' +MODEL_ENTITY_ID = "modelEntityId" #: MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID] # MORE #: -SIMULATION = 'simulation' +SIMULATION = "simulation" #: -RESIDUAL = 'residual' +RESIDUAL = "residual" #: -NOISE_VALUE = 'noiseValue' +NOISE_VALUE = "noiseValue" # separator for multiple parameter values (bounds, observableParameters, ...) -PARAMETER_SEPARATOR = ';' +PARAMETER_SEPARATOR = ";" diff --git a/petab/__init__.py b/petab/__init__.py index 1f9719e0..16cff24b 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -16,7 +16,9 @@ from .composite_problem import * # noqa: F403, F401, E402 from .conditions import * # noqa: F403, F401, E402 from .core import * # noqa: F403, F401, E402 +from .format_version import __format_version__ # noqa: F401, E402 from .lint import * # noqa: F403, F401, E402 +from .mapping import * # noqa: F403, F401, E402 from .measurements import * # noqa: F403, F401, E402 from .observables import * # noqa: F403, F401, E402 from .parameter_mapping import * # noqa: F403, F401, E402 @@ -25,7 +27,5 @@ from .sampling import * # noqa: F403, F401, E402 from .sbml import * # noqa: F403, F401, E402 from .simulate import * # noqa: F403, F401, E402 -from .yaml import * # noqa: F403, F401, E402 from .version import __version__ # noqa: F401, E402 -from .format_version import __format_version__ # noqa: F401, E402 -from .mapping import * # noqa: F403, F401, E402 +from .yaml import * # noqa: F403, F401, E402 diff --git a/petab/calculate.py b/petab/calculate.py index adf3d2ba..cb20bd38 100644 --- a/petab/calculate.py +++ b/petab/calculate.py @@ -10,21 +10,29 @@ from sympy.abc import _clash import petab + from .C import * -__all__ = ['calculate_residuals', 'calculate_residuals_for_table', - 'get_symbolic_noise_formulas', 'evaluate_noise_formula', - 'calculate_chi2', 'calculate_chi2_for_table_from_residuals', - 'calculate_llh', 'calculate_llh_for_table', 'calculate_single_llh'] +__all__ = [ + "calculate_residuals", + "calculate_residuals_for_table", + "get_symbolic_noise_formulas", + "evaluate_noise_formula", + "calculate_chi2", + "calculate_chi2_for_table_from_residuals", + "calculate_llh", + 
"calculate_llh_for_table", + "calculate_single_llh", +] def calculate_residuals( - measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], - simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], - observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], - parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], - normalize: bool = True, - scale: bool = True + measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], + simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], + observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], + parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], + normalize: bool = True, + scale: bool = True, ) -> List[pd.DataFrame]: """Calculate residuals. @@ -59,22 +67,28 @@ def calculate_residuals( # iterate over data frames residual_dfs = [] - for (measurement_df, simulation_df, observable_df, parameter_df) in zip( - measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs): + for measurement_df, simulation_df, observable_df, parameter_df in zip( + measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs + ): residual_df = calculate_residuals_for_table( - measurement_df, simulation_df, observable_df, parameter_df, - normalize, scale) + measurement_df, + simulation_df, + observable_df, + parameter_df, + normalize, + scale, + ) residual_dfs.append(residual_df) return residual_dfs def calculate_residuals_for_table( - measurement_df: pd.DataFrame, - simulation_df: pd.DataFrame, - observable_df: pd.DataFrame, - parameter_df: pd.DataFrame, - normalize: bool = True, - scale: bool = True + measurement_df: pd.DataFrame, + simulation_df: pd.DataFrame, + observable_df: pd.DataFrame, + parameter_df: pd.DataFrame, + normalize: bool = True, + scale: bool = True, ) -> pd.DataFrame: """ Calculate residuals for a single measurement table. @@ -82,7 +96,8 @@ def calculate_residuals_for_table( """ # create residual df as copy of measurement df, change column residual_df = measurement_df.copy(deep=True).rename( - columns={MEASUREMENT: RESIDUAL}) + columns={MEASUREMENT: RESIDUAL} + ) residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64") # matching columns compared_cols = set(MEASUREMENT_DF_COLS) @@ -97,8 +112,10 @@ def calculate_residuals_for_table( for irow, row in measurement_df.iterrows(): measurement = row[MEASUREMENT] # look up in simulation df - masks = [(simulation_df[col] == row[col]) | petab.is_empty(row[col]) - for col in compared_cols] + masks = [ + (simulation_df[col] == row[col]) | petab.is_empty(row[col]) + for col in compared_cols + ] mask = reduce(lambda x, y: x & y, masks) simulation = simulation_df.loc[mask][SIMULATION].iloc[0] if scale: @@ -115,7 +132,8 @@ def calculate_residuals_for_table( if normalize: # look up noise standard deviation noise_value = evaluate_noise_formula( - row, noise_formulas, parameter_df, simulation) + row, noise_formulas, parameter_df, simulation + ) residual /= noise_value # fill in value @@ -145,10 +163,10 @@ def get_symbolic_noise_formulas(observable_df) -> Dict[str, sympy.Expr]: def evaluate_noise_formula( - measurement: pd.Series, - noise_formulas: Dict[str, sympy.Expr], - parameter_df: pd.DataFrame, - simulation: numbers.Number, + measurement: pd.Series, + noise_formulas: Dict[str, sympy.Expr], + parameter_df: pd.DataFrame, + simulation: numbers.Number, ) -> float: """Fill in parameters for `measurement` and evaluate noise_formula. 
@@ -167,7 +185,8 @@ def evaluate_noise_formula( # extract measurement specific overrides observable_parameter_overrides = petab.split_parameter_replacement_list( - measurement.get(NOISE_PARAMETERS, None)) + measurement.get(NOISE_PARAMETERS, None) + ) # fill in measurement specific parameters overrides = { f"noiseParameter{i_obs_par + 1}_{observable_id}": obs_par @@ -199,17 +218,18 @@ def evaluate_noise_formula( f"Cannot replace all parameters in noise formula {noise_value} " f"for observable {observable_id}. " f"Missing {noise_formula.free_symbols}. Note that model states " - "are currently not supported.") from e + "are currently not supported." + ) from e return noise_value def calculate_chi2( - measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], - simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], - observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], - parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], - normalize: bool = True, - scale: bool = True + measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], + simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], + observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], + parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], + normalize: bool = True, + scale: bool = True, ) -> float: """Calculate the chi2 value. @@ -232,25 +252,31 @@ def calculate_chi2( The aggregated chi2 value. """ residual_dfs = calculate_residuals( - measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs, - normalize, scale) - chi2s = [calculate_chi2_for_table_from_residuals(df) - for df in residual_dfs] + measurement_dfs, + simulation_dfs, + observable_dfs, + parameter_dfs, + normalize, + scale, + ) + chi2s = [ + calculate_chi2_for_table_from_residuals(df) for df in residual_dfs + ] return sum(chi2s) def calculate_chi2_for_table_from_residuals( - residual_df: pd.DataFrame, + residual_df: pd.DataFrame, ) -> float: """Compute chi2 value for a single residual table.""" - return (np.array(residual_df[RESIDUAL])**2).sum() + return (np.array(residual_df[RESIDUAL]) ** 2).sum() def calculate_llh( - measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], - simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], - observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], - parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], + measurement_dfs: Union[List[pd.DataFrame], pd.DataFrame], + simulation_dfs: Union[List[pd.DataFrame], pd.DataFrame], + observable_dfs: Union[List[pd.DataFrame], pd.DataFrame], + parameter_dfs: Union[List[pd.DataFrame], pd.DataFrame], ) -> float: """Calculate total log likelihood. @@ -279,19 +305,22 @@ def calculate_llh( # iterate over data frames llhs = [] - for (measurement_df, simulation_df, observable_df, parameter_df) in zip( - measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs): + for measurement_df, simulation_df, observable_df, parameter_df in zip( + measurement_dfs, simulation_dfs, observable_dfs, parameter_dfs + ): _llh = calculate_llh_for_table( - measurement_df, simulation_df, observable_df, parameter_df) + measurement_df, simulation_df, observable_df, parameter_df + ) llhs.append(_llh) return sum(llhs) def calculate_llh_for_table( - measurement_df: pd.DataFrame, - simulation_df: pd.DataFrame, - observable_df: pd.DataFrame, - parameter_df: pd.DataFrame) -> float: + measurement_df: pd.DataFrame, + simulation_df: pd.DataFrame, + observable_df: pd.DataFrame, + parameter_df: pd.DataFrame, +) -> float: """Calculate log-likelihood for one set of tables. 
For the arguments, see `calculate_llh`.""" llhs = [] @@ -310,8 +339,10 @@ def calculate_llh_for_table( measurement = row[MEASUREMENT] # look up in simulation df - masks = [(simulation_df[col] == row[col]) | petab.is_empty(row[col]) - for col in compared_cols] + masks = [ + (simulation_df[col] == row[col]) | petab.is_empty(row[col]) + for col in compared_cols + ] mask = reduce(lambda x, y: x & y, masks) simulation = simulation_df.loc[mask][SIMULATION].iloc[0] @@ -323,23 +354,26 @@ def calculate_llh_for_table( # get noise standard deviation noise_value = evaluate_noise_formula( - row, noise_formulas, parameter_df, petab.scale(simulation, scale)) + row, noise_formulas, parameter_df, petab.scale(simulation, scale) + ) # get noise distribution noise_distribution = observable.get(NOISE_DISTRIBUTION, NORMAL) llh = calculate_single_llh( - measurement, simulation, scale, noise_distribution, noise_value) + measurement, simulation, scale, noise_distribution, noise_value + ) llhs.append(llh) return sum(llhs) def calculate_single_llh( - measurement: float, - simulation: float, - scale: str, - noise_distribution: str, - noise_value: float) -> float: + measurement: float, + simulation: float, + scale: str, + noise_distribution: str, + noise_value: float, +) -> float: """Calculate a single log likelihood. Arguments: @@ -359,20 +393,28 @@ def calculate_single_llh( # go over the possible cases if noise_distribution == NORMAL and scale == LIN: - nllh = 0.5*log(2*pi*sigma**2) + 0.5*((s-m)/sigma)**2 + nllh = 0.5 * log(2 * pi * sigma**2) + 0.5 * ((s - m) / sigma) ** 2 elif noise_distribution == NORMAL and scale == LOG: - nllh = 0.5*log(2*pi*sigma**2*m**2) + 0.5*((log(s)-log(m))/sigma)**2 + nllh = ( + 0.5 * log(2 * pi * sigma**2 * m**2) + + 0.5 * ((log(s) - log(m)) / sigma) ** 2 + ) elif noise_distribution == NORMAL and scale == LOG10: - nllh = 0.5*log(2*pi*sigma**2*m**2*log(10)**2) + \ - 0.5*((log10(s)-log10(m))/sigma)**2 + nllh = ( + 0.5 * log(2 * pi * sigma**2 * m**2 * log(10) ** 2) + + 0.5 * ((log10(s) - log10(m)) / sigma) ** 2 + ) elif noise_distribution == LAPLACE and scale == LIN: - nllh = log(2*sigma) + abs((s-m)/sigma) + nllh = log(2 * sigma) + abs((s - m) / sigma) elif noise_distribution == LAPLACE and scale == LOG: - nllh = log(2*sigma*m) + abs((log(s)-log(m))/sigma) + nllh = log(2 * sigma * m) + abs((log(s) - log(m)) / sigma) elif noise_distribution == LAPLACE and scale == LOG10: - nllh = log(2*sigma*m*log(10)) + abs((log10(s)-log10(m))/sigma) + nllh = log(2 * sigma * m * log(10)) + abs( + (log10(s) - log10(m)) / sigma + ) else: raise NotImplementedError( "Unsupported combination of noise_distribution and scale " - f"specified: {noise_distribution}, {scale}.") + f"specified: {noise_distribution}, {scale}." + ) return -nllh diff --git a/petab/composite_problem.py b/petab/composite_problem.py index 612263fd..d9803765 100644 --- a/petab/composite_problem.py +++ b/petab/composite_problem.py @@ -1,15 +1,13 @@ """PEtab problems consisting of multiple models""" import os -from typing import List, Union, Dict +from typing import Dict, List, Union import pandas as pd -from . import parameters -from . import problem -from . import yaml +from . 
import parameters, problem, yaml from .C import * # noqa: F403 -__all__ = ['CompositeProblem'] +__all__ = ["CompositeProblem"] class CompositeProblem: @@ -23,9 +21,10 @@ class CompositeProblem: """ def __init__( - self, - parameter_df: pd.DataFrame = None, - problems: List[problem.Problem] = None): + self, + parameter_df: pd.DataFrame = None, + problems: List[problem.Problem] = None, + ): """Constructor Arguments: @@ -38,7 +37,7 @@ def __init__( self.parameter_df: pd.DataFrame = parameter_df @staticmethod - def from_yaml(yaml_config: Union[Dict, str]) -> 'CompositeProblem': + def from_yaml(yaml_config: Union[Dict, str]) -> "CompositeProblem": """Create from YAML file Factory method to create a CompositeProblem instance from a PEtab @@ -55,7 +54,8 @@ def from_yaml(yaml_config: Union[Dict, str]) -> 'CompositeProblem': path_prefix = "" parameter_df = parameters.get_parameter_df( - os.path.join(path_prefix, yaml_config[PARAMETER_FILE])) + os.path.join(path_prefix, yaml_config[PARAMETER_FILE]) + ) problems = [] for problem_config in yaml_config[PROBLEMS]: @@ -64,20 +64,24 @@ def from_yaml(yaml_config: Union[Dict, str]) -> 'CompositeProblem': # don't set parameter file if we have multiple models cur_problem = problem.Problem.from_files( sbml_file=os.path.join( - path_prefix, problem_config[SBML_FILES][0]), + path_prefix, problem_config[SBML_FILES][0] + ), measurement_file=[ os.path.join(path_prefix, f) - for f in problem_config[MEASUREMENT_FILES]], + for f in problem_config[MEASUREMENT_FILES] + ], condition_file=os.path.join( - path_prefix, problem_config[CONDITION_FILES][0]), + path_prefix, problem_config[CONDITION_FILES][0] + ), visualization_files=[ os.path.join(path_prefix, f) - for f in problem_config[VISUALIZATION_FILES]], + for f in problem_config[VISUALIZATION_FILES] + ], observable_files=[ os.path.join(path_prefix, f) - for f in problem_config[OBSERVABLE_FILES]] + for f in problem_config[OBSERVABLE_FILES] + ], ) problems.append(cur_problem) - return CompositeProblem(parameter_df=parameter_df, - problems=problems) + return CompositeProblem(parameter_df=parameter_df, problems=problems) diff --git a/petab/conditions.py b/petab/conditions.py index 3e206463..55cf133d 100644 --- a/petab/conditions.py +++ b/petab/conditions.py @@ -9,12 +9,16 @@ from . import core, lint from .C import * -__all__ = ['get_condition_df', 'write_condition_df', 'create_condition_df', - 'get_parametric_overrides'] +__all__ = [ + "get_condition_df", + "write_condition_df", + "create_condition_df", + "get_parametric_overrides", +] def get_condition_df( - condition_file: Union[str, pd.DataFrame, Path, None] + condition_file: Union[str, pd.DataFrame, Path, None] ) -> pd.DataFrame: """Read the provided condition file into a ``pandas.Dataframe`` @@ -27,11 +31,13 @@ def get_condition_df( return condition_file if isinstance(condition_file, (str, Path)): - condition_file = pd.read_csv(condition_file, sep='\t', - float_precision='round_trip') + condition_file = pd.read_csv( + condition_file, sep="\t", float_precision="round_trip" + ) lint.assert_no_leading_trailing_whitespace( - condition_file.columns.values, "condition") + condition_file.columns.values, "condition" + ) if not isinstance(condition_file.index, pd.RangeIndex): condition_file.reset_index(inplace=True) @@ -40,7 +46,8 @@ def get_condition_df( condition_file.set_index([CONDITION_ID], inplace=True) except KeyError: raise KeyError( - f'Condition table missing mandatory field {CONDITION_ID}.') + f"Condition table missing mandatory field {CONDITION_ID}." 
+ ) return condition_file @@ -53,12 +60,12 @@ def write_condition_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: filename: Destination file name """ df = get_condition_df(df) - df.to_csv(filename, sep='\t', index=True) + df.to_csv(filename, sep="\t", index=True) -def create_condition_df(parameter_ids: Iterable[str], - condition_ids: Optional[Iterable[str]] = None - ) -> pd.DataFrame: +def create_condition_df( + parameter_ids: Iterable[str], condition_ids: Optional[Iterable[str]] = None +) -> pd.DataFrame: """Create empty condition DataFrame Arguments: @@ -93,8 +100,10 @@ def get_parametric_overrides(condition_df: pd.DataFrame) -> List[str]: Returns: List of parameter IDs that are mapped in a condition-specific way """ - constant_parameters = (set(condition_df.columns.values.tolist()) - - {CONDITION_ID, CONDITION_NAME}) + constant_parameters = set(condition_df.columns.values.tolist()) - { + CONDITION_ID, + CONDITION_NAME, + } result = [] for column in constant_parameters: diff --git a/petab/core.py b/petab/core.py index ffd5ea86..05deb161 100644 --- a/petab/core.py +++ b/petab/core.py @@ -1,10 +1,17 @@ """PEtab core functions (or functions that don't fit anywhere else)""" -from pathlib import Path import logging import os import re +from pathlib import Path from typing import ( - Iterable, Optional, Callable, Union, Any, Sequence, List, Dict, + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Sequence, + Union, ) from warnings import warn @@ -15,12 +22,20 @@ from .C import * # noqa: F403 logger = logging.getLogger(__name__) -__all__ = ['get_simulation_df', 'write_simulation_df', 'get_visualization_df', - 'write_visualization_df', 'get_notnull_columns', - 'flatten_timepoint_specific_output_overrides', - 'concat_tables', 'to_float_if_float', 'is_empty', - 'create_combine_archive', 'unique_preserve_order', - 'unflatten_simulation_df'] +__all__ = [ + "get_simulation_df", + "write_simulation_df", + "get_visualization_df", + "write_visualization_df", + "get_notnull_columns", + "flatten_timepoint_specific_output_overrides", + "concat_tables", + "to_float_if_float", + "is_empty", + "create_combine_archive", + "unique_preserve_order", + "unflatten_simulation_df", +] POSSIBLE_GROUPVARS_FLATTENED_PROBLEM = [ OBSERVABLE_ID, @@ -40,8 +55,9 @@ def get_simulation_df(simulation_file: Union[str, Path]) -> pd.DataFrame: Returns: Simulation DataFrame """ - return pd.read_csv(simulation_file, sep="\t", index_col=None, - float_precision='round_trip') + return pd.read_csv( + simulation_file, sep="\t", index_col=None, float_precision="round_trip" + ) def write_simulation_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: @@ -51,7 +67,7 @@ def write_simulation_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: df: PEtab simulation table filename: Destination file name """ - df.to_csv(filename, sep='\t', index=False) + df.to_csv(filename, sep="\t", index=False) def get_visualization_df( @@ -75,18 +91,24 @@ def get_visualization_df( try: types = {PLOT_NAME: str} - vis_spec = pd.read_csv(visualization_file, sep="\t", index_col=None, - converters=types, - float_precision='round_trip') + vis_spec = pd.read_csv( + visualization_file, + sep="\t", + index_col=None, + converters=types, + float_precision="round_trip", + ) except pd.errors.EmptyDataError: - warn("Visualization table is empty. Defaults will be used. " - "Refer to the documentation for details.") + warn( + "Visualization table is empty. Defaults will be used. " + "Refer to the documentation for details." 
+ ) vis_spec = pd.DataFrame() return vis_spec def write_visualization_df( - df: pd.DataFrame, filename: Union[str, Path] + df: pd.DataFrame, filename: Union[str, Path] ) -> None: """Write PEtab visualization table @@ -94,7 +116,7 @@ def write_visualization_df( df: PEtab visualization table filename: Destination file name """ - df.to_csv(filename, sep='\t', index=False) + df.to_csv(filename, sep="\t", index=False) def get_notnull_columns(df: pd.DataFrame, candidates: Iterable): @@ -109,8 +131,9 @@ def get_notnull_columns(df: pd.DataFrame, candidates: Iterable): candidates: Columns of ``df`` to consider """ - return [col for col in candidates - if col in df and not np.all(df[col].isnull())] + return [ + col for col in candidates if col in df and not np.all(df[col].isnull()) + ] def get_observable_replacement_id(groupvars, groupvar) -> str: @@ -126,21 +149,24 @@ def get_observable_replacement_id(groupvars, groupvar) -> str: Returns: The observable replacement ID. """ - replacement_id = '' + replacement_id = "" for field in POSSIBLE_GROUPVARS_FLATTENED_PROBLEM: if field in groupvars: - val = str(groupvar[groupvars.index(field)])\ - .replace(PARAMETER_SEPARATOR, '_').replace('.', '_') - if replacement_id == '': + val = ( + str(groupvar[groupvars.index(field)]) + .replace(PARAMETER_SEPARATOR, "_") + .replace(".", "_") + ) + if replacement_id == "": replacement_id = val - elif val != '': - replacement_id += f'__{val}' + elif val != "": + replacement_id += f"__{val}" return replacement_id def get_hyperparameter_replacement_id( - hyperparameter_type, - observable_replacement_id, + hyperparameter_type, + observable_replacement_id, ): """Get the full ID for a replaced hyperparameter. @@ -155,11 +181,11 @@ def get_hyperparameter_replacement_id( The hyperparameter replacement ID, with a field that will be replaced by the first matched substring in a regex substitution. """ - return f'{hyperparameter_type}\\1_{observable_replacement_id}' + return f"{hyperparameter_type}\\1_{observable_replacement_id}" def get_flattened_id_mappings( - petab_problem: 'petab.problem.Problem', + petab_problem: "petab.problem.Problem", ) -> Dict[str, Dict[str, str]]: """Get mapping from unflattened to flattened observable IDs. @@ -173,41 +199,48 @@ def get_flattened_id_mappings( for either: observable IDs; noise parameter IDs; or, observable parameter IDs. 
""" - groupvars = get_notnull_columns(petab_problem.measurement_df, - POSSIBLE_GROUPVARS_FLATTENED_PROBLEM) + groupvars = get_notnull_columns( + petab_problem.measurement_df, POSSIBLE_GROUPVARS_FLATTENED_PROBLEM + ) mappings = { OBSERVABLE_ID: {}, NOISE_PARAMETERS: {}, OBSERVABLE_PARAMETERS: {}, } - for groupvar, measurements in \ - petab_problem.measurement_df.groupby(groupvars, dropna=False): + for groupvar, measurements in petab_problem.measurement_df.groupby( + groupvars, dropna=False + ): observable_id = groupvar[groupvars.index(OBSERVABLE_ID)] - observable_replacement_id = \ - get_observable_replacement_id(groupvars, groupvar) + observable_replacement_id = get_observable_replacement_id( + groupvars, groupvar + ) - logger.debug(f'Creating synthetic observable {observable_id}') + logger.debug(f"Creating synthetic observable {observable_id}") if observable_replacement_id in petab_problem.observable_df.index: - raise RuntimeError('could not create synthetic observables ' - f'since {observable_replacement_id} was ' - 'already present in observable table') + raise RuntimeError( + "could not create synthetic observables " + f"since {observable_replacement_id} was " + "already present in observable table" + ) mappings[OBSERVABLE_ID][observable_replacement_id] = observable_id for field, hyperparameter_type, target in [ - (NOISE_PARAMETERS, 'noiseParameter', NOISE_FORMULA), - (OBSERVABLE_PARAMETERS, 'observableParameter', OBSERVABLE_FORMULA) + (NOISE_PARAMETERS, "noiseParameter", NOISE_FORMULA), + (OBSERVABLE_PARAMETERS, "observableParameter", OBSERVABLE_FORMULA), ]: if field in measurements: - mappings[field][get_hyperparameter_replacement_id( - hyperparameter_type=hyperparameter_type, - observable_replacement_id=observable_replacement_id, - )] = fr'{hyperparameter_type}([0-9]+)_{observable_id}' + mappings[field][ + get_hyperparameter_replacement_id( + hyperparameter_type=hyperparameter_type, + observable_replacement_id=observable_replacement_id, + ) + ] = rf"{hyperparameter_type}([0-9]+)_{observable_id}" return mappings def flatten_timepoint_specific_output_overrides( - petab_problem: 'petab.problem.Problem', + petab_problem: "petab.problem.Problem", ) -> None: """Flatten timepoint-specific output parameter overrides. 
@@ -225,31 +258,36 @@ def flatten_timepoint_specific_output_overrides( """ new_measurement_dfs = [] new_observable_dfs = [] - groupvars = get_notnull_columns(petab_problem.measurement_df, - POSSIBLE_GROUPVARS_FLATTENED_PROBLEM) + groupvars = get_notnull_columns( + petab_problem.measurement_df, POSSIBLE_GROUPVARS_FLATTENED_PROBLEM + ) mappings = get_flattened_id_mappings(petab_problem) - for groupvar, measurements in \ - petab_problem.measurement_df.groupby(groupvars, dropna=False): + for groupvar, measurements in petab_problem.measurement_df.groupby( + groupvars, dropna=False + ): obs_id = groupvar[groupvars.index(OBSERVABLE_ID)] - observable_replacement_id = \ - get_observable_replacement_id(groupvars, groupvar) + observable_replacement_id = get_observable_replacement_id( + groupvars, groupvar + ) observable = petab_problem.observable_df.loc[obs_id].copy() observable.name = observable_replacement_id for field, hyperparameter_type, target in [ - (NOISE_PARAMETERS, 'noiseParameter', NOISE_FORMULA), - (OBSERVABLE_PARAMETERS, 'observableParameter', OBSERVABLE_FORMULA) + (NOISE_PARAMETERS, "noiseParameter", NOISE_FORMULA), + (OBSERVABLE_PARAMETERS, "observableParameter", OBSERVABLE_FORMULA), ]: if field in measurements: - hyperparameter_replacement_id = \ + hyperparameter_replacement_id = ( get_hyperparameter_replacement_id( hyperparameter_type=hyperparameter_type, observable_replacement_id=observable_replacement_id, ) - hyperparameter_id = \ - mappings[field][hyperparameter_replacement_id] + ) + hyperparameter_id = mappings[field][ + hyperparameter_replacement_id + ] observable[target] = re.sub( hyperparameter_id, hyperparameter_replacement_id, @@ -267,7 +305,7 @@ def flatten_timepoint_specific_output_overrides( def unflatten_simulation_df( simulation_df: pd.DataFrame, - petab_problem: 'petab.problem.Problem', + petab_problem: "petab.problem.Problem", ) -> None: """Unflatten simulations from a flattened PEtab problem. @@ -286,20 +324,22 @@ def unflatten_simulation_df( The simulation dataframe for the unflattened PEtab problem. """ mappings = get_flattened_id_mappings(petab_problem) - original_observable_ids = ( - simulation_df[OBSERVABLE_ID] - .replace(mappings[OBSERVABLE_ID]) + original_observable_ids = simulation_df[OBSERVABLE_ID].replace( + mappings[OBSERVABLE_ID] + ) + unflattened_simulation_df = simulation_df.assign( + **{ + OBSERVABLE_ID: original_observable_ids, + } ) - unflattened_simulation_df = simulation_df.assign(**{ - OBSERVABLE_ID: original_observable_ids, - }) return unflattened_simulation_df def concat_tables( - tables: Union[str, Path, pd.DataFrame, - Iterable[Union[pd.DataFrame, str, Path]]], - file_parser: Optional[Callable] = None + tables: Union[ + str, Path, pd.DataFrame, Iterable[Union[pd.DataFrame, str, Path]] + ], + file_parser: Optional[Callable] = None, ) -> pd.DataFrame: """Concatenate DataFrames provided as DataFrames or filenames, and a parser @@ -327,8 +367,11 @@ def concat_tables( if isinstance(tmp_df, (str, Path)): tmp_df = file_parser(tmp_df) - df = pd.concat([df, tmp_df], sort=False, - ignore_index=isinstance(tmp_df.index, pd.RangeIndex)) + df = pd.concat( + [df, tmp_df], + sort=False, + ignore_index=isinstance(tmp_df.index, pd.RangeIndex), + ) return df @@ -358,16 +401,16 @@ def is_empty(val) -> bool: Returns: Whether the field is to be considered empty. 
""" - return val == '' or pd.isnull(val) + return val == "" or pd.isnull(val) def create_combine_archive( - yaml_file: Union[str, Path], - filename: Union[str, Path], - family_name: Optional[str] = None, - given_name: Optional[str] = None, - email: Optional[str] = None, - organization: Optional[str] = None, + yaml_file: Union[str, Path], + filename: Union[str, Path], + family_name: Optional[str] = None, + given_name: Optional[str] = None, + email: Optional[str] = None, + organization: Optional[str] = None, ) -> None: """Create COMBINE archive (https://co.mbine.org/documents/archive) based on PEtab YAML file. @@ -391,7 +434,8 @@ def create_combine_archive( except ImportError: raise ImportError( "To use PEtab's COMBINE functionality, libcombine " - "(python-libcombine) must be installed.") + "(python-libcombine) must be installed." + ) def _add_file_metadata(location: str, description: str = ""): """Add metadata to the added file""" @@ -399,7 +443,8 @@ def _add_file_metadata(location: str, description: str = ""): omex_description.setAbout(location) omex_description.setDescription(description) omex_description.setCreated( - libcombine.OmexDescription.getCurrentDateAndTime()) + libcombine.OmexDescription.getCurrentDateAndTime() + ) archive.addMetadata(location, omex_description) archive = libcombine.CombineArchive() @@ -409,24 +454,25 @@ def _add_file_metadata(location: str, description: str = ""): str(yaml_file), os.path.basename(yaml_file), "http://identifiers.org/combine.specifications/petab.version-1", - True + True, + ) + _add_file_metadata( + location=os.path.basename(yaml_file), description="PEtab YAML file" ) - _add_file_metadata(location=os.path.basename(yaml_file), - description="PEtab YAML file") # Add parameter file(s) that describe a single parameter table. # Works for a single file name, or a list of file names. - for parameter_subset_file in ( - list(np.array(yaml_config[PARAMETER_FILE]).flat)): + for parameter_subset_file in list( + np.array(yaml_config[PARAMETER_FILE]).flat + ): archive.addFile( os.path.join(path_prefix, parameter_subset_file), parameter_subset_file, libcombine.KnownFormats.lookupFormat("tsv"), - False + False, ) _add_file_metadata( - location=parameter_subset_file, - description="PEtab parameter file" + location=parameter_subset_file, description="PEtab parameter file" ) for problem in yaml_config[PROBLEMS]: @@ -435,12 +481,16 @@ def _add_file_metadata(location: str, description: str = ""): os.path.join(path_prefix, sbml_file), sbml_file, libcombine.KnownFormats.lookupFormat("sbml"), - False + False, ) _add_file_metadata(location=sbml_file, description="SBML model") - for field in [MEASUREMENT_FILES, OBSERVABLE_FILES, - VISUALIZATION_FILES, CONDITION_FILES]: + for field in [ + MEASUREMENT_FILES, + OBSERVABLE_FILES, + VISUALIZATION_FILES, + CONDITION_FILES, + ]: if field not in problem: continue @@ -449,11 +499,12 @@ def _add_file_metadata(location: str, description: str = ""): os.path.join(path_prefix, file), file, libcombine.KnownFormats.lookupFormat("tsv"), - False + False, ) desc = field.split("_")[0] - _add_file_metadata(location=file, - description=f"PEtab {desc} file") + _add_file_metadata( + location=file, description=f"PEtab {desc} file" + ) # Add archive metadata description = libcombine.OmexDescription() diff --git a/petab/lint.py b/petab/lint.py index 49fed16e..b332fdbd 100644 --- a/petab/lint.py +++ b/petab/lint.py @@ -13,41 +13,44 @@ from sympy.abc import _clash import petab -from . import (core, measurements, parameters) + +from . 
import core, measurements, parameters from .C import * # noqa: F403 from .models import Model logger = logging.getLogger(__name__) -__all__ = ['assert_all_parameters_present_in_parameter_df', - 'assert_measured_observables_defined', - 'assert_measurement_conditions_present_in_condition_table', - 'assert_measurements_not_null', - 'assert_measurements_numeric', - 'assert_model_parameters_in_condition_or_parameter_table', - 'assert_no_leading_trailing_whitespace', - 'assert_noise_distributions_valid', - 'assert_parameter_bounds_are_numeric', - 'assert_parameter_estimate_is_boolean', - 'assert_parameter_id_is_string', - 'assert_parameter_prior_parameters_are_valid', - 'assert_parameter_prior_type_is_valid', - 'assert_parameter_scale_is_valid', - 'assert_unique_observable_ids', - 'assert_unique_parameter_ids', - 'check_condition_df', - 'check_ids', - 'check_measurement_df', - 'check_observable_df', - 'check_parameter_bounds', - 'check_parameter_df', - 'condition_table_is_parameter_free', - 'get_non_unique', - 'is_scalar_float', - 'is_valid_identifier', - 'lint_problem', - 'measurement_table_has_observable_parameter_numeric_overrides', - 'measurement_table_has_timepoint_specific_mappings', - 'observable_table_has_nontrivial_noise_formula'] +__all__ = [ + "assert_all_parameters_present_in_parameter_df", + "assert_measured_observables_defined", + "assert_measurement_conditions_present_in_condition_table", + "assert_measurements_not_null", + "assert_measurements_numeric", + "assert_model_parameters_in_condition_or_parameter_table", + "assert_no_leading_trailing_whitespace", + "assert_noise_distributions_valid", + "assert_parameter_bounds_are_numeric", + "assert_parameter_estimate_is_boolean", + "assert_parameter_id_is_string", + "assert_parameter_prior_parameters_are_valid", + "assert_parameter_prior_type_is_valid", + "assert_parameter_scale_is_valid", + "assert_unique_observable_ids", + "assert_unique_parameter_ids", + "check_condition_df", + "check_ids", + "check_measurement_df", + "check_observable_df", + "check_parameter_bounds", + "check_parameter_df", + "condition_table_is_parameter_free", + "get_non_unique", + "is_scalar_float", + "is_valid_identifier", + "lint_problem", + "measurement_table_has_observable_parameter_numeric_overrides", + "measurement_table_has_timepoint_specific_mappings", + "observable_table_has_nontrivial_noise_formula", +] def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None: @@ -63,11 +66,13 @@ def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None: """ if missing_cols := set(req_cols) - set(df.columns.values): raise AssertionError( - f"DataFrame {name} requires the columns {missing_cols}.") + f"DataFrame {name} requires the columns {missing_cols}." 
+ ) def assert_no_leading_trailing_whitespace( - names_list: Iterable[str], name: str) -> None: + names_list: Iterable[str], name: str +) -> None: """Check that there is no trailing whitespace in elements of Iterable Arguments: @@ -77,17 +82,17 @@ def assert_no_leading_trailing_whitespace( Raises: AssertionError: if there is trailing whitespace """ - r = re.compile(r'(?:^\s)|(?:\s$)') + r = re.compile(r"(?:^\s)|(?:\s$)") for i, x in enumerate(names_list): if isinstance(x, str) and r.search(x): raise AssertionError(f"Whitespace around {name}[{i}] = '{x}'.") def check_condition_df( - df: pd.DataFrame, - model: Optional[Model] = None, - observable_df: Optional[pd.DataFrame] = None, - mapping_df: Optional[pd.DataFrame] = None, + df: pd.DataFrame, + model: Optional[Model] = None, + observable_df: Optional[pd.DataFrame] = None, + mapping_df: Optional[pd.DataFrame] = None, ) -> None: """Run sanity checks on PEtab condition table @@ -109,37 +114,49 @@ def check_condition_df( if df.index.name != CONDITION_ID: raise AssertionError( f"Condition table has wrong index {df.index.name}." - f"expected {CONDITION_ID}.") + f"expected {CONDITION_ID}." + ) - check_ids(df.index.values, kind='condition') + check_ids(df.index.values, kind="condition") if not df.index.is_unique: - raise AssertionError("Non-unique condition IDs: " - f"{df.index.values[df.index.duplicated()]}") + raise AssertionError( + "Non-unique condition IDs: " + f"{df.index.values[df.index.duplicated()]}" + ) for column_name in req_cols: if not np.issubdtype(df[column_name].dtype, np.number): assert_no_leading_trailing_whitespace( - df[column_name].values, column_name) + df[column_name].values, column_name + ) if model is not None: allowed_cols = set(model.get_valid_ids_for_condition_table()) if observable_df is not None: - allowed_cols |= set(petab.get_output_parameters( - model=model, observable_df=observable_df, mapping_df=mapping_df - )) + allowed_cols |= set( + petab.get_output_parameters( + model=model, + observable_df=observable_df, + mapping_df=mapping_df, + ) + ) if mapping_df is not None: allowed_cols |= set(mapping_df.index.values) for column_name in df.columns: - if column_name != CONDITION_NAME \ - and column_name not in allowed_cols: + if ( + column_name != CONDITION_NAME + and column_name not in allowed_cols + ): raise AssertionError( "Condition table contains column for unknown entity '" - f"{column_name}'.") + f"{column_name}'." 
+ ) -def check_measurement_df(df: pd.DataFrame, - observable_df: Optional[pd.DataFrame] = None) -> None: +def check_measurement_df( + df: pd.DataFrame, observable_df: Optional[pd.DataFrame] = None +) -> None: """Run sanity checks on PEtab measurement table Arguments: @@ -156,18 +173,20 @@ def check_measurement_df(df: pd.DataFrame, for column_name in MEASUREMENT_DF_REQUIRED_COLS: if not np.issubdtype(df[column_name].dtype, np.number): assert_no_leading_trailing_whitespace( - df[column_name].values, column_name) + df[column_name].values, column_name + ) for column_name in MEASUREMENT_DF_OPTIONAL_COLS: - if column_name in df \ - and not np.issubdtype(df[column_name].dtype, np.number): + if column_name in df and not np.issubdtype( + df[column_name].dtype, np.number + ): assert_no_leading_trailing_whitespace( - df[column_name].values, column_name) + df[column_name].values, column_name + ) if observable_df is not None: assert_measured_observables_defined(df, observable_df) - measurements.assert_overrides_match_parameter_count( - df, observable_df) + measurements.assert_overrides_match_parameter_count(df, observable_df) if OBSERVABLE_TRANSFORMATION in observable_df: # Check for positivity of measurements in case of @@ -178,21 +197,23 @@ def check_measurement_df(df: pd.DataFrame, for measurement, obs_id in zip(df[MEASUREMENT], df[OBSERVABLE_ID]): trafo = observable_df.loc[obs_id, OBSERVABLE_TRANSFORMATION] if measurement <= 0.0 and trafo in [LOG, LOG10]: - raise ValueError('Measurements with observable ' - f'transformation {trafo} must be ' - f'positive, but {measurement} <= 0.') + raise ValueError( + "Measurements with observable " + f"transformation {trafo} must be " + f"positive, but {measurement} <= 0." + ) assert_measurements_not_null(df) assert_measurements_numeric(df) def check_parameter_df( - df: pd.DataFrame, - model: Optional[Model] = None, - observable_df: Optional[pd.DataFrame] = None, - measurement_df: Optional[pd.DataFrame] = None, - condition_df: Optional[pd.DataFrame] = None, - mapping_df: Optional[pd.DataFrame] = None, + df: pd.DataFrame, + model: Optional[Model] = None, + observable_df: Optional[pd.DataFrame] = None, + measurement_df: Optional[pd.DataFrame] = None, + condition_df: Optional[pd.DataFrame] = None, + mapping_df: Optional[pd.DataFrame] = None, ) -> None: """Run sanity checks on PEtab parameter table @@ -212,27 +233,36 @@ def check_parameter_df( if df.index.name != PARAMETER_ID: raise AssertionError( f"Parameter table has wrong index {df.index.name}." - f"expected {PARAMETER_ID}.") + f"expected {PARAMETER_ID}." 
+ ) - check_ids(df.index.values, kind='parameter') + check_ids(df.index.values, kind="parameter") for column_name in PARAMETER_DF_REQUIRED_COLS[1:]: # 0 is PARAMETER_ID if not np.issubdtype(df[column_name].dtype, np.number): assert_no_leading_trailing_whitespace( - df[column_name].values, column_name) + df[column_name].values, column_name + ) # nominal value is generally optional, but required if any for any # parameter estimate != 1 non_estimated_par_ids = list( - df.index[(df[ESTIMATE] != 1) | ( - pd.api.types.is_string_dtype(df[ESTIMATE]) - and df[ESTIMATE] != '1')]) + df.index[ + (df[ESTIMATE] != 1) + | ( + pd.api.types.is_string_dtype(df[ESTIMATE]) + and df[ESTIMATE] != "1" + ) + ] + ) if non_estimated_par_ids: if NOMINAL_VALUE not in df: - raise AssertionError("Parameter table contains parameters " - f"{non_estimated_par_ids} that are not " - "specified to be estimated, " - f"but column {NOMINAL_VALUE} is missing.") + raise AssertionError( + "Parameter table contains parameters " + f"{non_estimated_par_ids} that are not " + "specified to be estimated, " + f"but column {NOMINAL_VALUE} is missing." + ) try: df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float) except ValueError as e: @@ -249,8 +279,7 @@ def check_parameter_df( check_parameter_bounds(df) assert_parameter_prior_type_is_valid(df) - if model and measurement_df is not None \ - and condition_df is not None: + if model and measurement_df is not None and condition_df is not None: assert_all_parameters_present_in_parameter_df( df, model, observable_df, measurement_df, condition_df, mapping_df ) @@ -267,19 +296,21 @@ def check_observable_df(observable_df: pd.DataFrame) -> None: """ _check_df(observable_df, OBSERVABLE_DF_REQUIRED_COLS[1:], "observable") - check_ids(observable_df.index.values, kind='observable') + check_ids(observable_df.index.values, kind="observable") for column_name in OBSERVABLE_DF_REQUIRED_COLS[1:]: if not np.issubdtype(observable_df[column_name].dtype, np.number): assert_no_leading_trailing_whitespace( - observable_df[column_name].values, column_name) + observable_df[column_name].values, column_name + ) for column_name in OBSERVABLE_DF_OPTIONAL_COLS: - if column_name in observable_df \ - and not np.issubdtype(observable_df[column_name].dtype, - np.number): + if column_name in observable_df and not np.issubdtype( + observable_df[column_name].dtype, np.number + ): assert_no_leading_trailing_whitespace( - observable_df[column_name].values, column_name) + observable_df[column_name].values, column_name + ) assert_noise_distributions_valid(observable_df) assert_unique_observable_ids(observable_df) @@ -292,30 +323,34 @@ def check_observable_df(observable_df: pd.DataFrame) -> None: except sp.SympifyError as e: raise AssertionError( f"Cannot parse expression '{obs}' " - f"for observable {row.Index}: {e}") from e + f"for observable {row.Index}: {e}" + ) from e noise = getattr(row, NOISE_FORMULA) try: sympified_noise = sp.sympify(noise, locals=_clash) - if sympified_noise is None \ - or (sympified_noise.is_Number - and not sympified_noise.is_finite): - raise AssertionError(f"No or non-finite {NOISE_FORMULA} " - f"given for observable {row.Index}.") + if sympified_noise is None or ( + sympified_noise.is_Number and not sympified_noise.is_finite + ): + raise AssertionError( + f"No or non-finite {NOISE_FORMULA} " + f"given for observable {row.Index}." 
+ ) except sp.SympifyError as e: raise AssertionError( f"Cannot parse expression '{noise}' " - f"for noise model for observable " f"{row.Index}: {e}" + f"for noise model for observable " + f"{row.Index}: {e}" ) from e def assert_all_parameters_present_in_parameter_df( - parameter_df: pd.DataFrame, - model: Model, - observable_df: pd.DataFrame, - measurement_df: pd.DataFrame, - condition_df: pd.DataFrame, - mapping_df: pd.DataFrame = None, + parameter_df: pd.DataFrame, + model: Model, + observable_df: pd.DataFrame, + measurement_df: pd.DataFrame, + condition_df: pd.DataFrame, + mapping_df: pd.DataFrame = None, ) -> None: """Ensure all required parameters are contained in the parameter table with no additional ones @@ -332,14 +367,18 @@ def assert_all_parameters_present_in_parameter_df( AssertionError: in case of problems """ required = parameters.get_required_parameters_for_parameter_table( - model=model, condition_df=condition_df, - observable_df=observable_df, measurement_df=measurement_df, - mapping_df=mapping_df + model=model, + condition_df=condition_df, + observable_df=observable_df, + measurement_df=measurement_df, + mapping_df=mapping_df, ) allowed = parameters.get_valid_parameters_for_parameter_table( - model=model, condition_df=condition_df, - observable_df=observable_df, measurement_df=measurement_df, + model=model, + condition_df=condition_df, + observable_df=observable_df, + measurement_df=measurement_df, mapping_df=mapping_df, ) @@ -351,8 +390,9 @@ def assert_all_parameters_present_in_parameter_df( # the mapping table if missing and mapping_df is not None: model_to_petab_mapping = {} - for map_from, map_to in zip(mapping_df.index.values, - mapping_df[MODEL_ENTITY_ID]): + for map_from, map_to in zip( + mapping_df.index.values, mapping_df[MODEL_ENTITY_ID] + ): if map_to in model_to_petab_mapping: model_to_petab_mapping[map_to].append(map_from) else: @@ -361,23 +401,26 @@ def assert_all_parameters_present_in_parameter_df( missing_id for missing_id in missing if missing_id not in model_to_petab_mapping - or all(mapping_parameter not in actual - for mapping_parameter in model_to_petab_mapping[missing_id]) + or all( + mapping_parameter not in actual + for mapping_parameter in model_to_petab_mapping[missing_id] + ) } if missing: - raise AssertionError('Missing parameter(s) in the model or the ' - 'parameters table: ' - + str(missing)) + raise AssertionError( + "Missing parameter(s) in the model or the " + "parameters table: " + str(missing) + ) if extraneous: - raise AssertionError('Extraneous parameter(s) in parameter table: ' - + str(extraneous)) + raise AssertionError( + "Extraneous parameter(s) in parameter table: " + str(extraneous) + ) def assert_measured_observables_defined( - measurement_df: pd.DataFrame, - observable_df: pd.DataFrame + measurement_df: pd.DataFrame, observable_df: pd.DataFrame ) -> None: """Check if all observables in the measurement table have been defined in the observable table @@ -430,7 +473,8 @@ def assert_parameter_id_is_string(parameter_df: pd.DataFrame) -> None: if isinstance(parameter_id, str): if parameter_id[0].isdigit(): raise AssertionError( - f"{PARAMETER_ID} {parameter_id} starts with integer.") + f"{PARAMETER_ID} {parameter_id} starts with integer." 
+ ) else: raise AssertionError(f"Empty {PARAMETER_ID} found.") @@ -449,7 +493,8 @@ def assert_unique_parameter_ids(parameter_df: pd.DataFrame) -> None: if len(non_unique_ids) > 0: raise AssertionError( f"Non-unique values found in the {PARAMETER_ID} column" - " of the parameter table: " + str(non_unique_ids)) + " of the parameter table: " + str(non_unique_ids) + ) def assert_parameter_scale_is_valid(parameter_df: pd.DataFrame) -> None: @@ -466,8 +511,10 @@ def assert_parameter_scale_is_valid(parameter_df: pd.DataFrame) -> None: """ for parameter_scale in parameter_df[PARAMETER_SCALE]: if parameter_scale not in [LIN, LOG, LOG10]: - raise AssertionError(f"Expected {LIN}, {LOG}, or {LOG10}, but " - f"got {parameter_scale}.") + raise AssertionError( + f"Expected {LIN}, {LOG}, or {LOG10}, but " + f"got {parameter_scale}." + ) def assert_parameter_bounds_are_numeric(parameter_df: pd.DataFrame) -> None: @@ -503,16 +550,18 @@ def check_parameter_bounds(parameter_df: pd.DataFrame) -> None: if not row[LOWER_BOUND] <= row[UPPER_BOUND]: raise AssertionError( f"{LOWER_BOUND} greater than {UPPER_BOUND} for " - f"{PARAMETER_ID} {row.name}.") - if (row[LOWER_BOUND] < 0.0 or row[UPPER_BOUND] < 0.0) \ - and row[PARAMETER_SCALE] in [LOG, LOG10]: + f"{PARAMETER_ID} {row.name}." + ) + if (row[LOWER_BOUND] < 0.0 or row[UPPER_BOUND] < 0.0) and row[ + PARAMETER_SCALE + ] in [LOG, LOG10]: raise AssertionError( f"Bounds for {row[PARAMETER_SCALE]} scaled parameter " - f"{ row.name} must be positive.") + f"{ row.name} must be positive." + ) -def assert_parameter_prior_type_is_valid( - parameter_df: pd.DataFrame) -> None: +def assert_parameter_prior_type_is_valid(parameter_df: pd.DataFrame) -> None: """Check that valid prior types have been selected Arguments: @@ -528,11 +577,13 @@ def assert_parameter_prior_type_is_valid( if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]): raise AssertionError( f"{col} must be one of {PRIOR_TYPES} but is " - f"'{row[col]}'.") + f"'{row[col]}'." + ) def assert_parameter_prior_parameters_are_valid( - parameter_df: pd.DataFrame) -> None: + parameter_df: pd.DataFrame, +) -> None: """Check that the prior parameters are valid. Arguments: @@ -541,10 +592,11 @@ def assert_parameter_prior_parameters_are_valid( Raises: AssertionError: in case of invalid prior parameters """ - prior_type_cols = [INITIALIZATION_PRIOR_TYPE, - OBJECTIVE_PRIOR_TYPE] - prior_par_cols = [INITIALIZATION_PRIOR_PARAMETERS, - OBJECTIVE_PRIOR_PARAMETERS] + prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE] + prior_par_cols = [ + INITIALIZATION_PRIOR_PARAMETERS, + OBJECTIVE_PRIOR_PARAMETERS, + ] # perform test for both priors for type_col, par_col in zip(prior_type_cols, prior_par_cols): @@ -556,14 +608,15 @@ def assert_parameter_prior_parameters_are_valid( else: type_ = row[type_col] # get parameters - pars_str = row.get(par_col, '') + pars_str = row.get(par_col, "") with_default_parameters = [PARAMETER_SCALE_UNIFORM] # check if parameters are empty if core.is_empty(pars_str): if type_ not in with_default_parameters: raise AssertionError( f"An empty {par_col} is only permitted with " - f"{type_col} in {with_default_parameters}.") + f"{type_col} in {with_default_parameters}." + ) # empty parameters fine continue # parse parameters @@ -573,14 +626,16 @@ def assert_parameter_prior_parameters_are_valid( ) except ValueError as e: raise AssertionError( - f"Could not parse prior parameters '{pars_str}'.") from e + f"Could not parse prior parameters '{pars_str}'." 
+ ) from e # all distributions take 2 parameters if len(pars) != 2: raise AssertionError( f"The prior parameters '{pars}' do not contain the " "expected number of entries (currently 'par1" - f"{PARAMETER_SEPARATOR}par2' for all prior types).") + f"{PARAMETER_SEPARATOR}par2' for all prior types)." + ) def assert_parameter_estimate_is_boolean(parameter_df: pd.DataFrame) -> None: @@ -597,7 +652,8 @@ def assert_parameter_estimate_is_boolean(parameter_df: pd.DataFrame) -> None: for estimate in parameter_df[ESTIMATE]: if int(estimate) not in [True, False]: raise AssertionError( - f"Expected 0 or 1 but got {estimate} in {ESTIMATE} column.") + f"Expected 0 or 1 but got {estimate} in {ESTIMATE} column." + ) def is_scalar_float(x: Any): @@ -620,9 +676,9 @@ def is_scalar_float(x: Any): def measurement_table_has_timepoint_specific_mappings( - measurement_df: Optional[pd.DataFrame], - allow_scalar_numeric_noise_parameters: bool = False, - allow_scalar_numeric_observable_parameters: bool = False, + measurement_df: Optional[pd.DataFrame], + allow_scalar_numeric_noise_parameters: bool = False, + allow_scalar_numeric_observable_parameters: bool = False, ) -> bool: """ Are there time-point or replicate specific parameter assignments in the @@ -652,7 +708,7 @@ def measurement_table_has_timepoint_specific_mappings( # mask numeric values for col, allow_scalar_numeric in [ (OBSERVABLE_PARAMETERS, allow_scalar_numeric_observable_parameters), - (NOISE_PARAMETERS, allow_scalar_numeric_noise_parameters) + (NOISE_PARAMETERS, allow_scalar_numeric_noise_parameters), ]: if col not in measurement_df: continue @@ -666,18 +722,24 @@ def measurement_table_has_timepoint_specific_mappings( grouping_cols = core.get_notnull_columns( measurement_df, - [OBSERVABLE_ID, - SIMULATION_CONDITION_ID, - PREEQUILIBRATION_CONDITION_ID, - OBSERVABLE_PARAMETERS, - NOISE_PARAMETERS]) + [ + OBSERVABLE_ID, + SIMULATION_CONDITION_ID, + PREEQUILIBRATION_CONDITION_ID, + OBSERVABLE_PARAMETERS, + NOISE_PARAMETERS, + ], + ) grouped_df = measurement_df.groupby(grouping_cols, dropna=False) grouping_cols = core.get_notnull_columns( measurement_df, - [OBSERVABLE_ID, - SIMULATION_CONDITION_ID, - PREEQUILIBRATION_CONDITION_ID]) + [ + OBSERVABLE_ID, + SIMULATION_CONDITION_ID, + PREEQUILIBRATION_CONDITION_ID, + ], + ) grouped_df2 = measurement_df.groupby(grouping_cols) # data frame has timepoint specific overrides if grouping by noise # parameters and observable parameters in addition to observable, @@ -686,7 +748,8 @@ def measurement_table_has_timepoint_specific_mappings( def observable_table_has_nontrivial_noise_formula( - observable_df: Optional[pd.DataFrame]) -> bool: + observable_df: Optional[pd.DataFrame], +) -> bool: """ Does any observable have a noise formula that is not just a single parameter? @@ -701,14 +764,19 @@ def observable_table_has_nontrivial_noise_formula( if observable_df is None: return False - return not observable_df[NOISE_FORMULA].apply( - lambda x: is_scalar_float(x) or - re.match(r'^[\w]+$', str(x)) is not None - ).all() + return ( + not observable_df[NOISE_FORMULA] + .apply( + lambda x: is_scalar_float(x) + or re.match(r"^[\w]+$", str(x)) is not None + ) + .all() + ) def measurement_table_has_observable_parameter_numeric_overrides( - measurement_df: pd.DataFrame) -> bool: + measurement_df: pd.DataFrame, +) -> bool: """Are there any numbers to override observable parameters? 
Arguments: @@ -723,7 +791,8 @@ def measurement_table_has_observable_parameter_numeric_overrides( for _, row in measurement_df.iterrows(): for override in measurements.split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None)): + row.get(OBSERVABLE_PARAMETERS, None) + ): if isinstance(override, numbers.Number): return True @@ -744,21 +813,23 @@ def assert_noise_distributions_valid(observable_df: pd.DataFrame) -> None: if OBSERVABLE_TRANSFORMATION in observable_df: # check for valid values for trafo in observable_df[OBSERVABLE_TRANSFORMATION]: - if trafo not in ['', *OBSERVABLE_TRANSFORMATIONS] \ - and not (isinstance(trafo, numbers.Number) - and np.isnan(trafo)): + if trafo not in ["", *OBSERVABLE_TRANSFORMATIONS] and not ( + isinstance(trafo, numbers.Number) and np.isnan(trafo) + ): raise ValueError( f"Unrecognized observable transformation in observable " - f"table: {trafo}.") + f"table: {trafo}." + ) if NOISE_DISTRIBUTION in observable_df: for distr in observable_df[NOISE_DISTRIBUTION]: - if distr not in ['', *NOISE_MODELS] \ - and not (isinstance(distr, numbers.Number) - and np.isnan(distr)): + if distr not in ["", *NOISE_MODELS] and not ( + isinstance(distr, numbers.Number) and np.isnan(distr) + ): raise ValueError( f"Unrecognized noise distribution in observable " - f"table: {distr}.") + f"table: {distr}." + ) def assert_unique_observable_ids(observable_df: pd.DataFrame) -> None: @@ -775,7 +846,8 @@ def assert_unique_observable_ids(observable_df: pd.DataFrame) -> None: if len(non_unique_ids) > 0: raise AssertionError( f"Non-unique values found in the {OBSERVABLE_ID} column" - " of the observable table: " + str(non_unique_ids)) + " of the observable table: " + str(non_unique_ids) + ) def get_non_unique(values): @@ -783,7 +855,7 @@ def get_non_unique(values): return [value for (value, count) in counter.items() if count > 1] -def lint_problem(problem: 'petab.Problem') -> bool: +def lint_problem(problem: "petab.Problem") -> bool: """Run PEtab validation on problem Arguments: @@ -831,7 +903,7 @@ def lint_problem(problem: 'petab.Problem') -> bool: problem.condition_df, model=problem.model, observable_df=problem.observable_df, - mapping_df=problem.mapping_df + mapping_df=problem.mapping_df, ) except AssertionError as e: logger.error(e) @@ -849,8 +921,9 @@ def lint_problem(problem: 'petab.Problem') -> bool: if problem.model is not None: for obs_id in problem.observable_df.index: if problem.model.has_entity_with_id(obs_id): - logger.error(f"Observable ID {obs_id} shadows model " - "entity.") + logger.error( + f"Observable ID {obs_id} shadows model " "entity." + ) errors_occurred = True else: logger.warning("Observable table not available. Skipping.") @@ -859,9 +932,12 @@ def lint_problem(problem: 'petab.Problem') -> bool: logger.info("Checking parameter table...") try: check_parameter_df( - problem.parameter_df, problem.model, problem.observable_df, - problem.measurement_df, problem.condition_df, - problem.mapping_df + problem.parameter_df, + problem.model, + problem.observable_df, + problem.measurement_df, + problem.condition_df, + problem.mapping_df, ) except AssertionError as e: logger.error(e) @@ -869,8 +945,11 @@ def lint_problem(problem: 'petab.Problem') -> bool: else: logger.warning("Parameter table not available. 
Skipping.") - if problem.model is not None and problem.condition_df is not None \ - and problem.parameter_df is not None: + if ( + problem.model is not None + and problem.condition_df is not None + and problem.parameter_df is not None + ): try: assert_model_parameters_in_condition_or_parameter_table( problem.model, @@ -885,30 +964,37 @@ def lint_problem(problem: 'petab.Problem') -> bool: if problem.visualization_df is not None: logger.info("Checking visualization table...") from petab.visualize.lint import validate_visualization_df + errors_occurred |= validate_visualization_df(problem) else: logger.warning("Visualization table not available. Skipping.") if errors_occurred: - logger.error('Not OK') - elif problem.measurement_df is None or problem.condition_df is None \ - or problem.model is None or problem.parameter_df is None \ - or problem.observable_df is None: - logger.warning('Not all files of the PEtab problem definition could ' - 'be checked.') + logger.error("Not OK") + elif ( + problem.measurement_df is None + or problem.condition_df is None + or problem.model is None + or problem.parameter_df is None + or problem.observable_df is None + ): + logger.warning( + "Not all files of the PEtab problem definition could " + "be checked." + ) else: - logger.info('PEtab format check completed successfully.') + logger.info("PEtab format check completed successfully.") return errors_occurred def assert_model_parameters_in_condition_or_parameter_table( - model: Model, - condition_df: pd.DataFrame, - parameter_df: pd.DataFrame, - mapping_df: pd.DataFrame = None, - observable_df: pd.DataFrame = None, - measurement_df: pd.DataFrame = None, + model: Model, + condition_df: pd.DataFrame, + parameter_df: pd.DataFrame, + mapping_df: pd.DataFrame = None, + observable_df: pd.DataFrame = None, + measurement_df: pd.DataFrame = None, ) -> None: """Model parameters that are rule targets must not be present in the parameter table. Other parameters must only be present in either in @@ -929,15 +1015,16 @@ def assert_model_parameters_in_condition_or_parameter_table( if mapping_df is not None: allowed_in_condition_cols |= { from_id - for from_id, to_id in zip(mapping_df.index.values, - mapping_df[MODEL_ENTITY_ID]) + for from_id, to_id in zip( + mapping_df.index.values, mapping_df[MODEL_ENTITY_ID] + ) # mapping table entities mapping to already allowed parameters if to_id in allowed_in_condition_cols # mapping table entities mapping to species or model.is_state_variable(to_id) } - allowed_in_parameter_table = \ + allowed_in_parameter_table = ( parameters.get_valid_parameters_for_parameter_table( model=model, condition_df=condition_df, @@ -945,41 +1032,51 @@ def assert_model_parameters_in_condition_or_parameter_table( measurement_df=measurement_df, mapping_df=mapping_df, ) + ) entities_in_condition_table = set(condition_df.columns) - {CONDITION_NAME} entities_in_parameter_table = set(parameter_df.index.values) disallowed_in_condition = { - x for x in (entities_in_condition_table - allowed_in_condition_cols) + x + for x in (entities_in_condition_table - allowed_in_condition_cols) # we only check model entities here, not output parameters if model.has_entity_with_id(x) } if disallowed_in_condition: is_or_are = "is" if len(disallowed_in_condition) == 1 else "are" - raise AssertionError(f"{disallowed_in_condition} {is_or_are} not " - "allowed to occur in condition table " - "columns.") + raise AssertionError( + f"{disallowed_in_condition} {is_or_are} not " + "allowed to occur in condition table " + "columns." 
+ ) disallowed_in_parameters = { - x for x in (entities_in_parameter_table - allowed_in_parameter_table) + x + for x in (entities_in_parameter_table - allowed_in_parameter_table) # we only check model entities here, not output parameters if model.has_entity_with_id(x) } if disallowed_in_parameters: is_or_are = "is" if len(disallowed_in_parameters) == 1 else "are" - raise AssertionError(f"{disallowed_in_parameters} {is_or_are} not " - "allowed to occur in the parameters table.") + raise AssertionError( + f"{disallowed_in_parameters} {is_or_are} not " + "allowed to occur in the parameters table." + ) in_both = entities_in_condition_table & entities_in_parameter_table if in_both: is_or_are = "is" if len(in_both) == 1 else "are" - raise AssertionError(f"{in_both} {is_or_are} present in both " - "the condition table and the parameter table.") + raise AssertionError( + f"{in_both} {is_or_are} present in both " + "the condition table and the parameter table." + ) def assert_measurement_conditions_present_in_condition_table( - measurement_df: pd.DataFrame, condition_df: pd.DataFrame) -> None: + measurement_df: pd.DataFrame, condition_df: pd.DataFrame +) -> None: """Ensure that all entries from measurement_df.simulationConditionId and measurement_df.preequilibrationConditionId are present in condition_df.index. @@ -994,13 +1091,16 @@ def assert_measurement_conditions_present_in_condition_table( used_conditions = set(measurement_df[SIMULATION_CONDITION_ID].values) if PREEQUILIBRATION_CONDITION_ID in measurement_df: - used_conditions |= \ - set(measurement_df[PREEQUILIBRATION_CONDITION_ID].dropna().values) + used_conditions |= set( + measurement_df[PREEQUILIBRATION_CONDITION_ID].dropna().values + ) available_conditions = set(condition_df.index.values) if missing_conditions := (used_conditions - available_conditions): - raise AssertionError("Measurement table references conditions that " - "are not specified in the condition table: " - + str(missing_conditions)) + raise AssertionError( + "Measurement table references conditions that " + "are not specified in the condition table: " + + str(missing_conditions) + ) def assert_measurements_not_null( @@ -1017,7 +1117,7 @@ def assert_measurements_not_null( Some measurement value(s) are null (missing). """ if measurement_df[MEASUREMENT].isnull().any(): - raise AssertionError('Some measurement(s) are null (missing).') + raise AssertionError("Some measurement(s) are null (missing).") def assert_measurements_numeric( @@ -1037,14 +1137,14 @@ def assert_measurements_numeric( """ not_null_measurement_values = measurement_df[MEASUREMENT].dropna() all_measurements_are_numeric = ( - pd.to_numeric(not_null_measurement_values, errors='coerce') + pd.to_numeric(not_null_measurement_values, errors="coerce") .notnull() .all() ) if not all_measurements_are_numeric: raise AssertionError( - 'Some values in the `petab.C.MEASUREMENT` column of the PEtab ' - 'measurements table are not numeric.' + "Some values in the `petab.C.MEASUREMENT` column of the PEtab " + "measurements table are not numeric." 
) @@ -1064,10 +1164,10 @@ def is_valid_identifier(x: str) -> bool: if pd.isna(x): return False - return re.match(r'^[a-zA-Z_]\w*$', x) is not None + return re.match(r"^[a-zA-Z_]\w*$", x) is not None -def check_ids(ids: Iterable[str], kind: str = '') -> None: +def check_ids(ids: Iterable[str], kind: str = "") -> None: """Check IDs are valid Arguments: @@ -1077,16 +1177,21 @@ def check_ids(ids: Iterable[str], kind: str = '') -> None: Raises: ValueError: in case of invalid IDs """ - invalids = [(index, _id) - for index, _id in enumerate(ids) - if not is_valid_identifier(_id)] + invalids = [ + (index, _id) + for index, _id in enumerate(ids) + if not is_valid_identifier(_id) + ] if invalids: # The first row is the header row, and Python lists are zero-indexed, # hence need to add 2 for the correct line number. offset = 2 - error_output = '\n'.join([ - f'Line {index+offset}: ' + - ('Missing ID' if pd.isna(_id) else _id) - for index, _id in invalids]) + error_output = "\n".join( + [ + f"Line {index+offset}: " + + ("Missing ID" if pd.isna(_id) else _id) + for index, _id in invalids + ] + ) raise ValueError(f"Invalid {kind} ID(s):\n{error_output}") diff --git a/petab/mapping.py b/petab/mapping.py index b7bcb677..357daf47 100644 --- a/petab/mapping.py +++ b/petab/mapping.py @@ -1,21 +1,22 @@ """Functionality related to the PEtab entity mapping table""" from pathlib import Path -from typing import Union, Optional -from .models import Model +from typing import Optional, Union + import pandas as pd from . import lint from .C import * # noqa: F403 +from .models import Model __all__ = [ - 'get_mapping_df', - 'write_mapping_df', - 'check_mapping_df', + "get_mapping_df", + "write_mapping_df", + "check_mapping_df", ] def get_mapping_df( - mapping_file: Union[None, str, Path, pd.DataFrame] + mapping_file: Union[None, str, Path, pd.DataFrame] ) -> pd.DataFrame: """ Read the provided mapping file into a ``pandas.Dataframe``. @@ -30,8 +31,9 @@ def get_mapping_df( return mapping_file if isinstance(mapping_file, (str, Path)): - mapping_file = pd.read_csv(mapping_file, sep='\t', - float_precision='round_trip') + mapping_file = pd.read_csv( + mapping_file, sep="\t", float_precision="round_trip" + ) if not isinstance(mapping_file.index, pd.RangeIndex): mapping_file.reset_index(inplace=True) @@ -39,10 +41,12 @@ def get_mapping_df( for col in MAPPING_DF_REQUIRED_COLS: if col not in mapping_file.columns: raise KeyError( - f"Mapping table missing mandatory field {PETAB_ENTITY_ID}.") + f"Mapping table missing mandatory field {PETAB_ENTITY_ID}." + ) lint.assert_no_leading_trailing_whitespace( - mapping_file.reset_index()[col].values, col) + mapping_file.reset_index()[col].values, col + ) mapping_file.set_index([PETAB_ENTITY_ID], inplace=True) @@ -57,12 +61,12 @@ def write_mapping_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: filename: Destination file name """ df = get_mapping_df(df) - df.to_csv(filename, sep='\t', index=True) + df.to_csv(filename, sep="\t", index=True) def check_mapping_df( - df: pd.DataFrame, - model: Optional[Model] = None, + df: pd.DataFrame, + model: Optional[Model] = None, ) -> None: """Run sanity checks on PEtab mapping table @@ -78,7 +82,8 @@ def check_mapping_df( if df.index.name != PETAB_ENTITY_ID: raise AssertionError( f"Mapping table has wrong index {df.index.name}. " - f"Expected {PETAB_ENTITY_ID}.") + f"Expected {PETAB_ENTITY_ID}." 
+ ) lint.check_ids(df.index.values, kind=PETAB_ENTITY_ID) @@ -91,8 +96,7 @@ def check_mapping_df( ) -def resolve_mapping(mapping_df: Optional[pd.DataFrame], - element: str) -> str: +def resolve_mapping(mapping_df: Optional[pd.DataFrame], element: str) -> str: """Resolve mapping for a given element. :param element: diff --git a/petab/measurements.py b/petab/measurements.py index 1ddac966..f329f144 100644 --- a/petab/measurements.py +++ b/petab/measurements.py @@ -10,23 +10,25 @@ import numpy as np import pandas as pd -from . import (core, lint, observables) +from . import core, lint, observables from .C import * # noqa: F403 -__all__ = ['assert_overrides_match_parameter_count', - 'create_measurement_df', - 'get_measurement_df', - 'get_measurement_parameter_ids', - 'get_rows_for_condition', - 'get_simulation_conditions', - 'measurements_have_replicates', - 'measurement_is_at_steady_state', - 'split_parameter_replacement_list', - 'write_measurement_df'] +__all__ = [ + "assert_overrides_match_parameter_count", + "create_measurement_df", + "get_measurement_df", + "get_measurement_parameter_ids", + "get_rows_for_condition", + "get_simulation_conditions", + "measurements_have_replicates", + "measurement_is_at_steady_state", + "split_parameter_replacement_list", + "write_measurement_df", +] def get_measurement_df( - measurement_file: Union[None, str, Path, pd.DataFrame] + measurement_file: Union[None, str, Path, pd.DataFrame] ) -> pd.DataFrame: """ Read the provided measurement file into a ``pandas.Dataframe``. @@ -41,11 +43,13 @@ def get_measurement_df( return measurement_file if isinstance(measurement_file, (str, Path)): - measurement_file = pd.read_csv(measurement_file, sep='\t', - float_precision='round_trip') + measurement_file = pd.read_csv( + measurement_file, sep="\t", float_precision="round_trip" + ) lint.assert_no_leading_trailing_whitespace( - measurement_file.columns.values, MEASUREMENT) + measurement_file.columns.values, MEASUREMENT + ) return measurement_file @@ -58,7 +62,7 @@ def write_measurement_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: filename: Destination file name """ df = get_measurement_df(df) - df.to_csv(filename, sep='\t', index=False) + df.to_csv(filename, sep="\t", index=False) def get_simulation_conditions(measurement_df: pd.DataFrame) -> pd.DataFrame: @@ -82,21 +86,27 @@ def get_simulation_conditions(measurement_df: pd.DataFrame) -> pd.DataFrame: # can be improved by checking for identical condition vectors grouping_cols = core.get_notnull_columns( measurement_df, - [SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID]) + [SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID], + ) # group by cols and return dataframe containing each combination # of those rows only once (and an additional counting row) # We require NaN-containing rows, but they are ignored by `groupby`, # therefore replace them before - simulation_conditions = measurement_df.fillna('').groupby( - grouping_cols).size().reset_index()[grouping_cols] + simulation_conditions = ( + measurement_df.fillna("") + .groupby(grouping_cols) + .size() + .reset_index()[grouping_cols] + ) # sort to be really sure that we always get the same order return simulation_conditions.sort_values(grouping_cols, ignore_index=True) -def get_rows_for_condition(measurement_df: pd.DataFrame, - condition: Union[pd.Series, pd.DataFrame, Dict] - ) -> pd.DataFrame: +def get_rows_for_condition( + measurement_df: pd.DataFrame, + condition: Union[pd.Series, pd.DataFrame, Dict], +) -> pd.DataFrame: """ Extract rows in 
`measurement_df` for `condition` according to 'preequilibrationConditionId' and 'simulationConditionId' in @@ -118,12 +128,15 @@ def get_rows_for_condition(measurement_df: pd.DataFrame, row_filter = 1 # check for equality in all grouping cols if PREEQUILIBRATION_CONDITION_ID in condition: - row_filter = (measurement_df[PREEQUILIBRATION_CONDITION_ID] - .fillna('') == - condition[PREEQUILIBRATION_CONDITION_ID]) & row_filter + row_filter = ( + measurement_df[PREEQUILIBRATION_CONDITION_ID].fillna("") + == condition[PREEQUILIBRATION_CONDITION_ID] + ) & row_filter if SIMULATION_CONDITION_ID in condition: - row_filter = (measurement_df[SIMULATION_CONDITION_ID] == - condition[SIMULATION_CONDITION_ID]) & row_filter + row_filter = ( + measurement_df[SIMULATION_CONDITION_ID] + == condition[SIMULATION_CONDITION_ID] + ) & row_filter # apply filter cur_measurement_df = measurement_df.loc[row_filter, :] @@ -146,16 +159,19 @@ def get_measurement_parameter_ids(measurement_df: pd.DataFrame) -> List[str]: def get_unique_parameters(series): return core.unique_preserve_order( itertools.chain.from_iterable( - series.apply(split_parameter_replacement_list))) + series.apply(split_parameter_replacement_list) + ) + ) return core.unique_preserve_order( get_unique_parameters(measurement_df[OBSERVABLE_PARAMETERS]) - + get_unique_parameters(measurement_df[NOISE_PARAMETERS])) + + get_unique_parameters(measurement_df[NOISE_PARAMETERS]) + ) def split_parameter_replacement_list( - list_string: Union[str, numbers.Number], - delim: str = PARAMETER_SEPARATOR) -> List[Union[str, numbers.Number]]: + list_string: Union[str, numbers.Number], delim: str = PARAMETER_SEPARATOR +) -> List[Union[str, numbers.Number]]: """ Split values in observableParameters and noiseParameters in measurement table. @@ -168,7 +184,7 @@ def split_parameter_replacement_list( List of split values. Numeric values may be converted to `float`, and parameter IDs are kept as strings. 
""" - if list_string is None or list_string == '': + if list_string is None or list_string == "": return [] if isinstance(list_string, numbers.Number): @@ -202,17 +218,19 @@ def create_measurement_df() -> pd.DataFrame: Created DataFrame """ - return pd.DataFrame(data={ - OBSERVABLE_ID: [], - PREEQUILIBRATION_CONDITION_ID: [], - SIMULATION_CONDITION_ID: [], - MEASUREMENT: [], - TIME: [], - OBSERVABLE_PARAMETERS: [], - NOISE_PARAMETERS: [], - DATASET_ID: [], - REPLICATE_ID: [] - }) + return pd.DataFrame( + data={ + OBSERVABLE_ID: [], + PREEQUILIBRATION_CONDITION_ID: [], + SIMULATION_CONDITION_ID: [], + MEASUREMENT: [], + TIME: [], + OBSERVABLE_PARAMETERS: [], + NOISE_PARAMETERS: [], + DATASET_ID: [], + REPLICATE_ID: [], + } + ) def measurements_have_replicates(measurement_df: pd.DataFrame) -> bool: @@ -226,15 +244,20 @@ def measurements_have_replicates(measurement_df: pd.DataFrame) -> bool: """ grouping_cols = core.get_notnull_columns( measurement_df, - [OBSERVABLE_ID, SIMULATION_CONDITION_ID, - PREEQUILIBRATION_CONDITION_ID, TIME]) + [ + OBSERVABLE_ID, + SIMULATION_CONDITION_ID, + PREEQUILIBRATION_CONDITION_ID, + TIME, + ], + ) return np.any( - measurement_df.fillna('').groupby(grouping_cols).size().values - 1) + measurement_df.fillna("").groupby(grouping_cols).size().values - 1 + ) def assert_overrides_match_parameter_count( - measurement_df: pd.DataFrame, - observable_df: pd.DataFrame + measurement_df: pd.DataFrame, observable_df: pd.DataFrame ) -> None: """Ensure that number of parameters in the observable definition matches the number of overrides in ``measurement_df`` @@ -245,15 +268,21 @@ def assert_overrides_match_parameter_count( """ # sympify only once and save number of parameters observable_parameters_count = { - obs_id: len(observables.get_formula_placeholders( - formula, obs_id, 'observable')) - for obs_id, formula in zip(observable_df.index.values, - observable_df[OBSERVABLE_FORMULA])} + obs_id: len( + observables.get_formula_placeholders(formula, obs_id, "observable") + ) + for obs_id, formula in zip( + observable_df.index.values, observable_df[OBSERVABLE_FORMULA] + ) + } noise_parameters_count = { - obs_id: - len(observables.get_formula_placeholders(formula, obs_id, 'noise')) - for obs_id, formula in zip(observable_df.index.values, - observable_df[NOISE_FORMULA])} + obs_id: len( + observables.get_formula_placeholders(formula, obs_id, "noise") + ) + for obs_id, formula in zip( + observable_df.index.values, observable_df[NOISE_FORMULA] + ) + } for _, row in measurement_df.iterrows(): # check observable parameters @@ -262,40 +291,49 @@ def assert_overrides_match_parameter_count( except KeyError as e: raise ValueError( f"Observable {row[OBSERVABLE_ID]} used in measurement table " - f"is not defined.") from e + f"is not defined." 
+ ) from e - actual = len(split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None))) + actual = len( + split_parameter_replacement_list( + row.get(OBSERVABLE_PARAMETERS, None) + ) + ) # No overrides are also allowed if actual != expected: formula = observable_df.loc[row[OBSERVABLE_ID], OBSERVABLE_FORMULA] raise AssertionError( - f'Mismatch of observable parameter overrides for ' - f'{row[OBSERVABLE_ID]} ({formula})' - f'in:\n{row}\n' - f'Expected {expected} but got {actual}') + f"Mismatch of observable parameter overrides for " + f"{row[OBSERVABLE_ID]} ({formula})" + f"in:\n{row}\n" + f"Expected {expected} but got {actual}" + ) # check noise parameters replacements = split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None)) + row.get(NOISE_PARAMETERS, None) + ) try: expected = noise_parameters_count[row[OBSERVABLE_ID]] # No overrides are also allowed if len(replacements) != expected: raise AssertionError( - f'Mismatch of noise parameter overrides in:\n{row}\n' - f'Expected {expected} but got {len(replacements)}') + f"Mismatch of noise parameter overrides in:\n{row}\n" + f"Expected {expected} but got {len(replacements)}" + ) except KeyError: # no overrides defined, but a numerical sigma can be provided # anyways - if len(replacements) != 1 \ - or not isinstance(replacements[0], numbers.Number): + if len(replacements) != 1 or not isinstance( + replacements[0], numbers.Number + ): raise AssertionError( - f'No placeholders have been specified in the noise model ' - f'for observable {row[OBSERVABLE_ID]}, but parameter ID ' - 'or multiple overrides were specified in the ' - 'noiseParameters column.') + f"No placeholders have been specified in the noise model " + f"for observable {row[OBSERVABLE_ID]}, but parameter ID " + "or multiple overrides were specified in the " + "noiseParameters column." + ) def measurement_is_at_steady_state(time: float) -> bool: diff --git a/petab/models/__init__.py b/petab/models/__init__.py index e37cd379..a5628aaa 100644 --- a/petab/models/__init__.py +++ b/petab/models/__init__.py @@ -1,5 +1,5 @@ -MODEL_TYPE_SBML = 'sbml' -MODEL_TYPE_PYSB = 'pysb' +MODEL_TYPE_SBML = "sbml" +MODEL_TYPE_PYSB = "pysb" known_model_types = { MODEL_TYPE_SBML, diff --git a/petab/models/model.py b/petab/models/model.py index 5a62cbef..29ebf495 100644 --- a/petab/models/model.py +++ b/petab/models/model.py @@ -54,7 +54,7 @@ def get_parameter_value(self, id_: str) -> float: @abc.abstractmethod def get_free_parameter_ids_with_values( - self + self, ) -> Iterable[Tuple[str, float]]: """Get free model parameters along with their values @@ -125,9 +125,7 @@ def is_state_variable(self, id_: str) -> bool: def model_factory( - filepath_or_buffer: Any, - model_language: str, - model_id: str = None + filepath_or_buffer: Any, model_language: str, model_id: str = None ) -> Model: """Create a PEtab model instance from the given model @@ -136,18 +134,21 @@ def model_factory( :param model_id: PEtab model ID for the given model :returns: A :py:class:`Model` instance representing the given model """ - from . import MODEL_TYPE_SBML, MODEL_TYPE_PYSB, known_model_types + from . 
import MODEL_TYPE_PYSB, MODEL_TYPE_SBML, known_model_types if model_language == MODEL_TYPE_SBML: from .sbml_model import SbmlModel + return SbmlModel.from_file(filepath_or_buffer, model_id=model_id) if model_language == MODEL_TYPE_PYSB: from .pysb_model import PySBModel + return PySBModel.from_file(filepath_or_buffer, model_id=model_id) if model_language in known_model_types: raise NotImplementedError( - f"Unsupported model format: {model_language}") + f"Unsupported model format: {model_language}" + ) raise ValueError(f"Unknown model format: {model_language}") diff --git a/petab/models/pysb_model.py b/petab/models/pysb_model.py index 30c6922f..9dfd9512 100644 --- a/petab/models/pysb_model.py +++ b/petab/models/pysb_model.py @@ -19,18 +19,20 @@ def _pysb_model_from_path(pysb_model_file: Union[str, Path]) -> pysb.Model: :return: The pysb Model instance """ pysb_model_file = Path(pysb_model_file) - pysb_model_module_name = pysb_model_file.with_suffix('').name + pysb_model_module_name = pysb_model_file.with_suffix("").name import importlib.util + spec = importlib.util.spec_from_file_location( - pysb_model_module_name, pysb_model_file) + pysb_model_module_name, pysb_model_file + ) module = importlib.util.module_from_spec(spec) sys.modules[pysb_model_module_name] = module spec.loader.exec_module(module) # find a pysb.Model instance in the module # 1) check if module.model exists and is a pysb.Model - model = getattr(module, 'model', None) + model = getattr(module, "model", None) if model: return model @@ -45,13 +47,10 @@ def _pysb_model_from_path(pysb_model_file: Union[str, Path]) -> pysb.Model: class PySBModel(Model): """PEtab wrapper for PySB models""" + type_id = MODEL_TYPE_PYSB - def __init__( - self, - model: pysb.Model, - model_id: str - ): + def __init__(self, model: pysb.Model, model_id: str): super().__init__() self.model = model @@ -60,14 +59,14 @@ def __init__( @staticmethod def from_file(filepath_or_buffer, model_id: str): return PySBModel( - model=_pysb_model_from_path(filepath_or_buffer), - model_id=model_id + model=_pysb_model_from_path(filepath_or_buffer), model_id=model_id ) def to_file(self, filename: [str, Path]): from pysb.export import export - model_source = export(self.model, 'pysb_flat') - with open(filename, 'w') as f: + + model_source = export(self.model, "pysb_flat") + with open(filename, "w") as f: f.write(model_source) @property @@ -88,7 +87,7 @@ def get_parameter_value(self, id_: str) -> float: raise ValueError(f"Parameter {id_} does not exist.") from e def get_free_parameter_ids_with_values( - self + self, ) -> Iterable[Tuple[str, float]]: return ((p.name, p.value) for p in self.model.parameters) @@ -104,12 +103,14 @@ def get_valid_parameters_for_parameter_table(self) -> Iterable[str]: return self.get_parameter_ids() def get_valid_ids_for_condition_table(self) -> Iterable[str]: - return itertools.chain(self.get_parameter_ids(), - self.get_compartment_ids()) + return itertools.chain( + self.get_parameter_ids(), self.get_compartment_ids() + ) def symbol_allowed_in_observable_formula(self, id_: str) -> bool: return id_ in ( - x.name for x in itertools.chain( + x.name + for x in itertools.chain( self.model.parameters, self.model.observables, self.model.expressions, @@ -156,7 +157,7 @@ def get_compartment_ids(self) -> Iterable[str]: def parse_species_name( - name: str + name: str, ) -> List[Tuple[str, Optional[str], Dict[str, Any]]]: """Parse a PySB species name @@ -166,12 +167,12 @@ def parse_species_name( mapping to site states. 
     :raises ValueError: In case this is not a valid ID
     """
-    if '=MultiState(' in name:
+    if "=MultiState(" in name:
         raise NotImplementedError("MultiState is not yet supported.")
 
     complex_constituent_pattern = re.compile(
-        r'^(?P<monomer>\w+)\((?P<site_config>.*)\)'
-        r'( \*\* (?P<compartment>.*))?$'
+        r"^(?P<monomer>\w+)\((?P<site_config>.*)\)"
+        r"( \*\* (?P<compartment>.*))?$"
     )
     result = []
     complex_constituents = name.split(" % ")
@@ -179,29 +180,33 @@
     for complex_constituent in complex_constituents:
         match = complex_constituent_pattern.match(complex_constituent)
         if not match:
-            raise ValueError(f"Invalid species name: '{name}' "
-                             f"('{complex_constituent}')")
-        monomer = match.groupdict()['monomer']
-        site_config_str = match.groupdict()['site_config']
-        compartment = match.groupdict()['compartment']
+            raise ValueError(
+                f"Invalid species name: '{name}' " f"('{complex_constituent}')"
+            )
+        monomer = match.groupdict()["monomer"]
+        site_config_str = match.groupdict()["site_config"]
+        compartment = match.groupdict()["compartment"]
         site_config = {}
         for site_str in site_config_str.split(", "):
             if not site_str:
                 continue
             site, config = site_str.split("=")
-            if config == 'None':
+            if config == "None":
                 config = None
             elif config.startswith("'"):
                 if not config.endswith("'"):
-                    raise ValueError(f"Invalid species name: '{name}' "
-                                     f"('{config}')")
+                    raise ValueError(
+                        f"Invalid species name: '{name}' " f"('{config}')"
+                    )
                 # strip quotes
                 config = config[1:-1]
             else:
                 config = int(config)
             site_config[site] = config
-        result.append((monomer, compartment, site_config),)
+        result.append(
+            (monomer, compartment, site_config),
+        )
 
     return result
@@ -215,7 +220,8 @@ def pattern_from_string(string: str, model: pysb.Model) -> pysb.ComplexPattern:
             pysb.MonomerPattern(
                 monomer=model.monomers.get(part[0]),
                 compartment=model.compartments.get(part[1], None),
-                site_conditions=part[2]
-            ))
+                site_conditions=part[2],
+            )
+        )
 
     return pysb.ComplexPattern(patterns, compartment=None)
diff --git a/petab/models/sbml_model.py b/petab/models/sbml_model.py
index 36101ee2..6e1b981c 100644
--- a/petab/models/sbml_model.py
+++ b/petab/models/sbml_model.py
@@ -6,10 +6,14 @@
 
 import libsbml
 
+from ..sbml import (
+    get_sbml_model,
+    is_sbml_consistent,
+    load_sbml_from_string,
+    write_sbml,
+)
 from .
import MODEL_TYPE_SBML from .model import Model -from ..sbml import (get_sbml_model, is_sbml_consistent, load_sbml_from_string, - write_sbml) class SbmlModel(Model): @@ -18,11 +22,11 @@ class SbmlModel(Model): type_id = MODEL_TYPE_SBML def __init__( - self, - sbml_model: libsbml.Model = None, - sbml_reader: libsbml.SBMLReader = None, - sbml_document: libsbml.SBMLDocument = None, - model_id: str = None + self, + sbml_model: libsbml.Model = None, + sbml_reader: libsbml.SBMLReader = None, + sbml_document: libsbml.SBMLDocument = None, + model_id: str = None, ): super().__init__() @@ -40,9 +44,9 @@ def __getstate__(self): if self.sbml_model: sbml_document = self.sbml_model.getSBMLDocument() sbml_writer = libsbml.SBMLWriter() - state['sbml_string'] = sbml_writer.writeSBMLToString(sbml_document) + state["sbml_string"] = sbml_writer.writeSBMLToString(sbml_document) - exclude = ['sbml_reader', 'sbml_document', 'sbml_model'] + exclude = ["sbml_reader", "sbml_document", "sbml_model"] for key in exclude: state.pop(key) @@ -51,17 +55,21 @@ def __getstate__(self): def __setstate__(self, state): """Set state after unpickling""" # load SBML model from pickled string - sbml_string = state.pop('sbml_string', None) + sbml_string = state.pop("sbml_string", None) if sbml_string: - self.sbml_reader, self.sbml_document, self.sbml_model = \ - load_sbml_from_string(sbml_string) + ( + self.sbml_reader, + self.sbml_document, + self.sbml_model, + ) = load_sbml_from_string(sbml_string) self.__dict__.update(state) @staticmethod def from_file(filepath_or_buffer, model_id: str = None): sbml_reader, sbml_document, sbml_model = get_sbml_model( - filepath_or_buffer) + filepath_or_buffer + ) return SbmlModel( sbml_model=sbml_model, sbml_reader=sbml_reader, @@ -78,8 +86,9 @@ def model_id(self, model_id): self._model_id = model_id def to_file(self, filename: [str, Path]): - write_sbml(self.sbml_document or self.sbml_model.getSBMLDocument(), - filename) + write_sbml( + self.sbml_document or self.sbml_model.getSBMLDocument(), filename + ) def get_parameter_value(self, id_: str) -> float: parameter = self.sbml_model.getParameter(id_) @@ -88,7 +97,7 @@ def get_parameter_value(self, id_: str) -> float: return parameter.getValue() def get_free_parameter_ids_with_values( - self + self, ) -> Iterable[Tuple[str, float]]: rule_targets = { ar.getVariable() for ar in self.sbml_model.getListOfRules() @@ -106,7 +115,8 @@ def get_parameter_ids(self) -> Iterable[str]: } return ( - p.getId() for p in self.sbml_model.getListOfParameters() + p.getId() + for p in self.sbml_model.getListOfParameters() if p.getId() not in rule_targets ) @@ -130,25 +140,31 @@ def get_valid_parameters_for_parameter_table(self) -> Iterable[str]: ar.getVariable() for ar in self.sbml_model.getListOfRules() } - return (p.getId() for p in self.sbml_model.getListOfParameters() - if p.getId() not in disallowed_set) + return ( + p.getId() + for p in self.sbml_model.getListOfParameters() + if p.getId() not in disallowed_set + ) def get_valid_ids_for_condition_table(self) -> Iterable[str]: return ( - x.getId() for x in itertools.chain( + x.getId() + for x in itertools.chain( self.sbml_model.getListOfParameters(), self.sbml_model.getListOfSpecies(), - self.sbml_model.getListOfCompartments() + self.sbml_model.getListOfCompartments(), ) ) def symbol_allowed_in_observable_formula(self, id_: str) -> bool: - return self.sbml_model.getElementBySId(id_) or id_ == 'time' + return self.sbml_model.getElementBySId(id_) or id_ == "time" def is_valid(self) -> bool: return 
is_sbml_consistent(self.sbml_model.getSBMLDocument()) def is_state_variable(self, id_: str) -> bool: - return (self.sbml_model.getSpecies(id_) is not None - or self.sbml_model.getCompartment(id_) is not None - or self.sbml_model.getRuleByVariable(id_) is not None) + return ( + self.sbml_model.getSpecies(id_) is not None + or self.sbml_model.getCompartment(id_) is not None + or self.sbml_model.getRuleByVariable(id_) is not None + ) diff --git a/petab/observables.py b/petab/observables.py index 91251f83..27cafab5 100644 --- a/petab/observables.py +++ b/petab/observables.py @@ -14,17 +14,17 @@ from .models import Model __all__ = [ - 'create_observable_df', - 'get_formula_placeholders', - 'get_observable_df', - 'get_output_parameters', - 'get_placeholders', - 'write_observable_df' + "create_observable_df", + "get_formula_placeholders", + "get_observable_df", + "get_output_parameters", + "get_placeholders", + "write_observable_df", ] def get_observable_df( - observable_file: Union[str, pd.DataFrame, Path, None] + observable_file: Union[str, pd.DataFrame, Path, None] ) -> Union[pd.DataFrame, None]: """ Read the provided observable file into a ``pandas.Dataframe``. @@ -39,11 +39,13 @@ def get_observable_df( return observable_file if isinstance(observable_file, (str, Path)): - observable_file = pd.read_csv(observable_file, sep='\t', - float_precision='round_trip') + observable_file = pd.read_csv( + observable_file, sep="\t", float_precision="round_trip" + ) lint.assert_no_leading_trailing_whitespace( - observable_file.columns.values, "observable") + observable_file.columns.values, "observable" + ) if not isinstance(observable_file.index, pd.RangeIndex): observable_file.reset_index(inplace=True) @@ -52,7 +54,8 @@ def get_observable_df( observable_file.set_index([OBSERVABLE_ID], inplace=True) except KeyError: raise KeyError( - f"Observable table missing mandatory field {OBSERVABLE_ID}.") + f"Observable table missing mandatory field {OBSERVABLE_ID}." + ) return observable_file @@ -65,15 +68,15 @@ def write_observable_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: filename: Destination file name """ df = get_observable_df(df) - df.to_csv(filename, sep='\t', index=True) + df.to_csv(filename, sep="\t", index=True) def get_output_parameters( - observable_df: pd.DataFrame, - model: Model, - observables: bool = True, - noise: bool = True, - mapping_df: pd.DataFrame = None + observable_df: pd.DataFrame, + model: Model, + observables: bool = True, + noise: bool = True, + mapping_df: pd.DataFrame = None, ) -> List[str]: """Get output parameters @@ -98,18 +101,23 @@ def get_output_parameters( output_parameters = OrderedDict() for formula in formulas: - free_syms = sorted(sp.sympify(formula, locals=_clash).free_symbols, - key=lambda symbol: symbol.name) + free_syms = sorted( + sp.sympify(formula, locals=_clash).free_symbols, + key=lambda symbol: symbol.name, + ) for free_sym in free_syms: sym = str(free_sym) if model.symbol_allowed_in_observable_formula(sym): continue # does it mapping to a model entity? 
- if mapping_df is not None \ - and sym in mapping_df.index \ - and model.symbol_allowed_in_observable_formula( - mapping_df.loc[sym, MODEL_ENTITY_ID]): + if ( + mapping_df is not None + and sym in mapping_df.index + and model.symbol_allowed_in_observable_formula( + mapping_df.loc[sym, MODEL_ENTITY_ID] + ) + ): continue output_parameters[sym] = None @@ -118,9 +126,9 @@ def get_output_parameters( def get_formula_placeholders( - formula_string: str, - observable_id: str, - override_type: Literal['observable', 'noise'], + formula_string: str, + observable_id: str, + override_type: Literal["observable", "noise"], ) -> List[str]: """ Get placeholder variables in noise or observable definition for the @@ -142,26 +150,34 @@ def get_formula_placeholders( if not isinstance(formula_string, str): return [] - pattern = re.compile(r'(?:^|\W)(' + re.escape(override_type) - + r'Parameter\d+_' + re.escape(observable_id) - + r')(?=\W|$)') + pattern = re.compile( + r"(?:^|\W)(" + + re.escape(override_type) + + r"Parameter\d+_" + + re.escape(observable_id) + + r")(?=\W|$)" + ) placeholder_set = set(pattern.findall(formula_string)) # need to sort and check that there are no gaps in numbering - placeholders = [f"{override_type}Parameter{i}_{observable_id}" - for i in range(1, len(placeholder_set) + 1)] + placeholders = [ + f"{override_type}Parameter{i}_{observable_id}" + for i in range(1, len(placeholder_set) + 1) + ] if placeholder_set != set(placeholders): - raise AssertionError("Non-consecutive numbering of placeholder " - f"parameter for {placeholder_set}") + raise AssertionError( + "Non-consecutive numbering of placeholder " + f"parameter for {placeholder_set}" + ) return placeholders def get_placeholders( - observable_df: pd.DataFrame, - observables: bool = True, - noise: bool = True, + observable_df: pd.DataFrame, + observables: bool = True, + noise: bool = True, ) -> List[str]: """Get all placeholder parameters from observable table observableFormulas and noiseFormulas @@ -181,21 +197,23 @@ def get_placeholders( placeholder_types = [] formula_columns = [] if observables: - placeholder_types.append('observable') + placeholder_types.append("observable") formula_columns.append(OBSERVABLE_FORMULA) if noise: - placeholder_types.append('noise') + placeholder_types.append("noise") formula_columns.append(NOISE_FORMULA) placeholders = [] for _, row in observable_df.iterrows(): - for placeholder_type, formula_column \ - in zip(placeholder_types, formula_columns): + for placeholder_type, formula_column in zip( + placeholder_types, formula_columns + ): if formula_column not in row: continue cur_placeholders = get_formula_placeholders( - row[formula_column], row.name, placeholder_type) + row[formula_column], row.name, placeholder_type + ) placeholders.extend(cur_placeholders) return core.unique_preserve_order(placeholders) diff --git a/petab/parameter_mapping.py b/petab/parameter_mapping.py index ff1fb891..1483fc13 100644 --- a/petab/parameter_mapping.py +++ b/petab/parameter_mapping.py @@ -6,27 +6,37 @@ import os import re import warnings -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, Literal +from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union import libsbml import numpy as np import pandas as pd -from . import ENV_NUM_THREADS, core, lint, measurements, observables, \ - parameters +from . 
import ( + ENV_NUM_THREADS, + core, + lint, + measurements, + observables, + parameters, +) from .C import * # noqa: F403 -from .models import Model from .mapping import resolve_mapping +from .models import Model logger = logging.getLogger(__name__) -__all__ = ['get_optimization_to_simulation_parameter_mapping', - 'get_parameter_mapping_for_condition', - 'handle_missing_overrides', - 'merge_preeq_and_sim_pars', - 'merge_preeq_and_sim_pars_condition', - 'ParMappingDict', 'ParMappingDictTuple', - 'ScaleMappingDict', 'ScaleMappingDictTuple', - 'ParMappingDictQuadruple'] +__all__ = [ + "get_optimization_to_simulation_parameter_mapping", + "get_parameter_mapping_for_condition", + "handle_missing_overrides", + "merge_preeq_and_sim_pars", + "merge_preeq_and_sim_pars_condition", + "ParMappingDict", + "ParMappingDictTuple", + "ScaleMappingDict", + "ScaleMappingDictTuple", + "ParMappingDictQuadruple", +] # Parameter mapping for condition @@ -39,23 +49,24 @@ ScaleMappingDictTuple = Tuple[ScaleMappingDict, ScaleMappingDict] # Parameter mapping for combination of preequilibration and simulation # conditions, for parameter and scale mapping -ParMappingDictQuadruple = Tuple[ParMappingDict, ParMappingDict, - ScaleMappingDict, ScaleMappingDict] +ParMappingDictQuadruple = Tuple[ + ParMappingDict, ParMappingDict, ScaleMappingDict, ScaleMappingDict +] def get_optimization_to_simulation_parameter_mapping( - condition_df: pd.DataFrame, - measurement_df: pd.DataFrame, - parameter_df: Optional[pd.DataFrame] = None, - observable_df: Optional[pd.DataFrame] = None, - mapping_df: Optional[pd.DataFrame] = None, - sbml_model: libsbml.Model = None, - simulation_conditions: Optional[pd.DataFrame] = None, - warn_unmapped: Optional[bool] = True, - scaled_parameters: bool = False, - fill_fixed_parameters: bool = True, - allow_timepoint_specific_numeric_noise_parameters: bool = False, - model: Model = None, + condition_df: pd.DataFrame, + measurement_df: pd.DataFrame, + parameter_df: Optional[pd.DataFrame] = None, + observable_df: Optional[pd.DataFrame] = None, + mapping_df: Optional[pd.DataFrame] = None, + sbml_model: libsbml.Model = None, + simulation_conditions: Optional[pd.DataFrame] = None, + warn_unmapped: Optional[bool] = True, + scaled_parameters: bool = False, + fill_fixed_parameters: bool = True, + allow_timepoint_specific_numeric_noise_parameters: bool = False, + model: Model = None, ) -> List[ParMappingDictQuadruple]: """ Create list of mapping dicts from PEtab-problem to model parameters. @@ -106,25 +117,31 @@ def get_optimization_to_simulation_parameter_mapping( be empty. ``NaN`` is used where no mapping exists. """ if sbml_model: - warnings.warn("Passing a model via the `sbml_model` argument is " - "deprecated, use `model=petab.models.sbml_model." - "SbmlModel(...)` instead.", DeprecationWarning, - stacklevel=2) + warnings.warn( + "Passing a model via the `sbml_model` argument is " + "deprecated, use `model=petab.models.sbml_model." + "SbmlModel(...)` instead.", + DeprecationWarning, + stacklevel=2, + ) from petab.models.sbml_model import SbmlModel + if model: - raise ValueError("Arguments `model` and `sbml_model` are " - "mutually exclusive.") + raise ValueError( + "Arguments `model` and `sbml_model` are " "mutually exclusive." 
+ ) model = SbmlModel(sbml_model=sbml_model) # Ensure inputs are okay _perform_mapping_checks( measurement_df, - allow_timepoint_specific_numeric_noise_parameters= # noqa: E251,E501 - allow_timepoint_specific_numeric_noise_parameters) + allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 + ) if simulation_conditions is None: simulation_conditions = measurements.get_simulation_conditions( - measurement_df) + measurement_df + ) simulation_parameters = dict(model.get_free_parameter_ids_with_values()) # Add output parameters that are not already defined in the model @@ -143,46 +160,71 @@ def get_optimization_to_simulation_parameter_mapping( mapping = map( _map_condition, _map_condition_arg_packer( - simulation_conditions, measurement_df, condition_df, - parameter_df, mapping_df, - model, simulation_parameters, warn_unmapped, scaled_parameters, + simulation_conditions, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + scaled_parameters, fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters)) + allow_timepoint_specific_numeric_noise_parameters, + ), + ) return list(mapping) # Run multi-threaded from concurrent.futures import ThreadPoolExecutor + with ThreadPoolExecutor(max_workers=num_threads) as executor: mapping = executor.map( _map_condition, _map_condition_arg_packer( - simulation_conditions, measurement_df, condition_df, - parameter_df, mapping_df, model, simulation_parameters, - warn_unmapped, scaled_parameters, fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters)) + simulation_conditions, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + scaled_parameters, + fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters, + ), + ) return list(mapping) def _map_condition_arg_packer( - simulation_conditions, - measurement_df, - condition_df, - parameter_df, - mapping_df, - model, - simulation_parameters, - warn_unmapped, - scaled_parameters, - fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters + simulation_conditions, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + scaled_parameters, + fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters, ): """Helper function to pack extra arguments for _map_condition""" for _, condition in simulation_conditions.iterrows(): yield ( - condition, measurement_df, condition_df, parameter_df, mapping_df, - model, simulation_parameters, warn_unmapped, scaled_parameters, + condition, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + scaled_parameters, fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters + allow_timepoint_specific_numeric_noise_parameters, ) @@ -193,24 +235,39 @@ def _map_condition(packed_args): :py:func:`get_optimization_to_simulation_parameter_mapping`. 
""" - (condition, measurement_df, condition_df, parameter_df, mapping_df, model, - simulation_parameters, warn_unmapped, scaled_parameters, - fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters) = packed_args + ( + condition, + measurement_df, + condition_df, + parameter_df, + mapping_df, + model, + simulation_parameters, + warn_unmapped, + scaled_parameters, + fill_fixed_parameters, + allow_timepoint_specific_numeric_noise_parameters, + ) = packed_args cur_measurement_df = None # Get the condition specific measurements for the current condition, but # only if relevant for parameter mapping - if (OBSERVABLE_PARAMETERS in measurement_df - and measurement_df[OBSERVABLE_PARAMETERS].notna().any()) \ - or (NOISE_PARAMETERS in measurement_df - and measurement_df[NOISE_PARAMETERS].notna().any()): + if ( + OBSERVABLE_PARAMETERS in measurement_df + and measurement_df[OBSERVABLE_PARAMETERS].notna().any() + ) or ( + NOISE_PARAMETERS in measurement_df + and measurement_df[NOISE_PARAMETERS].notna().any() + ): cur_measurement_df = measurements.get_rows_for_condition( - measurement_df, condition) + measurement_df, condition + ) - if PREEQUILIBRATION_CONDITION_ID not in condition \ - or not isinstance(condition[PREEQUILIBRATION_CONDITION_ID], str) \ - or not condition[PREEQUILIBRATION_CONDITION_ID]: + if ( + PREEQUILIBRATION_CONDITION_ID not in condition + or not isinstance(condition[PREEQUILIBRATION_CONDITION_ID], str) + or not condition[PREEQUILIBRATION_CONDITION_ID] + ): par_map_preeq = {} scale_map_preeq = {} else: @@ -226,8 +283,7 @@ def _map_condition(packed_args): warn_unmapped=warn_unmapped, scaled_parameters=scaled_parameters, fill_fixed_parameters=fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters= # noqa: E251,E501 - allow_timepoint_specific_numeric_noise_parameters + allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 ) par_map_sim, scale_map_sim = get_parameter_mapping_for_condition( @@ -242,27 +298,26 @@ def _map_condition(packed_args): warn_unmapped=warn_unmapped, scaled_parameters=scaled_parameters, fill_fixed_parameters=fill_fixed_parameters, - allow_timepoint_specific_numeric_noise_parameters= # noqa: E251,E501 - allow_timepoint_specific_numeric_noise_parameters + allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 ) return par_map_preeq, par_map_sim, scale_map_preeq, scale_map_sim def get_parameter_mapping_for_condition( - condition_id: str, - is_preeq: bool, - cur_measurement_df: Optional[pd.DataFrame], - sbml_model: libsbml.Model = None, - condition_df: pd.DataFrame = None, - parameter_df: pd.DataFrame = None, - mapping_df: Optional[pd.DataFrame] = None, - simulation_parameters: Optional[Dict[str, str]] = None, - warn_unmapped: bool = True, - scaled_parameters: bool = False, - fill_fixed_parameters: bool = True, - allow_timepoint_specific_numeric_noise_parameters: bool = False, - model: Model = None, + condition_id: str, + is_preeq: bool, + cur_measurement_df: Optional[pd.DataFrame], + sbml_model: libsbml.Model = None, + condition_df: pd.DataFrame = None, + parameter_df: pd.DataFrame = None, + mapping_df: Optional[pd.DataFrame] = None, + simulation_parameters: Optional[Dict[str, str]] = None, + warn_unmapped: bool = True, + scaled_parameters: bool = False, + fill_fixed_parameters: bool = True, + allow_timepoint_specific_numeric_noise_parameters: bool = False, + model: Model = None, ) -> 
Tuple[ParMappingDict, ScaleMappingDict]: """ Create dictionary of parameter value and parameter scale mappings from @@ -313,25 +368,31 @@ def get_parameter_mapping_for_condition( ``NaN`` is used where no mapping exists. """ if sbml_model: - warnings.warn("Passing a model via the `sbml_model` argument is " - "deprecated, use `model=petab.models.sbml_model." - "SbmlModel(...)` instead.", DeprecationWarning, - stacklevel=2) + warnings.warn( + "Passing a model via the `sbml_model` argument is " + "deprecated, use `model=petab.models.sbml_model." + "SbmlModel(...)` instead.", + DeprecationWarning, + stacklevel=2, + ) from petab.models.sbml_model import SbmlModel + if model: - raise ValueError("Arguments `model` and `sbml_model` are " - "mutually exclusive.") + raise ValueError( + "Arguments `model` and `sbml_model` are " "mutually exclusive." + ) model = SbmlModel(sbml_model=sbml_model) if cur_measurement_df is not None: _perform_mapping_checks( cur_measurement_df, - allow_timepoint_specific_numeric_noise_parameters= # noqa: E251,E501 - allow_timepoint_specific_numeric_noise_parameters) + allow_timepoint_specific_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 + ) if simulation_parameters is None: simulation_parameters = dict( - model.get_free_parameter_ids_with_values()) + model.get_free_parameter_ids_with_values() + ) # NOTE: order matters here - the former is overwritten by the latter: # model < condition table < measurement < table parameter table @@ -350,11 +411,21 @@ def get_parameter_mapping_for_condition( if not is_preeq: handle_missing_overrides(par_mapping, warn=warn_unmapped) - _apply_condition_parameters(par_mapping, scale_mapping, condition_id, - condition_df, model, mapping_df) - _apply_parameter_table(par_mapping, scale_mapping, - parameter_df, scaled_parameters, - fill_fixed_parameters) + _apply_condition_parameters( + par_mapping, + scale_mapping, + condition_id, + condition_df, + model, + mapping_df, + ) + _apply_parameter_table( + par_mapping, + scale_mapping, + parameter_df, + scaled_parameters, + fill_fixed_parameters, + ) return par_mapping, scale_mapping @@ -373,8 +444,7 @@ def _output_parameters_to_nan(mapping: ParMappingDict) -> None: def _apply_output_parameter_overrides( - mapping: ParMappingDict, - cur_measurement_df: pd.DataFrame + mapping: ParMappingDict, cur_measurement_df: pd.DataFrame ) -> None: """ Apply output parameter overrides to the parameter mapping dict for a given @@ -390,21 +460,25 @@ def _apply_output_parameter_overrides( for _, row in cur_measurement_df.iterrows(): # we trust that the number of overrides matches (see above) overrides = measurements.split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None)) - _apply_overrides_for_observable(mapping, row[OBSERVABLE_ID], - 'observable', overrides) + row.get(OBSERVABLE_PARAMETERS, None) + ) + _apply_overrides_for_observable( + mapping, row[OBSERVABLE_ID], "observable", overrides + ) overrides = measurements.split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None)) - _apply_overrides_for_observable(mapping, row[OBSERVABLE_ID], 'noise', - overrides) + row.get(NOISE_PARAMETERS, None) + ) + _apply_overrides_for_observable( + mapping, row[OBSERVABLE_ID], "noise", overrides + ) def _apply_overrides_for_observable( - mapping: ParMappingDict, - observable_id: str, - override_type: Literal['observable', 'noise'], - overrides: List[str], + mapping: ParMappingDict, + observable_id: str, + override_type: Literal["observable", "noise"], + 
overrides: List[str], ) -> None: """ Apply parameter-overrides for observables and noises to mapping @@ -417,17 +491,17 @@ def _apply_overrides_for_observable( overrides: list of overrides for noise or observable parameters """ for i, override in enumerate(overrides): - overridee_id = f'{override_type}Parameter{i+1}_{observable_id}' + overridee_id = f"{override_type}Parameter{i+1}_{observable_id}" mapping[overridee_id] = override def _apply_condition_parameters( - par_mapping: ParMappingDict, - scale_mapping: ScaleMappingDict, - condition_id: str, - condition_df: pd.DataFrame, - model: Model, - mapping_df: Optional[pd.DataFrame] = None, + par_mapping: ParMappingDict, + scale_mapping: ScaleMappingDict, + condition_id: str, + condition_df: pd.DataFrame, + model: Model, + mapping_df: Optional[pd.DataFrame] = None, ) -> None: """Replace parameter IDs in parameter mapping dictionary by condition table parameter values (in-place). @@ -448,15 +522,18 @@ def _apply_condition_parameters( continue par_mapping[overridee_id] = core.to_float_if_float( - condition_df.loc[condition_id, overridee_id]) + condition_df.loc[condition_id, overridee_id] + ) - if isinstance(par_mapping[overridee_id], numbers.Number) \ - and np.isnan(par_mapping[overridee_id]): + if isinstance(par_mapping[overridee_id], numbers.Number) and np.isnan( + par_mapping[overridee_id] + ): # NaN in the condition table for an entity without time derivative # indicates that the model value should be used try: - par_mapping[overridee_id] = \ - model.get_parameter_value(overridee_id) + par_mapping[overridee_id] = model.get_parameter_value( + overridee_id + ) except ValueError as e: raise NotImplementedError( "Not sure how to handle NaN in condition table for " @@ -467,11 +544,11 @@ def _apply_condition_parameters( def _apply_parameter_table( - par_mapping: ParMappingDict, - scale_mapping: ScaleMappingDict, - parameter_df: Optional[pd.DataFrame] = None, - scaled_parameters: bool = False, - fill_fixed_parameters: bool = True, + par_mapping: ParMappingDict, + scale_mapping: ScaleMappingDict, + parameter_df: Optional[pd.DataFrame] = None, + scaled_parameters: bool = False, + fill_fixed_parameters: bool = True, ) -> None: """Replace parameters from parameter table in mapping list for a given condition and set the corresponding scale. 
@@ -523,12 +600,17 @@ def _apply_parameter_table( raise # or the overridee is only defined in the parameter table - scale = parameter_df.loc[sim_par, PARAMETER_SCALE] \ - if PARAMETER_SCALE in parameter_df else LIN - - if fill_fixed_parameters \ - and ESTIMATE in parameter_df \ - and parameter_df.loc[sim_par, ESTIMATE] == 0: + scale = ( + parameter_df.loc[sim_par, PARAMETER_SCALE] + if PARAMETER_SCALE in parameter_df + else LIN + ) + + if ( + fill_fixed_parameters + and ESTIMATE in parameter_df + and parameter_df.loc[sim_par, ESTIMATE] == 0 + ): val = parameter_df.loc[sim_par, NOMINAL_VALUE] if scaled_parameters: val = parameters.scale(val, scale) @@ -540,26 +622,27 @@ def _apply_parameter_table( def _perform_mapping_checks( - measurement_df: pd.DataFrame, - allow_timepoint_specific_numeric_noise_parameters: bool = False + measurement_df: pd.DataFrame, + allow_timepoint_specific_numeric_noise_parameters: bool = False, ) -> None: """Check for PEtab features which we can't account for during parameter mapping.""" if lint.measurement_table_has_timepoint_specific_mappings( - measurement_df, - allow_scalar_numeric_noise_parameters= # noqa: E251,E501 - allow_timepoint_specific_numeric_noise_parameters): + measurement_df, + allow_scalar_numeric_noise_parameters=allow_timepoint_specific_numeric_noise_parameters, # noqa: E251,E501 + ): # we could allow that for floats, since they don't matter in this # function and would be simply ignored raise ValueError( - "Timepoint-specific parameter overrides currently unsupported.") + "Timepoint-specific parameter overrides currently unsupported." + ) def handle_missing_overrides( - mapping_par_opt_to_par_sim: ParMappingDict, - warn: bool = True, - condition_id: str = None, + mapping_par_opt_to_par_sim: ParMappingDict, + warn: bool = True, + condition_id: str = None, ) -> None: """ Find all observable parameters and noise parameters that were not mapped @@ -589,18 +672,20 @@ def handle_missing_overrides( _missed_vals.append(key) if _missed_vals and warn: - logger.warning(f"Could not map the following overrides for condition " - f"{condition_id}: " - f"{_missed_vals}. Usually, this is just due to missing " - f"data points.") + logger.warning( + f"Could not map the following overrides for condition " + f"{condition_id}: " + f"{_missed_vals}. Usually, this is just due to missing " + f"data points." + ) def merge_preeq_and_sim_pars_condition( - condition_map_preeq: ParMappingDict, - condition_map_sim: ParMappingDict, - condition_scale_map_preeq: ScaleMappingDict, - condition_scale_map_sim: ScaleMappingDict, - condition: Any, + condition_map_preeq: ParMappingDict, + condition_map_sim: ParMappingDict, + condition_scale_map_preeq: ScaleMappingDict, + condition_scale_map_sim: ScaleMappingDict, + condition: Any, ) -> None: """Merge preequilibration and simulation parameters and scales for a single condition while checking for compatibility. 
@@ -624,8 +709,9 @@ def merge_preeq_and_sim_pars_condition( # nothing to do return - all_par_ids = set(condition_map_sim.keys()) \ - | set(condition_map_preeq.keys()) + all_par_ids = set(condition_map_sim.keys()) | set( + condition_map_preeq.keys() + ) for par_id in all_par_ids: if par_id not in condition_map_preeq: @@ -641,8 +727,9 @@ def merge_preeq_and_sim_pars_condition( # present in both par_preeq = condition_map_preeq[par_id] par_sim = condition_map_sim[par_id] - if par_preeq != par_sim \ - and not (core.is_empty(par_sim) and core.is_empty(par_preeq)): + if par_preeq != par_sim and not ( + core.is_empty(par_sim) and core.is_empty(par_preeq) + ): # both identical or both nan is okay if core.is_empty(par_sim): # unmapped for simulation @@ -652,10 +739,11 @@ def merge_preeq_and_sim_pars_condition( pass else: raise ValueError( - 'Cannot handle different values for dynamic ' - f'parameters: for condition {condition} ' - f'parameter {par_id} is {par_preeq} for preeq ' - f'and {par_sim} for simulation.') + "Cannot handle different values for dynamic " + f"parameters: for condition {condition} " + f"parameter {par_id} is {par_preeq} for preeq " + f"and {par_sim} for simulation." + ) scale_preeq = condition_scale_map_preeq[par_id] scale_sim = condition_scale_map_sim[par_id] @@ -670,15 +758,16 @@ def merge_preeq_and_sim_pars_condition( pass else: raise ValueError( - 'Cannot handle different parameter scales ' - f'parameters: for condition {condition} ' - f'scale for parameter {par_id} is {scale_preeq} for preeq ' - f'and {scale_sim} for simulation.') + "Cannot handle different parameter scales " + f"parameters: for condition {condition} " + f"scale for parameter {par_id} is {scale_preeq} for preeq " + f"and {scale_sim} for simulation." + ) def merge_preeq_and_sim_pars( - parameter_mappings: Iterable[ParMappingDictTuple], - scale_mappings: Iterable[ScaleMappingDictTuple] + parameter_mappings: Iterable[ParMappingDictTuple], + scale_mappings: Iterable[ScaleMappingDictTuple], ) -> Tuple[List[ParMappingDictTuple], List[ScaleMappingDictTuple]]: """Merge preequilibration and simulation parameters and scales for a list of conditions while checking for compatibility. 
@@ -696,14 +785,17 @@ def merge_preeq_and_sim_pars( """ parameter_mapping = [] scale_mapping = [] - for ic, ((map_preeq, map_sim), (scale_map_preeq, scale_map_sim)) in \ - enumerate(zip(parameter_mappings, scale_mappings)): + for ic, ( + (map_preeq, map_sim), + (scale_map_preeq, scale_map_sim), + ) in enumerate(zip(parameter_mappings, scale_mappings)): merge_preeq_and_sim_pars_condition( condition_map_preeq=map_preeq, condition_map_sim=map_sim, condition_scale_map_preeq=scale_map_preeq, condition_scale_map_sim=scale_map_sim, - condition=ic) + condition=ic, + ) parameter_mapping.append(map_sim) scale_mapping.append(scale_map_sim) diff --git a/petab/parameters.py b/petab/parameters.py index ecc0f197..a3c13250 100644 --- a/petab/parameters.py +++ b/petab/parameters.py @@ -5,7 +5,15 @@ from collections import OrderedDict from pathlib import Path from typing import ( - Dict, Iterable, List, Set, Tuple, Union, Optional, Literal, Sequence + Dict, + Iterable, + List, + Literal, + Optional, + Sequence, + Set, + Tuple, + Union, ) import libsbml @@ -16,25 +24,28 @@ from .C import * # noqa: F403 from .models import Model -__all__ = ['create_parameter_df', - 'get_optimization_parameter_scaling', - 'get_optimization_parameters', - 'get_parameter_df', - 'get_priors_from_df', - 'get_valid_parameters_for_parameter_table', - 'map_scale', - 'map_unscale', - 'normalize_parameter_df', - 'scale', - 'unscale', - 'write_parameter_df'] +__all__ = [ + "create_parameter_df", + "get_optimization_parameter_scaling", + "get_optimization_parameters", + "get_parameter_df", + "get_priors_from_df", + "get_valid_parameters_for_parameter_table", + "map_scale", + "map_unscale", + "normalize_parameter_df", + "scale", + "unscale", + "write_parameter_df", +] -PARAMETER_SCALE_ARGS = Literal['', 'lin', 'log', 'log10'] +PARAMETER_SCALE_ARGS = Literal["", "lin", "log", "log10"] def get_parameter_df( - parameter_file: Union[str, Path, pd.DataFrame, - Iterable[Union[str, Path, pd.DataFrame]], None] + parameter_file: Union[ + str, Path, pd.DataFrame, Iterable[Union[str, Path, pd.DataFrame]], None + ] ) -> Union[pd.DataFrame, None]: """ Read the provided parameter file into a ``pandas.Dataframe``. @@ -51,8 +62,9 @@ def get_parameter_df( if isinstance(parameter_file, pd.DataFrame): parameter_df = parameter_file elif isinstance(parameter_file, (str, Path)): - parameter_df = pd.read_csv(parameter_file, sep='\t', - float_precision='round_trip') + parameter_df = pd.read_csv( + parameter_file, sep="\t", float_precision="round_trip" + ) elif isinstance(parameter_file, Iterable): dfs = [get_parameter_df(x) for x in parameter_file if x] @@ -66,7 +78,8 @@ def get_parameter_df( return parameter_df lint.assert_no_leading_trailing_whitespace( - parameter_df.columns.values, "parameter") + parameter_df.columns.values, "parameter" + ) if not isinstance(parameter_df.index, pd.RangeIndex): parameter_df.reset_index(inplace=True) @@ -75,7 +88,8 @@ def get_parameter_df( parameter_df.set_index([PARAMETER_ID], inplace=True) except KeyError as e: raise KeyError( - f"Parameter table missing mandatory field {PARAMETER_ID}.") from e + f"Parameter table missing mandatory field {PARAMETER_ID}." 
+ ) from e _check_for_contradicting_parameter_definitions(parameter_df) return parameter_df @@ -85,12 +99,13 @@ def _check_for_contradicting_parameter_definitions(parameter_df: pd.DataFrame): """ Raises a ValueError for non-unique parameter IDs """ - parameter_duplicates = set(parameter_df.index.values[ - parameter_df.index.duplicated()]) + parameter_duplicates = set( + parameter_df.index.values[parameter_df.index.duplicated()] + ) if parameter_duplicates: raise ValueError( - f'The values of `{PARAMETER_ID}` must be unique. The ' - f'following duplicates were found:\n{parameter_duplicates}' + f"The values of `{PARAMETER_ID}` must be unique. The " + f"following duplicates were found:\n{parameter_duplicates}" ) @@ -102,7 +117,7 @@ def write_parameter_df(df: pd.DataFrame, filename: Union[str, Path]) -> None: filename: Destination file name """ df = get_parameter_df(df) - df.to_csv(filename, sep='\t', index=True) + df.to_csv(filename, sep="\t", index=True) def get_optimization_parameters(parameter_df: pd.DataFrame) -> List[str]: @@ -119,7 +134,8 @@ def get_optimization_parameters(parameter_df: pd.DataFrame) -> List[str]: def get_optimization_parameter_scaling( - parameter_df: pd.DataFrame) -> Dict[str, str]: + parameter_df: pd.DataFrame, +) -> Dict[str, str]: """ Get Dictionary with optimization parameter IDs mapped to parameter scaling strings. @@ -136,16 +152,16 @@ def get_optimization_parameter_scaling( def create_parameter_df( - sbml_model: Optional[libsbml.Model] = None, - condition_df: Optional[pd.DataFrame] = None, - observable_df: Optional[pd.DataFrame] = None, - measurement_df: Optional[pd.DataFrame] = None, - model: Optional[Model] = None, - include_optional: bool = False, - parameter_scale: str = LOG10, - lower_bound: Iterable = None, - upper_bound: Iterable = None, - mapping_df: Optional[pd.DataFrame] = None, + sbml_model: Optional[libsbml.Model] = None, + condition_df: Optional[pd.DataFrame] = None, + observable_df: Optional[pd.DataFrame] = None, + measurement_df: Optional[pd.DataFrame] = None, + model: Optional[Model] = None, + include_optional: bool = False, + parameter_scale: str = LOG10, + lower_bound: Iterable = None, + upper_bound: Iterable = None, + mapping_df: Optional[pd.DataFrame] = None, ) -> pd.DataFrame: """Create a new PEtab parameter table @@ -172,25 +188,39 @@ def create_parameter_df( The created parameter DataFrame """ if sbml_model: - warnings.warn("Passing a model via the `sbml_model` argument is " - "deprecated, use `model=petab.models.sbml_model." - "SbmlModel(...)` instead.", DeprecationWarning, - stacklevel=2) + warnings.warn( + "Passing a model via the `sbml_model` argument is " + "deprecated, use `model=petab.models.sbml_model." + "SbmlModel(...)` instead.", + DeprecationWarning, + stacklevel=2, + ) from petab.models.sbml_model import SbmlModel + if model: - raise ValueError("Arguments `model` and `sbml_model` are " - "mutually exclusive.") + raise ValueError( + "Arguments `model` and `sbml_model` are " "mutually exclusive." 
+ ) model = SbmlModel(sbml_model=sbml_model) if include_optional: - parameter_ids = list(get_valid_parameters_for_parameter_table( - model=model, condition_df=condition_df, - observable_df=observable_df, measurement_df=measurement_df)) + parameter_ids = list( + get_valid_parameters_for_parameter_table( + model=model, + condition_df=condition_df, + observable_df=observable_df, + measurement_df=measurement_df, + ) + ) else: - parameter_ids = list(get_required_parameters_for_parameter_table( - model=model, condition_df=condition_df, - observable_df=observable_df, measurement_df=measurement_df, - mapping_df=mapping_df - )) + parameter_ids = list( + get_required_parameters_for_parameter_table( + model=model, + condition_df=condition_df, + observable_df=observable_df, + measurement_df=measurement_df, + mapping_df=mapping_df, + ) + ) df = pd.DataFrame( data={ @@ -201,18 +231,20 @@ def create_parameter_df( UPPER_BOUND: upper_bound, NOMINAL_VALUE: np.nan, ESTIMATE: 1, - INITIALIZATION_PRIOR_TYPE: '', - INITIALIZATION_PRIOR_PARAMETERS: '', - OBJECTIVE_PRIOR_TYPE: '', - OBJECTIVE_PRIOR_PARAMETERS: '', - }) + INITIALIZATION_PRIOR_TYPE: "", + INITIALIZATION_PRIOR_PARAMETERS: "", + OBJECTIVE_PRIOR_TYPE: "", + OBJECTIVE_PRIOR_PARAMETERS: "", + } + ) df.set_index([PARAMETER_ID], inplace=True) # For model parameters, set nominal values as defined in the model for parameter_id in df.index: try: - df.loc[parameter_id, NOMINAL_VALUE] = \ - model.get_parameter_value(parameter_id) + df.loc[parameter_id, NOMINAL_VALUE] = model.get_parameter_value( + parameter_id + ) except ValueError: # parameter was introduced as condition-specific override and # is potentially not present in the model @@ -221,11 +253,11 @@ def create_parameter_df( def get_required_parameters_for_parameter_table( - model: Model, - condition_df: pd.DataFrame, - observable_df: pd.DataFrame, - measurement_df: pd.DataFrame, - mapping_df: pd.DataFrame = None + model: Model, + condition_df: pd.DataFrame, + observable_df: pd.DataFrame, + measurement_df: pd.DataFrame, + mapping_df: pd.DataFrame = None, ) -> Set[str]: """ Get set of parameters which need to go into the parameter table @@ -254,18 +286,26 @@ def append_overrides(overrides): for _, row in measurement_df.iterrows(): # we trust that the number of overrides matches - append_overrides(measurements.split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None))) - append_overrides(measurements.split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None))) + append_overrides( + measurements.split_parameter_replacement_list( + row.get(OBSERVABLE_PARAMETERS, None) + ) + ) + append_overrides( + measurements.split_parameter_replacement_list( + row.get(NOISE_PARAMETERS, None) + ) + ) # Add output parameters except for placeholders - for kwargs in [dict(observables=True, noise=False), - dict(observables=False, noise=True)]: + for kwargs in [ + dict(observables=True, noise=False), + dict(observables=False, noise=True), + ]: output_parameters = observables.get_output_parameters( - observable_df, model, mapping_df=mapping_df, **kwargs) - placeholders = observables.get_placeholders( - observable_df, **kwargs) + observable_df, model, mapping_df=mapping_df, **kwargs + ) + placeholders = observables.get_placeholders(observable_df, **kwargs) for p in output_parameters: if p not in placeholders: parameter_ids[p] = None @@ -287,11 +327,11 @@ def append_overrides(overrides): def get_valid_parameters_for_parameter_table( - model: Model, - condition_df: pd.DataFrame, - observable_df: pd.DataFrame, 
- measurement_df: pd.DataFrame, - mapping_df: pd.DataFrame = None, + model: Model, + condition_df: pd.DataFrame, + observable_df: pd.DataFrame, + measurement_df: pd.DataFrame, + mapping_df: pd.DataFrame = None, ) -> Set[str]: """ Get set of parameters which may be present inside the parameter table @@ -331,20 +371,23 @@ def get_valid_parameters_for_parameter_table( # don't use sets here, to have deterministic ordering, # e.g. for creating parameter tables parameter_ids = OrderedDict.fromkeys( - p for p in model.get_valid_parameters_for_parameter_table() + p + for p in model.get_valid_parameters_for_parameter_table() if p not in blackset ) if mapping_df is not None: - for from_id, to_id in zip(mapping_df.index.values, - mapping_df[MODEL_ENTITY_ID]): + for from_id, to_id in zip( + mapping_df.index.values, mapping_df[MODEL_ENTITY_ID] + ): if to_id in parameter_ids.keys(): parameter_ids[from_id] = None if observable_df is not None: # add output parameters from observables table output_parameters = observables.get_output_parameters( - observable_df=observable_df, model=model) + observable_df=observable_df, model=model + ) for p in output_parameters: if p not in blackset: parameter_ids[p] = None @@ -359,10 +402,16 @@ def append_overrides(overrides): if measurement_df is not None: for _, row in measurement_df.iterrows(): # we trust that the number of overrides matches - append_overrides(measurements.split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None))) - append_overrides(measurements.split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None))) + append_overrides( + measurements.split_parameter_replacement_list( + row.get(OBSERVABLE_PARAMETERS, None) + ) + ) + append_overrides( + measurements.split_parameter_replacement_list( + row.get(NOISE_PARAMETERS, None) + ) + ) # Append parameter overrides from condition table if condition_df is not None: @@ -373,8 +422,8 @@ def append_overrides(overrides): def get_priors_from_df( - parameter_df: pd.DataFrame, - mode: Literal['initialization', 'objective'], + parameter_df: pd.DataFrame, + mode: Literal["initialization", "objective"], ) -> List[Tuple]: """Create list with information about the parameter priors @@ -392,16 +441,18 @@ def get_priors_from_df( prior_list = [] for _, row in par_to_estimate.iterrows(): # retrieve info about type - prior_type = str(row.get(f'{mode}PriorType', '')) + prior_type = str(row.get(f"{mode}PriorType", "")) if core.is_empty(prior_type): prior_type = PARAMETER_SCALE_UNIFORM # retrieve info about parameters of priors, make it a tuple of floats - pars_str = str(row.get(f'{mode}PriorParameters', '')) + pars_str = str(row.get(f"{mode}PriorParameters", "")) if core.is_empty(pars_str): - lb, ub = map_scale([row[LOWER_BOUND], row[UPPER_BOUND]], - [row[PARAMETER_SCALE]] * 2) - pars_str = f'{lb}{PARAMETER_SEPARATOR}{ub}' + lb, ub = map_scale( + [row[LOWER_BOUND], row[UPPER_BOUND]], + [row[PARAMETER_SCALE]] * 2, + ) + pars_str = f"{lb}{PARAMETER_SEPARATOR}{ub}" prior_pars = tuple( float(entry) for entry in pars_str.split(PARAMETER_SEPARATOR) ) @@ -411,10 +462,12 @@ def get_priors_from_df( par_bounds = (row[LOWER_BOUND], row[UPPER_BOUND]) # if no prior is specified, we assume a non-informative (uniform) one - if prior_type == 'nan': + if prior_type == "nan": prior_type = PARAMETER_SCALE_UNIFORM - prior_pars = (scale(row[LOWER_BOUND], par_scale), - scale(row[UPPER_BOUND], par_scale)) + prior_pars = ( + scale(row[LOWER_BOUND], par_scale), + scale(row[UPPER_BOUND], par_scale), + ) prior_list.append((prior_type, 
prior_pars, par_scale, par_bounds)) @@ -422,8 +475,8 @@ def get_priors_from_df( def scale( - parameter: numbers.Number, - scale_str: PARAMETER_SCALE_ARGS, + parameter: numbers.Number, + scale_str: PARAMETER_SCALE_ARGS, ) -> numbers.Number: """Scale parameter according to ``scale_str``. @@ -446,8 +499,8 @@ def scale( def unscale( - parameter: numbers.Number, - scale_str: PARAMETER_SCALE_ARGS, + parameter: numbers.Number, + scale_str: PARAMETER_SCALE_ARGS, ) -> numbers.Number: """Unscale parameter according to ``scale_str``. @@ -517,10 +570,11 @@ def normalize_parameter_df(parameter_df: pd.DataFrame) -> pd.DataFrame: if PARAMETER_NAME not in df: df[PARAMETER_NAME] = df.reset_index()[PARAMETER_ID] - prior_type_cols = [INITIALIZATION_PRIOR_TYPE, - OBJECTIVE_PRIOR_TYPE] - prior_par_cols = [INITIALIZATION_PRIOR_PARAMETERS, - OBJECTIVE_PRIOR_PARAMETERS] + prior_type_cols = [INITIALIZATION_PRIOR_TYPE, OBJECTIVE_PRIOR_TYPE] + prior_par_cols = [ + INITIALIZATION_PRIOR_PARAMETERS, + OBJECTIVE_PRIOR_PARAMETERS, + ] # iterate over initialization and objective priors for prior_type_col, prior_par_col in zip(prior_type_cols, prior_par_cols): # fill in default values for prior type @@ -533,10 +587,14 @@ def normalize_parameter_df(parameter_df: pd.DataFrame) -> pd.DataFrame: if prior_par_col not in df: df[prior_par_col] = None for irow, row in df.iterrows(): - if core.is_empty(row[prior_par_col]) \ - and row[prior_type_col] == PARAMETER_SCALE_UNIFORM: - lb, ub = map_scale([row[LOWER_BOUND], row[UPPER_BOUND]], - [row[PARAMETER_SCALE]] * 2) - df.loc[irow, prior_par_col] = f'{lb}{PARAMETER_SEPARATOR}{ub}' + if ( + core.is_empty(row[prior_par_col]) + and row[prior_type_col] == PARAMETER_SCALE_UNIFORM + ): + lb, ub = map_scale( + [row[LOWER_BOUND], row[UPPER_BOUND]], + [row[PARAMETER_SCALE]] * 2, + ) + df.loc[irow, prior_par_col] = f"{lb}{PARAMETER_SEPARATOR}{ub}" return df diff --git a/petab/petablint.py b/petab/petablint.py index f31a63fe..45995602 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -6,19 +6,22 @@ import logging import sys +from colorama import Fore +from colorama import init as init_colorama + import petab -from colorama import (init as init_colorama, Fore) logger = logging.getLogger(__name__) class LintFormatter(logging.Formatter): """Custom log formatter""" + formats = { - logging.DEBUG: Fore.CYAN + '%(message)s', - logging.INFO: Fore.GREEN + '%(message)s', - logging.WARN: Fore.YELLOW + '%(message)s', - logging.ERROR: Fore.RED + '%(message)s', + logging.DEBUG: Fore.CYAN + "%(message)s", + logging.INFO: Fore.GREEN + "%(message)s", + logging.WARN: Fore.YELLOW + "%(message)s", + logging.ERROR: Fore.RED + "%(message)s", } def format(self, record): @@ -33,38 +36,74 @@ def format(self, record): def parse_cli_args(): """Parse command line arguments""" parser = argparse.ArgumentParser( - description='Check if a set of files adheres to the PEtab format.') + description="Check if a set of files adheres to the PEtab format." 
+ ) # General options: - parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', - help='More verbose output') + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + action="store_true", + help="More verbose output", + ) # Call with set of files - parser.add_argument('-s', '--sbml', dest='sbml_file_name', - help='SBML model filename') - parser.add_argument('-o', '--observables', dest='observable_file_name', - help='Observable table') - parser.add_argument('-m', '--measurements', dest='measurement_file_name', - help='Measurement table') - parser.add_argument('-c', '--conditions', dest='condition_file_name', - help='Conditions table') - parser.add_argument('-p', '--parameters', dest='parameter_file_name', - help='Parameter table') - parser.add_argument('--vis', '--visualizations', - dest='visualization_file_name', - help='Visualization table') + parser.add_argument( + "-s", "--sbml", dest="sbml_file_name", help="SBML model filename" + ) + parser.add_argument( + "-o", + "--observables", + dest="observable_file_name", + help="Observable table", + ) + parser.add_argument( + "-m", + "--measurements", + dest="measurement_file_name", + help="Measurement table", + ) + parser.add_argument( + "-c", + "--conditions", + dest="condition_file_name", + help="Conditions table", + ) + parser.add_argument( + "-p", + "--parameters", + dest="parameter_file_name", + help="Parameter table", + ) + parser.add_argument( + "--vis", + "--visualizations", + dest="visualization_file_name", + help="Visualization table", + ) group = parser.add_mutually_exclusive_group() - group.add_argument('-y', '--yaml', dest='yaml_file_name', - help='PEtab YAML problem filename') + group.add_argument( + "-y", + "--yaml", + dest="yaml_file_name", + help="PEtab YAML problem filename", + ) args = parser.parse_args() - if (args.yaml_file_name - and any((args.sbml_file_name, args.condition_file_name, - args.measurement_file_name, args.parameter_file_name))): - parser.error('When providing a yaml file, no other files may ' - 'be specified.') + if args.yaml_file_name and any( + ( + args.sbml_file_name, + args.condition_file_name, + args.measurement_file_name, + args.parameter_file_name, + ) + ): + parser.error( + "When providing a yaml file, no other files may " "be specified." 
+ ) return args @@ -83,13 +122,16 @@ def main(): logging.basicConfig(level=logging.DEBUG, handlers=[ch]) if args.yaml_file_name: - from petab.yaml import validate from jsonschema.exceptions import ValidationError + + from petab.yaml import validate + try: validate(args.yaml_file_name) except ValidationError as e: - logger.error("Provided YAML file does not adhere to PEtab " - f"schema: {e}") + logger.error( + "Provided YAML file does not adhere to PEtab " f"schema: {e}" + ) sys.exit(1) if petab.is_composite_problem(args.yaml_file_name): @@ -101,20 +143,21 @@ def main(): problem = petab.Problem.from_yaml(args.yaml_file_name) else: - logger.debug('Looking for...') + logger.debug("Looking for...") if args.sbml_file_name: - logger.debug(f'\tSBML model: {args.sbml_file_name}') + logger.debug(f"\tSBML model: {args.sbml_file_name}") if args.condition_file_name: - logger.debug(f'\tCondition table: {args.condition_file_name}') + logger.debug(f"\tCondition table: {args.condition_file_name}") if args.observable_file_name: - logger.debug(f'\tObservable table: {args.observable_file_name}') + logger.debug(f"\tObservable table: {args.observable_file_name}") if args.measurement_file_name: - logger.debug(f'\tMeasurement table: {args.measurement_file_name}') + logger.debug(f"\tMeasurement table: {args.measurement_file_name}") if args.parameter_file_name: - logger.debug(f'\tParameter table: {args.parameter_file_name}') + logger.debug(f"\tParameter table: {args.parameter_file_name}") if args.visualization_file_name: - logger.debug('\tVisualization table: ' - f'{args.visualization_file_name}') + logger.debug( + "\tVisualization table: " f"{args.visualization_file_name}" + ) try: problem = petab.Problem.from_files( @@ -133,5 +176,5 @@ def main(): sys.exit(ret) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/petab/problem.py b/petab/problem.py index 7897f9ae..8d2d0cf8 100644 --- a/petab/problem.py +++ b/petab/problem.py @@ -5,14 +5,25 @@ import tempfile from math import nan from pathlib import Path, PurePosixPath -from typing import Dict, Iterable, List, Optional, Union, TYPE_CHECKING +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Union from urllib.parse import unquote, urlparse, urlunparse from warnings import warn import pandas as pd -from . import (conditions, core, format_version, measurements, observables, - parameter_mapping, parameters, sampling, sbml, yaml, mapping) +from . 
import ( + conditions, + core, + format_version, + mapping, + measurements, + observables, + parameter_mapping, + parameters, + sampling, + sbml, + yaml, +) from .C import * # noqa: F403 from .models import MODEL_TYPE_SBML from .models.model import Model, model_factory @@ -22,7 +33,7 @@ import libsbml -__all__ = ['Problem'] +__all__ = ["Problem"] class Problem: @@ -53,19 +64,19 @@ class Problem: """ def __init__( - self, - sbml_model: libsbml.Model = None, - sbml_reader: libsbml.SBMLReader = None, - sbml_document: libsbml.SBMLDocument = None, - model: Model = None, - model_id: str = None, - condition_df: pd.DataFrame = None, - measurement_df: pd.DataFrame = None, - parameter_df: pd.DataFrame = None, - visualization_df: pd.DataFrame = None, - observable_df: pd.DataFrame = None, - mapping_df: pd.DataFrame = None, - extensions_config: Dict = None, + self, + sbml_model: libsbml.Model = None, + sbml_reader: libsbml.SBMLReader = None, + sbml_document: libsbml.SBMLDocument = None, + model: Model = None, + model_id: str = None, + condition_df: pd.DataFrame = None, + measurement_df: pd.DataFrame = None, + parameter_df: pd.DataFrame = None, + visualization_df: pd.DataFrame = None, + observable_df: pd.DataFrame = None, + mapping_df: pd.DataFrame = None, + extensions_config: Dict = None, ): self.condition_df: Optional[pd.DataFrame] = condition_df self.measurement_df: Optional[pd.DataFrame] = measurement_df @@ -74,20 +85,28 @@ def __init__( self.observable_df: Optional[pd.DataFrame] = observable_df self.mapping_df: Optional[pd.DataFrame] = mapping_df - if any((sbml_model, sbml_document, sbml_reader),): - warn("Passing `sbml_model`, `sbml_document`, or `sbml_reader` " - "to petab.Problem is deprecated and will be removed in a " - "future version. Use `model=petab.models.sbml_model." - "SbmlModel(...)` instead.", DeprecationWarning, stacklevel=2) + if any( + (sbml_model, sbml_document, sbml_reader), + ): + warn( + "Passing `sbml_model`, `sbml_document`, or `sbml_reader` " + "to petab.Problem is deprecated and will be removed in a " + "future version. Use `model=petab.models.sbml_model." + "SbmlModel(...)` instead.", + DeprecationWarning, + stacklevel=2, + ) if model: - raise ValueError("Must only provide one of (`sbml_model`, " - "`sbml_document`, `sbml_reader`) or `model`.") + raise ValueError( + "Must only provide one of (`sbml_model`, " + "`sbml_document`, `sbml_reader`) or `model`." 
+ ) model = SbmlModel( sbml_model=sbml_model, sbml_reader=sbml_reader, sbml_document=sbml_document, - model_id=model_id + model_id=model_id, ) self.model: Optional[Model] = model @@ -96,15 +115,16 @@ def __init__( def __getattr__(self, name): # For backward-compatibility, allow access to SBML model related # attributes now stored in self.model - if name in {'sbml_model', 'sbml_reader', 'sbml_document'}: + if name in {"sbml_model", "sbml_reader", "sbml_document"}: return getattr(self.model, name) if self.model else None - raise AttributeError(f"'{self.__class__.__name__}' object has no " - f"attribute '{name}'") + raise AttributeError( + f"'{self.__class__.__name__}' object has no " f"attribute '{name}'" + ) def __setattr__(self, name, value): # For backward-compatibility, allow access to SBML model related # attributes now stored in self.model - if name in {'sbml_model', 'sbml_reader', 'sbml_document'}: + if name in {"sbml_model", "sbml_reader", "sbml_document"}: if self.model: setattr(self.model, name, value) else: @@ -114,20 +134,30 @@ def __setattr__(self, name, value): def __str__(self): model = f"with model ({self.model})" if self.model else "without model" - conditions = f"{self.condition_df.shape[0]} conditions" \ - if self.condition_df is not None else "without conditions table" + conditions = ( + f"{self.condition_df.shape[0]} conditions" + if self.condition_df is not None + else "without conditions table" + ) - observables = f"{self.observable_df.shape[0]} observables" \ - if self.observable_df is not None else "without observables table" + observables = ( + f"{self.observable_df.shape[0]} observables" + if self.observable_df is not None + else "without observables table" + ) - measurements = f"{self.measurement_df.shape[0]} measurements" \ - if self.measurement_df is not None \ + measurements = ( + f"{self.measurement_df.shape[0]} measurements" + if self.measurement_df is not None else "without measurements table" + ) if self.parameter_df is not None: - num_estimated_parameters = sum(self.parameter_df[ESTIMATE] == 1) \ - if ESTIMATE in self.parameter_df \ + num_estimated_parameters = ( + sum(self.parameter_df[ESTIMATE] == 1) + if ESTIMATE in self.parameter_df else self.parameter_df.shape[0] + ) parameters = f"{num_estimated_parameters} estimated parameters" else: parameters = "without parameter_df table" @@ -139,20 +169,17 @@ def __str__(self): @staticmethod def from_files( - sbml_file: Union[str, Path] = None, - condition_file: - Union[str, Path, Iterable[Union[str, Path]]] = None, - measurement_file: Union[str, Path, - Iterable[Union[str, Path]]] = None, - parameter_file: Union[str, Path, - Iterable[Union[str, Path]]] = None, - visualization_files: Union[str, Path, - Iterable[Union[str, Path]]] = None, - observable_files: Union[str, Path, - Iterable[Union[str, Path]]] = None, - model_id: str = None, - extensions_config: Dict = None, - ) -> 'Problem': + sbml_file: Union[str, Path] = None, + condition_file: Union[str, Path, Iterable[Union[str, Path]]] = None, + measurement_file: Union[str, Path, Iterable[Union[str, Path]]] = None, + parameter_file: Union[str, Path, Iterable[Union[str, Path]]] = None, + visualization_files: Union[ + str, Path, Iterable[Union[str, Path]] + ] = None, + observable_files: Union[str, Path, Iterable[Union[str, Path]]] = None, + model_id: str = None, + extensions_config: Dict = None, + ) -> "Problem": """ Factory method to load model and tables from files. 
@@ -166,34 +193,53 @@ def from_files( model_id: PEtab ID of the model extensions_config: Information on the extensions used """ - warn("petab.Problem.from_files is deprecated and will be removed in a " - "future version. Use `petab.Problem.from_yaml instead.", - DeprecationWarning, stacklevel=2) + warn( + "petab.Problem.from_files is deprecated and will be removed in a " + "future version. Use `petab.Problem.from_yaml instead.", + DeprecationWarning, + stacklevel=2, + ) - model = model_factory(sbml_file, MODEL_TYPE_SBML, model_id=model_id) \ - if sbml_file else None + model = ( + model_factory(sbml_file, MODEL_TYPE_SBML, model_id=model_id) + if sbml_file + else None + ) - condition_df = core.concat_tables( - condition_file, conditions.get_condition_df) \ - if condition_file else None + condition_df = ( + core.concat_tables(condition_file, conditions.get_condition_df) + if condition_file + else None + ) # If there are multiple tables, we will merge them - measurement_df = core.concat_tables( - measurement_file, measurements.get_measurement_df) \ - if measurement_file else None + measurement_df = ( + core.concat_tables( + measurement_file, measurements.get_measurement_df + ) + if measurement_file + else None + ) - parameter_df = parameters.get_parameter_df(parameter_file) \ - if parameter_file else None + parameter_df = ( + parameters.get_parameter_df(parameter_file) + if parameter_file + else None + ) # If there are multiple tables, we will merge them - visualization_df = core.concat_tables( - visualization_files, core.get_visualization_df) \ - if visualization_files else None + visualization_df = ( + core.concat_tables(visualization_files, core.get_visualization_df) + if visualization_files + else None + ) # If there are multiple tables, we will merge them - observable_df = core.concat_tables( - observable_files, observables.get_observable_df) \ - if observable_files else None + observable_df = ( + core.concat_tables(observable_files, observables.get_observable_df) + if observable_files + else None + ) return Problem( model=model, @@ -206,7 +252,7 @@ def from_files( ) @staticmethod - def from_yaml(yaml_config: Union[Dict, Path, str]) -> 'Problem': + def from_yaml(yaml_config: Union[Dict, Path, str]) -> "Problem": """ Factory method to load model and tables as specified by YAML file. 
@@ -223,107 +269,149 @@ def from_yaml(yaml_config: Union[Dict, Path, str]) -> 'Problem': # yaml_config may be path or URL path_url = urlparse(yaml_path) - if not path_url.scheme or \ - (path_url.scheme != 'file' and not path_url.netloc): + if not path_url.scheme or ( + path_url.scheme != "file" and not path_url.netloc + ): # a regular file path string path_prefix = Path(yaml_path).parent - get_path = lambda filename: \ - path_prefix / filename # noqa: E731 + get_path = ( + lambda filename: path_prefix / filename + ) # noqa: E731 else: # a URL # extract parent path from url_path = unquote(urlparse(yaml_path).path) parent_path = str(PurePosixPath(url_path).parent) path_prefix = urlunparse( - (path_url.scheme, path_url.netloc, parent_path, - path_url.params, path_url.query, path_url.fragment) + ( + path_url.scheme, + path_url.netloc, + parent_path, + path_url.params, + path_url.query, + path_url.fragment, + ) ) # need "/" on windows, not "\" - get_path = lambda filename: \ - f"{path_prefix}/{filename}" # noqa: E731 + get_path = ( + lambda filename: f"{path_prefix}/{filename}" + ) # noqa: E731 if yaml.is_composite_problem(yaml_config): - raise ValueError('petab.Problem.from_yaml() can only be used for ' - 'yaml files comprising a single model. ' - 'Consider using ' - 'petab.CompositeProblem.from_yaml() instead.') + raise ValueError( + "petab.Problem.from_yaml() can only be used for " + "yaml files comprising a single model. " + "Consider using " + "petab.CompositeProblem.from_yaml() instead." + ) if yaml_config[FORMAT_VERSION] not in {"1", 1, "1.0.0", "2.0.0"}: - raise ValueError("Provided PEtab files are of unsupported version " - f"{yaml_config[FORMAT_VERSION]}. Expected " - f"{format_version.__format_version__}.") + raise ValueError( + "Provided PEtab files are of unsupported version " + f"{yaml_config[FORMAT_VERSION]}. Expected " + f"{format_version.__format_version__}." + ) if yaml_config[FORMAT_VERSION] == "2.0.0": warn("Support for PEtab2.0 is experimental!") - problem0 = yaml_config['problems'][0] + problem0 = yaml_config["problems"][0] if isinstance(yaml_config[PARAMETER_FILE], list): - parameter_df = parameters.get_parameter_df([ - get_path(f) - for f in yaml_config[PARAMETER_FILE] - ]) - else: parameter_df = parameters.get_parameter_df( - get_path(yaml_config[PARAMETER_FILE])) \ - if yaml_config[PARAMETER_FILE] else None + [get_path(f) for f in yaml_config[PARAMETER_FILE]] + ) + else: + parameter_df = ( + parameters.get_parameter_df( + get_path(yaml_config[PARAMETER_FILE]) + ) + if yaml_config[PARAMETER_FILE] + else None + ) if yaml_config[FORMAT_VERSION] in [1, "1", "1.0.0"]: if len(problem0[SBML_FILES]) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( - 'Support for multiple models is not yet implemented.') + "Support for multiple models is not yet implemented." + ) - model = model_factory(get_path(problem0[SBML_FILES][0]), - MODEL_TYPE_SBML, model_id=None) \ - if problem0[SBML_FILES] else None + model = ( + model_factory( + get_path(problem0[SBML_FILES][0]), + MODEL_TYPE_SBML, + model_id=None, + ) + if problem0[SBML_FILES] + else None + ) else: if len(problem0[MODEL_FILES]) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( - 'Support for multiple models is not yet implemented.') + "Support for multiple models is not yet implemented." 
+ ) if not problem0[MODEL_FILES]: model = None else: - model_id, model_info = \ - next(iter(problem0[MODEL_FILES].items())) - model = model_factory(get_path(model_info[MODEL_LOCATION]), - model_info[MODEL_LANGUAGE], - model_id=model_id) + model_id, model_info = next( + iter(problem0[MODEL_FILES].items()) + ) + model = model_factory( + get_path(model_info[MODEL_LOCATION]), + model_info[MODEL_LANGUAGE], + model_id=model_id, + ) measurement_files = [ - get_path(f) for f in problem0.get(MEASUREMENT_FILES, [])] + get_path(f) for f in problem0.get(MEASUREMENT_FILES, []) + ] # If there are multiple tables, we will merge them - measurement_df = core.concat_tables( - measurement_files, measurements.get_measurement_df) \ - if measurement_files else None + measurement_df = ( + core.concat_tables( + measurement_files, measurements.get_measurement_df + ) + if measurement_files + else None + ) condition_files = [ - get_path(f) for f in problem0.get(CONDITION_FILES, [])] + get_path(f) for f in problem0.get(CONDITION_FILES, []) + ] # If there are multiple tables, we will merge them - condition_df = core.concat_tables( - condition_files, conditions.get_condition_df) \ - if condition_files else None + condition_df = ( + core.concat_tables(condition_files, conditions.get_condition_df) + if condition_files + else None + ) visualization_files = [ - get_path(f) for f in problem0.get(VISUALIZATION_FILES, [])] + get_path(f) for f in problem0.get(VISUALIZATION_FILES, []) + ] # If there are multiple tables, we will merge them - visualization_df = core.concat_tables( - visualization_files, core.get_visualization_df) \ - if visualization_files else None + visualization_df = ( + core.concat_tables(visualization_files, core.get_visualization_df) + if visualization_files + else None + ) observable_files = [ - get_path(f) for f in problem0.get(OBSERVABLE_FILES, [])] + get_path(f) for f in problem0.get(OBSERVABLE_FILES, []) + ] # If there are multiple tables, we will merge them - observable_df = core.concat_tables( - observable_files, observables.get_observable_df) \ - if observable_files else None + observable_df = ( + core.concat_tables(observable_files, observables.get_observable_df) + if observable_files + else None + ) - mapping_files = [ - get_path(f) for f in problem0.get(MAPPING_FILES, [])] + mapping_files = [get_path(f) for f in problem0.get(MAPPING_FILES, [])] # If there are multiple tables, we will merge them - mapping_df = core.concat_tables( - mapping_files, mapping.get_mapping_df) \ - if mapping_files else None + mapping_df = ( + core.concat_tables(mapping_files, mapping.get_mapping_df) + if mapping_files + else None + ) return Problem( condition_df=condition_df, @@ -333,11 +421,11 @@ def from_yaml(yaml_config: Union[Dict, Path, str]) -> 'Problem': model=model, visualization_df=visualization_df, mapping_df=mapping_df, - extensions_config=yaml_config.get(EXTENSIONS, {}) + extensions_config=yaml_config.get(EXTENSIONS, {}), ) @staticmethod - def from_combine(filename: Union[Path, str]) -> 'Problem': + def from_combine(filename: Union[Path, str]) -> "Problem": """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive). See also :py:func:`petab.create_combine_archive`. @@ -355,7 +443,8 @@ def from_combine(filename: Union[Path, str]) -> 'Problem': except ImportError as e: raise ImportError( "To use PEtab's COMBINE functionality, libcombine " - "(python-libcombine) must be installed.") from e + "(python-libcombine) must be installed." 
+ ) from e archive = libcombine.CombineArchive() if archive.initializeFromArchive(str(filename)) is None: @@ -365,8 +454,8 @@ def from_combine(filename: Union[Path, str]) -> 'Problem': with tempfile.TemporaryDirectory() as tmpdirname: archive.extractTo(tmpdirname) problem = Problem.from_yaml( - os.path.join(tmpdirname, - archive.getMasterFile().getLocation())) + os.path.join(tmpdirname, archive.getMasterFile().getLocation()) + ) archive.cleanUp() return problem @@ -398,43 +487,44 @@ def to_files_generic( # contain data. filenames = {} for table_name in [ - 'condition', - 'measurement', - 'parameter', - 'observable', - 'visualization', - 'mapping', + "condition", + "measurement", + "parameter", + "observable", + "visualization", + "mapping", ]: - if getattr(self, f'{table_name}_df') is not None: - filenames[f'{table_name}_file'] = f'{table_name}s.tsv' + if getattr(self, f"{table_name}_df") is not None: + filenames[f"{table_name}_file"] = f"{table_name}s.tsv" if self.model: if not isinstance(self.model, SbmlModel): - raise NotImplementedError("Saving non-SBML models is " - "currently not supported.") - filenames['model_file'] = 'model.xml' + raise NotImplementedError( + "Saving non-SBML models is " "currently not supported." + ) + filenames["model_file"] = "model.xml" - filenames['yaml_file'] = 'problem.yaml' + filenames["yaml_file"] = "problem.yaml" self.to_files(**filenames, prefix_path=prefix_path) if prefix_path is None: - return filenames['yaml_file'] - return str(prefix_path / filenames['yaml_file']) + return filenames["yaml_file"] + return str(prefix_path / filenames["yaml_file"]) def to_files( - self, - sbml_file: Union[None, str, Path] = None, - condition_file: Union[None, str, Path] = None, - measurement_file: Union[None, str, Path] = None, - parameter_file: Union[None, str, Path] = None, - visualization_file: Union[None, str, Path] = None, - observable_file: Union[None, str, Path] = None, - yaml_file: Union[None, str, Path] = None, - prefix_path: Union[None, str, Path] = None, - relative_paths: bool = True, - model_file: Union[None, str, Path] = None, - mapping_file: Union[None, str, Path] = None, + self, + sbml_file: Union[None, str, Path] = None, + condition_file: Union[None, str, Path] = None, + measurement_file: Union[None, str, Path] = None, + parameter_file: Union[None, str, Path] = None, + visualization_file: Union[None, str, Path] = None, + observable_file: Union[None, str, Path] = None, + yaml_file: Union[None, str, Path] = None, + prefix_path: Union[None, str, Path] = None, + relative_paths: bool = True, + model_file: Union[None, str, Path] = None, + mapping_file: Union[None, str, Path] = None, ) -> None: """ Write PEtab tables to files for this problem @@ -469,13 +559,18 @@ def to_files( If a destination was provided for a non-existing entity. """ if sbml_file: - warn("The `sbml_file` argument is deprecated and will be " - "removed in a future version. Use `model_file` instead.", - DeprecationWarning, stacklevel=2) + warn( + "The `sbml_file` argument is deprecated and will be " + "removed in a future version. Use `model_file` instead.", + DeprecationWarning, + stacklevel=2, + ) if model_file: - raise ValueError("Must provide either `sbml_file` or " - "`model_file` argument, but not both.") + raise ValueError( + "Must provide either `sbml_file` or " + "`model_file` argument, but not both." 
+ ) model_file = sbml_file @@ -502,36 +597,41 @@ def error(name: str) -> ValueError: if condition_file: if self.condition_df is not None: - conditions.write_condition_df(self.condition_df, - condition_file) + conditions.write_condition_df( + self.condition_df, condition_file + ) else: raise error("condition") if measurement_file: if self.measurement_df is not None: - measurements.write_measurement_df(self.measurement_df, - measurement_file) + measurements.write_measurement_df( + self.measurement_df, measurement_file + ) else: raise error("measurement") if parameter_file: if self.parameter_df is not None: - parameters.write_parameter_df(self.parameter_df, - parameter_file) + parameters.write_parameter_df( + self.parameter_df, parameter_file + ) else: raise error("parameter") if observable_file: if self.observable_df is not None: - observables.write_observable_df(self.observable_df, - observable_file) + observables.write_observable_df( + self.observable_df, observable_file + ) else: raise error("observable") if visualization_file: if self.visualization_df is not None: - core.write_visualization_df(self.visualization_df, - visualization_file) + core.write_visualization_df( + self.visualization_df, visualization_file + ) else: raise error("visualization") @@ -572,9 +672,12 @@ def get_optimization_parameter_scales(self): def get_model_parameters(self): """See :py:func:`petab.sbml.get_model_parameters`""" - warn("petab.Problem.get_model_parameters is deprecated and will be " - "removed in a future version.", - DeprecationWarning, stacklevel=2) + warn( + "petab.Problem.get_model_parameters is deprecated and will be " + "removed in a future version.", + DeprecationWarning, + stacklevel=2, + ) return sbml.get_model_parameters(self.sbml_model) @@ -642,8 +745,9 @@ def x_fixed_ids(self) -> List[str]: """Parameter table parameter IDs, for fixed parameters.""" return self.get_x_ids(free=False) - def get_x_nominal(self, free: bool = True, fixed: bool = True, - scaled: bool = False): + def get_x_nominal( + self, free: bool = True, fixed: bool = True, scaled: bool = False + ): """Generic function to get parameter nominal values. Parameters @@ -667,8 +771,9 @@ def get_x_nominal(self, free: bool = True, fixed: bool = True, v = [nan] * len(self.parameter_df) if scaled: - v = list(parameters.map_scale( - v, self.parameter_df[PARAMETER_SCALE])) + v = list( + parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) + ) return self._apply_mask(v, free=free, fixed=fixed) @property @@ -703,8 +808,9 @@ def x_nominal_fixed_scaled(self) -> List: for fixed parameters.""" return self.get_x_nominal(free=False, scaled=True) - def get_lb(self, free: bool = True, fixed: bool = True, - scaled: bool = False): + def get_lb( + self, free: bool = True, fixed: bool = True, scaled: bool = False + ): """Generic function to get lower parameter bounds. 
Parameters @@ -724,8 +830,9 @@ def get_lb(self, free: bool = True, fixed: bool = True, """ v = list(self.parameter_df[LOWER_BOUND]) if scaled: - v = list(parameters.map_scale( - v, self.parameter_df[PARAMETER_SCALE])) + v = list( + parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) + ) return self._apply_mask(v, free=free, fixed=fixed) @property @@ -738,8 +845,9 @@ def lb_scaled(self) -> List: """Parameter table lower bounds with applied parameter scaling""" return self.get_lb(scaled=True) - def get_ub(self, free: bool = True, fixed: bool = True, - scaled: bool = False): + def get_ub( + self, free: bool = True, fixed: bool = True, scaled: bool = False + ): """Generic function to get upper parameter bounds. Parameters @@ -759,8 +867,9 @@ def get_ub(self, free: bool = True, fixed: bool = True, """ v = list(self.parameter_df[UPPER_BOUND]) if scaled: - v = list(parameters.map_scale( - v, self.parameter_df[PARAMETER_SCALE])) + v = list( + parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) + ) return self._apply_mask(v, free=free, fixed=fixed) @property @@ -789,23 +898,22 @@ def get_simulation_conditions_from_measurement_df(self): """See petab.get_simulation_conditions""" return measurements.get_simulation_conditions(self.measurement_df) - def get_optimization_to_simulation_parameter_mapping( - self, **kwargs - ): + def get_optimization_to_simulation_parameter_mapping(self, **kwargs): """ See :py:func:`petab.parameter_mapping.get_optimization_to_simulation_parameter_mapping`, to which all keyword arguments are forwarded. """ - return parameter_mapping \ - .get_optimization_to_simulation_parameter_mapping( + return ( + parameter_mapping.get_optimization_to_simulation_parameter_mapping( condition_df=self.condition_df, measurement_df=self.measurement_df, parameter_df=self.parameter_df, observable_df=self.observable_df, model=self.model, - **kwargs + **kwargs, ) + ) def create_parameter_df(self, *args, **kwargs): """Create a new PEtab parameter table @@ -818,7 +926,9 @@ def create_parameter_df(self, *args, **kwargs): observable_df=self.observable_df, measurement_df=self.measurement_df, mapping_df=self.mapping_df, - *args, **kwargs) + *args, + **kwargs, + ) def sample_parameter_startpoints(self, n_starts: int = 100): """Create 2D array with starting points for optimization @@ -826,11 +936,11 @@ def sample_parameter_startpoints(self, n_starts: int = 100): See :py:func:`petab.sample_parameter_startpoints`. """ return sampling.sample_parameter_startpoints( - self.parameter_df, n_starts=n_starts) + self.parameter_df, n_starts=n_starts + ) def sample_parameter_startpoints_dict( - self, - n_starts: int = 100 + self, n_starts: int = 100 ) -> List[Dict[str, float]]: """Create dictionaries with starting points for optimization diff --git a/petab/sampling.py b/petab/sampling.py index 51096b43..7a27a73d 100644 --- a/petab/sampling.py +++ b/petab/sampling.py @@ -1,19 +1,18 @@ """Functions related to parameter sampling""" +from typing import Tuple + import numpy as np import pandas as pd -from typing import Tuple - from . 
import parameters from .C import * # noqa: F403 -__all__ = ['sample_from_prior', 'sample_parameter_startpoints'] +__all__ = ["sample_from_prior", "sample_parameter_startpoints"] def sample_from_prior( - prior: Tuple[str, list, str, list], - n_starts: int + prior: Tuple[str, list, str, list], n_starts: int ) -> np.array: """Creates samples for one parameter based on prior @@ -38,7 +37,8 @@ def scale(x): return np.log10(x) raise NotImplementedError( f"Parameter priors on the parameter scale {scaling} are " - "currently not implemented.") + "currently not implemented." + ) def clip_to_bounds(x: np.array): """Clip values in array x to bounds""" @@ -46,48 +46,70 @@ def clip_to_bounds(x: np.array): # define lambda functions for each parameter if p_type == UNIFORM: - sp = scale((p_params[1] - p_params[0]) * np.random.random(( - n_starts,)) + p_params[0]) + sp = scale( + (p_params[1] - p_params[0]) * np.random.random((n_starts,)) + + p_params[0] + ) elif p_type == PARAMETER_SCALE_UNIFORM: - sp = (p_params[1] - p_params[0]) * np.random.random((n_starts, - )) + p_params[0] + sp = (p_params[1] - p_params[0]) * np.random.random( + (n_starts,) + ) + p_params[0] elif p_type == NORMAL: - sp = scale(np.random.normal(loc=p_params[0], scale=p_params[1], - size=(n_starts,))) + sp = scale( + np.random.normal( + loc=p_params[0], scale=p_params[1], size=(n_starts,) + ) + ) elif p_type == LOG_NORMAL: - sp = scale(np.exp(np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,)))) + sp = scale( + np.exp( + np.random.normal( + loc=p_params[0], scale=p_params[1], size=(n_starts,) + ) + ) + ) elif p_type == PARAMETER_SCALE_NORMAL: - sp = np.random.normal(loc=p_params[0], scale=p_params[1], - size=(n_starts,)) + sp = np.random.normal( + loc=p_params[0], scale=p_params[1], size=(n_starts,) + ) elif p_type == LAPLACE: - sp = scale(np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,))) + sp = scale( + np.random.laplace( + loc=p_params[0], scale=p_params[1], size=(n_starts,) + ) + ) elif p_type == LOG_LAPLACE: - sp = scale(np.exp(np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,)))) + sp = scale( + np.exp( + np.random.laplace( + loc=p_params[0], scale=p_params[1], size=(n_starts,) + ) + ) + ) elif p_type == PARAMETER_SCALE_LAPLACE: - sp = np.random.laplace(loc=p_params[0], scale=p_params[1], - size=(n_starts,)) + sp = np.random.laplace( + loc=p_params[0], scale=p_params[1], size=(n_starts,) + ) else: raise NotImplementedError( - f"Parameter priors of type {prior[0]} are not implemented.") + f"Parameter priors of type {prior[0]} are not implemented." 
+ ) return clip_to_bounds(sp) def sample_parameter_startpoints( - parameter_df: pd.DataFrame, - n_starts: int = 100, - seed: int = None, + parameter_df: pd.DataFrame, + n_starts: int = 100, + seed: int = None, ) -> np.array: """Create :class:`numpy.array` with starting points for an optimization @@ -105,7 +127,8 @@ def sample_parameter_startpoints( # get types and parameters of priors from dataframe prior_list = parameters.get_priors_from_df( - parameter_df, mode=INITIALIZATION) + parameter_df, mode=INITIALIZATION + ) startpoints = [sample_from_prior(prior, n_starts) for prior in prior_list] diff --git a/petab/sbml.py b/petab/sbml.py index 733fa8dd..7b5fec16 100644 --- a/petab/sbml.py +++ b/petab/sbml.py @@ -14,21 +14,21 @@ logger = logging.getLogger(__name__) __all__ = [ - 'get_model_for_condition', - 'get_model_parameters', - 'get_sbml_model', - 'globalize_parameters', - 'is_sbml_consistent', - 'load_sbml_from_file', - 'load_sbml_from_string', - 'log_sbml_errors', - 'write_sbml' + "get_model_for_condition", + "get_model_parameters", + "get_sbml_model", + "globalize_parameters", + "is_sbml_consistent", + "load_sbml_from_file", + "load_sbml_from_string", + "log_sbml_errors", + "write_sbml", ] def is_sbml_consistent( - sbml_document: libsbml.SBMLDocument, - check_units: bool = False, + sbml_document: libsbml.SBMLDocument, + check_units: bool = False, ) -> bool: """Check for SBML validity / consistency @@ -42,20 +42,22 @@ def is_sbml_consistent( if not check_units: sbml_document.setConsistencyChecks( - libsbml.LIBSBML_CAT_UNITS_CONSISTENCY, False) + libsbml.LIBSBML_CAT_UNITS_CONSISTENCY, False + ) has_problems = sbml_document.checkConsistency() if has_problems: log_sbml_errors(sbml_document) logger.warning( - 'WARNING: Generated invalid SBML model. Check messages above.') + "WARNING: Generated invalid SBML model. Check messages above." 
+ ) return not has_problems def log_sbml_errors( - sbml_document: libsbml.SBMLDocument, - minimum_severity=libsbml.LIBSBML_SEV_WARNING, + sbml_document: libsbml.SBMLDocument, + minimum_severity=libsbml.LIBSBML_SEV_WARNING, ) -> None: """Log libsbml errors @@ -74,13 +76,15 @@ def log_sbml_errors( category = error.getCategoryAsString() severity_str = error.getSeverityAsString() message = error.getMessage() - logger.log(severity_to_log_level.get(severity, logging.ERROR), - f'libSBML {severity_str} ({category}): {message}') + logger.log( + severity_to_log_level.get(severity, logging.ERROR), + f"libSBML {severity_str} ({category}): {message}", + ) def globalize_parameters( - sbml_model: libsbml.Model, - prepend_reaction_id: bool = False, + sbml_model: libsbml.Model, + prepend_reaction_id: bool = False, ) -> None: """Turn all local parameters into global parameters with the same properties @@ -99,17 +103,19 @@ def globalize_parameters( Prepend reaction id of local parameter when creating global parameters """ - warn("This function will be removed in future releases.", - DeprecationWarning) + warn( + "This function will be removed in future releases.", DeprecationWarning + ) for reaction in sbml_model.getListOfReactions(): law = reaction.getKineticLaw() # copy first so we can delete in the following loop - local_parameters = list(local_parameter for local_parameter - in law.getListOfParameters()) + local_parameters = list( + local_parameter for local_parameter in law.getListOfParameters() + ) for lp in local_parameters: if prepend_reaction_id: - parameter_id = f'{reaction.getId()}_{lp.getId()}' + parameter_id = f"{reaction.getId()}_{lp.getId()}" else: parameter_id = lp.getId() @@ -126,7 +132,7 @@ def globalize_parameters( def get_model_parameters( - sbml_model: libsbml.Model, with_values=False + sbml_model: libsbml.Model, with_values=False ) -> Union[List[str], Dict[str, float]]: """Return SBML model parameters which are not Rule targets @@ -139,17 +145,21 @@ def get_model_parameters( values. """ if not with_values: - return [p.getId() for p in sbml_model.getListOfParameters() - if sbml_model.getRuleByVariable(p.getId()) is None] - - return {p.getId(): p.getValue() + return [ + p.getId() for p in sbml_model.getListOfParameters() - if sbml_model.getRuleByVariable(p.getId()) is None} + if sbml_model.getRuleByVariable(p.getId()) is None + ] + + return { + p.getId(): p.getValue() + for p in sbml_model.getListOfParameters() + if sbml_model.getRuleByVariable(p.getId()) is None + } def write_sbml( - sbml_doc: libsbml.SBMLDocument, - filename: Union[Path, str] + sbml_doc: libsbml.SBMLDocument, filename: Union[Path, str] ) -> None: """Write PEtab visualization table @@ -160,12 +170,14 @@ def write_sbml( sbml_writer = libsbml.SBMLWriter() ret = sbml_writer.writeSBMLToFile(sbml_doc, str(filename)) if not ret: - raise RuntimeError(f"libSBML reported error {ret} when trying to " - f"create SBML file {filename}.") + raise RuntimeError( + f"libSBML reported error {ret} when trying to " + f"create SBML file {filename}." 
+ ) def get_sbml_model( - filepath_or_buffer + filepath_or_buffer, ) -> Tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]: """Get an SBML model from file or URL or file handle @@ -174,8 +186,8 @@ def get_sbml_model( :return: The SBML document, model and reader """ if is_file_like(filepath_or_buffer) or is_url(filepath_or_buffer): - with get_handle(filepath_or_buffer, mode='r') as io_handle: - data = load_sbml_from_string(''.join(io_handle.handle)) + with get_handle(filepath_or_buffer, mode="r") as io_handle: + data = load_sbml_from_string("".join(io_handle.handle)) # URL or already opened file, we will load the model from a string return data @@ -183,7 +195,7 @@ def get_sbml_model( def load_sbml_from_string( - sbml_string: str + sbml_string: str, ) -> Tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]: """Load SBML model from string @@ -192,15 +204,14 @@ def load_sbml_from_string( """ sbml_reader = libsbml.SBMLReader() - sbml_document = \ - sbml_reader.readSBMLFromString(sbml_string) + sbml_document = sbml_reader.readSBMLFromString(sbml_string) sbml_model = sbml_document.getModel() return sbml_reader, sbml_document, sbml_model def load_sbml_from_file( - sbml_file: str + sbml_file: str, ) -> Tuple[libsbml.SBMLReader, libsbml.SBMLDocument, libsbml.Model]: """Load SBML model from file @@ -215,9 +226,9 @@ def load_sbml_from_file( def get_model_for_condition( - petab_problem: "petab.Problem", - sim_condition_id: str = None, - preeq_condition_id: Optional[str] = None, + petab_problem: "petab.Problem", + sim_condition_id: str = None, + preeq_condition_id: Optional[str] = None, ) -> Tuple[libsbml.SBMLDocument, libsbml.Model]: """Create an SBML model for the given condition. @@ -235,31 +246,35 @@ def get_model_for_condition( :return: The generated SBML document, and SBML model """ from .models.sbml_model import SbmlModel + assert isinstance(petab_problem.model, SbmlModel) condition_dict = {petab.SIMULATION_CONDITION_ID: sim_condition_id} if preeq_condition_id: - condition_dict[petab.PREEQUILIBRATION_CONDITION_ID] = \ - preeq_condition_id + condition_dict[ + petab.PREEQUILIBRATION_CONDITION_ID + ] = preeq_condition_id cur_measurement_df = petab.measurements.get_rows_for_condition( measurement_df=petab_problem.measurement_df, condition=condition_dict, ) - parameter_map, scale_map = \ - petab.parameter_mapping.get_parameter_mapping_for_condition( - condition_id=sim_condition_id, - is_preeq=False, - cur_measurement_df=cur_measurement_df, - model=petab_problem.model, - condition_df=petab_problem.condition_df, - parameter_df=petab_problem.parameter_df, - warn_unmapped=True, - scaled_parameters=False, - fill_fixed_parameters=True, - # will only become problematic once the observable and noise terms - # are added to the model - allow_timepoint_specific_numeric_noise_parameters=True, - ) + ( + parameter_map, + scale_map, + ) = petab.parameter_mapping.get_parameter_mapping_for_condition( + condition_id=sim_condition_id, + is_preeq=False, + cur_measurement_df=cur_measurement_df, + model=petab_problem.model, + condition_df=petab_problem.condition_df, + parameter_df=petab_problem.parameter_df, + warn_unmapped=True, + scaled_parameters=False, + fill_fixed_parameters=True, + # will only become problematic once the observable and noise terms + # are added to the model + allow_timepoint_specific_numeric_noise_parameters=True, + ) # create a copy of the model sbml_doc = petab_problem.model.sbml_model.getSBMLDocument().clone() sbml_model = sbml_doc.getModel() @@ -272,19 +287,23 @@ def 
get_param_value(parameter_id: str): # Handle parametric initial concentrations with contextlib.suppress(KeyError): return petab_problem.parameter_df.loc[ - parameter_id, petab.NOMINAL_VALUE] + parameter_id, petab.NOMINAL_VALUE + ] if not isinstance(mapped_value, str): return mapped_value # estimated parameter, look up in nominal parameters - return petab_problem.parameter_df.loc[mapped_value, - petab.NOMINAL_VALUE] + return petab_problem.parameter_df.loc[ + mapped_value, petab.NOMINAL_VALUE + ] def remove_rules(target_id: str): if sbml_model.removeRuleByVariable(target_id): - warn("An SBML rule was removed to set the component " - f"{target_id} to a constant value.") + warn( + "An SBML rule was removed to set the component " + f"{target_id} to a constant value." + ) sbml_model.removeInitialAssignment(target_id) for parameter in sbml_model.getListOfParameters(): @@ -305,14 +324,16 @@ def remove_rules(target_id: str): # set initial concentration/amount new_value = petab.to_float_if_float( - petab_problem.condition_df.loc[sim_condition_id, component_id]) + petab_problem.condition_df.loc[sim_condition_id, component_id] + ) if not isinstance(new_value, Number): # parameter reference in condition table new_value = get_param_value(new_value) - if sbml_species.isSetInitialAmount() \ - or (sbml_species.getHasOnlySubstanceUnits() - and not sbml_species.isSetInitialConcentration()): + if sbml_species.isSetInitialAmount() or ( + sbml_species.getHasOnlySubstanceUnits() + and not sbml_species.isSetInitialConcentration() + ): sbml_species.setInitialAmount(new_value) else: sbml_species.setInitialConcentration(new_value) @@ -328,7 +349,8 @@ def remove_rules(target_id: str): # set initial concentration/amount new_value = petab.to_float_if_float( - petab_problem.condition_df.loc[sim_condition_id, component_id]) + petab_problem.condition_df.loc[sim_condition_id, component_id] + ) if not isinstance(new_value, Number): # parameter reference in condition table new_value = get_param_value(new_value) diff --git a/petab/simplify.py b/petab/simplify.py index 37e89879..39a58ab0 100644 --- a/petab/simplify.py +++ b/petab/simplify.py @@ -4,6 +4,7 @@ import pandas as pd import petab + from . 
import Problem from .C import * # noqa: F403 from .lint import lint_problem @@ -29,19 +30,23 @@ def remove_unused_observables(problem: Problem): """Remove observables that have no measurements""" measured_observables = set(problem.measurement_df[OBSERVABLE_ID].unique()) problem.observable_df = problem.observable_df[ - problem.observable_df.index.isin(measured_observables)] + problem.observable_df.index.isin(measured_observables) + ] def remove_unused_conditions(problem: Problem): """Remove conditions that have no measurements""" - measured_conditions = \ - set(problem.measurement_df[SIMULATION_CONDITION_ID].unique()) + measured_conditions = set( + problem.measurement_df[SIMULATION_CONDITION_ID].unique() + ) if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df: - measured_conditions |= \ - set(problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].unique()) + measured_conditions |= set( + problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].unique() + ) problem.condition_df = problem.condition_df[ - problem.condition_df.index.isin(measured_conditions)] + problem.condition_df.index.isin(measured_conditions) + ] def simplify_problem(problem: Problem): @@ -59,8 +64,11 @@ def simplify_problem(problem: Problem): def condition_parameters_to_parameter_table(problem: Problem): """Move parameters from the condition table to the parameters table, if the same parameter value is used for all conditions.""" - if problem.condition_df is None or problem.condition_df.empty \ - or problem.model is None: + if ( + problem.condition_df is None + or problem.condition_df.empty + or problem.model is None + ): return replacements = {} @@ -89,7 +97,7 @@ def condition_parameters_to_parameter_table(problem: Problem): LOWER_BOUND: nan, UPPER_BOUND: nan, NOMINAL_VALUE: value, - ESTIMATE: 0 + ESTIMATE: 0, } for parameter_id, value in replacements.items() ] @@ -101,5 +109,6 @@ def condition_parameters_to_parameter_table(problem: Problem): else: problem.parameter_df = pd.concat([problem.parameter_df, rows]) - problem.condition_df = \ - problem.condition_df.drop(columns=replacements.keys()) + problem.condition_df = problem.condition_df.drop( + columns=replacements.keys() + ) diff --git a/petab/simulate.py b/petab/simulate.py index 31dcf801..560aa073 100644 --- a/petab/simulate.py +++ b/petab/simulate.py @@ -1,16 +1,18 @@ """PEtab simulator base class and related functions.""" import abc -import numpy as np import pathlib -import pandas as pd -import petab import shutil -import sympy as sp import tempfile from typing import Dict, Optional, Union -__all__ = ['Simulator', 'sample_noise'] +import numpy as np +import pandas as pd +import sympy as sp + +import petab + +__all__ = ["Simulator", "sample_noise"] class Simulator(abc.ABC): @@ -68,7 +70,8 @@ def __init__( self.working_dir.mkdir(parents=True, exist_ok=True) self.noise_formulas = petab.calculate.get_symbolic_noise_formulas( - self.petab_problem.observable_df) + self.petab_problem.observable_df + ) self.rng = np.random.default_rng() def remove_working_dir(self, force: bool = False, **kwargs) -> None: @@ -87,12 +90,16 @@ def remove_working_dir(self, force: bool = False, **kwargs) -> None: if force or self.temporary_working_dir: shutil.rmtree(self.working_dir, **kwargs) if self.working_dir.is_dir(): - print('Failed to remove the working directory: ' - + str(self.working_dir)) + print( + "Failed to remove the working directory: " + + str(self.working_dir) + ) else: - print('By default, specified working directories are not removed. 
' - 'Please call this method with `force=True`, or manually ' - f'delete the working directory: {self.working_dir}') + print( + "By default, specified working directories are not removed. " + "Please call this method with `force=True`, or manually " + f"delete the working directory: {self.working_dir}" + ) @abc.abstractmethod def simulate_without_noise(self) -> pd.DataFrame: @@ -112,11 +119,11 @@ def simulate_without_noise(self) -> pd.DataFrame: raise NotImplementedError() def simulate( - self, - noise: bool = False, - noise_scaling_factor: float = 1, - as_measurement: bool = False, - **kwargs + self, + noise: bool = False, + noise_scaling_factor: float = 1, + as_measurement: bool = False, + **kwargs, ) -> pd.DataFrame: """Simulate a PEtab problem, optionally with noise. @@ -146,10 +153,10 @@ def simulate( return simulation_df def add_noise( - self, - simulation_df: pd.DataFrame, - noise_scaling_factor: float = 1, - **kwargs + self, + simulation_df: pd.DataFrame, + noise_scaling_factor: float = 1, + **kwargs, ) -> pd.DataFrame: """Add noise to simulated data. @@ -182,13 +189,13 @@ def add_noise( def sample_noise( - petab_problem: petab.Problem, - measurement_row: pd.Series, - simulated_value: float, - noise_formulas: Optional[Dict[str, sp.Expr]] = None, - rng: Optional[np.random.Generator] = None, - noise_scaling_factor: float = 1, - zero_bounded: bool = False, + petab_problem: petab.Problem, + measurement_row: pd.Series, + simulated_value: float, + noise_formulas: Optional[Dict[str, sp.Expr]] = None, + rng: Optional[np.random.Generator] = None, + noise_scaling_factor: float = 1, + zero_bounded: bool = False, ) -> float: """Generate a sample from a PEtab noise distribution. @@ -218,7 +225,8 @@ def sample_noise( """ if noise_formulas is None: noise_formulas = petab.calculate.get_symbolic_noise_formulas( - petab_problem.observable_df) + petab_problem.observable_df + ) if rng is None: rng = np.random.default_rng() @@ -226,16 +234,13 @@ def sample_noise( measurement_row, noise_formulas, petab_problem.parameter_df, - simulated_value + simulated_value, ) # default noise distribution is petab.C.NORMAL - noise_distribution = ( - petab_problem - .observable_df - .loc[measurement_row[petab.C.OBSERVABLE_ID]] - .get(petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL) - ) + noise_distribution = petab_problem.observable_df.loc[ + measurement_row[petab.C.OBSERVABLE_ID] + ].get(petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL) # an empty noise distribution column in an observables table can result in # `noise_distribution == float('nan')` if pd.isna(noise_distribution): @@ -243,13 +248,11 @@ def sample_noise( # below is e.g.: `np.random.normal(loc=simulation, scale=noise_value)` simulated_value_with_noise = getattr(rng, noise_distribution)( - loc=simulated_value, - scale=noise_value * noise_scaling_factor + loc=simulated_value, scale=noise_value * noise_scaling_factor ) - if ( - zero_bounded and - np.sign(simulated_value) != np.sign(simulated_value_with_noise) + if zero_bounded and np.sign(simulated_value) != np.sign( + simulated_value_with_noise ): return 0.0 return simulated_value_with_noise diff --git a/petab/version.py b/petab/version.py index 04b038fc..d35910e3 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,2 +1,2 @@ """PEtab library version""" -__version__ = '0.2.3' +__version__ = "0.2.3" diff --git a/petab/visualize/__init__.py b/petab/visualize/__init__.py index 60c5a327..dea23bb1 100644 --- a/petab/visualize/__init__.py +++ b/petab/visualize/__init__.py @@ -12,26 +12,27 @@ from .plotting import 
DataProvider, Figure -__all__ = [ - "DataProvider", - "Figure" -] +__all__ = ["DataProvider", "Figure"] if mpl_spec is not None: from .plot_data_and_simulation import ( - plot_without_vis_spec, - plot_with_vis_spec, plot_problem, + plot_with_vis_spec, + plot_without_vis_spec, + ) + from .plot_residuals import ( + plot_goodness_of_fit, + plot_residuals_vs_simulation, ) - - from .plot_residuals import plot_goodness_of_fit, plot_residuals_vs_simulation from .plotter import MPLPlotter - __all__.extend([ - "plot_without_vis_spec", - "plot_with_vis_spec", - "plot_problem", - "plot_goodness_of_fit", - "plot_residuals_vs_simulation", - "MPLPlotter", - ]) + __all__.extend( + [ + "plot_without_vis_spec", + "plot_with_vis_spec", + "plot_problem", + "plot_goodness_of_fit", + "plot_residuals_vs_simulation", + "MPLPlotter", + ] + ) diff --git a/petab/visualize/cli.py b/petab/visualize/cli.py index 9ec63a53..269461ad 100644 --- a/petab/visualize/cli.py +++ b/petab/visualize/cli.py @@ -4,29 +4,51 @@ import matplotlib.pyplot as plt -from .plot_data_and_simulation import plot_problem from .. import Problem, get_simulation_df, get_visualization_df +from .plot_data_and_simulation import plot_problem def _parse_cli_args(): """Parse command-line arguments.""" parser = argparse.ArgumentParser( - description='Create PEtab visualizations.') - - parser.add_argument('-y', '--yaml', dest='yaml_file_name', required=True, - help='PEtab problem YAML filename') - parser.add_argument('-s', '--simulations', dest='simulation_file_name', - required=False, - help='PEtab simulation filename') - parser.add_argument('-o', '--output-directory', dest='output_directory', - required=True, help='Output directory') - parser.add_argument('-v', '--visualizations', required=False, - dest='visualization_file_name', - help='PEtab visualization specification filename') - parser.add_argument('--style', required=False, - dest='style_file_name', - help='Matplotlib style file') + description="Create PEtab visualizations." 
+ ) + + parser.add_argument( + "-y", + "--yaml", + dest="yaml_file_name", + required=True, + help="PEtab problem YAML filename", + ) + parser.add_argument( + "-s", + "--simulations", + dest="simulation_file_name", + required=False, + help="PEtab simulation filename", + ) + parser.add_argument( + "-o", + "--output-directory", + dest="output_directory", + required=True, + help="Output directory", + ) + parser.add_argument( + "-v", + "--visualizations", + required=False, + dest="visualization_file_name", + help="PEtab visualization specification filename", + ) + parser.add_argument( + "--style", + required=False, + dest="style_file_name", + help="Matplotlib style file", + ) return parser.parse_args() @@ -41,13 +63,14 @@ def _petab_visualize_main(): if args.visualization_file_name: petab_problem.visualization_df = get_visualization_df( - args.visualization_file_name) + args.visualization_file_name + ) if args.style_file_name: plt.style.use(args.style_file_name) # Avoid errors when plotting without X server - plt.switch_backend('agg') + plt.switch_backend("agg") Path(args.output_directory).mkdir(exist_ok=True, parents=True) diff --git a/petab/visualize/data_overview.py b/petab/visualize/data_overview.py index ec0ee9ea..4878b312 100644 --- a/petab/visualize/data_overview.py +++ b/petab/visualize/data_overview.py @@ -7,16 +7,15 @@ from typing import Union import pandas as pd + import petab from petab.C import * -__all__ = ['create_report'] +__all__ = ["create_report"] def create_report( - problem: petab.Problem, - model_name: str, - output_path: Union[str, Path] = '' + problem: petab.Problem, model_name: str, output_path: Union[str, Path] = "" ) -> None: """Create an HTML overview data / model overview report @@ -26,7 +25,7 @@ def create_report( output_path: Output directory """ - template_dir = Path(__file__).absolute().parent / 'templates' + template_dir = Path(__file__).absolute().parent / "templates" output_path = Path(output_path) template_file = "report.html" @@ -35,17 +34,21 @@ def create_report( # Setup template engine import jinja2 + template_loader = jinja2.FileSystemLoader(searchpath=template_dir) template_env = jinja2.Environment(loader=template_loader) template = template_env.get_template(template_file) # Render and save - output_text = template.render(problem=problem, model_name=model_name, - data_per_observable=data_per_observable, - num_conditions=num_conditions) - with open(output_path / f'{model_name}.html', 'w') as html_file: + output_text = template.render( + problem=problem, + model_name=model_name, + data_per_observable=data_per_observable, + num_conditions=num_conditions, + ) + with open(output_path / f"{model_name}.html", "w") as html_file: html_file.write(output_text) - copyfile(template_dir / 'mystyle.css', output_path / 'mystyle.css') + copyfile(template_dir / "mystyle.css", output_path / "mystyle.css") def get_data_per_observable(measurement_df: pd.DataFrame) -> pd.DataFrame: @@ -58,21 +61,27 @@ def get_data_per_observable(measurement_df: pd.DataFrame) -> pd.DataFrame: """ my_measurements = measurement_df.copy() - my_measurements[PREEQUILIBRATION_CONDITION_ID] = my_measurements[PREEQUILIBRATION_CONDITION_ID].astype("object") + my_measurements[PREEQUILIBRATION_CONDITION_ID] = my_measurements[ + PREEQUILIBRATION_CONDITION_ID + ].astype("object") index = [SIMULATION_CONDITION_ID] if PREEQUILIBRATION_CONDITION_ID in my_measurements: - my_measurements[PREEQUILIBRATION_CONDITION_ID].fillna('', inplace=True) + my_measurements[PREEQUILIBRATION_CONDITION_ID].fillna("", 
inplace=True) index.append(PREEQUILIBRATION_CONDITION_ID) data_per_observable = pd.pivot_table( - my_measurements, values=MEASUREMENT, aggfunc='count', + my_measurements, + values=MEASUREMENT, + aggfunc="count", index=index, - columns=[OBSERVABLE_ID], fill_value=0) + columns=[OBSERVABLE_ID], + fill_value=0, + ) # Add row and column sums - data_per_observable.loc['SUM', :] = data_per_observable.sum(axis=0).values - data_per_observable['SUM'] = data_per_observable.sum(axis=1).values + data_per_observable.loc["SUM", :] = data_per_observable.sum(axis=0).values + data_per_observable["SUM"] = data_per_observable.sum(axis=1).values data_per_observable = data_per_observable.astype(int) diff --git a/petab/visualize/helper_functions.py b/petab/visualize/helper_functions.py index d22dfffa..7d6fb02f 100644 --- a/petab/visualize/helper_functions.py +++ b/petab/visualize/helper_functions.py @@ -14,8 +14,8 @@ IdsList = List[str] NumList = List[int] __all__ = [ - 'create_dataset_id_list_new', - 'generate_dataset_id_col', + "create_dataset_id_list_new", + "generate_dataset_id_col", ] @@ -43,16 +43,15 @@ def generate_dataset_id_col(exp_data: pd.DataFrame) -> List[str]: for ind, cond_id in enumerate(tmp_simcond): # create and add dummy datasetID - dataset_id = cond_id + '_' + tmp_obs[ind] + dataset_id = cond_id + "_" + tmp_obs[ind] dataset_id_column.append(dataset_id) return dataset_id_column -def create_dataset_id_list_new(df: pd.DataFrame, - group_by: str, - id_list: List[IdsList] - ) -> List[IdsList]: +def create_dataset_id_list_new( + df: pd.DataFrame, group_by: str, id_list: List[IdsList] +) -> List[IdsList]: """ Create dataset ID list from a list of simulation condition IDs or observable IDs. @@ -70,13 +69,13 @@ def create_dataset_id_list_new(df: pd.DataFrame, """ if DATASET_ID not in df.columns: - raise ValueError(f'{DATASET_ID} column must be in exp_data DataFrame') + raise ValueError(f"{DATASET_ID} column must be in exp_data DataFrame") dataset_id_list = [] - if group_by == 'simulation': + if group_by == "simulation": groupping_col = SIMULATION_CONDITION_ID - elif group_by == 'observable': + elif group_by == "observable": groupping_col = OBSERVABLE_ID if id_list is None: # this is the default case. If no grouping is specified, @@ -89,8 +88,8 @@ def create_dataset_id_list_new(df: pd.DataFrame, for sublist in id_list: plot_id_list = [] for cond_id in sublist: - plot_id_list.extend(list( - df[df[groupping_col] == cond_id][ - DATASET_ID].unique())) + plot_id_list.extend( + list(df[df[groupping_col] == cond_id][DATASET_ID].unique()) + ) dataset_id_list.append(plot_id_list) return dataset_id_list diff --git a/petab/visualize/lint.py b/petab/visualize/lint.py index 413d0c89..c737b29b 100644 --- a/petab/visualize/lint.py +++ b/petab/visualize/lint.py @@ -6,13 +6,10 @@ from .. import C, Problem from ..C import VISUALIZATION_DF_REQUIRED_COLS - logger = logging.getLogger(__name__) -def validate_visualization_df( - problem: Problem -) -> bool: +def validate_visualization_df(problem: Problem) -> bool: """Validate visualization table Arguments: @@ -27,10 +24,13 @@ def validate_visualization_df( errors = False - if missing_req_cols := (set(VISUALIZATION_DF_REQUIRED_COLS) - - set(vis_df.columns)): - logger.error(f"Missing required columns {missing_req_cols} " - "in visualization table.") + if missing_req_cols := ( + set(VISUALIZATION_DF_REQUIRED_COLS) - set(vis_df.columns) + ): + logger.error( + f"Missing required columns {missing_req_cols} " + "in visualization table." 
+ ) errors = True # Set all unspecified optional values to their defaults to simplify @@ -38,55 +38,72 @@ def validate_visualization_df( vis_df = vis_df.copy() _apply_defaults(vis_df) - if unknown_types := (set(vis_df[C.PLOT_TYPE_SIMULATION].unique()) - - set(C.PLOT_TYPES_SIMULATION)): - logger.error(f"Unknown {C.PLOT_TYPE_SIMULATION}: {unknown_types}. " - f"Must be one of {C.PLOT_TYPES_SIMULATION}") + if unknown_types := ( + set(vis_df[C.PLOT_TYPE_SIMULATION].unique()) + - set(C.PLOT_TYPES_SIMULATION) + ): + logger.error( + f"Unknown {C.PLOT_TYPE_SIMULATION}: {unknown_types}. " + f"Must be one of {C.PLOT_TYPES_SIMULATION}" + ) errors = True - if unknown_types := (set(vis_df[C.PLOT_TYPE_DATA].unique()) - - set(C.PLOT_TYPES_DATA)): - logger.error(f"Unknown {C.PLOT_TYPE_DATA}: {unknown_types}. " - f"Must be one of {C.PLOT_TYPES_DATA}") + if unknown_types := ( + set(vis_df[C.PLOT_TYPE_DATA].unique()) - set(C.PLOT_TYPES_DATA) + ): + logger.error( + f"Unknown {C.PLOT_TYPE_DATA}: {unknown_types}. " + f"Must be one of {C.PLOT_TYPES_DATA}" + ) errors = True - if unknown_scale := (set(vis_df[C.X_SCALE].unique()) - - set(C.X_SCALES)): - logger.error(f"Unknown {C.X_SCALE}: {unknown_scale}. " - f"Must be one of {C.X_SCALES}") + if unknown_scale := (set(vis_df[C.X_SCALE].unique()) - set(C.X_SCALES)): + logger.error( + f"Unknown {C.X_SCALE}: {unknown_scale}. " + f"Must be one of {C.X_SCALES}" + ) errors = True if any( - (vis_df[C.X_SCALE] == 'order') - & (vis_df[C.PLOT_TYPE_SIMULATION] != C.LINE_PLOT) + (vis_df[C.X_SCALE] == "order") + & (vis_df[C.PLOT_TYPE_SIMULATION] != C.LINE_PLOT) ): - logger.error(f"{C.X_SCALE}=order is only allowed with " - f"{C.PLOT_TYPE_SIMULATION}={C.LINE_PLOT}.") + logger.error( + f"{C.X_SCALE}=order is only allowed with " + f"{C.PLOT_TYPE_SIMULATION}={C.LINE_PLOT}." + ) errors = True - if unknown_scale := (set(vis_df[C.Y_SCALE].unique()) - - set(C.Y_SCALES)): - logger.error(f"Unknown {C.Y_SCALE}: {unknown_scale}. " - f"Must be one of {C.Y_SCALES}") + if unknown_scale := (set(vis_df[C.Y_SCALE].unique()) - set(C.Y_SCALES)): + logger.error( + f"Unknown {C.Y_SCALE}: {unknown_scale}. " + f"Must be one of {C.Y_SCALES}" + ) errors = True if problem.condition_df is not None: # check for ambiguous values reserved_names = {C.TIME, "condition"} for reserved_name in reserved_names: - if reserved_name in problem.condition_df \ - and reserved_name in vis_df[C.X_VALUES]: - logger.error(f"Ambiguous value for `{C.X_VALUES}`: " - f"`{reserved_name}` has a special meaning as " - f"`{C.X_VALUES}`, but there exists also a model " - "entity with that name.") + if ( + reserved_name in problem.condition_df + and reserved_name in vis_df[C.X_VALUES] + ): + logger.error( + f"Ambiguous value for `{C.X_VALUES}`: " + f"`{reserved_name}` has a special meaning as " + f"`{C.X_VALUES}`, but there exists also a model " + "entity with that name." + ) errors = True # check xValues exist in condition table for xvalue in set(vis_df[C.X_VALUES].unique()) - reserved_names: if xvalue not in problem.condition_df: - logger.error(f"{C.X_VALUES} was set to `{xvalue}`, but no " - "such column exists in the conditions table.") + logger.error( + f"{C.X_VALUES} was set to `{xvalue}`, but no " + "such column exists in the conditions table." + ) errors = True if problem.observable_df is not None: @@ -98,8 +115,8 @@ def validate_visualization_df( continue logger.error( - f'{C.Y_VALUES} must be specified if there is more ' - 'than one observable.' + f"{C.Y_VALUES} must be specified if there is more " + "than one observable." 
) errors = True @@ -120,12 +137,13 @@ def _apply_defaults(vis_df: pd.DataFrame): Adds default values to the given visualization table where no value was specified. """ + def set_default(column: str, value): if column not in vis_df: vis_df[column] = value elif value is not None: if isinstance(value, str): - vis_df[column] = vis_df[column].astype('object') + vis_df[column] = vis_df[column].astype("object") vis_df[column].fillna(value, inplace=True) set_default(C.PLOT_NAME, "") diff --git a/petab/visualize/plot_data_and_simulation.py b/petab/visualize/plot_data_and_simulation.py index 40d4c6ba..6d254c50 100644 --- a/petab/visualize/plot_data_and_simulation.py +++ b/petab/visualize/plot_data_and_simulation.py @@ -6,30 +6,26 @@ import matplotlib.pyplot as plt import pandas as pd -from .plotter import MPLPlotter -from .plotting import VisSpecParser from .. import problem from ..C import * +from .plotter import MPLPlotter +from .plotting import VisSpecParser # for typehints IdsList = List[str] NumList = List[int] -__all__ = [ - "plot_with_vis_spec", - "plot_without_vis_spec", - "plot_problem" -] +__all__ = ["plot_with_vis_spec", "plot_without_vis_spec", "plot_problem"] def plot_with_vis_spec( - vis_spec_df: Union[str, pd.DataFrame], - conditions_df: Union[str, pd.DataFrame], - measurements_df: Optional[Union[str, pd.DataFrame]] = None, - simulations_df: Optional[Union[str, pd.DataFrame]] = None, - subplot_dir: Optional[str] = None, - plotter_type: str = 'mpl', - format_: str = 'png', + vis_spec_df: Union[str, pd.DataFrame], + conditions_df: Union[str, pd.DataFrame], + measurements_df: Optional[Union[str, pd.DataFrame]] = None, + simulations_df: Optional[Union[str, pd.DataFrame]] = None, + subplot_dir: Optional[str] = None, + plotter_type: str = "mpl", + format_: str = "png", ) -> Optional[Dict[str, plt.Subplot]]: """ Plot measurements and/or simulations. Specification of the visualization @@ -64,32 +60,36 @@ def plot_with_vis_spec( """ if measurements_df is None and simulations_df is None: - raise TypeError('Not enough arguments. Either measurements_data ' - 'or simulations_data should be provided.') - - vis_spec_parser = VisSpecParser(conditions_df, measurements_df, - simulations_df) + raise TypeError( + "Not enough arguments. Either measurements_data " + "or simulations_data should be provided." + ) + + vis_spec_parser = VisSpecParser( + conditions_df, measurements_df, simulations_df + ) figure, dataprovider = vis_spec_parser.parse_from_vis_spec(vis_spec_df) - if plotter_type == 'mpl': + if plotter_type == "mpl": plotter = MPLPlotter(figure, dataprovider) else: - raise NotImplementedError('Currently, only visualization with ' - 'matplotlib is possible.') + raise NotImplementedError( + "Currently, only visualization with " "matplotlib is possible." 
+ ) return plotter.generate_figure(subplot_dir, format_=format_) def plot_without_vis_spec( - conditions_df: Union[str, pd.DataFrame], - grouping_list: Optional[List[IdsList]] = None, - group_by: str = 'observable', - measurements_df: Optional[Union[str, pd.DataFrame]] = None, - simulations_df: Optional[Union[str, pd.DataFrame]] = None, - plotted_noise: str = MEAN_AND_SD, - subplot_dir: Optional[str] = None, - plotter_type: str = 'mpl', - format_: str = 'png', + conditions_df: Union[str, pd.DataFrame], + grouping_list: Optional[List[IdsList]] = None, + group_by: str = "observable", + measurements_df: Optional[Union[str, pd.DataFrame]] = None, + simulations_df: Optional[Union[str, pd.DataFrame]] = None, + plotted_noise: str = MEAN_AND_SD, + subplot_dir: Optional[str] = None, + plotter_type: str = "mpl", + format_: str = "png", ) -> Optional[Dict[str, plt.Subplot]]: """ Plot measurements and/or simulations. What exactly should be plotted is @@ -135,32 +135,37 @@ def plot_without_vis_spec( """ if measurements_df is None and simulations_df is None: - raise TypeError('Not enough arguments. Either measurements_data ' - 'or simulations_data should be provided.') + raise TypeError( + "Not enough arguments. Either measurements_data " + "or simulations_data should be provided." + ) - vis_spec_parser = VisSpecParser(conditions_df, measurements_df, - simulations_df) + vis_spec_parser = VisSpecParser( + conditions_df, measurements_df, simulations_df + ) figure, dataprovider = vis_spec_parser.parse_from_id_list( - grouping_list, group_by, plotted_noise) + grouping_list, group_by, plotted_noise + ) - if plotter_type == 'mpl': + if plotter_type == "mpl": plotter = MPLPlotter(figure, dataprovider) else: - raise NotImplementedError('Currently, only visualization with ' - 'matplotlib is possible.') + raise NotImplementedError( + "Currently, only visualization with " "matplotlib is possible." + ) return plotter.generate_figure(subplot_dir, format_=format_) def plot_problem( - petab_problem: problem.Problem, - simulations_df: Optional[Union[str, pd.DataFrame]] = None, - grouping_list: Optional[List[IdsList]] = None, - group_by: str = 'observable', - plotted_noise: str = MEAN_AND_SD, - subplot_dir: Optional[str] = None, - plotter_type: str = 'mpl' + petab_problem: problem.Problem, + simulations_df: Optional[Union[str, pd.DataFrame]] = None, + grouping_list: Optional[List[IdsList]] = None, + group_by: str = "observable", + plotted_noise: str = MEAN_AND_SD, + subplot_dir: Optional[str] = None, + plotter_type: str = "mpl", ) -> Optional[Dict[str, plt.Subplot]]: """ Visualization using petab problem. 
@@ -200,17 +205,21 @@ def plot_problem( """ if petab_problem.visualization_df is not None: - return plot_with_vis_spec(petab_problem.visualization_df, - petab_problem.condition_df, - petab_problem.measurement_df, - simulations_df, - subplot_dir, - plotter_type) - return plot_without_vis_spec(petab_problem.condition_df, - grouping_list, - group_by, - petab_problem.measurement_df, - simulations_df, - plotted_noise, - subplot_dir, - plotter_type) + return plot_with_vis_spec( + petab_problem.visualization_df, + petab_problem.condition_df, + petab_problem.measurement_df, + simulations_df, + subplot_dir, + plotter_type, + ) + return plot_without_vis_spec( + petab_problem.condition_df, + grouping_list, + group_by, + petab_problem.measurement_df, + simulations_df, + plotted_noise, + subplot_dir, + plotter_type, + ) diff --git a/petab/visualize/plot_residuals.py b/petab/visualize/plot_residuals.py index 58fd3292..44418c9d 100644 --- a/petab/visualize/plot_residuals.py +++ b/petab/visualize/plot_residuals.py @@ -8,22 +8,21 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd - from scipy import stats +from ..C import * from ..calculate import calculate_residuals from ..core import get_simulation_df from ..problem import Problem -from ..C import * -__all__ = ['plot_goodness_of_fit', 'plot_residuals_vs_simulation'] +__all__ = ["plot_goodness_of_fit", "plot_residuals_vs_simulation"] def plot_residuals_vs_simulation( - petab_problem: Problem, - simulations_df: Union[str, Path, pd.DataFrame], - size: Optional[Tuple] = (10, 7), - axes: Optional[Tuple[plt.Axes, plt.Axes]] = None + petab_problem: Problem, + simulations_df: Union[str, Path, pd.DataFrame], + size: Optional[Tuple] = (10, 7), + axes: Optional[Tuple[plt.Axes, plt.Axes]] = None, ) -> matplotlib.axes.Axes: """ Plot residuals versus simulation values for measurements with normal noise @@ -51,8 +50,11 @@ def plot_residuals_vs_simulation( if NOISE_DISTRIBUTION in petab_problem.observable_df: if OBSERVABLE_TRANSFORMATION in petab_problem.observable_df: observable_ids = petab_problem.observable_df[ - (petab_problem.observable_df[NOISE_DISTRIBUTION] == NORMAL) & - (petab_problem.observable_df[OBSERVABLE_TRANSFORMATION] == LIN) + (petab_problem.observable_df[NOISE_DISTRIBUTION] == NORMAL) + & ( + petab_problem.observable_df[OBSERVABLE_TRANSFORMATION] + == LIN + ) ].index else: @@ -63,12 +65,15 @@ def plot_residuals_vs_simulation( observable_ids = petab_problem.observable_df.index if observable_ids.empty: - raise ValueError("Residuals plot is only applicable for normal " - "additive noise assumption") + raise ValueError( + "Residuals plot is only applicable for normal " + "additive noise assumption" + ) if axes is None: - fig, axes = plt.subplots(1, 2, sharey=True, figsize=size, - width_ratios=[2, 1]) + fig, axes = plt.subplots( + 1, 2, sharey=True, figsize=size, width_ratios=[2, 1] + ) fig.set_layout_engine("tight") fig.suptitle("Residuals") @@ -76,48 +81,60 @@ def plot_residuals_vs_simulation( measurement_dfs=petab_problem.measurement_df, simulation_dfs=simulations_df, observable_dfs=petab_problem.observable_df, - parameter_dfs=petab_problem.parameter_df)[0] + parameter_dfs=petab_problem.parameter_df, + )[0] - normal_residuals = residual_df[residual_df[OBSERVABLE_ID].isin( - observable_ids)] + normal_residuals = residual_df[ + residual_df[OBSERVABLE_ID].isin(observable_ids) + ] simulations_normal = simulations_df[ - simulations_df[OBSERVABLE_ID].isin(observable_ids)] + simulations_df[OBSERVABLE_ID].isin(observable_ids) + ] # compare 
to standard normal distribution ks_result = stats.kstest(normal_residuals[RESIDUAL], stats.norm.cdf) # plot the residuals plot - axes[0].hlines(y=0, xmin=min(simulations_normal[SIMULATION]), - xmax=max(simulations_normal[SIMULATION]), ls='--', - color='gray') - axes[0].scatter(simulations_normal[SIMULATION], - normal_residuals[RESIDUAL]) - axes[0].text(0.15, 0.85, - f'Kolmogorov-Smirnov test results:\n' - f'statistic: {ks_result[0]:.2f}\n' - f'pvalue: {ks_result[1]:.2e} ', transform=axes[0].transAxes) - axes[0].set_xlabel('simulated values') - axes[0].set_ylabel('residuals') + axes[0].hlines( + y=0, + xmin=min(simulations_normal[SIMULATION]), + xmax=max(simulations_normal[SIMULATION]), + ls="--", + color="gray", + ) + axes[0].scatter(simulations_normal[SIMULATION], normal_residuals[RESIDUAL]) + axes[0].text( + 0.15, + 0.85, + f"Kolmogorov-Smirnov test results:\n" + f"statistic: {ks_result[0]:.2f}\n" + f"pvalue: {ks_result[1]:.2e} ", + transform=axes[0].transAxes, + ) + axes[0].set_xlabel("simulated values") + axes[0].set_ylabel("residuals") # plot histogram - axes[1].hist(normal_residuals[RESIDUAL], density=True, - orientation='horizontal') - axes[1].set_xlabel('distribution') + axes[1].hist( + normal_residuals[RESIDUAL], density=True, orientation="horizontal" + ) + axes[1].set_xlabel("distribution") ymin, ymax = axes[0].get_ylim() ylim = max(abs(ymin), abs(ymax)) axes[0].set_ylim(-ylim, ylim) - axes[1].tick_params(left=False, labelleft=False, right=True, - labelright=True) + axes[1].tick_params( + left=False, labelleft=False, right=True, labelright=True + ) return axes def plot_goodness_of_fit( - petab_problem: Problem, - simulations_df: Union[str, Path, pd.DataFrame], - size: Tuple = (10, 7), - ax: Optional[plt.Axes] = None + petab_problem: Problem, + simulations_df: Union[str, Path, pd.DataFrame], + size: Tuple = (10, 7), + ax: Optional[plt.Axes] = None, ) -> matplotlib.axes.Axes: """ Plot goodness of fit. 
@@ -143,49 +160,54 @@ def plot_goodness_of_fit( simulations_df = get_simulation_df(simulations_df) if simulations_df is None or petab_problem.measurement_df is None: - raise NotImplementedError('Both measurements and simulation data ' - 'are needed for goodness_of_fit') + raise NotImplementedError( + "Both measurements and simulation data " + "are needed for goodness_of_fit" + ) residual_df = calculate_residuals( measurement_dfs=petab_problem.measurement_df, simulation_dfs=simulations_df, observable_dfs=petab_problem.observable_df, - parameter_dfs=petab_problem.parameter_df)[0] + parameter_dfs=petab_problem.parameter_df, + )[0] slope, intercept, r_value, p_value, std_err = stats.linregress( - petab_problem.measurement_df['measurement'], - simulations_df['simulation']) # x, y + petab_problem.measurement_df["measurement"], + simulations_df["simulation"], + ) # x, y if ax is None: fig, ax = plt.subplots(figsize=size) fig.set_layout_engine("tight") - ax.scatter(petab_problem.measurement_df['measurement'], - simulations_df['simulation']) + ax.scatter( + petab_problem.measurement_df["measurement"], + simulations_df["simulation"], + ) - ax.axis('square') + ax.axis("square") xlim = ax.get_xlim() ylim = ax.get_ylim() - lim = [min([xlim[0], ylim[0]]), - max([xlim[1], ylim[1]])] + lim = [min([xlim[0], ylim[0]]), max([xlim[1], ylim[1]])] ax.set_xlim(lim) ax.set_ylim(lim) x = np.linspace(lim, 100) - ax.plot(x, x, linestyle='--', - color='gray') - ax.plot(x, - intercept + slope*x, 'r', - label='fitted line') - - mse = np.mean(np.abs(residual_df['residual'])) - ax.text(0.1, 0.70, - f'$R^2$: {r_value**2:.2f}\n' - f'slope: {slope:.2f}\n' - f'intercept: {intercept:.2f}\n' - f'pvalue: {std_err:.2e}\n' - f'mean squared error: {mse:.2e}\n', - transform=ax.transAxes) + ax.plot(x, x, linestyle="--", color="gray") + ax.plot(x, intercept + slope * x, "r", label="fitted line") + + mse = np.mean(np.abs(residual_df["residual"])) + ax.text( + 0.1, + 0.70, + f"$R^2$: {r_value**2:.2f}\n" + f"slope: {slope:.2f}\n" + f"intercept: {intercept:.2f}\n" + f"pvalue: {std_err:.2e}\n" + f"mean squared error: {mse:.2e}\n", + transform=ax.transAxes, + ) ax.set_title("Goodness of fit") - ax.set_xlabel('simulated values') - ax.set_ylabel('measurements') + ax.set_xlabel("simulated values") + ax.set_ylabel("measurements") return ax diff --git a/petab/visualize/plotter.py b/petab/visualize/plotter.py index 7cb69b54..90cab85e 100644 --- a/petab/visualize/plotter.py +++ b/petab/visualize/plotter.py @@ -1,20 +1,19 @@ """PEtab visualization plotter classes""" import os +from abc import ABC, abstractmethod +from typing import Dict, List, Optional, Tuple, Union import matplotlib.axes +import matplotlib.ticker as mtick import numpy as np import pandas as pd - -from abc import ABC, abstractmethod -from typing import Dict, List, Optional, Tuple, Union from matplotlib import pyplot as plt from mpl_toolkits.axes_grid1 import make_axes_locatable -import matplotlib.ticker as mtick -from .plotting import (Figure, DataProvider, Subplot, DataPlot, DataSeries) from ..C import * +from .plotting import DataPlot, DataProvider, DataSeries, Figure, Subplot -__all__ = ['Plotter', 'MPLPlotter', 'SeabornPlotter'] +__all__ = ["Plotter", "MPLPlotter", "SeabornPlotter"] class Plotter(ABC): @@ -30,14 +29,14 @@ class Plotter(ABC): data_provider: Data provider """ + def __init__(self, figure: Figure, data_provider: DataProvider): self.figure = figure self.data_provider = data_provider @abstractmethod def generate_figure( - self, - subplot_dir: Optional[str] = None + 
self, subplot_dir: Optional[str] = None ) -> Optional[Dict[str, plt.Subplot]]: pass @@ -46,6 +45,7 @@ class MPLPlotter(Plotter): """ Matplotlib wrapper """ + def __init__(self, figure: Figure, data_provider: DataProvider): super().__init__(figure, data_provider) @@ -63,19 +63,19 @@ def _error_column_for_plot_type_data(plot_type_data: str) -> Optional[str]: Name of corresponding column """ if plot_type_data == MEAN_AND_SD: - return 'sd' + return "sd" if plot_type_data == MEAN_AND_SEM: - return 'sem' + return "sem" if plot_type_data == PROVIDED: - return 'noise_model' + return "noise_model" return None def generate_lineplot( - self, - ax: matplotlib.axes.Axes, - dataplot: DataPlot, - plotTypeData: str, - splitaxes_params: dict + self, + ax: matplotlib.axes.Axes, + dataplot: DataPlot, + plotTypeData: str, + splitaxes_params: dict, ) -> Tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]: """ Generate lineplot. @@ -94,28 +94,35 @@ def generate_lineplot( """ simu_color = None - measurements_to_plot, simulations_to_plot = \ - self.data_provider.get_data_to_plot(dataplot, - plotTypeData == PROVIDED) + ( + measurements_to_plot, + simulations_to_plot, + ) = self.data_provider.get_data_to_plot( + dataplot, plotTypeData == PROVIDED + ) noise_col = self._error_column_for_plot_type_data(plotTypeData) label_base = dataplot.legendEntry # check if t_inf is there # todo: if only t_inf, adjust appearance for that case - plot_at_t_inf = (measurements_to_plot is not None and - measurements_to_plot.inf_point) or ( - simulations_to_plot is not None and - simulations_to_plot.inf_point) - - if measurements_to_plot is not None \ - and not measurements_to_plot.data_to_plot.empty: + plot_at_t_inf = ( + measurements_to_plot is not None and measurements_to_plot.inf_point + ) or ( + simulations_to_plot is not None and simulations_to_plot.inf_point + ) + + if ( + measurements_to_plot is not None + and not measurements_to_plot.data_to_plot.empty + ): # plotting all measurement data p = None if plotTypeData == REPLICATE: replicates = np.stack( - measurements_to_plot.data_to_plot.repl.values) + measurements_to_plot.data_to_plot.repl.values + ) if replicates.ndim == 1: replicates = np.expand_dims(replicates, axis=1) @@ -123,26 +130,34 @@ def generate_lineplot( p = ax.plot( measurements_to_plot.conditions, replicates[:, 0], - linestyle='-.', - marker='x', markersize=10, label=label_base + linestyle="-.", + marker="x", + markersize=10, + label=label_base, ) # plot other replicates with the same color ax.plot( measurements_to_plot.conditions, replicates[:, 1:], - linestyle='-.', - marker='x', markersize=10, color=p[0].get_color() + linestyle="-.", + marker="x", + markersize=10, + color=p[0].get_color(), ) # construct errorbar-plots: noise specified above else: # sorts according to ascending order of conditions - scond, smean, snoise = \ - zip(*sorted(zip( - measurements_to_plot.conditions, - measurements_to_plot.data_to_plot['mean'], - measurements_to_plot.data_to_plot[noise_col]))) + scond, smean, snoise = zip( + *sorted( + zip( + measurements_to_plot.conditions, + measurements_to_plot.data_to_plot["mean"], + measurements_to_plot.data_to_plot[noise_col], + ) + ) + ) if np.inf in scond: # remove inf point @@ -153,8 +168,12 @@ def generate_lineplot( if len(scond) > 0 and len(smean) > 0 and len(snoise) > 0: # if only t=inf there will be nothing to plot p = ax.errorbar( - scond, smean, snoise, - linestyle='-.', marker='.', label=label_base + scond, + smean, + snoise, + linestyle="-.", + marker=".", + label=label_base, ) # 
simulations should have the same colors if both measurements @@ -162,23 +181,37 @@ def generate_lineplot( simu_color = p[0].get_color() if p else None # construct simulation plot - if simulations_to_plot is not None \ - and not simulations_to_plot.data_to_plot.empty: + if ( + simulations_to_plot is not None + and not simulations_to_plot.data_to_plot.empty + ): # markers will be displayed only for points that have measurement # counterpart if measurements_to_plot is not None: - meas_conditions = measurements_to_plot.conditions.to_numpy() \ - if isinstance(measurements_to_plot.conditions, pd.Series) \ + meas_conditions = ( + measurements_to_plot.conditions.to_numpy() + if isinstance(measurements_to_plot.conditions, pd.Series) else measurements_to_plot.conditions - every = [condition in meas_conditions - for condition in simulations_to_plot.conditions] + ) + every = [ + condition in meas_conditions + for condition in simulations_to_plot.conditions + ] else: every = None # sorts according to ascending order of conditions - xs, ys = map(list, zip(*sorted(zip( - simulations_to_plot.conditions, - simulations_to_plot.data_to_plot['mean'])))) + xs, ys = map( + list, + zip( + *sorted( + zip( + simulations_to_plot.conditions, + simulations_to_plot.data_to_plot["mean"], + ) + ) + ), + ) if np.inf in xs: # remove inf point @@ -188,8 +221,13 @@ def generate_lineplot( if len(xs) > 0 and len(ys) > 0: p = ax.plot( - xs, ys, linestyle='-', marker='o', markevery=every, - label=label_base + " simulation", color=simu_color + xs, + ys, + linestyle="-", + marker="o", + markevery=every, + label=label_base + " simulation", + color=simu_color, ) # lines at t=inf should have the same colors also in case # only simulations are plotted @@ -197,23 +235,24 @@ def generate_lineplot( # plot inf points if plot_at_t_inf: - ax, splitaxes_params['ax_inf'] = self._line_plot_at_t_inf( - ax, plotTypeData, + ax, splitaxes_params["ax_inf"] = self._line_plot_at_t_inf( + ax, + plotTypeData, measurements_to_plot, simulations_to_plot, noise_col, label_base, splitaxes_params, - color=simu_color + color=simu_color, ) - return ax, splitaxes_params['ax_inf'] + return ax, splitaxes_params["ax_inf"] def generate_barplot( - self, - ax: 'matplotlib.pyplot.Axes', - dataplot: DataPlot, - plotTypeData: str + self, + ax: "matplotlib.pyplot.Axes", + dataplot: DataPlot, + plotTypeData: str, ) -> None: """ Generate barplot. @@ -230,41 +269,54 @@ def generate_barplot( # TODO: plotTypeData == REPLICATE? 
noise_col = self._error_column_for_plot_type_data(plotTypeData) - measurements_to_plot, simulations_to_plot = \ - self.data_provider.get_data_to_plot(dataplot, - plotTypeData == PROVIDED) + ( + measurements_to_plot, + simulations_to_plot, + ) = self.data_provider.get_data_to_plot( + dataplot, plotTypeData == PROVIDED + ) x_name = dataplot.legendEntry if simulations_to_plot: bar_kwargs = { - 'align': 'edge', - 'width': -1/3, + "align": "edge", + "width": -1 / 3, } else: bar_kwargs = { - 'align': 'center', - 'width': 2/3, + "align": "center", + "width": 2 / 3, } color = plt.rcParams["axes.prop_cycle"].by_key()["color"][0] if measurements_to_plot is not None: - ax.bar(x_name, measurements_to_plot.data_to_plot['mean'], - yerr=measurements_to_plot.data_to_plot[noise_col], - color=color, **bar_kwargs, label='measurement') + ax.bar( + x_name, + measurements_to_plot.data_to_plot["mean"], + yerr=measurements_to_plot.data_to_plot[noise_col], + color=color, + **bar_kwargs, + label="measurement", + ) if simulations_to_plot is not None: - bar_kwargs['width'] = -bar_kwargs['width'] - ax.bar(x_name, simulations_to_plot.data_to_plot['mean'], - color='white', edgecolor=color, **bar_kwargs, - label='simulation') + bar_kwargs["width"] = -bar_kwargs["width"] + ax.bar( + x_name, + simulations_to_plot.data_to_plot["mean"], + color="white", + edgecolor=color, + **bar_kwargs, + label="simulation", + ) def generate_scatterplot( - self, - ax: 'matplotlib.pyplot.Axes', - dataplot: DataPlot, - plotTypeData: str + self, + ax: "matplotlib.pyplot.Axes", + dataplot: DataPlot, + plotTypeData: str, ) -> None: """ Generate scatterplot. @@ -278,23 +330,30 @@ def generate_scatterplot( plotTypeData: Specifies how replicates should be handled. """ - measurements_to_plot, simulations_to_plot = \ - self.data_provider.get_data_to_plot(dataplot, - plotTypeData == PROVIDED) + ( + measurements_to_plot, + simulations_to_plot, + ) = self.data_provider.get_data_to_plot( + dataplot, plotTypeData == PROVIDED + ) if simulations_to_plot is None or measurements_to_plot is None: - raise NotImplementedError('Both measurements and simulation data ' - 'are needed for scatter plots') - ax.scatter(measurements_to_plot.data_to_plot['mean'], - simulations_to_plot.data_to_plot['mean'], - label=getattr(dataplot, LEGEND_ENTRY)) + raise NotImplementedError( + "Both measurements and simulation data " + "are needed for scatter plots" + ) + ax.scatter( + measurements_to_plot.data_to_plot["mean"], + simulations_to_plot.data_to_plot["mean"], + label=getattr(dataplot, LEGEND_ENTRY), + ) self._square_plot_equal_ranges(ax) def generate_subplot( - self, - fig: matplotlib.figure.Figure, - ax: matplotlib.axes.Axes, - subplot: Subplot + self, + fig: matplotlib.figure.Figure, + ax: matplotlib.axes.Axes, + subplot: Subplot, ) -> None: """ Generate subplot based on markup provided by subplot. @@ -344,7 +403,7 @@ def generate_subplot( elif subplot.xScale == LOG: ax.set_xscale("log", base=np.e) # equidistant - elif subplot.xScale == 'order': + elif subplot.xScale == "order": ax.set_xscale("linear") # check if conditions are monotone decreasing or increasing if np.all(np.diff(subplot.conditions) < 0): @@ -357,22 +416,28 @@ def generate_subplot( conditions = range(len(subplot.conditions)) ax.set_xticks(range(len(conditions)), xlabel) else: - raise ValueError('Error: x-conditions do not coincide, ' - 'some are mon. increasing, some ' - 'monotonically decreasing') + raise ValueError( + "Error: x-conditions do not coincide, " + "some are mon. 
increasing, some " + "monotonically decreasing" + ) splitaxes_params = self._preprocess_splitaxes(fig, ax, subplot) for data_plot in subplot.data_plots: - ax, splitaxes_params['ax_inf'] = self.generate_lineplot( - ax, data_plot, subplot.plotTypeData, - splitaxes_params=splitaxes_params) - if splitaxes_params['ax_inf'] is not None: - self._postprocess_splitaxes(ax, splitaxes_params['ax_inf'], - splitaxes_params['t_inf']) + ax, splitaxes_params["ax_inf"] = self.generate_lineplot( + ax, + data_plot, + subplot.plotTypeData, + splitaxes_params=splitaxes_params, + ) + if splitaxes_params["ax_inf"] is not None: + self._postprocess_splitaxes( + ax, splitaxes_params["ax_inf"], splitaxes_params["t_inf"] + ) # show 'e' as basis not 2.7... in natural log scale cases def ticks(y, _): - return r'$e^{{{:.0f}}}$'.format(np.log(y)) + return r"$e^{{{:.0f}}}$".format(np.log(y)) if subplot.xScale == LOG: ax.xaxis.set_major_formatter(mtick.FuncFormatter(ticks)) @@ -393,9 +458,9 @@ def ticks(y, _): ax.set_ylabel(subplot.yLabel) def generate_figure( - self, - subplot_dir: Optional[str] = None, - format_: str = 'png', + self, + subplot_dir: Optional[str] = None, + format_: str = "png", ) -> Optional[Dict[str, plt.Subplot]]: """ Generate the full figure based on the markup in the figure attribute. @@ -421,15 +486,17 @@ def generate_figure( num_row = int(np.round(np.sqrt(self.figure.num_subplots))) num_col = int(np.ceil(self.figure.num_subplots / num_row)) - fig, axes = plt.subplots(num_row, num_col, squeeze=False, - figsize=self.figure.size) + fig, axes = plt.subplots( + num_row, num_col, squeeze=False, figsize=self.figure.size + ) fig.set_layout_engine("tight") - for ax in axes.flat[self.figure.num_subplots:]: + for ax in axes.flat[self.figure.num_subplots :]: ax.remove() - axes = dict(zip([plot.plotId for plot in self.figure.subplots], - axes.flat)) + axes = dict( + zip([plot.plotId for plot in self.figure.subplots], axes.flat) + ) for subplot in self.figure.subplots: if subplot_dir is not None: @@ -442,13 +509,15 @@ def generate_figure( self.generate_subplot(fig, ax, subplot) except Exception as e: raise RuntimeError( - f"Error plotting {getattr(subplot, PLOT_ID)}.") from e + f"Error plotting {getattr(subplot, PLOT_ID)}." + ) from e if subplot_dir is not None: # TODO: why this doesn't work? plt.tight_layout() - plt.savefig(os.path.join(subplot_dir, - f'{subplot.plotId}.{format_}')) + plt.savefig( + os.path.join(subplot_dir, f"{subplot.plotId}.{format_}") + ) plt.close() if subplot_dir is None: @@ -458,9 +527,8 @@ def generate_figure( @staticmethod def _square_plot_equal_ranges( - ax: 'matplotlib.pyplot.Axes', - lim: Optional[Union[List, Tuple]] = None - ) -> 'matplotlib.pyplot.Axes': + ax: "matplotlib.pyplot.Axes", lim: Optional[Union[List, Tuple]] = None + ) -> "matplotlib.pyplot.Axes": """ Square plot with equal range for scatter plots. @@ -469,13 +537,12 @@ def _square_plot_equal_ranges( Updated axis object. """ - ax.axis('square') + ax.axis("square") if lim is None: xlim = ax.get_xlim() ylim = ax.get_ylim() - lim = [np.min([xlim[0], ylim[0]]), - np.max([xlim[1], ylim[1]])] + lim = [np.min([xlim[0], ylim[0]]), np.max([xlim[1], ylim[1]])] ax.set_xlim(lim) ax.set_ylim(lim) @@ -494,7 +561,7 @@ def _line_plot_at_t_inf( noise_col: str, label_base: str, split_axes_params: dict, - color=None + color=None, ) -> Tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]: """ Plot data at t=inf. 
@@ -527,20 +594,23 @@ def _line_plot_at_t_inf( Two axis objects: for the data corresponding to the finite timepoints and for the data corresponding to t=inf """ - ax_inf = split_axes_params['ax_inf'] - t_inf = split_axes_params['t_inf'] - ax_finite_right_limit = split_axes_params['ax_finite_right_limit'] - ax_left_limit = split_axes_params['ax_left_limit'] - - timepoints_inf = [ax_finite_right_limit, - t_inf, - ax_finite_right_limit + - (ax_finite_right_limit - ax_left_limit) * 0.2] + ax_inf = split_axes_params["ax_inf"] + t_inf = split_axes_params["t_inf"] + ax_finite_right_limit = split_axes_params["ax_finite_right_limit"] + ax_left_limit = split_axes_params["ax_left_limit"] + + timepoints_inf = [ + ax_finite_right_limit, + t_inf, + ax_finite_right_limit + + (ax_finite_right_limit - ax_left_limit) * 0.2, + ] # plot measurements if measurements_to_plot is not None and measurements_to_plot.inf_point: - measurements_data_to_plot_inf = \ + measurements_data_to_plot_inf = ( measurements_to_plot.data_to_plot.loc[np.inf] + ) if plotTypeData == REPLICATE: p = None @@ -552,30 +622,43 @@ def _line_plot_at_t_inf( # plot first replicate p = ax_inf.plot( timepoints_inf, - [replicates[0]]*3, - linestyle='-.', marker='x', markersize=10, - markevery=[1], label=label_base + " simulation", - color=color + [replicates[0]] * 3, + linestyle="-.", + marker="x", + markersize=10, + markevery=[1], + label=label_base + " simulation", + color=color, ) # plot other replicates with the same color ax_inf.plot( timepoints_inf, - [replicates[1:]]*3, - linestyle='-.', - marker='x', markersize=10, markevery=[1], - color=p[0].get_color() + [replicates[1:]] * 3, + linestyle="-.", + marker="x", + markersize=10, + markevery=[1], + color=p[0].get_color(), ) else: - p = ax_inf.plot([timepoints_inf[0], timepoints_inf[2]], - [measurements_data_to_plot_inf['mean'], - measurements_data_to_plot_inf['mean']], - linestyle='-.', color=color) + p = ax_inf.plot( + [timepoints_inf[0], timepoints_inf[2]], + [ + measurements_data_to_plot_inf["mean"], + measurements_data_to_plot_inf["mean"], + ], + linestyle="-.", + color=color, + ) ax_inf.errorbar( - t_inf, measurements_data_to_plot_inf['mean'], + t_inf, + measurements_data_to_plot_inf["mean"], measurements_data_to_plot_inf[noise_col], - linestyle='-.', marker='.', - label=label_base + " simulation", color=p[0].get_color() + linestyle="-.", + marker=".", + label=label_base + " simulation", + color=p[0].get_color(), ) if color is None: @@ -586,8 +669,9 @@ def _line_plot_at_t_inf( # plot simulations if simulations_to_plot is not None and simulations_to_plot.inf_point: - simulations_data_to_plot_inf = \ + simulations_data_to_plot_inf = ( simulations_to_plot.data_to_plot.loc[np.inf] + ) if plotTypeData == REPLICATE: replicates = simulations_data_to_plot_inf.repl @@ -598,31 +682,38 @@ def _line_plot_at_t_inf( p = ax_inf.plot( timepoints_inf, [replicates[0]] * 3, - linestyle='-', marker='o', markevery=[1], - label=label_base, color=color + linestyle="-", + marker="o", + markevery=[1], + label=label_base, + color=color, ) # plot other replicates with the same color ax_inf.plot( timepoints_inf, [replicates[1:]] * 3, - linestyle='-', marker='o', markevery=[1], - color=p[0].get_color() + linestyle="-", + marker="o", + markevery=[1], + color=p[0].get_color(), ) else: - ax_inf.plot(timepoints_inf, - [simulations_data_to_plot_inf['mean']]*3, - linestyle='-', marker='o', markevery=[1], - color=color) + ax_inf.plot( + timepoints_inf, + [simulations_data_to_plot_inf["mean"]] * 3, + linestyle="-", + 
marker="o", + markevery=[1], + color=color, + ) ax.set_xlim(right=ax_finite_right_limit) return ax, ax_inf @staticmethod def _postprocess_splitaxes( - ax: matplotlib.axes.Axes, - ax_inf: matplotlib.axes.Axes, - t_inf: float + ax: matplotlib.axes.Axes, ax_inf: matplotlib.axes.Axes, t_inf: float ) -> None: """ Postprocess the splitaxes: set axes limits, turn off unnecessary @@ -638,27 +729,30 @@ def _postprocess_splitaxes( Time value that represents t=inf """ ax_inf.tick_params(left=False, labelleft=False) - ax_inf.spines['left'].set_visible(False) + ax_inf.spines["left"].set_visible(False) ax_inf.set_xticks([t_inf]) - ax_inf.set_xticklabels([r'$t_{\infty}$']) + ax_inf.set_xticklabels([r"$t_{\infty}$"]) bottom, top = ax.get_ylim() left, right = ax.get_xlim() - ax.spines['right'].set_visible(False) - ax_inf.set_xlim(right, - right + (right - left) * 0.2) + ax.spines["right"].set_visible(False) + ax_inf.set_xlim(right, right + (right - left) * 0.2) d = (top - bottom) * 0.02 - ax_inf.vlines(x=right, ymin=bottom + d, ymax=top - d, ls='--', - color='gray') # right - ax.vlines(x=right, ymin=bottom + d, ymax=top - d, ls='--', - color='gray') # left + ax_inf.vlines( + x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray" + ) # right + ax.vlines( + x=right, ymin=bottom + d, ymax=top - d, ls="--", color="gray" + ) # left ax_inf.set_ylim(bottom, top) ax.set_ylim(bottom, top) - def _preprocess_splitaxes(self, - fig: matplotlib.figure.Figure, - ax: matplotlib.axes.Axes, - subplot: Subplot) -> Dict: + def _preprocess_splitaxes( + self, + fig: matplotlib.figure.Figure, + ax: matplotlib.axes.Axes, + subplot: Subplot, + ) -> Dict: """ Prepare splitaxes if data at t=inf should be plotted: compute left and right limits for the axis where the data corresponding to the finite @@ -667,7 +761,7 @@ def _preprocess_splitaxes(self, """ def check_data_to_plot( - data_to_plot: DataSeries + data_to_plot: DataSeries, ) -> Tuple[bool, Optional[float], float]: """ Check if there is data available at t=inf and compute maximum and @@ -679,9 +773,13 @@ def check_data_to_plot( if data_to_plot is not None and len(data_to_plot.conditions): contains_inf = np.inf in data_to_plot.conditions finite_conditions = data_to_plot.conditions[ - data_to_plot.conditions != np.inf] - max_finite_cond = np.max(finite_conditions) if \ - finite_conditions.size else None + data_to_plot.conditions != np.inf + ] + max_finite_cond = ( + np.max(finite_conditions) + if finite_conditions.size + else None + ) min_cond = min(data_to_plot.conditions) return contains_inf, max_finite_cond, min_cond @@ -689,23 +787,32 @@ def check_data_to_plot( ax_inf = None t_inf, ax_finite_right_limit, ax_left_limit = None, None, np.inf for dataplot in subplot.data_plots: - measurements_to_plot, simulations_to_plot = \ - self.data_provider.get_data_to_plot( - dataplot, subplot.plotTypeData == PROVIDED) + ( + measurements_to_plot, + simulations_to_plot, + ) = self.data_provider.get_data_to_plot( + dataplot, subplot.plotTypeData == PROVIDED + ) contains_inf_m, max_finite_cond_m, min_cond_m = check_data_to_plot( - measurements_to_plot) + measurements_to_plot + ) contains_inf_s, max_finite_cond_s, min_cond_s = check_data_to_plot( - simulations_to_plot) + simulations_to_plot + ) if max_finite_cond_m is not None: - ax_finite_right_limit = max(ax_finite_right_limit, - max_finite_cond_m) if \ - ax_finite_right_limit is not None else max_finite_cond_m + ax_finite_right_limit = ( + max(ax_finite_right_limit, max_finite_cond_m) + if ax_finite_right_limit is not None + else 
max_finite_cond_m + ) if max_finite_cond_s is not None: - ax_finite_right_limit = max(ax_finite_right_limit, - max_finite_cond_s) if \ - ax_finite_right_limit is not None else max_finite_cond_s + ax_finite_right_limit = ( + max(ax_finite_right_limit, max_finite_cond_s) + if ax_finite_right_limit is not None + else max_finite_cond_s + ) ax_left_limit = min(ax_left_limit, min(min_cond_m, min_cond_s)) # check if t=inf is contained in any data to be plotted on the @@ -720,28 +827,32 @@ def check_data_to_plot( if ax_finite_right_limit is None and ax_left_limit == np.inf: ax_finite_right_limit = 10 ax_left_limit = 0 - t_inf = ax_finite_right_limit + (ax_finite_right_limit - - ax_left_limit)*0.1 + t_inf = ( + ax_finite_right_limit + + (ax_finite_right_limit - ax_left_limit) * 0.1 + ) # create axes for t=inf divider = make_axes_locatable(ax) ax_inf = divider.new_horizontal(size="10%", pad=0.3) fig.add_axes(ax_inf) - return {'ax_inf': ax_inf, - 't_inf': t_inf, - 'ax_finite_right_limit': ax_finite_right_limit, - 'ax_left_limit': ax_left_limit} + return { + "ax_inf": ax_inf, + "t_inf": t_inf, + "ax_finite_right_limit": ax_finite_right_limit, + "ax_left_limit": ax_left_limit, + } class SeabornPlotter(Plotter): """ Seaborn wrapper. """ + def __init__(self, figure: Figure, data_provider: DataProvider): super().__init__(figure, data_provider) def generate_figure( - self, - subplot_dir: Optional[str] = None + self, subplot_dir: Optional[str] = None ) -> Optional[Dict[str, plt.Subplot]]: pass diff --git a/petab/visualize/plotting.py b/petab/visualize/plotting.py index ab40c92e..2990f528 100644 --- a/petab/visualize/plotting.py +++ b/petab/visualize/plotting.py @@ -2,19 +2,27 @@ import warnings from numbers import Number, Real from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union, Literal +from typing import Dict, List, Literal, Optional, Tuple, Union import numpy as np import pandas as pd -from .helper_functions import (create_dataset_id_list_new, - generate_dataset_id_col) from .. import conditions, core, measurements from ..C import * from ..problem import Problem - -__all__ = ['DataSeries', 'DataPlot', 'Subplot', 'Figure', 'DataProvider', - 'VisSpecParser'] +from .helper_functions import ( + create_dataset_id_list_new, + generate_dataset_id_col, +) + +__all__ = [ + "DataSeries", + "DataPlot", + "Subplot", + "Figure", + "DataProvider", + "VisSpecParser", +] # for typehints IdsList = List[str] @@ -46,15 +54,19 @@ class DataSeries: """ Data for one individual line """ - def __init__(self, conditions_: Optional[Union[np.ndarray, pd.Series]], - data_to_plot: Optional[pd.DataFrame] = None): + def __init__( + self, + conditions_: Optional[Union[np.ndarray, pd.Series]], + data_to_plot: Optional[pd.DataFrame] = None, + ): self.data_to_plot = data_to_plot self.data_to_plot.sort_index(inplace=True) self.conditions = conditions_ - self.inf_point = np.inf in self.conditions if \ - self.conditions is not None else False + self.inf_point = ( + np.inf in self.conditions if self.conditions is not None else False + ) # sort index for the case that indices of conditions and # measurements differ. 
if indep_var='time', conditions is a # numpy array, if indep_var=observable it's a Series @@ -77,8 +89,8 @@ def add_x_offset(self, offset) -> None: self.conditions += offset def add_y_offset(self, offset): - self.data_to_plot['mean'] += offset - self.data_to_plot['repl'] += offset + self.data_to_plot["mean"] += offset + self.data_to_plot["repl"] += offset def add_offsets(self, x_offset=0, y_offset=0) -> None: """ @@ -100,8 +112,8 @@ class DataPlot: Visualization specification of a plot of one data series, e.g. for an individual line on a subplot. """ - def __init__(self, - plot_settings: dict): + + def __init__(self, plot_settings: dict): """ Constructor. @@ -115,21 +127,20 @@ def __init__(self, setattr(self, key, val) if DATASET_ID not in vars(self): - raise ValueError(f'{DATASET_ID} must be specified') + raise ValueError(f"{DATASET_ID} must be specified") if X_VALUES not in vars(self): # TODO: singular? setattr(self, X_VALUES, TIME) if X_OFFSET not in vars(self): setattr(self, X_OFFSET, 0) if Y_VALUES not in vars(self): - setattr(self, Y_VALUES, '') + setattr(self, Y_VALUES, "") if Y_OFFSET not in vars(self): - setattr(self, Y_OFFSET, 0.) + setattr(self, Y_OFFSET, 0.0) if LEGEND_ENTRY not in vars(self): setattr(self, LEGEND_ENTRY, getattr(self, DATASET_ID)) @classmethod def from_df(cls, plot_spec: pd.DataFrame): - vis_spec_dict = plot_spec.to_dict() return cls(vis_spec_dict) @@ -142,10 +153,13 @@ class Subplot: """ Visualization specification of a subplot. """ - def __init__(self, - plot_id: str, - plot_settings: dict, - dataplots: Optional[List[DataPlot]] = None): + + def __init__( + self, + plot_id: str, + plot_settings: dict, + dataplots: Optional[List[DataPlot]] = None, + ): """ Constructor. @@ -165,7 +179,7 @@ def __init__(self, setattr(self, key, val) if PLOT_NAME not in vars(self): - setattr(self, PLOT_NAME, '') + setattr(self, PLOT_NAME, "") if PLOT_TYPE_SIMULATION not in vars(self): setattr(self, PLOT_TYPE_SIMULATION, LINE_PLOT) if PLOT_TYPE_DATA not in vars(self): @@ -175,7 +189,7 @@ def __init__(self, if X_SCALE not in vars(self): setattr(self, X_SCALE, LIN) if Y_LABEL not in vars(self): - setattr(self, Y_LABEL, 'values') + setattr(self, Y_LABEL, "values") if Y_SCALE not in vars(self): setattr(self, Y_SCALE, LIN) @@ -184,45 +198,59 @@ def __init__(self, self.ylim = None @classmethod - def from_df(cls, plot_id: str, vis_spec: pd.DataFrame, - dataplots: Optional[List[DataPlot]] = None): - + def from_df( + cls, + plot_id: str, + vis_spec: pd.DataFrame, + dataplots: Optional[List[DataPlot]] = None, + ): vis_spec_dict = {} for col in vis_spec: if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS: entry = vis_spec.loc[:, col] entry = np.unique(entry) if entry.size > 1: - warnings.warn(f'For {PLOT_ID} {plot_id} in column ' - f'{col} contradictory settings ({entry})' - f'. Proceeding with first entry ' - f'({entry[0]}).') + warnings.warn( + f"For {PLOT_ID} {plot_id} in column " + f"{col} contradictory settings ({entry})" + f". Proceeding with first entry " + f"({entry[0]})." 
+ ) entry = entry[0] # check if values are allowed - if col in [Y_SCALE, X_SCALE] and entry not in \ - OBSERVABLE_TRANSFORMATIONS: - raise ValueError(f'{X_SCALE} and {Y_SCALE} have to be ' - f'one of the following: ' - + ', '.join(OBSERVABLE_TRANSFORMATIONS)) - elif col == PLOT_TYPE_DATA and entry not in \ - PLOT_TYPES_DATA: - raise ValueError(f'{PLOT_TYPE_DATA} has to be one of the ' - f'following: ' - + ', '.join(PLOT_TYPES_DATA)) - elif col == PLOT_TYPE_SIMULATION and entry not in \ - PLOT_TYPES_SIMULATION: - raise ValueError(f'{PLOT_TYPE_SIMULATION} has to be one of' - f' the following: ' - + ', '.join(PLOT_TYPES_SIMULATION)) + if ( + col in [Y_SCALE, X_SCALE] + and entry not in OBSERVABLE_TRANSFORMATIONS + ): + raise ValueError( + f"{X_SCALE} and {Y_SCALE} have to be " + f"one of the following: " + + ", ".join(OBSERVABLE_TRANSFORMATIONS) + ) + elif col == PLOT_TYPE_DATA and entry not in PLOT_TYPES_DATA: + raise ValueError( + f"{PLOT_TYPE_DATA} has to be one of the " + f"following: " + ", ".join(PLOT_TYPES_DATA) + ) + elif ( + col == PLOT_TYPE_SIMULATION + and entry not in PLOT_TYPES_SIMULATION + ): + raise ValueError( + f"{PLOT_TYPE_SIMULATION} has to be one of" + f" the following: " + ", ".join(PLOT_TYPES_SIMULATION) + ) # append new entry to dict vis_spec_dict[col] = entry else: - warnings.warn(f'Column {col} cannot be used to specify subplot' - f', only settings from the following columns can' - f' be used:' - + ', '.join(VISUALIZATION_DF_SUBPLOT_LEVEL_COLS)) + warnings.warn( + f"Column {col} cannot be used to specify subplot" + f", only settings from the following columns can" + f" be used:" + + ", ".join(VISUALIZATION_DF_SUBPLOT_LEVEL_COLS) + ) return cls(plot_id, vis_spec_dict, dataplots) def add_dataplot(self, dataplot: DataPlot) -> None: @@ -237,11 +265,11 @@ def add_dataplot(self, dataplot: DataPlot) -> None: """ self.data_plots.append(dataplot) - def set_axes_limits(self, - xlim: Optional[Tuple[Optional[Real], - Optional[Real]]] = None, - ylim: Optional[Tuple[Optional[Real], - Optional[Real]]] = None): + def set_axes_limits( + self, + xlim: Optional[Tuple[Optional[Real], Optional[Real]]] = None, + ylim: Optional[Tuple[Optional[Real], Optional[Real]]] = None, + ): """ Set axes limits for all subplots. If xlim or ylim or any of the tuple items is None, corresponding limit is left unchanged. @@ -263,9 +291,13 @@ class Figure: Contains information regarding how data should be visualized. """ - def __init__(self, subplots: Optional[List[Subplot]] = None, - size: Tuple = DEFAULT_FIGSIZE, - title: Optional[Tuple] = None): + + def __init__( + self, + subplots: Optional[List[Subplot]] = None, + size: Tuple = DEFAULT_FIGSIZE, + title: Optional[Tuple] = None, + ): """ Constructor. @@ -303,11 +335,11 @@ def add_subplot(self, subplot: Subplot) -> None: """ self.subplots.append(subplot) - def set_axes_limits(self, - xlim: Optional[Tuple[Optional[Real], - Optional[Real]]] = None, - ylim: Optional[Tuple[Optional[Real], - Optional[Real]]] = None) -> None: + def set_axes_limits( + self, + xlim: Optional[Tuple[Optional[Real], Optional[Real]]] = None, + ylim: Optional[Tuple[Optional[Real], Optional[Real]]] = None, + ) -> None: """ Set axes limits for all subplots. If xlim or ylim or any of the tuple items is None, corresponding limit is left unchanged. 
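
For orientation, a minimal sketch of how the Figure-level axis limits above can be
driven from user code. This is an illustrative sketch only: the "conditions.tsv" and
"measurements.tsv" paths are hypothetical placeholders, and it assumes
parse_from_id_list accepts the same (grouping_list, group_by, plotted_noise)
arguments that plot_without_vis_spec forwards to it:

    from petab.C import MEAN_AND_SD
    from petab.visualize.plotting import VisSpecParser

    # Build a Figure/DataProvider pair the same way plot_without_vis_spec does
    # ("conditions.tsv" / "measurements.tsv" are placeholder file names).
    vis_parser = VisSpecParser("conditions.tsv", "measurements.tsv")
    figure, data_provider = vis_parser.parse_from_id_list(
        None, "observable", MEAN_AND_SD
    )

    # Pin the y-range on every subplot; a None entry leaves that limit unchanged.
    figure.set_axes_limits(ylim=(0.0, 10.0))
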
@@ -323,7 +355,7 @@ def set_axes_limits(self, for subplot in self.subplots: subplot.set_axes_limits(xlim, ylim) - def save_to_tsv(self, output_file_path: str = 'visuSpec.tsv') -> None: + def save_to_tsv(self, output_file_path: str = "visuSpec.tsv") -> None: """ Save full Visualization specification table. @@ -339,20 +371,26 @@ def save_to_tsv(self, output_file_path: str = 'visuSpec.tsv') -> None: """ # TODO: what if datasetIds were generated? - warnings.warn(f'Note: please check that {DATASET_ID} column ' - f'corresponds to {DATASET_ID} column in Measurement ' - f'(Simulation) table.') + warnings.warn( + f"Note: please check that {DATASET_ID} column " + f"corresponds to {DATASET_ID} column in Measurement " + f"(Simulation) table." + ) visu_dict = {} for subplot in self.subplots: - subplot_level = {key: subplot.__dict__[key] for key in - subplot.__dict__ if key in - VISUALIZATION_DF_SUBPLOT_LEVEL_COLS} + subplot_level = { + key: subplot.__dict__[key] + for key in subplot.__dict__ + if key in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS + } for dataplot in subplot.data_plots: - dataset_level = {key: dataplot.__dict__[key] for key in - dataplot.__dict__ if key in - VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS} + dataset_level = { + key: dataplot.__dict__[key] + for key in dataplot.__dict__ + if key in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS + } row = {**subplot_level, **dataset_level} for key, value in row.items(): if key in visu_dict: @@ -360,29 +398,34 @@ def save_to_tsv(self, output_file_path: str = 'visuSpec.tsv') -> None: else: visu_dict[key] = [row[key]] visu_df = pd.DataFrame.from_dict(visu_dict) - visu_df.to_csv(output_file_path, sep='\t', index=False) + visu_df.to_csv(output_file_path, sep="\t", index=False) class DataProvider: """ Handles data selection. """ - def __init__(self, - exp_conditions: pd.DataFrame, - measurements_data: Optional[pd.DataFrame] = None, - simulations_data: Optional[pd.DataFrame] = None): + + def __init__( + self, + exp_conditions: pd.DataFrame, + measurements_data: Optional[pd.DataFrame] = None, + simulations_data: Optional[pd.DataFrame] = None, + ): self.conditions_data = exp_conditions if measurements_data is None and simulations_data is None: - raise TypeError('Not enough arguments. Either measurements_data ' - 'or simulations_data should be provided.') + raise TypeError( + "Not enough arguments. Either measurements_data " + "or simulations_data should be provided." + ) self.measurements_data = measurements_data self.simulations_data = simulations_data @staticmethod - def _matches_plot_spec(df: pd.DataFrame, - plot_spec: 'DataPlot', - dataset_id) -> pd.Series: + def _matches_plot_spec( + df: pd.DataFrame, plot_spec: "DataPlot", dataset_id + ) -> pd.Series: """ Construct an index for subsetting of the dataframe according to what is specified in plot_spec. @@ -400,22 +443,20 @@ def _matches_plot_spec(df: pd.DataFrame, Boolean series that can be used for subsetting of the passed dataframe """ - subset = ( - (df[DATASET_ID] == dataset_id) - ) - if getattr(plot_spec, Y_VALUES) == '': + subset = df[DATASET_ID] == dataset_id + if getattr(plot_spec, Y_VALUES) == "": if len(df.loc[subset, OBSERVABLE_ID].unique()) > 1: raise ValueError( - f'{Y_VALUES} must be specified in visualization table if ' - f'multiple different observables are available.' + f"{Y_VALUES} must be specified in visualization table if " + f"multiple different observables are available." 
) else: - subset &= (df[OBSERVABLE_ID] == getattr(plot_spec, Y_VALUES)) + subset &= df[OBSERVABLE_ID] == getattr(plot_spec, Y_VALUES) return subset - def _get_independent_var_values(self, data_df: pd.DataFrame, - dataplot: DataPlot - ) -> Tuple[np.ndarray, str, pd.Series]: + def _get_independent_var_values( + self, data_df: pd.DataFrame, dataplot: DataPlot + ) -> Tuple[np.ndarray, str, pd.Series]: """ Get independent variable values. @@ -460,13 +501,14 @@ def _get_independent_var_values(self, data_df: pd.DataFrame, dataset_id = getattr(dataplot, DATASET_ID) - single_m_data = data_df[self._matches_plot_spec( - data_df, dataplot, dataset_id)] + single_m_data = data_df[ + self._matches_plot_spec(data_df, dataplot, dataset_id) + ] # gather simulationConditionIds belonging to datasetId uni_condition_id, uind = np.unique( - single_m_data[SIMULATION_CONDITION_ID], - return_index=True) + single_m_data[SIMULATION_CONDITION_ID], return_index=True + ) # keep the ordering which was given by user from top to bottom # (avoid ordering by names '1','10','11','2',...)' uni_condition_id = uni_condition_id[np.argsort(uind)] @@ -477,7 +519,7 @@ def _get_independent_var_values(self, data_df: pd.DataFrame, uni_condition_id = single_m_data[TIME].unique() col_name_unique = TIME conditions_ = uni_condition_id - elif indep_var == 'condition': + elif indep_var == "condition": conditions_ = None else: # indep_var = parameterOrStateId case ? @@ -488,11 +530,11 @@ def _get_independent_var_values(self, data_df: pd.DataFrame, return uni_condition_id, col_name_unique, conditions_ def get_data_series( - self, - data_df: pd.DataFrame, - data_col: Literal['measurement', 'simulation'], - dataplot: DataPlot, - provided_noise: bool + self, + data_df: pd.DataFrame, + data_col: Literal["measurement", "simulation"], + dataplot: DataPlot, + provided_noise: bool, ) -> DataSeries: """ Get data to plot from measurement or simulation DataFrame. @@ -510,71 +552,83 @@ def get_data_series( ------- Data to plot """ - uni_condition_id, col_name_unique, conditions_ = \ - self._get_independent_var_values(data_df, dataplot) + ( + uni_condition_id, + col_name_unique, + conditions_, + ) = self._get_independent_var_values(data_df, dataplot) dataset_id = getattr(dataplot, DATASET_ID) # get data subset selected based on provided dataset_id # and observable_ids - single_m_data = data_df[self._matches_plot_spec( - data_df, dataplot, dataset_id)] + single_m_data = data_df[ + self._matches_plot_spec(data_df, dataplot, dataset_id) + ] # create empty dataframe for means and SDs measurements_to_plot = pd.DataFrame( - columns=['mean', 'noise_model', 'sd', 'sem', 'repl'], - index=uni_condition_id + columns=["mean", "noise_model", "sd", "sem", "repl"], + index=uni_condition_id, ) for var_cond_id in uni_condition_id: - - subset = (single_m_data[col_name_unique] == var_cond_id) + subset = single_m_data[col_name_unique] == var_cond_id # what has to be plotted is selected - data_measurements = single_m_data.loc[ - subset, - data_col - ] + data_measurements = single_m_data.loc[subset, data_col] # TODO: all this rather inside DataSeries? 
# process the data - measurements_to_plot.at[var_cond_id, 'mean'] = np.mean( - data_measurements) - measurements_to_plot.at[var_cond_id, 'sd'] = np.std( - data_measurements) + measurements_to_plot.at[var_cond_id, "mean"] = np.mean( + data_measurements + ) + measurements_to_plot.at[var_cond_id, "sd"] = np.std( + data_measurements + ) if provided_noise and np.any(subset): - if len(single_m_data.loc[ - subset, NOISE_PARAMETERS].unique()) > 1: + if ( + len(single_m_data.loc[subset, NOISE_PARAMETERS].unique()) + > 1 + ): raise NotImplementedError( f"Datapoints with inconsistent {NOISE_PARAMETERS} " - f"is currently not implemented. Stopping.") - tmp_noise = \ - single_m_data.loc[subset, NOISE_PARAMETERS].values[0] + f"is currently not implemented. Stopping." + ) + tmp_noise = single_m_data.loc[subset, NOISE_PARAMETERS].values[ + 0 + ] if isinstance(tmp_noise, str): raise NotImplementedError( "No numerical noise values provided in the " - "measurement table. Stopping.") - if isinstance(tmp_noise, Number) or \ - tmp_noise.dtype == 'float64': + "measurement table. Stopping." + ) + if ( + isinstance(tmp_noise, Number) + or tmp_noise.dtype == "float64" + ): measurements_to_plot.at[ - var_cond_id, 'noise_model'] = tmp_noise + var_cond_id, "noise_model" + ] = tmp_noise # standard error of mean - measurements_to_plot.at[var_cond_id, 'sem'] = \ - np.std(data_measurements) / np.sqrt( - len(data_measurements)) + measurements_to_plot.at[var_cond_id, "sem"] = np.std( + data_measurements + ) / np.sqrt(len(data_measurements)) # single replicates - measurements_to_plot.at[var_cond_id, 'repl'] = \ - data_measurements.values + measurements_to_plot.at[ + var_cond_id, "repl" + ] = data_measurements.values data_series = DataSeries(conditions_, measurements_to_plot) data_series.add_offsets(dataplot.xOffset, dataplot.yOffset) return data_series - def get_data_to_plot(self, dataplot: DataPlot, provided_noise: bool - ) -> Tuple[DataSeries, DataSeries]: + def get_data_to_plot( + self, dataplot: DataPlot, provided_noise: bool + ) -> Tuple[DataSeries, DataSeries]: """ Get data to plot. @@ -594,16 +648,14 @@ def get_data_to_plot(self, dataplot: DataPlot, provided_noise: bool simulations_to_plot = None if self.measurements_data is not None: - measurements_to_plot = self.get_data_series(self.measurements_data, - MEASUREMENT, - dataplot, - provided_noise) + measurements_to_plot = self.get_data_series( + self.measurements_data, MEASUREMENT, dataplot, provided_noise + ) if self.simulations_data is not None: - simulations_to_plot = self.get_data_series(self.simulations_data, - SIMULATION, - dataplot, - provided_noise) + simulations_to_plot = self.get_data_series( + self.simulations_data, SIMULATION, dataplot, provided_noise + ) return measurements_to_plot, simulations_to_plot @@ -616,11 +668,12 @@ class VisSpecParser: Figure instance, a DataProvider instance is created that will be responsible for the data selection and manipulation. 
""" + def __init__( - self, - conditions_data: Union[str, Path, pd.DataFrame], - exp_data: Optional[Union[str, Path, pd.DataFrame]] = None, - sim_data: Optional[Union[str, Path, pd.DataFrame]] = None, + self, + conditions_data: Union[str, Path, pd.DataFrame], + exp_data: Optional[Union[str, Path, pd.DataFrame]] = None, + sim_data: Optional[Union[str, Path, pd.DataFrame]] = None, ): if isinstance(conditions_data, (str, Path)): conditions_data = conditions.get_condition_df(conditions_data) @@ -633,8 +686,10 @@ def __init__( sim_data = core.get_simulation_df(sim_data) if exp_data is None and sim_data is None: - raise TypeError('Not enough arguments. Either measurements_data ' - 'or simulations_data should be provided.') + raise TypeError( + "Not enough arguments. Either measurements_data " + "or simulations_data should be provided." + ) self.conditions_data = conditions_data self.measurements_data = exp_data @@ -642,19 +697,21 @@ def __init__( @classmethod def from_problem(cls, petab_problem: Problem, sim_data): - return cls(petab_problem.condition_df, - petab_problem.measurement_df, - sim_data) + return cls( + petab_problem.condition_df, petab_problem.measurement_df, sim_data + ) @property def _data_df(self): - return self.measurements_data if self.measurements_data is not \ - None else self.simulations_data + return ( + self.measurements_data + if self.measurements_data is not None + else self.simulations_data + ) @staticmethod def create_subplot( - plot_id: str, - subplot_vis_spec: pd.DataFrame + plot_id: str, subplot_vis_spec: pd.DataFrame ) -> Subplot: """ Create subplot. @@ -672,13 +729,20 @@ def create_subplot( Subplot """ - subplot_columns = [col for col in subplot_vis_spec.columns if col in - VISUALIZATION_DF_SUBPLOT_LEVEL_COLS] - subplot = Subplot.from_df(plot_id, - subplot_vis_spec.loc[:, subplot_columns]) + subplot_columns = [ + col + for col in subplot_vis_spec.columns + if col in VISUALIZATION_DF_SUBPLOT_LEVEL_COLS + ] + subplot = Subplot.from_df( + plot_id, subplot_vis_spec.loc[:, subplot_columns] + ) - dataplot_cols = [col for col in subplot_vis_spec.columns if col in - VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS] + dataplot_cols = [ + col + for col in subplot_vis_spec.columns + if col in VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS + ] dataplot_spec = subplot_vis_spec.loc[:, dataplot_cols] for _, row in dataplot_spec.iterrows(): @@ -688,8 +752,8 @@ def create_subplot( return subplot def parse_from_vis_spec( - self, - vis_spec: Optional[Union[str, Path, pd.DataFrame]], + self, + vis_spec: Optional[Union[str, Path, pd.DataFrame]], ) -> Tuple[Figure, DataProvider]: """ Get visualization settings from a visualization specification. 
@@ -713,16 +777,24 @@ def parse_from_vis_spec( self._add_dataset_id_col() vis_spec = self._expand_vis_spec_settings(vis_spec) else: - if self.measurements_data is not None \ - and DATASET_ID not in self.measurements_data: - raise ValueError(f"grouping by datasetId was requested, but " - f"{DATASET_ID} column is missing from " - f"measurement table") - if self.simulations_data is not None \ - and DATASET_ID not in self.simulations_data: - raise ValueError(f"grouping by datasetId was requested, but " - f"{DATASET_ID} column is missing from " - f"simulation table") + if ( + self.measurements_data is not None + and DATASET_ID not in self.measurements_data + ): + raise ValueError( + f"grouping by datasetId was requested, but " + f"{DATASET_ID} column is missing from " + f"measurement table" + ) + if ( + self.simulations_data is not None + and DATASET_ID not in self.simulations_data + ): + raise ValueError( + f"grouping by datasetId was requested, but " + f"{DATASET_ID} column is missing from " + f"simulation table" + ) figure = Figure() @@ -733,20 +805,21 @@ def parse_from_vis_spec( # loop over unique plotIds for plot_id in plot_ids: # get indices for specific plotId - ind_plot = (vis_spec[PLOT_ID] == plot_id) + ind_plot = vis_spec[PLOT_ID] == plot_id subplot = self.create_subplot(plot_id, vis_spec[ind_plot]) figure.add_subplot(subplot) - return figure, DataProvider(self.conditions_data, - self.measurements_data, - self.simulations_data) + return figure, DataProvider( + self.conditions_data, self.measurements_data, self.simulations_data + ) - def parse_from_id_list(self, - ids_per_plot: Optional[List[IdsList]] = None, - group_by: str = 'observable', - plotted_noise: Optional[str] = MEAN_AND_SD - ) -> Tuple[Figure, DataProvider]: + def parse_from_id_list( + self, + ids_per_plot: Optional[List[IdsList]] = None, + group_by: str = "observable", + plotted_noise: Optional[str] = MEAN_AND_SD, + ) -> Tuple[Figure, DataProvider]: """ Get visualization settings from a list of ids and a grouping parameter. 
@@ -792,20 +865,24 @@ def parse_from_id_list(self, unique_obs_list = self._data_df[OBSERVABLE_ID].unique() ids_per_plot = [[obs_id] for obs_id in unique_obs_list] - if group_by == 'dataset' and DATASET_ID not in self._data_df: - raise ValueError(f"grouping by datasetId was requested, but " - f"{DATASET_ID} column is missing from data table") + if group_by == "dataset" and DATASET_ID not in self._data_df: + raise ValueError( + f"grouping by datasetId was requested, but " + f"{DATASET_ID} column is missing from data table" + ) - if group_by != 'dataset': + if group_by != "dataset": # datasetId_list will be created (possibly overwriting previous # list - only in the local variable, not in the tsv-file) self._add_dataset_id_col() columns_dict = self._get_vis_spec_dependent_columns_dict( - group_by, ids_per_plot) + group_by, ids_per_plot + ) - columns_dict[PLOT_TYPE_DATA] = [plotted_noise]*len( - columns_dict[DATASET_ID]) + columns_dict[PLOT_TYPE_DATA] = [plotted_noise] * len( + columns_dict[DATASET_ID] + ) vis_spec_df = pd.DataFrame(columns_dict) @@ -820,25 +897,27 @@ def _add_dataset_id_col(self) -> None: if self.measurements_data is not None: if DATASET_ID in self.measurements_data.columns: self.measurements_data = self.measurements_data.drop( - DATASET_ID, axis=1) + DATASET_ID, axis=1 + ) self.measurements_data.insert( loc=self.measurements_data.columns.size, column=DATASET_ID, - value=generate_dataset_id_col(self.measurements_data)) + value=generate_dataset_id_col(self.measurements_data), + ) if self.simulations_data is not None: if DATASET_ID in self.simulations_data.columns: - self.simulations_data = self.simulations_data.drop(DATASET_ID, - axis=1) + self.simulations_data = self.simulations_data.drop( + DATASET_ID, axis=1 + ) self.simulations_data.insert( loc=self.simulations_data.columns.size, column=DATASET_ID, - value=generate_dataset_id_col(self.simulations_data)) + value=generate_dataset_id_col(self.simulations_data), + ) def _get_vis_spec_dependent_columns_dict( - self, - group_by: str, - id_list: Optional[List[IdsList]] = None + self, group_by: str, id_list: Optional[List[IdsList]] = None ) -> Dict: """ Helper method for creating values for columns PLOT_ID, DATASET_ID, @@ -860,34 +939,45 @@ def _get_vis_spec_dependent_columns_dict( LEGEND_ENTRY, Y_VALUES for visualization specification. 
""" - if group_by != 'dataset': - dataset_id_list = create_dataset_id_list_new(self._data_df, - group_by, id_list) + if group_by != "dataset": + dataset_id_list = create_dataset_id_list_new( + self._data_df, group_by, id_list + ) else: dataset_id_list = id_list - dataset_id_column = [i_dataset for sublist in dataset_id_list - for i_dataset in sublist] + dataset_id_column = [ + i_dataset for sublist in dataset_id_list for i_dataset in sublist + ] - dataset_label_column = [self._create_legend(i_dataset) for sublist in - dataset_id_list for i_dataset in sublist] + dataset_label_column = [ + self._create_legend(i_dataset) + for sublist in dataset_id_list + for i_dataset in sublist + ] # such dataset ids were generated that each dataset_id always # corresponds to one observable - yvalues_column = [self._data_df.loc[self._data_df[DATASET_ID] == - dataset_id, OBSERVABLE_ID].iloc[0] - for sublist in dataset_id_list for dataset_id in - sublist] + yvalues_column = [ + self._data_df.loc[ + self._data_df[DATASET_ID] == dataset_id, OBSERVABLE_ID + ].iloc[0] + for sublist in dataset_id_list + for dataset_id in sublist + ] # get number of plots and create plotId-lists - plot_id_column = ['plot%s' % str(ind + 1) for ind, inner_list in - enumerate(dataset_id_list) for _ in inner_list] + plot_id_column = [ + "plot%s" % str(ind + 1) + for ind, inner_list in enumerate(dataset_id_list) + for _ in inner_list + ] return { PLOT_ID: plot_id_column, DATASET_ID: dataset_id_column, LEGEND_ENTRY: dataset_label_column, - Y_VALUES: yvalues_column + Y_VALUES: yvalues_column, } def _create_legend(self, dataset_id: str) -> str: @@ -906,16 +996,15 @@ def _create_legend(self, dataset_id: str) -> str: # relies on the fact that dataset ids were created based on cond_ids # and obs_ids. Therefore, in the following query all pairs will be # the same - cond_id, obs_id = self._data_df[self._data_df[DATASET_ID] == - dataset_id][[SIMULATION_CONDITION_ID, - OBSERVABLE_ID]].iloc[0, :] + cond_id, obs_id = self._data_df[ + self._data_df[DATASET_ID] == dataset_id + ][[SIMULATION_CONDITION_ID, OBSERVABLE_ID]].iloc[0, :] tmp = self.conditions_data.loc[cond_id] - if CONDITION_NAME not in tmp.index or \ - pd.isna(tmp[CONDITION_NAME]): + if CONDITION_NAME not in tmp.index or pd.isna(tmp[CONDITION_NAME]): cond_name = cond_id else: cond_name = tmp[CONDITION_NAME] - return f'{cond_name} - {obs_id}' + return f"{cond_name} - {obs_id}" def _expand_vis_spec_settings(self, vis_spec: pd.DataFrame): """ @@ -933,8 +1022,10 @@ def _expand_vis_spec_settings(self, vis_spec: pd.DataFrame): A visualization specification DataFrame. 
""" if DATASET_ID in vis_spec.columns: - raise ValueError(f"visualization specification expansion is " - f"unnecessary if column {DATASET_ID} is present") + raise ValueError( + f"visualization specification expansion is " + f"unnecessary if column {DATASET_ID} is present" + ) if vis_spec.empty: # in case of empty spec all measurements corresponding to each @@ -942,7 +1033,7 @@ def _expand_vis_spec_settings(self, vis_spec: pd.DataFrame): observable_ids = self._data_df[OBSERVABLE_ID].unique() vis_spec_exp_rows = [ - self._vis_spec_rows_for_obs(obs_id, {PLOT_ID: f'plot{idx}'}) + self._vis_spec_rows_for_obs(obs_id, {PLOT_ID: f"plot{idx}"}) for idx, obs_id in enumerate(observable_ids) ] return pd.concat(vis_spec_exp_rows, ignore_index=True) @@ -962,8 +1053,9 @@ def _expand_vis_spec_settings(self, vis_spec: pd.DataFrame): ) return pd.concat(vis_spec_exp_rows, ignore_index=True) - def _vis_spec_rows_for_obs(self, obs_id: str, settings: dict - ) -> pd.DataFrame: + def _vis_spec_rows_for_obs( + self, obs_id: str, settings: dict + ) -> pd.DataFrame: """ Create vis_spec for one observable. @@ -983,24 +1075,34 @@ def _vis_spec_rows_for_obs(self, obs_id: str, settings: dict ------- A visualization specification DataFrame. """ - columns_to_expand = [PLOT_ID, PLOT_NAME, PLOT_TYPE_SIMULATION, - PLOT_TYPE_DATA, X_VALUES, X_OFFSET, X_LABEL, - X_SCALE, Y_OFFSET, Y_LABEL, Y_SCALE, - LEGEND_ENTRY] - - dataset_ids = self._data_df[ - self._data_df[OBSERVABLE_ID] == - obs_id][DATASET_ID].unique() + columns_to_expand = [ + PLOT_ID, + PLOT_NAME, + PLOT_TYPE_SIMULATION, + PLOT_TYPE_DATA, + X_VALUES, + X_OFFSET, + X_LABEL, + X_SCALE, + Y_OFFSET, + Y_LABEL, + Y_SCALE, + LEGEND_ENTRY, + ] + + dataset_ids = self._data_df[self._data_df[OBSERVABLE_ID] == obs_id][ + DATASET_ID + ].unique() n_rows = len(dataset_ids) - columns_dict = {DATASET_ID: dataset_ids, - Y_VALUES: [obs_id] * n_rows} + columns_dict = {DATASET_ID: dataset_ids, Y_VALUES: [obs_id] * n_rows} for column in settings: if column in columns_to_expand: columns_dict[column] = [settings[column]] * n_rows if LEGEND_ENTRY not in columns_dict: - columns_dict[LEGEND_ENTRY] = \ - [self._create_legend(dataset_id) for dataset_id - in columns_dict[DATASET_ID]] + columns_dict[LEGEND_ENTRY] = [ + self._create_legend(dataset_id) + for dataset_id in columns_dict[DATASET_ID] + ] return pd.DataFrame(columns_dict) diff --git a/petab/yaml.py b/petab/yaml.py index 3728f8ab..1a0ee079 100644 --- a/petab/yaml.py +++ b/petab/yaml.py @@ -15,20 +15,26 @@ SCHEMA_DIR = Path(__file__).parent / "schemas" # map of version number to validation schema SCHEMAS = { - '1': SCHEMA_DIR / "petab_schema.v1.0.0.yaml", - '1.0.0': SCHEMA_DIR / "petab_schema.v1.0.0.yaml", - '2.0.0': SCHEMA_DIR / "petab_schema.v2.0.0.yaml", + "1": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", + "1.0.0": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", + "2.0.0": SCHEMA_DIR / "petab_schema.v2.0.0.yaml", } -__all__ = ['validate', 'validate_yaml_syntax', 'validate_yaml_semantics', - 'load_yaml', 'is_composite_problem', - 'assert_single_condition_and_sbml_file', 'write_yaml', - 'create_problem_yaml'] +__all__ = [ + "validate", + "validate_yaml_syntax", + "validate_yaml_semantics", + "load_yaml", + "is_composite_problem", + "assert_single_condition_and_sbml_file", + "write_yaml", + "create_problem_yaml", +] def validate( - yaml_config: Union[Dict, str, Path], - path_prefix: Union[None, str, Path] = None, + yaml_config: Union[Dict, str, Path], + path_prefix: Union[None, str, Path] = None, ): """Validate syntax and semantics of PEtab config 
YAML @@ -42,13 +48,12 @@ def validate( """ validate_yaml_syntax(yaml_config) - validate_yaml_semantics(yaml_config=yaml_config, - path_prefix=path_prefix) + validate_yaml_semantics(yaml_config=yaml_config, path_prefix=path_prefix) def validate_yaml_syntax( - yaml_config: Union[Dict, str, Path], - schema: Union[None, Dict, str] = None): + yaml_config: Union[Dict, str, Path], schema: Union[None, Dict, str] = None +): """Validate PEtab YAML file syntax Arguments: @@ -66,20 +71,23 @@ def validate_yaml_syntax( # try get PEtab version from yaml file # if this is not the available, the file is not valid anyways, # but let's still use the latest PEtab schema for full validation - version = yaml_config.get(FORMAT_VERSION, None) \ - or list(SCHEMAS.values())[-1] + version = ( + yaml_config.get(FORMAT_VERSION, None) or list(SCHEMAS.values())[-1] + ) try: schema = SCHEMAS[str(version)] except KeyError as e: - raise ValueError("Unknown PEtab version given in problem " - f"specification: {version}") from e + raise ValueError( + "Unknown PEtab version given in problem " + f"specification: {version}" + ) from e schema = load_yaml(schema) jsonschema.validate(instance=yaml_config, schema=schema) def validate_yaml_semantics( - yaml_config: Union[Dict, str, Path], - path_prefix: Union[None, str, Path] = None + yaml_config: Union[Dict, str, Path], + path_prefix: Union[None, str, Path] = None, ): """Validate PEtab YAML file semantics @@ -108,21 +116,28 @@ def validate_yaml_semantics( def _check_file(_filename: str, _field: str): if not os.path.isfile(_filename): - raise AssertionError(f"File '{_filename}' provided as '{_field}' " - "does not exist.") + raise AssertionError( + f"File '{_filename}' provided as '{_field}' " "does not exist." + ) # Handles both a single parameter file, and a parameter file that has been # split into multiple subset files. - for parameter_subset_file in ( - list(np.array(yaml_config[PARAMETER_FILE]).flat)): + for parameter_subset_file in list( + np.array(yaml_config[PARAMETER_FILE]).flat + ): _check_file( os.path.join(path_prefix, parameter_subset_file), - parameter_subset_file + parameter_subset_file, ) for problem_config in yaml_config[PROBLEMS]: - for field in [SBML_FILES, CONDITION_FILES, MEASUREMENT_FILES, - VISUALIZATION_FILES, OBSERVABLE_FILES]: + for field in [ + SBML_FILES, + CONDITION_FILES, + MEASUREMENT_FILES, + VISUALIZATION_FILES, + OBSERVABLE_FILES, + ]: if field in problem_config: for filename in problem_config[field]: _check_file(os.path.join(path_prefix, filename), field) @@ -147,7 +162,7 @@ def load_yaml(yaml_config: Union[Dict, Path, str]) -> Dict: if isinstance(yaml_config, dict): return yaml_config - with get_handle(yaml_config, mode='r') as io_handle: + with get_handle(yaml_config, mode="r") as io_handle: data = yaml.safe_load(io_handle.handle) return data @@ -175,18 +190,20 @@ def assert_single_condition_and_sbml_file(problem_config: Dict) -> None: NotImplementedError: If multiple condition or SBML files specified. """ - if (len(problem_config[SBML_FILES]) > 1 - or len(problem_config[CONDITION_FILES]) > 1): + if ( + len(problem_config[SBML_FILES]) > 1 + or len(problem_config[CONDITION_FILES]) > 1 + ): # TODO https://github.com/ICB-DCM/PEtab/issues/188 # TODO https://github.com/ICB-DCM/PEtab/issues/189 raise NotImplementedError( - 'Support for multiple models or condition files is not yet ' - 'implemented.') + "Support for multiple models or condition files is not yet " + "implemented." 
+ ) def write_yaml( - yaml_config: Dict[str, Any], - filename: Union[str, Path] + yaml_config: Dict[str, Any], filename: Union[str, Path] ) -> None: """Write PEtab YAML file @@ -194,22 +211,24 @@ def write_yaml( yaml_config: Data to write filename: File to create """ - with open(filename, 'w') as outfile: - yaml.dump(yaml_config, outfile, default_flow_style=False, - sort_keys=False) + with open(filename, "w") as outfile: + yaml.dump( + yaml_config, outfile, default_flow_style=False, sort_keys=False + ) def create_problem_yaml( - sbml_files: Union[str, Path, List[Union[str, Path]]], - condition_files: Union[str, Path, List[Union[str, Path]]], - measurement_files: Union[str, Path, List[Union[str, Path]]], - parameter_file: Union[str, Path], - observable_files: Union[str, Path, List[Union[str, Path]]], - yaml_file: Union[str, Path], - visualization_files: - Optional[Union[str, Path, List[Union[str, Path]]]] = None, - relative_paths: bool = True, - mapping_files: Union[str, Path, List[Union[str, Path]]] = None, + sbml_files: Union[str, Path, List[Union[str, Path]]], + condition_files: Union[str, Path, List[Union[str, Path]]], + measurement_files: Union[str, Path, List[Union[str, Path]]], + parameter_file: Union[str, Path], + observable_files: Union[str, Path, List[Union[str, Path]]], + yaml_file: Union[str, Path], + visualization_files: Optional[ + Union[str, Path, List[Union[str, Path]]] + ] = None, + relative_paths: bool = True, + mapping_files: Union[str, Path, List[Union[str, Path]]] = None, ) -> None: """Create and write default YAML file for a single PEtab problem @@ -246,8 +265,7 @@ def get_rel_to_yaml(paths: Union[List[str], None]): if paths is None: return paths return [ - os.path.relpath(path, start=yaml_file_dir) - for path in paths + os.path.relpath(path, start=yaml_file_dir) for path in paths ] sbml_files = get_rel_to_yaml(sbml_files) @@ -262,7 +280,7 @@ def get_rel_to_yaml(paths: Union[List[str], None]): CONDITION_FILES: condition_files, MEASUREMENT_FILES: measurement_files, SBML_FILES: sbml_files, - OBSERVABLE_FILES: observable_files + OBSERVABLE_FILES: observable_files, } if mapping_files: problem_dic[MAPPING_FILES] = mapping_files @@ -272,6 +290,6 @@ def get_rel_to_yaml(paths: Union[List[str], None]): yaml_dic = { PARAMETER_FILE: parameter_file, FORMAT_VERSION: 1, - PROBLEMS: [problem_dic] + PROBLEMS: [problem_dic], } write_yaml(yaml_dic, yaml_file) diff --git a/pyproject.toml b/pyproject.toml index 1c2d3554..104f49e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,3 +4,6 @@ requires = [ "wheel", ] build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 80 diff --git a/setup.py b/setup.py index a8249c56..ddc2aa8c 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,8 @@ -from setuptools import setup, find_namespace_packages -import sys import os import re +import sys + +from setuptools import find_namespace_packages, setup def read(fname): @@ -12,10 +13,12 @@ def read(fname): def absolute_links(txt): """Replace relative petab github links by absolute links.""" - raw_base = \ + raw_base = ( "(https://raw.githubusercontent.com/petab-dev/libpetab-python/master/" - embedded_base = \ + ) + embedded_base = ( "(https://github.com/petab-dev/libpetab-python/tree/master/" + ) # iterate over links for var in re.findall(r"\[.*?\]\((?!http).*?\)", txt): if re.match(r".*?.(png|svg)\)", var): @@ -97,10 +100,6 @@ def absolute_links(txt): # https://github.com/spatialaudio/nbsphinx/issues/687#issuecomment-1339271312 "ipython>=7.21.0, !=8.7.0", ], - "vis": [ - 
"matplotlib>=3.6.0", - "seaborn", - "scipy" - ] + "vis": ["matplotlib>=3.6.0", "seaborn", "scipy"], }, ) diff --git a/tests/test_calculate.py b/tests/test_calculate.py index 94f2f16a..d98896c8 100644 --- a/tests/test_calculate.py +++ b/tests/test_calculate.py @@ -1,234 +1,359 @@ """Tests related to petab.calculate.""" -from petab import (calculate_residuals, calculate_chi2, calculate_llh, - calculate_single_llh) -from petab.C import * -import pandas as pd import numpy as np +import pandas as pd import pytest +from petab import ( + calculate_chi2, + calculate_llh, + calculate_residuals, + calculate_single_llh, +) +from petab.C import * + def model_simple(): - "Simple model.""" - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a', 'obs_a', 'obs_b', 'obs_b'], - SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0', 'c1'], - TIME: [0, 10, 0, 10], - MEASUREMENT: [0, 1, 20, 22] - }) - - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a', 'obs_b'], - OBSERVABLE_FORMULA: ['A', 'B'], - NOISE_FORMULA: [2, 3] - }).set_index([OBSERVABLE_ID]) - - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['par1', 'par2'], - NOMINAL_VALUE: [3, 4] - }) + "Simple model." "" + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a", "obs_b", "obs_b"], + SIMULATION_CONDITION_ID: ["c0", "c1", "c0", "c1"], + TIME: [0, 10, 0, 10], + MEASUREMENT: [0, 1, 20, 22], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_b"], + OBSERVABLE_FORMULA: ["A", "B"], + NOISE_FORMULA: [2, 3], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ) simulation_df = measurement_df.copy(deep=True).rename( - columns={MEASUREMENT: SIMULATION}) + columns={MEASUREMENT: SIMULATION} + ) simulation_df[SIMULATION] = [2, 2, 19, 20] - expected_residuals = {(2-0)/2, (2-1)/2, (19-20)/3, (20-22)/3} - expected_residuals_nonorm = {2-0, 2-1, 19-20, 20-22} - expected_llh = - 0.5*(np.array(list(expected_residuals))**2).sum() - \ - 0.5*np.log(2*np.pi*np.array([2, 2, 3, 3])**2).sum() - - return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm, - expected_llh) + expected_residuals = { + (2 - 0) / 2, + (2 - 1) / 2, + (19 - 20) / 3, + (20 - 22) / 3, + } + expected_residuals_nonorm = {2 - 0, 2 - 1, 19 - 20, 20 - 22} + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 * np.log(2 * np.pi * np.array([2, 2, 3, 3]) ** 2).sum() + ) + + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) def model_replicates(): """Model with replicates.""" - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a', 'obs_a'], - SIMULATION_CONDITION_ID: ['c0', 'c0'], - TIME: [10, 10], - MEASUREMENT: [0, 1] - }) - - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a'], - OBSERVABLE_FORMULA: ['A'], - NOISE_FORMULA: [2] - }).set_index([OBSERVABLE_ID]) - - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['par1', 'par2'], - NOMINAL_VALUE: [3, 4] - }).set_index([PARAMETER_ID]) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a"], + SIMULATION_CONDITION_ID: ["c0", "c0"], + TIME: [10, 10], + MEASUREMENT: [0, 1], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a"], + OBSERVABLE_FORMULA: ["A"], + NOISE_FORMULA: [2], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: 
["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) simulation_df = measurement_df.copy(deep=True).rename( - columns={MEASUREMENT: SIMULATION}) + columns={MEASUREMENT: SIMULATION} + ) simulation_df[SIMULATION] = [2, 2] - expected_residuals = {(2-0)/2, (2-1)/2} - expected_residuals_nonorm = {2-0, 2-1} - expected_llh = - 0.5*(np.array(list(expected_residuals))**2).sum() - \ - 0.5*np.log(2*np.pi*np.array([2, 2])**2).sum() + expected_residuals = {(2 - 0) / 2, (2 - 1) / 2} + expected_residuals_nonorm = {2 - 0, 2 - 1} + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 * np.log(2 * np.pi * np.array([2, 2]) ** 2).sum() + ) - return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm, - expected_llh) + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) def model_scalings(): """Model with scalings.""" - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a', 'obs_a'], - SIMULATION_CONDITION_ID: ['c0', 'c0'], - TIME: [5, 10], - MEASUREMENT: [0.5, 1] - }) - - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a'], - OBSERVABLE_FORMULA: ['A'], - OBSERVABLE_TRANSFORMATION: [LOG], - NOISE_FORMULA: [2] - }).set_index([OBSERVABLE_ID]) - - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['par1', 'par2'], - NOMINAL_VALUE: [3, 4] - }).set_index([PARAMETER_ID]) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a"], + SIMULATION_CONDITION_ID: ["c0", "c0"], + TIME: [5, 10], + MEASUREMENT: [0.5, 1], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a"], + OBSERVABLE_FORMULA: ["A"], + OBSERVABLE_TRANSFORMATION: [LOG], + NOISE_FORMULA: [2], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) simulation_df = measurement_df.copy(deep=True).rename( - columns={MEASUREMENT: SIMULATION}) + columns={MEASUREMENT: SIMULATION} + ) simulation_df[SIMULATION] = [2, 3] - expected_residuals = {(np.log(2)-np.log(0.5))/2, (np.log(3)-np.log(1))/2} - expected_residuals_nonorm = {np.log(2)-np.log(0.5), np.log(3)-np.log(1)} - expected_llh = - 0.5*(np.array(list(expected_residuals))**2).sum() - \ - 0.5*np.log(2*np.pi*np.array([2, 2])**2*np.array([0.5, 1])**2).sum() - - return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm, - expected_llh) + expected_residuals = { + (np.log(2) - np.log(0.5)) / 2, + (np.log(3) - np.log(1)) / 2, + } + expected_residuals_nonorm = { + np.log(2) - np.log(0.5), + np.log(3) - np.log(1), + } + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 + * np.log( + 2 * np.pi * np.array([2, 2]) ** 2 * np.array([0.5, 1]) ** 2 + ).sum() + ) + + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) def model_non_numeric_overrides(): """Model with non-numeric overrides.""" - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a', 'obs_a'], - SIMULATION_CONDITION_ID: ['c0', 'c0'], - TIME: [5, 10], - MEASUREMENT: [0.5, 1], - NOISE_PARAMETERS: ['7;8', '2;par1'] - }) - - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a'], - OBSERVABLE_FORMULA: ['A'], - OBSERVABLE_TRANSFORMATION: [LOG], - NOISE_FORMULA: ['2*noiseParameter1_obs_a + ' - 
'noiseParameter2_obs_a + par2 + obs_a'] - }).set_index([OBSERVABLE_ID]) - - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['par1', 'par2'], - NOMINAL_VALUE: [3, 4] - }).set_index([PARAMETER_ID]) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a"], + SIMULATION_CONDITION_ID: ["c0", "c0"], + TIME: [5, 10], + MEASUREMENT: [0.5, 1], + NOISE_PARAMETERS: ["7;8", "2;par1"], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a"], + OBSERVABLE_FORMULA: ["A"], + OBSERVABLE_TRANSFORMATION: [LOG], + NOISE_FORMULA: [ + "2*noiseParameter1_obs_a + " + "noiseParameter2_obs_a + par2 + obs_a" + ], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) simulation_df = measurement_df.copy(deep=True).rename( - columns={MEASUREMENT: SIMULATION}) + columns={MEASUREMENT: SIMULATION} + ) simulation_df[SIMULATION] = [2, 3] - expected_residuals = {(np.log(2)-np.log(0.5))/(2*7+8+4+np.log(2)), - (np.log(3)-np.log(1))/(2*2+3+4+np.log(3))} - expected_residuals_nonorm = {np.log(2)-np.log(0.5), np.log(3)-np.log(1)} - expected_llh = - 0.5*(np.array(list(expected_residuals))**2).sum() - \ - 0.5*np.log(2*np.pi*np.array([2*7+8+4+np.log(2), 2*2+3+4+np.log(3)])**2 - * np.array([0.5, 1])**2).sum() - - return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm, - expected_llh) + expected_residuals = { + (np.log(2) - np.log(0.5)) / (2 * 7 + 8 + 4 + np.log(2)), + (np.log(3) - np.log(1)) / (2 * 2 + 3 + 4 + np.log(3)), + } + expected_residuals_nonorm = { + np.log(2) - np.log(0.5), + np.log(3) - np.log(1), + } + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 + * np.log( + 2 + * np.pi + * np.array([2 * 7 + 8 + 4 + np.log(2), 2 * 2 + 3 + 4 + np.log(3)]) + ** 2 + * np.array([0.5, 1]) ** 2 + ).sum() + ) + + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) def model_custom_likelihood(): """Model with customized likelihoods.""" - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a', 'obs_b'], - SIMULATION_CONDITION_ID: ['c0', 'c0'], - TIME: [5, 10], - MEASUREMENT: [0.5, 2] - }) - - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_a', 'obs_b'], - OBSERVABLE_FORMULA: ['A', 'B'], - OBSERVABLE_TRANSFORMATION: [LOG, LIN], - NOISE_FORMULA: [2, 1.5], - NOISE_DISTRIBUTION: [LAPLACE, LAPLACE] - }).set_index([OBSERVABLE_ID]) - - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['par1', 'par2'], - NOMINAL_VALUE: [3, 4] - }).set_index([PARAMETER_ID]) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_b"], + SIMULATION_CONDITION_ID: ["c0", "c0"], + TIME: [5, 10], + MEASUREMENT: [0.5, 2], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_b"], + OBSERVABLE_FORMULA: ["A", "B"], + OBSERVABLE_TRANSFORMATION: [LOG, LIN], + NOISE_FORMULA: [2, 1.5], + NOISE_DISTRIBUTION: [LAPLACE, LAPLACE], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) simulation_df = measurement_df.copy(deep=True).rename( - columns={MEASUREMENT: SIMULATION}) + columns={MEASUREMENT: SIMULATION} + ) simulation_df[SIMULATION] = [2, 3] - expected_residuals = {(np.log(2)-np.log(0.5))/2, (3-2)/1.5} - expected_residuals_nonorm = {np.log(2)-np.log(0.5), 3-2} - 
expected_llh = - np.abs(list(expected_residuals)).sum() - \ - np.log(2*np.array([2, 1.5])*np.array([0.5, 1])).sum() + expected_residuals = {(np.log(2) - np.log(0.5)) / 2, (3 - 2) / 1.5} + expected_residuals_nonorm = {np.log(2) - np.log(0.5), 3 - 2} + expected_llh = ( + -np.abs(list(expected_residuals)).sum() + - np.log(2 * np.array([2, 1.5]) * np.array([0.5, 1])).sum() + ) - return (measurement_df, observable_df, parameter_df, - simulation_df, expected_residuals, expected_residuals_nonorm, - expected_llh) + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) @pytest.fixture def models(): """Test model collection covering different features.""" - return [model_simple(), model_replicates(), - model_scalings(), model_non_numeric_overrides(), - model_custom_likelihood()] + return [ + model_simple(), + model_replicates(), + model_scalings(), + model_non_numeric_overrides(), + model_custom_likelihood(), + ] def test_calculate_residuals(models): # pylint: disable=W0621 """Test calculate.calculate_residuals.""" for i_model, model in enumerate(models): print(f"Model {i_model}") - (measurement_df, observable_df, parameter_df, simulation_df, - expected_residuals, _, _) = model + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + _, + _, + ) = model residual_dfs = calculate_residuals( - measurement_df, simulation_df, observable_df, parameter_df) - assert sorted(residual_dfs[0][RESIDUAL]) == \ - pytest.approx(sorted(expected_residuals)) + measurement_df, simulation_df, observable_df, parameter_df + ) + assert sorted(residual_dfs[0][RESIDUAL]) == pytest.approx( + sorted(expected_residuals) + ) def test_calculate_non_normalized_residuals(models): # pylint: disable=W0621 """Test calculate.calculate_residuals without normalization.""" for i_model, model in enumerate(models): print(f"Model {i_model}") - (measurement_df, observable_df, parameter_df, simulation_df, - _, expected_residuals_nonorm, _) = model + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + _, + expected_residuals_nonorm, + _, + ) = model residual_dfs = calculate_residuals( - measurement_df, simulation_df, observable_df, parameter_df, - normalize=False) - assert sorted(residual_dfs[0][RESIDUAL]) == \ - pytest.approx(sorted(expected_residuals_nonorm)) + measurement_df, + simulation_df, + observable_df, + parameter_df, + normalize=False, + ) + assert sorted(residual_dfs[0][RESIDUAL]) == pytest.approx( + sorted(expected_residuals_nonorm) + ) def test_calculate_chi2(models): # pylint: disable=W0621 """Test calculate.calculate_chi2.""" for i_model, model in enumerate(models): print(f"Model {i_model}") - (measurement_df, observable_df, parameter_df, simulation_df, - expected_residuals, _, _) = model + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + _, + _, + ) = model chi2 = calculate_chi2( - measurement_df, simulation_df, observable_df, parameter_df) + measurement_df, simulation_df, observable_df, parameter_df + ) - expected = sum(np.array(list(expected_residuals))**2) + expected = sum(np.array(list(expected_residuals)) ** 2) assert chi2 == pytest.approx(expected) @@ -236,10 +361,18 @@ def test_calculate_llh(models): # pylint: disable=W0621 """Test calculate.calculate_llh.""" for i_model, model in enumerate(models): print(f"Model {i_model}") - (measurement_df, observable_df, parameter_df, simulation_df, - _, _, expected_llh) = model + ( + 
measurement_df, + observable_df, + parameter_df, + simulation_df, + _, + _, + expected_llh, + ) = model llh = calculate_llh( - measurement_df, simulation_df, observable_df, parameter_df) + measurement_df, simulation_df, observable_df, parameter_df + ) assert llh == pytest.approx(expected_llh) or expected_llh is None @@ -248,34 +381,69 @@ def test_calculate_single_llh(): m, s, sigma = 5.3, 4.5, 1.6 pi, log, log10 = np.pi, np.log, np.log10 - llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, - noise_distribution=NORMAL, scale=LIN) - expected_llh = - 0.5 * (((s-m)/sigma)**2 + log(2*pi*sigma**2)) + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=NORMAL, + scale=LIN, + ) + expected_llh = -0.5 * (((s - m) / sigma) ** 2 + log(2 * pi * sigma**2)) assert llh == pytest.approx(expected_llh) - llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, - noise_distribution=NORMAL, scale=LOG) - expected_llh = - 0.5 * (((log(s)-log(m))/sigma)**2 + - log(2*pi*sigma**2*m**2)) + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=NORMAL, + scale=LOG, + ) + expected_llh = -0.5 * ( + ((log(s) - log(m)) / sigma) ** 2 + log(2 * pi * sigma**2 * m**2) + ) assert llh == pytest.approx(expected_llh) - llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, - noise_distribution=NORMAL, scale=LOG10) - expected_llh = - 0.5 * (((log10(s)-log10(m))/sigma)**2 + - log(2*pi*sigma**2*m**2*log(10)**2)) + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=NORMAL, + scale=LOG10, + ) + expected_llh = -0.5 * ( + ((log10(s) - log10(m)) / sigma) ** 2 + + log(2 * pi * sigma**2 * m**2 * log(10) ** 2) + ) assert llh == pytest.approx(expected_llh) - llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, - noise_distribution=LAPLACE, scale=LIN) - expected_llh = - abs((s-m)/sigma) - log(2*sigma) + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=LAPLACE, + scale=LIN, + ) + expected_llh = -abs((s - m) / sigma) - log(2 * sigma) assert llh == pytest.approx(expected_llh) - llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, - noise_distribution=LAPLACE, scale=LOG) - expected_llh = - abs((log(s)-log(m))/sigma) - log(2*sigma*m) + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=LAPLACE, + scale=LOG, + ) + expected_llh = -abs((log(s) - log(m)) / sigma) - log(2 * sigma * m) assert llh == pytest.approx(expected_llh) - llh = calculate_single_llh(measurement=m, simulation=s, noise_value=sigma, - noise_distribution=LAPLACE, scale=LOG10) - expected_llh = - abs((log10(s)-log10(m))/sigma) - log(2*sigma*m*log(10)) + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=LAPLACE, + scale=LOG10, + ) + expected_llh = -abs((log10(s) - log10(m)) / sigma) - log( + 2 * sigma * m * log(10) + ) assert llh == pytest.approx(expected_llh) diff --git a/tests/test_combine.py b/tests/test_combine.py index fb245218..dbe54c90 100644 --- a/tests/test_combine.py +++ b/tests/test_combine.py @@ -3,6 +3,7 @@ from pathlib import Path import pandas as pd + import petab from petab.C import * @@ -17,41 +18,50 @@ def test_combine_archive(): # Create test files import simplesbml + ss_model = simplesbml.SbmlModel() # Create tables with arbitrary content - measurement_df = 
pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2'], - OBSERVABLE_PARAMETERS: ['', 'p1;p2'], - NOISE_PARAMETERS: ['p3;p4', 'p5'] - }) - - condition_df = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2'], - CONDITION_NAME: ['', 'Condition 2'], - 'fixedParameter1': [1.0, 2.0] - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2"], + OBSERVABLE_PARAMETERS: ["", "p1;p2"], + NOISE_PARAMETERS: ["p3;p4", "p5"], + } + ) + + condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + CONDITION_NAME: ["", "Condition 2"], + "fixedParameter1": [1.0, 2.0], + } + ) condition_df.set_index(CONDITION_ID, inplace=True) - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['dynamicParameter1', 'dynamicParameter2'], - PARAMETER_NAME: ['', '...'], - }) + parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["dynamicParameter1", "dynamicParameter2"], + PARAMETER_NAME: ["", "..."], + } + ) parameter_df.set_index(PARAMETER_ID, inplace=True) - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['observable_1'], - OBSERVABLE_FORMULA: ['observable_1'], - NOISE_FORMULA: [1], - }) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["observable_1"], + OBSERVABLE_FORMULA: ["observable_1"], + NOISE_FORMULA: [1], + } + ) observable_df.set_index(OBSERVABLE_ID, inplace=True) - sbml_file_name = 'model.xml' - measurement_file_name = 'measurements.tsv' - condition_file_name = 'conditions.tsv' - parameter_file_name = 'parameters.tsv' - observable_file_name = 'observables.tsv' - yaml_file_name = 'test.yaml' + sbml_file_name = "model.xml" + measurement_file_name = "measurements.tsv" + condition_file_name = "conditions.tsv" + parameter_file_name = "parameters.tsv" + observable_file_name = "observables.tsv" + yaml_file_name = "test.yaml" yaml_config = { FORMAT_VERSION: petab.__format_version__, @@ -61,31 +71,31 @@ def test_combine_archive(): SBML_FILES: [sbml_file_name], MEASUREMENT_FILES: [measurement_file_name], CONDITION_FILES: [condition_file_name], - OBSERVABLE_FILES: [observable_file_name] + OBSERVABLE_FILES: [observable_file_name], } - ] + ], } - with tempfile.TemporaryDirectory(prefix='petab_test_combine_archive') \ - as tempdir: + with tempfile.TemporaryDirectory( + prefix="petab_test_combine_archive" + ) as tempdir: # Write test data outdir = Path(tempdir) petab.write_sbml(ss_model.document, outdir / sbml_file_name) petab.write_measurement_df( - measurement_df, outdir / measurement_file_name) - petab.write_parameter_df( - parameter_df, outdir / parameter_file_name) - petab.write_observable_df( - observable_df, outdir / observable_file_name) - petab.write_condition_df( - condition_df, outdir / condition_file_name) + measurement_df, outdir / measurement_file_name + ) + petab.write_parameter_df(parameter_df, outdir / parameter_file_name) + petab.write_observable_df(observable_df, outdir / observable_file_name) + petab.write_condition_df(condition_df, outdir / condition_file_name) petab.write_yaml(yaml_config, outdir / yaml_file_name) - archive_file_name = outdir / 'test.omex' + archive_file_name = outdir / "test.omex" # Create COMBINE archive - petab.create_combine_archive(outdir / yaml_file_name, - archive_file_name, family_name="Tester") + petab.create_combine_archive( + outdir / yaml_file_name, archive_file_name, family_name="Tester" + ) # Read COMBINE archive problem = petab.Problem.from_combine(archive_file_name) diff --git a/tests/test_conditions.py b/tests/test_conditions.py index 0e252319..b240241d 100644 --- a/tests/test_conditions.py +++ 
b/tests/test_conditions.py @@ -5,44 +5,51 @@ import numpy as np import pandas as pd -import petab import pytest + +import petab from petab import conditions from petab.C import * def test_get_parametric_overrides(): - - condition_df = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2'], - CONDITION_NAME: ['', 'Condition 2'], - 'fixedParameter1': [1.0, 2.0] - }) + condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + CONDITION_NAME: ["", "Condition 2"], + "fixedParameter1": [1.0, 2.0], + } + ) assert conditions.get_parametric_overrides(condition_df) == [] - condition_df.fixedParameter1 = \ - condition_df.fixedParameter1.values.astype(int) + condition_df.fixedParameter1 = condition_df.fixedParameter1.values.astype( + int + ) assert conditions.get_parametric_overrides(condition_df) == [] - condition_df['fixedParameter1'] = condition_df['fixedParameter1'].astype("object") - condition_df.loc[0, 'fixedParameter1'] = 'parameterId' + condition_df["fixedParameter1"] = condition_df["fixedParameter1"].astype( + "object" + ) + condition_df.loc[0, "fixedParameter1"] = "parameterId" - assert conditions.get_parametric_overrides(condition_df) == ['parameterId'] + assert conditions.get_parametric_overrides(condition_df) == ["parameterId"] def test_get_condition_df(): """Test conditions.get_condition_df.""" # condition df missing ids - condition_df = pd.DataFrame(data={ - CONDITION_NAME: ['Condition 1', 'Condition 2'], - 'fixedParameter1': [1.0, 2.0] - }) - - with tempfile.NamedTemporaryFile(mode='w', delete=False) as fh: + condition_df = pd.DataFrame( + data={ + CONDITION_NAME: ["Condition 1", "Condition 2"], + "fixedParameter1": [1.0, 2.0], + } + ) + + with tempfile.NamedTemporaryFile(mode="w", delete=False) as fh: file_name = fh.name - condition_df.to_csv(fh, sep='\t', index=False) + condition_df.to_csv(fh, sep="\t", index=False) with pytest.raises(KeyError): petab.get_condition_df(file_name) @@ -50,17 +57,19 @@ def test_get_condition_df(): os.remove(file_name) # with ids - condition_df = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2'], - CONDITION_NAME: ['', 'Condition 2'], - 'fixedParameter1': [1.0, 2.0] - }) - - with tempfile.NamedTemporaryFile(mode='w', delete=False) as fh: + condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + CONDITION_NAME: ["", "Condition 2"], + "fixedParameter1": [1.0, 2.0], + } + ) + + with tempfile.NamedTemporaryFile(mode="w", delete=False) as fh: file_name = fh.name - condition_df.to_csv(fh, sep='\t', index=False) + condition_df.to_csv(fh, sep="\t", index=False) - df = petab.get_condition_df(file_name).replace(np.nan, '') + df = petab.get_condition_df(file_name).replace(np.nan, "") assert (df == condition_df.set_index(CONDITION_ID)).all().all() os.remove(file_name) @@ -72,11 +81,13 @@ def test_get_condition_df(): def test_write_condition_df(): """Test conditions.write_condition_df.""" - condition_df = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2'], - CONDITION_NAME: ['Condition 1', 'Condition 2'], - 'fixedParameter1': [1.0, 2.0] - }).set_index(CONDITION_ID) + condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + CONDITION_NAME: ["Condition 1", "Condition 2"], + "fixedParameter1": [1.0, 2.0], + } + ).set_index(CONDITION_ID) with tempfile.TemporaryDirectory() as temp_dir: file_name = Path(temp_dir) / "conditions.tsv" @@ -87,16 +98,18 @@ def test_write_condition_df(): def test_create_condition_df(): """Test 
conditions.create_condition_df.""" - parameter_ids = ['par1', 'par2', 'par3'] - condition_ids = ['condition1', 'condition2'] + parameter_ids = ["par1", "par2", "par3"] + condition_ids = ["condition1", "condition2"] df = petab.create_condition_df(parameter_ids, condition_ids) - expected = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2'], - 'par1': [np.nan, np.nan], - 'par2': [np.nan, np.nan], - 'par3': [np.nan, np.nan] - }).set_index(CONDITION_ID) + expected = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + "par1": [np.nan, np.nan], + "par2": [np.nan, np.nan], + "par3": [np.nan, np.nan], + } + ).set_index(CONDITION_ID) assert ((df == expected) | df.isna() == expected.isna()).all().all() diff --git a/tests/test_deprecated.py b/tests/test_deprecated.py index 305f6b03..4af41fa3 100644 --- a/tests/test_deprecated.py +++ b/tests/test_deprecated.py @@ -1,19 +1,25 @@ """Check that deprecated functionality raises but still works.""" -import pytest import tempfile from pathlib import Path +import pytest + import petab -from .test_sbml import create_test_data, check_model from .test_petab import petab_problem # noqa: F401 +from .test_sbml import check_model, create_test_data def test_problem_with_sbml_model(): """Test that a problem can be correctly created from sbml model.""" # retrieve test data - ss_model, condition_df, observable_df, measurement_df, parameter_df = \ - create_test_data() + ( + ss_model, + condition_df, + observable_df, + measurement_df, + parameter_df, + ) = create_test_data() with pytest.deprecated_call(): petab_problem = petab.Problem( # noqa: F811 @@ -23,11 +29,14 @@ def test_problem_with_sbml_model(): parameter_df=parameter_df, ) - with pytest.warns(UserWarning, - match="An SBML rule was removed to set the component " - "species_2 to a constant value."): + with pytest.warns( + UserWarning, + match="An SBML rule was removed to set the component " + "species_2 to a constant value.", + ): _, condition_model = petab.get_model_for_condition( - petab_problem, "condition_1") + petab_problem, "condition_1" + ) check_model(condition_model) @@ -57,5 +66,8 @@ def test_to_files_with_sbml_model(petab_problem): # noqa: F811 # exemplarily load some parameter_df = petab.get_parameter_df(parameter_file) same_nans = parameter_df.isna() == petab_problem.parameter_df.isna() - assert ((parameter_df == petab_problem.parameter_df) | same_nans) \ - .all().all() + assert ( + ((parameter_df == petab_problem.parameter_df) | same_nans) + .all() + .all() + ) diff --git a/tests/test_lint.py b/tests/test_lint.py index 7bbf4c83..e6919f14 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -19,30 +19,33 @@ def test_assert_measured_observables_present(): # create test model - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['non-existing1'], - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["non-existing1"], + } + ) - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1'], - }) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1"], + } + ) observable_df.set_index(OBSERVABLE_ID, inplace=True) with pytest.raises(AssertionError): - lint.assert_measured_observables_defined( - measurement_df, observable_df - ) + lint.assert_measured_observables_defined(measurement_df, observable_df) def test_condition_table_is_parameter_free(): - with patch('petab.get_parametric_overrides') \ - as mock_get_parametric_overrides: + with patch( + "petab.get_parametric_overrides" + ) as mock_get_parametric_overrides: 
mock_get_parametric_overrides.return_value = [] assert lint.condition_table_is_parameter_free(pd.DataFrame()) is True mock_get_parametric_overrides.assert_called_once() mock_get_parametric_overrides.reset_mock() - mock_get_parametric_overrides.return_value = ['p1'] + mock_get_parametric_overrides.return_value = ["p1"] assert lint.condition_table_is_parameter_free(pd.DataFrame()) is False mock_get_parametric_overrides.assert_called_once() @@ -50,211 +53,260 @@ def test_condition_table_is_parameter_free(): def test_measurement_table_has_timepoint_specific_mappings(): # Ensure we fail if we have time-point specific assignments - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs1'], - SIMULATION_CONDITION_ID: ['condition1', 'condition1'], - PREEQUILIBRATION_CONDITION_ID: [nan, nan], - TIME: [1.0, 2.0], - OBSERVABLE_PARAMETERS: ['obsParOverride', ''], - NOISE_PARAMETERS: ['1.0', 1.0] - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs1"], + SIMULATION_CONDITION_ID: ["condition1", "condition1"], + PREEQUILIBRATION_CONDITION_ID: [nan, nan], + TIME: [1.0, 2.0], + OBSERVABLE_PARAMETERS: ["obsParOverride", ""], + NOISE_PARAMETERS: ["1.0", 1.0], + } + ) - assert lint.measurement_table_has_timepoint_specific_mappings( - measurement_df) is True + assert ( + lint.measurement_table_has_timepoint_specific_mappings(measurement_df) + is True + ) # both measurements different anyways - measurement_df.loc[1, OBSERVABLE_ID] = 'obs2' - assert lint.measurement_table_has_timepoint_specific_mappings( - measurement_df) is False + measurement_df.loc[1, OBSERVABLE_ID] = "obs2" + assert ( + lint.measurement_table_has_timepoint_specific_mappings(measurement_df) + is False + ) # mixed numeric string - measurement_df.loc[1, OBSERVABLE_ID] = 'obs1' - measurement_df.loc[1, OBSERVABLE_PARAMETERS] = 'obsParOverride' - assert lint.measurement_table_has_timepoint_specific_mappings( - measurement_df) is False + measurement_df.loc[1, OBSERVABLE_ID] = "obs1" + measurement_df.loc[1, OBSERVABLE_PARAMETERS] = "obsParOverride" + assert ( + lint.measurement_table_has_timepoint_specific_mappings(measurement_df) + is False + ) # different numeric values measurement_df.loc[1, NOISE_PARAMETERS] = 2.0 - assert lint.measurement_table_has_timepoint_specific_mappings( - measurement_df) is True - assert lint.measurement_table_has_timepoint_specific_mappings( - measurement_df, allow_scalar_numeric_noise_parameters=True) is False + assert ( + lint.measurement_table_has_timepoint_specific_mappings(measurement_df) + is True + ) + assert ( + lint.measurement_table_has_timepoint_specific_mappings( + measurement_df, allow_scalar_numeric_noise_parameters=True + ) + is False + ) def test_observable_table_has_nontrivial_noise_formula(): # Ensure we fail if we have nontrivial noise formulas - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['0obsPar1noisePar', '2obsPar0noisePar', - '3obsPar0noisePar'], - OBSERVABLE_FORMULA: ['1.0', - '1.0', - '1.0'], - NOISE_FORMULA: ['noiseParameter1_0obsPar1noisePar + 3.0', - 1e18, - '1e18'] - }) - - assert lint.observable_table_has_nontrivial_noise_formula(observable_df)\ + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: [ + "0obsPar1noisePar", + "2obsPar0noisePar", + "3obsPar0noisePar", + ], + OBSERVABLE_FORMULA: ["1.0", "1.0", "1.0"], + NOISE_FORMULA: [ + "noiseParameter1_0obsPar1noisePar + 3.0", + 1e18, + "1e18", + ], + } + ) + + assert ( + lint.observable_table_has_nontrivial_noise_formula(observable_df) is True + ) - observable_df.loc[0, NOISE_FORMULA] = 
'sigma1' + observable_df.loc[0, NOISE_FORMULA] = "sigma1" - assert lint.observable_table_has_nontrivial_noise_formula(observable_df) \ + assert ( + lint.observable_table_has_nontrivial_noise_formula(observable_df) is False + ) def test_assert_overrides_match_parameter_count(): # Ensure we recognize and fail if we have wrong number of overrides - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['0obsPar1noisePar', '2obsPar0noisePar'], - OBSERVABLE_FORMULA: ['1.0', - 'observableParameter1_2obsPar0noisePar + ' - 'observableParameter2_2obsPar0noisePar'], - NOISE_FORMULA: ['noiseParameter1_0obsPar1noisePar', '1.0'] - }) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["0obsPar1noisePar", "2obsPar0noisePar"], + OBSERVABLE_FORMULA: [ + "1.0", + "observableParameter1_2obsPar0noisePar + " + "observableParameter2_2obsPar0noisePar", + ], + NOISE_FORMULA: ["noiseParameter1_0obsPar1noisePar", "1.0"], + } + ) observable_df.set_index(OBSERVABLE_ID, inplace=True) - measurement_df_orig = pd.DataFrame(data={ - OBSERVABLE_ID: ['0obsPar1noisePar', - '2obsPar0noisePar'], - SIMULATION_CONDITION_ID: ['condition1', 'condition1'], - PREEQUILIBRATION_CONDITION_ID: ['', ''], - TIME: [1.0, 2.0], - OBSERVABLE_PARAMETERS: ['', 'override1;override2'], - NOISE_PARAMETERS: ['noiseParOverride', ''] - }) + measurement_df_orig = pd.DataFrame( + data={ + OBSERVABLE_ID: ["0obsPar1noisePar", "2obsPar0noisePar"], + SIMULATION_CONDITION_ID: ["condition1", "condition1"], + PREEQUILIBRATION_CONDITION_ID: ["", ""], + TIME: [1.0, 2.0], + OBSERVABLE_PARAMETERS: ["", "override1;override2"], + NOISE_PARAMETERS: ["noiseParOverride", ""], + } + ) # valid petab.assert_overrides_match_parameter_count( - measurement_df_orig, observable_df) + measurement_df_orig, observable_df + ) # 0 noise parameters given, 1 expected measurement_df = measurement_df_orig.copy() - measurement_df.loc[0, NOISE_PARAMETERS] = '' + measurement_df.loc[0, NOISE_PARAMETERS] = "" with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( - measurement_df, observable_df) + measurement_df, observable_df + ) # 2 noise parameters given, 1 expected measurement_df = measurement_df_orig.copy() - measurement_df.loc[0, NOISE_PARAMETERS] = 'noiseParOverride;oneTooMuch' + measurement_df.loc[0, NOISE_PARAMETERS] = "noiseParOverride;oneTooMuch" with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( - measurement_df, observable_df) + measurement_df, observable_df + ) # 1 noise parameter given, 0 allowed measurement_df = measurement_df_orig.copy() - measurement_df.loc[1, NOISE_PARAMETERS] = 'oneTooMuch' + measurement_df.loc[1, NOISE_PARAMETERS] = "oneTooMuch" with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( - measurement_df, observable_df) + measurement_df, observable_df + ) # 0 observable parameters given, 2 expected measurement_df = measurement_df_orig.copy() - measurement_df.loc[1, OBSERVABLE_PARAMETERS] = '' + measurement_df.loc[1, OBSERVABLE_PARAMETERS] = "" with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( - measurement_df, observable_df) + measurement_df, observable_df + ) # 1 observable parameters given, 2 expected measurement_df = measurement_df_orig.copy() - measurement_df.loc[1, OBSERVABLE_PARAMETERS] = 'oneMissing' + measurement_df.loc[1, OBSERVABLE_PARAMETERS] = "oneMissing" with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( - measurement_df, observable_df) + measurement_df, observable_df + ) # 3 observable 
parameters given, 2 expected measurement_df = measurement_df_orig.copy() - measurement_df.loc[1, OBSERVABLE_PARAMETERS] = \ - 'override1;override2;oneTooMuch' + measurement_df.loc[ + 1, OBSERVABLE_PARAMETERS + ] = "override1;override2;oneTooMuch" with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( - measurement_df, observable_df) + measurement_df, observable_df + ) # 1 observable parameters given, 0 expected measurement_df = measurement_df_orig.copy() - measurement_df.loc[0, OBSERVABLE_PARAMETERS] = \ - 'oneTooMuch' + measurement_df.loc[0, OBSERVABLE_PARAMETERS] = "oneTooMuch" with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( - measurement_df, observable_df) + measurement_df, observable_df + ) def test_assert_no_leading_trailing_whitespace(): - - test_df = pd.DataFrame(data={ - 'testId': ['name1 ', 'name2'], - 'testText ': [' name1', 'name2'], - 'testNumeric': [1.0, 2.0], - 'testNone': None - }) + test_df = pd.DataFrame( + data={ + "testId": ["name1 ", "name2"], + "testText ": [" name1", "name2"], + "testNumeric": [1.0, 2.0], + "testNone": None, + } + ) with pytest.raises(AssertionError): lint.assert_no_leading_trailing_whitespace( - test_df.columns.values, "test") + test_df.columns.values, "test" + ) with pytest.raises(AssertionError): lint.assert_no_leading_trailing_whitespace( - test_df['testId'].values, "testId") + test_df["testId"].values, "testId" + ) with pytest.raises(AssertionError): lint.assert_no_leading_trailing_whitespace( - test_df['testText '].values, "testText") + test_df["testText "].values, "testText" + ) lint.assert_no_leading_trailing_whitespace( - test_df['testNumeric'].values, "testNumeric") + test_df["testNumeric"].values, "testNumeric" + ) lint.assert_no_leading_trailing_whitespace( - test_df['testNone'].values, "testNone") + test_df["testNone"].values, "testNone" + ) def test_assert_model_parameters_in_condition_or_parameter_table(): import simplesbml + from petab.models.sbml_model import SbmlModel + ss_model = simplesbml.SbmlModel() - ss_model.addParameter('parameter1', 0.0) - ss_model.addParameter('noiseParameter1_', 0.0) - ss_model.addParameter('observableParameter1_', 0.0) + ss_model.addParameter("parameter1", 0.0) + ss_model.addParameter("noiseParameter1_", 0.0) + ss_model.addParameter("observableParameter1_", 0.0) sbml_model = SbmlModel(sbml_model=ss_model.model) lint.assert_model_parameters_in_condition_or_parameter_table( - sbml_model, pd.DataFrame(columns=['parameter1']), pd.DataFrame() + sbml_model, pd.DataFrame(columns=["parameter1"]), pd.DataFrame() ) lint.assert_model_parameters_in_condition_or_parameter_table( - sbml_model, pd.DataFrame(), pd.DataFrame(index=['parameter1'])) + sbml_model, pd.DataFrame(), pd.DataFrame(index=["parameter1"]) + ) with pytest.raises(AssertionError): lint.assert_model_parameters_in_condition_or_parameter_table( sbml_model, - pd.DataFrame(columns=['parameter1']), - pd.DataFrame(index=['parameter1'])) + pd.DataFrame(columns=["parameter1"]), + pd.DataFrame(index=["parameter1"]), + ) lint.assert_model_parameters_in_condition_or_parameter_table( - sbml_model, pd.DataFrame(), pd.DataFrame()) + sbml_model, pd.DataFrame(), pd.DataFrame() + ) - ss_model.addAssignmentRule('parameter1', 'parameter2') + ss_model.addAssignmentRule("parameter1", "parameter2") lint.assert_model_parameters_in_condition_or_parameter_table( - sbml_model, pd.DataFrame(), pd.DataFrame()) + sbml_model, pd.DataFrame(), pd.DataFrame() + ) def test_assert_noise_distributions_valid(): - observable_df = 
pd.DataFrame(data={ - OBSERVABLE_ID: ['0obsPar1noisePar', - '2obsPar0noisePar'], - NOISE_PARAMETERS: ['', ''], - NOISE_DISTRIBUTION: ['', ''], - }) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["0obsPar1noisePar", "2obsPar0noisePar"], + NOISE_PARAMETERS: ["", ""], + NOISE_DISTRIBUTION: ["", ""], + } + ) observable_df.set_index([OBSERVABLE_ID], inplace=True) lint.assert_noise_distributions_valid(observable_df) observable_df[OBSERVABLE_TRANSFORMATION] = [LIN, LOG] - observable_df[NOISE_DISTRIBUTION] = [NORMAL, ''] + observable_df[NOISE_DISTRIBUTION] = [NORMAL, ""] lint.assert_noise_distributions_valid(observable_df) - observable_df[NOISE_DISTRIBUTION] = ['Normal', ''] + observable_df[NOISE_DISTRIBUTION] = ["Normal", ""] with pytest.raises(ValueError): lint.assert_noise_distributions_valid(observable_df) @@ -264,155 +316,200 @@ def test_assert_noise_distributions_valid(): def test_check_measurement_df(): """Check measurement (and observable) tables""" - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['0obsPar1noisePar', - '2obsPar0noisePar'], - OBSERVABLE_FORMULA: ['', ''], - NOISE_FORMULA: ['', ''], - NOISE_DISTRIBUTION: ['', ''], - }) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["0obsPar1noisePar", "2obsPar0noisePar"], + OBSERVABLE_FORMULA: ["", ""], + NOISE_FORMULA: ["", ""], + NOISE_DISTRIBUTION: ["", ""], + } + ) observable_df.set_index([OBSERVABLE_ID], inplace=True) - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['0obsPar1noisePar', - '2obsPar0noisePar'], - SIMULATION_CONDITION_ID: ['condition1', 'condition1'], - PREEQUILIBRATION_CONDITION_ID: ['', ''], - TIME: [1.0, 2.0], - MEASUREMENT: [1.0, 2.0], - OBSERVABLE_PARAMETERS: ['', ''], - NOISE_PARAMETERS: ['', ''], - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["0obsPar1noisePar", "2obsPar0noisePar"], + SIMULATION_CONDITION_ID: ["condition1", "condition1"], + PREEQUILIBRATION_CONDITION_ID: ["", ""], + TIME: [1.0, 2.0], + MEASUREMENT: [1.0, 2.0], + OBSERVABLE_PARAMETERS: ["", ""], + NOISE_PARAMETERS: ["", ""], + } + ) lint.check_measurement_df(measurement_df, observable_df) # Incompatible measurement and transformation - observable_df[OBSERVABLE_TRANSFORMATION] = [LOG, ''] + observable_df[OBSERVABLE_TRANSFORMATION] = [LOG, ""] measurement_df[MEASUREMENT] = [-1.0, 0.0] with pytest.raises(ValueError): lint.check_measurement_df(measurement_df, observable_df) def test_check_parameter_bounds(): - lint.check_parameter_bounds(pd.DataFrame( - {LOWER_BOUND: [1], UPPER_BOUND: [2], ESTIMATE: [1]})) + lint.check_parameter_bounds( + pd.DataFrame({LOWER_BOUND: [1], UPPER_BOUND: [2], ESTIMATE: [1]}) + ) with pytest.raises(AssertionError): - lint.check_parameter_bounds(pd.DataFrame( - {LOWER_BOUND: [3], UPPER_BOUND: [2], ESTIMATE: [1]})) + lint.check_parameter_bounds( + pd.DataFrame({LOWER_BOUND: [3], UPPER_BOUND: [2], ESTIMATE: [1]}) + ) with pytest.raises(AssertionError): - lint.check_parameter_bounds(pd.DataFrame( - {LOWER_BOUND: [-1], UPPER_BOUND: [2], - ESTIMATE: [1], PARAMETER_SCALE: [LOG10]})) + lint.check_parameter_bounds( + pd.DataFrame( + { + LOWER_BOUND: [-1], + UPPER_BOUND: [2], + ESTIMATE: [1], + PARAMETER_SCALE: [LOG10], + } + ) + ) with pytest.raises(AssertionError): - lint.check_parameter_bounds(pd.DataFrame( - {LOWER_BOUND: [-1], UPPER_BOUND: [2], - ESTIMATE: [1], PARAMETER_SCALE: [LOG]})) + lint.check_parameter_bounds( + pd.DataFrame( + { + LOWER_BOUND: [-1], + UPPER_BOUND: [2], + ESTIMATE: [1], + PARAMETER_SCALE: [LOG], + } + ) + ) def 
test_assert_parameter_prior_type_is_valid(): """Check lint.assert_parameter_prior_type_is_valid.""" - lint.assert_parameter_prior_type_is_valid(pd.DataFrame( - {INITIALIZATION_PRIOR_TYPE: [UNIFORM, LAPLACE, ''], - OBJECTIVE_PRIOR_TYPE: [NORMAL, LOG_NORMAL, '']})) + lint.assert_parameter_prior_type_is_valid( + pd.DataFrame( + { + INITIALIZATION_PRIOR_TYPE: [UNIFORM, LAPLACE, ""], + OBJECTIVE_PRIOR_TYPE: [NORMAL, LOG_NORMAL, ""], + } + ) + ) lint.assert_parameter_prior_type_is_valid(pd.DataFrame()) with pytest.raises(AssertionError): - lint.assert_parameter_prior_type_is_valid(pd.DataFrame( - {INITIALIZATION_PRIOR_TYPE: ['normel']})) + lint.assert_parameter_prior_type_is_valid( + pd.DataFrame({INITIALIZATION_PRIOR_TYPE: ["normel"]}) + ) def test_assert_parameter_prior_parameters_are_valid(): """Check lint.assert_parameter_prior_parameters_are_valid.""" - parameter_df = pd.DataFrame({ - INITIALIZATION_PRIOR_TYPE: [UNIFORM, '', ''], - INITIALIZATION_PRIOR_PARAMETERS: ['0;1', '10;20', ''], - OBJECTIVE_PRIOR_PARAMETERS: ['0;20', '10;20', ''] - }) + parameter_df = pd.DataFrame( + { + INITIALIZATION_PRIOR_TYPE: [UNIFORM, "", ""], + INITIALIZATION_PRIOR_PARAMETERS: ["0;1", "10;20", ""], + OBJECTIVE_PRIOR_PARAMETERS: ["0;20", "10;20", ""], + } + ) lint.assert_parameter_prior_parameters_are_valid(parameter_df) with pytest.raises(AssertionError): - lint.assert_parameter_prior_parameters_are_valid(pd.DataFrame( - {INITIALIZATION_PRIOR_TYPE: [NORMAL]})) + lint.assert_parameter_prior_parameters_are_valid( + pd.DataFrame({INITIALIZATION_PRIOR_TYPE: [NORMAL]}) + ) with pytest.raises(AssertionError): - lint.assert_parameter_prior_parameters_are_valid(pd.DataFrame( - {OBJECTIVE_PRIOR_PARAMETERS: ['0;1;2']})) + lint.assert_parameter_prior_parameters_are_valid( + pd.DataFrame({OBJECTIVE_PRIOR_PARAMETERS: ["0;1;2"]}) + ) def test_petablint_succeeds(): """Run petablint and ensure we exit successfully for a file that should contain no errors""" - dir_isensee = '../doc/example/example_Isensee/' - dir_fujita = '../doc/example/example_Fujita/' + dir_isensee = "../doc/example/example_Isensee/" + dir_fujita = "../doc/example/example_Fujita/" # run with measurement file script_path = os.path.abspath(os.path.dirname(__file__)) measurement_file = os.path.join( - script_path, dir_isensee, 'Isensee_measurementData.tsv') - result = subprocess.run(['petablint', '-m', measurement_file]) + script_path, dir_isensee, "Isensee_measurementData.tsv" + ) + result = subprocess.run(["petablint", "-m", measurement_file]) assert result.returncode == 0 # run with yaml - yaml_file = os.path.join(script_path, dir_fujita, 'Fujita.yaml') - result = subprocess.run(['petablint', '-v', '-y', yaml_file]) + yaml_file = os.path.join(script_path, dir_fujita, "Fujita.yaml") + result = subprocess.run(["petablint", "-v", "-y", yaml_file]) assert result.returncode == 0 parameter_file = os.path.join( - script_path, dir_fujita, 'Fujita_parameters.tsv') - result = subprocess.run(['petablint', '-v', '-p', parameter_file]) + script_path, dir_fujita, "Fujita_parameters.tsv" + ) + result = subprocess.run(["petablint", "-v", "-p", parameter_file]) assert result.returncode == 0 def test_assert_measurement_conditions_present_in_condition_table(): - condition_df = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2'], - CONDITION_NAME: ['', 'Condition 2'], - 'fixedParameter1': [1.0, 2.0] - }) + condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + CONDITION_NAME: ["", "Condition 2"], + "fixedParameter1": [1.0, 2.0], + } 
+ ) condition_df.set_index(CONDITION_ID, inplace=True) - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['', ''], - SIMULATION_CONDITION_ID: ['condition1', 'condition1'], - TIME: [1.0, 2.0], - MEASUREMENT: [1.0, 2.0], - OBSERVABLE_PARAMETERS: ['', ''], - NOISE_PARAMETERS: ['', ''], - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["", ""], + SIMULATION_CONDITION_ID: ["condition1", "condition1"], + TIME: [1.0, 2.0], + MEASUREMENT: [1.0, 2.0], + OBSERVABLE_PARAMETERS: ["", ""], + NOISE_PARAMETERS: ["", ""], + } + ) # check we can handle missing preeq condition lint.assert_measurement_conditions_present_in_condition_table( - measurement_df=measurement_df, condition_df=condition_df) + measurement_df=measurement_df, condition_df=condition_df + ) # check we can handle preeq condition - measurement_df[PREEQUILIBRATION_CONDITION_ID] = ['condition1', - 'condition2'] + measurement_df[PREEQUILIBRATION_CONDITION_ID] = [ + "condition1", + "condition2", + ] lint.assert_measurement_conditions_present_in_condition_table( - measurement_df=measurement_df, condition_df=condition_df) + measurement_df=measurement_df, condition_df=condition_df + ) # check we detect missing condition - measurement_df[PREEQUILIBRATION_CONDITION_ID] = ['missing_condition1', - 'missing_condition2'] + measurement_df[PREEQUILIBRATION_CONDITION_ID] = [ + "missing_condition1", + "missing_condition2", + ] with pytest.raises(AssertionError): lint.assert_measurement_conditions_present_in_condition_table( - measurement_df=measurement_df, condition_df=condition_df) + measurement_df=measurement_df, condition_df=condition_df + ) def test_check_condition_df(): """Check that we correctly detect errors in condition table""" import simplesbml + from petab.models.sbml_model import SbmlModel + ss_model = simplesbml.SbmlModel() model = SbmlModel(sbml_model=ss_model.model) - condition_df = pd.DataFrame(data={ - CONDITION_ID: ['condition1'], - 'p1': [nan], - }) + condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1"], + "p1": [nan], + } + ) condition_df.set_index(CONDITION_ID, inplace=True) # parameter missing in model @@ -420,18 +517,20 @@ def test_check_condition_df(): lint.check_condition_df(condition_df, model) # fix by adding output parameter - observable_df = pd.DataFrame({ - OBSERVABLE_ID: ["obs1"], - OBSERVABLE_FORMULA: ["p1"], - }) + observable_df = pd.DataFrame( + { + OBSERVABLE_ID: ["obs1"], + OBSERVABLE_FORMULA: ["p1"], + } + ) lint.check_condition_df(condition_df, model, observable_df) # fix by adding parameter - ss_model.addParameter('p1', 1.0) + ss_model.addParameter("p1", 1.0) lint.check_condition_df(condition_df, model) # species missing in model - condition_df['s1'] = [3.0] + condition_df["s1"] = [3.0] with pytest.raises(AssertionError): lint.check_condition_df(condition_df, model) @@ -440,40 +539,42 @@ def test_check_condition_df(): lint.check_condition_df(condition_df, model) # compartment missing in model - condition_df['c2'] = [4.0] + condition_df["c2"] = [4.0] with pytest.raises(AssertionError): lint.check_condition_df(condition_df, model) # fix: - ss_model.addCompartment(comp_id='c2', vol=1.0) + ss_model.addCompartment(comp_id="c2", vol=1.0) lint.check_condition_df(condition_df, model) def test_check_ids(): """Test check_ids""" - lint.check_ids(['a1', '_1']) + lint.check_ids(["a1", "_1"]) with pytest.raises(ValueError): - lint.check_ids(['1']) + lint.check_ids(["1"]) def test_check_parameter_df(): """Check lint.check_parameter_df.""" - parameter_df = pd.DataFrame({ - PARAMETER_ID: ['par0', 
'par1', 'par2'], - PARAMETER_SCALE: [LOG10, LOG10, LIN], - NOMINAL_VALUE: [1e-2, 1e-3, 1e-4], - ESTIMATE: [1, 1, 0], - LOWER_BOUND: [1e-5, 1e-6, 1e-7], - UPPER_BOUND: [1e5, 1e6, 1e7] - }).set_index(PARAMETER_ID) + parameter_df = pd.DataFrame( + { + PARAMETER_ID: ["par0", "par1", "par2"], + PARAMETER_SCALE: [LOG10, LOG10, LIN], + NOMINAL_VALUE: [1e-2, 1e-3, 1e-4], + ESTIMATE: [1, 1, 0], + LOWER_BOUND: [1e-5, 1e-6, 1e-7], + UPPER_BOUND: [1e5, 1e6, 1e7], + } + ).set_index(PARAMETER_ID) parameter_df[NOMINAL_VALUE] = parameter_df[NOMINAL_VALUE].astype("object") lint.check_parameter_df(df=parameter_df) # NOMINAL_VALUE empty, for non-estimated parameter - parameter_df.loc['par2', NOMINAL_VALUE] = "" + parameter_df.loc["par2", NOMINAL_VALUE] = "" with pytest.raises(AssertionError): lint.check_parameter_df(df=parameter_df) @@ -486,55 +587,60 @@ def test_check_parameter_df(): def test_check_observable_df(): """Check that we correctly detect errors in observable table""" - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2'], - OBSERVABLE_FORMULA: ['x1', 'x2'], - NOISE_FORMULA: ['sigma1', 'sigma2'] - }).set_index(OBSERVABLE_ID) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2"], + OBSERVABLE_FORMULA: ["x1", "x2"], + NOISE_FORMULA: ["sigma1", "sigma2"], + } + ).set_index(OBSERVABLE_ID) lint.check_observable_df(observable_df) # Check that duplicated observables ids are detected bad_observable_df = observable_df.copy() - bad_observable_df.index = ['obs1', 'obs1'] + bad_observable_df.index = ["obs1", "obs1"] with pytest.raises(AssertionError): lint.check_observable_df(bad_observable_df) # Check that missing noiseFormula is detected bad_observable_df = observable_df.copy() - bad_observable_df.loc['obs1', NOISE_FORMULA] = nan + bad_observable_df.loc["obs1", NOISE_FORMULA] = nan with pytest.raises(AssertionError): lint.check_observable_df(bad_observable_df) def test_condition_ids_are_unique(): - condition_df = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition1'], - 'parameter1': [1.0, 2.0] - }) + condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition1"], + "parameter1": [1.0, 2.0], + } + ) condition_df.set_index(CONDITION_ID, inplace=True) with pytest.raises(AssertionError): lint.check_condition_df(condition_df) - condition_df.index = ['condition0', 'condition1'] - condition_df.index.name = 'conditionId' + condition_df.index = ["condition0", "condition1"] + condition_df.index.name = "conditionId" lint.check_condition_df(condition_df) def test_parameter_ids_are_unique(): - parameter_df = pd.DataFrame({ - PARAMETER_ID: ['par0', 'par0'], - PARAMETER_SCALE: [LIN, LIN], - ESTIMATE: [1, 1], - LOWER_BOUND: [1e-5, 1e-6], - UPPER_BOUND: [1e5, 1e6] - - }).set_index(PARAMETER_ID) + parameter_df = pd.DataFrame( + { + PARAMETER_ID: ["par0", "par0"], + PARAMETER_SCALE: [LIN, LIN], + ESTIMATE: [1, 1], + LOWER_BOUND: [1e-5, 1e-6], + UPPER_BOUND: [1e5, 1e6], + } + ).set_index(PARAMETER_ID) with pytest.raises(AssertionError): lint.check_parameter_df(parameter_df) - parameter_df.index = ['par0', 'par1'] - parameter_df.index.name = 'parameterId' + parameter_df.index = ["par0", "par1"] + parameter_df.index.name = "parameterId" lint.check_parameter_df(parameter_df) diff --git a/tests/test_mapping.py b/tests/test_mapping.py index 8a410a81..4eaaaeb2 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -1,28 +1,34 @@ """Tests related to petab.mapping""" -import pandas as pd -from petab.mapping import * -from petab.C import * # noqa: F403 
import tempfile + +import pandas as pd import pytest +from petab.C import * # noqa: F403 +from petab.mapping import * + def test_get_mapping_df(): """Test parameters.get_mapping_df.""" # Missing columns - mapping_df = pd.DataFrame(data={ - PETAB_ENTITY_ID: ['e1'], - }) + mapping_df = pd.DataFrame( + data={ + PETAB_ENTITY_ID: ["e1"], + } + ) with pytest.raises(KeyError): get_mapping_df(mapping_df) # check index is correct - mapping_df = pd.DataFrame(data={ - PETAB_ENTITY_ID: ['e1'], - MODEL_ENTITY_ID: ['m1'], - }) - with tempfile.NamedTemporaryFile(mode='w', delete=False) as fh: + mapping_df = pd.DataFrame( + data={ + PETAB_ENTITY_ID: ["e1"], + MODEL_ENTITY_ID: ["m1"], + } + ) + with tempfile.NamedTemporaryFile(mode="w", delete=False) as fh: file_name = fh.name write_mapping_df(mapping_df, file_name) - assert get_mapping_df(file_name).index == ['e1'] + assert get_mapping_df(file_name).index == ["e1"] diff --git a/tests/test_measurements.py b/tests/test_measurements.py index fe74933c..ba6a8495 100644 --- a/tests/test_measurements.py +++ b/tests/test_measurements.py @@ -4,43 +4,51 @@ import numpy as np import pandas as pd + import petab from petab.C import * def test_get_measurement_df(): """Test measurements.get_measurement_df.""" - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2'], - OBSERVABLE_PARAMETERS: ['', 'p1;p2'], - NOISE_PARAMETERS: ['p3;p4', 'p5'] - }) - - with tempfile.NamedTemporaryFile(mode='w', delete=False) as fh: + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2"], + OBSERVABLE_PARAMETERS: ["", "p1;p2"], + NOISE_PARAMETERS: ["p3;p4", "p5"], + } + ) + + with tempfile.NamedTemporaryFile(mode="w", delete=False) as fh: file_name = fh.name - measurement_df.to_csv(fh, sep='\t', index=False) + measurement_df.to_csv(fh, sep="\t", index=False) - df = petab.get_measurement_df(file_name).replace(np.nan, '') + df = petab.get_measurement_df(file_name).replace(np.nan, "") assert (df == measurement_df).all().all() # test other arguments - assert (petab.get_measurement_df(measurement_df) == measurement_df) \ - .all().all() + assert ( + (petab.get_measurement_df(measurement_df) == measurement_df) + .all() + .all() + ) assert petab.get_measurement_df(None) is None def test_write_measurement_df(): """Test measurements.get_measurement_df.""" - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2'], - OBSERVABLE_PARAMETERS: ['', 'p1;p2'], - NOISE_PARAMETERS: ['p3;p4', 'p5'] - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2"], + OBSERVABLE_PARAMETERS: ["", "p1;p2"], + NOISE_PARAMETERS: ["p3;p4", "p5"], + } + ) with tempfile.TemporaryDirectory() as temp_dir: file_name = Path(temp_dir) / "parameters.tsv" petab.write_measurement_df(measurement_df, file_name) - re_df = petab.get_measurement_df(file_name).replace(np.nan, '') + re_df = petab.get_measurement_df(file_name).replace(np.nan, "") assert (measurement_df == re_df).all().all() @@ -52,13 +60,15 @@ def test_create_measurement_df(): def test_measurements_have_replicates(): """Test measurements.measurements_have_replicates.""" - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs1'], - OBSERVABLE_PARAMETERS: ['', 'p1;p2'], - NOISE_PARAMETERS: ['p3;p4', 'p5'], - TIME: [0, 1], - MEASUREMENT: [42, 137.01] - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs1"], + OBSERVABLE_PARAMETERS: ["", "p1;p2"], + NOISE_PARAMETERS: ["p3;p4", "p5"], + TIME: [0, 1], + MEASUREMENT: [42, 137.01], + } + ) assert not 
petab.measurements_have_replicates(measurement_df) measurement_df[TIME] = [1, 1] @@ -69,49 +79,69 @@ def test_get_simulation_conditions(): """Test get_simulation_conditions""" # only simulation condition - measurement_df = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0', 'c1'], - }) - expected = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: ['c0', 'c1'], - }) + measurement_df = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: ["c0", "c1", "c0", "c1"], + } + ) + expected = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: ["c0", "c1"], + } + ) actual = petab.get_simulation_conditions(measurement_df) assert actual.equals(expected) # simulation and preequilibration condition - measurement_df = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0', 'c1'], - PREEQUILIBRATION_CONDITION_ID: ['c1', 'c0', 'c1', 'c0'], - }) - expected = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: ['c0', 'c1'], - PREEQUILIBRATION_CONDITION_ID: ['c1', 'c0'], - }) + measurement_df = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: ["c0", "c1", "c0", "c1"], + PREEQUILIBRATION_CONDITION_ID: ["c1", "c0", "c1", "c0"], + } + ) + expected = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: ["c0", "c1"], + PREEQUILIBRATION_CONDITION_ID: ["c1", "c0"], + } + ) actual = petab.get_simulation_conditions(measurement_df) assert actual.equals(expected) # simulation with and without preequilibration - measurement_df = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0', 'c1'], - PREEQUILIBRATION_CONDITION_ID: ['', '', 'c1', 'c0'], - }) - expected = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0', 'c1'], - PREEQUILIBRATION_CONDITION_ID: ['', '', 'c1', 'c0'], - }).sort_values([SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID], - ignore_index=True) + measurement_df = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: ["c0", "c1", "c0", "c1"], + PREEQUILIBRATION_CONDITION_ID: ["", "", "c1", "c0"], + } + ) + expected = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: ["c0", "c1", "c0", "c1"], + PREEQUILIBRATION_CONDITION_ID: ["", "", "c1", "c0"], + } + ).sort_values( + [SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID], + ignore_index=True, + ) actual = petab.get_simulation_conditions(measurement_df) assert actual.equals(expected) # simulation with and without preequilibration; NaNs - measurement_df = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0', 'c1'], - PREEQUILIBRATION_CONDITION_ID: [np.nan, np.nan, 'c1', 'c0'], - }) - expected = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0', 'c1'], - PREEQUILIBRATION_CONDITION_ID: ['', '', 'c1', 'c0'], - }).sort_values([SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID], - ignore_index=True) + measurement_df = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: ["c0", "c1", "c0", "c1"], + PREEQUILIBRATION_CONDITION_ID: [np.nan, np.nan, "c1", "c0"], + } + ) + expected = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: ["c0", "c1", "c0", "c1"], + PREEQUILIBRATION_CONDITION_ID: ["", "", "c1", "c0"], + } + ).sort_values( + [SIMULATION_CONDITION_ID, PREEQUILIBRATION_CONDITION_ID], + ignore_index=True, + ) actual = petab.get_simulation_conditions(measurement_df) assert actual.equals(expected) diff --git a/tests/test_model_pysb.py b/tests/test_model_pysb.py index 89d3aacf..972b3e25 100644 --- a/tests/test_model_pysb.py +++ b/tests/test_model_pysb.py @@ -2,8 +2,11 @@ import pysb import pytest -from petab.models.pysb_model import PySBModel, parse_species_name, \ - pattern_from_string +from 
petab.models.pysb_model import ( + PySBModel, + parse_species_name, + pattern_from_string, +) @pytest.fixture(scope="function") @@ -15,29 +18,32 @@ def uses_pysb(): def test_parse_species_name(): - assert parse_species_name("cyclin(Y='U', b=None)") \ - == [("cyclin", None, {'Y': 'U', 'b': None})] + assert parse_species_name("cyclin(Y='U', b=None)") == [ + ("cyclin", None, {"Y": "U", "b": None}) + ] - assert parse_species_name("cdc2(Y='P', b=1) % cyclin(Y='P', b=1)") \ - == [("cdc2", None, {'Y': 'P', 'b': 1}), - ("cyclin", None, {'Y': 'P', 'b': 1})] + assert parse_species_name("cdc2(Y='P', b=1) % cyclin(Y='P', b=1)") == [ + ("cdc2", None, {"Y": "P", "b": 1}), + ("cyclin", None, {"Y": "P", "b": 1}), + ] - assert parse_species_name("A()") \ - == [("A", None, {})] + assert parse_species_name("A()") == [("A", None, {})] assert parse_species_name( - 'Bax(s1=1, s2=2, t=None) % Bax(s1=3, s2=1, t=None) % ' - 'Bax(s1=2, s2=3, t=None)') \ - == [("Bax", None, {'s1': 1, 's2': 2, 't': None}), - ("Bax", None, {'s1': 3, 's2': 1, 't': None}), - ("Bax", None, {'s1': 2, 's2': 3, 't': None})] + "Bax(s1=1, s2=2, t=None) % Bax(s1=3, s2=1, t=None) % " + "Bax(s1=2, s2=3, t=None)" + ) == [ + ("Bax", None, {"s1": 1, "s2": 2, "t": None}), + ("Bax", None, {"s1": 3, "s2": 1, "t": None}), + ("Bax", None, {"s1": 2, "s2": 3, "t": None}), + ] - assert parse_species_name('A(b=None) ** X') \ - == [("A", "X", {'b': None})] + assert parse_species_name("A(b=None) ** X") == [("A", "X", {"b": None})] - assert parse_species_name('A(b=1) ** X % B(a=1) ** X') \ - == [("A", "X", {'b': 1}), - ("B", "X", {'a': 1})] + assert parse_species_name("A(b=1) ** X % B(a=1) ** X") == [ + ("A", "X", {"b": 1}), + ("B", "X", {"a": 1}), + ] # TODO: MultiState @@ -46,7 +52,7 @@ def test_pysb_model(uses_pysb): model = pysb.Model() pysb.Compartment("c1") pysb.Monomer("A") - pysb.Monomer("B", ["s"], {'s': ["a", "b"]}) + pysb.Monomer("B", ["s"], {"s": ["a", "b"]}) petab_model = PySBModel(model=model, model_id="test_model") assert petab_model.is_state_variable("A()") is True @@ -72,12 +78,12 @@ def test_pattern_parsing(uses_pysb): model = pysb.Model() c1 = pysb.Compartment("c1") A = pysb.Monomer("A") - B = pysb.Monomer("B", ["s"], {'s': ["a", "b"]}) + B = pysb.Monomer("B", ["s"], {"s": ["a", "b"]}) pattern = pysb.as_complex_pattern(A() ** c1) assert pattern_from_string(str(pattern), model).matches(pattern) assert str(pattern) == str(pattern_from_string("A() ** c1", model)) - pattern = pysb.as_complex_pattern(B(s='a') ** c1) + pattern = pysb.as_complex_pattern(B(s="a") ** c1) assert pattern_from_string(str(pattern), model).matches(pattern) assert str(pattern) == str(pattern_from_string("B(s='a') ** c1", model)) diff --git a/tests/test_observables.py b/tests/test_observables.py index 83445e1d..06da3950 100644 --- a/tests/test_observables.py +++ b/tests/test_observables.py @@ -3,8 +3,9 @@ from pathlib import Path import pandas as pd -import petab import pytest + +import petab from petab.C import * # import fixtures @@ -16,43 +17,48 @@ def test_get_observable_df(): """Test measurements.get_measurement_df.""" # without id - observable_df = pd.DataFrame(data={ - OBSERVABLE_NAME: ['observable name 1'], - OBSERVABLE_FORMULA: ['observable_1'], - NOISE_FORMULA: [1], - }) - - with tempfile.NamedTemporaryFile(mode='w', delete=False) as fh: + observable_df = pd.DataFrame( + data={ + OBSERVABLE_NAME: ["observable name 1"], + OBSERVABLE_FORMULA: ["observable_1"], + NOISE_FORMULA: [1], + } + ) + + with tempfile.NamedTemporaryFile(mode="w", delete=False) as fh: file_name 
= fh.name - observable_df.to_csv(fh, sep='\t', index=False) + observable_df.to_csv(fh, sep="\t", index=False) with pytest.raises(KeyError): petab.get_observable_df(file_name) # with id - observable_df[OBSERVABLE_ID] = ['observable_1'] + observable_df[OBSERVABLE_ID] = ["observable_1"] - with tempfile.NamedTemporaryFile(mode='w', delete=False) as fh: + with tempfile.NamedTemporaryFile(mode="w", delete=False) as fh: file_name = fh.name - observable_df.to_csv(fh, sep='\t', index=False) + observable_df.to_csv(fh, sep="\t", index=False) df = petab.get_observable_df(file_name) assert (df == observable_df.set_index(OBSERVABLE_ID)).all().all() # test other arguments - assert (petab.get_observable_df(observable_df) == observable_df) \ - .all().all() + assert ( + (petab.get_observable_df(observable_df) == observable_df).all().all() + ) assert petab.get_observable_df(None) is None def test_write_observable_df(): """Test measurements.get_measurement_df.""" - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['observable_1'], - OBSERVABLE_NAME: ['observable name 1'], - OBSERVABLE_FORMULA: ['observable_1'], - NOISE_FORMULA: [1], - }).set_index(OBSERVABLE_ID) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["observable_1"], + OBSERVABLE_NAME: ["observable name 1"], + OBSERVABLE_FORMULA: ["observable_1"], + NOISE_FORMULA: [1], + } + ).set_index(OBSERVABLE_ID) with tempfile.TemporaryDirectory() as temp_dir: file_name = Path(temp_dir) / "observables.tsv" @@ -65,78 +71,88 @@ def test_get_output_parameters(): """Test measurements.get_output_parameters.""" # sbml model import simplesbml + from petab.models.sbml_model import SbmlModel + ss_model = simplesbml.SbmlModel() - ss_model.addParameter('fixedParameter1', 1.0) - ss_model.addParameter('observable_1', 1.0) + ss_model.addParameter("fixedParameter1", 1.0) + ss_model.addParameter("observable_1", 1.0) # observable file - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['observable_1'], - OBSERVABLE_NAME: ['observable name 1'], - OBSERVABLE_FORMULA: ['observable_1 * scaling + offset'], - NOISE_FORMULA: [1], - }).set_index(OBSERVABLE_ID) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["observable_1"], + OBSERVABLE_NAME: ["observable name 1"], + OBSERVABLE_FORMULA: ["observable_1 * scaling + offset"], + NOISE_FORMULA: [1], + } + ).set_index(OBSERVABLE_ID) output_parameters = petab.get_output_parameters( - observable_df, SbmlModel(sbml_model=ss_model.model)) + observable_df, SbmlModel(sbml_model=ss_model.model) + ) - assert output_parameters == ['offset', 'scaling'] + assert output_parameters == ["offset", "scaling"] # test sympy-special symbols (e.g. N, beta, ...) 
# see https://github.com/ICB-DCM/pyPESTO/issues/1048 - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['observable_1'], - OBSERVABLE_NAME: ['observable name 1'], - OBSERVABLE_FORMULA: ['observable_1 * N + beta'], - NOISE_FORMULA: [1], - }).set_index(OBSERVABLE_ID) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["observable_1"], + OBSERVABLE_NAME: ["observable name 1"], + OBSERVABLE_FORMULA: ["observable_1 * N + beta"], + NOISE_FORMULA: [1], + } + ).set_index(OBSERVABLE_ID) output_parameters = petab.get_output_parameters( - observable_df, SbmlModel(sbml_model=ss_model.model)) + observable_df, SbmlModel(sbml_model=ss_model.model) + ) - assert output_parameters == ['N', 'beta'] + assert output_parameters == ["N", "beta"] def test_get_formula_placeholders(): """Test get_formula_placeholders""" # no placeholder - assert petab.get_formula_placeholders('1.0', 'any', 'observable') == [] + assert petab.get_formula_placeholders("1.0", "any", "observable") == [] # multiple placeholders assert petab.get_formula_placeholders( - 'observableParameter1_twoParams * ' - 'observableParameter2_twoParams + otherParam', - 'twoParams', 'observable') \ - == ['observableParameter1_twoParams', - 'observableParameter2_twoParams'] + "observableParameter1_twoParams * " + "observableParameter2_twoParams + otherParam", + "twoParams", + "observable", + ) == ["observableParameter1_twoParams", "observableParameter2_twoParams"] # noise placeholder assert petab.get_formula_placeholders( - '3.0 * noiseParameter1_oneParam', 'oneParam', 'noise') \ - == ['noiseParameter1_oneParam'] + "3.0 * noiseParameter1_oneParam", "oneParam", "noise" + ) == ["noiseParameter1_oneParam"] # multiple instances and in 'wrong' order assert petab.get_formula_placeholders( - 'observableParameter2_twoParams * ' - 'observableParameter1_twoParams + ' - 'otherParam / observableParameter2_twoParams', - 'twoParams', 'observable') \ - == ['observableParameter1_twoParams', - 'observableParameter2_twoParams'] + "observableParameter2_twoParams * " + "observableParameter1_twoParams + " + "otherParam / observableParameter2_twoParams", + "twoParams", + "observable", + ) == ["observableParameter1_twoParams", "observableParameter2_twoParams"] # non-consecutive numbering with pytest.raises(AssertionError): petab.get_formula_placeholders( - 'observableParameter2_twoParams + observableParameter2_twoParams', - 'twoParams', 'observable') + "observableParameter2_twoParams + observableParameter2_twoParams", + "twoParams", + "observable", + ) # empty - assert petab.get_formula_placeholders('', 'any', 'observable') == [] + assert petab.get_formula_placeholders("", "any", "observable") == [] # non-string - assert petab.get_formula_placeholders(1, 'any', 'observable') == [] + assert petab.get_formula_placeholders(1, "any", "observable") == [] def test_create_observable_df(): @@ -147,20 +163,27 @@ def test_create_observable_df(): def test_get_placeholders(): """Test get_placeholders""" - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs_1', 'obs_2'], - OBSERVABLE_FORMULA: ['observableParameter1_obs_1 * 2 * foo', - '1 + observableParameter1_obs_2'], - }).set_index(OBSERVABLE_ID) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_1", "obs_2"], + OBSERVABLE_FORMULA: [ + "observableParameter1_obs_1 * 2 * foo", + "1 + observableParameter1_obs_2", + ], + } + ).set_index(OBSERVABLE_ID) # test with missing noiseFormula - expected = ['observableParameter1_obs_1', 'observableParameter1_obs_2'] + expected = ["observableParameter1_obs_1", 
"observableParameter1_obs_2"] actual = petab.get_placeholders(observable_df) assert actual == expected # test with noiseFormula - observable_df[NOISE_FORMULA] = ['noiseParameter1_obs_1', '2.0'] - expected = ['observableParameter1_obs_1', 'noiseParameter1_obs_1', - 'observableParameter1_obs_2'] + observable_df[NOISE_FORMULA] = ["noiseParameter1_obs_1", "2.0"] + expected = [ + "observableParameter1_obs_1", + "noiseParameter1_obs_1", + "observableParameter1_obs_2", + ] actual = petab.get_placeholders(observable_df) assert actual == expected diff --git a/tests/test_parameter_mapping.py b/tests/test_parameter_mapping.py index 8c6f5b79..b8f91288 100644 --- a/tests/test_parameter_mapping.py +++ b/tests/test_parameter_mapping.py @@ -3,35 +3,37 @@ import numpy as np import pandas as pd + import petab from petab.C import * from petab.models.sbml_model import SbmlModel from petab.parameter_mapping import _apply_parameter_table - # import fixtures pytest_plugins = [ - "tests.test_petab", + "tests.test_petab", ] class TestGetSimulationToOptimizationParameterMapping(object): - @staticmethod def test_no_condition_specific(condition_df_2_conditions): # Trivial case - no condition-specific parameters condition_df = condition_df_2_conditions - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2'], - SIMULATION_CONDITION_ID: ['condition1', 'condition2'], - PREEQUILIBRATION_CONDITION_ID: ['', ''], - OBSERVABLE_PARAMETERS: ['', ''], - NOISE_PARAMETERS: ['', ''] - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2"], + SIMULATION_CONDITION_ID: ["condition1", "condition2"], + PREEQUILIBRATION_CONDITION_ID: ["", ""], + OBSERVABLE_PARAMETERS: ["", ""], + NOISE_PARAMETERS: ["", ""], + } + ) import simplesbml + ss_model = simplesbml.SbmlModel() ss_model.addParameter("dynamicParameter1", 1.0) ss_model.addParameter("dynamicParameter2", 2.0) @@ -43,27 +45,40 @@ def test_no_condition_specific(condition_df_2_conditions): condition_df["someSpecies"] = [0.0, 0.0] # Test without parameter table - expected = [({}, - {'dynamicParameter1': 1.0, - 'dynamicParameter2': 2.0, - 'dynamicParameter3': 3.0, - 'fixedParameter1': 1.0}, - {}, - {'dynamicParameter1': LIN, - 'dynamicParameter2': LIN, - 'dynamicParameter3': LIN, - 'fixedParameter1': LIN}), - ({}, - {'dynamicParameter1': 1.0, - 'dynamicParameter2': 2.0, - 'dynamicParameter3': 3.0, - 'fixedParameter1': 2.0}, - {}, - {'dynamicParameter1': LIN, - 'dynamicParameter2': LIN, - 'dynamicParameter3': LIN, - 'fixedParameter1': LIN} - )] + expected = [ + ( + {}, + { + "dynamicParameter1": 1.0, + "dynamicParameter2": 2.0, + "dynamicParameter3": 3.0, + "fixedParameter1": 1.0, + }, + {}, + { + "dynamicParameter1": LIN, + "dynamicParameter2": LIN, + "dynamicParameter3": LIN, + "fixedParameter1": LIN, + }, + ), + ( + {}, + { + "dynamicParameter1": 1.0, + "dynamicParameter2": 2.0, + "dynamicParameter3": 3.0, + "fixedParameter1": 2.0, + }, + {}, + { + "dynamicParameter1": LIN, + "dynamicParameter2": LIN, + "dynamicParameter3": LIN, + "fixedParameter1": LIN, + }, + ), + ] model = SbmlModel(sbml_model=ss_model.model) actual = petab.get_optimization_to_simulation_parameter_mapping( @@ -74,42 +89,60 @@ def test_no_condition_specific(condition_df_2_conditions): assert actual == expected # Test with parameter table - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['dynamicParameter1', 'dynamicParameter2', - 'dynamicParameter3'], - ESTIMATE: [0, 1, 1], - NOMINAL_VALUE: [11.0, 12.0, None], - PARAMETER_SCALE: [LOG, LOG10, LIN], - }) + parameter_df = 
pd.DataFrame( + data={ + PARAMETER_ID: [ + "dynamicParameter1", + "dynamicParameter2", + "dynamicParameter3", + ], + ESTIMATE: [0, 1, 1], + NOMINAL_VALUE: [11.0, 12.0, None], + PARAMETER_SCALE: [LOG, LOG10, LIN], + } + ) parameter_df.set_index(PARAMETER_ID, inplace=True) - expected = [({}, - {'dynamicParameter1': 11.0, - 'dynamicParameter2': 'dynamicParameter2', - 'dynamicParameter3': 'dynamicParameter3', - 'fixedParameter1': 1.0}, - {}, - {'dynamicParameter1': LIN, - 'dynamicParameter2': LOG10, - 'dynamicParameter3': LIN, - 'fixedParameter1': LIN}), - ({}, - {'dynamicParameter1': 11.0, - 'dynamicParameter2': 'dynamicParameter2', - 'dynamicParameter3': 'dynamicParameter3', - 'fixedParameter1': 2.0}, - {}, - {'dynamicParameter1': LIN, - 'dynamicParameter2': LOG10, - 'dynamicParameter3': LIN, - 'fixedParameter1': LIN}) - ] + expected = [ + ( + {}, + { + "dynamicParameter1": 11.0, + "dynamicParameter2": "dynamicParameter2", + "dynamicParameter3": "dynamicParameter3", + "fixedParameter1": 1.0, + }, + {}, + { + "dynamicParameter1": LIN, + "dynamicParameter2": LOG10, + "dynamicParameter3": LIN, + "fixedParameter1": LIN, + }, + ), + ( + {}, + { + "dynamicParameter1": 11.0, + "dynamicParameter2": "dynamicParameter2", + "dynamicParameter3": "dynamicParameter3", + "fixedParameter1": 2.0, + }, + {}, + { + "dynamicParameter1": LIN, + "dynamicParameter2": LOG10, + "dynamicParameter3": LIN, + "fixedParameter1": LIN, + }, + ), + ] actual = petab.get_optimization_to_simulation_parameter_mapping( model=model, measurement_df=measurement_df, condition_df=condition_df, - parameter_df=parameter_df + parameter_df=parameter_df, ) assert actual == expected @@ -117,26 +150,38 @@ def test_no_condition_specific(condition_df_2_conditions): # Test with applied scaling expected = [ - ({}, - {'dynamicParameter1': np.log(11.0), - 'dynamicParameter2': 'dynamicParameter2', - 'dynamicParameter3': 'dynamicParameter3', - 'fixedParameter1': 1.0}, - {}, - {'dynamicParameter1': LOG, - 'dynamicParameter2': LOG10, - 'dynamicParameter3': LIN, - 'fixedParameter1': LIN}), - ({}, - {'dynamicParameter1': np.log(11.0), - 'dynamicParameter2': 'dynamicParameter2', - 'dynamicParameter3': 'dynamicParameter3', - 'fixedParameter1': 2.0}, - {}, - {'dynamicParameter1': LOG, - 'dynamicParameter2': LOG10, - 'dynamicParameter3': LIN, - 'fixedParameter1': LIN}), + ( + {}, + { + "dynamicParameter1": np.log(11.0), + "dynamicParameter2": "dynamicParameter2", + "dynamicParameter3": "dynamicParameter3", + "fixedParameter1": 1.0, + }, + {}, + { + "dynamicParameter1": LOG, + "dynamicParameter2": LOG10, + "dynamicParameter3": LIN, + "fixedParameter1": LIN, + }, + ), + ( + {}, + { + "dynamicParameter1": np.log(11.0), + "dynamicParameter2": "dynamicParameter2", + "dynamicParameter3": "dynamicParameter3", + "fixedParameter1": 2.0, + }, + {}, + { + "dynamicParameter1": LOG, + "dynamicParameter2": LOG10, + "dynamicParameter3": LIN, + "fixedParameter1": LIN, + }, + ), ] actual = petab.get_optimization_to_simulation_parameter_mapping( @@ -144,7 +189,7 @@ def test_no_condition_specific(condition_df_2_conditions): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - scaled_parameters=True + scaled_parameters=True, ) assert actual == expected @@ -152,26 +197,38 @@ def test_no_condition_specific(condition_df_2_conditions): # Test without fixed overrides expected = [ - ({}, - {'dynamicParameter1': 'dynamicParameter1', - 'dynamicParameter2': 'dynamicParameter2', - 'dynamicParameter3': 'dynamicParameter3', - 'fixedParameter1': 1.0}, - 
{}, - {'dynamicParameter1': LOG, - 'dynamicParameter2': LOG10, - 'dynamicParameter3': LIN, - 'fixedParameter1': LIN}), - ({}, - {'dynamicParameter1': 'dynamicParameter1', - 'dynamicParameter2': 'dynamicParameter2', - 'dynamicParameter3': 'dynamicParameter3', - 'fixedParameter1': 2.0}, - {}, - {'dynamicParameter1': LOG, - 'dynamicParameter2': LOG10, - 'dynamicParameter3': LIN, - 'fixedParameter1': LIN}), + ( + {}, + { + "dynamicParameter1": "dynamicParameter1", + "dynamicParameter2": "dynamicParameter2", + "dynamicParameter3": "dynamicParameter3", + "fixedParameter1": 1.0, + }, + {}, + { + "dynamicParameter1": LOG, + "dynamicParameter2": LOG10, + "dynamicParameter3": LIN, + "fixedParameter1": LIN, + }, + ), + ( + {}, + { + "dynamicParameter1": "dynamicParameter1", + "dynamicParameter2": "dynamicParameter2", + "dynamicParameter3": "dynamicParameter3", + "fixedParameter1": 2.0, + }, + {}, + { + "dynamicParameter1": LOG, + "dynamicParameter2": LOG10, + "dynamicParameter3": LIN, + "fixedParameter1": LIN, + }, + ), ] actual = petab.get_optimization_to_simulation_parameter_mapping( @@ -179,7 +236,7 @@ def test_no_condition_specific(condition_df_2_conditions): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - fill_fixed_parameters=False + fill_fixed_parameters=False, ) assert actual == expected @@ -190,77 +247,96 @@ def test_all_override(condition_df_2_conditions): condition_df = condition_df_2_conditions import simplesbml + ss_model = simplesbml.SbmlModel() - ss_model.addParameter('dynamicParameter1', 0.0) - ss_model.addParameter('dynamicParameter2', 0.0) + ss_model.addParameter("dynamicParameter1", 0.0) + ss_model.addParameter("dynamicParameter2", 0.0) model = SbmlModel(sbml_model=ss_model.model) - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2', 'obs1', 'obs2'], - SIMULATION_CONDITION_ID: ['condition1', 'condition1', - 'condition2', 'condition2'], - PREEQUILIBRATION_CONDITION_ID: ['', '', '', ''], - OBSERVABLE_PARAMETERS: ['obs1par1override;obs1par2cond1override', - 'obs2par1cond1override', - 'obs1par1override;obs1par2cond2override', - 'obs2par1cond2override'], - NOISE_PARAMETERS: ['', '', '', ''] - }) - - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: [ - 'dynamicParameter1', 'dynamicParameter2', 'obs1par1override', - 'obs1par2cond1override', 'obs1par2cond2override', - 'obs2par1cond1override', 'obs2par1cond2override' - ], - ESTIMATE: [1] * 7 - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2", "obs1", "obs2"], + SIMULATION_CONDITION_ID: [ + "condition1", + "condition1", + "condition2", + "condition2", + ], + PREEQUILIBRATION_CONDITION_ID: ["", "", "", ""], + OBSERVABLE_PARAMETERS: [ + "obs1par1override;obs1par2cond1override", + "obs2par1cond1override", + "obs1par1override;obs1par2cond2override", + "obs2par1cond2override", + ], + NOISE_PARAMETERS: ["", "", "", ""], + } + ) + + parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: [ + "dynamicParameter1", + "dynamicParameter2", + "obs1par1override", + "obs1par2cond1override", + "obs1par2cond2override", + "obs2par1cond1override", + "obs2par1cond2override", + ], + ESTIMATE: [1] * 7, + } + ) parameter_df.set_index(PARAMETER_ID, inplace=True) expected = [ ( {}, - {'fixedParameter1': 1.0, - 'dynamicParameter1': 'dynamicParameter1', - 'dynamicParameter2': 'dynamicParameter2', - 'observableParameter1_obs1': 'obs1par1override', - 'observableParameter2_obs1': 'obs1par2cond1override', - 'observableParameter1_obs2': 'obs2par1cond1override', - }, - {}, - 
{'fixedParameter1': LIN, - 'dynamicParameter1': LIN, - 'dynamicParameter2': LIN, - 'observableParameter1_obs1': LIN, - 'observableParameter2_obs1': LIN, - 'observableParameter1_obs2': LIN - } + { + "fixedParameter1": 1.0, + "dynamicParameter1": "dynamicParameter1", + "dynamicParameter2": "dynamicParameter2", + "observableParameter1_obs1": "obs1par1override", + "observableParameter2_obs1": "obs1par2cond1override", + "observableParameter1_obs2": "obs2par1cond1override", + }, + {}, + { + "fixedParameter1": LIN, + "dynamicParameter1": LIN, + "dynamicParameter2": LIN, + "observableParameter1_obs1": LIN, + "observableParameter2_obs1": LIN, + "observableParameter1_obs2": LIN, + }, ), ( {}, - {'fixedParameter1': 2.0, - 'dynamicParameter1': 'dynamicParameter1', - 'dynamicParameter2': 'dynamicParameter2', - 'observableParameter1_obs1': 'obs1par1override', - 'observableParameter2_obs1': 'obs1par2cond2override', - 'observableParameter1_obs2': 'obs2par1cond2override' - }, - {}, - {'fixedParameter1': LIN, - 'dynamicParameter1': LIN, - 'dynamicParameter2': LIN, - 'observableParameter1_obs1': LIN, - 'observableParameter2_obs1': LIN, - 'observableParameter1_obs2': LIN - } - ) + { + "fixedParameter1": 2.0, + "dynamicParameter1": "dynamicParameter1", + "dynamicParameter2": "dynamicParameter2", + "observableParameter1_obs1": "obs1par1override", + "observableParameter2_obs1": "obs1par2cond2override", + "observableParameter1_obs2": "obs2par1cond2override", + }, + {}, + { + "fixedParameter1": LIN, + "dynamicParameter1": LIN, + "dynamicParameter2": LIN, + "observableParameter1_obs1": LIN, + "observableParameter2_obs1": LIN, + "observableParameter1_obs2": LIN, + }, + ), ] actual = petab.get_optimization_to_simulation_parameter_mapping( measurement_df=measurement_df, condition_df=condition_df, model=model, - parameter_df=parameter_df + parameter_df=parameter_df, ) assert actual == expected @@ -271,130 +347,171 @@ def test_all_override(condition_df_2_conditions): measurement_df=measurement_df, condition_df=condition_df, model=model, - parameter_df=parameter_df + parameter_df=parameter_df, ) assert actual == expected @staticmethod def test_partial_override(condition_df_2_conditions): # Condition-specific parameters, keeping original parameters - condition_df = pd.DataFrame(data={ - 'conditionId': ['condition1', 'condition2'], - 'conditionName': ['', 'Condition 2'], - 'fixedParameter1': [1.0, 2.0], - 'fixedParameter2': [nan, 2.5], - }) - condition_df.set_index('conditionId', inplace=True) + condition_df = pd.DataFrame( + data={ + "conditionId": ["condition1", "condition2"], + "conditionName": ["", "Condition 2"], + "fixedParameter1": [1.0, 2.0], + "fixedParameter2": [nan, 2.5], + } + ) + condition_df.set_index("conditionId", inplace=True) import simplesbml + ss_model = simplesbml.SbmlModel() - ss_model.addParameter('fixedParameter1', 0.5) - ss_model.addParameter('fixedParameter2', 1.0) - ss_model.addParameter('dynamicParameter1', 0.0) - ss_model.addParameter('observableParameter1_obs1', 0.0) - ss_model.addParameter('observableParameter2_obs1', 0.0) - ss_model.addParameter('observableParameter1_obs2', 0.0) - - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2', 'obs1', 'obs2'], - SIMULATION_CONDITION_ID: ['condition1', 'condition1', - 'condition2', 'condition2'], - PREEQUILIBRATION_CONDITION_ID: ['', '', '', ''], - OBSERVABLE_PARAMETERS: ['obs1par1override;obs1par2cond1override', - '', - 'obs1par1override;obs1par2cond2override', - 'obs2par1cond2override'], - NOISE_PARAMETERS: ['', '', '', ''] - }) - 
- parameter_df = pd.DataFrame(data={ - PARAMETER_ID: [ - 'dynamicParameter1', 'obs1par1override', - 'obs1par2cond1override', 'obs1par2cond2override', - 'obs2par1cond2override'], - ESTIMATE: [1, 1, 1, 1, 1], - }) + ss_model.addParameter("fixedParameter1", 0.5) + ss_model.addParameter("fixedParameter2", 1.0) + ss_model.addParameter("dynamicParameter1", 0.0) + ss_model.addParameter("observableParameter1_obs1", 0.0) + ss_model.addParameter("observableParameter2_obs1", 0.0) + ss_model.addParameter("observableParameter1_obs2", 0.0) + + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2", "obs1", "obs2"], + SIMULATION_CONDITION_ID: [ + "condition1", + "condition1", + "condition2", + "condition2", + ], + PREEQUILIBRATION_CONDITION_ID: ["", "", "", ""], + OBSERVABLE_PARAMETERS: [ + "obs1par1override;obs1par2cond1override", + "", + "obs1par1override;obs1par2cond2override", + "obs2par1cond2override", + ], + NOISE_PARAMETERS: ["", "", "", ""], + } + ) + + parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: [ + "dynamicParameter1", + "obs1par1override", + "obs1par2cond1override", + "obs1par2cond2override", + "obs2par1cond2override", + ], + ESTIMATE: [1, 1, 1, 1, 1], + } + ) parameter_df.set_index(PARAMETER_ID, inplace=True) - expected = [({}, - {'fixedParameter1': 1.0, - 'fixedParameter2': 1.0, - 'dynamicParameter1': 'dynamicParameter1', - 'observableParameter1_obs1': 'obs1par1override', - 'observableParameter2_obs1': 'obs1par2cond1override', - 'observableParameter1_obs2': np.nan, - }, - {}, - {'fixedParameter1': LIN, - 'fixedParameter2': LIN, - 'dynamicParameter1': LIN, - 'observableParameter1_obs1': LIN, - 'observableParameter2_obs1': LIN, - 'observableParameter1_obs2': LIN}), - ({}, - {'fixedParameter1': 2.0, - 'fixedParameter2': 2.5, - 'dynamicParameter1': 'dynamicParameter1', - 'observableParameter1_obs1': 'obs1par1override', - 'observableParameter2_obs1': 'obs1par2cond2override', - 'observableParameter1_obs2': 'obs2par1cond2override' - }, - {}, - {'fixedParameter1': LIN, - 'fixedParameter2': LIN, - 'dynamicParameter1': LIN, - 'observableParameter1_obs1': LIN, - 'observableParameter2_obs1': LIN, - 'observableParameter1_obs2': LIN}), - ] + expected = [ + ( + {}, + { + "fixedParameter1": 1.0, + "fixedParameter2": 1.0, + "dynamicParameter1": "dynamicParameter1", + "observableParameter1_obs1": "obs1par1override", + "observableParameter2_obs1": "obs1par2cond1override", + "observableParameter1_obs2": np.nan, + }, + {}, + { + "fixedParameter1": LIN, + "fixedParameter2": LIN, + "dynamicParameter1": LIN, + "observableParameter1_obs1": LIN, + "observableParameter2_obs1": LIN, + "observableParameter1_obs2": LIN, + }, + ), + ( + {}, + { + "fixedParameter1": 2.0, + "fixedParameter2": 2.5, + "dynamicParameter1": "dynamicParameter1", + "observableParameter1_obs1": "obs1par1override", + "observableParameter2_obs1": "obs1par2cond2override", + "observableParameter1_obs2": "obs2par1cond2override", + }, + {}, + { + "fixedParameter1": LIN, + "fixedParameter2": LIN, + "dynamicParameter1": LIN, + "observableParameter1_obs1": LIN, + "observableParameter2_obs1": LIN, + "observableParameter1_obs2": LIN, + }, + ), + ] actual = petab.get_optimization_to_simulation_parameter_mapping( measurement_df=measurement_df, condition_df=condition_df, model=petab.models.sbml_model.SbmlModel(ss_model.model), - parameter_df=parameter_df + parameter_df=parameter_df, ) # Comparison with NaN containing expected results fails after pickling! # Need to test first for correct NaNs, then for the rest. 
- assert np.isnan(expected[0][1]['observableParameter1_obs2']) - assert np.isnan(actual[0][1]['observableParameter1_obs2']) - expected[0][1]['observableParameter1_obs2'] = 0.0 - actual[0][1]['observableParameter1_obs2'] = 0.0 + assert np.isnan(expected[0][1]["observableParameter1_obs2"]) + assert np.isnan(actual[0][1]["observableParameter1_obs2"]) + expected[0][1]["observableParameter1_obs2"] = 0.0 + actual[0][1]["observableParameter1_obs2"] = 0.0 assert actual == expected @staticmethod def test_parameterized_condition_table(): - condition_df = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2', 'condition3'], - CONDITION_NAME: ['', 'Condition 2', ''], - 'dynamicParameter1': ['dynamicOverride1_1', - 'dynamicOverride1_2', 0] - }) + condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2", "condition3"], + CONDITION_NAME: ["", "Condition 2", ""], + "dynamicParameter1": [ + "dynamicOverride1_1", + "dynamicOverride1_2", + 0, + ], + } + ) condition_df.set_index(CONDITION_ID, inplace=True) - measurement_df = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: ['condition1', 'condition2', - 'condition3'], - OBSERVABLE_ID: ['obs1', 'obs2', 'obs1'], - OBSERVABLE_PARAMETERS: '', - NOISE_PARAMETERS: '', - }) - - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['dynamicOverride1_1', 'dynamicOverride1_2'], - PARAMETER_NAME: ['', '...'], - ESTIMATE: [1, 1] - }) + measurement_df = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: [ + "condition1", + "condition2", + "condition3", + ], + OBSERVABLE_ID: ["obs1", "obs2", "obs1"], + OBSERVABLE_PARAMETERS: "", + NOISE_PARAMETERS: "", + } + ) + + parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["dynamicOverride1_1", "dynamicOverride1_2"], + PARAMETER_NAME: ["", "..."], + ESTIMATE: [1, 1], + } + ) parameter_df.set_index(PARAMETER_ID, inplace=True) import simplesbml + ss_model = simplesbml.SbmlModel() - ss_model.addParameter('dynamicParameter1', 1.0) + ss_model.addParameter("dynamicParameter1", 1.0) - assert petab.get_model_parameters(ss_model.model) \ - == ['dynamicParameter1'] + assert petab.get_model_parameters(ss_model.model) == [ + "dynamicParameter1" + ] actual = petab.get_optimization_to_simulation_parameter_mapping( measurement_df=measurement_df, @@ -403,12 +520,21 @@ def test_parameterized_condition_table(): model=petab.models.sbml_model.SbmlModel(ss_model.model), ) - expected = [({}, {'dynamicParameter1': 'dynamicOverride1_1'}, - {}, {'dynamicParameter1': LIN}), - ({}, {'dynamicParameter1': 'dynamicOverride1_2'}, - {}, {'dynamicParameter1': LIN}), - ({}, {'dynamicParameter1': 0}, - {}, {'dynamicParameter1': LIN})] + expected = [ + ( + {}, + {"dynamicParameter1": "dynamicOverride1_1"}, + {}, + {"dynamicParameter1": LIN}, + ), + ( + {}, + {"dynamicParameter1": "dynamicOverride1_2"}, + {}, + {"dynamicParameter1": LIN}, + ), + ({}, {"dynamicParameter1": 0}, {}, {"dynamicParameter1": LIN}), + ] assert actual == expected @@ -425,41 +551,61 @@ def test_parameterized_condition_table_changed_scale(): # set up model import simplesbml + ss_model = simplesbml.SbmlModel() ss_model.addParameter(overridee_id, 2.0) assert petab.get_model_parameters(ss_model.model) == [overridee_id] - assert petab.get_model_parameters(ss_model.model, with_values=True) \ - == {overridee_id: 2.0} + assert petab.get_model_parameters( + ss_model.model, with_values=True + ) == {overridee_id: 2.0} # set up condition table - condition_df = pd.DataFrame(data={ - CONDITION_ID: - ['condition1', 'condition2', 'condition3', 'condition4'], - overridee_id: - 
['dynamicOverrideLog10', 'fixedOverrideLin', - 'fixedOverrideLog10', 10.0] - }) - condition_df.set_index('conditionId', inplace=True) + condition_df = pd.DataFrame( + data={ + CONDITION_ID: [ + "condition1", + "condition2", + "condition3", + "condition4", + ], + overridee_id: [ + "dynamicOverrideLog10", + "fixedOverrideLin", + "fixedOverrideLog10", + 10.0, + ], + } + ) + condition_df.set_index("conditionId", inplace=True) # set up measurement table - measurement_df = pd.DataFrame(data={ - SIMULATION_CONDITION_ID: - ['condition1', 'condition2', 'condition3', 'condition4'], - OBSERVABLE_ID: - ['obs1', 'obs2', 'obs1', 'obs2'], - OBSERVABLE_PARAMETERS: '', - NOISE_PARAMETERS: '', - }) + measurement_df = pd.DataFrame( + data={ + SIMULATION_CONDITION_ID: [ + "condition1", + "condition2", + "condition3", + "condition4", + ], + OBSERVABLE_ID: ["obs1", "obs2", "obs1", "obs2"], + OBSERVABLE_PARAMETERS: "", + NOISE_PARAMETERS: "", + } + ) # set up parameter table - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['dynamicOverrideLog10', - 'fixedOverrideLin', - 'fixedOverrideLog10'], - ESTIMATE: [1, 0, 0], - NOMINAL_VALUE: [np.nan, -2, 1000], - PARAMETER_SCALE: [LOG10, LIN, LOG10] - }) + parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: [ + "dynamicOverrideLog10", + "fixedOverrideLin", + "fixedOverrideLog10", + ], + ESTIMATE: [1, 0, 0], + NOMINAL_VALUE: [np.nan, -2, 1000], + PARAMETER_SCALE: [LOG10, LIN, LOG10], + } + ) parameter_df.set_index(PARAMETER_ID, inplace=True) # test without preequilibration condition; unscaled known parameters @@ -468,16 +614,20 @@ def test_parameterized_condition_table_changed_scale(): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model) + model=petab.models.sbml_model.SbmlModel(ss_model.model), ) expected = [ - ({}, {overridee_id: 'dynamicOverrideLog10'}, - {}, {overridee_id: LOG10}), + ( + {}, + {overridee_id: "dynamicOverrideLog10"}, + {}, + {overridee_id: LOG10}, + ), ({}, {overridee_id: -2.0}, {}, {overridee_id: LIN}), # not scaled: ({}, {overridee_id: 1000.0}, {}, {overridee_id: LIN}), - ({}, {overridee_id: 10.0}, {}, {overridee_id: LIN}) + ({}, {overridee_id: 10.0}, {}, {overridee_id: LIN}), ] assert actual == expected @@ -493,20 +643,28 @@ def test_parameterized_condition_table_changed_scale(): ) expected = [ - ({}, {overridee_id: 'dynamicOverrideLog10'}, - {}, {overridee_id: LOG10}), + ( + {}, + {overridee_id: "dynamicOverrideLog10"}, + {}, + {overridee_id: LOG10}, + ), ({}, {overridee_id: -2.0}, {}, {overridee_id: LIN}), # scaled fixedOverrideLog10: ({}, {overridee_id: 3.0}, {}, {overridee_id: LOG10}), - ({}, {overridee_id: 10.0}, {}, {overridee_id: LIN}) + ({}, {overridee_id: 10.0}, {}, {overridee_id: LIN}), ] assert actual == expected # Add preeq condition - measurement_df[PREEQUILIBRATION_CONDITION_ID] = \ - ['condition1', 'condition1', 'condition3', 'condition3'] + measurement_df[PREEQUILIBRATION_CONDITION_ID] = [ + "condition1", + "condition1", + "condition3", + "condition3", + ] actual = petab.get_optimization_to_simulation_parameter_mapping( measurement_df=measurement_df, condition_df=condition_df, @@ -515,35 +673,52 @@ def test_parameterized_condition_table_changed_scale(): ) expected = [ - ({overridee_id: 'dynamicOverrideLog10'}, - {overridee_id: 'dynamicOverrideLog10'}, - {overridee_id: LOG10}, {overridee_id: LOG10}), - ({overridee_id: 'dynamicOverrideLog10'}, {overridee_id: -2.0}, - {overridee_id: LOG10}, {overridee_id: LIN}), + ( + {overridee_id: 
"dynamicOverrideLog10"}, + {overridee_id: "dynamicOverrideLog10"}, + {overridee_id: LOG10}, + {overridee_id: LOG10}, + ), + ( + {overridee_id: "dynamicOverrideLog10"}, + {overridee_id: -2.0}, + {overridee_id: LOG10}, + {overridee_id: LIN}, + ), # not rescaled: - ({overridee_id: 1000.0}, {overridee_id: 1000.0}, - {overridee_id: LIN}, {overridee_id: LIN}), - ({overridee_id: 1000.0}, {overridee_id: 10.0}, - {overridee_id: LIN}, {overridee_id: LIN})] + ( + {overridee_id: 1000.0}, + {overridee_id: 1000.0}, + {overridee_id: LIN}, + {overridee_id: LIN}, + ), + ( + {overridee_id: 1000.0}, + {overridee_id: 10.0}, + {overridee_id: LIN}, + {overridee_id: LIN}, + ), + ] assert actual == expected def test_fill_in_nominal_values(): - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['estimated', 'not_estimated'], - NOMINAL_VALUE: [0.0, 2.0], - ESTIMATE: [1, 0] - }) + parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["estimated", "not_estimated"], + NOMINAL_VALUE: [0.0, 2.0], + ESTIMATE: [1, 0], + } + ) parameter_df.set_index([PARAMETER_ID], inplace=True) - par_mapping = {'estimated': 'estimated', - 'not_estimated': 'not_estimated'} - scale_mapping = {'estimated': LIN, 'not_estimated': LIN} + par_mapping = {"estimated": "estimated", "not_estimated": "not_estimated"} + scale_mapping = {"estimated": LIN, "not_estimated": LIN} _apply_parameter_table(par_mapping, scale_mapping, parameter_df) - expected_par = {'estimated': 'estimated', 'not_estimated': 2.0} - expected_scale = {'estimated': LIN, 'not_estimated': LIN} + expected_par = {"estimated": "estimated", "not_estimated": 2.0} + expected_scale = {"estimated": LIN, "not_estimated": LIN} assert expected_par == par_mapping assert expected_scale == scale_mapping diff --git a/tests/test_parameters.py b/tests/test_parameters.py index 04e410fa..e9073d31 100644 --- a/tests/test_parameters.py +++ b/tests/test_parameters.py @@ -4,18 +4,21 @@ import numpy as np import pandas as pd -import petab import pytest + +import petab from petab.C import * def test_get_optimization_parameter_scaling(): """Test get_optimization_parameter_scaling""" - df = pd.DataFrame(data={ - PARAMETER_ID: ['p1', 'p2', 'p3'], - ESTIMATE: [1, 0, 1], - PARAMETER_SCALE: [LIN, LOG, LOG10] - }) + df = pd.DataFrame( + data={ + PARAMETER_ID: ["p1", "p2", "p3"], + ESTIMATE: [1, 0, 1], + PARAMETER_SCALE: [LIN, LOG, LOG10], + } + ) df.set_index(PARAMETER_ID, inplace=True) # parameter and scale @@ -28,13 +31,15 @@ def test_get_optimization_parameter_scaling(): def test_get_optimization_parameters(): """Test get_optimization_parameters""" - df = pd.DataFrame(data={ - PARAMETER_ID: ['p1', 'p2', 'p3'], - ESTIMATE: [1, 0, 1], - }) + df = pd.DataFrame( + data={ + PARAMETER_ID: ["p1", "p2", "p3"], + ESTIMATE: [1, 0, 1], + } + ) df.set_index(PARAMETER_ID, inplace=True) - expected = ['p1', 'p3'] + expected = ["p1", "p3"] actual = petab.get_optimization_parameters(df) @@ -44,24 +49,28 @@ def test_get_optimization_parameters(): def test_get_parameter_df(): """Test parameters.get_parameter_df.""" # parameter df missing ids - parameter_df = pd.DataFrame(data={ - PARAMETER_NAME: ['parname1', 'parname2'], - }) - with tempfile.NamedTemporaryFile(mode='w', delete=False) as fh: + parameter_df = pd.DataFrame( + data={ + PARAMETER_NAME: ["parname1", "parname2"], + } + ) + with tempfile.NamedTemporaryFile(mode="w", delete=False) as fh: file_name = fh.name - parameter_df.to_csv(fh, sep='\t', index=False) + parameter_df.to_csv(fh, sep="\t", index=False) with pytest.raises(KeyError): petab.get_parameter_df(file_name) # 
with ids - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['par1', 'par2'], - PARAMETER_NAME: ['parname1', 'parname2'], - }) - with tempfile.NamedTemporaryFile(mode='w', delete=False) as fh: + parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["par1", "par2"], + PARAMETER_NAME: ["parname1", "parname2"], + } + ) + with tempfile.NamedTemporaryFile(mode="w", delete=False) as fh: file_name = fh.name - parameter_df.to_csv(fh, sep='\t', index=False) + parameter_df.to_csv(fh, sep="\t", index=False) df = petab.get_parameter_df(file_name) assert (df == parameter_df.set_index(PARAMETER_ID)).all().all() @@ -69,48 +78,61 @@ def test_get_parameter_df(): # Test parameter subset files with tempfile.TemporaryDirectory() as directory: parameter_dfs, parameter_files = ({}, {}) - parameter_dfs['complete'] = pd.DataFrame(data={ - PARAMETER_ID: ['id1', 'id2', 'id3'], - PARAMETER_NAME: ['name1', 'name2', 'name3'] - }) - parameter_dfs['subset1'] = pd.DataFrame(data={ - PARAMETER_ID: ['id1', 'id2'], - PARAMETER_NAME: ['name1', 'name2'] - }) - parameter_dfs['subset2_strict'] = pd.DataFrame(data={ - PARAMETER_ID: ['id3'], - PARAMETER_NAME: ['name3'] - }) - parameter_dfs['subset2_redundance'] = pd.DataFrame(data={ - PARAMETER_ID: ['id2', 'id3'], - PARAMETER_NAME: ['name2', 'name3'] - }) - parameter_dfs['subset2_contradiction'] = pd.DataFrame(data={ - PARAMETER_ID: ['id2', 'id3'], - PARAMETER_NAME: ['different_name2', 'name3'] - }) + parameter_dfs["complete"] = pd.DataFrame( + data={ + PARAMETER_ID: ["id1", "id2", "id3"], + PARAMETER_NAME: ["name1", "name2", "name3"], + } + ) + parameter_dfs["subset1"] = pd.DataFrame( + data={ + PARAMETER_ID: ["id1", "id2"], + PARAMETER_NAME: ["name1", "name2"], + } + ) + parameter_dfs["subset2_strict"] = pd.DataFrame( + data={PARAMETER_ID: ["id3"], PARAMETER_NAME: ["name3"]} + ) + parameter_dfs["subset2_redundance"] = pd.DataFrame( + data={ + PARAMETER_ID: ["id2", "id3"], + PARAMETER_NAME: ["name2", "name3"], + } + ) + parameter_dfs["subset2_contradiction"] = pd.DataFrame( + data={ + PARAMETER_ID: ["id2", "id3"], + PARAMETER_NAME: ["different_name2", "name3"], + } + ) for name, df in parameter_dfs.items(): with tempfile.NamedTemporaryFile( - mode='w', delete=False, dir=directory) as fh: + mode="w", delete=False, dir=directory + ) as fh: parameter_files[name] = fh.name - parameter_dfs[name].to_csv(fh, sep='\t', index=False) + parameter_dfs[name].to_csv(fh, sep="\t", index=False) # Check that subset files are correctly combined - assert petab.get_parameter_df(parameter_files['complete']).equals( - petab.get_parameter_df([parameter_files['subset1'], - parameter_files['subset2_strict']])) + assert petab.get_parameter_df(parameter_files["complete"]).equals( + petab.get_parameter_df( + [parameter_files["subset1"], parameter_files["subset2_strict"]] + ) + ) # Ensure an error is raised if there exist parameterId duplicates # with identical parameter definitions with pytest.raises(ValueError): petab.get_parameter_df( - [parameter_files["subset1"], - parameter_files["subset2_redundance"]] + [ + parameter_files["subset1"], + parameter_files["subset2_redundance"], + ] ) # with non-identical parameter definitions with pytest.raises(ValueError): petab.get_parameter_df( - [parameter_files["subset1"], - parameter_files["subset2_contradiction"], - ] + [ + parameter_files["subset1"], + parameter_files["subset2_contradiction"], + ] ) # Ensure that parameters that differ only by parameterId @@ -141,18 +163,21 @@ def test_get_parameter_df(): assert (df_template == df_test).all().all() # several 
parameter files assert petab.get_parameter_df(parameter_files["complete"]).equals( - petab.get_parameter_df([parameter_files["subset1"], - parameter_files["subset2"]]) + petab.get_parameter_df( + [parameter_files["subset1"], parameter_files["subset2"]] + ) ) def test_write_parameter_df(): """Test parameters.write_parameter_df.""" - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['par1', 'par2'], - # Test utf8 characters - PARAMETER_NAME: ['ɑ', 'β'], - }).set_index(PARAMETER_ID) + parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["par1", "par2"], + # Test utf8 characters + PARAMETER_NAME: ["ɑ", "β"], + } + ).set_index(PARAMETER_ID) with tempfile.TemporaryDirectory() as temp_dir: file_name = Path(temp_dir) / "parameters.tsv" @@ -163,14 +188,16 @@ def test_write_parameter_df(): def test_normalize_parameter_df(): """Check parameters.normalize_parameter_df.""" - parameter_df = pd.DataFrame({ - PARAMETER_ID: ['par0', 'par1', 'par2'], - PARAMETER_SCALE: [LOG10, LOG10, LIN], - NOMINAL_VALUE: [1e-2, 1e-3, 1e-4], - ESTIMATE: [1, 1, 0], - LOWER_BOUND: [1e-5, 1e-6, 1e-7], - UPPER_BOUND: [1e5, 1e6, 1e7] - }).set_index(PARAMETER_ID) + parameter_df = pd.DataFrame( + { + PARAMETER_ID: ["par0", "par1", "par2"], + PARAMETER_SCALE: [LOG10, LOG10, LIN], + NOMINAL_VALUE: [1e-2, 1e-3, 1e-4], + ESTIMATE: [1, 1, 0], + LOWER_BOUND: [1e-5, 1e-6, 1e-7], + UPPER_BOUND: [1e5, 1e6, 1e7], + } + ).set_index(PARAMETER_ID) actual = petab.normalize_parameter_df(parameter_df) @@ -186,27 +213,37 @@ def test_normalize_parameter_df(): # check if basic columns match for col in PARAMETER_DF_COLS[1:]: - if col in [INITIALIZATION_PRIOR_PARAMETERS, - OBJECTIVE_PRIOR_PARAMETERS]: + if col in [ + INITIALIZATION_PRIOR_PARAMETERS, + OBJECTIVE_PRIOR_PARAMETERS, + ]: continue - assert ((actual[col] == expected[col]) | - (actual[col].isnull() == expected[col].isnull())).all() + assert ( + (actual[col] == expected[col]) + | (actual[col].isnull() == expected[col].isnull()) + ).all() # check if prior parameters match for col in [INITIALIZATION_PRIOR_PARAMETERS, OBJECTIVE_PRIOR_PARAMETERS]: - for (_, actual_row), (_, expected_row) in \ - zip(actual.iterrows(), expected.iterrows()): - actual_pars = tuple([float(val) for val in - actual_row[col].split(';')]) - expected_pars = tuple([float(val) for val in - expected_row[col].split(';')]) + for (_, actual_row), (_, expected_row) in zip( + actual.iterrows(), expected.iterrows() + ): + actual_pars = tuple( + [float(val) for val in actual_row[col].split(";")] + ) + expected_pars = tuple( + [float(val) for val in expected_row[col].split(";")] + ) assert actual_pars == expected_pars # check is a projection actual2 = petab.normalize_parameter_df(actual) - assert ((actual == actual2) | (actual.isnull() == actual2.isnull())) \ - .all().all() + assert ( + ((actual == actual2) | (actual.isnull() == actual2.isnull())) + .all() + .all() + ) # check is valid petab petab.check_parameter_df(actual) @@ -225,14 +262,22 @@ def test_scale_unscale(): assert petab.unscale(par, LOG10) == 10**par # map scale - assert list(petab.map_scale([par]*3, [LIN, LOG, LOG10])) == \ - [par, np.log(par), np.log10(par)] + assert list(petab.map_scale([par] * 3, [LIN, LOG, LOG10])) == [ + par, + np.log(par), + np.log10(par), + ] # map unscale - assert list(petab.map_unscale([par]*3, [LIN, LOG, LOG10])) == \ - [par, np.exp(par), 10**par] + assert list(petab.map_unscale([par] * 3, [LIN, LOG, LOG10])) == [ + par, + np.exp(par), + 10**par, + ] # map broadcast - assert list(petab.map_scale([par, 2*par], LOG)) == \ - 
list(np.log([par, 2*par])) - assert list(petab.map_unscale([par, 2*par], LOG)) == \ - list(np.exp([par, 2*par])) + assert list(petab.map_scale([par, 2 * par], LOG)) == list( + np.log([par, 2 * par]) + ) + assert list(petab.map_unscale([par, 2 * par], LOG)) == list( + np.exp([par, 2 * par]) + ) diff --git a/tests/test_petab.py b/tests/test_petab.py index 8c4913ab..21480225 100644 --- a/tests/test_petab.py +++ b/tests/test_petab.py @@ -9,22 +9,27 @@ import libsbml import numpy as np import pandas as pd -import petab import pytest +from yaml import safe_load + +import petab from petab.C import * from petab.models.sbml_model import SbmlModel -from yaml import safe_load @pytest.fixture def condition_df_2_conditions(): - condition_df = pd.DataFrame(data={ - 'conditionId': ['condition1', 'condition2'], - 'conditionName': ['', 'Condition 2'], - 'fixedParameter1': [1.0, 2.0] - }) - condition_df.set_index('conditionId', inplace=True) - condition_df.fixedParameter1 = condition_df.fixedParameter1.astype("object") + condition_df = pd.DataFrame( + data={ + "conditionId": ["condition1", "condition2"], + "conditionName": ["", "Condition 2"], + "fixedParameter1": [1.0, 2.0], + } + ) + condition_df.set_index("conditionId", inplace=True) + condition_df.fixedParameter1 = condition_df.fixedParameter1.astype( + "object" + ) return condition_df @@ -33,33 +38,42 @@ def petab_problem(): """Test petab problem.""" # create test model import simplesbml + model = simplesbml.SbmlModel() - model.addParameter('fixedParameter1', 0.0) - model.addParameter('observable_1', 0.0) - - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2'], - OBSERVABLE_PARAMETERS: ['', 'p1;p2'], - NOISE_PARAMETERS: ['p3;p4', 'p5'] - }) - - condition_df = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2'], - CONDITION_NAME: ['', 'Condition 2'], - 'fixedParameter1': [1.0, 2.0] - }).set_index(CONDITION_ID) - - parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['dynamicParameter1', 'dynamicParameter2'], - PARAMETER_NAME: ['', '...'], - }).set_index(PARAMETER_ID) - - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['observable_1'], - OBSERVABLE_NAME: ['julius'], - OBSERVABLE_FORMULA: ['observable_1'], - NOISE_FORMULA: [1], - }).set_index(OBSERVABLE_ID) + model.addParameter("fixedParameter1", 0.0) + model.addParameter("observable_1", 0.0) + + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2"], + OBSERVABLE_PARAMETERS: ["", "p1;p2"], + NOISE_PARAMETERS: ["p3;p4", "p5"], + } + ) + + condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + CONDITION_NAME: ["", "Condition 2"], + "fixedParameter1": [1.0, 2.0], + } + ).set_index(CONDITION_ID) + + parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["dynamicParameter1", "dynamicParameter2"], + PARAMETER_NAME: ["", "..."], + } + ).set_index(PARAMETER_ID) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["observable_1"], + OBSERVABLE_NAME: ["julius"], + OBSERVABLE_FORMULA: ["observable_1"], + NOISE_FORMULA: [1], + } + ).set_index(OBSERVABLE_ID) with tempfile.TemporaryDirectory() as temp_dir: sbml_file_name = Path(temp_dir, "model.xml") @@ -83,17 +97,18 @@ def petab_problem(): measurement_file=measurement_file_name, condition_file=condition_file_name, parameter_file=parameter_file_name, - observable_files=observable_file_name) + observable_files=observable_file_name, + ) @pytest.fixture def fujita_model_scaling(): - path = Path(__file__).parent.parent / 'doc' / 'example' / 'example_Fujita' + path = 
Path(__file__).parent.parent / "doc" / "example" / "example_Fujita" - sbml_file = path / 'Fujita_model.xml' - condition_file = path / 'Fujita_experimentalCondition.tsv' - measurement_file = path / 'Fujita_measurementData.tsv' - parameter_file = path / 'Fujita_parameters_scaling.tsv' + sbml_file = path / "Fujita_model.xml" + condition_file = path / "Fujita_experimentalCondition.tsv" + measurement_file = path / "Fujita_measurementData.tsv" + parameter_file = path / "Fujita_parameters_scaling.tsv" with pytest.deprecated_call(): return petab.Problem.from_files( @@ -105,31 +120,37 @@ def fujita_model_scaling(): def test_split_parameter_replacement_list(): - assert petab.split_parameter_replacement_list('') == [] - assert petab.split_parameter_replacement_list('param1') == ['param1'] - assert petab.split_parameter_replacement_list('param1;param2') \ - == ['param1', 'param2'] - assert petab.split_parameter_replacement_list('1.0') == [1.0] - assert petab.split_parameter_replacement_list('1.0;2.0') == [1.0, 2.0] - assert petab.split_parameter_replacement_list('param1;2.2') \ - == ['param1', 2.2] + assert petab.split_parameter_replacement_list("") == [] + assert petab.split_parameter_replacement_list("param1") == ["param1"] + assert petab.split_parameter_replacement_list("param1;param2") == [ + "param1", + "param2", + ] + assert petab.split_parameter_replacement_list("1.0") == [1.0] + assert petab.split_parameter_replacement_list("1.0;2.0") == [1.0, 2.0] + assert petab.split_parameter_replacement_list("param1;2.2") == [ + "param1", + 2.2, + ] assert petab.split_parameter_replacement_list(np.nan) == [] assert petab.split_parameter_replacement_list(1.5) == [1.5] assert petab.split_parameter_replacement_list(None) == [] with pytest.raises(ValueError): - assert petab.split_parameter_replacement_list('1.0;') + assert petab.split_parameter_replacement_list("1.0;") with pytest.raises(ValueError): - assert petab.split_parameter_replacement_list(';1.0') + assert petab.split_parameter_replacement_list(";1.0") def test_get_measurement_parameter_ids(): measurement_df = pd.DataFrame( data={ - OBSERVABLE_PARAMETERS: ['', 'p1;p2'], - NOISE_PARAMETERS: ['p3;p4', 'p5']}) - expected = ['p1', 'p2', 'p3', 'p4', 'p5'] + OBSERVABLE_PARAMETERS: ["", "p1;p2"], + NOISE_PARAMETERS: ["p3;p4", "p5"], + } + ) + expected = ["p1", "p2", "p3", "p4", "p5"] actual = petab.get_measurement_parameter_ids(measurement_df) # ordering is arbitrary assert set(actual) == set(expected) @@ -140,31 +161,38 @@ def test_serialization(petab_problem): problem_recreated = pickle.loads(pickle.dumps(petab_problem)) assert problem_recreated.measurement_df.equals( - petab_problem.measurement_df) + petab_problem.measurement_df + ) - assert problem_recreated.parameter_df.equals( - petab_problem.parameter_df) + assert problem_recreated.parameter_df.equals(petab_problem.parameter_df) - assert problem_recreated.condition_df.equals( - petab_problem.condition_df) + assert problem_recreated.condition_df.equals(petab_problem.condition_df) # Can't test for equality directly, testing for number of parameters # should do the job here - assert len(problem_recreated.sbml_model.getListOfParameters()) \ - == len(petab_problem.sbml_model.getListOfParameters()) + assert len(problem_recreated.sbml_model.getListOfParameters()) == len( + petab_problem.sbml_model.getListOfParameters() + ) def test_get_priors_from_df(): """Check petab.get_priors_from_df.""" - parameter_df = pd.DataFrame({ - PARAMETER_SCALE: [LOG10, LOG10, LOG10, LOG10, LOG10], - LOWER_BOUND: [1e-8, 1e-9, 
1e-10, 1e-11, 1e-5], - UPPER_BOUND: [1e8, 1e9, 1e10, 1e11, 1e5], - ESTIMATE: [1, 1, 1, 1, 0], - INITIALIZATION_PRIOR_TYPE: ['', '', - UNIFORM, NORMAL, ''], - INITIALIZATION_PRIOR_PARAMETERS: ['', '-5;5', '1e-5;1e5', '0;1', ''] - }) + parameter_df = pd.DataFrame( + { + PARAMETER_SCALE: [LOG10, LOG10, LOG10, LOG10, LOG10], + LOWER_BOUND: [1e-8, 1e-9, 1e-10, 1e-11, 1e-5], + UPPER_BOUND: [1e8, 1e9, 1e10, 1e11, 1e5], + ESTIMATE: [1, 1, 1, 1, 0], + INITIALIZATION_PRIOR_TYPE: ["", "", UNIFORM, NORMAL, ""], + INITIALIZATION_PRIOR_PARAMETERS: [ + "", + "-5;5", + "1e-5;1e5", + "0;1", + "", + ], + } + ) prior_list = petab.get_priors_from_df(parameter_df, mode=INITIALIZATION) @@ -173,8 +201,12 @@ def test_get_priors_from_df(): # correct types types = [entry[0] for entry in prior_list] - assert types == [PARAMETER_SCALE_UNIFORM, PARAMETER_SCALE_UNIFORM, - UNIFORM, NORMAL] + assert types == [ + PARAMETER_SCALE_UNIFORM, + PARAMETER_SCALE_UNIFORM, + UNIFORM, + NORMAL, + ] # correct scales scales = [entry[2] for entry in prior_list] @@ -182,8 +214,10 @@ def test_get_priors_from_df(): # correct bounds bounds = [entry[3] for entry in prior_list] - assert bounds == list(zip(parameter_df[LOWER_BOUND], - parameter_df[UPPER_BOUND]))[:4] + assert ( + bounds + == list(zip(parameter_df[LOWER_BOUND], parameter_df[UPPER_BOUND]))[:4] + ) # give correct value for empty prior_pars = [entry[1] for entry in prior_list] @@ -205,39 +239,46 @@ def test_startpoint_sampling(fujita_model_scaling): def test_startpoint_sampling_dict(fujita_model_scaling): n_starts = 10 startpoints = fujita_model_scaling.sample_parameter_startpoints_dict( - n_starts) + n_starts + ) assert len(startpoints) == n_starts for startpoint in startpoints: assert set(startpoint.keys()) == set(fujita_model_scaling.x_free_ids) def test_create_parameter_df( - condition_df_2_conditions): # pylint: disable=W0621 + condition_df_2_conditions, +): # pylint: disable=W0621 """Test petab.create_parameter_df.""" import simplesbml + ss_model = simplesbml.SbmlModel() - ss_model.addSpecies('[x1]', 1.0) - ss_model.addParameter('fixedParameter1', 2.0) - ss_model.addParameter('p0', 3.0) + ss_model.addSpecies("[x1]", 1.0) + ss_model.addParameter("fixedParameter1", 2.0) + ss_model.addParameter("p0", 3.0) model = SbmlModel(sbml_model=ss_model.model) - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2'], - OBSERVABLE_FORMULA: ['x1', '2*x1'] - }).set_index(OBSERVABLE_ID) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2"], + OBSERVABLE_FORMULA: ["x1", "2*x1"], + } + ).set_index(OBSERVABLE_ID) # Add assignment rule target which should be ignored - ss_model.addParameter('assignment_target', 0.0) - ss_model.addAssignmentRule('assignment_target', "1.0") + ss_model.addParameter("assignment_target", 0.0) + ss_model.addAssignmentRule("assignment_target", "1.0") - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1', 'obs2'], - OBSERVABLE_PARAMETERS: ['', 'p1;p2'], - NOISE_PARAMETERS: ['p3;p4', 'p5'] - }) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs2"], + OBSERVABLE_PARAMETERS: ["", "p1;p2"], + NOISE_PARAMETERS: ["p3;p4", "p5"], + } + ) # first model parameters, then row by row noise and sigma overrides - expected = ['p3', 'p4', 'p1', 'p2', 'p5'] + expected = ["p3", "p4", "p1", "p2", "p5"] # Test old API with passing libsbml.Model directly with warnings.catch_warnings(record=True) as w: @@ -246,7 +287,8 @@ def test_create_parameter_df( ss_model.model, condition_df_2_conditions, observable_df, - measurement_df) 
+ measurement_df, + ) assert len(w) == 1 assert issubclass(w[-1].category, DeprecationWarning) assert parameter_df.index.values.tolist() == expected @@ -255,14 +297,15 @@ def test_create_parameter_df( model=model, condition_df=condition_df_2_conditions, observable_df=observable_df, - measurement_df=measurement_df + measurement_df=measurement_df, ) assert parameter_df.index.values.tolist() == expected # test with condition parameter override: - condition_df_2_conditions.loc['condition2', 'fixedParameter1'] \ - = 'overrider' - expected = ['p3', 'p4', 'p1', 'p2', 'p5', 'overrider'] + condition_df_2_conditions.loc[ + "condition2", "fixedParameter1" + ] = "overrider" + expected = ["p3", "p4", "p1", "p2", "p5", "overrider"] parameter_df = petab.create_parameter_df( model=model, @@ -274,110 +317,146 @@ def test_create_parameter_df( assert actual == expected # test with optional parameters - expected = ['p0', 'p3', 'p4', 'p1', 'p2', 'p5', 'overrider'] + expected = ["p0", "p3", "p4", "p1", "p2", "p5", "overrider"] parameter_df = petab.create_parameter_df( model=model, condition_df=condition_df_2_conditions, observable_df=observable_df, measurement_df=measurement_df, - include_optional=True) + include_optional=True, + ) actual = parameter_df.index.values.tolist() assert actual == expected - assert parameter_df.loc['p0', NOMINAL_VALUE] == 3.0 + assert parameter_df.loc["p0", NOMINAL_VALUE] == 3.0 def test_flatten_timepoint_specific_output_overrides(): """Test flatten_timepoint_specific_output_overrides""" - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1'], - OBSERVABLE_FORMULA: [ - 'observableParameter1_obs1 + observableParameter2_obs1'], - NOISE_FORMULA: ['noiseParameter1_obs1'] - }) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1"], + OBSERVABLE_FORMULA: [ + "observableParameter1_obs1 + observableParameter2_obs1" + ], + NOISE_FORMULA: ["noiseParameter1_obs1"], + } + ) observable_df.set_index(OBSERVABLE_ID, inplace=True) - observable_df_expected = pd.DataFrame(data={ - OBSERVABLE_ID: [ - 'obs1__obsParOverride1_1_0__noiseParOverride1__condition1', - 'obs1__obsParOverride2_1_0__noiseParOverride1__condition1', - 'obs1__obsParOverride2_1_0__noiseParOverride2__condition1', - ], - OBSERVABLE_FORMULA: [ - 'observableParameter1_obs1__obsParOverride1_1_0__' - 'noiseParOverride1__condition1 + observableParameter2_obs1' - '__obsParOverride1_1_0__noiseParOverride1__condition1', - 'observableParameter1_obs1__obsParOverride2_1_0__noiseParOverride1' - '__condition1 + observableParameter2_obs1__obsParOverride2_1_0' - '__noiseParOverride1__condition1', - 'observableParameter1_obs1__obsParOverride2_1_0' - '__noiseParOverride2__condition1 + observableParameter2_obs1__' - 'obsParOverride2_1_0__noiseParOverride2__condition1'], - NOISE_FORMULA: ['noiseParameter1_obs1__obsParOverride1_1_0__' - 'noiseParOverride1__condition1', - 'noiseParameter1_obs1__obsParOverride2_1_0__' - 'noiseParOverride1__condition1', - 'noiseParameter1_obs1__obsParOverride2_1_0__' - 'noiseParOverride2__condition1'] - }) + observable_df_expected = pd.DataFrame( + data={ + OBSERVABLE_ID: [ + "obs1__obsParOverride1_1_0__noiseParOverride1__condition1", + "obs1__obsParOverride2_1_0__noiseParOverride1__condition1", + "obs1__obsParOverride2_1_0__noiseParOverride2__condition1", + ], + OBSERVABLE_FORMULA: [ + "observableParameter1_obs1__obsParOverride1_1_0__" + "noiseParOverride1__condition1 + observableParameter2_obs1" + "__obsParOverride1_1_0__noiseParOverride1__condition1", + 
"observableParameter1_obs1__obsParOverride2_1_0__noiseParOverride1" + "__condition1 + observableParameter2_obs1__obsParOverride2_1_0" + "__noiseParOverride1__condition1", + "observableParameter1_obs1__obsParOverride2_1_0" + "__noiseParOverride2__condition1 + observableParameter2_obs1__" + "obsParOverride2_1_0__noiseParOverride2__condition1", + ], + NOISE_FORMULA: [ + "noiseParameter1_obs1__obsParOverride1_1_0__" + "noiseParOverride1__condition1", + "noiseParameter1_obs1__obsParOverride2_1_0__" + "noiseParOverride1__condition1", + "noiseParameter1_obs1__obsParOverride2_1_0__" + "noiseParOverride2__condition1", + ], + } + ) observable_df_expected.set_index(OBSERVABLE_ID, inplace=True) # Measurement table with timepoint-specific overrides - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: - ['obs1', 'obs1', 'obs1', 'obs1'], - SIMULATION_CONDITION_ID: - ['condition1', 'condition1', 'condition1', 'condition1'], - PREEQUILIBRATION_CONDITION_ID: - ['', '', '', ''], - TIME: - [1.0, 1.0, 2.0, 2.0], - MEASUREMENT: - [.1] * 4, - OBSERVABLE_PARAMETERS: - ['obsParOverride1;1.0', 'obsParOverride2;1.0', - 'obsParOverride2;1.0', 'obsParOverride2;1.0'], - NOISE_PARAMETERS: - ['noiseParOverride1', 'noiseParOverride1', - 'noiseParOverride2', 'noiseParOverride2'] - }) - - measurement_df_expected = pd.DataFrame(data={ - OBSERVABLE_ID: - ['obs1__obsParOverride1_1_0__noiseParOverride1__condition1', - 'obs1__obsParOverride2_1_0__noiseParOverride1__condition1', - 'obs1__obsParOverride2_1_0__noiseParOverride2__condition1', - 'obs1__obsParOverride2_1_0__noiseParOverride2__condition1'], - SIMULATION_CONDITION_ID: - ['condition1', 'condition1', 'condition1', 'condition1'], - PREEQUILIBRATION_CONDITION_ID: - ['', '', '', ''], - TIME: - [1.0, 1.0, 2.0, 2.0], - MEASUREMENT: - [.1] * 4, - OBSERVABLE_PARAMETERS: - ['obsParOverride1;1.0', 'obsParOverride2;1.0', - 'obsParOverride2;1.0', 'obsParOverride2;1.0'], - NOISE_PARAMETERS: - ['noiseParOverride1', 'noiseParOverride1', - 'noiseParOverride2', 'noiseParOverride2'] - }) - - problem = petab.Problem(measurement_df=measurement_df, - observable_df=observable_df) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs1", "obs1", "obs1"], + SIMULATION_CONDITION_ID: [ + "condition1", + "condition1", + "condition1", + "condition1", + ], + PREEQUILIBRATION_CONDITION_ID: ["", "", "", ""], + TIME: [1.0, 1.0, 2.0, 2.0], + MEASUREMENT: [0.1] * 4, + OBSERVABLE_PARAMETERS: [ + "obsParOverride1;1.0", + "obsParOverride2;1.0", + "obsParOverride2;1.0", + "obsParOverride2;1.0", + ], + NOISE_PARAMETERS: [ + "noiseParOverride1", + "noiseParOverride1", + "noiseParOverride2", + "noiseParOverride2", + ], + } + ) + + measurement_df_expected = pd.DataFrame( + data={ + OBSERVABLE_ID: [ + "obs1__obsParOverride1_1_0__noiseParOverride1__condition1", + "obs1__obsParOverride2_1_0__noiseParOverride1__condition1", + "obs1__obsParOverride2_1_0__noiseParOverride2__condition1", + "obs1__obsParOverride2_1_0__noiseParOverride2__condition1", + ], + SIMULATION_CONDITION_ID: [ + "condition1", + "condition1", + "condition1", + "condition1", + ], + PREEQUILIBRATION_CONDITION_ID: ["", "", "", ""], + TIME: [1.0, 1.0, 2.0, 2.0], + MEASUREMENT: [0.1] * 4, + OBSERVABLE_PARAMETERS: [ + "obsParOverride1;1.0", + "obsParOverride2;1.0", + "obsParOverride2;1.0", + "obsParOverride2;1.0", + ], + NOISE_PARAMETERS: [ + "noiseParOverride1", + "noiseParOverride1", + "noiseParOverride2", + "noiseParOverride2", + ], + } + ) + + problem = petab.Problem( + measurement_df=measurement_df, observable_df=observable_df + 
) assert petab.lint_problem(problem) is False # Ensure having timepoint-specific overrides - assert petab.lint.measurement_table_has_timepoint_specific_mappings( - measurement_df) is True + assert ( + petab.lint.measurement_table_has_timepoint_specific_mappings( + measurement_df + ) + is True + ) petab.flatten_timepoint_specific_output_overrides(problem) # Timepoint-specific overrides should be gone now - assert petab.lint.measurement_table_has_timepoint_specific_mappings( - problem.measurement_df) is False + assert ( + petab.lint.measurement_table_has_timepoint_specific_mappings( + problem.measurement_df + ) + is False + ) assert problem.observable_df.equals(observable_df_expected) is True assert problem.measurement_df.equals(measurement_df_expected) is True @@ -395,7 +474,7 @@ def test_flatten_timepoint_specific_output_overrides(): petab_problem=unflattened_problem, ) # The unflattened simulation dataframe has the original observable IDs. - assert (unflattened_simulation_df[OBSERVABLE_ID] == 'obs1').all() + assert (unflattened_simulation_df[OBSERVABLE_ID] == "obs1").all() def test_flatten_timepoint_specific_output_overrides_special_cases(): @@ -404,70 +483,99 @@ def test_flatten_timepoint_specific_output_overrides_special_cases(): * no preequilibration * no observable parameters """ - observable_df = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1'], - OBSERVABLE_FORMULA: ['species1'], - NOISE_FORMULA: ['noiseParameter1_obs1'] - }) + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1"], + OBSERVABLE_FORMULA: ["species1"], + NOISE_FORMULA: ["noiseParameter1_obs1"], + } + ) observable_df.set_index(OBSERVABLE_ID, inplace=True) - observable_df_expected = pd.DataFrame(data={ - OBSERVABLE_ID: ['obs1__noiseParOverride1__condition1', - 'obs1__noiseParOverride2__condition1'], - OBSERVABLE_FORMULA: [ - 'species1', - 'species1'], - NOISE_FORMULA: ['noiseParameter1_obs1__noiseParOverride1__condition1', - 'noiseParameter1_obs1__noiseParOverride2__condition1'] - }) + observable_df_expected = pd.DataFrame( + data={ + OBSERVABLE_ID: [ + "obs1__noiseParOverride1__condition1", + "obs1__noiseParOverride2__condition1", + ], + OBSERVABLE_FORMULA: ["species1", "species1"], + NOISE_FORMULA: [ + "noiseParameter1_obs1__noiseParOverride1__condition1", + "noiseParameter1_obs1__noiseParOverride2__condition1", + ], + } + ) observable_df_expected.set_index(OBSERVABLE_ID, inplace=True) # Measurement table with timepoint-specific overrides - measurement_df = pd.DataFrame(data={ - OBSERVABLE_ID: - ['obs1', 'obs1', 'obs1', 'obs1'], - SIMULATION_CONDITION_ID: - ['condition1', 'condition1', 'condition1', 'condition1'], - TIME: - [1.0, 1.0, 2.0, 2.0], - MEASUREMENT: - [.1] * 4, - NOISE_PARAMETERS: - ['noiseParOverride1', 'noiseParOverride1', - 'noiseParOverride2', 'noiseParOverride2'], - }) - - measurement_df_expected = pd.DataFrame(data={ - OBSERVABLE_ID: - ['obs1__noiseParOverride1__condition1', - 'obs1__noiseParOverride1__condition1', - 'obs1__noiseParOverride2__condition1', - 'obs1__noiseParOverride2__condition1'], - SIMULATION_CONDITION_ID: - ['condition1', 'condition1', 'condition1', 'condition1'], - TIME: - [1.0, 1.0, 2.0, 2.0], - MEASUREMENT: - [.1] * 4, - NOISE_PARAMETERS: - ['noiseParOverride1', 'noiseParOverride1', - 'noiseParOverride2', 'noiseParOverride2'], - }) - - problem = petab.Problem(measurement_df=measurement_df, - observable_df=observable_df) + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs1", "obs1", "obs1", "obs1"], + SIMULATION_CONDITION_ID: [ + "condition1", + "condition1", + 
"condition1", + "condition1", + ], + TIME: [1.0, 1.0, 2.0, 2.0], + MEASUREMENT: [0.1] * 4, + NOISE_PARAMETERS: [ + "noiseParOverride1", + "noiseParOverride1", + "noiseParOverride2", + "noiseParOverride2", + ], + } + ) + + measurement_df_expected = pd.DataFrame( + data={ + OBSERVABLE_ID: [ + "obs1__noiseParOverride1__condition1", + "obs1__noiseParOverride1__condition1", + "obs1__noiseParOverride2__condition1", + "obs1__noiseParOverride2__condition1", + ], + SIMULATION_CONDITION_ID: [ + "condition1", + "condition1", + "condition1", + "condition1", + ], + TIME: [1.0, 1.0, 2.0, 2.0], + MEASUREMENT: [0.1] * 4, + NOISE_PARAMETERS: [ + "noiseParOverride1", + "noiseParOverride1", + "noiseParOverride2", + "noiseParOverride2", + ], + } + ) + + problem = petab.Problem( + measurement_df=measurement_df, observable_df=observable_df + ) assert petab.lint_problem(problem) is False # Ensure having timepoint-specific overrides - assert petab.lint.measurement_table_has_timepoint_specific_mappings( - measurement_df) is True + assert ( + petab.lint.measurement_table_has_timepoint_specific_mappings( + measurement_df + ) + is True + ) petab.flatten_timepoint_specific_output_overrides(problem) # Timepoint-specific overrides should be gone now - assert petab.lint.measurement_table_has_timepoint_specific_mappings( - problem.measurement_df) is False + assert ( + petab.lint.measurement_table_has_timepoint_specific_mappings( + problem.measurement_df + ) + is False + ) assert problem.observable_df.equals(observable_df_expected) is True assert problem.measurement_df.equals(measurement_df_expected) is True @@ -483,42 +591,47 @@ def test_concat_measurements(): filename_a = Path(temp_dir) / "measurements.tsv" petab.write_measurement_df(a, filename_a) - expected = pd.DataFrame({ - MEASUREMENT: [1.0, nan], - TIME: [nan, 1.0] - }) + expected = pd.DataFrame({MEASUREMENT: [1.0, nan], TIME: [nan, 1.0]}) assert expected.equals( - petab.concat_tables([a, b], - petab.measurements.get_measurement_df)) + petab.concat_tables([a, b], petab.measurements.get_measurement_df) + ) assert expected.equals( - petab.concat_tables([filename_a, b], - petab.measurements.get_measurement_df)) + petab.concat_tables( + [filename_a, b], petab.measurements.get_measurement_df + ) + ) def test_concat_condition_df(): - df1 = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2'], - 'par1': [1.1, 1.2], - 'par2': [2.1, 2.2], - 'par3': [3.1, 3.2] - }).set_index(CONDITION_ID) + df1 = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + "par1": [1.1, 1.2], + "par2": [2.1, 2.2], + "par3": [3.1, 3.2], + } + ).set_index(CONDITION_ID) assert df1.equals(petab.concat_tables(df1, petab.get_condition_df)) - df2 = pd.DataFrame(data={ - CONDITION_ID: ['condition3'], - 'par1': [1.3], - 'par2': [2.3], - }).set_index(CONDITION_ID) - - df_expected = pd.DataFrame(data={ - CONDITION_ID: ['condition1', 'condition2', 'condition3'], - 'par1': [1.1, 1.2, 1.3], - 'par2': [2.1, 2.2, 2.3], - 'par3': [3.1, 3.2, np.nan], - }).set_index(CONDITION_ID) + df2 = pd.DataFrame( + data={ + CONDITION_ID: ["condition3"], + "par1": [1.3], + "par2": [2.3], + } + ).set_index(CONDITION_ID) + + df_expected = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2", "condition3"], + "par1": [1.1, 1.2, 1.3], + "par2": [2.1, 2.2, 2.3], + "par3": [3.1, 3.2, np.nan], + } + ).set_index(CONDITION_ID) assert df_expected.equals( petab.concat_tables((df1, df2), petab.get_condition_df) ) @@ -526,7 +639,7 @@ def test_concat_condition_df(): def 
test_get_observable_ids(petab_problem): # pylint: disable=W0621 """Test if observable ids functions returns correct value.""" - assert set(petab_problem.get_observable_ids()) == {'observable_1'} + assert set(petab_problem.get_observable_ids()) == {"observable_1"} def test_parameter_properties(petab_problem): # pylint: disable=W0621 @@ -534,17 +647,19 @@ def test_parameter_properties(petab_problem): # pylint: disable=W0621 Test the petab.Problem functions to get parameter values. """ petab_problem = copy.deepcopy(petab_problem) - petab_problem.parameter_df = pd.DataFrame(data={ - PARAMETER_ID: ['par1', 'par2', 'par3'], - LOWER_BOUND: [0, 0.1, 0.1], - UPPER_BOUND: [100, 100, 200], - PARAMETER_SCALE: ['lin', 'log', 'log10'], - NOMINAL_VALUE: [7, 8, 9], - ESTIMATE: [1, 1, 0], - }).set_index(PARAMETER_ID) - assert petab_problem.x_ids == ['par1', 'par2', 'par3'] - assert petab_problem.x_free_ids == ['par1', 'par2'] - assert petab_problem.x_fixed_ids == ['par3'] + petab_problem.parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["par1", "par2", "par3"], + LOWER_BOUND: [0, 0.1, 0.1], + UPPER_BOUND: [100, 100, 200], + PARAMETER_SCALE: ["lin", "log", "log10"], + NOMINAL_VALUE: [7, 8, 9], + ESTIMATE: [1, 1, 0], + } + ).set_index(PARAMETER_ID) + assert petab_problem.x_ids == ["par1", "par2", "par3"] + assert petab_problem.x_free_ids == ["par1", "par2"] + assert petab_problem.x_fixed_ids == ["par3"] assert petab_problem.lb == [0, 0.1, 0.1] assert petab_problem.lb_scaled == [0, np.log(0.1), np.log10(0.1)] assert petab_problem.get_lb(fixed=False, scaled=True) == [0, np.log(0.1)] @@ -599,20 +714,27 @@ def test_to_files(petab_problem): # pylint: disable=W0621 # exemplarily load some parameter_df = petab.get_parameter_df(parameter_file) same_nans = parameter_df.isna() == petab_problem.parameter_df.isna() - assert ((parameter_df == petab_problem.parameter_df) | same_nans) \ - .all().all() + assert ( + ((parameter_df == petab_problem.parameter_df) | same_nans) + .all() + .all() + ) def test_load_remote(): """Test loading remote files""" - yaml_url = "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" \ - "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" + yaml_url = ( + "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" + "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" + ) petab_problem = petab.Problem.from_yaml(yaml_url) assert petab_problem.sbml_model is not None - assert petab_problem.measurement_df is not None \ - and not petab_problem.measurement_df.empty + assert ( + petab_problem.measurement_df is not None + and not petab_problem.measurement_df.empty + ) def test_problem_from_yaml_v1_empty(): @@ -645,33 +767,42 @@ def test_problem_from_yaml_v1_multiple_files(): with tempfile.TemporaryDirectory() as tmpdir: yaml_path = Path(tmpdir, "problem.yaml") - with open(yaml_path, 'w') as f: + with open(yaml_path, "w") as f: f.write(yaml_config) for i in (1, 2): - condition_df = pd.DataFrame({ - CONDITION_ID: [f"condition{i}"], - }) + condition_df = pd.DataFrame( + { + CONDITION_ID: [f"condition{i}"], + } + ) condition_df.set_index([CONDITION_ID], inplace=True) - petab.write_condition_df(condition_df, - Path(tmpdir, f"conditions{i}.tsv")) - - measurement_df = pd.DataFrame({ - SIMULATION_CONDITION_ID: [f"condition{i}"], - OBSERVABLE_ID: [f"observable{i}"], - TIME: [i], - MEASUREMENT: [1] - }) - petab.write_measurement_df(measurement_df, - Path(tmpdir, f"measurements{i}.tsv")) - - observables_df = pd.DataFrame({ - OBSERVABLE_ID: [f"observable{i}"], - OBSERVABLE_FORMULA: [1], - 
NOISE_FORMULA: [1], - }) - petab.write_observable_df(observables_df, - Path(tmpdir, f"observables{i}.tsv")) + petab.write_condition_df( + condition_df, Path(tmpdir, f"conditions{i}.tsv") + ) + + measurement_df = pd.DataFrame( + { + SIMULATION_CONDITION_ID: [f"condition{i}"], + OBSERVABLE_ID: [f"observable{i}"], + TIME: [i], + MEASUREMENT: [1], + } + ) + petab.write_measurement_df( + measurement_df, Path(tmpdir, f"measurements{i}.tsv") + ) + + observables_df = pd.DataFrame( + { + OBSERVABLE_ID: [f"observable{i}"], + OBSERVABLE_FORMULA: [1], + NOISE_FORMULA: [1], + } + ) + petab.write_observable_df( + observables_df, Path(tmpdir, f"observables{i}.tsv") + ) petab_problem = petab.Problem.from_yaml(yaml_path) diff --git a/tests/test_sbml.py b/tests/test_sbml.py index 69194d6e..dfa92dad 100644 --- a/tests/test_sbml.py +++ b/tests/test_sbml.py @@ -22,52 +22,70 @@ def create_test_data(): ss_model.addAssignmentRule("species_2", "25") - condition_df = pd.DataFrame({ - petab.CONDITION_ID: ["condition_1"], - "parameter_3": ['parameter_2'], - "species_1": [15], - "species_2": [25], - "species_3": ['parameter_1'], - "species_4": ['not_a_model_parameter'], - "compartment_1": [2], - }) + condition_df = pd.DataFrame( + { + petab.CONDITION_ID: ["condition_1"], + "parameter_3": ["parameter_2"], + "species_1": [15], + "species_2": [25], + "species_3": ["parameter_1"], + "species_4": ["not_a_model_parameter"], + "compartment_1": [2], + } + ) condition_df.set_index([petab.CONDITION_ID], inplace=True) - observable_df = pd.DataFrame({ - petab.OBSERVABLE_ID: ["observable_1"], - petab.OBSERVABLE_FORMULA: ["2 * species_1"], - }) + observable_df = pd.DataFrame( + { + petab.OBSERVABLE_ID: ["observable_1"], + petab.OBSERVABLE_FORMULA: ["2 * species_1"], + } + ) observable_df.set_index([petab.OBSERVABLE_ID], inplace=True) - measurement_df = pd.DataFrame({ - petab.OBSERVABLE_ID: ["observable_1"], - petab.SIMULATION_CONDITION_ID: ["condition_1"], - petab.TIME: [0.0], - }) - - parameter_df = pd.DataFrame({ - petab.PARAMETER_ID: - ["parameter_1", "parameter_2", "not_a_model_parameter"], - petab.PARAMETER_SCALE: [petab.LOG10] * 3, - petab.NOMINAL_VALUE: [1.25, 2.25, 3.25], - petab.ESTIMATE: [0, 1, 0], - }) + measurement_df = pd.DataFrame( + { + petab.OBSERVABLE_ID: ["observable_1"], + petab.SIMULATION_CONDITION_ID: ["condition_1"], + petab.TIME: [0.0], + } + ) + + parameter_df = pd.DataFrame( + { + petab.PARAMETER_ID: [ + "parameter_1", + "parameter_2", + "not_a_model_parameter", + ], + petab.PARAMETER_SCALE: [petab.LOG10] * 3, + petab.NOMINAL_VALUE: [1.25, 2.25, 3.25], + petab.ESTIMATE: [0, 1, 0], + } + ) parameter_df.set_index([petab.PARAMETER_ID], inplace=True) return ss_model, condition_df, observable_df, measurement_df, parameter_df def check_model(condition_model): - assert condition_model.getSpecies( - "species_1").getInitialConcentration() == 15 - assert condition_model.getSpecies( - "species_2").getInitialConcentration() == 25 - assert condition_model.getSpecies( - "species_3").getInitialConcentration() == 1.25 - assert condition_model.getSpecies( - "species_4").getInitialConcentration() == 3.25 - assert len(condition_model.getListOfInitialAssignments()) == 0, \ - "InitialAssignment not removed" + assert ( + condition_model.getSpecies("species_1").getInitialConcentration() == 15 + ) + assert ( + condition_model.getSpecies("species_2").getInitialConcentration() == 25 + ) + assert ( + condition_model.getSpecies("species_3").getInitialConcentration() + == 1.25 + ) + assert ( + 
condition_model.getSpecies("species_4").getInitialConcentration() + == 3.25 + ) + assert ( + len(condition_model.getListOfInitialAssignments()) == 0 + ), "InitialAssignment not removed" assert condition_model.getCompartment("compartment_1").getSize() == 2.0 assert condition_model.getParameter("parameter_1").getValue() == 1.25 assert condition_model.getParameter("parameter_2").getValue() == 2.25 @@ -77,24 +95,30 @@ def check_model(condition_model): def test_get_condition_specific_models(): """Test for petab.sbml.get_condition_specific_models""" # retrieve test data - ss_model, condition_df, observable_df, measurement_df, parameter_df = \ - create_test_data() + ( + ss_model, + condition_df, + observable_df, + measurement_df, + parameter_df, + ) = create_test_data() petab_problem = petab.Problem( model=petab.models.sbml_model.SbmlModel(ss_model.model), condition_df=condition_df, observable_df=observable_df, measurement_df=measurement_df, - parameter_df=parameter_df + parameter_df=parameter_df, ) # create SBML model for condition with parameters updated from problem with pytest.warns( - UserWarning, - match="An SBML rule was removed to set the " - "component species_2 to a constant value." + UserWarning, + match="An SBML rule was removed to set the " + "component species_2 to a constant value.", ): _, condition_model = petab.get_model_for_condition( - petab_problem, "condition_1") + petab_problem, "condition_1" + ) check_model(condition_model) diff --git a/tests/test_simplify.py b/tests/test_simplify.py index e3c15cf4..3d9a8909 100644 --- a/tests/test_simplify.py +++ b/tests/test_simplify.py @@ -29,9 +29,11 @@ def problem() -> Problem: conditions_df = pd.DataFrame( { - CONDITION_ID: ["condition_used_1", - "condition_unused", - "condition_used_2"], + CONDITION_ID: [ + "condition_used_1", + "condition_unused", + "condition_used_2", + ], "some_parameter": [1.0, 2.0, 3.0], "same_value_for_all_conditions": [4.0] * 3, } @@ -39,15 +41,17 @@ def problem() -> Problem: conditions_df.set_index(CONDITION_ID, inplace=True) measurement_df = pd.DataFrame( - { - OBSERVABLE_ID: ["obs_used", "obs_used_2", "obs_used"], - MEASUREMENT: [1.0, 1.5, 2.0], - SIMULATION_CONDITION_ID: ["condition_used_1", - "condition_used_1", - "condition_used_2"], - TIME: [1.0] * 3, - } - ) + { + OBSERVABLE_ID: ["obs_used", "obs_used_2", "obs_used"], + MEASUREMENT: [1.0, 1.5, 2.0], + SIMULATION_CONDITION_ID: [ + "condition_used_1", + "condition_used_1", + "condition_used_2", + ], + TIME: [1.0] * 3, + } + ) yield Problem( model=SbmlModel(sbml_model=ss_model.getModel()), condition_df=conditions_df, @@ -58,25 +62,26 @@ def problem() -> Problem: def test_remove_nan_measurements(problem): expected = pd.DataFrame( - { - OBSERVABLE_ID: ["obs_used"] * 2, - MEASUREMENT: [1.0, 2.0], - SIMULATION_CONDITION_ID: - ["condition_used_1", "condition_used_2"], - TIME: [1.0] * 2, - } - ) + { + OBSERVABLE_ID: ["obs_used"] * 2, + MEASUREMENT: [1.0, 2.0], + SIMULATION_CONDITION_ID: ["condition_used_1", "condition_used_2"], + TIME: [1.0] * 2, + } + ) problem.measurement_df = pd.DataFrame( - { - OBSERVABLE_ID: ["obs_used", "obs_with_nan", "obs_used"], - MEASUREMENT: [1.0, nan, 2.0], - SIMULATION_CONDITION_ID: ["condition_used_1", - "condition_used_1", - "condition_used_2"], - TIME: [1.0] * 3, - } - ) + { + OBSERVABLE_ID: ["obs_used", "obs_with_nan", "obs_used"], + MEASUREMENT: [1.0, nan, 2.0], + SIMULATION_CONDITION_ID: [ + "condition_used_1", + "condition_used_1", + "condition_used_2", + ], + TIME: [1.0] * 3, + } + ) assert not 
problem.measurement_df.equals(expected) remove_nan_measurements(problem) @@ -86,12 +91,12 @@ def test_remove_nan_measurements(problem): def test_remove_unused_observables(problem): expected = pd.DataFrame( - { - OBSERVABLE_ID: ["obs_used", "obs_used_2"], - OBSERVABLE_FORMULA: [1.0, 3.0], - NOISE_FORMULA: [1.0, 3.0], - } - ) + { + OBSERVABLE_ID: ["obs_used", "obs_used_2"], + OBSERVABLE_FORMULA: [1.0, 3.0], + NOISE_FORMULA: [1.0, 3.0], + } + ) expected.set_index(OBSERVABLE_ID, inplace=True) assert not problem.observable_df.equals(expected) @@ -102,13 +107,12 @@ def test_remove_unused_observables(problem): def test_remove_unused_conditions(problem): expected = pd.DataFrame( - { - CONDITION_ID: ["condition_used_1", - "condition_used_2"], - "some_parameter": [1.0, 3.0], - "same_value_for_all_conditions": [4.0] * 2, - } - ) + { + CONDITION_ID: ["condition_used_1", "condition_used_2"], + "some_parameter": [1.0, 3.0], + "same_value_for_all_conditions": [4.0] * 2, + } + ) expected.set_index(CONDITION_ID, inplace=True) assert not problem.condition_df.equals(expected) @@ -120,23 +124,27 @@ def test_remove_unused_conditions(problem): def test_condition_parameters_to_parameter_table(problem): expected_conditions = pd.DataFrame( { - CONDITION_ID: ["condition_used_1", - "condition_unused", - "condition_used_2"], + CONDITION_ID: [ + "condition_used_1", + "condition_unused", + "condition_used_2", + ], "some_parameter": [1.0, 2.0, 3.0], } ) expected_conditions.set_index(CONDITION_ID, inplace=True) assert not problem.condition_df.equals(expected_conditions) - expected_parameters = pd.DataFrame({ + expected_parameters = pd.DataFrame( + { PARAMETER_ID: ["same_value_for_all_conditions"], PARAMETER_SCALE: [LIN], LOWER_BOUND: [nan], UPPER_BOUND: [nan], NOMINAL_VALUE: [4.0], ESTIMATE: [0], - }) + } + ) expected_parameters.set_index(PARAMETER_ID, inplace=True) assert problem.parameter_df is None diff --git a/tests/test_simulate.py b/tests/test_simulate.py index 25811a74..068a3a71 100644 --- a/tests/test_simulate.py +++ b/tests/test_simulate.py @@ -5,9 +5,10 @@ import numpy as np import pandas as pd -import petab import pytest import scipy.stats + +import petab from petab.C import MEASUREMENT @@ -24,8 +25,13 @@ def simulate_without_noise(self) -> pd.DataFrame: @pytest.fixture def petab_problem() -> petab.Problem: """Create a PEtab problem for use in tests.""" - petab_yaml_path = Path(__file__).parent.parent.absolute() / \ - 'doc' / 'example' / 'example_Fujita' / 'Fujita.yaml' + petab_yaml_path = ( + Path(__file__).parent.parent.absolute() + / "doc" + / "example" + / "example_Fujita" + / "Fujita.yaml" + ) return petab.Problem.from_yaml(str(petab_yaml_path)) @@ -35,13 +41,13 @@ def test_remove_working_dir(petab_problem): # The working directory exists assert Path(simulator.working_dir).is_dir() synthetic_data_df = simulator.simulate() - synthetic_data_df.to_csv(Path(simulator.working_dir, 'test.csv'), sep='\t') + synthetic_data_df.to_csv(Path(simulator.working_dir, "test.csv"), sep="\t") simulator.remove_working_dir() # The (non-empty) working directory is removed assert not Path(simulator.working_dir).is_dir() # Test creation and removal of a specified working directory - working_dir = Path('tests/test_simulate_working_dir') + working_dir = Path("tests/test_simulate_working_dir") simulator = TestSimulator(petab_problem, working_dir=working_dir) # The working directory is as specified assert working_dir == Path(simulator.working_dir) @@ -59,7 +65,7 @@ def test_remove_working_dir(petab_problem): # Test creation and 
removal of a specified non-empty working directory simulator = TestSimulator(petab_problem, working_dir=working_dir) synthetic_data_df = simulator.simulate() - synthetic_data_df.to_csv(f'{simulator.working_dir}/test.csv', sep='\t') + synthetic_data_df.to_csv(f"{simulator.working_dir}/test.csv", sep="\t") simulator.remove_working_dir(force=True) # The non-empty, user-specified directory is removed with `force=True` assert not Path(simulator.working_dir).is_dir() @@ -80,13 +86,14 @@ def test_zero_bounded(petab_problem): neg_indices = range(round(n_measurements / 2)) pos_indices = range(len(neg_indices), n_measurements) measurements = [ - negative if index in neg_indices else - (positive if index in pos_indices else np.nan) + negative + if index in neg_indices + else (positive if index in pos_indices else np.nan) for index in range(n_measurements) ] - synthetic_data_df = simulator.simulate(as_measurement=True).assign(**{ - petab.C.MEASUREMENT: measurements - }) + synthetic_data_df = simulator.simulate(as_measurement=True).assign( + **{petab.C.MEASUREMENT: measurements} + ) # All measurements are non-zero assert (synthetic_data_df[MEASUREMENT] != 0).all() # No measurements are NaN @@ -96,10 +103,12 @@ def test_zero_bounded(petab_problem): synthetic_data_df, ) # Both negative and positive values are returned by default. - assert all([ - (synthetic_data_df_with_noise[MEASUREMENT] <= 0).any(), - (synthetic_data_df_with_noise[MEASUREMENT] >= 0).any(), - ]) + assert all( + [ + (synthetic_data_df_with_noise[MEASUREMENT] <= 0).any(), + (synthetic_data_df_with_noise[MEASUREMENT] >= 0).any(), + ] + ) synthetic_data_df_with_noise = simulator.add_noise( synthetic_data_df, @@ -107,26 +116,36 @@ def test_zero_bounded(petab_problem): ) # Values with noise that are different in sign to values without noise are # zeroed. - assert all([ - (synthetic_data_df_with_noise[MEASUREMENT][neg_indices] <= 0).all(), - (synthetic_data_df_with_noise[MEASUREMENT][pos_indices] >= 0).all(), - (synthetic_data_df_with_noise[MEASUREMENT][neg_indices] == 0).any(), - (synthetic_data_df_with_noise[MEASUREMENT][pos_indices] == 0).any(), - (synthetic_data_df_with_noise[MEASUREMENT][neg_indices] < 0).any(), - (synthetic_data_df_with_noise[MEASUREMENT][pos_indices] > 0).any(), - ]) + assert all( + [ + ( + synthetic_data_df_with_noise[MEASUREMENT][neg_indices] <= 0 + ).all(), + ( + synthetic_data_df_with_noise[MEASUREMENT][pos_indices] >= 0 + ).all(), + ( + synthetic_data_df_with_noise[MEASUREMENT][neg_indices] == 0 + ).any(), + ( + synthetic_data_df_with_noise[MEASUREMENT][pos_indices] == 0 + ).any(), + (synthetic_data_df_with_noise[MEASUREMENT][neg_indices] < 0).any(), + (synthetic_data_df_with_noise[MEASUREMENT][pos_indices] > 0).any(), + ] + ) def test_add_noise(petab_problem): """Test the noise generating method.""" - tested_noise_distributions = {'normal', 'laplace'} + tested_noise_distributions = {"normal", "laplace"} assert set(petab.C.NOISE_MODELS) == tested_noise_distributions, ( - 'The noise generation methods have only been tested for ' - f'{tested_noise_distributions}. Please edit this test to include this ' - 'distribution in its tested distributions. The appropriate SciPy ' - 'distribution will need to be added to ' - '`petab_numpy2scipy_distribution` in `_test_add_noise`.' + "The noise generation methods have only been tested for " + f"{tested_noise_distributions}. Please edit this test to include this " + "distribution in its tested distributions. 
The appropriate SciPy " + "distribution will need to be added to " + "`petab_numpy2scipy_distribution` in `_test_add_noise`." ) for distribution in tested_noise_distributions: @@ -141,8 +160,8 @@ def _test_add_noise(petab_problem) -> None: ks_1samp_pvalue_threshold = 0.05 minimum_fraction_above_threshold = 0.9 petab_numpy2scipy_distribution = { - 'normal': 'norm', - 'laplace': 'laplace', + "normal": "norm", + "laplace": "laplace", } simulator = TestSimulator(petab_problem) @@ -162,8 +181,8 @@ def _test_add_noise(petab_problem) -> None: samples = np.array(samples) expected_noise_values = [ - noise_scaling_factor * - petab.calculate.evaluate_noise_formula( + noise_scaling_factor + * petab.calculate.evaluate_noise_formula( row, simulator.noise_formulas, petab_problem.parameter_df, @@ -172,10 +191,9 @@ def _test_add_noise(petab_problem) -> None: for _, row in synthetic_data_df.iterrows() ] expected_noise_distributions = [ - petab_problem - .observable_df - .loc[row[petab.C.OBSERVABLE_ID]] - .get(petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL) + petab_problem.observable_df.loc[row[petab.C.OBSERVABLE_ID]].get( + petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL + ) for _, row in synthetic_data_df.iterrows() ] @@ -199,29 +217,25 @@ def row2cdf(row, index) -> Callable: scipy.stats, petab_numpy2scipy_distribution[ expected_noise_distributions[index] - ] + ], ).cdf, loc=row[MEASUREMENT], - scale=expected_noise_values[index] + scale=expected_noise_values[index], ) # Test whether the distribution of the samples is equal to the expected # distribution, for each measurement. results = [] for index, row in synthetic_data_df.iterrows(): - results.append(scipy.stats.ks_1samp( - samples[:, index], - row2cdf(row, index) - )) - observed_fraction_above_threshold = ( - sum(r.pvalue > ks_1samp_pvalue_threshold for r in results) - / len(results) - ) + results.append( + scipy.stats.ks_1samp(samples[:, index], row2cdf(row, index)) + ) + observed_fraction_above_threshold = sum( + r.pvalue > ks_1samp_pvalue_threshold for r in results + ) / len(results) # Sufficient distributions of measurement samples are sufficiently similar # to the expected distribution - assert ( - observed_fraction_above_threshold > minimum_fraction_above_threshold - ) + assert observed_fraction_above_threshold > minimum_fraction_above_threshold simulator.remove_working_dir() assert not Path(simulator.working_dir).is_dir() diff --git a/tests/test_visualization.py b/tests/test_visualization.py index 39162c14..feb02a49 100644 --- a/tests/test_visualization.py +++ b/tests/test_visualization.py @@ -8,13 +8,17 @@ import petab from petab.C import * -from petab.visualize import plot_with_vis_spec, plot_without_vis_spec, \ - plot_residuals_vs_simulation, plot_goodness_of_fit -from petab.visualize.plotting import VisSpecParser +from petab.visualize import ( + plot_goodness_of_fit, + plot_residuals_vs_simulation, + plot_with_vis_spec, + plot_without_vis_spec, +) from petab.visualize.lint import validate_visualization_df +from petab.visualize.plotting import VisSpecParser # Avoid errors when plotting without X server -plt.switch_backend('agg') +plt.switch_backend("agg") EXAMPLE_DIR = Path(__file__).parents[1] / "doc" / "example" @@ -23,7 +27,7 @@ def close_fig(): """Close all open matplotlib figures""" yield - plt.close('all') + plt.close("all") @pytest.fixture @@ -38,38 +42,40 @@ def condition_file_Fujita(): @pytest.fixture def data_file_Fujita_wrongNoise(): - return EXAMPLE_DIR / "example_Fujita" \ - / "Fujita_measurementData_wrongNoise.tsv" + return ( + 
EXAMPLE_DIR + / "example_Fujita" + / "Fujita_measurementData_wrongNoise.tsv" + ) @pytest.fixture def data_file_Fujita_nanData(): - return EXAMPLE_DIR / "example_Fujita" \ - / "Fujita_measurementData_nanData.tsv" + return ( + EXAMPLE_DIR / "example_Fujita" / "Fujita_measurementData_nanData.tsv" + ) @pytest.fixture def simu_file_Fujita(): - return EXAMPLE_DIR / "example_Fujita" \ - / "Fujita_simulatedData.tsv" + return EXAMPLE_DIR / "example_Fujita" / "Fujita_simulatedData.tsv" @pytest.fixture def simu_file_Fujita_t_inf(): - return EXAMPLE_DIR / "example_Fujita" \ - / "Fujita_simulatedData_t_inf.tsv" + return EXAMPLE_DIR / "example_Fujita" / "Fujita_simulatedData_t_inf.tsv" @pytest.fixture def data_file_Fujita_minimal(): - return EXAMPLE_DIR / "example_Fujita"\ - / "Fujita_measurementData_minimal.tsv" + return ( + EXAMPLE_DIR / "example_Fujita" / "Fujita_measurementData_minimal.tsv" + ) @pytest.fixture def data_file_Fujita_t_inf(): - return EXAMPLE_DIR / "example_Fujita"\ - / "Fujita_measurementData_t_inf.tsv" + return EXAMPLE_DIR / "example_Fujita" / "Fujita_measurementData_t_inf.tsv" @pytest.fixture @@ -79,33 +85,47 @@ def visu_file_Fujita_small(): @pytest.fixture def visu_file_Fujita_wo_dsid_wo_yvalues(): - return EXAMPLE_DIR / "example_Fujita" / "visuSpecs" \ - / "Fujita_visuSpec_1.tsv" + return ( + EXAMPLE_DIR / "example_Fujita" / "visuSpecs" / "Fujita_visuSpec_1.tsv" + ) @pytest.fixture def visu_file_Fujita_all_obs_with_diff_settings(): - return EXAMPLE_DIR / "example_Fujita" / "visuSpecs" \ - / "Fujita_visuSpec_3.tsv" + return ( + EXAMPLE_DIR / "example_Fujita" / "visuSpecs" / "Fujita_visuSpec_3.tsv" + ) @pytest.fixture def visu_file_Fujita_minimal(): - return EXAMPLE_DIR / "example_Fujita" / "visuSpecs"\ - / "Fujita_visuSpec_mandatory.tsv" + return ( + EXAMPLE_DIR + / "example_Fujita" + / "visuSpecs" + / "Fujita_visuSpec_mandatory.tsv" + ) @pytest.mark.filterwarnings("ignore:Visualization table is empty") @pytest.fixture def visu_file_Fujita_empty(): - return EXAMPLE_DIR / "example_Fujita" / "visuSpecs" \ - / "Fujita_visuSpec_empty.tsv" + return ( + EXAMPLE_DIR + / "example_Fujita" + / "visuSpecs" + / "Fujita_visuSpec_empty.tsv" + ) @pytest.fixture def visu_file_Fujita_replicates(): - return EXAMPLE_DIR / "example_Fujita" / "visuSpecs" \ - / "Fujita_visuSpec_replicates.tsv" + return ( + EXAMPLE_DIR + / "example_Fujita" + / "visuSpecs" + / "Fujita_visuSpec_replicates.tsv" + ) @pytest.fixture @@ -115,26 +135,36 @@ def data_file_Isensee(): @pytest.fixture def condition_file_Isensee(): - return EXAMPLE_DIR / "example_Isensee" \ - / "Isensee_experimentalCondition.tsv" + return ( + EXAMPLE_DIR / "example_Isensee" / "Isensee_experimentalCondition.tsv" + ) @pytest.fixture def vis_spec_file_Isensee(): - return EXAMPLE_DIR / "example_Isensee" \ - / "Isensee_visualizationSpecification.tsv" + return ( + EXAMPLE_DIR + / "example_Isensee" + / "Isensee_visualizationSpecification.tsv" + ) @pytest.fixture def vis_spec_file_Isensee_replicates(): - return EXAMPLE_DIR / "example_Isensee" \ - / "Isensee_visualizationSpecification_replicates.tsv" + return ( + EXAMPLE_DIR + / "example_Isensee" + / "Isensee_visualizationSpecification_replicates.tsv" + ) @pytest.fixture def vis_spec_file_Isensee_scatterplot(): - return EXAMPLE_DIR / "example_Isensee" \ - / "Isensee_visualizationSpecification_scatterplot.tsv" + return ( + EXAMPLE_DIR + / "example_Isensee" + / "Isensee_visualizationSpecification_scatterplot.tsv" + ) @pytest.fixture @@ -142,257 +172,359 @@ def simulation_file_Isensee(): return EXAMPLE_DIR / 
"example_Isensee" / "Isensee_simulationData.tsv" -def test_visualization_with_vis_and_sim(data_file_Isensee, - condition_file_Isensee, - vis_spec_file_Isensee, - simulation_file_Isensee, - close_fig): +def test_visualization_with_vis_and_sim( + data_file_Isensee, + condition_file_Isensee, + vis_spec_file_Isensee, + simulation_file_Isensee, + close_fig, +): validate_visualization_df( petab.Problem( condition_df=petab.get_condition_df(condition_file_Isensee), visualization_df=petab.get_visualization_df(vis_spec_file_Isensee), ) ) - plot_with_vis_spec(vis_spec_file_Isensee, condition_file_Isensee, - data_file_Isensee, simulation_file_Isensee) + plot_with_vis_spec( + vis_spec_file_Isensee, + condition_file_Isensee, + data_file_Isensee, + simulation_file_Isensee, + ) -def test_visualization_replicates(data_file_Isensee, - condition_file_Isensee, - vis_spec_file_Isensee_replicates, - simulation_file_Isensee, - close_fig): - plot_with_vis_spec(vis_spec_file_Isensee_replicates, - condition_file_Isensee, - data_file_Isensee, simulation_file_Isensee) +def test_visualization_replicates( + data_file_Isensee, + condition_file_Isensee, + vis_spec_file_Isensee_replicates, + simulation_file_Isensee, + close_fig, +): + plot_with_vis_spec( + vis_spec_file_Isensee_replicates, + condition_file_Isensee, + data_file_Isensee, + simulation_file_Isensee, + ) -def test_visualization_scatterplot(data_file_Isensee, - condition_file_Isensee, - vis_spec_file_Isensee_scatterplot, - simulation_file_Isensee, - close_fig): - plot_with_vis_spec(vis_spec_file_Isensee_scatterplot, - condition_file_Isensee, - data_file_Isensee, simulation_file_Isensee) +def test_visualization_scatterplot( + data_file_Isensee, + condition_file_Isensee, + vis_spec_file_Isensee_scatterplot, + simulation_file_Isensee, + close_fig, +): + plot_with_vis_spec( + vis_spec_file_Isensee_scatterplot, + condition_file_Isensee, + data_file_Isensee, + simulation_file_Isensee, + ) -def test_visualization_small_visu_file_w_datasetid(data_file_Fujita, - condition_file_Fujita, - visu_file_Fujita_small, - close_fig): +def test_visualization_small_visu_file_w_datasetid( + data_file_Fujita, condition_file_Fujita, visu_file_Fujita_small, close_fig +): """ Test: visualization specification file only with few columns in particular datasetId (optional columns are optional) """ - plot_with_vis_spec(visu_file_Fujita_small, condition_file_Fujita, - data_file_Fujita) + plot_with_vis_spec( + visu_file_Fujita_small, condition_file_Fujita, data_file_Fujita + ) def test_visualization_small_visu_file_wo_datasetid( - data_file_Fujita, - condition_file_Fujita, - visu_file_Fujita_wo_dsid_wo_yvalues, - close_fig): + data_file_Fujita, + condition_file_Fujita, + visu_file_Fujita_wo_dsid_wo_yvalues, + close_fig, +): """ Test: visualization specification file only with few columns in particular no datasetId column (optional columns are optional) """ - plot_with_vis_spec(visu_file_Fujita_wo_dsid_wo_yvalues, - condition_file_Fujita, data_file_Fujita) + plot_with_vis_spec( + visu_file_Fujita_wo_dsid_wo_yvalues, + condition_file_Fujita, + data_file_Fujita, + ) def test_visualization_all_obs_with_diff_settings( - data_file_Fujita, - condition_file_Fujita, - visu_file_Fujita_all_obs_with_diff_settings, - close_fig): + data_file_Fujita, + condition_file_Fujita, + visu_file_Fujita_all_obs_with_diff_settings, + close_fig, +): """ Test: visualization specification file only with few columns. In particular, no datasetId column and no yValues column, but more than one plot id. 
Additionally, having plot id different from 'plot\\d+' for the case of vis_spec expansion is tested. """ - plot_with_vis_spec(visu_file_Fujita_all_obs_with_diff_settings, - condition_file_Fujita, data_file_Fujita) + plot_with_vis_spec( + visu_file_Fujita_all_obs_with_diff_settings, + condition_file_Fujita, + data_file_Fujita, + ) -def test_visualization_minimal_visu_file(data_file_Fujita, - condition_file_Fujita, - visu_file_Fujita_minimal, - close_fig): +def test_visualization_minimal_visu_file( + data_file_Fujita, + condition_file_Fujita, + visu_file_Fujita_minimal, + close_fig, +): """ Test: visualization specification file only with mandatory column plotId (optional columns are optional) """ - plot_with_vis_spec(visu_file_Fujita_minimal, condition_file_Fujita, - data_file_Fujita) + plot_with_vis_spec( + visu_file_Fujita_minimal, condition_file_Fujita, data_file_Fujita + ) -def test_visualization_empty_visu_file(data_file_Fujita, - condition_file_Fujita, - visu_file_Fujita_empty, - close_fig): +def test_visualization_empty_visu_file( + data_file_Fujita, condition_file_Fujita, visu_file_Fujita_empty, close_fig +): """ Test: Empty visualization specification file should default to routine for no file at all """ with pytest.warns(UserWarning, match="Visualization table is empty."): - plot_with_vis_spec(visu_file_Fujita_empty, condition_file_Fujita, - data_file_Fujita) + plot_with_vis_spec( + visu_file_Fujita_empty, condition_file_Fujita, data_file_Fujita + ) -def test_visualization_minimal_data_file(data_file_Fujita_minimal, - condition_file_Fujita, - visu_file_Fujita_wo_dsid_wo_yvalues, - close_fig): +def test_visualization_minimal_data_file( + data_file_Fujita_minimal, + condition_file_Fujita, + visu_file_Fujita_wo_dsid_wo_yvalues, + close_fig, +): """ Test visualization, with the case: data file only with mandatory columns (optional columns are optional) """ - plot_with_vis_spec(visu_file_Fujita_wo_dsid_wo_yvalues, - condition_file_Fujita, data_file_Fujita_minimal) + plot_with_vis_spec( + visu_file_Fujita_wo_dsid_wo_yvalues, + condition_file_Fujita, + data_file_Fujita_minimal, + ) -def test_visualization_with_dataset_list(data_file_Isensee, - condition_file_Isensee, - simulation_file_Isensee, - close_fig): - datasets = [['JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_ctrl', - 'JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_Fsk'], - ['JI09_160201_Drg453-452_CycNuc__ctrl', - 'JI09_160201_Drg453-452_CycNuc__Fsk', - 'JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM']] +def test_visualization_with_dataset_list( + data_file_Isensee, + condition_file_Isensee, + simulation_file_Isensee, + close_fig, +): + datasets = [ + [ + "JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_ctrl", + "JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_Fsk", + ], + [ + "JI09_160201_Drg453-452_CycNuc__ctrl", + "JI09_160201_Drg453-452_CycNuc__Fsk", + "JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM", + ], + ] # TODO: is condition_file needed here - plot_without_vis_spec(condition_file_Isensee, datasets, 'dataset', - data_file_Isensee) - - plot_without_vis_spec(condition_file_Isensee, datasets, 'dataset', - data_file_Isensee, simulation_file_Isensee) + plot_without_vis_spec( + condition_file_Isensee, datasets, "dataset", data_file_Isensee + ) + plot_without_vis_spec( + condition_file_Isensee, + datasets, + "dataset", + data_file_Isensee, + simulation_file_Isensee, + ) -def test_visualization_without_datasets(data_file_Fujita, - condition_file_Fujita, - simu_file_Fujita, - close_fig): - sim_cond_id_list = [['model1_data1'], ['model1_data2', 
'model1_data3'], - ['model1_data4', 'model1_data5'], ['model1_data6']] +def test_visualization_without_datasets( + data_file_Fujita, condition_file_Fujita, simu_file_Fujita, close_fig +): + sim_cond_id_list = [ + ["model1_data1"], + ["model1_data2", "model1_data3"], + ["model1_data4", "model1_data5"], + ["model1_data6"], + ] - observable_id_list = [['pS6_tot'], ['pEGFR_tot'], ['pAkt_tot']] + observable_id_list = [["pS6_tot"], ["pEGFR_tot"], ["pAkt_tot"]] - plot_without_vis_spec(condition_file_Fujita, sim_cond_id_list, - 'simulation', data_file_Fujita, - plotted_noise=PROVIDED) + plot_without_vis_spec( + condition_file_Fujita, + sim_cond_id_list, + "simulation", + data_file_Fujita, + plotted_noise=PROVIDED, + ) - plot_without_vis_spec(condition_file_Fujita, observable_id_list, - 'observable', data_file_Fujita, - plotted_noise=PROVIDED) + plot_without_vis_spec( + condition_file_Fujita, + observable_id_list, + "observable", + data_file_Fujita, + plotted_noise=PROVIDED, + ) # with simulations - plot_without_vis_spec(condition_file_Fujita, sim_cond_id_list, - 'simulation', data_file_Fujita, simu_file_Fujita, - plotted_noise=PROVIDED) - - plot_without_vis_spec(condition_file_Fujita, observable_id_list, - 'observable', data_file_Fujita, simu_file_Fujita, - plotted_noise=PROVIDED) + plot_without_vis_spec( + condition_file_Fujita, + sim_cond_id_list, + "simulation", + data_file_Fujita, + simu_file_Fujita, + plotted_noise=PROVIDED, + ) + plot_without_vis_spec( + condition_file_Fujita, + observable_id_list, + "observable", + data_file_Fujita, + simu_file_Fujita, + plotted_noise=PROVIDED, + ) -def test_visualization_only_simulations(condition_file_Fujita, - simu_file_Fujita, - close_fig): - sim_cond_id_list = [['model1_data1'], ['model1_data2', 'model1_data3'], - ['model1_data4', 'model1_data5'], ['model1_data6']] +def test_visualization_only_simulations( + condition_file_Fujita, simu_file_Fujita, close_fig +): + sim_cond_id_list = [ + ["model1_data1"], + ["model1_data2", "model1_data3"], + ["model1_data4", "model1_data5"], + ["model1_data6"], + ] - observable_id_list = [['pS6_tot'], ['pEGFR_tot'], ['pAkt_tot']] + observable_id_list = [["pS6_tot"], ["pEGFR_tot"], ["pAkt_tot"]] - plot_without_vis_spec(condition_file_Fujita, sim_cond_id_list, - 'simulation', simulations_df=simu_file_Fujita, - plotted_noise=PROVIDED) + plot_without_vis_spec( + condition_file_Fujita, + sim_cond_id_list, + "simulation", + simulations_df=simu_file_Fujita, + plotted_noise=PROVIDED, + ) - plot_without_vis_spec(condition_file_Fujita, observable_id_list, - 'observable', simulations_df=simu_file_Fujita, - plotted_noise=PROVIDED) + plot_without_vis_spec( + condition_file_Fujita, + observable_id_list, + "observable", + simulations_df=simu_file_Fujita, + plotted_noise=PROVIDED, + ) def test_simple_visualization( - data_file_Fujita, condition_file_Fujita, close_fig + data_file_Fujita, condition_file_Fujita, close_fig +): + plot_without_vis_spec( + condition_file_Fujita, measurements_df=data_file_Fujita + ) + plot_without_vis_spec( + condition_file_Fujita, + measurements_df=data_file_Fujita, + plotted_noise=PROVIDED, + ) + + +def test_visualization_with__t_inf( + data_file_Fujita_t_inf, + simu_file_Fujita_t_inf, + condition_file_Fujita, + visu_file_Fujita_replicates, + close_fig, ): - plot_without_vis_spec(condition_file_Fujita, - measurements_df=data_file_Fujita) - plot_without_vis_spec(condition_file_Fujita, - measurements_df=data_file_Fujita, - plotted_noise=PROVIDED) - - -def test_visualization_with__t_inf(data_file_Fujita_t_inf, 
- simu_file_Fujita_t_inf, - condition_file_Fujita, - visu_file_Fujita_replicates, - close_fig): # plot only measurements - plot_without_vis_spec(condition_file_Fujita, - measurements_df=data_file_Fujita_t_inf) + plot_without_vis_spec( + condition_file_Fujita, measurements_df=data_file_Fujita_t_inf + ) # plot only simulation - plot_without_vis_spec(condition_file_Fujita, - simulations_df=simu_file_Fujita_t_inf) + plot_without_vis_spec( + condition_file_Fujita, simulations_df=simu_file_Fujita_t_inf + ) # plot both measurements and simulation - plot_without_vis_spec(condition_file_Fujita, - measurements_df=data_file_Fujita_t_inf, - simulations_df=simu_file_Fujita_t_inf) + plot_without_vis_spec( + condition_file_Fujita, + measurements_df=data_file_Fujita_t_inf, + simulations_df=simu_file_Fujita_t_inf, + ) # plot both measurements and simulation - plot_with_vis_spec(visu_file_Fujita_replicates, - condition_file_Fujita, - measurements_df=data_file_Fujita_t_inf, - simulations_df=simu_file_Fujita_t_inf) + plot_with_vis_spec( + visu_file_Fujita_replicates, + condition_file_Fujita, + measurements_df=data_file_Fujita_t_inf, + simulations_df=simu_file_Fujita_t_inf, + ) -def test_save_plots_to_file(data_file_Isensee, condition_file_Isensee, - vis_spec_file_Isensee, simulation_file_Isensee, - close_fig): +def test_save_plots_to_file( + data_file_Isensee, + condition_file_Isensee, + vis_spec_file_Isensee, + simulation_file_Isensee, + close_fig, +): with TemporaryDirectory() as temp_dir: - plot_with_vis_spec(vis_spec_file_Isensee, condition_file_Isensee, - data_file_Isensee, simulation_file_Isensee, - subplot_dir=temp_dir) - + plot_with_vis_spec( + vis_spec_file_Isensee, + condition_file_Isensee, + data_file_Isensee, + simulation_file_Isensee, + subplot_dir=temp_dir, + ) -def test_save_visu_file(data_file_Isensee, - condition_file_Isensee): +def test_save_visu_file(data_file_Isensee, condition_file_Isensee): with TemporaryDirectory() as temp_dir: - - vis_spec_parser = VisSpecParser(condition_file_Isensee, - data_file_Isensee) + vis_spec_parser = VisSpecParser( + condition_file_Isensee, data_file_Isensee + ) figure, _ = vis_spec_parser.parse_from_id_list() with pytest.warns( - UserWarning, - match="please check that datasetId column corresponds to" + UserWarning, + match="please check that datasetId column corresponds to", ): figure.save_to_tsv(path.join(temp_dir, "visuSpec.tsv")) - datasets = [['JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_ctrl', - 'JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_Fsk'], - ['JI09_160201_Drg453-452_CycNuc__ctrl', - 'JI09_160201_Drg453-452_CycNuc__Fsk', - 'JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM']] + datasets = [ + [ + "JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_ctrl", + "JI09_150302_Drg345_343_CycNuc__4_ABnOH_and_Fsk", + ], + [ + "JI09_160201_Drg453-452_CycNuc__ctrl", + "JI09_160201_Drg453-452_CycNuc__Fsk", + "JI09_160201_Drg453-452_CycNuc__Sp8_Br_cAMPS_AM", + ], + ] - vis_spec_parser = VisSpecParser(condition_file_Isensee, - data_file_Isensee) - figure, _ = vis_spec_parser.parse_from_id_list(datasets, - group_by='dataset') + vis_spec_parser = VisSpecParser( + condition_file_Isensee, data_file_Isensee + ) + figure, _ = vis_spec_parser.parse_from_id_list( + datasets, group_by="dataset" + ) with pytest.warns( - UserWarning, - match="please check that datasetId column corresponds to" + UserWarning, + match="please check that datasetId column corresponds to", ): figure.save_to_tsv(path.join(temp_dir, "visuSpec1.tsv")) @@ -415,9 +547,12 @@ def test_cli(): with 
TemporaryDirectory() as temp_dir: args = [ "petab_visualize", - "-y", str(fujita_dir / "Fujita.yaml"), - "-s", str(fujita_dir / "Fujita_simulatedData.tsv"), - "-o", temp_dir + "-y", + str(fujita_dir / "Fujita.yaml"), + "-s", + str(fujita_dir / "Fujita_simulatedData.tsv"), + "-o", + temp_dir, ] subprocess.run(args, check=True) @@ -426,16 +561,16 @@ def test_cli(): @pytest.mark.parametrize( "vis_file", ( - "vis_spec_file_Isensee", - "vis_spec_file_Isensee_replicates", - "vis_spec_file_Isensee_scatterplot", - "visu_file_Fujita_wo_dsid_wo_yvalues", - "visu_file_Fujita_all_obs_with_diff_settings", - "visu_file_Fujita_empty", - "visu_file_Fujita_minimal", - "visu_file_Fujita_replicates", - "visu_file_Fujita_small", - ) + "vis_spec_file_Isensee", + "vis_spec_file_Isensee_replicates", + "vis_spec_file_Isensee_scatterplot", + "visu_file_Fujita_wo_dsid_wo_yvalues", + "visu_file_Fujita_all_obs_with_diff_settings", + "visu_file_Fujita_empty", + "visu_file_Fujita_minimal", + "visu_file_Fujita_replicates", + "visu_file_Fujita_small", + ), ) def test_validate(vis_file, request): """Check that all test files pass validation.""" diff --git a/tests/test_visualization_data_overview.py b/tests/test_visualization_data_overview.py index 34193718..c1d89500 100644 --- a/tests/test_visualization_data_overview.py +++ b/tests/test_visualization_data_overview.py @@ -6,13 +6,13 @@ def test_data_overview(): - """Data overview generation with Fujita example data from this repository - """ + """Data overview generation with Fujita example data from this repository""" with TemporaryDirectory() as temp_dir: - outfile = Path(temp_dir) / 'Fujita.html' + outfile = Path(temp_dir) / "Fujita.html" repo_root = Path(__file__).parent.parent - yaml_filename = (repo_root / 'doc' / 'example' / 'example_Fujita' - / 'Fujita.yaml') + yaml_filename = ( + repo_root / "doc" / "example" / "example_Fujita" / "Fujita.yaml" + ) problem = petab.Problem.from_yaml(yaml_filename) - create_report(problem, 'Fujita', output_path=temp_dir) + create_report(problem, "Fujita", output_path=temp_dir) assert outfile.is_file() diff --git a/tests/test_yaml.py b/tests/test_yaml.py index ff0f6454..f739c50b 100644 --- a/tests/test_yaml.py +++ b/tests/test_yaml.py @@ -4,21 +4,25 @@ import pytest from jsonschema.exceptions import ValidationError + from petab.yaml import create_problem_yaml, validate def test_validate(): - data = { - 'format_version': '1' - } + data = {"format_version": "1"} # should fail because we miss some information with pytest.raises(ValidationError): validate(data) # should be well-formed - file_ = Path(__file__).parents[1] / "doc" / "example" / "example_Fujita"\ + file_ = ( + Path(__file__).parents[1] + / "doc" + / "example" + / "example_Fujita" / "Fujita.yaml" + ) validate(file_) @@ -33,12 +37,24 @@ def test_create_problem_yaml(): observable_file = Path(outdir, "observables.tsv") yaml_file = Path(outdir, "problem.yaml") visualization_file = Path(outdir, "visualization.tsv") - for file in (sbml_file, condition_file, measurement_file, - parameter_file, observable_file, visualization_file): + for file in ( + sbml_file, + condition_file, + measurement_file, + parameter_file, + observable_file, + visualization_file, + ): file.touch() - create_problem_yaml(sbml_file, condition_file, measurement_file, - parameter_file, observable_file, yaml_file, - visualization_file) + create_problem_yaml( + sbml_file, + condition_file, + measurement_file, + parameter_file, + observable_file, + yaml_file, + visualization_file, + ) validate(yaml_file) # test for 
list of files @@ -48,14 +64,24 @@ def test_create_problem_yaml(): measurement_file2 = Path(outdir, "measurements2.tsv") observable_file2 = Path(outdir, "observables2.tsv") yaml_file2 = Path(outdir, "problem2.yaml") - for file in (sbml_file2, condition_file2, measurement_file2, - observable_file2): + for file in ( + sbml_file2, + condition_file2, + measurement_file2, + observable_file2, + ): file.touch() sbml_files = [sbml_file, sbml_file2] condition_files = [condition_file, condition_file2] measurement_files = [measurement_file, measurement_file2] observable_files = [observable_file, observable_file2] - create_problem_yaml(sbml_files, condition_files, measurement_files, - parameter_file, observable_files, yaml_file2) + create_problem_yaml( + sbml_files, + condition_files, + measurement_files, + parameter_file, + observable_files, + yaml_file2, + ) validate(yaml_file2) From cded7adce7304d9d2414eae1ca6c20ef32dc2159 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sun, 17 Sep 2023 15:54:48 +0200 Subject: [PATCH 5/6] Version 0.2.4, release notes --- CHANGELOG.md | 13 +++++++++++++ petab/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a64d4f9..a84fb517 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,19 @@ ## 0.2 series +### 0.2.4 + +* Made figure sizes for visualization functions customizable via `petab.visualize.plotting.DEFAULT_FIGSIZE` + by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/222 +* Fixed Handling missing `nominalValue` in `Problem.get_x_nominal` + by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/223 +* Fixed pandas 2.1.0 `FutureWarnings` + by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/226 +* Added pre-commit-config, ran black, isort, ... + by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/225 + +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.2.3...v0.2.4 + ### 0.2.3 * Fixed validation failures in case of missing optional fields in visualization tables diff --git a/petab/version.py b/petab/version.py index d35910e3..9ec4593a 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,2 +1,2 @@ """PEtab library version""" -__version__ = "0.2.3" +__version__ = "0.2.4" From 82d4b2d983b51ce9daaa410be72971ea05f40876 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 18 Sep 2023 09:11:27 +0200 Subject: [PATCH 6/6] Update copyright year --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 7e3932ae..d29b2984 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,7 +20,7 @@ # -- Project information ----------------------------------------------------- project = "libpetab-python" -copyright = "2018, the PEtab developers" +copyright = "2018-2023, the PEtab developers" author = "PEtab developers" # The full version, including alpha/beta/rc tags