diff --git a/docs/conf.py b/docs/conf.py index cb90b729..387af6b0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ import sys sys.path.insert(0, os.path.abspath(".")) -from pastastore import __version__ +from pastastore import __version__ # noqa: E402 # -- Project information ----------------------------------------------------- @@ -125,7 +125,6 @@ # # html_sidebars = {} - # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. diff --git a/examples/notebooks/ex02_pastastore_plots_and_maps.ipynb b/examples/notebooks/ex02_pastastore_plots_and_maps.ipynb index 4c2869ce..71ce3365 100644 --- a/examples/notebooks/ex02_pastastore_plots_and_maps.ipynb +++ b/examples/notebooks/ex02_pastastore_plots_and_maps.ipynb @@ -31,10 +31,6 @@ "metadata": {}, "outputs": [], "source": [ - "import sys\n", - "import os\n", - "\n", - "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import pastastore as pst\n", "import pastas as ps\n", diff --git a/examples/notebooks/ex03_pastastore_yaml_interface.ipynb b/examples/notebooks/ex03_pastastore_yaml_interface.ipynb index 073f445f..20cfd4b5 100644 --- a/examples/notebooks/ex03_pastastore_yaml_interface.ipynb +++ b/examples/notebooks/ex03_pastastore_yaml_interface.ipynb @@ -163,7 +163,7 @@ "metadata": {}, "outputs": [], "source": [ - "from pastastore.datasets import example_pastastore" + "from pastastore.datasets import example_pastastore # noqa: E402" ] }, { @@ -364,7 +364,7 @@ ], "source": [ "ml.solve(report=False)\n", - "ml.plots.results();" + "ml.plots.results()" ] }, { @@ -802,7 +802,7 @@ ], "source": [ "ml.solve(report=False)\n", - "ml.plots.results();" + "ml.plots.results()" ] }, { @@ -910,7 +910,7 @@ ], "source": [ "ml.solve(report=False)\n", - "ml.plots.results();" + "ml.plots.results()" ] }, { diff --git a/pastastore/__init__.py b/pastastore/__init__.py index 1ae7869b..6f258d00 100644 --- a/pastastore/__init__.py +++ b/pastastore/__init__.py @@ -1,10 +1,11 @@ -from . import connectors, util -from .connectors import ( +# ruff: noqa: F401 +from pastastore import connectors, util +from pastastore.connectors import ( ArcticConnector, ArcticDBConnector, DictConnector, PasConnector, PystoreConnector, ) -from .store import PastaStore -from .version import __version__ +from pastastore.store import PastaStore +from pastastore.version import __version__ diff --git a/pastastore/base.py b/pastastore/base.py index ecf5517e..7434e9aa 100644 --- a/pastastore/base.py +++ b/pastastore/base.py @@ -11,10 +11,10 @@ from numpy import isin from packaging.version import parse as parse_version from pastas.io.pas import PastasEncoder -from tqdm import tqdm +from tqdm.auto import tqdm -from .util import ItemInLibraryException, _custom_warning, validate_names -from .version import PASTAS_LEQ_022 +from pastastore.util import ItemInLibraryException, _custom_warning, validate_names +from pastastore.version import PASTAS_LEQ_022 FrameorSeriesUnion = Union[pd.DataFrame, pd.Series] warnings.showwarning = _custom_warning @@ -934,13 +934,24 @@ def empty_library( ) if ui.lower() != "y": return - names = self._parse_names(None, libname) - for name in ( - tqdm(names, desc=f"Deleting items from {libname}") if progressbar else names - ): - self._del_item(libname, name) - self._clear_cache(libname) - print(f"Emptied library {libname} in {self.name}: " f"{self.__class__}") + + if libname == "models": + # also delete linked modelnames linked to oseries + libs = ["models", "oseries_models"] + else: + libs = [libname] + + # delete items and clear caches + for libname in libs: + names = self._parse_names(None, libname) + for name in ( + tqdm(names, desc=f"Deleting items from {libname}") + if progressbar + else names + ): + self._del_item(libname, name) + self._clear_cache(libname) + print(f"Emptied library {libname} in {self.name}: " f"{self.__class__}") def _iter_series(self, libname: str, names: Optional[List[str]] = None): """Internal method iterate over time series in library. diff --git a/pastastore/connectors.py b/pastastore/connectors.py index 146f3bb5..465be383 100644 --- a/pastastore/connectors.py +++ b/pastastore/connectors.py @@ -8,8 +8,8 @@ import pandas as pd from pastas.io.pas import PastasEncoder, pastas_hook -from .base import BaseConnector, ConnectorUtil, ModelAccessor -from .util import _custom_warning +from pastastore.base import BaseConnector, ConnectorUtil, ModelAccessor +from pastastore.util import _custom_warning FrameorSeriesUnion = Union[pd.DataFrame, pd.Series] warnings.showwarning = _custom_warning diff --git a/pastastore/plotting.py b/pastastore/plotting.py index 1f4aa1ae..834f19fe 100644 --- a/pastastore/plotting.py +++ b/pastastore/plotting.py @@ -14,9 +14,6 @@ ax = pstore.maps.oseries() pstore.maps.add_background_map(ax) # for adding a background map """ - -from collections.abc import Iterable - import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -96,15 +93,11 @@ def _timeseries( if ax is None: if split: - fig, axes = plt.subplots(len(names), 1, sharex=True, figsize=figsize) + _, axes = plt.subplots(len(names), 1, sharex=True, figsize=figsize) else: - fig, axes = plt.subplots(1, 1, figsize=figsize) + _, axes = plt.subplots(1, 1, figsize=figsize) else: axes = ax - if isinstance(axes, Iterable): - fig = axes[0].figure - else: - fig = axes.figure tsdict = self.pstore.conn._get_series( libname, names, progressbar=progressbar, squeeze=False @@ -397,20 +390,31 @@ def _data_availability( linewidth=0, rasterized=True, ) + # make a colorbar in an ax on the # right side, then set the current axes to ax again cb = fig.colorbar(pc, ax=ax, cax=cax, extend="both") cb.set_ticks(bounds) cb.ax.set_yticklabels(labels) cb.ax.minorticks_off() + if set_yticks: - ax.set_yticks(np.arange(0.5, len(series) + 0.5)) + ax.set_yticks(np.arange(0.5, len(series) + 0.5), minor=False) + ax.set_yticks(np.arange(0, len(series) + 1), minor=True) if names is None: names = [s.name for s in series] ax.set_yticklabels(names) + + for tick in ax.yaxis.get_major_ticks(): # don't show major ytick marker + tick.tick1line.set_visible(False) + + ax.grid(True, which="minor", axis="y") + ax.grid(True, which="major", axis="x") + else: ax.set_ylabel("Timeseries (-)") - ax.grid(True) + ax.grid(True, which="both") + ax.grid(True, which="both") return ax @@ -712,6 +716,7 @@ def models( def modelstat( self, statistic, + modelnames=None, label=True, adjust=False, cmap="viridis", @@ -728,6 +733,8 @@ def modelstat( ---------- statistic: str name of the statistic, e.g. "evp" or "aic" + modelnames : list of str, optional + list of modelnames to include label: bool, optional label points, by default True adjust: bool, optional @@ -757,7 +764,9 @@ def modelstat( -------- self.add_background_map """ - statsdf = self.pstore.get_statistics([statistic], progressbar=False).to_frame() + statsdf = self.pstore.get_statistics( + [statistic], modelnames=modelnames, progressbar=False + ).to_frame() statsdf["oseries"] = [ self.pstore.get_models(m, return_dict=True)["oseries"]["name"] diff --git a/pastastore/store.py b/pastastore/store.py index c39fee1a..81c7c3e3 100644 --- a/pastastore/store.py +++ b/pastastore/store.py @@ -8,11 +8,13 @@ import pastas as ps from packaging.version import parse as parse_version from pastas.io.pas import pastas_hook -from tqdm import tqdm +from tqdm.auto import tqdm -from .plotting import Maps, Plots -from .util import _custom_warning -from .yaml_interface import PastastoreYAML +from pastastore.base import BaseConnector +from pastastore.connectors import DictConnector +from pastastore.plotting import Maps, Plots +from pastastore.util import _custom_warning +from pastastore.yaml_interface import PastastoreYAML FrameorSeriesUnion = Union[pd.DataFrame, pd.Series] warnings.showwarning = _custom_warning @@ -38,14 +40,19 @@ class PastaStore: name of the PastaStore, by default takes the name of the Connector object """ - def __init__(self, connector, name: str = None): + def __init__( + self, + connector: Optional[BaseConnector] = None, + name: Optional[str] = None, + ): """Initialize PastaStore for managing pastas time series and models. Parameters ---------- - connector : Connector object - object that provides the interface to the - database + connector : Connector object, optional + object that provides the connection to the database. Default is None, which + will create a DictConnector. This default Connector does not store data on + disk. name : str, optional name of the PastaStore, if not provided uses the Connector name """ @@ -53,6 +60,8 @@ def __init__(self, connector, name: str = None): raise DeprecationWarning( "PastaStore expects the connector as the first argument since v1.1!" ) + if connector is None: + connector = DictConnector("pastas_db") self.conn = connector self.name = name if name is not None else self.conn.name self._register_connector_methods() @@ -300,6 +309,81 @@ def get_nearest_stresses( data = pd.concat([data, series], axis=0) return data + def get_signatures( + self, + signatures=None, + names=None, + libname="oseries", + progressbar=False, + ignore_errors=False, + ): + """Get groundwater signatures. NaN-values are returned when the + signature could not be computed. + + Parameters + ---------- + signatures : list of str, optional + list of groundwater signatures to compute, if None all groundwater + signatures in ps.stats.signatures.__all__ are used, by default None + names : str, list of str, or None, optional + names of the time series, by default None which + uses all the time series in the library + libname : str + name of the library containing the time series + ('oseries' or 'stresses'), by default "oseries" + progressbar : bool, optional + show progressbar, by default False + ignore_errors : bool, optional + ignore errors when True, i.e. when non-existent timeseries is + encountered in names, by default False + + Returns + ------- + signatures_df : pandas.DataFrame + DataFrame containing the signatures (columns) per time series (rows) + """ + names = self.conn._parse_names(names, libname=libname) + + if signatures is None: + signatures = ps.stats.signatures.__all__.copy() + + # create dataframe for results + signatures_df = pd.DataFrame(index=names, columns=signatures, data=np.nan) + + # loop through oseries names + desc = "Get groundwater signatures" + for name in tqdm(names, desc=desc) if progressbar else names: + try: + if libname == "oseries": + s = self.conn.get_oseries(name) + else: + s = self.conn.get_stresses(name) + except Exception as e: + if ignore_errors: + signatures_df.loc[name, :] = np.nan + continue + else: + raise e + + try: + i_signatures = ps.stats.signatures.summary(s.squeeze(), signatures) + except Exception as e: + if ignore_errors: + i_signatures = [] + for signature in signatures: + try: + sign_val = getattr(ps.stats.signatures, signature)( + s.squeeze() + ) + except Exception as _: + sign_val = np.nan + i_signatures.append(sign_val) + else: + raise e + signatures_df.loc[name, signatures] = i_signatures + + return signatures_df + def get_tmin_tmax(self, libname, names=None, progressbar=False): """Get tmin and tmax for time series. @@ -334,6 +418,23 @@ def get_tmin_tmax(self, libname, names=None, progressbar=False): tmintmax.loc[n, "tmax"] = s.last_valid_index() return tmintmax + def get_extent(self, libname, names=None, buffer=0.0): + names = self.conn._parse_names(names, libname=libname) + if libname in ["oseries", "stresses"]: + df = getattr(self, libname) + elif libname == "models": + df = self.oseries + else: + raise ValueError(f"Cannot get extent for library '{libname}'.") + + extent = [ + df.loc[names, "x"].min() - buffer, + df.loc[names, "x"].max() + buffer, + df.loc[names, "y"].min() - buffer, + df.loc[names, "y"].max() + buffer, + ] + return extent + def get_parameters( self, parameters: Optional[List[str]] = None, @@ -428,13 +529,13 @@ def get_statistics( modelnames = self.conn._parse_names(modelnames, libname="models") - # create dataframe for results - s = pd.DataFrame(index=modelnames, columns=statistics, data=np.nan) - # if statistics is str if isinstance(statistics, str): statistics = [statistics] + # create dataframe for results + s = pd.DataFrame(index=modelnames, columns=statistics, data=np.nan) + # loop through model names desc = "Get model statistics" for mlname in tqdm(modelnames, desc=desc) if progressbar else modelnames: @@ -836,7 +937,7 @@ def export_model_series_to_csv( def from_zip( cls, fname: str, - conn, + conn: Optional[BaseConnector] = None, storename: Optional[str] = None, progressbar: bool = True, ): @@ -846,8 +947,9 @@ def from_zip( ---------- fname : str pathname of zipfile - conn : Connector object - connector for storing loaded data + conn : Connector object, optional + connector for storing loaded data, default is None which creates a + DictConnector. This Connector does not store data on disk. storename : str, optional name of the PastaStore, by default None, which defaults to the name of the Connector. @@ -861,6 +963,9 @@ def from_zip( """ from zipfile import ZipFile + if conn is None: + conn = DictConnector("pastas_db") + with ZipFile(fname, "r") as archive: namelist = [ fi for fi in archive.namelist() if not fi.endswith("_meta.json") @@ -868,7 +973,7 @@ def from_zip( for f in tqdm(namelist, desc="Reading zip") if progressbar else namelist: libname, fjson = os.path.split(f) if libname in ["stresses", "oseries"]: - s = pd.read_json(archive.open(f), orient="columns") + s = pd.read_json(archive.open(f), dtype=float, orient="columns") if not isinstance(s.index, pd.DatetimeIndex): s.index = pd.to_datetime(s.index, unit="ms") s = s.sort_index() @@ -983,9 +1088,45 @@ def get_model_timeseries_names( structure.loc[mlnam, pnam] = 1 structure.loc[mlnam, enam] = 1 elif "stress" in sm: - for s in sm["stress"]: + smstress = sm["stress"] + if isinstance(smstress, dict): + smstress = [smstress] + for s in smstress: structure.loc[mlnam, s["name"]] = 1 if dropna: return structure.dropna(how="all", axis=1) else: return structure + + def apply(self, libname, func, names=None, progressbar=True): + """Apply function to items in library. + + Supported libraries are oseries, stresses, and models. + + Parameters + ---------- + libname : str + library name, supports "oseries", "stresses" and "models" + func : callable + function that accepts items from one of the supported libraries as input + names : str, list of str, optional + apply function to these names, by default None which loops over all stored + items in library + progressbar : bool, optional + show progressbar, by default True + + Returns + ------- + dict + dict of results of func, with names as keys and results as values + """ + names = self.conn._parse_names(names, libname) + result = {} + if libname not in ("oseries", "stresses", "models"): + raise ValueError( + "'libname' must be one of ['oseries', 'stresses', 'models']!" + ) + getter = getattr(self.conn, f"get_{libname}") + for n in tqdm(names) if progressbar else names: + result[n] = func(getter(n)) + return result diff --git a/pastastore/util.py b/pastastore/util.py index 70ab042a..313c0396 100644 --- a/pastastore/util.py +++ b/pastastore/util.py @@ -6,9 +6,9 @@ from numpy.lib._iotools import NameValidator from pandas.testing import assert_series_equal from pastas.stats.tests import runs_test, stoffer_toloi -from tqdm import tqdm +from tqdm.auto import tqdm -from .version import PASTAS_LEQ_022 +from pastastore.version import PASTAS_LEQ_022 def _custom_warning(message, category=UserWarning, filename="", lineno=-1, *args): @@ -704,6 +704,8 @@ def frontiers_checks( ) else: tmem = ml.get_response_tmax(sm_name) + if tmem is None: # no rfunc in stressmodel + tmem = 0 check_tmem_passed = tmem < len_oseries_calib / 2 checks.loc[f"calib_period > 2*t_mem_95%: {sm_name}", :] = ( tmem, @@ -736,23 +738,30 @@ def frontiers_checks( "(unit head)/(unit well stress)", check_gain_passed, ) + continue + elif sm._name == "LinearTrend": + gain = ml.parameters.loc[f"{sm_name}_a", "optimal"] + gain_std = ml.parameters.loc[f"{sm_name}_a", "stderr"] + elif sm._name == "StepModel": + gain = ml.parameters.loc[f"{sm_name}_d", "optimal"] + gain_std = ml.parameters.loc[f"{sm_name}_d", "stderr"] else: gain = ml.parameters.loc[f"{sm_name}_A", "optimal"] gain_std = ml.parameters.loc[f"{sm_name}_A", "stderr"] - if gain_std is None: - gain_std = np.nan - check_gain_passed = pd.NA - elif np.isnan(gain_std): - check_gain_passed = pd.NA - else: - check_gain_passed = np.abs(gain) > 2 * gain_std + + if gain_std is None: + gain_std = np.nan + check_gain_passed = pd.NA + elif np.isnan(gain_std): + check_gain_passed = pd.NA + else: check_gain_passed = np.abs(gain) > 2 * gain_std - checks.loc[f"gain > 2*std: {sm_name}", :] = ( - gain, - 2 * gain_std, - "(unit head)/(unit well stress)", - check_gain_passed, - ) + checks.loc[f"gain > 2*std: {sm_name}", :] = ( + gain, + 2 * gain_std, + "(unit head)/(unit well stress)", + check_gain_passed, + ) # Check 5 - Parameter Bounds if check5_parambounds: diff --git a/pastastore/version.py b/pastastore/version.py index 3afd6c04..e832ee3f 100644 --- a/pastastore/version.py +++ b/pastastore/version.py @@ -4,4 +4,4 @@ PASTAS_VERSION = parse_version(ps.__version__) PASTAS_LEQ_022 = PASTAS_VERSION <= parse_version("0.22.0") -__version__ = "1.2.2" +__version__ = "1.3.0" diff --git a/pastastore/yaml_interface.py b/pastastore/yaml_interface.py index 00a28a13..4d07c352 100644 --- a/pastastore/yaml_interface.py +++ b/pastastore/yaml_interface.py @@ -2,14 +2,14 @@ import logging import os from copy import deepcopy -from typing import Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union import numpy as np import pandas as pd import pastas as ps import yaml -from .version import PASTAS_LEQ_022 +from pastastore.version import PASTAS_LEQ_022 ps.logger.setLevel("ERROR") @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) -def _convert_dict_dtypes_for_yaml(d: Dict): +def _convert_dict_dtypes_for_yaml(d: Dict[str, Any]): """Internal method to convert dictionary values for storing in YAML format. Parameters diff --git a/tests/test_001_import.py b/tests/test_001_import.py index f8d84565..6522e5af 100644 --- a/tests/test_001_import.py +++ b/tests/test_001_import.py @@ -4,4 +4,4 @@ def test_import(): with warnings.catch_warnings(): warnings.simplefilter(action="ignore", category=FutureWarning) - import pastastore + import pastastore # noqa: F401 diff --git a/tests/test_002_connectors.py b/tests/test_002_connectors.py index 8989218f..f99d0b5a 100644 --- a/tests/test_002_connectors.py +++ b/tests/test_002_connectors.py @@ -14,7 +14,7 @@ def test_get_library(conn): - olib = conn._get_library("oseries") + _ = conn._get_library("oseries") def test_add_get_series(request, conn): @@ -206,13 +206,13 @@ def test_add_stress(conn): @pytest.mark.dependency() def test_get_oseries(request, conn): depends(request, [f"test_add_oseries[{conn.type}]"]) - o = conn.get_oseries("oseries1") + _ = conn.get_oseries("oseries1") @pytest.mark.dependency() def test_get_oseries_and_metadata(request, conn): depends(request, [f"test_add_oseries[{conn.type}]"]) - o, m = conn.get_oseries("oseries1", return_metadata=True) + _ = conn.get_oseries("oseries1", return_metadata=True) @pytest.mark.dependency() @@ -225,7 +225,7 @@ def test_get_stress(request, conn): @pytest.mark.dependency() def test_get_stress_and_metadata(request, conn): depends(request, [f"test_add_stress[{conn.type}]"]) - s, m = conn.get_stresses("prec", return_metadata=True) + s, _ = conn.get_stresses("prec", return_metadata=True) s.name = "prec" diff --git a/tests/test_003_pastastore.py b/tests/test_003_pastastore.py index a7d398b6..29feae3e 100644 --- a/tests/test_003_pastastore.py +++ b/tests/test_003_pastastore.py @@ -39,7 +39,7 @@ def test_search(pstore): @pytest.mark.dependency() def test_create_model(pstore): - ml = pstore.create_model("oseries1") + _ = pstore.create_model("oseries1") @pytest.mark.dependency() @@ -139,7 +139,7 @@ def test_get_model(request, pstore): f"test_store_model_missing_series[{pstore.type}]", ], ) - ml = pstore.conn.get_models("oseries1") + _ = pstore.conn.get_models("oseries1") @pytest.mark.dependency() @@ -158,7 +158,7 @@ def test_del_model(request, pstore): @pytest.mark.dependency() def test_create_models(pstore): - mls = pstore.create_models_bulk( + _ = pstore.create_models_bulk( ["oseries1", "oseries2"], store=True, progressbar=False ) _ = pstore.conn.models @@ -172,6 +172,13 @@ def test_get_parameters(request, pstore): assert p.isna().sum().sum() == 0 +@pytest.mark.dependency() +def test_get_signatures(request, pstore): + depends(request, [f"test_create_models[{pstore.type}]"]) + s = pstore.get_signatures(progressbar=False) + assert s.shape[1] == len(ps.stats.signatures.__all__) + + @pytest.mark.dependency() def test_iter_models(request, pstore): depends(request, [f"test_create_models[{pstore.type}]"]) @@ -181,13 +188,24 @@ def test_iter_models(request, pstore): @pytest.mark.dependency() def test_solve_models_and_get_stats(request, pstore): depends(request, [f"test_create_models[{pstore.type}]"]) - mls = pstore.solve_models( + _ = pstore.solve_models( ignore_solve_errors=False, progressbar=False, store_result=True ) stats = pstore.get_statistics(["evp", "aic"], progressbar=False) assert stats.index.size == 2 +@pytest.mark.dependency() +def test_apply(request, pstore): + depends(request, [f"test_solve_models_and_get_stats[{pstore.type}]"]) + + def func(ml): + return ml.parameters.loc["recharge_A", "optimal"] + + result = pstore.apply("models", func) + assert len(result) == 2 + + @pytest.mark.dependency() def test_save_and_load_model(request, pstore): ml = pstore.create_model("oseries2") diff --git a/tests/test_004_yaml.py b/tests/test_004_yaml.py index c2799127..fb165c0a 100644 --- a/tests/test_004_yaml.py +++ b/tests/test_004_yaml.py @@ -6,7 +6,6 @@ from pytest_dependency import depends import pastastore as pst -from pastastore.version import PASTAS_LEQ_022 @contextmanager