Support normalize_with_stats for area, zonal, and meridional statistics
schlunma committed Nov 22, 2023
1 parent 6195bc2 commit 1cefc1e
Showing 3 changed files with 201 additions and 15 deletions.
39 changes: 35 additions & 4 deletions doc/recipe/preprocessor.rst
@@ -1904,6 +1904,10 @@ along the longitude coordinate.
Parameters:
* `operator`: Operation to apply.
See :ref:`stat_preprocs` for more details on supported statistics.
* `normalize_with_stats`: If given, do not return the statistics cube
itself, but rather the input cube normalized with the statistics cube.
Can either be `subtract` (statistics cube is subtracted from the input
cube) or `divide` (input cube is divided by the statistics cube).
* Other parameters are directly passed to the `operator` as keyword
arguments.
See :ref:`stat_preprocs` for more details.
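
The ``normalize_with_stats`` option described above can be illustrated with a short
recipe sketch (added here for illustration only, not part of this diff; it assumes the
recipe step carries the same name as the ``zonal_statistics`` function):

.. code-block:: yaml

    zonal_statistics:
      operator: mean
      normalize_with_stats: subtract

This would replace each grid point with its anomaly relative to the zonal mean instead
of returning the zonal-mean cube itself.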
@@ -1921,6 +1925,10 @@ argument:
Parameters:
* `operator`: Operation to apply.
See :ref:`stat_preprocs` for more details on supported statistics.
* `normalize_with_stats`: If given, do not return the statistics cube
itself, but rather the input cube normalized with the statistics cube.
Can either be `subtract` (statistics cube is subtracted from the input
cube) or `divide` (input cube is divided by the statistics cube).
* Other parameters are directly passed to the `operator` as keyword
arguments.
See :ref:`stat_preprocs` for more details.
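
As above, a hedged recipe sketch (illustrative only, not part of this diff) showing the
``divide`` variant for ``meridional_statistics``:

.. code-block:: yaml

    meridional_statistics:
      operator: max
      normalize_with_stats: divide

Here each value would be divided by the meridional maximum, yielding a dimensionless
result (units become ``1``), as exercised by the new unit tests below.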
@@ -1933,10 +1941,6 @@ See also :func:`esmvalcore.preprocessor.meridional_means`.
``area_statistics``
-------------------

This function calculates statistics over a region.
It takes one argument, ``operator``, which is the name of the operation to
apply.

This function can be used to apply several different operations in the
horizontal plane: for example, mean, sum, standard deviation, median, variance,
minimum, maximum and root mean square.
@@ -1955,6 +1959,33 @@ The required supplementary variable, either ``areacella`` for atmospheric
variables or ``areacello`` for ocean variables, can be attached to the main
dataset as described in :ref:`supplementary_variables`.

Parameters:
* `operator`: Operation to apply.
See :ref:`stat_preprocs` for more details on supported statistics.
* `normalize_with_stats`: If given, do not return the statistics cube
itself, but rather the input cube normalized with the statistics cube.
Can either be `subtract` (statistics cube is subtracted from the input
cube) or `divide` (input cube is divided by the statistics cube).
* Other parameters are directly passed to the `operator` as keyword
arguments.
See :ref:`stat_preprocs` for more details.

Examples:

* Calculate global mean:

  .. code-block:: yaml

      area_statistics:
        operator: mean

* Subtract global mean from dataset:

  .. code-block:: yaml

      area_statistics:
        operator: mean
        normalize_with_stats: subtract

See also :func:`esmvalcore.preprocessor.area_statistics`.
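
A further hedged sketch (not among the examples added by this commit): the ``divide``
option expresses each value as a fraction of the global mean:

.. code-block:: yaml

    area_statistics:
      operator: mean
      normalize_with_stats: divide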


58 changes: 47 additions & 11 deletions esmvalcore/preprocessor/_area.py
@@ -8,7 +8,7 @@
import logging
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Iterable, Optional
from typing import TYPE_CHECKING, Iterable, Literal, Optional

import fiona
import iris
@@ -20,8 +20,13 @@
from iris.cube import Cube, CubeList
from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError

from ._shared import get_iris_aggregator, guess_bounds, update_weights_kwargs
from ._supplementary_vars import (
from esmvalcore.preprocessor._shared import (
get_iris_aggregator,
get_normalized_cube,
guess_bounds,
update_weights_kwargs,
)
from esmvalcore.preprocessor._supplementary_vars import (
add_ancillary_variable,
add_cell_measure,
register_supplementaries,
@@ -188,6 +193,7 @@ def _extract_irregular_region(cube, start_longitude, end_longitude,
def zonal_statistics(
cube: Cube,
operator: str,
normalize_with_stats: Optional[Literal['subtract', 'divide']] = None,
**operator_kwargs
) -> Cube:
"""Compute zonal statistics.
@@ -200,14 +206,20 @@
The operation. Used to determine the :class:`iris.analysis.Aggregator`
object used to calculate the statistics. Allowed options are given in
:ref:`this table <supported_stat_operator>`.
normalize_with_stats:
If given, do not return the statistics cube itself, but rather the
input cube normalized with the statistics cube. Can either be
`subtract` (statistics cube is subtracted from the input cube) or
`divide` (input cube is divided by the statistics cube).
**operator_kwargs:
Optional keyword arguments for the :class:`iris.analysis.Aggregator`
object defined by `operator`.
Returns
-------
iris.cube.Cube
Zonal statistics cube.
Zonal statistics cube or input cube normalized by statistics cube (see
`normalize_with_stats`).
Raises
------
@@ -221,14 +233,19 @@
"Zonal statistics on irregular grids not yet implemented"
)
(agg, agg_kwargs) = get_iris_aggregator(operator, **operator_kwargs)
cube = cube.collapsed('longitude', agg, **agg_kwargs)
cube.data = cube.core_data().astype(np.float32, casting='same_kind')
return cube
stat_cube = cube.collapsed('longitude', agg, **agg_kwargs)
if normalize_with_stats is not None:
result = get_normalized_cube(cube, stat_cube, normalize_with_stats)
else:
result = stat_cube
result.data = result.core_data().astype(np.float32, casting='same_kind')
return result


def meridional_statistics(
cube: Cube,
operator: str,
normalize_with_stats: Optional[Literal['subtract', 'divide']] = None,
**operator_kwargs,
) -> Cube:
"""Compute meridional statistics.
@@ -241,6 +258,11 @@
The operation. Used to determine the :class:`iris.analysis.Aggregator`
object used to calculate the statistics. Allowed options are given in
:ref:`this table <supported_stat_operator>`.
normalize_with_stats:
If given, do not return the statistics cube itself, but rather the
input cube normalized with the statistics cube. Can either be
`subtract` (statistics cube is subtracted from the input cube) or
`divide` (input cube is divided by the statistics cube).
**operator_kwargs:
Optional keyword arguments for the :class:`iris.analysis.Aggregator`
object defined by `operator`.
@@ -262,9 +284,13 @@
"Meridional statistics on irregular grids not yet implemented"
)
(agg, agg_kwargs) = get_iris_aggregator(operator, **operator_kwargs)
cube = cube.collapsed('latitude', agg, **agg_kwargs)
cube.data = cube.core_data().astype(np.float32, casting='same_kind')
return cube
stat_cube = cube.collapsed('latitude', agg, **agg_kwargs)
if normalize_with_stats is not None:
result = get_normalized_cube(cube, stat_cube, normalize_with_stats)
else:
result = stat_cube
result.data = result.core_data().astype(np.float32, casting='same_kind')
return result


def compute_area_weights(cube):
@@ -349,6 +375,7 @@ def _try_adding_calculated_cell_area(cube: Cube) -> None:
def area_statistics(
cube: Cube,
operator: str,
normalize_with_stats: Optional[Literal['subtract', 'divide']] = None,
**operator_kwargs,
) -> Cube:
"""Apply a statistical operator in the horizontal plane.
@@ -371,6 +398,11 @@
The operation. Used to determine the :class:`iris.analysis.Aggregator`
object used to calculate the statistics. Allowed options are given in
:ref:`this table <supported_stat_operator>`.
normalize_with_stats:
If given, do not return the statistics cube itself, but rather the
input cube normalized with the statistics cube. Can either be
`subtract` (statistics cube is subtracted from the input cube) or
`divide` (input cube is divided by the statistics cube).
**operator_kwargs:
Optional keyword arguments for the :class:`iris.analysis.Aggregator`
object defined by `operator`.
@@ -396,7 +428,11 @@
agg, agg_kwargs, 'cell_area', cube, _try_adding_calculated_cell_area
)

result = cube.collapsed(['latitude', 'longitude'], agg, **agg_kwargs)
stat_cube = cube.collapsed(['latitude', 'longitude'], agg, **agg_kwargs)
if normalize_with_stats is not None:
result = get_normalized_cube(cube, stat_cube, normalize_with_stats)
else:
result = stat_cube

# Make sure to preserve dtype
new_dtype = result.dtype
119 changes: 119 additions & 0 deletions tests/unit/preprocessor/_area/test_area.py
@@ -188,6 +188,45 @@ def test_area_statistics_rms(self):
self.assert_array_equal(result.data, expected)
self.assertEqual(result.units, 'kg m-2 s-1')

def test_area_statistics_subtract_mean(self):
"""Test for area average of a 2D field."""
input_data = self.grid.copy()
self.assertFalse(input_data.cell_measures('cell_area'))

result = area_statistics(
input_data, 'mean', normalize_with_stats='subtract'
)

self.assertEqual(input_data, self.grid)
self.assertEqual(result.shape, input_data.shape)
expected = np.ma.zeros((2, 5, 5), dtype=np.float32)
self.assert_array_equal(result.data, expected)
self.assertFalse(result.cell_measures('cell_area'))
self.assertEqual(result.metadata, self.grid.metadata)
for coord in self.grid.coords():
self.assertEqual(result.coord(coord.name()), coord)

def test_area_statistics_cell_measure_subtract_mean(self):
"""Test for area average of a 2D field.
The area measure is pre-loaded in the cube.
"""
self._add_cell_measure_to_grid()
input_data = self.grid.copy()

result = area_statistics(
input_data, 'mean', normalize_with_stats='subtract'
)

self.assertEqual(input_data, self.grid)
self.assertEqual(result.shape, input_data.shape)
expected = np.ma.zeros((2, 5, 5), dtype=np.float32)
self.assert_array_equal(result.data, expected)
self.assertFalse(result.cell_measures('cell_area'))
self.assertEqual(result.metadata, self.grid.metadata)
for coord in self.grid.coords():
self.assertEqual(result.coord(coord.name()), coord)

def test_extract_region(self):
"""Test for extracting a region from a 2D field."""
result = extract_region(self.grid, 1.5, 2.5, 1.5, 2.5)
@@ -1270,6 +1309,43 @@ def test_zonal_statistics(make_testcube):
assert res.dtype == np.float32


def test_zonal_statistics_divide_by_min(make_testcube):
"""Test ``zonal_statistics``."""
make_testcube.data = np.ones(make_testcube.shape, dtype=np.float32)
make_testcube.data[0, 0] = 0.0
make_testcube.data[1, 0] = -1.0
make_testcube.data[2, 0] = -0.5
make_testcube.units = 'K'
input_data = make_testcube.copy()

msg = "Division by zero encountered during cube normalization"
with pytest.warns(RuntimeWarning, match=msg):
res = zonal_statistics(
input_data, 'min', normalize_with_stats='divide'
)

assert input_data == make_testcube
assert res.shape == input_data.shape
expected = np.ma.masked_invalid(
[
[np.nan, np.nan, np.nan, np.nan, np.nan],
[1.0, -1.0, -1.0, -1.0, -1.0],
[1.0, -2.0, -2.0, -2.0, -2.0],
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.0, 1.0, 1.0, 1.0],
],
)
tests.assert_array_equal(res.data, expected)
assert res.dtype == np.float32

assert res.standard_name == input_data.standard_name
assert res.var_name == input_data.var_name
assert res.long_name == input_data.long_name
assert res.cell_methods == input_data.cell_methods
assert res.attributes == input_data.attributes
assert res.units == '1'


def test_zonal_statistics_2d_lon_fail(irreg_extract_shape_cube):
"""Test ``zonal_statistics``."""
with pytest.raises(ValueError):
@@ -1286,11 +1362,54 @@ def test_meridional_statistics(make_testcube):
assert res.dtype == np.float32


def test_meridional_statistics_divide_by_max(make_testcube):
"""Test ``meridional_statistics``."""
make_testcube.data = np.ones(make_testcube.shape, dtype=np.float32)
make_testcube.data[:, 0] = 0.0
make_testcube.data[0, 1] = 2.0
make_testcube.units = 'K'
input_data = make_testcube.copy()

msg = "Division by zero encountered during cube normalization"
with pytest.warns(RuntimeWarning, match=msg):
res = meridional_statistics(
input_data, 'max', normalize_with_stats='divide'
)

assert input_data == make_testcube
assert res.shape == input_data.shape
expected = np.ma.masked_invalid(
[
[np.nan, 1.0, 1.0, 1.0, 1.0],
[np.nan, 0.5, 1.0, 1.0, 1.0],
[np.nan, 0.5, 1.0, 1.0, 1.0],
[np.nan, 0.5, 1.0, 1.0, 1.0],
[np.nan, 0.5, 1.0, 1.0, 1.0],
],
)
tests.assert_array_equal(res.data, expected)
assert res.dtype == np.float32

assert res.standard_name == input_data.standard_name
assert res.var_name == input_data.var_name
assert res.long_name == input_data.long_name
assert res.cell_methods == input_data.cell_methods
assert res.attributes == input_data.attributes
assert res.units == '1'


def test_meridional_statistics_2d_lon_fail(irreg_extract_shape_cube):
"""Test ``meridional_statistics``."""
with pytest.raises(ValueError):
meridional_statistics(irreg_extract_shape_cube, 'sum')


def test_meridional_statistics_invalid_norm_fail(make_testcube):
"""Test ``meridional_statistics``."""
msg = "Expected 'subtract' or 'divide' for `normalize_with_stats`"
with pytest.raises(ValueError, match=msg):
meridional_statistics(make_testcube, 'sum', normalize_with_stats='x')


if __name__ == '__main__':
unittest.main()
