Skip to content

Commit

Permalink
implement crps for binomial distribution
Browse files Browse the repository at this point in the history
Co-Authored-By: Sam Allen <34094291+sallen12@users.noreply.github.com>
  • Loading branch information
frazane and sallen12 committed Aug 26, 2024
1 parent 725c1b9 commit 3fbb96b
Show file tree
Hide file tree
Showing 10 changed files with 162 additions and 11 deletions.
2 changes: 2 additions & 0 deletions docs/api/crps.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ When the true forecast CDF is not fully known, but represented by a finite ensem

::: scoringrules.crps_beta

::: scoringrules.crps_binomial

::: scoringrules.crps_exponential

::: scoringrules.crps_lognormal
Expand Down
2 changes: 2 additions & 0 deletions scoringrules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from scoringrules._brier import brier_score
from scoringrules._crps import (
crps_beta,
crps_binomial,
crps_ensemble,
crps_exponential,
crps_logistic,
Expand Down Expand Up @@ -38,6 +39,7 @@
"backends",
"crps_ensemble",
"crps_beta",
"crps_binomial",
"crps_normal",
"crps_exponential",
"crps_lognormal",
Expand Down
44 changes: 44 additions & 0 deletions scoringrules/_crps.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,50 @@ def crps_beta(
return crps.beta(observation, a, b, lower, upper, backend=backend)


def crps_binomial(
observation: "ArrayLike",
n: "ArrayLike",
prob: "ArrayLike",
/,
*,
backend: "Backend" = None,
) -> "ArrayLike":
r"""Compute the closed form of the CRPS for the binomial distribution.
It is based on the following formulation from
[Jordan et al. (2019)](https://www.jstatsoft.org/article/view/v090i12):
$$
\mathrm{CRPS}(F_{n, p}, y) = 2 \sum_{x = 0}^{n} f_{n,p}(x) (1\{y < x\}
- F_{n,p}(x) + f_{n,p}(x)/2) (x - y),
$$
where $f_{n, p}$ and $F_{n, p}$ are the PDF and CDF of the binomial distribution
with size parameter $n = 0, 1, 2, ...$ and probability parameter $p \in [0, 1]$.
Parameters
----------
observation:
The observed values as an integer or array of integers.
n:
Size parameter of the forecast binomial distribution as an integer or array of integers.
prob:
Probability parameter of the forecast binomial distribution as a float or array of floats.
Returns
-------
score:
The CRPS between Binomial(n, prob) and obs.
Examples
--------
>>> import scoringrules as sr
>>> sr.crps_binomial(4, 10, 0.5)
0.5955715179443359
"""
return crps.binomial(observation, n, prob, backend=backend)


def crps_exponential(
observation: "ArrayLike",
rate: "ArrayLike",
Expand Down
6 changes: 4 additions & 2 deletions scoringrules/backend/jax.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def apply_along_axis(
try:
x_shape = list(x.shape)
return jax.vmap(func1d)(x.reshape(-1, x_shape.pop(axis))).reshape(x_shape)
except Exception:
except (Exception, jax.errors.ConcretizationTypeError):
return jnp.apply_along_axis(func1d, axis, x)

def floor(self, x: "Array") -> "Array":
Expand Down Expand Up @@ -205,7 +205,9 @@ def hypergeometric(self, a: "Array", b: "Array", c: "Array", z: "Array"):
return jsp.special.hyp2f1(a, b, c, z)

def comb(self, n: "ArrayLike", k: "ArrayLike") -> "ArrayLike":
return jsp.special.comb(n, k)
return jsp.special.factorial(n) // (
jsp.special.factorial(k) * jsp.special.factorial(n - k)
)

def expi(self, x: "Array") -> "Array":
return jsp.special.expi(x)
Expand Down
2 changes: 1 addition & 1 deletion scoringrules/backend/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def hypergeometric(
raise NotImplementedError

def comb(self, n: "Tensor", k: "Tensor") -> "Tensor":
return self.factorial(n) / (self.factorial(k) * self.factorial(n - k))
return self.factorial(n) // (self.factorial(k) * self.factorial(n - k))

def expi(self, x: "Tensor") -> "Tensor":
return tf.math.special.expint(x)
Expand Down
2 changes: 1 addition & 1 deletion scoringrules/backend/torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def hypergeometric(
return None

def comb(self, n: "Tensor", k: "Tensor") -> "Tensor":
return self.factorial(n) / (self.factorial(k) * self.factorial(n - k))
return self.factorial(n) // (self.factorial(k) * self.factorial(n - k))

def expi(self, x: "Tensor") -> "Tensor":
return None
Expand Down
3 changes: 2 additions & 1 deletion scoringrules/core/crps/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from ._approx import ensemble, ow_ensemble, quantile_pinball, vr_ensemble
from ._closed import beta, exponential, logistic, lognormal, normal
from ._closed import beta, binomial, exponential, logistic, lognormal, normal
from ._gufuncs import estimator_gufuncs, quantile_pinball_gufunc

__all__ = [
"ensemble",
"ow_ensemble",
"vr_ensemble",
"beta",
"binomial",
"exponential",
"logistic",
"lognormal",
Expand Down
70 changes: 69 additions & 1 deletion scoringrules/core/crps/_closed.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import typing as tp

from scoringrules.backend import backends
from scoringrules.core.stats import _exp_cdf, _logis_cdf, _norm_cdf, _norm_pdf
from scoringrules.core.stats import (
_binom_cdf,
_binom_pdf,
_exp_cdf,
_logis_cdf,
_norm_cdf,
_norm_pdf,
)

if tp.TYPE_CHECKING:
from scoringrules.core.typing import Array, ArrayLike, Backend
Expand Down Expand Up @@ -42,6 +49,67 @@ def beta(
return s


def binomial(
obs: "ArrayLike",
n: "ArrayLike",
prob: "ArrayLike",
backend: "Backend" = None,
) -> "Array":
"""Compute the CRPS for the binomial distribution.
Note
----
This is a bit of a hacky implementation, due to how the arrays
must be broadcasted, but it should work for now.
"""
B = backends.active if backend is None else backends[backend]
obs, n, prob = map(B.asarray, (obs, n, prob))
ones_like_n = 0.0 * n + 1

def _inner(params):
obs, n, prob = params
x = B.arange(0, n + 1)
w = _binom_pdf(x, n, prob)
a = _binom_cdf(x, n, prob) - 0.5 * w
s = 2 * B.sum(w * ((obs < x) - a) * (x - obs))
return s

# if n is a scalar, then if needed we must broadcast k and p to the same shape as n
# TODO: implement B.broadcast() for backends
if n.size == 1:
x = B.arange(0, n + 1)
need_broadcast = not (obs.size == 1 and prob.size == 1)

if need_broadcast:
obs = obs[:, None] if obs.size > 1 else obs[None]
prob = prob[:, None] if prob.size > 1 else prob[None]
x = x[None]
x = x * ones_like_n
prob = prob * ones_like_n
obs = obs * ones_like_n

w = _binom_pdf(x, n, prob)
a = _binom_cdf(x, n, prob) - 0.5 * w
s = 2 * B.sum(
w * ((obs < x) - a) * (x - obs), axis=-1 if need_broadcast else None
)

# otherwise, since x would have variable sizes, we must apply the function along the axis
else:
obs = obs * ones_like_n if obs.size == 1 else obs
prob = prob * ones_like_n if prob.size == 1 else prob

# option 1: in a loop
s = B.stack(
[_inner(params) for params in zip(obs, n, prob, strict=True)], axis=-1
)

# option 2: apply_along_axis (does not work with JAX)
# s = B.apply_along_axis(_inner, B.stack((obs, n, prob), axis=-1), -1)

return s


def exponential(
obs: "ArrayLike", rate: "ArrayLike", backend: "Backend" = None
) -> "Array":
Expand Down
9 changes: 4 additions & 5 deletions scoringrules/core/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,20 +90,19 @@ def _gpd_cdf(x: "ArrayLike", shape: "ArrayLike", backend: "Backend" = None) -> "


def _binom_pdf(
x: "ArrayLike", n: "ArrayLike", prob: "ArrayLike", backend: "Backend" = None
k: "ArrayLike", n: "ArrayLike", prob: "ArrayLike", backend: "Backend" = None
) -> "Array":
"""Probability mass function for the binomial distribution."""
B = backends.active if backend is None else backends[backend]
ind = B.isinteger(x) * (1 - B.isnegative(x)) * (1 - B.negative(n - x))
return ind * B.comb(n, x) * prob**x * (1 - prob) ** (n - x)
return B.comb(n, k) * prob**k * (1 - prob) ** (n - k)


def _binom_cdf(
x: "ArrayLike", n: "ArrayLike", prob: "ArrayLike", backend: "Backend" = None
k: "ArrayLike", n: "ArrayLike", prob: "ArrayLike", backend: "Backend" = None
) -> "Array":
"""Cumulative distribution function for the binomial distribution."""
B = backends.active if backend is None else backends[backend]
return (1 - B.isnegative(x)) * B.betainc(n - B.floor(x), B.floor(x) + 1, 1 - prob)
return B.betainc(n - B.minimum(k, n) + 1e-36, k + 1, 1 - prob)


def _hypergeo_pdf(
Expand Down
33 changes: 33 additions & 0 deletions tests/test_crps.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,36 @@ def test_beta(backend):
res = _crps.crps_beta(-3.0, 0.7, 1.1, lower=-5.0, upper=4.0, backend=backend)
expected = 0.883206751
assert np.isclose(res, expected)


@pytest.mark.parametrize("backend", BACKENDS)
def test_binomial(backend):
if backend == "torch":
pytest.skip("Not implemented in torch backend")

# test correctness
res = _crps.crps_binomial(8, 10, 0.9, backend=backend)
expected = 0.6685115
assert np.isclose(res, expected)

res = _crps.crps_binomial(-8, 10, 0.9, backend=backend)
expected = 16.49896
assert np.isclose(res, expected)

res = _crps.crps_binomial(18, 10, 0.9, backend=backend)
expected = 8.498957
assert np.isclose(res, expected)

# test broadcasting
ones = np.ones(2)
k, n, p = 8, 10, 0.9
s = _crps.crps_binomial(k * ones, n, p, backend=backend)
assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()
s = _crps.crps_binomial(k * ones, n * ones, p, backend=backend)
assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()
s = _crps.crps_binomial(k * ones, n * ones, p * ones, backend=backend)
assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()
s = _crps.crps_binomial(k, n * ones, p * ones, backend=backend)
assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()
s = _crps.crps_binomial(k * ones, n, p * ones, backend=backend)
assert np.isclose(s, np.array([0.6685115, 0.6685115])).all()

0 comments on commit 3fbb96b

Please sign in to comment.