Skip to content

Commit

Permalink
Add CRPS for the generalised truncated and censored t distribution (#53)
Browse files Browse the repository at this point in the history
* add crps for generalised truncated and censored t distribution

* skip tests for crps for gtct distribution
  • Loading branch information
sallen12 authored Sep 5, 2024
1 parent 9064086 commit a2027f7
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 2 deletions.
6 changes: 6 additions & 0 deletions docs/api/crps.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ When the true forecast CDF is not fully known, but represented by a finite ensem

::: scoringrules.crps_cnormal

::: scoringrules.crps_gtct

::: scoringrules.crps_tt

::: scoringrules.crps_ct

::: scoringrules.crps_hypergeometric

::: scoringrules.crps_laplace
Expand Down
7 changes: 7 additions & 0 deletions scoringrules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
crps_gtclogistic,
crps_tlogistic,
crps_clogistic,
crps_gtct,
crps_tt,
crps_ct,
crps_gtcnormal,
crps_tnormal,
crps_cnormal,
Expand All @@ -24,6 +27,7 @@
crps_lognormal,
crps_normal,
crps_poisson,
crps_t,
crps_uniform,
crps_quantile,
owcrps_ensemble,
Expand Down Expand Up @@ -67,6 +71,9 @@
"crps_gtcnormal",
"crps_tnormal",
"crps_cnormal",
"crps_gtct",
"crps_tt",
"crps_ct",
"crps_hypergeometric",
"crps_laplace",
"crps_logistic",
Expand Down
160 changes: 160 additions & 0 deletions scoringrules/_crps.py
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,162 @@ def crps_cnormal(
return crps.gtcnormal(observation, location, scale, lower, upper, lmass, umass, backend=backend)


def crps_gtct(
observation: "ArrayLike",
df : "ArrayLike",
/,
location: "ArrayLike" = 0.0,
scale: "ArrayLike" = 1.0,
lower: "ArrayLike" = float("-inf"),
upper: "ArrayLike" = float("inf"),
lmass: "ArrayLike" = 0.0,
umass: "ArrayLike" = 0.0,
*,
backend: "Backend" = None,
) -> "ArrayLike":
r"""Compute the closed form of the CRPS for the generalised truncated and censored t distribution.
It is based on the following formulation from
[Jordan et al. (2019)](https://www.jstatsoft.org/article/view/v090i12):
$$ \mathrm{CRPS}(F_{l, L, \nu}^{u, U}, y) = |y - z| + uU^{2} - lL^{2} + \left( \frac{1 - L - U}{F_{\nu}(u) - F_{\nu}(l)} \right) z \left( 2 F_{\nu}(z) - \frac{(1 - 2L) F_{\nu}(u) + (1 - 2U) F_{\nu}(l)}{1 - L - U} \right) - \left( \frac{1 - L - U}{F_{\nu}(u) - F_{\nu}(l)} \right) \left( 2 G_{\nu}(z) - 2 G_{\nu}(u)U - 2 G_{\nu}(l)L \right) - \left( \frac{1 - L - U}{F_{\nu}(u) - F_{\nu}(l)} \right)^{2} \bar{B}_{\nu} \left( H_{\nu}(u) - H_{\nu}(l) \right), $$
$$ \mathrm{CRPS}(F_{l, L, \nu, \mu, \sigma}^{u, U}, y) = \sigma \mathrm{CRPS}(F_{(l - \mu)/\sigma, L, \nu}^{(u - \mu)/\sigma, U}, \frac{y - \mu}{\sigma}), $$
$$ G_{\nu}(x) = - \left( \frac{\nu + x^{2}}{\nu - 1} \right) f_{\nu}(x), $$
$$ H_{\nu}(x) = \frac{1}{2} + \frac{1}{2} \mathrm{sgn}(x) I \left( \frac{1}{2}, \nu - \frac{1}{2}, \frac{x^{2}}{\nu + x^{2}} \right), $$
$$ \bar{B}_{\nu} = \left( \frac{2 \sqrt{\nu}}{\nu - 1} \right) \frac{B(\frac{1}{2}, \nu - \frac{1}{2})}{B(\frac{1}{2}, \frac{\nu}{2})^{2}}, $$
where $F_{\nu}$ is the CDF of the standard t distribution with $\nu > 1$ degrees of freedom,
distribution, $F_{l, L, \nu, \mu, \sigma}^{u, U}$ is the CDF of the t distribution
truncated below at $l$ and above at $u$, with point masses $L, U > 0$ at the lower and upper
boundaries, respectively, and degrees of freedom, location and scale parameters $\nu > 1$, $\mu$ and $\sigma > 0$.
$F_{l, L, \nu}^{u, U} = F_{l, L, \nu, 0, 1}^{u, U}$.
Parameters
----------
observation: ArrayLike
The observed values.
df: ArrayLike
Degrees of freedom parameter of the forecast distribution.
location: ArrayLike
Location parameter of the forecast distribution.
scale: ArrayLike
Scale parameter of the forecast distribution.
lower: ArrayLike
Lower boundary of the truncated forecast distribution.
upper: ArrayLike
Upper boundary of the truncated forecast distribution.
lmass: ArrayLike
Point mass assigned to the lower boundary of the forecast distribution.
umass: ArrayLike
Point mass assigned to the upper boundary of the forecast distribution.
Returns
-------
crps: array_like
The CRPS between gtct(df, location, scale, lower, upper, lmass, umass) and obs.
Examples
--------
>>> import scoringrules as sr
>>> sr.crps_gtct(0.0, 2.0, 0.1, 0.4, -1.0, 1.0, 0.1, 0.1)
"""
return crps.gtct(observation, df, location, scale, lower, upper, lmass, umass, backend=backend)


def crps_tt(
observation: "ArrayLike",
df : "ArrayLike",
/,
location: "ArrayLike" = 0.0,
scale: "ArrayLike" = 1.0,
lower: "ArrayLike" = float("-inf"),
upper: "ArrayLike" = float("inf"),
*,
backend: "Backend" = None,
) -> "ArrayLike":
r"""Compute the closed form of the CRPS for the truncated t distribution.
It is based on the formulation for the generalised truncated and censored t distribution with
lmass and umass set to zero.
Parameters
----------
observation: ArrayLike
The observed values.
df: ArrayLike
Degrees of freedom parameter of the forecast distribution.
location: ArrayLike
Location parameter of the forecast distribution.
scale: ArrayLike
Scale parameter of the forecast distribution.
lower: ArrayLike
Lower boundary of the truncated forecast distribution.
upper: ArrayLike
Upper boundary of the truncated forecast distribution.
Returns
-------
crps: array_like
The CRPS between tt(df, location, scale, lower, upper) and obs.
Examples
--------
>>> import scoringrules as sr
>>> sr.crps_tt(0.0, 2.0, 0.1, 0.4, -1.0, 1.0)
"""
return crps.gtct(observation, df, location, scale, lower, upper, 0.0, 0.0, backend=backend)


def crps_ct(
observation: "ArrayLike",
df : "ArrayLike",
/,
location: "ArrayLike" = 0.0,
scale: "ArrayLike" = 1.0,
lower: "ArrayLike" = float("-inf"),
upper: "ArrayLike" = float("inf"),
*,
backend: "Backend" = None,
) -> "ArrayLike":
r"""Compute the closed form of the CRPS for the censored t distribution.
It is based on the formulation for the generalised truncated and censored t distribution with
lmass and umass set to the tail probabilities of the predictive distribution.
Parameters
----------
observation: ArrayLike
The observed values.
df: ArrayLike
Degrees of freedom parameter of the forecast distribution.
location: ArrayLike
Location parameter of the forecast distribution.
scale: ArrayLike
Scale parameter of the forecast distribution.
lower: ArrayLike
Lower boundary of the truncated forecast distribution.
upper: ArrayLike
Upper boundary of the truncated forecast distribution.
Returns
-------
crps: array_like
The CRPS between ct(df, location, scale, lower, upper) and obs.
Examples
--------
>>> import scoringrules as sr
>>> sr.crps_ct(0.0, 2.0, 0.1, 0.4, -1.0, 1.0)
"""
lmass = stats._t_cdf((lower - location) / scale, df)
umass = 1 - stats._t_cdf((upper - location) / scale, df)
return crps.gtct(observation, df, location, scale, lower, upper, lmass, umass, backend=backend)


def crps_hypergeometric(
observation: "ArrayLike",
m: "ArrayLike",
Expand Down Expand Up @@ -1495,6 +1651,9 @@ def crps_uniform(
"crps_gtcnormal",
"crps_tnormal",
"crps_cnormal",
"crps_gtct",
"crps_tt",
"crps_ct",
"crps_hypergeometric",
"crps_laplace",
"crps_logistic",
Expand All @@ -1503,5 +1662,6 @@ def crps_uniform(
"crps_lognormal",
"crps_normal",
"crps_poisson",
"crps_t",
"crps_uniform",
]
2 changes: 2 additions & 0 deletions scoringrules/core/crps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
gpd,
gtclogistic,
gtcnormal,
gtct,
hypergeometric,
laplace,
logistic,
Expand All @@ -35,6 +36,7 @@
"gpd",
"gtclogistic",
"gtcnormal",
"gtct",
"hypergeometric",
"laplace",
"logistic",
Expand Down
47 changes: 47 additions & 0 deletions scoringrules/core/crps/_closed.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,53 @@ def gtcnormal(
return sigma * (s1 + s2 + s3 - s4)


def gtct(
obs: "ArrayLike", df: "ArrayLike", location: "ArrayLike", scale: "ArrayLike", lower: "ArrayLike", upper: "ArrayLike", lmass: "ArrayLike", umass: "ArrayLike", backend: "Backend" = None
) -> "Array":
"""Compute the CRPS for the generalised truncated and censored t distribution."""
B = backends.active if backend is None else backends[backend]
df, mu, sigma, lower, upper, lmass, umass, obs = map(B.asarray, (df, location, scale, lower, upper, lmass, umass, obs))
ω = (obs - mu) / sigma
u = (upper - mu) / sigma
l = (lower - mu) / sigma
z = B.minimum(B.maximum(ω, l), u)
F_u = _t_cdf(u, df, backend=backend)
F_l = _t_cdf(l, df, backend=backend)
F_z = _t_cdf(z, df, backend=backend)
f_u = _t_pdf(u, df, backend=backend)
f_l = _t_pdf(l, df, backend=backend)
f_z = _t_pdf(z, df, backend=backend)

u_inf = u == float("inf")
l_inf = l == float("-inf")
u = B.where(u_inf, B.nan, u)
l = B.where(l_inf, B.nan, l)

s1_u = B.where(u_inf and umass == 0.0, 0.0, u * umass**2)
s1_l = B.where(l_inf and lmass == 0.0, 0.0, l * lmass**2)

G_u = B.where(u_inf, 0.0, - f_u * (df + u**2) / (df - 1))
G_l = B.where(l_inf, 0.0, - f_l * (df + l**2) / (df - 1))
G_z = - f_z * (df + z**2) / (df - 1)

I_u = B.where(u_inf, 1.0, B.betainc(1 / 2, df - 1 / 2, (u**2) / (df + u**2)))
I_l = B.where(l_inf, 1.0, B.betainc(1 / 2, df - 1 / 2, (l**2) / (df + l**2)))
sgn_u = B.where(u_inf, 1.0, (u / B.abs(u)))
sgn_l = B.where(l_inf, -1.0, (l / B.abs(l)))
H_u = (sgn_u * I_u + 1) / 2
H_l = (sgn_l * I_l + 1) / 2

Bbar = (2 * B.sqrt(df) / (df - 1)) * B.beta(1 / 2, df - 1 / 2) / (B.beta(1 / 2, df / 2)**2)

c = (1 - lmass - umass) / (F_u - F_l)

s1 = B.abs(ω - z) + s1_u - s1_l
s2 = c * z * (2 * F_z - ((1 - 2 * lmass) * F_u + (1 - 2 * umass) * F_l) / (1 - lmass - umass))
s3 = 2 * c * (G_z - G_u * umass - G_l * lmass)
s4 = c**2 * Bbar * (H_u - H_l)
return sigma * (s1 + s2 - s3 - s4)


def hypergeometric(
obs: "ArrayLike",
m: "ArrayLike",
Expand Down
13 changes: 11 additions & 2 deletions scoringrules/core/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,24 @@ def _pois_pdf(x: "ArrayLike", mean: "ArrayLike", backend: "Backend" = None) -> "
def _t_pdf(x: "ArrayLike", df: "ArrayLike", backend: "Backend" = None) -> "Array":
"""Probability density function for the standard Student's t distribution."""
B = backends.active if backend is None else backends[backend]
return ((1 + x**2 / df) ** (-(df + 1) / 2)) / (B.sqrt(df) * B.beta(1 / 2, df / 2))
x_inf = B.abs(x) == float("inf")
x = B.where(x_inf, B.nan, x)
s = ((1 + x**2 / df) ** (-(df + 1) / 2)) / (B.sqrt(df) * B.beta(1 / 2, df / 2))
return B.where(x_inf, 0.0, s)


def _t_cdf(x: "ArrayLike", df: "ArrayLike", backend: "Backend" = None) -> "Array":
"""Cumulative distribution function for the standard Student's t distribution."""
B = backends.active if backend is None else backends[backend]
return 1 / 2 + x * B.hypergeometric(1 / 2, (df + 1) / 2, 3 / 2, -(x**2) / df) / (
x_minf = x == float("-inf")
x_inf = x == float("inf")
x = B.where(x_inf, B.nan, x)
s = 1 / 2 + x * B.hypergeometric(1 / 2, (df + 1) / 2, 3 / 2, -(x**2) / df) / (
B.sqrt(df) * B.beta(1 / 2, df / 2)
)
s = B.where(x_minf, 0.0, s)
s = B.where(x_inf, 1.0, s)
return s


def _gev_cdf(s: "ArrayLike", xi: "ArrayLike", backend: "Backend" = None) -> "Array":
Expand Down
52 changes: 52 additions & 0 deletions tests/test_crps.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,58 @@ def test_cnormal(backend):
assert np.isclose(res, res0)


@pytest.mark.parametrize("backend", BACKENDS)
def test_gtct(backend):
if backend in ["jax", "torch", "tensorflow"]:
pytest.skip("Not implemented in torch or tensorflow backends")
obs, df, location, scale, lower, upper, lmass, umass = 0.9, 20.1, -2.3, 4.1, -7.3, 1.7, 0.0, 0.21
expected = 1.423042
res = _crps.crps_gtct(obs, df, location, scale, lower, upper, lmass, umass, backend=backend)
assert np.isclose(res, expected)

# aligns with crps_t
res0 = _crps.crps_t(obs, df, location, scale, backend=backend)
res = _crps.crps_gtct(obs, df, location, scale, backend=backend)
assert np.isclose(res, res0)

# aligns with crps_tnormal
res0 = _crps.crps_tt(obs, df, location, scale, lower, upper, backend=backend)
res = _crps.crps_gtct(obs, df, location, scale, lower, upper, backend=backend)
assert np.isclose(res, res0)


@pytest.mark.parametrize("backend", BACKENDS)
def test_tt(backend):
if backend in ["jax", "torch", "tensorflow"]:
pytest.skip("Not implemented in torch or tensorflow backends")

obs, df, location, scale, lower, upper = -1.0, 2.9, 3.1, 4.2, 1.5, 17.3
expected = 5.084272
res = _crps.crps_tt(obs, df, location, scale, lower, upper, backend=backend)
assert np.isclose(res, expected)

# aligns with crps_t
res0 = _crps.crps_t(obs, df ,location, scale, backend=backend)
res = _crps.crps_tt(obs, df, location, scale, backend=backend)
assert np.isclose(res, res0)


@pytest.mark.parametrize("backend", BACKENDS)
def test_ct(backend):
if backend in ["jax", "torch", "tensorflow"]:
pytest.skip("Not implemented in torch or tensorflow backends")

obs, df, location, scale, lower, upper = 1.8, 5.4, 0.4, 1.1, 0.0, 2.0
expected = 0.8028996
res = _crps.crps_ct(obs, df, location, scale, lower, upper, backend=backend)
assert np.isclose(res, expected)

# aligns with crps_t
res0 = _crps.crps_t(obs, df, location, scale, backend=backend)
res = _crps.crps_ct(obs, df, location, scale, backend=backend)
assert np.isclose(res, res0)


@pytest.mark.parametrize("backend", BACKENDS)
def test_hypergeometric(backend):
res = _crps.crps_hypergeometric(5 * np.ones((2, 2)), 7, 13, 12, backend=backend)
Expand Down

0 comments on commit a2027f7

Please sign in to comment.