From a2027f7dcc2851e81a440b3b6beb20fb10928116 Mon Sep 17 00:00:00 2001 From: Sam Allen <34094291+sallen12@users.noreply.github.com> Date: Thu, 5 Sep 2024 19:58:34 +0200 Subject: [PATCH] Add CRPS for the generalised truncated and censored t distribution (#53) * add crps for generalised truncated and censored t distribution * skip tests for crps for gtct distribution --- docs/api/crps.md | 6 ++ scoringrules/__init__.py | 7 ++ scoringrules/_crps.py | 160 +++++++++++++++++++++++++++++ scoringrules/core/crps/__init__.py | 2 + scoringrules/core/crps/_closed.py | 47 +++++++++ scoringrules/core/stats.py | 13 ++- tests/test_crps.py | 52 ++++++++++ 7 files changed, 285 insertions(+), 2 deletions(-) diff --git a/docs/api/crps.md b/docs/api/crps.md index 5bbd31b..e8e89bb 100644 --- a/docs/api/crps.md +++ b/docs/api/crps.md @@ -39,6 +39,12 @@ When the true forecast CDF is not fully known, but represented by a finite ensem ::: scoringrules.crps_cnormal +::: scoringrules.crps_gtct + +::: scoringrules.crps_tt + +::: scoringrules.crps_ct + ::: scoringrules.crps_hypergeometric ::: scoringrules.crps_laplace diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py index e54c9e6..a1ad6e3 100644 --- a/scoringrules/__init__.py +++ b/scoringrules/__init__.py @@ -13,6 +13,9 @@ crps_gtclogistic, crps_tlogistic, crps_clogistic, + crps_gtct, + crps_tt, + crps_ct, crps_gtcnormal, crps_tnormal, crps_cnormal, @@ -24,6 +27,7 @@ crps_lognormal, crps_normal, crps_poisson, + crps_t, crps_uniform, crps_quantile, owcrps_ensemble, @@ -67,6 +71,9 @@ "crps_gtcnormal", "crps_tnormal", "crps_cnormal", + "crps_gtct", + "crps_tt", + "crps_ct", "crps_hypergeometric", "crps_laplace", "crps_logistic", diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py index 26ef3bf..ca1b13d 100644 --- a/scoringrules/_crps.py +++ b/scoringrules/_crps.py @@ -1021,6 +1021,162 @@ def crps_cnormal( return crps.gtcnormal(observation, location, scale, lower, upper, lmass, umass, backend=backend) +def crps_gtct( + observation: "ArrayLike", + df : "ArrayLike", + /, + location: "ArrayLike" = 0.0, + scale: "ArrayLike" = 1.0, + lower: "ArrayLike" = float("-inf"), + upper: "ArrayLike" = float("inf"), + lmass: "ArrayLike" = 0.0, + umass: "ArrayLike" = 0.0, + *, + backend: "Backend" = None, +) -> "ArrayLike": + r"""Compute the closed form of the CRPS for the generalised truncated and censored t distribution. + + It is based on the following formulation from + [Jordan et al. (2019)](https://www.jstatsoft.org/article/view/v090i12): + + $$ \mathrm{CRPS}(F_{l, L, \nu}^{u, U}, y) = |y - z| + uU^{2} - lL^{2} + \left( \frac{1 - L - U}{F_{\nu}(u) - F_{\nu}(l)} \right) z \left( 2 F_{\nu}(z) - \frac{(1 - 2L) F_{\nu}(u) + (1 - 2U) F_{\nu}(l)}{1 - L - U} \right) - \left( \frac{1 - L - U}{F_{\nu}(u) - F_{\nu}(l)} \right) \left( 2 G_{\nu}(z) - 2 G_{\nu}(u)U - 2 G_{\nu}(l)L \right) - \left( \frac{1 - L - U}{F_{\nu}(u) - F_{\nu}(l)} \right)^{2} \bar{B}_{\nu} \left( H_{\nu}(u) - H_{\nu}(l) \right), $$ + + $$ \mathrm{CRPS}(F_{l, L, \nu, \mu, \sigma}^{u, U}, y) = \sigma \mathrm{CRPS}(F_{(l - \mu)/\sigma, L, \nu}^{(u - \mu)/\sigma, U}, \frac{y - \mu}{\sigma}), $$ + + $$ G_{\nu}(x) = - \left( \frac{\nu + x^{2}}{\nu - 1} \right) f_{\nu}(x), $$ + + $$ H_{\nu}(x) = \frac{1}{2} + \frac{1}{2} \mathrm{sgn}(x) I \left( \frac{1}{2}, \nu - \frac{1}{2}, \frac{x^{2}}{\nu + x^{2}} \right), $$ + + $$ \bar{B}_{\nu} = \left( \frac{2 \sqrt{\nu}}{\nu - 1} \right) \frac{B(\frac{1}{2}, \nu - \frac{1}{2})}{B(\frac{1}{2}, \frac{\nu}{2})^{2}}, $$ + + where $F_{\nu}$ is the CDF of the standard t distribution with $\nu > 1$ degrees of freedom, + distribution, $F_{l, L, \nu, \mu, \sigma}^{u, U}$ is the CDF of the t distribution + truncated below at $l$ and above at $u$, with point masses $L, U > 0$ at the lower and upper + boundaries, respectively, and degrees of freedom, location and scale parameters $\nu > 1$, $\mu$ and $\sigma > 0$. + $F_{l, L, \nu}^{u, U} = F_{l, L, \nu, 0, 1}^{u, U}$. + + Parameters + ---------- + observation: ArrayLike + The observed values. + df: ArrayLike + Degrees of freedom parameter of the forecast distribution. + location: ArrayLike + Location parameter of the forecast distribution. + scale: ArrayLike + Scale parameter of the forecast distribution. + lower: ArrayLike + Lower boundary of the truncated forecast distribution. + upper: ArrayLike + Upper boundary of the truncated forecast distribution. + lmass: ArrayLike + Point mass assigned to the lower boundary of the forecast distribution. + umass: ArrayLike + Point mass assigned to the upper boundary of the forecast distribution. + + Returns + ------- + crps: array_like + The CRPS between gtct(df, location, scale, lower, upper, lmass, umass) and obs. + + Examples + -------- + >>> import scoringrules as sr + >>> sr.crps_gtct(0.0, 2.0, 0.1, 0.4, -1.0, 1.0, 0.1, 0.1) + """ + return crps.gtct(observation, df, location, scale, lower, upper, lmass, umass, backend=backend) + + +def crps_tt( + observation: "ArrayLike", + df : "ArrayLike", + /, + location: "ArrayLike" = 0.0, + scale: "ArrayLike" = 1.0, + lower: "ArrayLike" = float("-inf"), + upper: "ArrayLike" = float("inf"), + *, + backend: "Backend" = None, +) -> "ArrayLike": + r"""Compute the closed form of the CRPS for the truncated t distribution. + + It is based on the formulation for the generalised truncated and censored t distribution with + lmass and umass set to zero. + + Parameters + ---------- + observation: ArrayLike + The observed values. + df: ArrayLike + Degrees of freedom parameter of the forecast distribution. + location: ArrayLike + Location parameter of the forecast distribution. + scale: ArrayLike + Scale parameter of the forecast distribution. + lower: ArrayLike + Lower boundary of the truncated forecast distribution. + upper: ArrayLike + Upper boundary of the truncated forecast distribution. + + Returns + ------- + crps: array_like + The CRPS between tt(df, location, scale, lower, upper) and obs. + + Examples + -------- + >>> import scoringrules as sr + >>> sr.crps_tt(0.0, 2.0, 0.1, 0.4, -1.0, 1.0) + """ + return crps.gtct(observation, df, location, scale, lower, upper, 0.0, 0.0, backend=backend) + + +def crps_ct( + observation: "ArrayLike", + df : "ArrayLike", + /, + location: "ArrayLike" = 0.0, + scale: "ArrayLike" = 1.0, + lower: "ArrayLike" = float("-inf"), + upper: "ArrayLike" = float("inf"), + *, + backend: "Backend" = None, +) -> "ArrayLike": + r"""Compute the closed form of the CRPS for the censored t distribution. + + It is based on the formulation for the generalised truncated and censored t distribution with + lmass and umass set to the tail probabilities of the predictive distribution. + + Parameters + ---------- + observation: ArrayLike + The observed values. + df: ArrayLike + Degrees of freedom parameter of the forecast distribution. + location: ArrayLike + Location parameter of the forecast distribution. + scale: ArrayLike + Scale parameter of the forecast distribution. + lower: ArrayLike + Lower boundary of the truncated forecast distribution. + upper: ArrayLike + Upper boundary of the truncated forecast distribution. + + Returns + ------- + crps: array_like + The CRPS between ct(df, location, scale, lower, upper) and obs. + + Examples + -------- + >>> import scoringrules as sr + >>> sr.crps_ct(0.0, 2.0, 0.1, 0.4, -1.0, 1.0) + """ + lmass = stats._t_cdf((lower - location) / scale, df) + umass = 1 - stats._t_cdf((upper - location) / scale, df) + return crps.gtct(observation, df, location, scale, lower, upper, lmass, umass, backend=backend) + + def crps_hypergeometric( observation: "ArrayLike", m: "ArrayLike", @@ -1495,6 +1651,9 @@ def crps_uniform( "crps_gtcnormal", "crps_tnormal", "crps_cnormal", + "crps_gtct", + "crps_tt", + "crps_ct", "crps_hypergeometric", "crps_laplace", "crps_logistic", @@ -1503,5 +1662,6 @@ def crps_uniform( "crps_lognormal", "crps_normal", "crps_poisson", + "crps_t", "crps_uniform", ] diff --git a/scoringrules/core/crps/__init__.py b/scoringrules/core/crps/__init__.py index 8c9cea2..9009a00 100644 --- a/scoringrules/core/crps/__init__.py +++ b/scoringrules/core/crps/__init__.py @@ -9,6 +9,7 @@ gpd, gtclogistic, gtcnormal, + gtct, hypergeometric, laplace, logistic, @@ -35,6 +36,7 @@ "gpd", "gtclogistic", "gtcnormal", + "gtct", "hypergeometric", "laplace", "logistic", diff --git a/scoringrules/core/crps/_closed.py b/scoringrules/core/crps/_closed.py index 5f61dbe..557d4cc 100644 --- a/scoringrules/core/crps/_closed.py +++ b/scoringrules/core/crps/_closed.py @@ -330,6 +330,53 @@ def gtcnormal( return sigma * (s1 + s2 + s3 - s4) +def gtct( + obs: "ArrayLike", df: "ArrayLike", location: "ArrayLike", scale: "ArrayLike", lower: "ArrayLike", upper: "ArrayLike", lmass: "ArrayLike", umass: "ArrayLike", backend: "Backend" = None +) -> "Array": + """Compute the CRPS for the generalised truncated and censored t distribution.""" + B = backends.active if backend is None else backends[backend] + df, mu, sigma, lower, upper, lmass, umass, obs = map(B.asarray, (df, location, scale, lower, upper, lmass, umass, obs)) + ω = (obs - mu) / sigma + u = (upper - mu) / sigma + l = (lower - mu) / sigma + z = B.minimum(B.maximum(ω, l), u) + F_u = _t_cdf(u, df, backend=backend) + F_l = _t_cdf(l, df, backend=backend) + F_z = _t_cdf(z, df, backend=backend) + f_u = _t_pdf(u, df, backend=backend) + f_l = _t_pdf(l, df, backend=backend) + f_z = _t_pdf(z, df, backend=backend) + + u_inf = u == float("inf") + l_inf = l == float("-inf") + u = B.where(u_inf, B.nan, u) + l = B.where(l_inf, B.nan, l) + + s1_u = B.where(u_inf and umass == 0.0, 0.0, u * umass**2) + s1_l = B.where(l_inf and lmass == 0.0, 0.0, l * lmass**2) + + G_u = B.where(u_inf, 0.0, - f_u * (df + u**2) / (df - 1)) + G_l = B.where(l_inf, 0.0, - f_l * (df + l**2) / (df - 1)) + G_z = - f_z * (df + z**2) / (df - 1) + + I_u = B.where(u_inf, 1.0, B.betainc(1 / 2, df - 1 / 2, (u**2) / (df + u**2))) + I_l = B.where(l_inf, 1.0, B.betainc(1 / 2, df - 1 / 2, (l**2) / (df + l**2))) + sgn_u = B.where(u_inf, 1.0, (u / B.abs(u))) + sgn_l = B.where(l_inf, -1.0, (l / B.abs(l))) + H_u = (sgn_u * I_u + 1) / 2 + H_l = (sgn_l * I_l + 1) / 2 + + Bbar = (2 * B.sqrt(df) / (df - 1)) * B.beta(1 / 2, df - 1 / 2) / (B.beta(1 / 2, df / 2)**2) + + c = (1 - lmass - umass) / (F_u - F_l) + + s1 = B.abs(ω - z) + s1_u - s1_l + s2 = c * z * (2 * F_z - ((1 - 2 * lmass) * F_u + (1 - 2 * umass) * F_l) / (1 - lmass - umass)) + s3 = 2 * c * (G_z - G_u * umass - G_l * lmass) + s4 = c**2 * Bbar * (H_u - H_l) + return sigma * (s1 + s2 - s3 - s4) + + def hypergeometric( obs: "ArrayLike", m: "ArrayLike", diff --git a/scoringrules/core/stats.py b/scoringrules/core/stats.py index 255fd80..0fd2c05 100644 --- a/scoringrules/core/stats.py +++ b/scoringrules/core/stats.py @@ -58,15 +58,24 @@ def _pois_pdf(x: "ArrayLike", mean: "ArrayLike", backend: "Backend" = None) -> " def _t_pdf(x: "ArrayLike", df: "ArrayLike", backend: "Backend" = None) -> "Array": """Probability density function for the standard Student's t distribution.""" B = backends.active if backend is None else backends[backend] - return ((1 + x**2 / df) ** (-(df + 1) / 2)) / (B.sqrt(df) * B.beta(1 / 2, df / 2)) + x_inf = B.abs(x) == float("inf") + x = B.where(x_inf, B.nan, x) + s = ((1 + x**2 / df) ** (-(df + 1) / 2)) / (B.sqrt(df) * B.beta(1 / 2, df / 2)) + return B.where(x_inf, 0.0, s) def _t_cdf(x: "ArrayLike", df: "ArrayLike", backend: "Backend" = None) -> "Array": """Cumulative distribution function for the standard Student's t distribution.""" B = backends.active if backend is None else backends[backend] - return 1 / 2 + x * B.hypergeometric(1 / 2, (df + 1) / 2, 3 / 2, -(x**2) / df) / ( + x_minf = x == float("-inf") + x_inf = x == float("inf") + x = B.where(x_inf, B.nan, x) + s = 1 / 2 + x * B.hypergeometric(1 / 2, (df + 1) / 2, 3 / 2, -(x**2) / df) / ( B.sqrt(df) * B.beta(1 / 2, df / 2) ) + s = B.where(x_minf, 0.0, s) + s = B.where(x_inf, 1.0, s) + return s def _gev_cdf(s: "ArrayLike", xi: "ArrayLike", backend: "Backend" = None) -> "Array": diff --git a/tests/test_crps.py b/tests/test_crps.py index 6d29703..f1e39e8 100644 --- a/tests/test_crps.py +++ b/tests/test_crps.py @@ -320,6 +320,58 @@ def test_cnormal(backend): assert np.isclose(res, res0) +@pytest.mark.parametrize("backend", BACKENDS) +def test_gtct(backend): + if backend in ["jax", "torch", "tensorflow"]: + pytest.skip("Not implemented in torch or tensorflow backends") + obs, df, location, scale, lower, upper, lmass, umass = 0.9, 20.1, -2.3, 4.1, -7.3, 1.7, 0.0, 0.21 + expected = 1.423042 + res = _crps.crps_gtct(obs, df, location, scale, lower, upper, lmass, umass, backend=backend) + assert np.isclose(res, expected) + + # aligns with crps_t + res0 = _crps.crps_t(obs, df, location, scale, backend=backend) + res = _crps.crps_gtct(obs, df, location, scale, backend=backend) + assert np.isclose(res, res0) + + # aligns with crps_tnormal + res0 = _crps.crps_tt(obs, df, location, scale, lower, upper, backend=backend) + res = _crps.crps_gtct(obs, df, location, scale, lower, upper, backend=backend) + assert np.isclose(res, res0) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_tt(backend): + if backend in ["jax", "torch", "tensorflow"]: + pytest.skip("Not implemented in torch or tensorflow backends") + + obs, df, location, scale, lower, upper = -1.0, 2.9, 3.1, 4.2, 1.5, 17.3 + expected = 5.084272 + res = _crps.crps_tt(obs, df, location, scale, lower, upper, backend=backend) + assert np.isclose(res, expected) + + # aligns with crps_t + res0 = _crps.crps_t(obs, df ,location, scale, backend=backend) + res = _crps.crps_tt(obs, df, location, scale, backend=backend) + assert np.isclose(res, res0) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_ct(backend): + if backend in ["jax", "torch", "tensorflow"]: + pytest.skip("Not implemented in torch or tensorflow backends") + + obs, df, location, scale, lower, upper = 1.8, 5.4, 0.4, 1.1, 0.0, 2.0 + expected = 0.8028996 + res = _crps.crps_ct(obs, df, location, scale, lower, upper, backend=backend) + assert np.isclose(res, expected) + + # aligns with crps_t + res0 = _crps.crps_t(obs, df, location, scale, backend=backend) + res = _crps.crps_ct(obs, df, location, scale, backend=backend) + assert np.isclose(res, res0) + + @pytest.mark.parametrize("backend", BACKENDS) def test_hypergeometric(backend): res = _crps.crps_hypergeometric(5 * np.ones((2, 2)), 7, 13, 12, backend=backend)