From a2027f7dcc2851e81a440b3b6beb20fb10928116 Mon Sep 17 00:00:00 2001
From: Sam Allen <34094291+sallen12@users.noreply.github.com>
Date: Thu, 5 Sep 2024 19:58:34 +0200
Subject: [PATCH] Add CRPS for the generalised truncated and censored t
 distribution (#53)

* add crps for generalised truncated and censored t distribution

* skip tests for crps for gtct distribution
---
 docs/api/crps.md                   |   6 ++
 scoringrules/__init__.py           |   7 ++
 scoringrules/_crps.py              | 160 +++++++++++++++++++++++++++++
 scoringrules/core/crps/__init__.py |   2 +
 scoringrules/core/crps/_closed.py  |  47 +++++++++
 scoringrules/core/stats.py         |  13 ++-
 tests/test_crps.py                 |  52 ++++++++++
 7 files changed, 285 insertions(+), 2 deletions(-)

diff --git a/docs/api/crps.md b/docs/api/crps.md
index 5bbd31b..e8e89bb 100644
--- a/docs/api/crps.md
+++ b/docs/api/crps.md
@@ -39,6 +39,12 @@ When the true forecast CDF is not fully known, but represented by a finite ensem
 
 ::: scoringrules.crps_cnormal
 
+::: scoringrules.crps_gtct
+
+::: scoringrules.crps_tt
+
+::: scoringrules.crps_ct
+
 ::: scoringrules.crps_hypergeometric
 
 ::: scoringrules.crps_laplace
diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py
index e54c9e6..a1ad6e3 100644
--- a/scoringrules/__init__.py
+++ b/scoringrules/__init__.py
@@ -13,6 +13,9 @@
     crps_gtclogistic,
     crps_tlogistic,
     crps_clogistic,
+    crps_gtct,
+    crps_tt,
+    crps_ct,
     crps_gtcnormal,
     crps_tnormal,
     crps_cnormal,
@@ -24,6 +27,7 @@
     crps_lognormal,
     crps_normal,
     crps_poisson,
+    crps_t,
     crps_uniform,
     crps_quantile,
     owcrps_ensemble,
@@ -67,6 +71,9 @@
     "crps_gtcnormal",
     "crps_tnormal",
     "crps_cnormal",
+    "crps_gtct",
+    "crps_tt",
+    "crps_ct",
     "crps_hypergeometric",
     "crps_laplace",
     "crps_logistic",
diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py
index 26ef3bf..ca1b13d 100644
--- a/scoringrules/_crps.py
+++ b/scoringrules/_crps.py
@@ -1021,6 +1021,162 @@ def crps_cnormal(
     return crps.gtcnormal(observation, location, scale, lower, upper, lmass, umass, backend=backend)
 
 
+def crps_gtct(
+    observation: "ArrayLike",
+    df : "ArrayLike",
+    /,
+    location: "ArrayLike" = 0.0,
+    scale: "ArrayLike" = 1.0,
+    lower: "ArrayLike" = float("-inf"),
+    upper: "ArrayLike" = float("inf"),
+    lmass: "ArrayLike" = 0.0,
+    umass: "ArrayLike" = 0.0,
+    *,
+    backend: "Backend" = None,
+) -> "ArrayLike":
+    r"""Compute the closed form of the CRPS for the generalised truncated and censored t distribution.
+
+    It is based on the following formulation from
+    [Jordan et al. (2019)](https://www.jstatsoft.org/article/view/v090i12):
+
+    $$ \mathrm{CRPS}(F_{l, L, \nu}^{u, U}, y) = |y - z| + uU^{2} - lL^{2} + \left( \frac{1 - L - U}{F_{\nu}(u) - F_{\nu}(l)} \right) z \left( 2 F_{\nu}(z) - \frac{(1 - 2L) F_{\nu}(u) + (1 - 2U) F_{\nu}(l)}{1 - L - U} \right) - \left( \frac{1 - L - U}{F_{\nu}(u) - F_{\nu}(l)} \right) \left( 2 G_{\nu}(z) - 2 G_{\nu}(u)U - 2 G_{\nu}(l)L \right) - \left( \frac{1 - L - U}{F_{\nu}(u) - F_{\nu}(l)} \right)^{2} \bar{B}_{\nu} \left( H_{\nu}(u) - H_{\nu}(l) \right), $$
+    
+    $$ \mathrm{CRPS}(F_{l, L, \nu, \mu, \sigma}^{u, U}, y) = \sigma \mathrm{CRPS}(F_{(l - \mu)/\sigma, L, \nu}^{(u - \mu)/\sigma, U}, \frac{y - \mu}{\sigma}), $$
+
+    $$ G_{\nu}(x) = - \left( \frac{\nu + x^{2}}{\nu - 1} \right) f_{\nu}(x), $$
+    
+    $$ H_{\nu}(x) = \frac{1}{2} + \frac{1}{2} \mathrm{sgn}(x) I \left( \frac{1}{2}, \nu - \frac{1}{2}, \frac{x^{2}}{\nu + x^{2}} \right), $$
+    
+    $$ \bar{B}_{\nu} = \left( \frac{2 \sqrt{\nu}}{\nu - 1} \right) \frac{B(\frac{1}{2}, \nu - \frac{1}{2})}{B(\frac{1}{2}, \frac{\nu}{2})^{2}}, $$
+
+    where $F_{\nu}$ is the CDF of the standard t distribution with $\nu > 1$ degrees of freedom,
+    distribution, $F_{l, L, \nu, \mu, \sigma}^{u, U}$ is the CDF of the t distribution 
+    truncated below at $l$ and above at $u$, with point masses $L, U > 0$ at the lower and upper
+    boundaries, respectively, and degrees of freedom, location and scale parameters $\nu > 1$, $\mu$ and $\sigma > 0$.
+    $F_{l, L, \nu}^{u, U} = F_{l, L, \nu, 0, 1}^{u, U}$.
+
+    Parameters
+    ----------
+    observation: ArrayLike
+        The observed values.
+    df: ArrayLike
+        Degrees of freedom parameter of the forecast distribution.
+    location: ArrayLike
+        Location parameter of the forecast distribution.
+    scale: ArrayLike
+        Scale parameter of the forecast distribution.
+    lower: ArrayLike
+        Lower boundary of the truncated forecast distribution.
+    upper: ArrayLike
+        Upper boundary of the truncated forecast distribution.
+    lmass: ArrayLike
+        Point mass assigned to the lower boundary of the forecast distribution.
+    umass: ArrayLike
+        Point mass assigned to the upper boundary of the forecast distribution.
+
+    Returns
+    -------
+    crps: array_like
+        The CRPS between gtct(df, location, scale, lower, upper, lmass, umass) and obs.
+
+    Examples
+    --------
+    >>> import scoringrules as sr
+    >>> sr.crps_gtct(0.0, 2.0, 0.1, 0.4, -1.0, 1.0, 0.1, 0.1)
+    """
+    return crps.gtct(observation, df, location, scale, lower, upper, lmass, umass, backend=backend)
+
+
+def crps_tt(
+    observation: "ArrayLike",
+    df : "ArrayLike",
+    /,
+    location: "ArrayLike" = 0.0,
+    scale: "ArrayLike" = 1.0,
+    lower: "ArrayLike" = float("-inf"),
+    upper: "ArrayLike" = float("inf"),
+    *,
+    backend: "Backend" = None,
+) -> "ArrayLike":
+    r"""Compute the closed form of the CRPS for the truncated t distribution.
+
+    It is based on the formulation for the generalised truncated and censored t distribution with 
+    lmass and umass set to zero.
+    
+    Parameters
+    ----------
+    observation: ArrayLike
+        The observed values.
+    df: ArrayLike
+        Degrees of freedom parameter of the forecast distribution.
+    location: ArrayLike
+        Location parameter of the forecast distribution.
+    scale: ArrayLike
+        Scale parameter of the forecast distribution.
+    lower: ArrayLike
+        Lower boundary of the truncated forecast distribution.
+    upper: ArrayLike
+        Upper boundary of the truncated forecast distribution.
+
+    Returns
+    -------
+    crps: array_like
+        The CRPS between tt(df, location, scale, lower, upper) and obs.
+
+    Examples
+    --------
+    >>> import scoringrules as sr
+    >>> sr.crps_tt(0.0, 2.0, 0.1, 0.4, -1.0, 1.0)
+    """
+    return crps.gtct(observation, df, location, scale, lower, upper, 0.0, 0.0, backend=backend)
+
+
+def crps_ct(
+    observation: "ArrayLike",
+    df : "ArrayLike",
+    /,
+    location: "ArrayLike" = 0.0,
+    scale: "ArrayLike" = 1.0,
+    lower: "ArrayLike" = float("-inf"),
+    upper: "ArrayLike" = float("inf"),
+    *,
+    backend: "Backend" = None,
+) -> "ArrayLike":
+    r"""Compute the closed form of the CRPS for the censored t distribution.
+
+    It is based on the formulation for the generalised truncated and censored t distribution with 
+    lmass and umass set to the tail probabilities of the predictive distribution.
+
+    Parameters
+    ----------
+    observation: ArrayLike
+        The observed values.
+    df: ArrayLike
+        Degrees of freedom parameter of the forecast distribution.
+    location: ArrayLike
+        Location parameter of the forecast distribution.
+    scale: ArrayLike
+        Scale parameter of the forecast distribution.
+    lower: ArrayLike
+        Lower boundary of the truncated forecast distribution.
+    upper: ArrayLike
+        Upper boundary of the truncated forecast distribution.
+
+    Returns
+    -------
+    crps: array_like
+        The CRPS between ct(df, location, scale, lower, upper) and obs.
+
+    Examples
+    --------
+    >>> import scoringrules as sr
+    >>> sr.crps_ct(0.0, 2.0, 0.1, 0.4, -1.0, 1.0)
+    """
+    lmass = stats._t_cdf((lower - location) / scale, df)
+    umass = 1 - stats._t_cdf((upper - location) / scale, df)
+    return crps.gtct(observation, df, location, scale, lower, upper, lmass, umass, backend=backend)
+
+
 def crps_hypergeometric(
     observation: "ArrayLike",
     m: "ArrayLike",
@@ -1495,6 +1651,9 @@ def crps_uniform(
     "crps_gtcnormal",
     "crps_tnormal",
     "crps_cnormal",
+    "crps_gtct",
+    "crps_tt",
+    "crps_ct",
     "crps_hypergeometric",
     "crps_laplace",
     "crps_logistic",
@@ -1503,5 +1662,6 @@ def crps_uniform(
     "crps_lognormal",
     "crps_normal",
     "crps_poisson",
+    "crps_t",
     "crps_uniform",
 ]
diff --git a/scoringrules/core/crps/__init__.py b/scoringrules/core/crps/__init__.py
index 8c9cea2..9009a00 100644
--- a/scoringrules/core/crps/__init__.py
+++ b/scoringrules/core/crps/__init__.py
@@ -9,6 +9,7 @@
     gpd,
     gtclogistic,
     gtcnormal,
+    gtct,
     hypergeometric,
     laplace,
     logistic,
@@ -35,6 +36,7 @@
     "gpd",
     "gtclogistic",
     "gtcnormal",
+    "gtct",
     "hypergeometric",
     "laplace",
     "logistic",
diff --git a/scoringrules/core/crps/_closed.py b/scoringrules/core/crps/_closed.py
index 5f61dbe..557d4cc 100644
--- a/scoringrules/core/crps/_closed.py
+++ b/scoringrules/core/crps/_closed.py
@@ -330,6 +330,53 @@ def gtcnormal(
     return sigma * (s1 + s2 + s3 - s4)
 
 
+def gtct(
+    obs: "ArrayLike", df: "ArrayLike", location: "ArrayLike", scale: "ArrayLike", lower: "ArrayLike", upper: "ArrayLike", lmass: "ArrayLike", umass: "ArrayLike", backend: "Backend" = None
+) -> "Array":
+    """Compute the CRPS for the generalised truncated and censored t distribution."""
+    B = backends.active if backend is None else backends[backend]
+    df, mu, sigma, lower, upper, lmass, umass, obs = map(B.asarray, (df, location, scale, lower, upper, lmass, umass, obs))
+    ω = (obs - mu) / sigma
+    u = (upper - mu) / sigma
+    l = (lower - mu) / sigma
+    z = B.minimum(B.maximum(ω, l), u)
+    F_u = _t_cdf(u, df, backend=backend)
+    F_l = _t_cdf(l, df, backend=backend)
+    F_z = _t_cdf(z, df, backend=backend)
+    f_u = _t_pdf(u, df, backend=backend)
+    f_l = _t_pdf(l, df, backend=backend)
+    f_z = _t_pdf(z, df, backend=backend)
+
+    u_inf = u == float("inf")
+    l_inf = l == float("-inf")
+    u = B.where(u_inf, B.nan, u)
+    l = B.where(l_inf, B.nan, l)
+
+    s1_u = B.where(u_inf and umass == 0.0, 0.0, u * umass**2)
+    s1_l = B.where(l_inf and lmass == 0.0, 0.0, l * lmass**2)
+
+    G_u = B.where(u_inf, 0.0, - f_u * (df + u**2) / (df - 1))
+    G_l = B.where(l_inf, 0.0, - f_l * (df + l**2) / (df - 1))
+    G_z = - f_z * (df + z**2) / (df - 1)
+
+    I_u = B.where(u_inf, 1.0, B.betainc(1 / 2, df - 1 / 2, (u**2) / (df + u**2)))
+    I_l = B.where(l_inf, 1.0, B.betainc(1 / 2, df - 1 / 2, (l**2) / (df + l**2)))
+    sgn_u = B.where(u_inf, 1.0, (u / B.abs(u)))
+    sgn_l = B.where(l_inf, -1.0, (l / B.abs(l)))
+    H_u = (sgn_u * I_u + 1) / 2
+    H_l = (sgn_l * I_l + 1) / 2
+
+    Bbar = (2 * B.sqrt(df) / (df - 1)) * B.beta(1 / 2, df - 1 / 2) / (B.beta(1 / 2, df / 2)**2)
+
+    c = (1 - lmass - umass) / (F_u - F_l)
+
+    s1 = B.abs(ω - z) + s1_u - s1_l
+    s2 = c * z * (2 * F_z - ((1 - 2 * lmass) * F_u + (1 - 2 * umass) * F_l) / (1 - lmass - umass))
+    s3 = 2 * c * (G_z - G_u * umass - G_l * lmass)
+    s4 = c**2 * Bbar * (H_u - H_l)
+    return sigma * (s1 + s2 - s3 - s4)
+
+
 def hypergeometric(
     obs: "ArrayLike",
     m: "ArrayLike",
diff --git a/scoringrules/core/stats.py b/scoringrules/core/stats.py
index 255fd80..0fd2c05 100644
--- a/scoringrules/core/stats.py
+++ b/scoringrules/core/stats.py
@@ -58,15 +58,24 @@ def _pois_pdf(x: "ArrayLike", mean: "ArrayLike", backend: "Backend" = None) -> "
 def _t_pdf(x: "ArrayLike", df: "ArrayLike", backend: "Backend" = None) -> "Array":
     """Probability density function for the standard Student's t distribution."""
     B = backends.active if backend is None else backends[backend]
-    return ((1 + x**2 / df) ** (-(df + 1) / 2)) / (B.sqrt(df) * B.beta(1 / 2, df / 2))
+    x_inf = B.abs(x) == float("inf")
+    x = B.where(x_inf, B.nan, x)
+    s = ((1 + x**2 / df) ** (-(df + 1) / 2)) / (B.sqrt(df) * B.beta(1 / 2, df / 2))
+    return B.where(x_inf, 0.0, s)
 
 
 def _t_cdf(x: "ArrayLike", df: "ArrayLike", backend: "Backend" = None) -> "Array":
     """Cumulative distribution function for the standard Student's t distribution."""
     B = backends.active if backend is None else backends[backend]
-    return 1 / 2 + x * B.hypergeometric(1 / 2, (df + 1) / 2, 3 / 2, -(x**2) / df) / (
+    x_minf = x == float("-inf")
+    x_inf = x == float("inf")
+    x = B.where(x_inf, B.nan, x)
+    s = 1 / 2 + x * B.hypergeometric(1 / 2, (df + 1) / 2, 3 / 2, -(x**2) / df) / (
         B.sqrt(df) * B.beta(1 / 2, df / 2)
     )
+    s = B.where(x_minf, 0.0, s)
+    s = B.where(x_inf, 1.0, s)
+    return s
 
 
 def _gev_cdf(s: "ArrayLike", xi: "ArrayLike", backend: "Backend" = None) -> "Array":
diff --git a/tests/test_crps.py b/tests/test_crps.py
index 6d29703..f1e39e8 100644
--- a/tests/test_crps.py
+++ b/tests/test_crps.py
@@ -320,6 +320,58 @@ def test_cnormal(backend):
     assert np.isclose(res, res0)
 
 
+@pytest.mark.parametrize("backend", BACKENDS)
+def test_gtct(backend):
+    if backend in ["jax", "torch", "tensorflow"]:
+        pytest.skip("Not implemented in torch or tensorflow backends")
+    obs, df, location, scale, lower, upper, lmass, umass = 0.9, 20.1, -2.3, 4.1, -7.3, 1.7, 0.0, 0.21
+    expected = 1.423042
+    res = _crps.crps_gtct(obs, df, location, scale, lower, upper, lmass, umass, backend=backend)
+    assert np.isclose(res, expected)
+
+    # aligns with crps_t
+    res0 = _crps.crps_t(obs, df, location, scale, backend=backend)
+    res = _crps.crps_gtct(obs, df, location, scale, backend=backend)
+    assert np.isclose(res, res0)
+
+    # aligns with crps_tnormal
+    res0 = _crps.crps_tt(obs, df, location, scale, lower, upper, backend=backend)
+    res = _crps.crps_gtct(obs, df, location, scale, lower, upper, backend=backend)
+    assert np.isclose(res, res0)
+
+
+@pytest.mark.parametrize("backend", BACKENDS)
+def test_tt(backend):
+    if backend in ["jax", "torch", "tensorflow"]:
+        pytest.skip("Not implemented in torch or tensorflow backends")
+        
+    obs, df, location, scale, lower, upper = -1.0, 2.9, 3.1, 4.2, 1.5, 17.3
+    expected = 5.084272
+    res = _crps.crps_tt(obs, df, location, scale, lower, upper, backend=backend)
+    assert np.isclose(res, expected)
+
+    # aligns with crps_t
+    res0 = _crps.crps_t(obs, df ,location, scale, backend=backend)
+    res = _crps.crps_tt(obs, df, location, scale, backend=backend)
+    assert np.isclose(res, res0)
+
+
+@pytest.mark.parametrize("backend", BACKENDS)
+def test_ct(backend):
+    if backend in ["jax", "torch", "tensorflow"]:
+        pytest.skip("Not implemented in torch or tensorflow backends")
+    
+    obs, df, location, scale, lower, upper = 1.8, 5.4, 0.4, 1.1, 0.0, 2.0
+    expected = 0.8028996
+    res = _crps.crps_ct(obs, df, location, scale, lower, upper, backend=backend)
+    assert np.isclose(res, expected)
+
+    # aligns with crps_t
+    res0 = _crps.crps_t(obs, df, location, scale, backend=backend)
+    res = _crps.crps_ct(obs, df, location, scale, backend=backend)
+    assert np.isclose(res, res0)
+
+
 @pytest.mark.parametrize("backend", BACKENDS)
 def test_hypergeometric(backend):
     res = _crps.crps_hypergeometric(5 * np.ones((2, 2)), 7, 13, 12, backend=backend)