Skip to content

Commit

Permalink
Merge pull request #283 from n-kall/pareto_k
Browse files Browse the repository at this point in the history
Pareto k diagnostics and Pareto smoothing
  • Loading branch information
paul-buerkner authored Aug 31, 2023
2 parents 55e9233 + a85b9b5 commit df1ab19
Show file tree
Hide file tree
Showing 10 changed files with 1,117 additions and 1 deletion.
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,12 @@ S3method(order_draws,draws_list)
S3method(order_draws,draws_matrix)
S3method(order_draws,draws_rvars)
S3method(order_draws,rvar)
S3method(pareto_diags,default)
S3method(pareto_diags,rvar)
S3method(pareto_khat,default)
S3method(pareto_khat,rvar)
S3method(pareto_smooth,default)
S3method(pareto_smooth,rvar)
S3method(pillar_shaft,rvar)
S3method(print,draws_array)
S3method(print,draws_df)
Expand Down Expand Up @@ -447,6 +453,9 @@ export(ndraws)
export(niterations)
export(nvariables)
export(order_draws)
export(pareto_diags)
export(pareto_khat)
export(pareto_smooth)
export(quantile2)
export(r_scale)
export(rdo)
Expand Down
81 changes: 81 additions & 0 deletions R/gpd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#' Quantile function for generalized pareto
#' @noRd
qgeneralized_pareto <- function(p, mu = 0, sigma = 1, k = 0, lower.tail = TRUE, log.p = FALSE) {
stopifnot(length(mu) == 1 && length(sigma) == 1 && length(k) == 1)
if (is.na(sigma) || sigma <= 0) {
return(rep(NaN, length(p)))
}
if (log.p) {
p <- exp(p)
}
if (!lower.tail) {
p <- 1 - p
}
if (k == 0) {
q <- mu - sigma * log1p(-p)
} else {
q <- mu + sigma * expm1(-k * log1p(-p)) / k
}
q
}

#' Estimate parameters of the Generalized Pareto distribution
#'
#' Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of
#' the generalized Pareto distribution (GPD), assuming the location parameter is
#' 0. By default the fit uses a prior for \eqn{k} (this is in addition to the prior described by Zhang and Stephens, 2009), which will stabilize
#' estimates for very small sample sizes (and low effective sample sizes in the
#' case of MCMC samples). The weakly informative prior is a Gaussian prior
#' centered at 0.5 (see details in Vehtari et al., 2022).
#'
#' @param x A numeric vector. The sample from which to estimate the parameters.
#' @param wip Logical indicating whether to adjust \eqn{k} based on a weakly
#' informative Gaussian prior centered on 0.5. Defaults to `TRUE`.
#' @param min_grid_pts The minimum number of grid points used in the fitting
#' algorithm. The actual number used is `min_grid_pts + floor(sqrt(length(x)))`.
#' @param sort_x If `TRUE` (the default), the first step in the fitting
#' algorithm is to sort the elements of `x`. If `x` is already
#' sorted in ascending order then `sort_x` can be set to `FALSE` to
#' skip the initial sorting step.
#' @return A named list with components `k` and `sigma`.
#'
#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang &
#' Stephens (2009).
#'
#'
#' @references
#' Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method
#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
#'
#' @noRd
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
# see section 4 of Zhang and Stephens (2009)
if (sort_x) {
x <- sort.int(x)
}
N <- length(x)
prior <- 3
M <- min_grid_pts + floor(sqrt(N))
jj <- seq_len(M)
xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
k <- matrixStats::rowMeans2(log1p(-theta %o% x))
l_theta <- N * (log(-theta / k) - k - 1) # profile log-lik
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
theta_hat <- sum(theta * w_theta)
k_hat <- mean.default(log1p(-theta_hat * x))
sigma_hat <- -k_hat / theta_hat

# adjust k_hat based on weakly informative prior, Gaussian centered on 0.5.
# this stabilizes estimates for very small Monte Carlo sample sizes and low neff (see Vehtari et al., 2022 for details)
if (wip) {
k_hat <- (k_hat * N + 0.5 * 10) / (N + 10)
}

if (is.na(k_hat)) {
k_hat <- Inf
sigma_hat <- NaN
}

list(k = k_hat, sigma = sigma_hat)
}
Loading

0 comments on commit df1ab19

Please sign in to comment.