diff --git a/R/PlackettLuce.R b/R/PlackettLuce.R index 6e6b6a0..8fe58fa 100644 --- a/R/PlackettLuce.R +++ b/R/PlackettLuce.R @@ -1,872 +1,873 @@ -#' Fit a Plackett-Luce Model -#' -#' Fit a Plackett-Luce model to a set of rankings. The rankings may be partial -#' (each ranking completely ranks a subset of the items) and include ties of -#' arbitrary order. -#' -#' @section Model definition: -#' -#' A single ranking is given by -#' \deqn{R = \{C_1, C_2, \ldots, C_J\}}{R = {C_1, C_2, \ldots, C_J}} -#' where the items in set \eqn{C_1} are ranked higher than (better than) the -#' items in \eqn{C_2}, and so on. If there are multiple objects in set \eqn{C_j} -#' these items are tied in the ranking. -#' -#' For a set if items \eqn{S}, let -#' \deqn{f(S) = \delta_{|S|} -#' \left(\prod_{i \in S} \alpha_i \right)^\frac{1}{|S|}}{ -#' f(S) = delta_{|S|} * (prod_{i in S} alpha_i)^(1/|S|)} -#' where \eqn{|S|} is the cardinality (size) of the set, \eqn{\delta_n}{delta_n} is a -#' parameter related to the prevalence of ties of order \eqn{n} -#' (with \eqn{\delta_1 \equiv 1}), and -#' \eqn{\alpha_i}{alpha_i} is a parameter representing the worth of item \eqn{i}. -#' Then under an extension of the Plackett-Luce model allowing ties up to order -#' \eqn{D}, the probability of the ranking \eqn{R} is given by -#' \deqn{\prod_{j = 1}^J \frac{f(C_j)}{ -#' \sum_{k = 1}^{\min(D_j, D)} \sum_{S \in {A_j \choose k}} f(S)}}{ -#' prod_{j = 1}^J f(C_j)/ -#' (sum_{k = 1}^{min(D_j, D)} sum_{S in choose(A_j, k)} f(S))} -#' where \eqn{D_j} is the cardinality of \eqn{A_j}, the set of -#' alternatives from which \eqn{C_j} is chosen, and -#' \eqn{A_j \choose k}{choose(A_j, k)} is all the possible choices of \eqn{k} -#' items from \eqn{A_j}. The value of \eqn{D} can be set to the maximum number -#' of tied items observed in the data, so that \eqn{\delta_n = 0}{delta_n = 0} for -#' \eqn{n > D}. -#' -#' When the worth parameters are constrained to sum to one, they represent the -#' probability that the corresponding item comes first in a ranking of all -#' items, given that first place is not tied. -#' -#' The 2-way tie prevalence parameter \eqn{\delta_2}{delta_2} is related to -#' the probability that two items \emph{of equal worth} tie for -#' first place, given that the first place is not a 3-way or higher tie. -#' Specifically, that probability is -#' \eqn{\delta_2/(2 + \delta_2)}{delta_2/(2 + delta_2}. -#' -#' The 3-way and higher tie-prevalence parameters are similarly interpretable, -#' in terms of tie probabilities among equal-worth items. -#' -#' When intermediate tie orders are not observed (e.g. ties of order 2 -#' and order 4 are observed, but no ties of order 3), the maximum -#' likelihood estimate of the corresponding tie prevalence parameters -#' is zero, so these parameters are excluded from the model. -#' -#' @section Pseudo-rankings: -#' -#' In order for the maximum likelihood estimate of an object's worth to be -#' defined, the network of rankings must be strongly connected. This means that -#' in every possible partition of the objects into two nonempty subsets, some -#' object in the second set is ranked higher than some object in the first set -#' at least once. -#' -#' If the network of rankings is not strongly connected then pseudo-rankings -#' may be used to connect the network. This approach posits a hypothetical -#' object with log-worth 0 and adds \code{npseudo} wins and \code{npseudo} -#' losses to the set of rankings. -#' -#' The parameter \code{npseudo} is the prior strength. 
With \code{npseudo = 0} -#' the MLE is the posterior mode. As \code{npseudo} approaches -#' infinity the log-worth estimates all shrink towards 0. The default, -#' \code{npseudo = 0.5}, is sufficient to connect the network and has a weak -#' shrinkage effect. Even for networks that are already connected, adding -#' pseudo-rankings typically reduces both the bias and variance of the -#' estimators of the worth parameters. -#' -#' @section Incorporating prior information on log-worths: -#' -#' Prior information can be incorporated by using \code{normal} to specify a -#' multivariate normal prior on the \emph{log}-worths. The log-worths are then -#' estimated by maximum a posteriori (MAP) estimation. Model summaries (deviance, AIC, -#' standard errors) are based on the log-likelihood evaluated at the MAP -#' estimates, resulting in a finite sample bias that should disappear as -#' the number of rankings increases. Inference based on these model summaries -#' is valid as long as the prior is considered fixed and not tuned as part of -#' the model. -#' -#' Incorporating a prior is an alternative method of penalization, therefore -#' \code{npseudo} is set to zero when a prior is specified. -#' -#' @section Incorporating ranker adherence parameters: -#' -#' When rankings come from different rankers, the model can be extended to -#' allow for varying reliability of the rankers, as proposed by Raman and -#' Joachims (2014). In particular, replacing \eqn{f(S)} by -#' \deqn{h(S) = \delta_{|S|} -#' \left(\prod_{i \in S} \alpha_i \right)^\frac{\eta_g}{|S|}}{ -#' h(S) = delta_{|S|} * (prod_{i in S} alpha_i)^(eta_g/|S|)} -#' where \eqn{\eta_g > 0}{eta_g > 0} is the adherence parameter for ranker \eqn{g}. In -#' the standard model, all rankers are assumed to have equal reliability, so -#' \eqn{\eta_g = 1}{eta_g = 1} for all rankers. Higher \eqn{\eta_g = 1}{eta_g = 1} -#' increases the distance between item worths, giving greater weight -#' to the ranker's choice. Conversely, lower \eqn{\eta_g = 1}{eta_g = 1} shrinks -#' the item worths towards equality so the ranker's choice is less relevant. -#' -#' The adherence parameters are not estimable by maximum likelihood, since -#' for given item worths the maximum likelihood estimate of adherence would be -#' infinity for rankers that give rankings consistent with the items ordered by -#' worth and zero for all other rankers. Therefore it is essential to include a -#' prior on the adherence parameters when these are estimated rather than fixed. -#' Setting \code{gamma = TRUE} specifies the default -#' \eqn{\Gamma(10,10)}{Gamma(10,10)} prior, which has a mean of -#' 1 and a probability of 0.99 that the adherence is between 0.37 and 2. -#' Alternative parameters can be specified by a list with elements \code{shape} -#' and \code{rate}. Setting scale and rate to a common value \eqn{\theta}{theta} -#' specifies a mean of 1; \eqn{\theta \ge}{theta >=} 2 will give low prior probability -#' to near-zero adherence; as \eqn{\theta}{theta} increases the density becomes -#' more concentrated (and more symmetrical) about 1. -#' -#' Since the number of adherence parameters will typically be large and it is -#' assumed the worth and tie parameters are of primary interest, the adherence -#' parameters are not included in model summaries, but are included in the -#' returned object. 
-#' -#' @section Controlling the fit: -#' -#' For models without priors, using \code{nspseudo = 0} will use standard -#' maximum likelihood, if the network is connected (and throw an error -#' otherwise). -#' -#' The fitting algorithm is set by the \code{method} argument. The default -#' method \code{"iterative scaling"} is a slow but reliable approach. In -#' addition, this has the most control on the accuracy of the final fit, since -#' convergence is determined by direct comparison of the observed and expected -#' values of the sufficient statistics for the worth parameters, rather than a -#' tolerance on change in the log-likelihood. -#' -#' The \code{"iterative scaling"} algorithm is slow because it is a first order -#' method (does not use derivatives of the likelihood). From a set of starting -#' values that are 'close enough' to the final solution, the algorithm can be -#' accelerated using -#' \href{https://en.wikipedia.org/wiki/Steffensen's_method}{Steffensen's method}. -#' \code{PlackettLuce} attempts to apply Steffensen's acceleration when all -#' differences between the observed and expected values of the sufficient -#' statistics are less than \code{steffensen}. This is an ad-hoc rule defining -#' 'close enough' and in some cases the acceleration may produce negative -#' worth parameters or decrease the log-likelihood. \code{PlackettLuce} will -#' only apply the update when it makes an improvement. -#' -#' The \code{"BFGS"} and \code{"L-BFGS"} algorithms are second order methods, -#' therefore can be quicker than the default method. Control parameters can be -#' passed on to \code{\link[stats]{optim}} or \code{\link[lbfgs]{lbfgs}}. -#' -#' For models with priors, the iterative scaling method cannot be used, so BFGS -#' is used by default. -#' -#' @seealso -#' -#' Handling rankings: \code{\link{rankings}}, \code{\link{aggregate}}, -#' \code{\link{group}}, \code{\link{choices}}, -#' \code{\link{adjacency}}, \code{\link{connectivity}}. -#' -#' Inspect fitted Plackett-Luce models: \code{\link{coef}}, \code{deviance}, -#' \code{\link{fitted}}, \code{\link{itempar}}, \code{logLik}, \code{print}, -#' \code{\link{qvcalc}}, \code{\link{summary}}, \code{\link{vcov}}. -#' -#' Fit Plackett-Luce tree: \code{pltree}. -#' -#' Example data sets: \code{\link{beans}}, \code{\link{nascar}}, -#' \code{\link{pudding}}, \code{\link{preflib}}. -#' -#' Vignette: \code{vignette("Overview", package = "PlackettLuce")}. -#' -#' @note As the maximum tie order increases, the number of possible choices for -#' each rank increases rapidly, particularly when the total number of items is -#' high. This means that the model will be slower to fit with higher \eqn{D}. -#' In addition, due to the current implementation of the `vcov()` method, -#' computation of the standard errors (as by `summary()`) can take almost as -#' long as the model fit and may even become infeasible due to memory limits. -#' As a rule of thumb, for > 10 items and > 1000 rankings, we recommend -#' `PlackettLuce()` for ties up to order 4. For higher order ties, a -#' rank-ordered logit model, see [ROlogit::rologit()] or -#' generalized Mallows Model as in [BayesMallows::compute_mallows()] may be -#' more suitable, as they do not model tied events explicitly. -#' -#' @param rankings a \code{"\link{rankings}"} object, or an object that can be -#' coerced by \code{as.rankings}. An [`"aggregated_rankings"`][aggregate()] -#' object can be used to specify rankings and weights simultaneously. 
-#' A \code{"\link{grouped_rankings}"} object should be used when estimating -#' adherence for rankers with multiple rankings per ranker. -#' @param npseudo when using pseudodata: the number of wins and losses to add -#' between each object and a hypothetical reference object. -#' @param normal a optional list with elements named \code{mu} and \code{Sigma} -#' specifying the mean and covariance matrix of a multivariate normal prior on -#' the \emph{log} worths. -#' @param gamma a optional list with elements named \code{shape} and \code{rate} -#' specifying parameters of a gamma prior on adherence parameters for each -#' ranker (use \code{grouped_rankings} to group multiple rankings by ranker). -#' The short-cut \code{TRUE} may be used to specify a Gamma(10, 10) prior. If -#' \code{NULL} (or \code{FALSE}), adherence is fixed to \code{adherence} for -#' all rankers. -#' @param adherence an optional vector of adherence values for each ranker. If -#' missing, adherence is fixed to 1 for all rankers. If \code{gamma} is not -#' \code{NULL}, this specifies the starting values for the adherence. -#' @param weights an optional vector of weights for each ranking. -#' @param na.action a function to handle any missing rankings, see -#' [na.omit()]. -#' @param start starting values for the worth parameters and the tie parameters -#' on the raw scale (worth parameters need not be scaled to sum to 1). If -#' \code{normal} is specified, \code{exp(normal$mu)} is used as starting values -#' for the worth parameters. Coefficients from a previous fit can be passed as -#' the result of a call to \code{coef.PlackettLuce}, or the \code{coefficients} -#' element of a \code{"PlackettLuce"} object. -#' @param method the method to be used for fitting: \code{"iterative scaling"} -#' (iterative scaling to sequentially update the parameter values), -#' \code{"BFGS"} (the BFGS optimisation algorithm through the -#' \code{\link{optim}} interface), \code{"L-BFGS"} (the limited-memory BFGS -#' optimisation algorithm as implemented in the \code{\link[lbfgs]{lbfgs}} -#' package). Iterative scaling is used by default, unless a prior is specified -#' by \code{normal} or \code{gamma}, in which case the default is \code{"BFGS"}. -#' @param epsilon the maximum absolute difference between the observed and -#' expected sufficient statistics for the ability parameters at convergence. -#' @param steffensen a threshold defined as for \code{epsilon} after which to -#' apply Steffensen acceleration to the iterative scaling updates. -#' @param maxit a vector specifying the maximum number of iterations. If -#' \code{gamma} is \code{NULL}, only the first element is used and specifies the -#' maximum number of iterations of the algorithm specified by \code{method}. If -#' \code{gamma} is not \code{NULL}, a second element may be supplied to specify -#' the maximum number of iterations of an alternating algorithm, where -#' the adherence parameters are updated alternately with the other parameters. -#' The default is to use 10 outer iterations. -#' @param trace logical, if \code{TRUE} show trace of iterations. -#' @param verbose logical, if \code{TRUE} show messages from validity checks on -#' the rankings. -#' @param ... additional arguments passed to \code{optim} or \code{lbfgs}. -#' In particular the convergence tolerance may be adjusted using e.g. -#' \code{control = list(reltol = 1e-10)}. -#' -#' @return An object of class \code{"PlackettLuce"}, which is a list containing the -#' following elements: -#' \item{call}{ The matched call. 
} -#' \item{coefficients}{ The model coefficients. } -#' \item{loglik}{ The maximized log-likelihood. } -#' \item{null.loglik}{ The maximized log-likelihood for the null model (all -#' alternatives including ties have equal probability). } -#' \item{df.residual}{ The residual degrees of freedom. } -#' \item{df.null}{ The residual degrees of freedom for the null model. } -#' \item{rank}{ The rank of the model. } -#' \item{logposterior}{ If a prior was specified, the maximised log posterior.} -#' \item{gamma}{ If a gamma prior was specified, the list of parameters. } -#' \item{normal}{ If a normal prior was specified, the list of parameters. } -#' \item{iter}{ The number of iterations run. } -#' \item{rankings}{ The rankings passed to \code{rankings}, converted to a -#' \code{"rankings"} object if necessary. } -#' \item{weights}{ The weights applied to each ranking in the fitting. } -#' \item{adherence}{ The fixed or estimated adherence per ranker. } -#' \item{ranker}{ The ranker index mapping rankings to rankers (the -#' \code{"index"} attribute of \code{rankings} if specified as a -#' \code{"grouped_rankings"} object.)} -#' \item{ties}{ The observed tie orders corresponding to the estimated tie -#' parameters. } -#' \item{conv}{ The convergence code: 0 for successful convergence; 1 if reached -#' \code{maxit} (outer) iterations without convergence; 2 if Steffensen acceleration -#' cause log-likelihood to increase; negative number if L-BFGS algorithm failed -#' for other reason.} -#' @references -#' Raman, K. and Joachims, T. (2014) Methods for Ordinal Peer Grading. -#' \href{https://arxiv.org/abs/1404.3656}{arXiv:1404.3656}. -#' @examples -#' # Six partial rankings of four objects, 1 is top rank, e.g -#' # first ranking: item 1, item 2 -#' # second ranking: item 2, item 3, item 4, item 1 -#' # third ranking: items 2, 3, 4 tie for first place, item 1 second -#' R <- matrix(c(1, 2, 0, 0, -#' 4, 1, 2, 3, -#' 2, 1, 1, 1, -#' 1, 2, 3, 0, -#' 2, 1, 1, 0, -#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) -#' colnames(R) <- c("apple", "banana", "orange", "pear") -#' -#' # create rankings object -#' R <- as.rankings(R) -#' -#' # Standard maximum likelihood estimates -#' mod_mle <- PlackettLuce(R, npseudo = 0) -#' coef(mod_mle) -#' -#' # Fit with default settings -#' mod <- PlackettLuce(R) -#' # log-worths are shrunk towards zero -#' coef(mod) -#' -#' # independent N(0, 9) priors on log-worths, as in Raman and Joachims -#' prior <- list(mu = rep(0, ncol(R)), -#' Sigma = diag(rep(9, ncol(R)))) -#' mod_normal <- PlackettLuce(rankings = R, normal = prior) -#' # slightly weaker shrinkage effect vs pseudo-rankings, -#' # with less effect on tie parameters (but note small number of rankings here) -#' coef(mod_normal) -#' -#' # estimate adherence assuming every ranking is from a separate ranker -#' mod_separate <- PlackettLuce(rankings = R, normal = prior, gamma = TRUE) -#' coef(mod_separate) -#' # gives more weight to rankers 4 & 6 which rank apple first, -#' # so worth of apple increased relative to banana -#' mod_separate$adherence -#' -#' # estimate adherence based on grouped rankings -#' # - assume two rankings from each ranker -#' G <- group(R, rep(1:3, each = 2)) -#' mod_grouped <- PlackettLuce(rankings = G, normal = prior, gamma = TRUE) -#' coef(mod_grouped) -#' # first ranker is least consistent so down-weighted -#' mod_grouped$adherence -#' -#' @importFrom igraph as_adj graph_from_edgelist -#' @importFrom RSpectra eigs -#' @importFrom stats optim -#' @export -PlackettLuce <- function(rankings, - npseudo = 
0.5, - normal = NULL, - gamma = NULL, - adherence = NULL, - weights = freq(rankings), - na.action = getOption("na.action"), - start = NULL, - method = c("iterative scaling", "BFGS", "L-BFGS"), - epsilon = 1e-7, steffensen = 0.1, maxit = c(500, 10), - trace = FALSE, verbose = TRUE, ...){ - call <- match.call() - - # check rankings object - grouped_rankings <- inherits(rankings, "grouped_rankings") - if (inherits(rankings, "aggregated_rankings")){ - force(weights) - rankings$freq <- NULL - rankings <- as.rankings(rankings) - } - if (!grouped_rankings & !inherits(rankings, "rankings")){ - rankings <- suppressWarnings(as.rankings(rankings, verbose = verbose)) - } - - # define ranker - if (grouped_rankings){ - ranker <- attr(rankings, "index") - # if weights are per group id expand to be per ranking - if (!is.null(weights) & length(weights) == max(ranker)) { - weights <- weights[ranker] - } - }else if (!is.null(adherence)| !is.null(gamma)){ - ranker <- seq_len(nrow(rankings)) - } else ranker <- NULL - - if (!is.null(na.action)) { - rankings <- match.fun(na.action)(rankings) - na.action <- attr(rankings, "na.action") - if (!is.null(na.action)) { - if (!is.null(weights)) weights <- weights[-na.action] - if (!is.null(adherence)) adherence <- adherence[-na.action] - if (!is.null(ranker)) ranker <- ranker[-na.action] - } - } - - # unpack grouped rankings - if (grouped_rankings){ - R <- attr(rankings, "R") - S <- attr(rankings, "S") - id <- attr(rankings, "id") - rankings <- attr(rankings, "rankings") - } - - # attributes - items <- colnames(rankings) - N <- ncol(rankings) # total number of objects - nr <- nrow(rankings) # number of rankings - - # weights - if (is.null(weights)){ - weights <- rep.int(1L, nr) - } - stopifnot(length(weights) == nr) - stopifnot(all(weights > 0L)) - - # check gamma prior specification - if (is.logical(gamma) && length(gamma) == 1L && !is.na(gamma) && !gamma) - gamma <- NULL # in case specified as FALSE - if (!is.null(gamma)) { # specified or TRUE - if (isTRUE(gamma)) gamma <- list(shape = 10, rate = 10) - stopifnot(names(gamma) == c("shape", "rate")) - stopifnot(all(lengths(gamma) == 1L)) - if (gamma$shape != gamma$rate) - warning("mean of adherence prior is not 1") - } - - # adherence should be per ranker - if (!is.null(adherence)) { - stopifnot(length(adherence) == max(ranker)) - stopifnot(all(adherence > 0L)) - a <- adherence[ranker] - } else if (!is.null(gamma)){ - adherence <- rep.int(1L, max(ranker)) - a <- adherence[ranker] - wa <- rowsum(weights, ranker)[,1] - } else a <- NULL - - if (!grouped_rankings){ - # items ranked from last to 1st place - R <- t(apply(rankings, 1L, order, decreasing = TRUE)) - mode(R) <-"integer" - - # adjacency matrix: wins over rest - # N.B. 
need even if `start` specified; used to check connectivity - X <- adjacency(rankings, weights = weights) - - # sizes of selected sets - set <- apply(rankings, 1L, function(x){ - last <- which(x == max(x)) - ind <- which(x > 0L) - # exclude untied last place - if (length(last) == 1L) ind <- setdiff(ind, last) - list(size = tabulate(x[ind])[x[ind]], item_id = ind) - }) - item_id <- unlist(lapply(set, `[[`, "item_id")) - S <- lapply(set, `[[`, "size") - ns <- lengths(S) - ws <- rep.int(weights, ns) - if (!is.null(adherence)) ranker_id <- rep.int(ranker, ns) - S <- unlist(S) - rm(set) - } else { - # adjacency matrix: wins over rest - # (id extracted from grouped_rankings object) - X <- matrix(0.0, N, N) - X[sort(unique(unlist(id)))] <- - rowsum(rep(weights, lengths(id)), unlist(id)) - class(X) <- c("adjacency", "matrix") - - # (S extracted from grouped_rankings object) - # N.B. here unlist indifferent order than ungrouped - ws <- (weights * as.logical(S))[as.logical(S)] - if (!is.null(adherence)) { - ranker_id <- (ranker * as.logical(S))[as.logical(S)] - } - item_id <- R[as.logical(S)] - S <- S[as.logical(S)] - } - # sufficient statistics - # for delta d, (number of sets with cardinality d)/cardinality - d <- sort(unique(S)) - D <- length(d) - B <- rowsum(ws, S)[,1L] - B <- B/d - # from now only only need weight/size per set, so replace S with this - S <- ws/S - rm(ws) - # for alpha - A <- numeric(N) - item <- sort(unique(item_id)) - if (is.null(adherence)){ - # sum over all sets st object i is in selected set weight/size - A[item] <- unname(rowsum(S, item_id)[,1L]) - } else { - # now (adherence * weight)/size - A[item] <- unname(rowsum(adherence[ranker_id]*S, item_id)[,1L]) - } - - - if (!is.null(normal)){ - # set npseudo to 0 - npseudo <- 0L - # check normal prior specification - stopifnot(names(normal) == c("mu", "Sigma")) - if (length(normal$mu) != N) - stop("`length(normal$mu)` is not equal to the number of items") - if (!identical(dim(normal$Sigma), c(N, N))) - stop("`normal$Sigma` is not a square matrix with number of rows ", - "equal to the number of items") - # compute inverse of variance covariance matrix (used in score function) - Rinv <- solve(chol(normal$Sigma)) - Kinv <- Rinv %*% t(Rinv) - } else Rinv <- Kinv <- NULL - - # check connectivity if npseudo = 0 and normal prior not specified - if (npseudo == 0L & is.null(normal)){ - out <- connectivity(X, verbose = FALSE) - if (out$no > 1L) - stop("Network is not fully connected - cannot estimate all ", - "item parameters with npseudo = 0") - } - - # unique rows with >= 1 element for logical matrix - # case where p is fixed - uniquerow <- function(M){ - len <- ncol(M) - if (len == 1L) return(1L) - pattern <- M[,1L] - max_exp <- .Machine$double.digits - for (k in seq_len(len - 1L)){ - # N.B. 
can't use integer here, can get integer overflow - pattern <- 2*pattern + M[,k] - if (k == len - 1L || - k %% max_exp == (max_exp - 1L)){ - pattern <- match(pattern, unique(pattern)) - max_exp <- .Machine$double.digits - ceiling(log2(max(pattern))) - } - } - as.integer(pattern) - } - - # max number of objects in set - nc <- max(rowSums(rankings > 0L)) - - # create W so cols are potential set sizes and value > 0 indicates ranking - # aggregate common sets - # - incorporate ranking weights by weighted count vs simple tabulate - W <- G <- list() # weight (currently rep count); group of rankings - P <- logical(nc) # set sizes present in rankings - minrank <- rep.int(1L, nr) - for (i in seq_len(nc)){ - p <- (nc - i + 1L) - set <- rankings >= minrank - r <- which(rowSums(set) == p) - P[p] <- p != 1L && length(r) - if (!P[p]) next - # separate adherence per ranking - if (!is.null(gamma) & !grouped_rankings){ - W[[p]] <- weights[r] - G[[p]] <- r - } else { - g <- uniquerow(set[r, , drop = FALSE]) - # combine within ranker (adherence likely different for each ranker) - if (!is.null(adherence)){ - x <- ranker[r]/10^ceiling(log10(max(ranker[r]))) + g - g <- match(x, x) - } - W[[p]] <- as.vector(unname(rowsum(weights[r], g))) - G[[p]] <- r[!duplicated(g)] - } - minrank[r] <- minrank[r] + 1L - } - P <- which(P) - - # if npseudo > 0 add npseudo wins and losses with hypothetical item - stopifnot(npseudo >= 0L) - if (npseudo > 0L){ - # update R with paired comparisons with hypothetical item (item N + 1) - R <- cbind(R, "NULL" = 0L) - pseudo <- matrix(0L, nrow = N, ncol = N + 1L) - pseudo[, 1L] <- N + 1L - pseudo[, 2L] <- seq_len(N) - R <- rbind(R, pseudo) - # update X with npseudo wins and losses with hypothetical item - X <- cbind(X, npseudo) - X <- rbind(X, c(rep.int(npseudo, N), 0L)) - # update adherence: set to 1 for ghost rater - if (!is.null(adherence)) a <- c(a, rep.int(1L, N)) - # update weights: 2*npseudo comparisons of each pair - W[[2L]] <- c(W[[2L]], rep.int(2L*npseudo, N)) - # update indices - G[[2L]] <- c(G[[2L]], (nr + 1L):(nr + N)) - # add set size of 2 to observed set sizes, if necessary - P <- unique(c(2L, P)) - # update A: npseudo wins for item N + 1 against each other item; - # npseudo wins for other items - A <- c(A + npseudo, N*npseudo) - # update B: 2*npseudo untied choices per item - B[1L] <- B[1L] + 2L*npseudo*N - } - - if (is.null(start)){ - if (!is.null(normal)) { - alpha <- exp(normal$mu) - } else { - # (scaled, un-damped) PageRank on underlying paired comparisons - ncv <- min(nrow(X), 10L) - alpha <- drop(abs(eigs(X/colSums(X), 1L, - opts = list(ncv = ncv))$vectors)) - } - delta <- c(1.0, rep.int(0.1, D - 1L)) - } else { - # if not "coef.PlackettLuce" object, assume on raw scale - # (with ability of hypothetical item 1), i.e. 
as returned coefficients) - if (!length(start) == N + D - 1) - stop(paste("`start` does not specify enough values for", N, "item", - paste("and", D - 1, "tie parameters")[D > 1])) - alpha <- start[seq_len(N)] - delta <- start[-seq_len(N)] - if (inherits(start, "coef.PlackettLuce")){ - # reverse identifiability constraints and put on raw scale - if (attr(start, "log")){ - alpha <- exp(alpha + attr(start, "const")) - delta <- exp(delta) - } else { - alpha <- alpha*attr(start, "const") - } - } - delta <- c(1.0, delta) - if (npseudo > 0L) { - # set alpha for hypothetical item to 1 as in original fit - if (inherits(start, "coef.PlackettLuce")){ - alpha <- c(alpha, 1.0) - } else { - # set to geometric mean as neutral position - alpha <- c(alpha, exp(mean(log(alpha)))) - } - } - } - if (npseudo) { - N <- N + 1L - alpha <- alpha/alpha[N] - } - - method <- match.arg(method, c("iterative scaling", "BFGS", "L-BFGS")) - if ((!is.null(normal) | !is.null(gamma)) && method == "iterative scaling"){ - if (method %in% names(call)){ - stop("when a prior is specified, `method` cannot be ", - "`\"iterative scaling\"`") - } else method <- "BFGS" - } - - if (method == "BFGS"){ - opt <- function(par, obj, gr, ...){ - dts <- list(...) - do.call("optim", - c(list(par, obj, gr, method = "BFGS", - control = c(eval(dts$control), - list(maxit = maxit[1L]))), - dts[setdiff(names(dts), "control")])) - } - } - - if (method == "L-BFGS"){ - if (trace) message("No trace implemented for L-BFGS") - opt <- function(par, obj, gr, ...){ - lbfgs::lbfgs(obj, gr, par, invisible = 1L, - max_iterations = maxit[1L], ...) - } - } - - # Optimization of log-worths and log-tie parameters via - obj_common <- function(par) { - alpha <- exp(par[1L:N]) - delta <- exp(par[-c(1L:N)]) - # assign to parent environment so can use further quantities in score - assign("fit", expectation("all", alpha, c(1.0, delta), - a, N, d, P, R, G, W), - envir = parent.env(environment())) - -loglik_common(c(alpha, delta), N, normal$mu, Rinv, A, B, fit) - } - gr_common <- function(par) { - alpha <- exp(par[1L:N]) - delta <- exp(par[-c(1L:N)]) - -score_common(c(alpha, delta), N, normal$mu, Kinv, A, B, fit) * - c(alpha, delta) - } - - # Optimization of log-adherence (for non-ghost raters) via - obj_adherence <- function(par){ - adherence <- exp(par) - # assign to parent environment so can use further quantities in score - assign("fit", normalization(alpha, c(1.0, delta), - adherence[ranker], d, P, R, G, W), - envir = parent.env(environment())) - -loglik_adherence(adherence, gamma$shape, gamma$rate, wa, Z, fit) - } - gr_adherence <- function(par) { - adherence <- exp(par) - -score_adherence(adherence, ranker, gamma$shape, gamma$rate, - wa, Z, fit) * adherence - } - - logp <- NULL - if (method != "iterative scaling"){ - i <- 0L - conv <- 1L - if (!is.null(gamma)){ - mode(maxit) <- "integer" - if (length(maxit) != 2L) maxit <- c(maxit, 10L) - } - repeat{ - # fit model with fixed adherence (a) - res <- opt(log(c(alpha, delta[-1L])), obj_common, gr_common, ...) 
- - if (i == maxit[2L] | is.null(gamma)) break - if (!is.null(gamma)){ - # update alpha, delta & sufficient statistics for adherence - alpha <- exp(res$par[1L:N]) - delta <- c(1.0, exp(res$par[-(1L:N)])) - Z <- unname(rowsum(S*log(alpha[item_id]), ranker_id)[,1L]) - if (i > 0L) logp_old <- logp - # log-posterior after worth update - logp <- -res$value + sum(wa*((gamma$shape - 1L)*log(adherence) - - gamma$rate*adherence)) - if (i > 0L && abs(logp_old - logp) <= - epsilon * (abs(logp) + epsilon)) { - conv <- 0L - break - } - i <- i + 1L - - # fit model with fixed worth/tie parameters - # ignore rankings involving ghost item (adherence fixed to 1) - res2 <- opt(log(adherence), obj_adherence, gr_adherence, ...) - - # update adherence & sufficient stats for alpha (real items) - adherence <- exp(res2$par) - a[1L:nr] <- adherence[ranker] - A[item] <- unname(rowsum(adherence[ranker_id]*S, item_id)[,1L]) - if (npseudo) A[item] <- A[item] + npseudo - # log-posterior after gamma update - # -res2$value + sum(B[-1]*log(delta)) - - # 0.5*tcrossprod((log(alpha) - normal$mu) %*% Rinv)[1] - } - } - if (!is.null(gamma)){ - iter <- i - } else { - conv <- res$convergence - iter <- res$counts # NULL for L-BFGS - } - res <- list(alpha = exp(res$par[1L:N]), - delta = c(1.0, exp(res$par[-(1L:N)])), - logl = -res$value) - } else { - res <- list(alpha = alpha, delta = delta) - res[c("expA", "expB", "theta")] <- - expectation("all", alpha, delta, a, N, d, P, R, G, W) - res$logl <- sum(B[-1L]*log(res$delta)[-1L]) + sum(A*log(res$alpha)) - - sum(res$theta) - oneUpdate <- function(res){ - # update all alphas - res$alpha <- res$alpha*A/res$expA - if (npseudo) res$alpha <- res$alpha/res$alpha[N] - # update all deltas - if (D > 1L) { - res$delta[-1L] <- B[-1L]/ - expectation("delta", res$alpha, res$delta, - a, N, d, P, R, G, W)$expB - } - res[c("expA", "expB", "theta")] <- - expectation("all", res$alpha, res$delta, - a, N, d, P, R, G, W) - res$logl <- sum(B[-1L]*log(res$delta)[-1L]) + - sum(A*log(res$alpha)) - sum(res$theta) - res - } - accelerate <- function(p, p1, p2){ - # only accelerate if parameter has changed in last iteration - z <- p2 != p1 - p2[z] <- p[z] - (p1[z] - p[z])^2L / (p2[z] - 2L * p1[z] + p[z]) - p2 - } - # stopping rule: compare observed & expected sufficient stats - checkConv <- function(res){ - eps <- abs(c(A, B[-1L]) - c(res$expA, res$delta[-1L]*res$expB)) - assign("eps", eps, envir = parent.env(environment())) - ifelse(all(eps < epsilon), 0L, 1L) - } - iter <- 0L - if ((conv <- checkConv(res)) == 0L) maxit <- 0L - updateIter <- function(res){ - if (trace) message("iter ", iter, ", loglik: ", res$logl) - assign("iter", iter + 1L, - envir = parent.env(environment())) - } - eps <- c(A, B[-1L]) - doSteffensen <- FALSE - while(iter < maxit[1L]){ - updateIter(res) - res <- oneUpdate(res) - if ((conv <- checkConv(res)) == 0L) break - if (all(eps < steffensen & !doSteffensen)) doSteffensen <- TRUE - # steffensen - if (doSteffensen){ - res1 <- oneUpdate(res) - if ((conv <- checkConv(res1)) == 0L) { - res <- res1 - break - } - res2 <- oneUpdate(res1) - if ((conv <- checkConv(res2)) == 0L) { - res <- res2 - break - } - # if negative worth or log-likelihood decreased, - # don't apply Steffensen - res$alpha <- - accelerate(res$alpha, res1$alpha, res2$alpha) - if (all(res$alpha > 0L)) { - res$delta[-1L] <- - accelerate(res$delta, res1$delta, res2$delta)[-1L] - res[c("expA", "expB", "theta")] <- - expectation("all", res$alpha, res$delta, - a, N, d, P, R, G, W) - res$logl <- - sum(B[-1L]*log(res$delta)[-1L]) + - 
sum(A*log(res$alpha)) - sum(res$theta) - if (res$logl < res2$logl) { - res <- res2 - } else if ((conv <- checkConv(res)) == 0L) break - } else res <- res2 - } - } - if (trace) message("iter ", iter, ", loglik: ", res$logl) - } - if (conv[1L] == 1L) warning("Iterations have not converged.") - - res$delta <- structure(res$delta, names = paste0("tie", names(B)))[-1L] - - if (npseudo > 0L) { - # drop hypothetical object - res$alpha <- res$alpha[-N] - N <- N - 1L - # drop weights and indices for pseudodata - i <- seq_len(length(W[[2L]]) - N) - W[[2L]] <- W[[2L]][i] - G[[2L]] <- G[[2L]][i] - if (!length(W[[2L]])) P <- setdiff(P, 2L) - # remove contribution to A and B - A <- A[-(N + 1L)] - npseudo - B[1L] <- B[1L] - 2L*npseudo*N - } - names(res$alpha) <- items - rank <- N + D - 2L - - cf <- c(res$alpha, res$delta) - - # save normal prior info - if (!is.null(normal)) { - normal_prior <- list(mu = normal$mu, invSigma = Kinv) - } else normal_prior <- NULL - - # recompute log-likelihood excluding pseudo-observations/priors - if (npseudo > 0L | !is.null(normal) | !is.null(gamma)) { - if (!is.null(normal) & is.null(gamma)) { - # logp not yet assigned - logp <- res$logl - } - normal <- NULL - logl <- -obj_common(log(cf)) - } else logl <- res$logl - # null log-likelihood - set adherence to 1 if estimated - if (!is.null(gamma)) a <- rep.int(1L, length(a)) - null.loglik <- -obj_common(rep.int(0L, length(cf))) - - # frequencies of sets selected from, for sizes 2 to max observed - freq <- vapply(W[P], sum, 1.0) - # number of possible selections overall - n <- sum(vapply(P, choose, numeric(D), k = d) %*% freq) - df.residual <- n - sum(freq) - rank - - fit <- list(call = call, - coefficients = cf, - loglik = logl, - null.loglik = null.loglik, - df.residual = df.residual, - df.null = n - sum(freq), #naming consistent with glm - rank = rank, - logposterior = logp, - gamma = gamma, - normal = normal_prior, - iter = iter, - rankings = rankings, - weights = weights, - adherence = adherence, - ranker = ranker, - ties = d, - conv = conv, - na.action = na.action) - class(fit) <- "PlackettLuce" - fit -} +#' Fit a Plackett-Luce Model +#' +#' Fit a Plackett-Luce model to a set of rankings. The rankings may be partial +#' (each ranking completely ranks a subset of the items) and include ties of +#' arbitrary order. +#' +#' @section Model definition: +#' +#' A single ranking is given by +#' \deqn{R = \{C_1, C_2, \ldots, C_J\}}{R = {C_1, C_2, \ldots, C_J}} +#' where the items in set \eqn{C_1} are ranked higher than (better than) the +#' items in \eqn{C_2}, and so on. If there are multiple objects in set \eqn{C_j} +#' these items are tied in the ranking. +#' +#' For a set if items \eqn{S}, let +#' \deqn{f(S) = \delta_{|S|} +#' \left(\prod_{i \in S} \alpha_i \right)^\frac{1}{|S|}}{ +#' f(S) = delta_{|S|} * (prod_{i in S} alpha_i)^(1/|S|)} +#' where \eqn{|S|} is the cardinality (size) of the set, \eqn{\delta_n}{delta_n} +#' is a parameter related to the prevalence of ties of order \eqn{n} +#' (with \eqn{\delta_1 \equiv 1}), and \eqn{\alpha_i}{alpha_i} is a +#' parameter representing the worth of item \eqn{i}. 
+#' Then under an extension of the Plackett-Luce model allowing ties up to order
+#' \eqn{D}, the probability of the ranking \eqn{R} is given by
+#' \deqn{\prod_{j = 1}^J \frac{f(C_j)}{
+#' \sum_{k = 1}^{\min(D_j, D)} \sum_{S \in {A_j \choose k}} f(S)}}{
+#' prod_{j = 1}^J f(C_j)/
+#' (sum_{k = 1}^{min(D_j, D)} sum_{S in choose(A_j, k)} f(S))}
+#' where \eqn{D_j} is the cardinality of \eqn{A_j}, the set of
+#' alternatives from which \eqn{C_j} is chosen, and
+#' \eqn{A_j \choose k}{choose(A_j, k)} is all the possible choices of \eqn{k}
+#' items from \eqn{A_j}. The value of \eqn{D} can be set to the maximum number
+#' of tied items observed in the data, so that \eqn{\delta_n = 0}{delta_n = 0}
+#' for \eqn{n > D}.
+#'
+#' When the worth parameters are constrained to sum to one, they represent the
+#' probability that the corresponding item comes first in a ranking of all
+#' items, given that first place is not tied.
+#'
+#' The 2-way tie prevalence parameter \eqn{\delta_2}{delta_2} is related to
+#' the probability that two items \emph{of equal worth} tie for
+#' first place, given that the first place is not a 3-way or higher tie.
+#' Specifically, that probability is
+#' \eqn{\delta_2/(2 + \delta_2)}{delta_2/(2 + delta_2)}.
+#'
+#' The 3-way and higher tie-prevalence parameters are similarly interpretable,
+#' in terms of tie probabilities among equal-worth items.
+#'
+#' When intermediate tie orders are not observed (e.g. ties of order 2
+#' and order 4 are observed, but no ties of order 3), the maximum
+#' likelihood estimate of the corresponding tie prevalence parameters
+#' is zero, so these parameters are excluded from the model.
+#'
+#' @section Pseudo-rankings:
+#'
+#' In order for the maximum likelihood estimate of an object's worth to be
+#' defined, the network of rankings must be strongly connected. This means that
+#' in every possible partition of the objects into two nonempty subsets, some
+#' object in the second set is ranked higher than some object in the first set
+#' at least once.
+#'
+#' If the network of rankings is not strongly connected then pseudo-rankings
+#' may be used to connect the network. This approach posits a hypothetical
+#' object with log-worth 0 and adds \code{npseudo} wins and \code{npseudo}
+#' losses to the set of rankings.
+#'
+#' The parameter \code{npseudo} is the prior strength. With \code{npseudo = 0}
+#' the MLE is the posterior mode. As \code{npseudo} approaches
+#' infinity the log-worth estimates all shrink towards 0. The default,
+#' \code{npseudo = 0.5}, is sufficient to connect the network and has a weak
+#' shrinkage effect. Even for networks that are already connected, adding
+#' pseudo-rankings typically reduces both the bias and variance of the
+#' estimators of the worth parameters.
+#'
+#' @section Incorporating prior information on log-worths:
+#'
+#' Prior information can be incorporated by using \code{normal} to specify a
+#' multivariate normal prior on the \emph{log}-worths. The log-worths are then
+#' estimated by maximum a posteriori (MAP) estimation. Model summaries
+#' (deviance, AIC, standard errors) are based on the log-likelihood evaluated
+#' at the MAP estimates, resulting in a finite sample bias that should
+#' disappear as the number of rankings increases. Inference based on these
+#' model summaries is valid as long as the prior is considered fixed and not
+#' tuned as part of the model.
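The tie-probability interpretation in the model-definition section above can be checked numerically. The following standalone R sketch is illustrative only (it is not code from the package; the function name `f` and the toy values of `alpha` and `delta` are invented for the example). It evaluates f(S) as defined above and confirms that two items of equal worth tie for first place with probability delta_2/(2 + delta_2):

# f(S) = delta_{|S|} * (prod_{i in S} alpha_i)^(1/|S|), as in the model definition
f <- function(S, alpha, delta) {
  n <- length(S)
  delta[n] * prod(alpha[S])^(1 / n)
}
alpha <- c(a = 1, b = 1)   # two items of equal worth (toy values)
delta <- c(1, 0.5)         # delta_1 = 1, delta_2 = 0.5 (toy value)
# P(first place is the tie {a, b}) when choosing from A = {a, b} with D = 2:
p_tie <- f(c("a", "b"), alpha, delta) /
  (f("a", alpha, delta) + f("b", alpha, delta) + f(c("a", "b"), alpha, delta))
p_tie                      # 0.2
0.5 / (2 + 0.5)            # delta_2 / (2 + delta_2) = 0.2, matching the statement above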
+#'
+#' Incorporating a prior is an alternative method of penalization, therefore
+#' \code{npseudo} is set to zero when a prior is specified.
+#'
+#' @section Incorporating ranker adherence parameters:
+#'
+#' When rankings come from different rankers, the model can be extended to
+#' allow for varying reliability of the rankers, as proposed by Raman and
+#' Joachims (2014). In particular, replacing \eqn{f(S)} by
+#' \deqn{h(S) = \delta_{|S|}
+#' \left(\prod_{i \in S} \alpha_i \right)^\frac{\eta_g}{|S|}}{
+#' h(S) = delta_{|S|} * (prod_{i in S} alpha_i)^(eta_g/|S|)}
+#' where \eqn{\eta_g > 0}{eta_g > 0} is the adherence parameter for ranker
+#' \eqn{g}. In the standard model, all rankers are assumed to have equal
+#' reliability, so \eqn{\eta_g = 1}{eta_g = 1} for all rankers.
+#' Higher \eqn{\eta_g}{eta_g} increases the distance between item
+#' worths, giving greater weight to the ranker's choice. Conversely, lower
+#' \eqn{\eta_g}{eta_g} shrinks the item worths towards equality so the
+#' ranker's choice is less relevant.
+#'
+#' The adherence parameters are not estimable by maximum likelihood, since
+#' for given item worths the maximum likelihood estimate of adherence would be
+#' infinity for rankers that give rankings consistent with the items ordered by
+#' worth and zero for all other rankers. Therefore it is essential to include a
+#' prior on the adherence parameters when these are estimated rather than fixed.
+#' Setting \code{gamma = TRUE} specifies the default
+#' \eqn{\Gamma(10,10)}{Gamma(10,10)} prior, which has a mean of
+#' 1 and a probability of 0.99 that the adherence is between 0.37 and 2.
+#' Alternative parameters can be specified by a list with elements \code{shape}
+#' and \code{rate}. Setting the shape and rate to a common value
+#' \eqn{\theta}{theta} specifies a mean of 1; \eqn{\theta \ge}{theta >=} 2 will
+#' give low prior probability to near-zero adherence; as \eqn{\theta}{theta}
+#' increases the density becomes more concentrated (and more symmetrical)
+#' about 1.
+#'
+#' Since the number of adherence parameters will typically be large and it is
+#' assumed the worth and tie parameters are of primary interest, the adherence
+#' parameters are not included in model summaries, but are included in the
+#' returned object.
+#'
+#' @section Controlling the fit:
+#'
+#' For models without priors, using \code{npseudo = 0} will give standard
+#' maximum likelihood estimates, if the network is connected (and throw an
+#' error otherwise).
+#'
+#' The fitting algorithm is set by the \code{method} argument. The default
+#' method \code{"iterative scaling"} is a slow but reliable approach. In
+#' addition, this has the most control over the accuracy of the final fit,
+#' since convergence is determined by direct comparison of the observed and
+#' expected values of the sufficient statistics for the worth parameters,
+#' rather than a tolerance on change in the log-likelihood.
+#'
+#' The \code{"iterative scaling"} algorithm is slow because it is a first order
+#' method (does not use derivatives of the likelihood). From a set of starting
+#' values that are 'close enough' to the final solution, the algorithm can be
+#' accelerated using
+#' \href{https://en.wikipedia.org/wiki/Steffensen's_method}{Steffensen's method}.
+#' \code{PlackettLuce} attempts to apply Steffensen's acceleration when all
+#' differences between the observed and expected values of the sufficient
+#' statistics are less than \code{steffensen}.
This is an ad-hoc rule defining
+#' 'close enough' and in some cases the acceleration may produce negative
+#' worth parameters or decrease the log-likelihood. \code{PlackettLuce} will
+#' only apply the update when it makes an improvement.
+#'
+#' The \code{"BFGS"} and \code{"L-BFGS"} algorithms are second order methods,
+#' and therefore can be quicker than the default method. Control parameters
+#' can be passed on to \code{\link[stats]{optim}} or \code{\link[lbfgs]{lbfgs}}.
+#'
+#' For models with priors, the iterative scaling method cannot be used, so BFGS
+#' is used by default.
+#'
+#' @seealso
+#'
+#' Handling rankings: \code{\link{rankings}}, \code{\link{aggregate}},
+#' \code{\link{group}}, \code{\link{choices}},
+#' \code{\link{adjacency}}, \code{\link{connectivity}}.
+#'
+#' Inspect fitted Plackett-Luce models: \code{\link{coef}}, \code{deviance},
+#' \code{\link{fitted}}, \code{\link{itempar}}, \code{logLik}, \code{print},
+#' \code{\link{qvcalc}}, \code{\link{summary}}, \code{\link{vcov}}.
+#'
+#' Fit Plackett-Luce tree: \code{pltree}.
+#'
+#' Example data sets: \code{\link{beans}}, \code{\link{nascar}},
+#' \code{\link{pudding}}, \code{\link{preflib}}.
+#'
+#' Vignette: \code{vignette("Overview", package = "PlackettLuce")}.
+#'
+#' @note As the maximum tie order increases, the number of possible choices for
+#' each rank increases rapidly, particularly when the total number of items is
+#' high. This means that the model will be slower to fit with higher \eqn{D}.
+#' In addition, due to the current implementation of the `vcov()` method,
+#' computation of the standard errors (as by `summary()`) can take almost as
+#' long as the model fit and may even become infeasible due to memory limits.
+#' As a rule of thumb, for > 10 items and > 1000 rankings, we recommend
+#' `PlackettLuce()` for ties up to order 4. For higher order ties, a
+#' rank-ordered logit model (see [ROlogit::rologit()]) or a
+#' generalized Mallows model (as in [BayesMallows::compute_mallows()]) may be
+#' more suitable, as these do not model tied events explicitly.
+#'
+#' @param rankings a \code{"\link{rankings}"} object, or an object that can be
+#' coerced by \code{as.rankings}. An [`"aggregated_rankings"`][aggregate()]
+#' object can be used to specify rankings and weights simultaneously.
+#' A \code{"\link{grouped_rankings}"} object should be used when estimating
+#' adherence for rankers with multiple rankings per ranker.
+#' @param npseudo when using pseudodata: the number of wins and losses to add
+#' between each object and a hypothetical reference object.
+#' @param normal an optional list with elements named \code{mu} and \code{Sigma}
+#' specifying the mean and covariance matrix of a multivariate normal prior on
+#' the \emph{log} worths.
+#' @param gamma an optional list with elements named \code{shape} and \code{rate}
+#' specifying parameters of a gamma prior on adherence parameters for each
+#' ranker (use \code{grouped_rankings} to group multiple rankings by ranker).
+#' The short-cut \code{TRUE} may be used to specify a Gamma(10, 10) prior. If
+#' \code{NULL} (or \code{FALSE}), adherence is fixed to \code{adherence} for
+#' all rankers.
+#' @param adherence an optional vector of adherence values for each ranker. If
+#' missing, adherence is fixed to 1 for all rankers. If \code{gamma} is not
+#' \code{NULL}, this specifies the starting values for the adherence.
+#' @param weights an optional vector of weights for each ranking.
+#' @param na.action a function to handle any missing rankings, see +#' [na.omit()]. +#' @param start starting values for the worth parameters and the tie parameters +#' on the raw scale (worth parameters need not be scaled to sum to 1). If +#' \code{normal} is specified, \code{exp(normal$mu)} is used as starting values +#' for the worth parameters. Coefficients from a previous fit can be passed as +#' the result of a call to \code{coef.PlackettLuce}, or the \code{coefficients} +#' element of a \code{"PlackettLuce"} object. +#' @param method the method to be used for fitting: \code{"iterative scaling"} +#' (iterative scaling to sequentially update the parameter values), +#' \code{"BFGS"} (the BFGS optimisation algorithm through the +#' \code{\link{optim}} interface), \code{"L-BFGS"} (the limited-memory BFGS +#' optimisation algorithm as implemented in the \code{\link[lbfgs]{lbfgs}} +#' package). Iterative scaling is used by default, unless a prior is specified +#' by \code{normal} or \code{gamma}, in which case the default is \code{"BFGS"}. +#' @param epsilon the maximum absolute difference between the observed and +#' expected sufficient statistics for the ability parameters at convergence. +#' @param steffensen a threshold defined as for \code{epsilon} after which to +#' apply Steffensen acceleration to the iterative scaling updates. +#' @param maxit a vector specifying the maximum number of iterations. If +#' \code{gamma} is \code{NULL}, only the first element is used and specifies the +#' maximum number of iterations of the algorithm specified by \code{method}. If +#' \code{gamma} is not \code{NULL}, a second element may be supplied to specify +#' the maximum number of iterations of an alternating algorithm, where +#' the adherence parameters are updated alternately with the other parameters. +#' The default is to use 10 outer iterations. +#' @param trace logical, if \code{TRUE} show trace of iterations. +#' @param verbose logical, if \code{TRUE} show messages from validity checks on +#' the rankings. +#' @param ... additional arguments passed to \code{optim} or \code{lbfgs}. +#' In particular the convergence tolerance may be adjusted using e.g. +#' \code{control = list(reltol = 1e-10)}. +#' +#' @return An object of class \code{"PlackettLuce"}, which is a list containing +#' the following elements: +#' \item{call}{ The matched call. } +#' \item{coefficients}{ The model coefficients. } +#' \item{loglik}{ The maximized log-likelihood. } +#' \item{null.loglik}{ The maximized log-likelihood for the null model (all +#' alternatives including ties have equal probability). } +#' \item{df.residual}{ The residual degrees of freedom. } +#' \item{df.null}{ The residual degrees of freedom for the null model. } +#' \item{rank}{ The rank of the model. } +#' \item{logposterior}{ If a prior was specified, the maximised log posterior.} +#' \item{gamma}{ If a gamma prior was specified, the list of parameters. } +#' \item{normal}{ If a normal prior was specified, the list of parameters. } +#' \item{iter}{ The number of iterations run. } +#' \item{rankings}{ The rankings passed to \code{rankings}, converted to a +#' \code{"rankings"} object if necessary. } +#' \item{weights}{ The weights applied to each ranking in the fitting. } +#' \item{adherence}{ The fixed or estimated adherence per ranker. 
} +#' \item{ranker}{ The ranker index mapping rankings to rankers (the +#' \code{"index"} attribute of \code{rankings} if specified as a +#' \code{"grouped_rankings"} object.)} +#' \item{ties}{ The observed tie orders corresponding to the estimated tie +#' parameters. } +#' \item{conv}{ The convergence code: 0 for successful convergence; 1 if reached +#' \code{maxit} (outer) iterations without convergence; 2 if Steffensen +#' acceleration cause log-likelihood to increase; negative number if L-BFGS +#' algorithm failed for other reason.} +#' @references +#' Raman, K. and Joachims, T. (2014) Methods for Ordinal Peer Grading. +#' \href{https://arxiv.org/abs/1404.3656}{arXiv:1404.3656}. +#' @examples +#' # Six partial rankings of four objects, 1 is top rank, e.g +#' # first ranking: item 1, item 2 +#' # second ranking: item 2, item 3, item 4, item 1 +#' # third ranking: items 2, 3, 4 tie for first place, item 1 second +#' R <- matrix(c(1, 2, 0, 0, +#' 4, 1, 2, 3, +#' 2, 1, 1, 1, +#' 1, 2, 3, 0, +#' 2, 1, 1, 0, +#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) +#' colnames(R) <- c("apple", "banana", "orange", "pear") +#' +#' # create rankings object +#' R <- as.rankings(R) +#' +#' # Standard maximum likelihood estimates +#' mod_mle <- PlackettLuce(R, npseudo = 0) +#' coef(mod_mle) +#' +#' # Fit with default settings +#' mod <- PlackettLuce(R) +#' # log-worths are shrunk towards zero +#' coef(mod) +#' +#' # independent N(0, 9) priors on log-worths, as in Raman and Joachims +#' prior <- list(mu = rep(0, ncol(R)), +#' Sigma = diag(rep(9, ncol(R)))) +#' mod_normal <- PlackettLuce(rankings = R, normal = prior) +#' # slightly weaker shrinkage effect vs pseudo-rankings, +#' # with less effect on tie parameters (but note small number of rankings here) +#' coef(mod_normal) +#' +#' # estimate adherence assuming every ranking is from a separate ranker +#' mod_separate <- PlackettLuce(rankings = R, normal = prior, gamma = TRUE) +#' coef(mod_separate) +#' # gives more weight to rankers 4 & 6 which rank apple first, +#' # so worth of apple increased relative to banana +#' mod_separate$adherence +#' +#' # estimate adherence based on grouped rankings +#' # - assume two rankings from each ranker +#' G <- group(R, rep(1:3, each = 2)) +#' mod_grouped <- PlackettLuce(rankings = G, normal = prior, gamma = TRUE) +#' coef(mod_grouped) +#' # first ranker is least consistent so down-weighted +#' mod_grouped$adherence +#' +#' @importFrom igraph as_adj graph_from_edgelist +#' @importFrom RSpectra eigs +#' @importFrom stats optim +#' @export +PlackettLuce <- function(rankings, + npseudo = 0.5, + normal = NULL, + gamma = NULL, + adherence = NULL, + weights = freq(rankings), + na.action = getOption("na.action"), + start = NULL, + method = c("iterative scaling", "BFGS", "L-BFGS"), + epsilon = 1e-7, steffensen = 0.1, maxit = c(500, 10), + trace = FALSE, verbose = TRUE, ...){ + call <- match.call() + + # check rankings object + grouped_rankings <- inherits(rankings, "grouped_rankings") + if (inherits(rankings, "aggregated_rankings")){ + force(weights) + rankings$freq <- NULL + rankings <- as.rankings(rankings) + } + if (!grouped_rankings & !inherits(rankings, "rankings")){ + rankings <- suppressWarnings(as.rankings(rankings, verbose = verbose)) + } + + # define ranker + if (grouped_rankings){ + ranker <- attr(rankings, "index") + # if weights are per group id expand to be per ranking + if (!is.null(weights) & length(weights) == max(ranker)) { + weights <- weights[ranker] + } + }else if (!is.null(adherence)| !is.null(gamma)){ + ranker 
<- seq_len(nrow(rankings)) + } else ranker <- NULL + + if (!is.null(na.action)) { + rankings <- match.fun(na.action)(rankings) + na.action <- attr(rankings, "na.action") + if (!is.null(na.action)) { + if (!is.null(weights)) weights <- weights[-na.action] + if (!is.null(adherence)) adherence <- adherence[-na.action] + if (!is.null(ranker)) ranker <- ranker[-na.action] + } + } + + # unpack grouped rankings + if (grouped_rankings){ + R <- attr(rankings, "R") + S <- attr(rankings, "S") + id <- attr(rankings, "id") + rankings <- attr(rankings, "rankings") + } + + # attributes + items <- colnames(rankings) + N <- ncol(rankings) # total number of objects + nr <- nrow(rankings) # number of rankings + + # weights + if (is.null(weights)){ + weights <- rep.int(1L, nr) + } + stopifnot(length(weights) == nr) + stopifnot(all(weights > 0L)) + + # check gamma prior specification + if (is.logical(gamma) && length(gamma) == 1L && !is.na(gamma) && !gamma) + gamma <- NULL # in case specified as FALSE + if (!is.null(gamma)) { # specified or TRUE + if (isTRUE(gamma)) gamma <- list(shape = 10, rate = 10) + stopifnot(names(gamma) == c("shape", "rate")) + stopifnot(all(lengths(gamma) == 1L)) + if (gamma$shape != gamma$rate) + warning("mean of adherence prior is not 1") + } + + # adherence should be per ranker + if (!is.null(adherence)) { + stopifnot(length(adherence) == max(ranker)) + stopifnot(all(adherence > 0L)) + a <- adherence[ranker] + } else if (!is.null(gamma)){ + adherence <- rep.int(1L, max(ranker)) + a <- adherence[ranker] + wa <- rowsum(weights, ranker)[,1] + } else a <- NULL + + if (!grouped_rankings){ + # items ranked from last to 1st place + R <- t(apply(rankings, 1L, order, decreasing = TRUE)) + mode(R) <-"integer" + + # adjacency matrix: wins over rest + # N.B. need even if `start` specified; used to check connectivity + X <- adjacency(rankings, weights = weights) + + # sizes of selected sets + set <- apply(rankings, 1L, function(x){ + last <- which(x == max(x)) + ind <- which(x > 0L) + # exclude untied last place + if (length(last) == 1L) ind <- setdiff(ind, last) + list(size = tabulate(x[ind])[x[ind]], item_id = ind) + }) + item_id <- unlist(lapply(set, `[[`, "item_id")) + S <- lapply(set, `[[`, "size") + ns <- lengths(S) + ws <- rep.int(weights, ns) + if (!is.null(adherence)) ranker_id <- rep.int(ranker, ns) + S <- unlist(S) + rm(set) + } else { + # adjacency matrix: wins over rest + # (id extracted from grouped_rankings object) + X <- matrix(0.0, N, N) + X[sort(unique(unlist(id)))] <- + rowsum(rep(weights, lengths(id)), unlist(id)) + class(X) <- c("adjacency", "matrix") + + # (S extracted from grouped_rankings object) + # N.B. 
here unlist indifferent order than ungrouped + ws <- (weights * as.logical(S))[as.logical(S)] + if (!is.null(adherence)) { + ranker_id <- (ranker * as.logical(S))[as.logical(S)] + } + item_id <- R[as.logical(S)] + S <- S[as.logical(S)] + } + # sufficient statistics + # for delta d, (number of sets with cardinality d)/cardinality + d <- sort(unique(S)) + D <- length(d) + B <- rowsum(ws, S)[,1L] + B <- B/d + # from now only only need weight/size per set, so replace S with this + S <- ws/S + rm(ws) + # for alpha + A <- numeric(N) + item <- sort(unique(item_id)) + if (is.null(adherence)){ + # sum over all sets st object i is in selected set weight/size + A[item] <- unname(rowsum(S, item_id)[,1L]) + } else { + # now (adherence * weight)/size + A[item] <- unname(rowsum(adherence[ranker_id]*S, item_id)[,1L]) + } + + + if (!is.null(normal)){ + # set npseudo to 0 + npseudo <- 0L + # check normal prior specification + stopifnot(names(normal) == c("mu", "Sigma")) + if (length(normal$mu) != N) + stop("`length(normal$mu)` is not equal to the number of items") + if (!identical(dim(normal$Sigma), c(N, N))) + stop("`normal$Sigma` is not a square matrix with number of rows ", + "equal to the number of items") + # compute inverse of variance covariance matrix (used in score function) + Rinv <- solve(chol(normal$Sigma)) + Kinv <- Rinv %*% t(Rinv) + } else Rinv <- Kinv <- NULL + + # check connectivity if npseudo = 0 and normal prior not specified + if (npseudo == 0L & is.null(normal)){ + out <- connectivity(X, verbose = FALSE) + if (out$no > 1L) + stop("Network is not fully connected - cannot estimate all ", + "item parameters with npseudo = 0") + } + + # unique rows with >= 1 element for logical matrix + # case where p is fixed + uniquerow <- function(M){ + len <- ncol(M) + if (len == 1L) return(1L) + pattern <- M[,1L] + max_exp <- .Machine$double.digits + for (k in seq_len(len - 1L)){ + # N.B. 
can't use integer here, can get integer overflow + pattern <- 2*pattern + M[,k] + if (k == len - 1L || + k %% max_exp == (max_exp - 1L)){ + pattern <- match(pattern, unique(pattern)) + max_exp <- .Machine$double.digits - ceiling(log2(max(pattern))) + } + } + as.integer(pattern) + } + + # max number of objects in set + nc <- max(rowSums(rankings > 0L)) + + # create W so cols are potential set sizes and value > 0 indicates ranking + # aggregate common sets + # - incorporate ranking weights by weighted count vs simple tabulate + W <- G <- list() # weight (currently rep count); group of rankings + P <- logical(nc) # set sizes present in rankings + minrank <- rep.int(1L, nr) + for (i in seq_len(nc)){ + p <- (nc - i + 1L) + set <- rankings >= minrank + r <- which(rowSums(set) == p) + P[p] <- p != 1L && length(r) + if (!P[p]) next + # separate adherence per ranking + if (!is.null(gamma) & !grouped_rankings){ + W[[p]] <- weights[r] + G[[p]] <- r + } else { + g <- uniquerow(set[r, , drop = FALSE]) + # combine within ranker (adherence likely different for each ranker) + if (!is.null(adherence)){ + x <- ranker[r]/10^ceiling(log10(max(ranker[r]))) + g + g <- match(x, x) + } + W[[p]] <- as.vector(unname(rowsum(weights[r], g))) + G[[p]] <- r[!duplicated(g)] + } + minrank[r] <- minrank[r] + 1L + } + P <- which(P) + + # if npseudo > 0 add npseudo wins and losses with hypothetical item + stopifnot(npseudo >= 0L) + if (npseudo > 0L){ + # update R with paired comparisons with hypothetical item (item N + 1) + R <- cbind(R, "NULL" = 0L) + pseudo <- matrix(0L, nrow = N, ncol = N + 1L) + pseudo[, 1L] <- N + 1L + pseudo[, 2L] <- seq_len(N) + R <- rbind(R, pseudo) + # update X with npseudo wins and losses with hypothetical item + X <- cbind(X, npseudo) + X <- rbind(X, c(rep.int(npseudo, N), 0L)) + # update adherence: set to 1 for ghost rater + if (!is.null(adherence)) a <- c(a, rep.int(1L, N)) + # update weights: 2*npseudo comparisons of each pair + W[[2L]] <- c(W[[2L]], rep.int(2L*npseudo, N)) + # update indices + G[[2L]] <- c(G[[2L]], (nr + 1L):(nr + N)) + # add set size of 2 to observed set sizes, if necessary + P <- unique(c(2L, P)) + # update A: npseudo wins for item N + 1 against each other item; + # npseudo wins for other items + A <- c(A + npseudo, N*npseudo) + # update B: 2*npseudo untied choices per item + B[1L] <- B[1L] + 2L*npseudo*N + } + + if (is.null(start)){ + if (!is.null(normal)) { + alpha <- exp(normal$mu) + } else { + # (scaled, un-damped) PageRank on underlying paired comparisons + ncv <- min(nrow(X), 10L) + alpha <- drop(abs(eigs(X/colSums(X), 1L, + opts = list(ncv = ncv))$vectors)) + } + delta <- c(1.0, rep.int(0.1, D - 1L)) + } else { + # if not "coef.PlackettLuce" object, assume on raw scale + # (with ability of hypothetical item 1), i.e. 
as returned coefficients) + if (!length(start) == N + D - 1) + stop(paste("`start` does not specify enough values for", N, "item", + paste("and", D - 1, "tie parameters")[D > 1])) + alpha <- start[seq_len(N)] + delta <- start[-seq_len(N)] + if (inherits(start, "coef.PlackettLuce")){ + # reverse identifiability constraints and put on raw scale + if (attr(start, "log")){ + alpha <- exp(alpha + attr(start, "const")) + delta <- exp(delta) + } else { + alpha <- alpha*attr(start, "const") + } + } + delta <- c(1.0, delta) + if (npseudo > 0L) { + # set alpha for hypothetical item to 1 as in original fit + if (inherits(start, "coef.PlackettLuce")){ + alpha <- c(alpha, 1.0) + } else { + # set to geometric mean as neutral position + alpha <- c(alpha, exp(mean(log(alpha)))) + } + } + } + if (npseudo) { + N <- N + 1L + alpha <- alpha/alpha[N] + } + + method <- match.arg(method, c("iterative scaling", "BFGS", "L-BFGS")) + if ((!is.null(normal) | !is.null(gamma)) && method == "iterative scaling"){ + if (method %in% names(call)){ + stop("when a prior is specified, `method` cannot be ", + "`\"iterative scaling\"`") + } else method <- "BFGS" + } + + if (method == "BFGS"){ + opt <- function(par, obj, gr, ...){ + dts <- list(...) + do.call("optim", + c(list(par, obj, gr, method = "BFGS", + control = c(eval(dts$control), + list(maxit = maxit[1L]))), + dts[setdiff(names(dts), "control")])) + } + } + + if (method == "L-BFGS"){ + if (trace) message("No trace implemented for L-BFGS") + opt <- function(par, obj, gr, ...){ + lbfgs::lbfgs(obj, gr, par, invisible = 1L, + max_iterations = maxit[1L], ...) + } + } + + # Optimization of log-worths and log-tie parameters via + obj_common <- function(par) { + alpha <- exp(par[1L:N]) + delta <- exp(par[-c(1L:N)]) + # assign to parent environment so can use further quantities in score + assign("fit", expectation("all", alpha, c(1.0, delta), + a, N, d, P, R, G, W), + envir = parent.env(environment())) + -loglik_common(c(alpha, delta), N, normal$mu, Rinv, A, B, fit) + } + gr_common <- function(par) { + alpha <- exp(par[1L:N]) + delta <- exp(par[-c(1L:N)]) + -score_common(c(alpha, delta), N, normal$mu, Kinv, A, B, fit) * + c(alpha, delta) + } + + # Optimization of log-adherence (for non-ghost raters) via + obj_adherence <- function(par){ + adherence <- exp(par) + # assign to parent environment so can use further quantities in score + assign("fit", normalization(alpha, c(1.0, delta), + adherence[ranker], d, P, R, G, W), + envir = parent.env(environment())) + -loglik_adherence(adherence, gamma$shape, gamma$rate, wa, Z, fit) + } + gr_adherence <- function(par) { + adherence <- exp(par) + -score_adherence(adherence, ranker, gamma$shape, gamma$rate, + wa, Z, fit) * adherence + } + + logp <- NULL + if (method != "iterative scaling"){ + i <- 0L + conv <- 1L + if (!is.null(gamma)){ + mode(maxit) <- "integer" + if (length(maxit) != 2L) maxit <- c(maxit, 10L) + } + repeat{ + # fit model with fixed adherence (a) + res <- opt(log(c(alpha, delta[-1L])), obj_common, gr_common, ...) 
+ + if (i == maxit[2L] | is.null(gamma)) break + if (!is.null(gamma)){ + # update alpha, delta & sufficient statistics for adherence + alpha <- exp(res$par[1L:N]) + delta <- c(1.0, exp(res$par[-(1L:N)])) + Z <- unname(rowsum(S*log(alpha[item_id]), ranker_id)[,1L]) + if (i > 0L) logp_old <- logp + # log-posterior after worth update + logp <- -res$value + sum(wa*((gamma$shape - 1L)*log(adherence) - + gamma$rate*adherence)) + if (i > 0L && abs(logp_old - logp) <= + epsilon * (abs(logp) + epsilon)) { + conv <- 0L + break + } + i <- i + 1L + + # fit model with fixed worth/tie parameters + # ignore rankings involving ghost item (adherence fixed to 1) + res2 <- opt(log(adherence), obj_adherence, gr_adherence, ...) + + # update adherence & sufficient stats for alpha (real items) + adherence <- exp(res2$par) + a[1L:nr] <- adherence[ranker] + A[item] <- unname(rowsum(adherence[ranker_id]*S, item_id)[,1L]) + if (npseudo) A[item] <- A[item] + npseudo + # log-posterior after gamma update + # -res2$value + sum(B[-1]*log(delta)) - + # 0.5*tcrossprod((log(alpha) - normal$mu) %*% Rinv)[1] + } + } + if (!is.null(gamma)){ + iter <- i + } else { + conv <- res$convergence + iter <- res$counts # NULL for L-BFGS + } + res <- list(alpha = exp(res$par[1L:N]), + delta = c(1.0, exp(res$par[-(1L:N)])), + logl = -res$value) + } else { + res <- list(alpha = alpha, delta = delta) + res[c("expA", "expB", "theta")] <- + expectation("all", alpha, delta, a, N, d, P, R, G, W) + res$logl <- sum(B[-1L]*log(res$delta)[-1L]) + sum(A*log(res$alpha)) - + sum(res$theta) + oneUpdate <- function(res){ + # update all alphas + res$alpha <- res$alpha*A/res$expA + if (npseudo) res$alpha <- res$alpha/res$alpha[N] + # update all deltas + if (D > 1L) { + res$delta[-1L] <- B[-1L]/ + expectation("delta", res$alpha, res$delta, + a, N, d, P, R, G, W)$expB + } + res[c("expA", "expB", "theta")] <- + expectation("all", res$alpha, res$delta, + a, N, d, P, R, G, W) + res$logl <- sum(B[-1L]*log(res$delta)[-1L]) + + sum(A*log(res$alpha)) - sum(res$theta) + res + } + accelerate <- function(p, p1, p2){ + # only accelerate if parameter has changed in last iteration + z <- p2 != p1 + p2[z] <- p[z] - (p1[z] - p[z])^2L / (p2[z] - 2L * p1[z] + p[z]) + p2 + } + # stopping rule: compare observed & expected sufficient stats + checkConv <- function(res){ + eps <- abs(c(A, B[-1L]) - c(res$expA, res$delta[-1L]*res$expB)) + assign("eps", eps, envir = parent.env(environment())) + ifelse(all(eps < epsilon), 0L, 1L) + } + iter <- 0L + if ((conv <- checkConv(res)) == 0L) maxit <- 0L + updateIter <- function(res){ + if (trace) message("iter ", iter, ", loglik: ", res$logl) + assign("iter", iter + 1L, + envir = parent.env(environment())) + } + eps <- c(A, B[-1L]) + doSteffensen <- FALSE + while(iter < maxit[1L]){ + updateIter(res) + res <- oneUpdate(res) + if ((conv <- checkConv(res)) == 0L) break + if (all(eps < steffensen & !doSteffensen)) doSteffensen <- TRUE + # steffensen + if (doSteffensen){ + res1 <- oneUpdate(res) + if ((conv <- checkConv(res1)) == 0L) { + res <- res1 + break + } + res2 <- oneUpdate(res1) + if ((conv <- checkConv(res2)) == 0L) { + res <- res2 + break + } + # if negative worth or log-likelihood decreased, + # don't apply Steffensen + res$alpha <- + accelerate(res$alpha, res1$alpha, res2$alpha) + if (all(res$alpha > 0L)) { + res$delta[-1L] <- + accelerate(res$delta, res1$delta, res2$delta)[-1L] + res[c("expA", "expB", "theta")] <- + expectation("all", res$alpha, res$delta, + a, N, d, P, R, G, W) + res$logl <- + sum(B[-1L]*log(res$delta)[-1L]) + + 
sum(A*log(res$alpha)) - sum(res$theta) + if (res$logl < res2$logl) { + res <- res2 + } else if ((conv <- checkConv(res)) == 0L) break + } else res <- res2 + } + } + if (trace) message("iter ", iter, ", loglik: ", res$logl) + } + if (conv[1L] == 1L) warning("Iterations have not converged.") + + res$delta <- structure(res$delta, names = paste0("tie", names(B)))[-1L] + + if (npseudo > 0L) { + # drop hypothetical object + res$alpha <- res$alpha[-N] + N <- N - 1L + # drop weights and indices for pseudodata + i <- seq_len(length(W[[2L]]) - N) + W[[2L]] <- W[[2L]][i] + G[[2L]] <- G[[2L]][i] + if (!length(W[[2L]])) P <- setdiff(P, 2L) + # remove contribution to A and B + A <- A[-(N + 1L)] - npseudo + B[1L] <- B[1L] - 2L*npseudo*N + } + names(res$alpha) <- items + rank <- N + D - 2L + + cf <- c(res$alpha, res$delta) + + # save normal prior info + if (!is.null(normal)) { + normal_prior <- list(mu = normal$mu, invSigma = Kinv) + } else normal_prior <- NULL + + # recompute log-likelihood excluding pseudo-observations/priors + if (npseudo > 0L | !is.null(normal) | !is.null(gamma)) { + if (!is.null(normal) & is.null(gamma)) { + # logp not yet assigned + logp <- res$logl + } + normal <- NULL + logl <- -obj_common(log(cf)) + } else logl <- res$logl + # null log-likelihood - set adherence to 1 if estimated + if (!is.null(gamma)) a <- rep.int(1L, length(a)) + null.loglik <- -obj_common(rep.int(0L, length(cf))) + + # frequencies of sets selected from, for sizes 2 to max observed + freq <- vapply(W[P], sum, 1.0) + # number of possible selections overall + n <- sum(vapply(P, choose, numeric(D), k = d) %*% freq) + df.residual <- n - sum(freq) - rank + + fit <- list(call = call, + coefficients = cf, + loglik = logl, + null.loglik = null.loglik, + df.residual = df.residual, + df.null = n - sum(freq), #naming consistent with glm + rank = rank, + logposterior = logp, + gamma = gamma, + normal = normal_prior, + iter = iter, + rankings = rankings, + weights = weights, + adherence = adherence, + ranker = ranker, + ties = d, + conv = conv, + na.action = na.action) + class(fit) <- "PlackettLuce" + fit +} diff --git a/R/adjacency.R b/R/adjacency.R index 97ed47f..37a1074 100644 --- a/R/adjacency.R +++ b/R/adjacency.R @@ -1,50 +1,50 @@ -#' Create an Adjacency Matrix for a set of Rankings -#' -#' Convert a set of rankings to an adjacency matrix summarising wins -#' and losses between pairs of items. -#' -#' For a \code{"rankings"} object based on N items, the adjacency matrix is an -#' N by N matrix, with element (i, j) being the number of times item i wins over -#' item j. For example, in the ranking \\{1\\} > \\{3, 4\\} > \\{2\\}, item 1 wins over -#' items 2, 3, and 4, and items 3 and 4 win over item 2. -#' -#' If \code{weights} is specified, the values in the adjacency matrix are the -#' weighted counts. -#' -#' @param object a \code{\link{rankings}} object, or an object that can be -#' coerced by \code{as.rankings}. -#' @param weights an optional vector of weights for the rankings. -#' @param ... further arguments passed to/from methods. -#' -#' @return An N by N matrix, where N is the number of items that can be ranked. 
-#' -#' @examples -#' X <- matrix(c(2, 1, 2, 1, 2, -#' 3, 2, 0, 0, 1, -#' 1, 0, 2, 2, 3), nrow = 3, byrow = TRUE) -#' X <- as.rankings(X) -#' adjacency(X) -#' -#' adjacency(X, weights = c(1, 1, 2)) -#' -#' @export -adjacency <- function(object, weights = NULL, ...){ - if (!inherits(object, "rankings")) object <- as.rankings(object) - N <- ncol(object) - if (is.null(weights)) { - weights <- rep.int(1L, nrow(object)) - } else stopifnot(length(weights) == nrow(object)) - nset <- apply(object, 1L, max) - m <- max(nset) - nm <- colnames(object) - X <- matrix(0.0, nrow = N, ncol = N, dimnames = list(nm, nm)) - for (i in 1L:m){ - r <- which(nset >= (i + 1L)) - for(j in r) { - one <- object[j,] == i - two <- object[j,] > i # > gives rest; == i + 1 gives next best - X[one, two] <- X[one, two] + weights[j] - } - } - structure(X, class = c("adjacency", "matrix")) -} +#' Create an Adjacency Matrix for a set of Rankings +#' +#' Convert a set of rankings to an adjacency matrix summarising wins +#' and losses between pairs of items. +#' +#' For a \code{"rankings"} object based on N items, the adjacency matrix is an +#' N by N matrix, with element (i, j) being the number of times item i wins over +#' item j. For example, in the ranking \\{1\\} > \\{3, 4\\} > \\{2\\}, +#' item 1 wins over items 2, 3, and 4, and items 3 and 4 win over item 2. +#' +#' If \code{weights} is specified, the values in the adjacency matrix are the +#' weighted counts. +#' +#' @param object a \code{\link{rankings}} object, or an object that can be +#' coerced by \code{as.rankings}. +#' @param weights an optional vector of weights for the rankings. +#' @param ... further arguments passed to/from methods. +#' +#' @return An N by N matrix, where N is the number of items that can be ranked. +#' +#' @examples +#' X <- matrix(c(2, 1, 2, 1, 2, +#' 3, 2, 0, 0, 1, +#' 1, 0, 2, 2, 3), nrow = 3, byrow = TRUE) +#' X <- as.rankings(X) +#' adjacency(X) +#' +#' adjacency(X, weights = c(1, 1, 2)) +#' +#' @export +adjacency <- function(object, weights = NULL, ...){ + if (!inherits(object, "rankings")) object <- as.rankings(object) + N <- ncol(object) + if (is.null(weights)) { + weights <- rep.int(1L, nrow(object)) + } else stopifnot(length(weights) == nrow(object)) + nset <- apply(object, 1L, max) + m <- max(nset) + nm <- colnames(object) + X <- matrix(0.0, nrow = N, ncol = N, dimnames = list(nm, nm)) + for (i in 1L:m){ + r <- which(nset >= (i + 1L)) + for(j in r) { + one <- object[j,] == i + two <- object[j,] > i # > gives rest; == i + 1 gives next best + X[one, two] <- X[one, two] + weights[j] + } + } + structure(X, class = c("adjacency", "matrix")) +} diff --git a/R/coef.R b/R/coef.R index 9405efc..a2f3de8 100644 --- a/R/coef.R +++ b/R/coef.R @@ -1,70 +1,70 @@ -#' Plackett-Luce Model Summaries -#' -#' Obtain the coefficients, model summary or coefficient variance-covariance -#' matrix for a model fitted by \code{PlackettLuce}. -#' -#' By default, parameters are returned on the log scale, as most suited for -#' inference. If \code{log = FALSE}, the worth parameters are returned, -#' constrained to sum to one so that they represent the probability that -#' the corresponding item comes first in a ranking of all items, given that -#' first place is not tied. -#' -#' The variance-covariance matrix is returned for the worth and tie parameters -#' on the log scale, with the reference as specified by \code{ref}. For models -#' estimated by maximum likelihood, the variance-covariance is the inverse of -#' the Fisher information of the log-likelihood. 
-#' -#' For models with a normal or gamma prior, the variance-covariance is based on -#' the Fisher information of the log-posterior. When adherence parameters have -#' been estimated, the log-posterior is not linear in the parameters. In this -#' case there is a difference between the expected and observed Fisher -#' information. By default, \code{vcov} will return the variance-covariance -#' based on the expected information, but \code{type} gives to option to use -#' the observed information instead. For large samples, the difference between -#' these options should be small. Note that the estimation of the adherence -#' parameters is accounted for in the computation of the variance-covariance -#' matrix, but only the sub-matrix corresponding to the worth and tie -#' parameters is estimated. -#' @param object An object of class "PlackettLuce" as returned by -#' \code{PlackettLuce}. -#' @param ref An integer or character string specifying the reference item (for -#' which log worth will be set to zero). If \code{NULL} the sum of the log worth -#' parameters is set to zero. -#' @param log A logical indicating whether to return parameters on the log scale -#' with the item specified by \code{ref} set to zero. -#' @param type For \code{coef}, the type of coefficients to return: one of -#' \code{"ties"}, \code{"worth"} or \code{"all"}. For \code{vcov}, the type of -#' Fisher information to base the estimation on: either \code{"expected"} or -#' \code{"observed"}. -#' @param ... additional arguments, passed to \code{vcov} by \code{summary}. -#' @name summaries -#' @export -coef.PlackettLuce <- function(object, ref = 1L, log = TRUE, - type = "all", ...){ - type <- match.arg(type, c("ties", "worth", "all")) - ncoefs <- length(object$coefficients) - id <- seq_len(ncoefs - length(object$ties) + 1L) - if (!log) { - # ignore ref here, always return probabilities - const <- sum(object$coefficients[id]) - coefs <- c(object$coefficients[id]/const, object$coefficients[-id]) - } else { - const <- mean(log(object$coefficients[id])[ref]) - item <- itempar.PlackettLuce(object, ref = ref, log = log, vcov = FALSE) - ref <- attr(item, "ref") - coefs <- c(item, log(object$coefficients[-id])) - } - cls <- c("coef.PlackettLuce", "numeric") - switch(type, - "ties" = return(coefs[-id]), - "worth" = return(structure(coefs[id], ref = ref, log = log, - const = const, class = cls)), - "all" = return(structure(coefs, ref = ref, log = log, - const = const, class = cls))) -} - -#' @method print coef.PlackettLuce -#' @export -print.coef.PlackettLuce <- function (x, ...) { - print.default(c(x)) -} +#' Plackett-Luce Model Summaries +#' +#' Obtain the coefficients, model summary or coefficient variance-covariance +#' matrix for a model fitted by \code{PlackettLuce}. +#' +#' By default, parameters are returned on the log scale, as most suited for +#' inference. If \code{log = FALSE}, the worth parameters are returned, +#' constrained to sum to one so that they represent the probability that +#' the corresponding item comes first in a ranking of all items, given that +#' first place is not tied. +#' +#' The variance-covariance matrix is returned for the worth and tie parameters +#' on the log scale, with the reference as specified by \code{ref}. For models +#' estimated by maximum likelihood, the variance-covariance is the inverse of +#' the Fisher information of the log-likelihood. +#' +#' For models with a normal or gamma prior, the variance-covariance is based on +#' the Fisher information of the log-posterior. 
When adherence parameters have +#' been estimated, the log-posterior is not linear in the parameters. In this +#' case there is a difference between the expected and observed Fisher +#' information. By default, \code{vcov} will return the variance-covariance +#' based on the expected information, but \code{type} gives to option to use +#' the observed information instead. For large samples, the difference between +#' these options should be small. Note that the estimation of the adherence +#' parameters is accounted for in the computation of the variance-covariance +#' matrix, but only the sub-matrix corresponding to the worth and tie +#' parameters is estimated. +#' @param object An object of class "PlackettLuce" as returned by +#' \code{PlackettLuce}. +#' @param ref An integer or character string specifying the reference item (for +#' which log worth will be set to zero). If \code{NULL} the sum of the log worth +#' parameters is set to zero. +#' @param log A logical indicating whether to return parameters on the log scale +#' with the item specified by \code{ref} set to zero. +#' @param type For \code{coef}, the type of coefficients to return: one of +#' \code{"ties"}, \code{"worth"} or \code{"all"}. For \code{vcov}, the type of +#' Fisher information to base the estimation on: either \code{"expected"} or +#' \code{"observed"}. +#' @param ... additional arguments, passed to \code{vcov} by \code{summary}. +#' @name summaries +#' @export +coef.PlackettLuce <- function(object, ref = 1L, log = TRUE, + type = "all", ...){ + type <- match.arg(type, c("ties", "worth", "all")) + ncoefs <- length(object$coefficients) + id <- seq_len(ncoefs - length(object$ties) + 1L) + if (!log) { + # ignore ref here, always return probabilities + const <- sum(object$coefficients[id]) + coefs <- c(object$coefficients[id]/const, object$coefficients[-id]) + } else { + const <- mean(log(object$coefficients[id])[ref]) + item <- itempar.PlackettLuce(object, ref = ref, log = log, vcov = FALSE) + ref <- attr(item, "ref") + coefs <- c(item, log(object$coefficients[-id])) + } + cls <- c("coef.PlackettLuce", "numeric") + switch(type, + "ties" = return(coefs[-id]), + "worth" = return(structure(coefs[id], ref = ref, log = log, + const = const, class = cls)), + "all" = return(structure(coefs, ref = ref, log = log, + const = const, class = cls))) +} + +#' @method print coef.PlackettLuce +#' @export +print.coef.PlackettLuce <- function (x, ...) 
{ + print.default(c(x)) +} diff --git a/R/fit_adherence.R b/R/fit_adherence.R index 19fe6e2..99d76fe 100644 --- a/R/fit_adherence.R +++ b/R/fit_adherence.R @@ -1,121 +1,121 @@ -# fitting functions for Plackett-Luce model with worth and tie parameters fixed - -# Design loglik as brglm2::brglmFit -# log-likelihood and score functions -# Within optim or nlminb use obj and gr defined in main function - -# omit following constants from log-likelihood/log-posterior: -# contribution from tie in numerator: sum(B[-1]*log(delta)) -# contribution from normal prior: - 0.5*tcrossprod((log(alpha) - mu) %*% Rinv)[1] -# normalising constant from gamma prior: shape*log(rate) - log(gamma(shape)) -loglik_adherence <- function(adherence, shape, rate, wa, Z, fit) { - ## Z = sum(average log worth for each choice) - res <- sum(adherence*Z) - sum(fit$norm) - # prior on adherence - if (!is.null(shape)){ - res <- res + sum(wa*((shape - 1L)*log(adherence) - rate*adherence)) - } - res -} - -# derivatives of log-likelihood for adherence -score_adherence <- function(adherence, ranker, shape, rate, wa, Z, fit) { - res <- Z - rowsum(fit$score, ranker) - # prior on adherence - if (!is.null(shape)){ - res <- res + wa*((shape - 1L)/adherence - rate) - } - res -} - -# compuation of normalization constants and their derivative wrt adherence, -# per ranking -normalization <- function(alpha, # alpha par (worth) - delta, # delta par (tie) - a, # adherence for each ranking - d, # max tie order - P, # set sizes in representative rankings - R, # items in each ranking, from last to first place - G, # group of rankings to include; list for each P - W = NULL){ # weight of rankings; list for each P - nr <- length(a) # number of real rankings - score <- numeric(nr) - # ignore any pseudo-rankings (always pairs) - len <- lengths(G) - pseudo <- len[2L] > nr - if (pseudo) { - real <- G[[2L]] <= nr - len[2L] <- sum(real) - } - norm <- numeric(sum(len)) - z <- 1L - for (p in P){ - # D == 1 - ## numerator of score - if (p == 2L && pseudo) { - r <- G[[p]][real] - } else r <- G[[p]] - nr <- length(r) - item_id <- as.integer(R[r, 1L:p]) - x1 <- matrix(alpha[item_id], - nrow = nr, ncol = p) - y1 <- rowSums(x1^a[r]*log(x1)) - ## denominator of score = exp(contribution to norm constant) - z1 <- rowSums(x1^a[r]) - # D > 1 - e <- d[d <= p][-1] - if (length(e)){ - # index up to max(e) items: start with 1:n - i <- seq_len(max(e)) - # id = index to change next; id2 = first index changed - if (max(e) == p) { - id <- p - 1L - } else id <- max(e) - id2 <- 1L - repeat{ - # work along index vector from 1 to end/first index that = p - v1 <- alpha[R[r, i[1L]]] # ability for first ranked item - last <- i[id] == p - if (last) { - end <- id - } else end <- min(max(e), id + 1L) - for (k in 2L:end){ - # product of first k alphas indexed by i - v1 <- v1 * alpha[R[r, i[k]]] - # ignore if already recorded/tie order not observed - if (k < id2 | ! 
k %in% d) next - # add to numerators/denominators for sets of order p - v2 <- v1^(a[r]/k) - y1 <- y1 + delta[d == k]*v2*log(v1)/k - # add to denominators for sets - z1 <- z1 + delta[d == k]*v2 - } - # update index - if (i[1L] == (p - 1L)) break - if (last){ - id2 <- id - 1L - v <- i[id2] - len <- min(p - 2L - v, max(e) - id2) - id <- id2 + len - i[id2:id] <- v + seq_len(len + 1L) - } else { - id2 <- id - i[id] <- i[id] + 1L - } - } - } - # add contribution for sets of size s to norm constant/scores - if (!is.null(W)){ - if (p == 2L && pseudo) { - w <- W[[p]][real] - } else w <- W[[p]] - norm[z:(z + nr - 1L)] <- norm[z:(z + nr - 1L)] + w * log(z1) - score[r] <- score[r] + w * y1/z1 - } else { - norm[z:(z + nr - 1L)] <- norm[z:(z + nr - 1L)] + log(z1) - score[r] <- score[r] + y1/z1 - } - z <- z + nr - } - list(norm = norm, score = score) -} +# fitting functions for Plackett-Luce model with worth and tie parameters fixed + +# Design loglik as brglm2::brglmFit +# log-likelihood and score functions +# Within optim or nlminb use obj and gr defined in main function + +# omit following constants from log-likelihood/log-posterior: +# contribution from tie in numerator: sum(B[-1]*log(delta)) +# contribution from normal prior: -0.5*tcrossprod((log(alpha) - mu) %*% Rinv)[1] +# normalising constant from gamma prior: shape*log(rate) - log(gamma(shape)) +loglik_adherence <- function(adherence, shape, rate, wa, Z, fit) { + ## Z = sum(average log worth for each choice) + res <- sum(adherence*Z) - sum(fit$norm) + # prior on adherence + if (!is.null(shape)){ + res <- res + sum(wa*((shape - 1L)*log(adherence) - rate*adherence)) + } + res +} + +# derivatives of log-likelihood for adherence +score_adherence <- function(adherence, ranker, shape, rate, wa, Z, fit) { + res <- Z - rowsum(fit$score, ranker) + # prior on adherence + if (!is.null(shape)){ + res <- res + wa*((shape - 1L)/adherence - rate) + } + res +} + +# compuation of normalization constants and their derivative wrt adherence, +# per ranking +normalization <- function(alpha, # alpha par (worth) + delta, # delta par (tie) + a, # adherence for each ranking + d, # max tie order + P, # set sizes in representative rankings + R, # items in each ranking, from last to first place + G, # group of rankings to include; list for each P + W = NULL){ # weight of rankings; list for each P + nr <- length(a) # number of real rankings + score <- numeric(nr) + # ignore any pseudo-rankings (always pairs) + len <- lengths(G) + pseudo <- len[2L] > nr + if (pseudo) { + real <- G[[2L]] <= nr + len[2L] <- sum(real) + } + norm <- numeric(sum(len)) + z <- 1L + for (p in P){ + # D == 1 + ## numerator of score + if (p == 2L && pseudo) { + r <- G[[p]][real] + } else r <- G[[p]] + nr <- length(r) + item_id <- as.integer(R[r, 1L:p]) + x1 <- matrix(alpha[item_id], + nrow = nr, ncol = p) + y1 <- rowSums(x1^a[r]*log(x1)) + ## denominator of score = exp(contribution to norm constant) + z1 <- rowSums(x1^a[r]) + # D > 1 + e <- d[d <= p][-1] + if (length(e)){ + # index up to max(e) items: start with 1:n + i <- seq_len(max(e)) + # id = index to change next; id2 = first index changed + if (max(e) == p) { + id <- p - 1L + } else id <- max(e) + id2 <- 1L + repeat{ + # work along index vector from 1 to end/first index that = p + v1 <- alpha[R[r, i[1L]]] # ability for first ranked item + last <- i[id] == p + if (last) { + end <- id + } else end <- min(max(e), id + 1L) + for (k in 2L:end){ + # product of first k alphas indexed by i + v1 <- v1 * alpha[R[r, i[k]]] + # ignore if already 
recorded/tie order not observed + if (k < id2 | ! k %in% d) next + # add to numerators/denominators for sets of order p + v2 <- v1^(a[r]/k) + y1 <- y1 + delta[d == k]*v2*log(v1)/k + # add to denominators for sets + z1 <- z1 + delta[d == k]*v2 + } + # update index + if (i[1L] == (p - 1L)) break + if (last){ + id2 <- id - 1L + v <- i[id2] + len <- min(p - 2L - v, max(e) - id2) + id <- id2 + len + i[id2:id] <- v + seq_len(len + 1L) + } else { + id2 <- id + i[id] <- i[id] + 1L + } + } + } + # add contribution for sets of size s to norm constant/scores + if (!is.null(W)){ + if (p == 2L && pseudo) { + w <- W[[p]][real] + } else w <- W[[p]] + norm[z:(z + nr - 1L)] <- norm[z:(z + nr - 1L)] + w * log(z1) + score[r] <- score[r] + w * y1/z1 + } else { + norm[z:(z + nr - 1L)] <- norm[z:(z + nr - 1L)] + log(z1) + score[r] <- score[r] + y1/z1 + } + z <- z + nr + } + list(norm = norm, score = score) +} diff --git a/R/fit_common.R b/R/fit_common.R index 49bd6af..58d9ef5 100644 --- a/R/fit_common.R +++ b/R/fit_common.R @@ -1,195 +1,197 @@ -# fitting functions for Plackett-Luce model with adherence fixed --------------- - -# Design loglik as brglm2::brglmFit -# log-likelihood and score functions -# Within optim or nlminb use obj and gr wrappers defined in main function - -# full logposterior when estimating adherence (may not have prior on mu) -logp_full <- function(alpha, delta, adherence, - mu, Rinv, shape, rate, wa, A, B, theta){ - res <- sum(B[-1L]*log(delta)) + sum(A*log(alpha))- sum(theta) - # prior on mu - if (!is.null(mu)){ - # -0.5 * (s - mu)^T Sigma^{-1} (s - mu) + standard logL - res <- res - 0.5*tcrossprod((log(alpha) - mu) %*% Rinv)[1L] - } - # prior on adherence - always if estimating adherence - if (!is.null(shape)){ - res <- res + sum(wa*((shape - 1L)*log(adherence) - rate*adherence)) - } - res -} - -score_full <- function(alpha, delta, adherence, ranker, - mu, Kinv, shape, rate, wa, A, B, Z, expA, expB, score){ - a <- A/alpha - expA/alpha - b <- B[-1L]/delta - expB - if (length(adherence)){ - z <- Z - rowsum(score, ranker)[, 1L] - # prior on adherence - if (!is.null(shape)){ - z <- z + wa*((shape - 1L)/adherence - rate) - } - } else z <- numeric(0) - # prior on log-worth - if (!is.null(mu)){ - a <- a - Kinv %*% (log(alpha) - mu)/alpha - } - c(a, b, z) -} - -# omit following constants from log-likelihood/log-posterior: -# contribution from tie in numerator: sum(B[-1]*log(delta)) -# contribution from normal prior: - 0.5*tcrossprod((log(alpha) - mu) %*% Rinv)[1L] -# normalising constant from gamma prior: shape*log(rate) - log(gamma(shape)) -loglik_common <- function(par, N, mu, Rinv, A, B, fit){ - alpha <- par[1L:N] - delta <- par[-c(1L:N)] - res <- sum(B[-1L]*log(delta)) + sum(A*log(alpha))- sum(fit$theta) - if (is.null(mu)) return(res) - # -0.5 * (s - mu)^T Sigma^{-1} (s - mu) + standard logL - res - 0.5*tcrossprod((log(alpha) - mu) %*% Rinv)[1L] -} - -# derivatives of log-likelihood for common parameters (worth, tie) -score_common <- function(par, N, mu, Kinv, A, B, fit) { - alpha <- par[1L:N] - delta <- par[-c(1L:N)] - res <- c(A/alpha - fit$expA/alpha, B[-1L]/delta - fit$expB) - if (is.null(mu)) return(res) - # deriv first part wrt alpha (s) : [-Sigma^{-1} (s - mu)]/alpha - res[1L:N] <- res[1L:N] - Kinv %*% (log(alpha) - mu)/alpha - res -} - -# function to compute expectations of the sufficient statistics of the alphas/deltas -# (for fixed adherence) if ranking weight is NULL, do not aggregate across rankings -expectation <- function(par, # par to compute expectations for - alpha, # alpha 
par (worth) - delta, # delta par (tie) - a, # adherence for each ranking - N, # number of objects - d, # observed tie orders - P, # set sizes in representative rankings - R, # items in each ranking, from last to first place - G, # group of rankings to include; list for each P - W = NULL){ # weight of rankings; list for each P - D <- length(d) - keepAlpha <- any(par %in% c("alpha", "all")) - keepDelta <- D > 1L && any(par %in% c("delta", "all")) - keepTheta <- any(par %in% c("theta", "all")) - expA <- expB <- theta <- NULL - if (keepAlpha) { - if (!is.null(W)) { - expA <- numeric(N) - } else expA <- matrix(0.0, nrow = nrow(R), ncol = N) - } - if (keepDelta) { - if (!is.null(W)) { - expB <- numeric(D - 1L) - } else expB <- matrix(0.0, nrow = nrow(R), ncol = D - 1L) - } - if (keepTheta) theta <- numeric(sum(lengths(G[P]))) - z <- 1L - for (p in P){ - # D == 1 - ## numerators (for expA, else just to compute denominators) - r <- G[[p]] - nr <- length(r) - item_id <- as.integer(R[r, 1L:p]) - x1 <- matrix(alpha[item_id], - nrow = nr, ncol = p) - if (!is.null(a)) x1 <- x1^a[r] - ## denominators - z1 <- rowSums(x1) - # D > 1 - e <- d[d <= p][-1] - if (length(e)){ - if (keepDelta) - y1 <- matrix(0.0, nrow = nr, ncol = length(e)) - # index up to max(e) items: start with 1:n - i <- seq_len(max(e)) - # id = index to change next; id2 = first index changed - if (max(e) == p) { - id <- p - 1L - } else id <- max(e) - id2 <- 1L - repeat{ - # work along index vector from 1 to end/first index that = p - v1 <- alpha[R[r, i[1L]]] # ability for first ranked item - last <- i[id] == p - if (last) { - end <- id - } else end <- min(max(e), id + 1L) - for (k in 2L:end){ - # product of first k alphas indexed by i - v1 <- v1 * alpha[R[r, i[k]]] - # ignore if already recorded/tie order not observed - if (k < id2 | ! 
k %in% d) next - # add to numerators/denominators for sets of order p - if (!is.null(a)) { - v2 <- v1^(a[r]/k) - } else v2 <- v1^(1L/k) - v3 <- delta[d == k]*v2 - if (keepAlpha) { - # add to numerators for objects in sets - x1[, i[1L:k]] <- x1[, i[1L:k]] + v3/k - } - if (keepDelta) { - # add to numerator for current tie order for sets - y1[, which(d == k) - 1L] <- y1[, which(d == k) - 1L] + v2 - } - # add to denominators for sets - z1 <- z1 + v3 - } - # update index - if (i[1L] == (p - 1L)) break - if (last){ - id2 <- id - 1L - v <- i[id2] - len <- min(p - 2L - v, max(e) - id2) - id <- id2 + len - i[id2:id] <- v + seq_len(len + 1L) - } else { - id2 <- id - i[id] <- i[id] + 1L - } - } - } - # add contribution for sets of size s to expectation - if (keepAlpha){ - # R[r, 1:s] may only index some alphas - if (!is.null(a)) x1 <- a[r] * x1 - if (!is.null(W)){ - id <- unique(item_id) - add <- drop(rowsum(as.vector(W[[p]] * x1/z1), - item_id, reorder = FALSE)) - expA[id] <- expA[id] + add - } else { - id <- cbind(r, item_id) - expA[id] <- expA[id] + c(x1/z1) - } - } - if (keepDelta && length(e)){ - if (!is.null(W)){ - expB[seq_along(e)] <- expB[seq_along(e)] + colSums(W[[p]] * y1/z1) - } else expB[r, seq_along(e)] <- expB[r, seq_along(e)] + y1/z1 - } - if (keepTheta){ - if (par == "all"){ - # return logtheta - if (!is.null(W)){ - theta[z:(z + nr - 1L)] <- W[[p]] * log(z1) - } else theta[z:(z + nr - 1L)] <- log(z1) - } else { - if (!is.null(W)){ - theta[z:(z + nr - 1L)] <- W[[p]] * z1 - } else theta[z:(z + nr - 1L)] <- z1 - } - z <- z + nr - } - } - list(expA = if (keepAlpha) expA, expB = if (keepDelta) expB, - theta = if (keepTheta) theta) -} - +# fitting functions for Plackett-Luce model with adherence fixed --------------- + +# Design loglik as brglm2::brglmFit +# log-likelihood and score functions +# Within optim or nlminb use obj and gr wrappers defined in main function + +# full logposterior when estimating adherence (may not have prior on mu) +logp_full <- function(alpha, delta, adherence, + mu, Rinv, shape, rate, wa, A, B, theta){ + res <- sum(B[-1L]*log(delta)) + sum(A*log(alpha))- sum(theta) + # prior on mu + if (!is.null(mu)){ + # -0.5 * (s - mu)^T Sigma^{-1} (s - mu) + standard logL + res <- res - 0.5*tcrossprod((log(alpha) - mu) %*% Rinv)[1L] + } + # prior on adherence - always if estimating adherence + if (!is.null(shape)){ + res <- res + sum(wa*((shape - 1L)*log(adherence) - rate*adherence)) + } + res +} + +score_full <- function(alpha, delta, adherence, ranker, + mu, Kinv, shape, rate, wa, A, B, Z, expA, expB, score){ + a <- A/alpha - expA/alpha + b <- B[-1L]/delta - expB + if (length(adherence)){ + z <- Z - rowsum(score, ranker)[, 1L] + # prior on adherence + if (!is.null(shape)){ + z <- z + wa*((shape - 1L)/adherence - rate) + } + } else z <- numeric(0) + # prior on log-worth + if (!is.null(mu)){ + a <- a - Kinv %*% (log(alpha) - mu)/alpha + } + c(a, b, z) +} + +# omit following constants from log-likelihood/log-posterior: +# contribution from tie in numerator: sum(B[-1]*log(delta)) +# contribution from normal prior: -0.5*tcrossprod((log(alpha) - mu) %*% Rinv)[1] +# normalising constant from gamma prior: shape*log(rate) - log(gamma(shape)) +loglik_common <- function(par, N, mu, Rinv, A, B, fit){ + alpha <- par[1L:N] + delta <- par[-c(1L:N)] + res <- sum(B[-1L]*log(delta)) + sum(A*log(alpha))- sum(fit$theta) + if (is.null(mu)) return(res) + # -0.5 * (s - mu)^T Sigma^{-1} (s - mu) + standard logL + res - 0.5*tcrossprod((log(alpha) - mu) %*% Rinv)[1L] +} + +# derivatives of 
log-likelihood for common parameters (worth, tie) +score_common <- function(par, N, mu, Kinv, A, B, fit) { + alpha <- par[1L:N] + delta <- par[-c(1L:N)] + res <- c(A/alpha - fit$expA/alpha, B[-1L]/delta - fit$expB) + if (is.null(mu)) return(res) + # deriv first part wrt alpha (s) : [-Sigma^{-1} (s - mu)]/alpha + res[1L:N] <- res[1L:N] - Kinv %*% (log(alpha) - mu)/alpha + res +} + +# compute expectations of the sufficient statistics of the alphas/deltas +# (for fixed adherence) if ranking weight is NULL, do not aggregate rankings +expectation <- function(par, # par to compute expectations for + alpha, # alpha par (worth) + delta, # delta par (tie) + a, # adherence for each ranking + N, # number of objects + d, # observed tie orders + P, # set sizes in representative rankings + R, # items in each ranking, from last to first place + G, # group of rankings to include; list for each P + W = NULL){ # weight of rankings; list for each P + D <- length(d) + keepAlpha <- any(par %in% c("alpha", "all")) + keepDelta <- D > 1L && any(par %in% c("delta", "all")) + keepTheta <- any(par %in% c("theta", "all")) + expA <- expB <- theta <- NULL + if (keepAlpha) { + if (!is.null(W)) { + expA <- numeric(N) + } else expA <- matrix(0.0, nrow = nrow(R), ncol = N) + } + if (keepDelta) { + if (!is.null(W)) { + expB <- numeric(D - 1L) + } else expB <- matrix(0.0, nrow = nrow(R), ncol = D - 1L) + } + if (keepTheta) theta <- numeric(sum(lengths(G[P]))) + z <- 1L + for (p in P){ + # D == 1 + ## numerators (for expA, else just to compute denominators) + r <- G[[p]] + nr <- length(r) + item_id <- as.integer(R[r, 1L:p]) + x1 <- matrix(alpha[item_id], + nrow = nr, ncol = p) + if (!is.null(a)) x1 <- x1^a[r] + ## denominators + z1 <- rowSums(x1) + # D > 1 + e <- d[d <= p][-1] + if (length(e)){ + if (keepDelta) + y1 <- matrix(0.0, nrow = nr, ncol = length(e)) + # index up to max(e) items: start with 1:n + i <- seq_len(max(e)) + # id = index to change next; id2 = first index changed + if (max(e) == p) { + id <- p - 1L + } else id <- max(e) + id2 <- 1L + repeat{ + # work along index vector from 1 to end/first index that = p + v1 <- alpha[R[r, i[1L]]] # ability for first ranked item + last <- i[id] == p + if (last) { + end <- id + } else end <- min(max(e), id + 1L) + for (k in 2L:end){ + # product of first k alphas indexed by i + v1 <- v1 * alpha[R[r, i[k]]] + # ignore if already recorded/tie order not observed + if (k < id2 | ! 
k %in% d) next + # add to numerators/denominators for sets of order p + if (!is.null(a)) { + v2 <- v1^(a[r]/k) + } else v2 <- v1^(1L/k) + v3 <- delta[d == k]*v2 + if (keepAlpha) { + # add to numerators for objects in sets + x1[, i[1L:k]] <- x1[, i[1L:k]] + v3/k + } + if (keepDelta) { + # add to numerator for current tie order for sets + y1[, which(d == k) - 1L] <- + y1[, which(d == k) - 1L] + v2 + } + # add to denominators for sets + z1 <- z1 + v3 + } + # update index + if (i[1L] == (p - 1L)) break + if (last){ + id2 <- id - 1L + v <- i[id2] + len <- min(p - 2L - v, max(e) - id2) + id <- id2 + len + i[id2:id] <- v + seq_len(len + 1L) + } else { + id2 <- id + i[id] <- i[id] + 1L + } + } + } + # add contribution for sets of size s to expectation + if (keepAlpha){ + # R[r, 1:s] may only index some alphas + if (!is.null(a)) x1 <- a[r] * x1 + if (!is.null(W)){ + id <- unique(item_id) + add <- drop(rowsum(as.vector(W[[p]] * x1/z1), + item_id, reorder = FALSE)) + expA[id] <- expA[id] + add + } else { + id <- cbind(r, item_id) + expA[id] <- expA[id] + c(x1/z1) + } + } + if (keepDelta && length(e)){ + if (!is.null(W)){ + expB[seq_along(e)] <- expB[seq_along(e)] + + colSums(W[[p]] * y1/z1) + } else expB[r, seq_along(e)] <- expB[r, seq_along(e)] + y1/z1 + } + if (keepTheta){ + if (par == "all"){ + # return logtheta + if (!is.null(W)){ + theta[z:(z + nr - 1L)] <- W[[p]] * log(z1) + } else theta[z:(z + nr - 1L)] <- log(z1) + } else { + if (!is.null(W)){ + theta[z:(z + nr - 1L)] <- W[[p]] * z1 + } else theta[z:(z + nr - 1L)] <- z1 + } + z <- z + nr + } + } + list(expA = if (keepAlpha) expA, expB = if (keepDelta) expB, + theta = if (keepTheta) theta) +} + diff --git a/R/fitted.R b/R/fitted.R index ebcc31e..1998604 100644 --- a/R/fitted.R +++ b/R/fitted.R @@ -1,142 +1,142 @@ -#' Fitted Probabilities for PlackettLuce Objects -#' -#' Fitted probabilities for all choice/alternative combinations in the data. -#' -#' @param object an object as returned by -#' \code{\link{PlackettLuce}} or \code{\link{pltree}}. -#' @param aggregate logical; if \code{TRUE} observations of the same choice from -#' the same set of alternatives are aggregated. -#' @param free logical; if \code{TRUE} only free choices are included, i.e. -#' choices of one item from a set of one item are excluded. -#' @param ... further arguments, currently ignored. -#' @return A list with the following components -#' \item{choices}{The selected item(s).} -#' \item{alternatives}{The set of item(s) that the choice was made from.} -#' \item{ranking}{The ranking(s) including this choice.} -#' \item{n}{The weighted count of rankings including this -#' choice (equal to the ranking weight if \code{aggregate = FALSE}.} -#' \item{fitted}{The fitted probability of making this choice.} -#' If \code{object} was a \code{"pltree"} object, the list has an -#' additional element, \code{node}, specifying which node the ranking corresponds -#' to. -#' @seealso \code{\link{choices}} -#' @examples -#' R <- matrix(c(1, 2, 0, 0, -#' 4, 1, 2, 3, -#' 2, 1, 1, 1, -#' 1, 2, 3, 0, -#' 2, 1, 1, 0, -#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) -#' colnames(R) <- c("apple", "banana", "orange", "pear") -#' -#' mod <- PlackettLuce(R) -#' fit <- fitted(mod) -#' fit -#' @importFrom stats xtabs -#' @export -fitted.PlackettLuce <- function(object, aggregate = TRUE, free = TRUE, ...) 
{ - # get choices and alternatives for each ranking - choices <- choices(object$rankings, names = FALSE) - # get parameters - d <- object$ties - id <- seq(length(object$coefficients) - length(d) + 1L) - alpha <- object$coefficients[id] - delta <- rep.int(0L, max(d)) - delta[d] <- c(1.0, unname(object$coefficients[-id])) - # if free = TRUE, ignore forced choice (choice of 1) - if (free) choices <- choices[lengths(choices$alternatives) != 1L,] - # id unique choices - unique_choices <- unique(choices$choices) - g <- match(choices$choices, unique_choices) - # compute numerators - if (!is.null(object$adherence)){ - n <- lengths(choices$choices) - a <- rep(object$adherence[object$ranker], tabulate(choices$ranking)) - numerator <- delta[n] * - (vapply(unique_choices, function(x) prod(alpha[x]), 1.0))[g]^a/n - } else { - n <- lengths(unique_choices) - a <- NULL - numerator <- (delta[n] * - vapply(unique_choices, function(x) prod(alpha[x]), 1.0)^(1L/n))[g] - } - # id unique alternatives - size <- lengths(choices$alternatives) - ord <- order(size) - if (!is.null(object$adherence)){ - # don't group - unique_alternatives <- choices$alternatives - } else unique_alternatives <- unique(choices$alternatives[ord]) - # for now work theta out - could perhaps save in object - na <- lengths(unique_alternatives) - R <- matrix(0L, nrow = length(na), ncol = max(na)) - R[cbind(rep(seq_along(unique_alternatives), na), - sequence(na))] <- unlist(unique_alternatives) - G <- seq_along(unique_alternatives) - G <- lapply(seq_len(max(na)), function(i) G[na == i]) - S <- unique(na) - if (free) S <- setdiff(S, 1L) - N <- ncol(object$rankings) - theta <- expectation("theta", alpha, delta[d], a, N, d, S, R, G)$theta - if (!is.null(object$adherence)){ - denominator <- theta[order(unlist(G[S]))] - } else { - denominator <- numeric(length(numerator)) - h <- match(choices$alternatives, unique_alternatives) - denominator <- theta[h] - } - choices$fitted <- numerator/denominator - choices$n <- as.integer(object$weights[unlist(choices$ranking)]) - if (inherits(object$na.action, "exclude")){ - n_miss <- length(object$na.action) - ranking <- seq_len(max(choices$ranking) + n_miss)[-object$na.action] - ranking <- c(ranking[choices$ranking], object$na.action) - id <- order(ranking) - pad <- rep(NA_integer_, n_miss) - old_choices <- choices - choices <- data.frame(matrix(NA, nrow = length(ranking), ncol = 0)) - choices$choices<- c(old_choices$choices, pad)[id] - choices$alternatives <- c(old_choices$alternatives, pad)[id] - choices$ranking <- ranking[id] - choices$fitted <- c(old_choices$fitted, pad)[id] - choices$n <- c(old_choices$n, pad)[id] - g <- c(g, pad)[id] - h <- c(h, pad)[id] - } else choices <- as.data.frame(choices) - if (aggregate){ - if (!is.null(object$adherence)) { - warning("`aggregate` ignored when `object$adherence` is not `NULL`") - return(choices) - } - g <- paste(g, h, sep = ":") - g <- match(g, unique(g)) - keep <- !duplicated(g) - choices$n[keep] <- unname(rowsum(choices$n, g)[,1]) - choices$ranking[keep] <- split(choices$ranking, g) - choices <- choices[keep,] - class(choices) <- c("aggregated_choices", "data.frame") - } - rownames(choices) <- NULL - choices -} - -#' @rdname fitted.PlackettLuce -#' @method fitted pltree -#' @importFrom partykit nodeapply refit.modelparty -#' @export -fitted.pltree <- function(object, aggregate = TRUE, free = TRUE, ...) 
{ - node <- predict.pltree(object, type = "node") - ids <- nodeids(object, terminal = TRUE) - if ("object" %in% object$info$control$terminal) { - fit <- nodeapply(object, ids, - function(n) fitted.PlackettLuce(info_node(n)$object)) - } else { - fit <- lapply(refit.modelparty(object, ids, drop = FALSE), - fitted.PlackettLuce) - } - # combine fitted from each node - n <- vapply(fit, function(x) length(x[[1L]]), 1.0) - fit <- do.call(Map, c(c, fit)) - fit$node <- rep.int(ids, n) - fit -} +#' Fitted Probabilities for PlackettLuce Objects +#' +#' Fitted probabilities for all choice/alternative combinations in the data. +#' +#' @param object an object as returned by +#' \code{\link{PlackettLuce}} or \code{\link{pltree}}. +#' @param aggregate logical; if \code{TRUE} observations of the same choice from +#' the same set of alternatives are aggregated. +#' @param free logical; if \code{TRUE} only free choices are included, i.e. +#' choices of one item from a set of one item are excluded. +#' @param ... further arguments, currently ignored. +#' @return A list with the following components +#' \item{choices}{The selected item(s).} +#' \item{alternatives}{The set of item(s) that the choice was made from.} +#' \item{ranking}{The ranking(s) including this choice.} +#' \item{n}{The weighted count of rankings including this +#' choice (equal to the ranking weight if \code{aggregate = FALSE}.} +#' \item{fitted}{The fitted probability of making this choice.} +#' If \code{object} was a \code{"pltree"} object, the list has an +#' additional element, \code{node}, specifying which node the ranking +#' corresponds to. +#' @seealso \code{\link{choices}} +#' @examples +#' R <- matrix(c(1, 2, 0, 0, +#' 4, 1, 2, 3, +#' 2, 1, 1, 1, +#' 1, 2, 3, 0, +#' 2, 1, 1, 0, +#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) +#' colnames(R) <- c("apple", "banana", "orange", "pear") +#' +#' mod <- PlackettLuce(R) +#' fit <- fitted(mod) +#' fit +#' @importFrom stats xtabs +#' @export +fitted.PlackettLuce <- function(object, aggregate = TRUE, free = TRUE, ...) 
{ + # get choices and alternatives for each ranking + choices <- choices(object$rankings, names = FALSE) + # get parameters + d <- object$ties + id <- seq(length(object$coefficients) - length(d) + 1L) + alpha <- object$coefficients[id] + delta <- rep.int(0L, max(d)) + delta[d] <- c(1.0, unname(object$coefficients[-id])) + # if free = TRUE, ignore forced choice (choice of 1) + if (free) choices <- choices[lengths(choices$alternatives) != 1L,] + # id unique choices + unique_choices <- unique(choices$choices) + g <- match(choices$choices, unique_choices) + # compute numerators + if (!is.null(object$adherence)){ + n <- lengths(choices$choices) + a <- rep(object$adherence[object$ranker], tabulate(choices$ranking)) + numerator <- delta[n] * + (vapply(unique_choices, function(x) prod(alpha[x]), 1.0))[g]^a/n + } else { + n <- lengths(unique_choices) + a <- NULL + numerator <- (delta[n] * + vapply(unique_choices, function(x) prod(alpha[x]), 1.0)^(1L/n))[g] + } + # id unique alternatives + size <- lengths(choices$alternatives) + ord <- order(size) + if (!is.null(object$adherence)){ + # don't group + unique_alternatives <- choices$alternatives + } else unique_alternatives <- unique(choices$alternatives[ord]) + # for now work theta out - could perhaps save in object + na <- lengths(unique_alternatives) + R <- matrix(0L, nrow = length(na), ncol = max(na)) + R[cbind(rep(seq_along(unique_alternatives), na), + sequence(na))] <- unlist(unique_alternatives) + G <- seq_along(unique_alternatives) + G <- lapply(seq_len(max(na)), function(i) G[na == i]) + S <- unique(na) + if (free) S <- setdiff(S, 1L) + N <- ncol(object$rankings) + theta <- expectation("theta", alpha, delta[d], a, N, d, S, R, G)$theta + if (!is.null(object$adherence)){ + denominator <- theta[order(unlist(G[S]))] + } else { + denominator <- numeric(length(numerator)) + h <- match(choices$alternatives, unique_alternatives) + denominator <- theta[h] + } + choices$fitted <- numerator/denominator + choices$n <- as.integer(object$weights[unlist(choices$ranking)]) + if (inherits(object$na.action, "exclude")){ + n_miss <- length(object$na.action) + ranking <- seq_len(max(choices$ranking) + n_miss)[-object$na.action] + ranking <- c(ranking[choices$ranking], object$na.action) + id <- order(ranking) + pad <- rep(NA_integer_, n_miss) + old_choices <- choices + choices <- data.frame(matrix(NA, nrow = length(ranking), ncol = 0)) + choices$choices<- c(old_choices$choices, pad)[id] + choices$alternatives <- c(old_choices$alternatives, pad)[id] + choices$ranking <- ranking[id] + choices$fitted <- c(old_choices$fitted, pad)[id] + choices$n <- c(old_choices$n, pad)[id] + g <- c(g, pad)[id] + h <- c(h, pad)[id] + } else choices <- as.data.frame(choices) + if (aggregate){ + if (!is.null(object$adherence)) { + warning("`aggregate` ignored when `object$adherence` is not `NULL`") + return(choices) + } + g <- paste(g, h, sep = ":") + g <- match(g, unique(g)) + keep <- !duplicated(g) + choices$n[keep] <- unname(rowsum(choices$n, g)[,1]) + choices$ranking[keep] <- split(choices$ranking, g) + choices <- choices[keep,] + class(choices) <- c("aggregated_choices", "data.frame") + } + rownames(choices) <- NULL + choices +} + +#' @rdname fitted.PlackettLuce +#' @method fitted pltree +#' @importFrom partykit nodeapply refit.modelparty +#' @export +fitted.pltree <- function(object, aggregate = TRUE, free = TRUE, ...) 
{ + node <- predict.pltree(object, type = "node") + ids <- nodeids(object, terminal = TRUE) + if ("object" %in% object$info$control$terminal) { + fit <- nodeapply(object, ids, + function(n) fitted.PlackettLuce(info_node(n)$object)) + } else { + fit <- lapply(refit.modelparty(object, ids, drop = FALSE), + fitted.PlackettLuce) + } + # combine fitted from each node + n <- vapply(fit, function(x) length(x[[1L]]), 1.0) + fit <- do.call(Map, c(c, fit)) + fit$node <- rep.int(ids, n) + fit +} diff --git a/R/itempar.R b/R/itempar.R index c2cf096..08759e4 100644 --- a/R/itempar.R +++ b/R/itempar.R @@ -1,127 +1,127 @@ -#' Extract Item Parameters of Plackett-Luce Models -#' -#' Methods for \code{\link[psychotools]{itempar}} to extract the item -#' parameters (abilities or log-abilities) from a Plackett-Luce model or tree. -#' In the case of a tree, item parameters are extracted for each terminal node. -#' -#' @param object a fitted model object as returned by -#' \code{\link{PlackettLuce}} or \code{\link{pltree}}. -#' @param ref a vector of labels or position indices of item parameters which -#' should be used as restriction/for normalization. If \code{NULL} -#' (the default), all items are used with a zero sum (\code{log = TRUE}) or -#' unit sum (\code{log = FALSE}) constraint. -#' @param alias logical. If \code{TRUE} (the default), the aliased parameter is -#' included in the return vector (and in the variance-covariance matrix if -#' \code{vcov = TRUE}). If \code{FALSE}, it is removed. If the restriction given -#' in ref depends on several parameters, the first parameter of the restriction -#' specified is (arbitrarily) chosen to be removed if alias is \code{FALSE}. -#' @param vcov logical. If \code{TRUE} (the default), the (transformed) -#' variance-covariance matrix of the item parameters is attached as attribute -#' \code{vcov}. If \code{FALSE}, a \code{NA}-matrix is attached. -#' @param log logical. Whether to return log-abilities (\code{TRUE}) or -#' abilities (\code{FALSE}). -#' @param ... further arguments which are currently not used. -#' @return An object of class \code{"itempar"}, see -#' \code{\link[psychotools]{itempar}}. -#' @examples -#' R <- matrix(c(1, 2, 0, 0, -#' 4, 1, 2, 3, -#' 2, 1, 1, 1, -#' 1, 2, 3, 0, -#' 2, 1, 1, 0, -#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) -#' colnames(R) <- c("apple", "banana", "orange", "pear") -#' -#' mod <- PlackettLuce(R) -#' coef(mod) -#' -#' # equivalent to default coefficients, i.e. 
log abilities -#' itempar(mod, ref= 1, log = TRUE) -#' -#' # abilities, normalized so abilities for apple and pear sum to 1 -#' itempar(mod, ref = 1:2) -#' -#' @method itempar PlackettLuce -#' @export -itempar.PlackettLuce <- function(object, ref = NULL, alias = TRUE, vcov = TRUE, - log = FALSE, ...){ - item <- seq_len(length(object$coefficients) - length(object$ties) + 1L) - coefs <- log(object$coefficients[item]) - object_names <- names(coefs) - n <- length(coefs) - id <- seq_len(n)[!is.na(coefs)] - # set reference to one or more indices - if (is.null(ref)) { - ref <- id - } else if (is.vector(ref)){ - if (any(ref %in% object_names)) ref <- match(ref, object_names) - if (!all(ref %in% id)) - stop("Could not match 'ref' to estimable coefficients") - } else if (is.matrix(ref)) { - stop("Handling of contrast matrices in argument 'ref' currently not ", - "implemented for itempar.PlackettLuce().") - } else stop("Argument 'ref' is misspecified (see ?itempar for possible ", - "values).") - ref <- match(ref, id) - # define parameters - if (log){ - # based on contrasts - D <- diag(length(id)) - D[, ref] <- D[, ref] - 1L/length(ref) - coefs[id] <- as.vector(D %*% coefs[id]) - } else { - # constrained so sum of ref = 1 - alpha <- exp(coefs[id]) - denom <- sum(alpha[ref]) - coefs[id] <- alpha/denom - } - # define vcov - if (vcov){ - if (log) { - # vcov of contrasts - V <- D %*% vcov(object)[id, id] %*% t(D) - } else { - # vcov of exp(coefs) - V <- diag(alpha) %*% vcov(object)[id, id] %*% diag(alpha) - # partial derivatives of scaled exp(coefs) wrt exp(coefs) - D <- array(dim = dim(V)) - nonref <- setdiff(seq_along(id), ref) - if (length(nonref)){ - D[, nonref] <- 0 - D[cbind(nonref, nonref)] <- 1L/denom - } - D[, ref] <- -alpha/denom^2 - D[cbind(ref, ref)] <- 1L/denom + D[cbind(ref, ref)] - # vcov of scaled exp coefs - V <- D %*% V %*% t(D) - } - dimnames(V) <- list(object_names[id], object_names[id]) - } - # remove aliased parameter if required - if (!alias) { - alias <- ref[1L] - names(alias) <- names(coefs)[id[ref[1L]]] - coefs <- coefs[-alias] - if (vcov) V <- V[-alias, -alias] - } - structure(coefs, class = "itempar", model = "PlackettLuce", - ref = ref, alias = alias, vcov = if (vcov) V) -} - -#' @importFrom psychotools itempar -#' @export -psychotools::itempar - -#' @rdname itempar.PlackettLuce -#' @method itempar pltree -#' @importFrom partykit nodeapply -#' @export -itempar.pltree <- function (object, ...){ - # so unexported itempar.bttree is used from psychotree - requireNamespace("psychotree") - res <- NextMethod() - if (is.vector(res)){ - return(matrix(res, nrow = 1L, dimnames = list("1", names(res)))) - } - res -} +#' Extract Item Parameters of Plackett-Luce Models +#' +#' Methods for \code{\link[psychotools]{itempar}} to extract the item +#' parameters (abilities or log-abilities) from a Plackett-Luce model or tree. +#' In the case of a tree, item parameters are extracted for each terminal node. +#' +#' @param object a fitted model object as returned by +#' \code{\link{PlackettLuce}} or \code{\link{pltree}}. +#' @param ref a vector of labels or position indices of item parameters which +#' should be used as restriction/for normalization. If \code{NULL} +#' (the default), all items are used with a zero sum (\code{log = TRUE}) or +#' unit sum (\code{log = FALSE}) constraint. +#' @param alias logical. If \code{TRUE} (the default), the aliased parameter is +#' included in the return vector (and in the variance-covariance matrix if +#' \code{vcov = TRUE}). If \code{FALSE}, it is removed. 
If the restriction given +#' in ref depends on several parameters, the first parameter of the restriction +#' specified is (arbitrarily) chosen to be removed if alias is \code{FALSE}. +#' @param vcov logical. If \code{TRUE} (the default), the (transformed) +#' variance-covariance matrix of the item parameters is attached as attribute +#' \code{vcov}. If \code{FALSE}, a \code{NA}-matrix is attached. +#' @param log logical. Whether to return log-abilities (\code{TRUE}) or +#' abilities (\code{FALSE}). +#' @param ... further arguments which are currently not used. +#' @return An object of class \code{"itempar"}, see +#' \code{\link[psychotools]{itempar}}. +#' @examples +#' R <- matrix(c(1, 2, 0, 0, +#' 4, 1, 2, 3, +#' 2, 1, 1, 1, +#' 1, 2, 3, 0, +#' 2, 1, 1, 0, +#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) +#' colnames(R) <- c("apple", "banana", "orange", "pear") +#' +#' mod <- PlackettLuce(R) +#' coef(mod) +#' +#' # equivalent to default coefficients, i.e. log abilities +#' itempar(mod, ref= 1, log = TRUE) +#' +#' # abilities, normalized so abilities for apple and pear sum to 1 +#' itempar(mod, ref = 1:2) +#' +#' @method itempar PlackettLuce +#' @export +itempar.PlackettLuce <- function(object, ref = NULL, alias = TRUE, vcov = TRUE, + log = FALSE, ...){ + item <- seq_len(length(object$coefficients) - length(object$ties) + 1L) + coefs <- log(object$coefficients[item]) + object_names <- names(coefs) + n <- length(coefs) + id <- seq_len(n)[!is.na(coefs)] + # set reference to one or more indices + if (is.null(ref)) { + ref <- id + } else if (is.vector(ref)){ + if (any(ref %in% object_names)) ref <- match(ref, object_names) + if (!all(ref %in% id)) + stop("Could not match 'ref' to estimable coefficients") + } else if (is.matrix(ref)) { + stop("Handling of contrast matrices in argument 'ref' currently not ", + "implemented for itempar.PlackettLuce().") + } else stop("Argument 'ref' is misspecified (see ?itempar for possible ", + "values).") + ref <- match(ref, id) + # define parameters + if (log){ + # based on contrasts + D <- diag(length(id)) + D[, ref] <- D[, ref] - 1L/length(ref) + coefs[id] <- as.vector(D %*% coefs[id]) + } else { + # constrained so sum of ref = 1 + alpha <- exp(coefs[id]) + denom <- sum(alpha[ref]) + coefs[id] <- alpha/denom + } + # define vcov + if (vcov){ + if (log) { + # vcov of contrasts + V <- D %*% vcov(object)[id, id] %*% t(D) + } else { + # vcov of exp(coefs) + V <- diag(alpha) %*% vcov(object)[id, id] %*% diag(alpha) + # partial derivatives of scaled exp(coefs) wrt exp(coefs) + D <- array(dim = dim(V)) + nonref <- setdiff(seq_along(id), ref) + if (length(nonref)){ + D[, nonref] <- 0 + D[cbind(nonref, nonref)] <- 1L/denom + } + D[, ref] <- -alpha/denom^2 + D[cbind(ref, ref)] <- 1L/denom + D[cbind(ref, ref)] + # vcov of scaled exp coefs + V <- D %*% V %*% t(D) + } + dimnames(V) <- list(object_names[id], object_names[id]) + } + # remove aliased parameter if required + if (!alias) { + alias <- ref[1L] + names(alias) <- names(coefs)[id[ref[1L]]] + coefs <- coefs[-alias] + if (vcov) V <- V[-alias, -alias] + } + structure(coefs, class = "itempar", model = "PlackettLuce", + ref = ref, alias = alias, vcov = if (vcov) V) +} + +#' @importFrom psychotools itempar +#' @export +psychotools::itempar + +#' @rdname itempar.PlackettLuce +#' @method itempar pltree +#' @importFrom partykit nodeapply +#' @export +itempar.pltree <- function (object, ...){ + # so unexported itempar.bttree is used from psychotree + requireNamespace("psychotree") + res <- NextMethod() + if (is.vector(res)){ + 
return(matrix(res, nrow = 1L, dimnames = list("1", names(res)))) + } + res +} diff --git a/R/nascar.R b/R/nascar.R index ac5d147..f8435b6 100644 --- a/R/nascar.R +++ b/R/nascar.R @@ -1,32 +1,33 @@ -#' Results from 2002 NASCAR Season -#' -#' This is an example dataset from \cite{Hunter 2004} recording the results of -#' 36 car races in the 2002 NASCAR season in the United States. Each record is -#' an ordering of the drivers according to their finishing position. -#' -#' @format A matrix with 36 rows corresponding to the races and 43 columns -#' corresponding to the positions. The columns contain the ID for the driver -#' that came first to last place respectively. The \code{"drivers"} -#' attribute contains the names of the 87 drivers. -#' @references -#' Hunter, D. R. (2004) MM algorithms for generalized Bradley-Terry models. -#' \emph{The Annals of Statistics}, \bold{32(1)}, 384--406. -#' @examples -#' -#' # convert orderings to rankings -#' nascar[1:2, ] -#' R <- as.rankings(nascar, input = "orderings", items = attr(nascar, "drivers")) -#' R[1:2, 1:4, as.rankings = FALSE] -#' format(R[1:2], width = 60) -#' -#' # fit model as in Hunter 2004, excluding drivers that only lose -#' keep <- seq_len(83) -#' R2 <- R[, keep] -#' mod <- PlackettLuce(R2, npseudo = 0) -#' -#' # show coefficients as in Table 2 of Hunter 2004 -#' avRank <- apply(R, 2, function(x) mean(x[x > 0])) -#' coefs <- round(coef(mod)[order(avRank[keep])], 2) -#' head(coefs, 3) -#' tail(coefs, 3) -"nascar" +#' Results from 2002 NASCAR Season +#' +#' This is an example dataset from \cite{Hunter 2004} recording the results of +#' 36 car races in the 2002 NASCAR season in the United States. Each record is +#' an ordering of the drivers according to their finishing position. +#' +#' @format A matrix with 36 rows corresponding to the races and 43 columns +#' corresponding to the positions. The columns contain the ID for the driver +#' that came first to last place respectively. The \code{"drivers"} +#' attribute contains the names of the 87 drivers. +#' @references +#' Hunter, D. R. (2004) MM algorithms for generalized Bradley-Terry models. +#' \emph{The Annals of Statistics}, \bold{32(1)}, 384--406. +#' @examples +#' +#' # convert orderings to rankings +#' nascar[1:2, ] +#' R <- as.rankings(nascar, input = "orderings", +#' items = attr(nascar, "drivers")) +#' R[1:2, 1:4, as.rankings = FALSE] +#' format(R[1:2], width = 60) +#' +#' # fit model as in Hunter 2004, excluding drivers that only lose +#' keep <- seq_len(83) +#' R2 <- R[, keep] +#' mod <- PlackettLuce(R2, npseudo = 0) +#' +#' # show coefficients as in Table 2 of Hunter 2004 +#' avRank <- apply(R, 2, function(x) mean(x[x > 0])) +#' coefs <- round(coef(mod)[order(avRank[keep])], 2) +#' head(coefs, 3) +#' tail(coefs, 3) +"nascar" diff --git a/R/plfit.R b/R/plfit.R index abe1814..f1539d7 100644 --- a/R/plfit.R +++ b/R/plfit.R @@ -1,162 +1,162 @@ -8#' PlackettLuce Wrapper for Model-based Recursive Partitioning -#' -#' This is a wrapper around \code{PlackettLuce} as required by -#' \code{\link[partykit]{mob}} for model-based recursive partitioning. It is -#' not intended for general use. -#' -#' @param y a \code{"\link{grouped_rankings}"} object giving the rankings to -#' model. -#' @param x unused. -#' @inheritParams PlackettLuce -#' @inheritParams summaries -#' @param offset unused. -#' @param ... additional arguments passed to \code{PlackettLuce}. -#' @param estfun logical. If \code{TRUE} the empirical estimating functions -#' (score/gradient contributions) are returned. 
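# A minimal usage sketch of this wrapper (illustrative only; it reuses the
# example rankings from these docs and is not part of the package code --
# `fit` is just a local name):
library(PlackettLuce)
R <- matrix(c(1, 2, 0, 0,
              4, 1, 2, 3,
              2, 1, 1, 1,
              1, 2, 3, 0,
              2, 1, 1, 0,
              1, 0, 3, 2), nrow = 6, byrow = TRUE)
colnames(R) <- c("apple", "banana", "orange", "pear")
G <- group(as.rankings(R), rep(1:2, 3))
fit <- plfit(G, estfun = TRUE)
fit$coefficients  # same estimates as coef(PlackettLuce(R))
-fit$objfun       # the log-likelihood
dim(fit$estfun)   # one row of score contributions per group, as used by mob()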
-#' @param object logical. If \code{TRUE} the fitted model is returned. -#' @return a list with elements -#' \item{coefficients}{ model coefficients. } -#' \item{objfun}{ the negative log-likelihood. } -#' \item{estfun}{ if \code{estfun} the empirical estimating functions. } -#' \item{object}{ if \code{object} the fitted model. } -#' @keywords internal -#' @examples -#' # rankings -#' R <- matrix(c(1, 2, 0, 0, -#' 4, 1, 2, 3, -#' 2, 1, 1, 1, -#' 1, 2, 3, 0, -#' 2, 1, 1, 0, -#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) -#' colnames(R) <- c("apple", "banana", "orange", "pear") -#' R <- as.rankings(R) -#' -#' # group rankings into two groups -#' G <- group(R, rep(1:2, 3)) -#' -#' # plfit() gives the same results as PlackettLuce() -#' pl <- plfit(G) -#' pl$coefficients -#' -pl$objfun -#' -#' mod <- PlackettLuce(R) -#' coef(mod) -#' logLik(mod) -#' @export -plfit <- function (y, x = NULL, ref = 1L, start = NULL, weights = NULL, - offset = NULL, ..., estfun = FALSE, object = FALSE) { - x <- !(is.null(x) || NCOL(x) == 0L) - offset <- !is.null(offset) - if (x || offset) - warning("unused argument(s): ", - paste(c("x"[x], "offset"[offset]), collapse = ",")) - dots <- list(...) - if ("adherence" %in% names(dots) & !"gamma" %in% names(dots)){ - stop("adherence can not be fixed in a Plackett-Luce tree") - } - res <- PlackettLuce(y, start = start, weights = weights, - na.action = NULL, ...) - if (estfun) { - percomp <- estfun.PlackettLuce(res, ref = ref) - estfun <- rowsum(as.matrix(percomp), attr(y, "index")) - } else estfun <- NULL - if (object) { - # returning in order to compute final vcov etc, so can - # aggregate equal rankings now, across rankers - if (is.null(res$gamma)) { - uniq <- !duplicated(attr(y, "id")) - g <- match(attr(y, "id"), attr(y, "id")[uniq]) - res$rankings <- structure(res$rankings[uniq,], - class = "rankings") - res$weights <- c(tapply(res$weights, g, sum)) - } - } - list(coefficients = coef(res, ref = ref), - ties = res$ties, - objfun = -res$loglik, - estfun = estfun, - object = if (object) res else NULL) -} - -# log-likelihood derivatives (score function) per ranking -#' @method estfun PlackettLuce -#' @importFrom sandwich estfun -#' @export -estfun.PlackettLuce <- function(x, ref = 1L, ...) 
{ - d <- x$ties - D <- length(d) - # get coefficients (unconstrained) - coef <- x$coefficients - N <- length(coef) - D + 1L - alpha <- coef[1L:N] - delta <- c(1.0, coef[-c(1L:N)]) - # get free choices and alternatives for each ranking - choices <- choices(x$rankings, names = FALSE) - free <- lengths(choices$alternatives) != 1L - choices <- lapply(choices, `[`, free) - # derivative wrt to log alpha part 1: 1/(size of selected set) - nr <- nrow(x$rankings) - A <- matrix(0.0, nrow = nr, ncol = N, - dimnames = list(NULL, names(alpha))) - choices$choices <- split(choices$choices, choices$ranking) - for (i in seq_len(nr)){ - r <- choices$choices[[i]] - len <- lengths(r) - if (!is.null(x$adherence)){ - A[i, unlist(r)] <- rep(x$adherence[x$ranker[i]]/len, len) - } else A[i, unlist(r)] <- rep(1L/len, len) - } - # derivative wrt delta part 1: row TRUE where selected set has cardinality d - if (D > 1L){ - B <- matrix(nrow = nr, ncol = D - 1L, - dimnames = list(NULL, names(delta[-1L]))) - for (i in seq_along(d[-1L])){ - if (!is.null(x$adherence)) { - B[, i] <- apply(A == x$adherence[x$ranker]/d[i + 1L], 1L, any) - } else B[, i] <- apply(A == 1L/d[i + 1L], 1L, any) - } - } - # derivatives part 2: expectation of alpha | delta per set to choose from - size <- lengths(choices$alternatives) - ord <- order(size) - if (!is.null(x$adherence)){ - # don't group - a <- x$adherence[x$ranker][choices$ranking] - unique_alternatives <- choices$alternatives - } else { - a <- NULL - unique_alternatives <- unique(choices$alternatives[ord]) - } - na <- lengths(unique_alternatives) - R <- matrix(0L, nrow = length(na), ncol = max(na)) - R[cbind(rep(seq_along(unique_alternatives), na), sequence(na))] <- - unlist(unique_alternatives) - G <- seq_along(unique_alternatives) - G <- lapply(seq_len(max(na)), function(i) G[na == i]) - P <- unique(na) - res <- expectation(c("alpha", "delta"), alpha, delta, - a, N, d, P, R, G, W = NULL) - - if (!is.null(x$adherence)){ - h <- seq_len(nrow(R)) - } else { - h <- match(choices$alternatives, unique_alternatives) - } - A <- (A - rowsum(res$expA[h,], choices$ranking)) * x$weights - if (!is.null(ref) && ref %in% names(alpha)) { - ref <- which(names(alpha) == ref) - } - if (D == 1L) { - if (!is.null(ref)) { - return(A[, -ref, drop = FALSE]) - } else return(A) - } - # N.B. expectation of delta should include delta*, but cancelled out in - # in iterative scaling so omitted! - res$expB <- sweep(res$expB, 2L, delta[-1L], "*") - B <- (B - rowsum(res$expB[h,], choices$ranking)) * x$weights - if (!is.null(ref)) { - return(cbind(A[, -ref, drop = FALSE], B)) - } else return(cbind(A[, drop = FALSE], B)) -} - +8#' PlackettLuce Wrapper for Model-based Recursive Partitioning +#' +#' This is a wrapper around \code{PlackettLuce} as required by +#' \code{\link[partykit]{mob}} for model-based recursive partitioning. It is +#' not intended for general use. +#' +#' @param y a \code{"\link{grouped_rankings}"} object giving the rankings to +#' model. +#' @param x unused. +#' @inheritParams PlackettLuce +#' @inheritParams summaries +#' @param offset unused. +#' @param ... additional arguments passed to \code{PlackettLuce}. +#' @param estfun logical. If \code{TRUE} the empirical estimating functions +#' (score/gradient contributions) are returned. +#' @param object logical. If \code{TRUE} the fitted model is returned. +#' @return a list with elements +#' \item{coefficients}{ model coefficients. } +#' \item{objfun}{ the negative log-likelihood. 
} +#' \item{estfun}{ if \code{estfun} the empirical estimating functions. } +#' \item{object}{ if \code{object} the fitted model. } +#' @keywords internal +#' @examples +#' # rankings +#' R <- matrix(c(1, 2, 0, 0, +#' 4, 1, 2, 3, +#' 2, 1, 1, 1, +#' 1, 2, 3, 0, +#' 2, 1, 1, 0, +#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) +#' colnames(R) <- c("apple", "banana", "orange", "pear") +#' R <- as.rankings(R) +#' +#' # group rankings into two groups +#' G <- group(R, rep(1:2, 3)) +#' +#' # plfit() gives the same results as PlackettLuce() +#' pl <- plfit(G) +#' pl$coefficients +#' -pl$objfun +#' +#' mod <- PlackettLuce(R) +#' coef(mod) +#' logLik(mod) +#' @export +plfit <- function (y, x = NULL, ref = 1L, start = NULL, weights = NULL, + offset = NULL, ..., estfun = FALSE, object = FALSE) { + x <- !(is.null(x) || NCOL(x) == 0L) + offset <- !is.null(offset) + if (x || offset) + warning("unused argument(s): ", + paste(c("x"[x], "offset"[offset]), collapse = ",")) + dots <- list(...) + if ("adherence" %in% names(dots) & !"gamma" %in% names(dots)){ + stop("adherence can not be fixed in a Plackett-Luce tree") + } + res <- PlackettLuce(y, start = start, weights = weights, + na.action = NULL, ...) + if (estfun) { + percomp <- estfun.PlackettLuce(res, ref = ref) + estfun <- rowsum(as.matrix(percomp), attr(y, "index")) + } else estfun <- NULL + if (object) { + # returning in order to compute final vcov etc, so can + # aggregate equal rankings now, across rankers + if (is.null(res$gamma)) { + uniq <- !duplicated(attr(y, "id")) + g <- match(attr(y, "id"), attr(y, "id")[uniq]) + res$rankings <- structure(res$rankings[uniq,], + class = "rankings") + res$weights <- c(tapply(res$weights, g, sum)) + } + } + list(coefficients = coef(res, ref = ref), + ties = res$ties, + objfun = -res$loglik, + estfun = estfun, + object = if (object) res else NULL) +} + +# log-likelihood derivatives (score function) per ranking +#' @method estfun PlackettLuce +#' @importFrom sandwich estfun +#' @export +estfun.PlackettLuce <- function(x, ref = 1L, ...) 
{ + d <- x$ties + D <- length(d) + # get coefficients (unconstrained) + coef <- x$coefficients + N <- length(coef) - D + 1L + alpha <- coef[1L:N] + delta <- c(1.0, coef[-c(1L:N)]) + # get free choices and alternatives for each ranking + choices <- choices(x$rankings, names = FALSE) + free <- lengths(choices$alternatives) != 1L + choices <- lapply(choices, `[`, free) + # derivative wrt to log alpha part 1: 1/(size of selected set) + nr <- nrow(x$rankings) + A <- matrix(0.0, nrow = nr, ncol = N, + dimnames = list(NULL, names(alpha))) + choices$choices <- split(choices$choices, choices$ranking) + for (i in seq_len(nr)){ + r <- choices$choices[[i]] + len <- lengths(r) + if (!is.null(x$adherence)){ + A[i, unlist(r)] <- rep(x$adherence[x$ranker[i]]/len, len) + } else A[i, unlist(r)] <- rep(1L/len, len) + } + # derivative wrt delta part 1: row TRUE where selected set has cardinality d + if (D > 1L){ + B <- matrix(nrow = nr, ncol = D - 1L, + dimnames = list(NULL, names(delta[-1L]))) + for (i in seq_along(d[-1L])){ + if (!is.null(x$adherence)) { + B[, i] <- apply(A == x$adherence[x$ranker]/d[i + 1L], 1L, any) + } else B[, i] <- apply(A == 1L/d[i + 1L], 1L, any) + } + } + # derivatives part 2: expectation of alpha | delta per set to choose from + size <- lengths(choices$alternatives) + ord <- order(size) + if (!is.null(x$adherence)){ + # don't group + a <- x$adherence[x$ranker][choices$ranking] + unique_alternatives <- choices$alternatives + } else { + a <- NULL + unique_alternatives <- unique(choices$alternatives[ord]) + } + na <- lengths(unique_alternatives) + R <- matrix(0L, nrow = length(na), ncol = max(na)) + R[cbind(rep(seq_along(unique_alternatives), na), sequence(na))] <- + unlist(unique_alternatives) + G <- seq_along(unique_alternatives) + G <- lapply(seq_len(max(na)), function(i) G[na == i]) + P <- unique(na) + res <- expectation(c("alpha", "delta"), alpha, delta, + a, N, d, P, R, G, W = NULL) + + if (!is.null(x$adherence)){ + h <- seq_len(nrow(R)) + } else { + h <- match(choices$alternatives, unique_alternatives) + } + A <- (A - rowsum(res$expA[h,], choices$ranking)) * x$weights + if (!is.null(ref) && ref %in% names(alpha)) { + ref <- which(names(alpha) == ref) + } + if (D == 1L) { + if (!is.null(ref)) { + return(A[, -ref, drop = FALSE]) + } else return(A) + } + # N.B. expectation of delta should include delta*, but cancelled out in + # in iterative scaling so omitted! + res$expB <- sweep(res$expB, 2L, delta[-1L], "*") + B <- (B - rowsum(res$expB[h,], choices$ranking)) * x$weights + if (!is.null(ref)) { + return(cbind(A[, -ref, drop = FALSE], B)) + } else return(cbind(A[, drop = FALSE], B)) +} + diff --git a/R/pltree-summaries.R b/R/pltree-summaries.R index 81cd1a2..1acfc37 100644 --- a/R/pltree-summaries.R +++ b/R/pltree-summaries.R @@ -1,205 +1,205 @@ -#' Plackett-Luce Tree Summaries -#' -#' Obtain the coefficients, variance-covariance matrix, AIC, or predictions -#' from a Plackett-Luce tree fitted by [pltree()]. -#' -#' \code{AIC} computes \eqn{-2L + 2p}{-2 * L + 2 * p} where \eqn{L} is the -#' joint likelihood of the observed rankings under the tree model and -#' \eqn{p} is the degrees of freedom used to fit the tree model. -#' -#' @param object a fitted model object of class \code{"pltree"}. -#' @param node a vector of node ids specifying the nodes to summarise, by -#' default the ids of the terminal nodes. -#' @param drop if \code{TRUE} return the coefficients as a vector when only one -#' node is selected. 
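# An illustrative sketch of the `node` and `drop` arguments documented above.
# It simply reuses the beans example from this file's @examples (the beans
# data ship with the package); none of this is package code:
library(PlackettLuce)
data(beans)
pairB <- data.frame(Winner = ifelse(beans$var_b == "Worse",
                                    "Local", beans$variety_b),
                    Loser = ifelse(beans$var_b == "Worse",
                                   beans$variety_b, "Local"),
                    stringsAsFactors = FALSE, row.names = NULL)
beans$G <- as.rankings(pairB, input = "orderings",
                       index = rep(seq(nrow(beans)), 1))
mod <- pltree(G ~ ., data = beans[c("G", "maxTN")])
coef(mod, drop = FALSE)  # matrix with one row per terminal node
coef(mod, node = 3)      # a single node, dropped to a vector by default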
-#' @param newdata an optional data frame to use instead of the -#' original data. For \code{AIC} this must include the response variable. -#' @param type the type of prediction to return for each group, one of: -#' \code{"itempar"} to give the result of \code{\link{itempar}} (by default the -#' fitted probability of each item being ranked first out of all objects), -#' \code{"rank"} the corresponding rank, \code{"best"} the topped ranked item, -#' or \code{"node"} the node of the tree the group belongs to. -#' @param ... additional arguments passed to -#' [`itempar`][itempar.PlackettLuce()] by \code{predict}, and to -#' \code{\link{model.frame}} by \code{AIC}. -#' @examples -#' data(beans) -#' # fit tree based on pairwise comparisons with variety B -#' pairB <- data.frame(Winner = ifelse(beans$var_b == "Worse", -#' "Local", beans$variety_b), -#' Loser = ifelse(beans$var_b == "Worse", -#' beans$variety_b, "Local"), -#' stringsAsFactors = FALSE, row.names = NULL) -#' beans$G <- as.rankings(pairB, input = "orderings", -#' index = rep(seq(nrow(beans)), 1)) -#' -#' mod <- pltree(G ~ ., data = beans[c("G", "maxTN")]) -#' -#' coef(mod, node = 3) -#' AIC(mod) -#' -#' # treat first row from each year as new data -#' newdata <- beans[!duplicated(beans$year),] -#' -#' ## fitted probabilities -#' predict(mod, newdata) -#' -#' ## fitted log-abilities, with Local as reference -#' predict(mod, newdata, log = TRUE, ref = "Local") -#' -#' ## variety ranks -#' predict(mod, newdata, type = "rank") -#' -#' ## top ranked variety -#' predict(mod, newdata, type = "best") -#' -#' ## node the trial belongs to -#' predict(mod, newdata, type = "node") -#' @name pltree-summaries -NULL - -#' @rdname pltree-summaries -#' @method coef pltree -#' @importFrom partykit info_node nodeids -#' @export -coef.pltree <- function (object, node = NULL, drop = TRUE, ...) { - if (is.null(node)){ - ids <- nodeids(object, terminal = TRUE) - } else ids <- node - if ("object" %in% object$info$control$terminal) { - cf <- do.call("rbind", - lapply(ids, FUN = function(n, ...){ - # set ref as specified in plfit if unspecified - info <- info_node(object[[n]]$node) - cll <- as.call(list(coef.PlackettLuce, - info$object, ...)) - cll <- match.call(coef.PlackettLuce, cll) - if (!"ref" %in% names(cll)) { - cll$ref <- attr(info$coefficients, "ref") - } - eval(cll) - }, ...)) - } else{ - cf <- do.call("rbind", - lapply(ids, FUN = function(n, ...){ - # compute coef as returned from original fit - info <- info_node(object[[n]]$node) - n <- length(info$coefficients) - - length(info$ties) + 1L - info$coefficients <- exp(info$coefficients) - id <- seq_len(n) - info$coefficients[id] <- - info$coefficients[id]/sum(info$coefficients[id]) - # parameterize as requested - cll <- as.call(list(coef.PlackettLuce, info, ...)) - cll <- match.call(coef.PlackettLuce, cll) - if (!"ref" %in% names(cll)) { - cll$ref <- attr(info$coefficients, "ref") - } - eval(cll) - }, ...)) - } - rownames(cf) <- ids - if (drop) { - drop(cf) - } else { - cf - } -} - -#' @rdname pltree-summaries -#' @method vcov pltree -vcov.pltree <- function (object, node = nodeids(object, terminal = TRUE), ...){ - nodeapply(object, ids = node, - FUN = function(n) vcov(info_node(n)$object, ...)) -} - -#' @rdname pltree-summaries -#' @method AIC pltree -#' @importFrom stats formula logLik model.frame model.response model.weights -#' @export -AIC.pltree <- function(object, newdata = NULL, ...) 
{ - if (is.null(newdata)) { - return(NextMethod(object, ...)) - } - # create model.frame from newdata - response <- as.character(formula(object)[[2L]]) - if (!response %in% colnames(newdata)) - stop("`newdata` must include response") - f <- formula(object) - environment(f) <- parent.frame() - newdata <- model.frame(f, data = newdata, ...) - # predict node for each grouped rankingnodeids(tm_tree, terminal = TRUE) - node <- partykit::predict.modelparty(object, - newdata = newdata, - type = "node") - # set up to refit models based on newdata - cf <- itempar(object) - if (is.null(dim(cf))) cf <- t(as.matrix(cf)) - nodes <- partykit::nodeids(object, terminal = TRUE) - dots <- object$info$dots - G <- model.response(newdata) - w <- model.weights(newdata) - if (is.null(w)) w <- rep.int(1L, length(G)) - LL <- df <- numeric(length(nodes)) - for (i in seq_along(nodes)){ - # fit model with coef fixed to get logLik - # suppress warning due to fixing maxit - id <- node == nodes[i] - if (sum(id)) { - fit <- suppressWarnings( - do.call("plfit", - c(list(y = G[id,], - start = cf[i,], - weights = w[id], - maxit = 0), - dots))) - LL[i] <- -fit$objfun - } - } - # compute AIC based on total log likelihood of data - # and df of original model fit - -2L*sum(LL) + 2L*attr(logLik(object), "df") -} - -#' @rdname pltree-summaries -#' @method predict pltree -#' @export -predict.pltree <- function(object, newdata = NULL, - type = c("itempar", "rank", "best", "node"), - ...) { - type <- match.arg(type) - if (type == "node"){ - res <- partykit::predict.modelparty(object, - newdata = newdata, - type = "node") - return(structure(as.character(res), - names = as.character(seq_along(res)))) - } - if (is.null(newdata)) { - newdata <- model.frame(object) - na.action <- attr(newdata, "na.action") - } else na.action <- NULL - pred <- switch(type, - itempar = function(obj, ...) { - t(as.matrix(itempar(obj, ...))) - }, - rank = function(obj, ...) { - t(as.matrix(rank(-obj$coefficients))) - }, - best = function(obj, ...) { - nm <- names(obj$coefficients) - nm[which.max(obj$coefficients)] - }) - out <- partykit::predict.modelparty(object, newdata = newdata, - type = pred, ...) - if (is.null(na.action) | !inherits(na.action, "exclude")) return(out) - n_miss <- length(na.action) - current <- seq_len(NROW(out) + n_miss)[-na.action] - id <- order(c(match(current, seq_len(NROW(out))), na.action)) - if (is.matrix(pred)){ - pred <- rbind(pred, - matrix(0L, nrow = n_miss, ncol = ncol(pred)))[id,] - } else pred <- c(pred, rep.int(0L, n_miss))[id] -} - +#' Plackett-Luce Tree Summaries +#' +#' Obtain the coefficients, variance-covariance matrix, AIC, or predictions +#' from a Plackett-Luce tree fitted by [pltree()]. +#' +#' \code{AIC} computes \eqn{-2L + 2p}{-2 * L + 2 * p} where \eqn{L} is the +#' joint likelihood of the observed rankings under the tree model and +#' \eqn{p} is the degrees of freedom used to fit the tree model. +#' +#' @param object a fitted model object of class \code{"pltree"}. +#' @param node a vector of node ids specifying the nodes to summarise, by +#' default the ids of the terminal nodes. +#' @param drop if \code{TRUE} return the coefficients as a vector when only one +#' node is selected. +#' @param newdata an optional data frame to use instead of the +#' original data. For \code{AIC} this must include the response variable. 
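# Continuing the sketch above (same `mod` and `beans`; illustrative only):
# without new data AIC() is -2 * logLik + 2 * df, and with `newdata` the
# data frame must contain the response `G` as well as the covariates.
ll <- logLik(mod)
all.equal(AIC(mod), -2 * as.numeric(ll) + 2 * attr(ll, "df"))
newdata <- beans[!duplicated(beans$year), ]
AIC(mod, newdata = newdata)
predict(mod, newdata, type = "best")  # top-ranked variety for each trial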
+#' @param type the type of prediction to return for each group, one of: +#' \code{"itempar"} to give the result of \code{\link{itempar}} (by default the +#' fitted probability of each item being ranked first out of all objects), +#' \code{"rank"} the corresponding rank, \code{"best"} the topped ranked item, +#' or \code{"node"} the node of the tree the group belongs to. +#' @param ... additional arguments passed to +#' [`itempar`][itempar.PlackettLuce()] by \code{predict}, and to +#' \code{\link{model.frame}} by \code{AIC}. +#' @examples +#' data(beans) +#' # fit tree based on pairwise comparisons with variety B +#' pairB <- data.frame(Winner = ifelse(beans$var_b == "Worse", +#' "Local", beans$variety_b), +#' Loser = ifelse(beans$var_b == "Worse", +#' beans$variety_b, "Local"), +#' stringsAsFactors = FALSE, row.names = NULL) +#' beans$G <- as.rankings(pairB, input = "orderings", +#' index = rep(seq(nrow(beans)), 1)) +#' +#' mod <- pltree(G ~ ., data = beans[c("G", "maxTN")]) +#' +#' coef(mod, node = 3) +#' AIC(mod) +#' +#' # treat first row from each year as new data +#' newdata <- beans[!duplicated(beans$year),] +#' +#' ## fitted probabilities +#' predict(mod, newdata) +#' +#' ## fitted log-abilities, with Local as reference +#' predict(mod, newdata, log = TRUE, ref = "Local") +#' +#' ## variety ranks +#' predict(mod, newdata, type = "rank") +#' +#' ## top ranked variety +#' predict(mod, newdata, type = "best") +#' +#' ## node the trial belongs to +#' predict(mod, newdata, type = "node") +#' @name pltree-summaries +NULL + +#' @rdname pltree-summaries +#' @method coef pltree +#' @importFrom partykit info_node nodeids +#' @export +coef.pltree <- function (object, node = NULL, drop = TRUE, ...) { + if (is.null(node)){ + ids <- nodeids(object, terminal = TRUE) + } else ids <- node + if ("object" %in% object$info$control$terminal) { + cf <- do.call("rbind", + lapply(ids, FUN = function(n, ...){ + # set ref as specified in plfit if unspecified + info <- info_node(object[[n]]$node) + cll <- as.call(list(coef.PlackettLuce, + info$object, ...)) + cll <- match.call(coef.PlackettLuce, cll) + if (!"ref" %in% names(cll)) { + cll$ref <- attr(info$coefficients, "ref") + } + eval(cll) + }, ...)) + } else{ + cf <- do.call("rbind", + lapply(ids, FUN = function(n, ...){ + # compute coef as returned from original fit + info <- info_node(object[[n]]$node) + n <- length(info$coefficients) - + length(info$ties) + 1L + info$coefficients <- exp(info$coefficients) + id <- seq_len(n) + info$coefficients[id] <- + info$coefficients[id]/sum(info$coefficients[id]) + # parameterize as requested + cll <- as.call(list(coef.PlackettLuce, info, ...)) + cll <- match.call(coef.PlackettLuce, cll) + if (!"ref" %in% names(cll)) { + cll$ref <- attr(info$coefficients, "ref") + } + eval(cll) + }, ...)) + } + rownames(cf) <- ids + if (drop) { + drop(cf) + } else { + cf + } +} + +#' @rdname pltree-summaries +#' @method vcov pltree +vcov.pltree <- function (object, node = nodeids(object, terminal = TRUE), ...){ + nodeapply(object, ids = node, + FUN = function(n) vcov(info_node(n)$object, ...)) +} + +#' @rdname pltree-summaries +#' @method AIC pltree +#' @importFrom stats formula logLik model.frame model.response model.weights +#' @export +AIC.pltree <- function(object, newdata = NULL, ...) 
{ + if (is.null(newdata)) { + return(NextMethod(object, ...)) + } + # create model.frame from newdata + response <- as.character(formula(object)[[2L]]) + if (!response %in% colnames(newdata)) + stop("`newdata` must include response") + f <- formula(object) + environment(f) <- parent.frame() + newdata <- model.frame(f, data = newdata, ...) + # predict node for each grouped rankingnodeids(tm_tree, terminal = TRUE) + node <- partykit::predict.modelparty(object, + newdata = newdata, + type = "node") + # set up to refit models based on newdata + cf <- itempar(object) + if (is.null(dim(cf))) cf <- t(as.matrix(cf)) + nodes <- partykit::nodeids(object, terminal = TRUE) + dots <- object$info$dots + G <- model.response(newdata) + w <- model.weights(newdata) + if (is.null(w)) w <- rep.int(1L, length(G)) + LL <- df <- numeric(length(nodes)) + for (i in seq_along(nodes)){ + # fit model with coef fixed to get logLik + # suppress warning due to fixing maxit + id <- node == nodes[i] + if (sum(id)) { + fit <- suppressWarnings( + do.call("plfit", + c(list(y = G[id,], + start = cf[i,], + weights = w[id], + maxit = 0), + dots))) + LL[i] <- -fit$objfun + } + } + # compute AIC based on total log likelihood of data + # and df of original model fit + -2L*sum(LL) + 2L*attr(logLik(object), "df") +} + +#' @rdname pltree-summaries +#' @method predict pltree +#' @export +predict.pltree <- function(object, newdata = NULL, + type = c("itempar", "rank", "best", "node"), + ...) { + type <- match.arg(type) + if (type == "node"){ + res <- partykit::predict.modelparty(object, + newdata = newdata, + type = "node") + return(structure(as.character(res), + names = as.character(seq_along(res)))) + } + if (is.null(newdata)) { + newdata <- model.frame(object) + na.action <- attr(newdata, "na.action") + } else na.action <- NULL + pred <- switch(type, + itempar = function(obj, ...) { + t(as.matrix(itempar(obj, ...))) + }, + rank = function(obj, ...) { + t(as.matrix(rank(-obj$coefficients))) + }, + best = function(obj, ...) { + nm <- names(obj$coefficients) + nm[which.max(obj$coefficients)] + }) + out <- partykit::predict.modelparty(object, newdata = newdata, + type = pred, ...) + if (is.null(na.action) | !inherits(na.action, "exclude")) return(out) + n_miss <- length(na.action) + current <- seq_len(NROW(out) + n_miss)[-na.action] + id <- order(c(match(current, seq_len(NROW(out))), na.action)) + if (is.matrix(pred)){ + pred <- rbind(pred, + matrix(0L, nrow = n_miss, ncol = ncol(pred)))[id,] + } else pred <- c(pred, rep.int(0L, n_miss))[id] +} + diff --git a/R/qvcalc.PlackettLuce.R b/R/qvcalc.PlackettLuce.R index c6e0526..54c0dbf 100644 --- a/R/qvcalc.PlackettLuce.R +++ b/R/qvcalc.PlackettLuce.R @@ -1,88 +1,88 @@ -#' Quasi Variances for Model Coefficients -#' -#' A method for \code{\link{qvcalc}} to compute a set of quasi variances (and -#' corresponding quasi standard errors) for estimated item parameters from a -#' Plackett-Luce model. -#' -#' For details of the method see Firth (2000), Firth (2003) or Firth and de -#' Menezes (2004). Quasi variances generalize and improve the accuracy of -#' \dQuote{floating absolute risk} (Easton et al., 1991). This device for -#' economical model summary was first suggested by Ridout (1989). -#' -#' Ordinarily the quasi variances are positive and so their square roots -#' (the quasi standard errors) exist and can be used in plots, etc. -#' -#' @param object a \code{"PlackettLuce"} object as returned by -#' \code{PlackettLuce}. -#' @param ... additional arguments, currently ignored.. 
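# A sketch of the device AIC.pltree() (above) uses to evaluate the
# log-likelihood of rankings at already-fitted worths: refit with
# `maxit = 0` (forwarded to PlackettLuce) so that no iterations are run.
# Illustrative only, on untied toy rankings so only worths are needed as
# starting values; `R2`, `G2`, `worth` and `fit0` are local names:
library(PlackettLuce)
R2 <- matrix(c(1, 2, 0, 0,
               4, 1, 2, 3,
               1, 2, 3, 0,
               1, 0, 3, 2), nrow = 4, byrow = TRUE)
colnames(R2) <- c("apple", "banana", "orange", "pear")
worth <- itempar(PlackettLuce(R2))
G2 <- group(as.rankings(R2), rep(1:2, 2))
fit0 <- suppressWarnings(plfit(G2, start = worth, maxit = 0))
-fit0$objfun  # log-likelihood of the rankings at the fixed worths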
-#' @inheritParams summaries -#' @return A list of class \code{"qv"}, with components -#' \item{covmat}{The full variance-covariance matrix for the item -#' parameters.} -#' \item{qvframe}{A data frame with variables \code{estimate}, \code{SE}, \code{quasiSE} and -#' \code{quasiVar}, the last two being a quasi standard error and quasi-variance -#' for each parameter.} -#' \item{dispersion}{\code{NULL} (dispersion is fixed to 1).} -#' \item{relerrs}{Relative errors for approximating the standard errors of all -#' simple contrasts.} -#' \item{factorname}{\code{NULL} (not required for this method).} -#' \item{coef.indices}{\code{NULL} (not required for this method).} -#' \item{modelcall}{The call to \code{PlackettLuce} to fit the model from which -#' the item parameters were estimated.} -#' @seealso \code{\link{worstErrors}}, \code{\link{plot.qv}}. -#' @references -#' Easton, D. F, Peto, J. and Babiker, A. G. A. G. (1991) Floating absolute -#' risk: an alternative to relative risk in survival and case-control analysis -#' avoiding an arbitrary reference group. \emph{Statistics in Medicine} \bold{10}, -#' 1025--1035. -#' -#' Firth, D. (2000) Quasi-variances in Xlisp-Stat and on the web. -#' \emph{Journal of Statistical Software} \bold{5.4}, 1--13. -#' At https://www.jstatsoft.org -#' -#' Firth, D. (2003) Overcoming the reference category problem in the -#' presentation of statistical models. \emph{Sociological Methodology} -#' \bold{33}, 1--18. -#' -#' Firth, D. and de Menezes, R. X. (2004) Quasi-variances. -#' \emph{Biometrika} \bold{91}, 65--80. -#' -#' Menezes, R. X. de (1999) More useful standard errors for group and factor -#' effects in generalized linear models. \emph{D.Phil. Thesis}, -#' Department of Statistics, University of Oxford. -#' -#' Ridout, M.S. (1989). Summarizing the results of fitting generalized -#' linear models to data from designed experiments. In: \emph{Statistical -#' Modelling: Proceedings of GLIM89 and the 4th International -#' Workshop on Statistical Modelling held in Trento, Italy, July 17--21, -#' 1989} (A. Decarli et al., eds.), pp 262--269. New York: Springer. -#' @examples -#' # Six partial rankings of four objects, 1 is top rank, e.g -#' # first ranking: item 1, item 2 -#' # second ranking: item 2, item 3, item 4, item 1 -#' # third ranking: items 2, 3, 4 tie for first place, item 1 second -#' R <- matrix(c(1, 2, 0, 0, -#' 4, 1, 2, 3, -#' 2, 1, 1, 1, -#' 1, 2, 3, 0, -#' 2, 1, 1, 0, -#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) -#' colnames(R) <- c("apple", "banana", "orange", "pear") -#' -#' mod <- PlackettLuce(R) -#' qv <- qvcalc(mod) -#' qv -#' plot(qv) -#' @importFrom stats vcov -#' @export -qvcalc.PlackettLuce <- function(object, ref = 1L, ...) { - coefs <- coef(object, ref = ref) - vc <- vcov(object, ref = ref) - nobj <- length(coefs) - length(object$ties) + 1L - qvcalc(vc[1L:nobj, 1L:nobj], estimates = coefs[1L:nobj], - modelcall = object$call) -} - -#' @importFrom qvcalc qvcalc -#' @export -qvcalc::qvcalc +#' Quasi Variances for Model Coefficients +#' +#' A method for \code{\link{qvcalc}} to compute a set of quasi variances (and +#' corresponding quasi standard errors) for estimated item parameters from a +#' Plackett-Luce model. +#' +#' For details of the method see Firth (2000), Firth (2003) or Firth and de +#' Menezes (2004). Quasi variances generalize and improve the accuracy of +#' \dQuote{floating absolute risk} (Easton et al., 1991). This device for +#' economical model summary was first suggested by Ridout (1989). 
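# A sketch of the economy this buys (assuming `mod` and `qv` as constructed
# in the @examples below, and that the rows of qv$qvframe are named by item):
# the standard error of any simple contrast of log-worths is approximated
# from just two quasi variances, with no reference item involved, e.g.
sqrt(qv$qvframe["banana", "quasiVar"] + qv$qvframe["orange", "quasiVar"])
# compare the exact value from the full variance-covariance matrix:
vc <- vcov(mod)
sqrt(vc["banana", "banana"] + vc["orange", "orange"] - 2 * vc["banana", "orange"])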
+#' +#' Ordinarily the quasi variances are positive and so their square roots +#' (the quasi standard errors) exist and can be used in plots, etc. +#' +#' @param object a \code{"PlackettLuce"} object as returned by +#' \code{PlackettLuce}. +#' @param ... additional arguments, currently ignored.. +#' @inheritParams summaries +#' @return A list of class \code{"qv"}, with components +#' \item{covmat}{The full variance-covariance matrix for the item +#' parameters.} +#' \item{qvframe}{A data frame with variables \code{estimate}, \code{SE}, +#' \code{quasiSE} and \code{quasiVar}, the last two being a quasi standard +#' error and quasi-variance for each parameter.} +#' \item{dispersion}{\code{NULL} (dispersion is fixed to 1).} +#' \item{relerrs}{Relative errors for approximating the standard errors of all +#' simple contrasts.} +#' \item{factorname}{\code{NULL} (not required for this method).} +#' \item{coef.indices}{\code{NULL} (not required for this method).} +#' \item{modelcall}{The call to \code{PlackettLuce} to fit the model from which +#' the item parameters were estimated.} +#' @seealso \code{\link{worstErrors}}, \code{\link{plot.qv}}. +#' @references +#' Easton, D. F, Peto, J. and Babiker, A. G. A. G. (1991) Floating absolute +#' risk: an alternative to relative risk in survival and case-control analysis +#' avoiding an arbitrary reference group. \emph{Statistics in Medicine} +#' \bold{10}, 1025--1035. +#' +#' Firth, D. (2000) Quasi-variances in Xlisp-Stat and on the web. +#' \emph{Journal of Statistical Software} \bold{5.4}, 1--13. +#' At https://www.jstatsoft.org +#' +#' Firth, D. (2003) Overcoming the reference category problem in the +#' presentation of statistical models. \emph{Sociological Methodology} +#' \bold{33}, 1--18. +#' +#' Firth, D. and de Menezes, R. X. (2004) Quasi-variances. +#' \emph{Biometrika} \bold{91}, 65--80. +#' +#' Menezes, R. X. de (1999) More useful standard errors for group and factor +#' effects in generalized linear models. \emph{D.Phil. Thesis}, +#' Department of Statistics, University of Oxford. +#' +#' Ridout, M.S. (1989). Summarizing the results of fitting generalized +#' linear models to data from designed experiments. In: \emph{Statistical +#' Modelling: Proceedings of GLIM89 and the 4th International +#' Workshop on Statistical Modelling held in Trento, Italy, July 17--21, +#' 1989} (A. Decarli et al., eds.), pp 262--269. New York: Springer. +#' @examples +#' # Six partial rankings of four objects, 1 is top rank, e.g +#' # first ranking: item 1, item 2 +#' # second ranking: item 2, item 3, item 4, item 1 +#' # third ranking: items 2, 3, 4 tie for first place, item 1 second +#' R <- matrix(c(1, 2, 0, 0, +#' 4, 1, 2, 3, +#' 2, 1, 1, 1, +#' 1, 2, 3, 0, +#' 2, 1, 1, 0, +#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) +#' colnames(R) <- c("apple", "banana", "orange", "pear") +#' +#' mod <- PlackettLuce(R) +#' qv <- qvcalc(mod) +#' qv +#' plot(qv) +#' @importFrom stats vcov +#' @export +qvcalc.PlackettLuce <- function(object, ref = 1L, ...) 
{ + coefs <- coef(object, ref = ref) + vc <- vcov(object, ref = ref) + nobj <- length(coefs) - length(object$ties) + 1L + qvcalc(vc[1L:nobj, 1L:nobj], estimates = coefs[1L:nobj], + modelcall = object$call) +} + +#' @importFrom qvcalc qvcalc +#' @export +qvcalc::qvcalc diff --git a/R/simulate.R b/R/simulate.R index d02fbda..f85f225 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -1,166 +1,166 @@ -#' Simulate from \code{PlackettLuce} fitted objects -#' -#' @inheritParams stats::simulate -#' -#' @param multinomial use multinomial sampling anyway? Default is -#' \code{FALSE}. see Details. -#' @param max_combinations a positive number. Default is -#' \code{20000}. See Details. -#' @param seed an object specifying if and how the random number -#' generator should be initialised. Either \code{NULL} or an -#' integer that will be used in a call to \code{set.seed} before -#' simulating the rankings. If set, the value is saved as the -#' \code{seed} attribute of the returned value. The default, -#' \code{NULL}, will not change the random generator state, and -#' return \code{.Random.seed} as the \code{seed} attribute. -#' -#' @details -#' -#' If \code{multinomial} is \code{FALSE} (default) and there are no -#' tie parameters in the object (i.e. \code{length(object$ties) == 1}), -#' then rankings are sampled by ordering exponential random variates -#' with rate 1 scaled by the estimated item-worth parameters -#' \code{object$coefficients} (see, Diaconis, 1988, Chapter 9D for -#' details). -#' -#' In all other cases, the current implementation uses direct -#' multinomial sampling, and will throw an error if there are more -#' than \code{max_combinations} combinations of items that the sampler -#' has to decide from. This is a hard-coded exit to prevent issues -#' relating to the creation of massive objects in memory. -#' -#' If \code{length(object$ties) > 1} the user's setting for -#' \code{multinomial} is ignored and \code{simulate.PlackettLuce} operates as if -#' \code{multinomial} is \code{TRUE}. -#' -#' @return A \code{data.frame} of \code{\link{rankings}} objects of the same -#' dimension as \code{object$rankings}. -#' -#' @references -#' -#' Diaconis (1988). \emph{Group Representations in Probability and -#' Statistics}. Institute of Mathematical Statistics Lecture Notes -#' 11. Hayward, CA. -#' -#' @examples -#' R <- matrix(c(1, 2, 0, 0, -#' 4, 1, 2, 3, -#' 2, 1, 1, 1, -#' 1, 2, 3, 0, -#' 2, 1, 1, 0, -#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) -#' colnames(R) <- c("apple", "banana", "orange", "pear") -#' mod <- PlackettLuce(R) -#' simulate(mod, 5) -#' -#' s1 <- simulate(mod, 3, seed = 112) -#' s2 <- simulate(mod, 2, seed = 112) -#' -#' identical(s1[1:2], s2[1:2]) -#' -#' @importFrom stats rexp runif simulate -#' @method simulate PlackettLuce -#' @export -simulate.PlackettLuce <- function(object, nsim = 1L, seed = NULL, - multinomial = FALSE, - max_combinations = 2e+04, ...) 
{ - if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) - runif(1L) - if (is.null(seed)) - RNGstate <- get(".Random.seed", envir = .GlobalEnv) - else { - R.seed <- get(".Random.seed", envir = .GlobalEnv) - set.seed(seed) - RNGstate <- structure(seed, kind = as.list(RNGkind())) - on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv)) - } - rankings <- unclass(object$rankings) - N <- ncol(rankings) - n_rankings <- nrow(rankings) - id <- seq(length(object$coefficients) - length(object$ties) + 1L) - alpha <- object$coefficients[id] - delta <- numeric(max(object$ties)) - delta[object$ties] <- c(1.0, unname(object$coefficients[-id])) - opt <- seq_len(N) - sets <- as.list(numeric(n_rankings)) - for (i in seq_len(n_rankings)) { - sets[[i]] <- opt[rankings[i,] > 0L] - } - len <- lengths(sets) - - ## If there are no ties use Louis Gordon (1983, Annals of Stat); - ## Diaconis (1988, Chapter 9D) - if (length(object$ties) == 1L & !multinomial) { - sampler <- function(objects) { - v <- numeric(N) - len <- length(objects) - ordering <- objects[order(rexp(len, rate = 1L)/alpha[objects], - decreasing = FALSE)] - v[ordering] <- seq.int(len) - v - } - } - else { - ## NOTE, IK 10/12/2017: This is an inefficient computation - ## with potentially massive objects FIX, IK 10/12/2017: Remove - ## dependence on all combinations. For now a preventive stop - ## if n_combinations is more than max_combinations - n_combinations <- sum(choose(N, object$ties)) - if (n_combinations > max_combinations) { - stop(paste("simulate.PlackettLuce needs to decide between", - n_combinations, - "combinations of items, but 'max_combinations' is", - max_combinations)) - } - - ## Get all possible combinations of objects - combinations <- NULL - for (j in object$ties) { - combinations <- c(combinations, combn(opt, j, simplify = FALSE)) - } - - ## Unormalized probabilities of all combinations - probs <- vapply(combinations, function(z) { - delta[length(z)] * prod(alpha[z])^(1L/length(z)) - }, 1) - ## NOTE, IK 10/12/2017: Normalization is done internally by sample.int - sampler <- function(objects) { - v <- numeric(N) - j <- 1L - indices <- rep(TRUE, n_combinations) - while (length(objects)) { - ## find out which combinations have all of their objects - ## included in `objects` - indices[indices] <- vapply(combinations[indices], function(x) { - all(x %in% objects) - }, TRUE) - ## sample, after setting the probability of all other - ## combinations to zero - sampled <- combinations[[sample.int(n_combinations, 1L, - prob = probs * indices)]] - ## remove the sampled objects from `objects` - objects <- objects[-match(sampled, objects, nomatch = 0L)] - v[sampled] <- j - j <- j + 1L - } - v - } - } - - out <- replicate(nsim, { - R <- t(vapply(sets, sampler, numeric(N))) - colnames(R) <- colnames(rankings) - as.rankings(R) - }, simplify = FALSE) - out <- data.frame(out) - names(out) <- paste("sim", seq_len(nsim), sep = "_") - attr(out, "seed") <- RNGstate - out -} - - -## TODO, IK, 10/12/2017: -## * Handling of weights -## * Avoid constructing all combinations -## * Exploit ideas for multinomial sampling with massive number of categories -## * Add parallelization capabilities +#' Simulate from \code{PlackettLuce} fitted objects +#' +#' @inheritParams stats::simulate +#' +#' @param multinomial use multinomial sampling anyway? Default is +#' \code{FALSE}. see Details. +#' @param max_combinations a positive number. Default is +#' \code{20000}. See Details. 
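# An illustrative sketch of the no-ties sampling scheme described in the
# Details below: a ranking is drawn by ordering Exponential(1) draws scaled
# by the item worths (the `alpha` values here are hypothetical):
alpha <- c(apple = 0.4, banana = 0.3, orange = 0.2, pear = 0.1)
set.seed(1)
z <- rexp(length(alpha)) / alpha   # smaller value = better place
names(alpha)[order(z)]             # one simulated ranking, first to last place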
+#' @param seed an object specifying if and how the random number +#' generator should be initialised. Either \code{NULL} or an +#' integer that will be used in a call to \code{set.seed} before +#' simulating the rankings. If set, the value is saved as the +#' \code{seed} attribute of the returned value. The default, +#' \code{NULL}, will not change the random generator state, and +#' return \code{.Random.seed} as the \code{seed} attribute. +#' +#' @details +#' +#' If \code{multinomial} is \code{FALSE} (default) and there are no +#' tie parameters in the object (i.e. \code{length(object$ties) == 1}), +#' then rankings are sampled by ordering exponential random variates +#' with rate 1 scaled by the estimated item-worth parameters +#' \code{object$coefficients} (see, Diaconis, 1988, Chapter 9D for +#' details). +#' +#' In all other cases, the current implementation uses direct +#' multinomial sampling, and will throw an error if there are more +#' than \code{max_combinations} combinations of items that the sampler +#' has to decide from. This is a hard-coded exit to prevent issues +#' relating to the creation of massive objects in memory. +#' +#' If \code{length(object$ties) > 1} the user's setting for +#' \code{multinomial} is ignored and \code{simulate.PlackettLuce} operates as if +#' \code{multinomial} is \code{TRUE}. +#' +#' @return A \code{data.frame} of \code{\link{rankings}} objects of the same +#' dimension as \code{object$rankings}. +#' +#' @references +#' +#' Diaconis (1988). \emph{Group Representations in Probability and +#' Statistics}. Institute of Mathematical Statistics Lecture Notes +#' 11. Hayward, CA. +#' +#' @examples +#' R <- matrix(c(1, 2, 0, 0, +#' 4, 1, 2, 3, +#' 2, 1, 1, 1, +#' 1, 2, 3, 0, +#' 2, 1, 1, 0, +#' 1, 0, 3, 2), nrow = 6, byrow = TRUE) +#' colnames(R) <- c("apple", "banana", "orange", "pear") +#' mod <- PlackettLuce(R) +#' simulate(mod, 5) +#' +#' s1 <- simulate(mod, 3, seed = 112) +#' s2 <- simulate(mod, 2, seed = 112) +#' +#' identical(s1[1:2], s2[1:2]) +#' +#' @importFrom stats rexp runif simulate +#' @method simulate PlackettLuce +#' @export +simulate.PlackettLuce <- function(object, nsim = 1L, seed = NULL, + multinomial = FALSE, + max_combinations = 2e+04, ...) 
{ + if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) + runif(1L) + if (is.null(seed)) + RNGstate <- get(".Random.seed", envir = .GlobalEnv) + else { + R.seed <- get(".Random.seed", envir = .GlobalEnv) + set.seed(seed) + RNGstate <- structure(seed, kind = as.list(RNGkind())) + on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv)) + } + rankings <- unclass(object$rankings) + N <- ncol(rankings) + n_rankings <- nrow(rankings) + id <- seq(length(object$coefficients) - length(object$ties) + 1L) + alpha <- object$coefficients[id] + delta <- numeric(max(object$ties)) + delta[object$ties] <- c(1.0, unname(object$coefficients[-id])) + opt <- seq_len(N) + sets <- as.list(numeric(n_rankings)) + for (i in seq_len(n_rankings)) { + sets[[i]] <- opt[rankings[i,] > 0L] + } + len <- lengths(sets) + + ## If there are no ties use Louis Gordon (1983, Annals of Stat); + ## Diaconis (1988, Chapter 9D) + if (length(object$ties) == 1L & !multinomial) { + sampler <- function(objects) { + v <- numeric(N) + len <- length(objects) + ordering <- objects[order(rexp(len, rate = 1L)/alpha[objects], + decreasing = FALSE)] + v[ordering] <- seq.int(len) + v + } + } + else { + ## NOTE, IK 10/12/2017: This is an inefficient computation + ## with potentially massive objects FIX, IK 10/12/2017: Remove + ## dependence on all combinations. For now a preventive stop + ## if n_combinations is more than max_combinations + n_combinations <- sum(choose(N, object$ties)) + if (n_combinations > max_combinations) { + stop(paste("simulate.PlackettLuce needs to decide between", + n_combinations, + "combinations of items, but 'max_combinations' is", + max_combinations)) + } + + ## Get all possible combinations of objects + combinations <- NULL + for (j in object$ties) { + combinations <- c(combinations, combn(opt, j, simplify = FALSE)) + } + + ## Unormalized probabilities of all combinations + probs <- vapply(combinations, function(z) { + delta[length(z)] * prod(alpha[z])^(1L/length(z)) + }, 1) + ## NOTE, IK 10/12/2017: Normalization is done internally by sample.int + sampler <- function(objects) { + v <- numeric(N) + j <- 1L + indices <- rep(TRUE, n_combinations) + while (length(objects)) { + ## find out which combinations have all of their objects + ## included in `objects` + indices[indices] <- vapply(combinations[indices], function(x) { + all(x %in% objects) + }, TRUE) + ## sample, after setting the probability of all other + ## combinations to zero + sampled <- combinations[[sample.int(n_combinations, 1L, + prob = probs * indices)]] + ## remove the sampled objects from `objects` + objects <- objects[-match(sampled, objects, nomatch = 0L)] + v[sampled] <- j + j <- j + 1L + } + v + } + } + + out <- replicate(nsim, { + R <- t(vapply(sets, sampler, numeric(N))) + colnames(R) <- colnames(rankings) + as.rankings(R) + }, simplify = FALSE) + out <- data.frame(out) + names(out) <- paste("sim", seq_len(nsim), sep = "_") + attr(out, "seed") <- RNGstate + out +} + + +## TODO, IK, 10/12/2017: +## * Handling of weights +## * Avoid constructing all combinations +## * Exploit ideas for multinomial sampling with massive number of categories +## * Add parallelization capabilities diff --git a/R/utility.R b/R/utility.R index 0ccf638..e35f38b 100644 --- a/R/utility.R +++ b/R/utility.R @@ -1,147 +1,147 @@ -# utility functions to create ranking data structures/X matrix for log-linear -# models maybe just for testing or at some point could be tidied up and exported - -# okay to aggregate for vcov computation, but not for logLik -#' 
@importFrom methods cbind2 -#' @importFrom utils combn -#' @importFrom Matrix sparseMatrix -poisson_rankings <- function(rankings, weights = NULL, - adherence = NULL, ranker = NULL, - gamma = NULL, aggregate = TRUE, - as.data.frame = FALSE){ - # get choices and alternatives for each ranking - choices <- choices(rankings, names = FALSE) - # include free choices only - size <- lengths(choices$alternatives) - free <- size != 1L - choices <- lapply(choices, `[`, free) - if (!is.null(weights)) { - choices$w <- weights[choices$ranking] - } else choices$w <- rep.int(1L, length(choices$ranking)) - if (aggregate && is.null(adherence)) { - # id unique choices - unique_choices <- unique(choices$choices) - g <- match(choices$choices, unique_choices) - # id unique alternatives - unique_alternatives <- unique(choices$alternatives) - h <- match(choices$alternatives, unique_alternatives) - # count unique choice:alternative combinations - g <- paste(g, h, sep = ":") - g <- match(g, unique(g)) - choices$ranking <- split(choices$ranking, g) - keep <- !duplicated(g) - agg <- c("choices", "alternatives") - choices[agg] <- lapply(choices[agg], function(x) x[keep]) - choices$n <- tabulate(g) - choices$w <- as.numeric(rowsum(choices$w[keep], g[keep])) - size <- lengths(unique_alternatives) - } else { - choices$n <- 1L - size <- lengths(choices$alternatives) - } - - # now create X matrix for (unique) alternative sets - S <- unique(size) - d <- sort(unique(lengths(choices$choices))) - N <- ncol(rankings) - items <- n <- val <- list() - for (s in S){ - # generic choices from set of size s - comb <- list() - x <- d[d <= s] - for (i in seq_along(x)){ - comb[[i]] <- combn(seq_len(s), x[i]) - } - id <- which(size == s) - n[id] <- list(rep(x, vapply(comb, ncol, 1.0))) - if (aggregate){ - items[id] <- lapply(unique_alternatives[id], `[`, unlist(comb)) - } else items[id] <- lapply(choices$alternatives[id], `[`, unlist(comb)) - } - # choice id - x <- sequence(lengths(n)) - # alternative id - if (aggregate){ - z <- rep(seq_along(unique_alternatives), lengths(n)) - } else z <- rep(seq_along(choices$alternatives), lengths(n)) - # columns for item parameters - size <- unlist(n) - A <- sparseMatrix(j = unlist(items), p = c(0L, cumsum(size)), - x = rep(1L/size, size)) - ord <- order(A@i) - all_choices <- split(unlist(items), A@i[ord]) - # columns for tie parameters - if (length(d) > 1L){ - nr <- nrow(rankings) - tied <- list() - x <- d[-1] - for (i in seq_along(x)){ - tied[[i]] <- which(size == x[i]) - } - B <- sparseMatrix(i = unlist(tied), p = c(0L, cumsum(lengths(tied)))) - X <- cbind2(A, B) - } else X <- A - # update X matrix if extimating adherence (nonlinear model) - if (!is.null(adherence)){ - a <- adherence[ranker][choices$ranking[z]] - X[, seq(N)] <- a * X[, seq(N)] # not tie columns - } - # counts - if (aggregate){ - alt <- match(choices$alternatives, unique_alternatives) - } else alt <- seq_along(choices$alternatives) - na <- length(alt) - id <- numeric(na) - row <- split(seq_along(z), z) - for (i in seq_len(na)){ - ch <- match(choices$choices[i], all_choices[row[[alt[i]]]]) - id[i] <- row[[alt[i]]][ch] - } - y <- numeric(length(z)) - y[id] <- choices$n - if (aggregate){ - w <- numeric(length(z)) - w[id] <- choices$w - } else w <- choices$w[z] - if (as.data.frame) { - dat <- data.frame(y = y) - dat$X <- as.matrix(X) - dat$z <- as.factor(z) - dat$w <- w - if (!is.null(gamma)) dat$a <- as.factor(ranker[choices$ranking][z]) - dat - } else { - res <- list(y = y, X = X, z = z, w = w) - if (!is.null(gamma)) res$a <- 
ranker[choices$ranking][z] - res - } -} - -## A quick way to generate arbitrary ranking data to experiment with -## The larger tie is the lower the chance of a tie is -#' @importFrom stats runif -generate_rankings <- function(maxi, n_rankings = 10L, tie = 5L, seed = NULL) { - if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) - runif(1L) - if (is.null(seed)) - RNGstate <- get(".Random.seed", envir = .GlobalEnv) - else { - R.seed <- get(".Random.seed", envir = .GlobalEnv) - set.seed(seed) - RNGstate <- structure(seed, kind = as.list(RNGkind())) - on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv)) - } - mat <- replicate(n_rankings, { - s <- sample(maxi, maxi) - m <- sample(maxi, 1L) - s <- s[s <= m] - v <- numeric(maxi) - v[sample(maxi, min(m, maxi))] <- s - v0 <- v == 0L - if (length(v0)) - v[v0] <- sample(0L:max(s), sum(v0), replace = TRUE, - prob = c(tie, rep(1L/max(s), max(s)))) - v - }) - as.rankings(t(mat)) -} +# utility functions to create ranking data structures/X matrix for log-linear +# models maybe just for testing or at some point could be tidied up and exported + +# okay to aggregate for vcov computation, but not for logLik +#' @importFrom methods cbind2 +#' @importFrom utils combn +#' @importFrom Matrix sparseMatrix +poisson_rankings <- function(rankings, weights = NULL, + adherence = NULL, ranker = NULL, + gamma = NULL, aggregate = TRUE, + as.data.frame = FALSE){ + # get choices and alternatives for each ranking + choices <- choices(rankings, names = FALSE) + # include free choices only + size <- lengths(choices$alternatives) + free <- size != 1L + choices <- lapply(choices, `[`, free) + if (!is.null(weights)) { + choices$w <- weights[choices$ranking] + } else choices$w <- rep.int(1L, length(choices$ranking)) + if (aggregate && is.null(adherence)) { + # id unique choices + unique_choices <- unique(choices$choices) + g <- match(choices$choices, unique_choices) + # id unique alternatives + unique_alternatives <- unique(choices$alternatives) + h <- match(choices$alternatives, unique_alternatives) + # count unique choice:alternative combinations + g <- paste(g, h, sep = ":") + g <- match(g, unique(g)) + choices$ranking <- split(choices$ranking, g) + keep <- !duplicated(g) + agg <- c("choices", "alternatives") + choices[agg] <- lapply(choices[agg], function(x) x[keep]) + choices$n <- tabulate(g) + choices$w <- as.numeric(rowsum(choices$w[keep], g[keep])) + size <- lengths(unique_alternatives) + } else { + choices$n <- 1L + size <- lengths(choices$alternatives) + } + + # now create X matrix for (unique) alternative sets + S <- unique(size) + d <- sort(unique(lengths(choices$choices))) + N <- ncol(rankings) + items <- n <- val <- list() + for (s in S){ + # generic choices from set of size s + comb <- list() + x <- d[d <= s] + for (i in seq_along(x)){ + comb[[i]] <- combn(seq_len(s), x[i]) + } + id <- which(size == s) + n[id] <- list(rep(x, vapply(comb, ncol, 1.0))) + if (aggregate){ + items[id] <- lapply(unique_alternatives[id], `[`, unlist(comb)) + } else items[id] <- lapply(choices$alternatives[id], `[`, unlist(comb)) + } + # choice id + x <- sequence(lengths(n)) + # alternative id + if (aggregate){ + z <- rep(seq_along(unique_alternatives), lengths(n)) + } else z <- rep(seq_along(choices$alternatives), lengths(n)) + # columns for item parameters + size <- unlist(n) + A <- sparseMatrix(j = unlist(items), p = c(0L, cumsum(size)), + x = rep(1L/size, size)) + ord <- order(A@i) + all_choices <- split(unlist(items), A@i[ord]) + # columns for tie parameters + if 
(length(d) > 1L){
+        nr <- nrow(rankings)
+        tied <- list()
+        x <- d[-1]
+        for (i in seq_along(x)){
+            tied[[i]] <- which(size == x[i])
+        }
+        B <- sparseMatrix(i = unlist(tied), p = c(0L, cumsum(lengths(tied))))
+        X <- cbind2(A, B)
+    } else X <- A
+    # update X matrix if estimating adherence (nonlinear model)
+    if (!is.null(adherence)){
+        a <- adherence[ranker][choices$ranking[z]]
+        X[, seq(N)] <- a * X[, seq(N)] # not tie columns
+    }
+    # counts
+    if (aggregate){
+        alt <- match(choices$alternatives, unique_alternatives)
+    } else alt <- seq_along(choices$alternatives)
+    na <- length(alt)
+    id <- numeric(na)
+    row <- split(seq_along(z), z)
+    for (i in seq_len(na)){
+        ch <- match(choices$choices[i], all_choices[row[[alt[i]]]])
+        id[i] <- row[[alt[i]]][ch]
+    }
+    y <- numeric(length(z))
+    y[id] <- choices$n
+    if (aggregate){
+        w <- numeric(length(z))
+        w[id] <- choices$w
+    } else w <- choices$w[z]
+    if (as.data.frame) {
+        dat <- data.frame(y = y)
+        dat$X <- as.matrix(X)
+        dat$z <- as.factor(z)
+        dat$w <- w
+        if (!is.null(gamma)) dat$a <- as.factor(ranker[choices$ranking][z])
+        dat
+    } else {
+        res <- list(y = y, X = X, z = z, w = w)
+        if (!is.null(gamma)) res$a <- ranker[choices$ranking][z]
+        res
+    }
+}
+
+## A quick way to generate arbitrary ranking data to experiment with
+## The larger `tie` is, the lower the chance of a tie
+#' @importFrom stats runif
+generate_rankings <- function(maxi, n_rankings = 10L, tie = 5L, seed = NULL) {
+    if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
+        runif(1L)
+    if (is.null(seed))
+        RNGstate <- get(".Random.seed", envir = .GlobalEnv)
+    else {
+        R.seed <- get(".Random.seed", envir = .GlobalEnv)
+        set.seed(seed)
+        RNGstate <- structure(seed, kind = as.list(RNGkind()))
+        on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
+    }
+    mat <- replicate(n_rankings, {
+        s <- sample(maxi, maxi)
+        m <- sample(maxi, 1L)
+        s <- s[s <= m]
+        v <- numeric(maxi)
+        v[sample(maxi, min(m, maxi))] <- s
+        v0 <- v == 0L
+        if (length(v0))
+            v[v0] <- sample(0L:max(s), sum(v0), replace = TRUE,
+                            prob = c(tie, rep(1L/max(s), max(s))))
+        v
+    })
+    as.rankings(t(mat))
+}
diff --git a/R/vcov.R b/R/vcov.R
index e85a6e6..15467fe 100644
--- a/R/vcov.R
+++ b/R/vcov.R
@@ -1,111 +1,111 @@
-#' @rdname summaries
-#' @export
-#' @importFrom stats aggregate
-#' @importFrom Matrix crossprod
-vcov.PlackettLuce <- function(object, ref = 1L,
-                              type = c("expected", "observed"), ...) 
{ - type <- match.arg(type) - # don't use coef() here, want unconstrained for now - coefs <- log(object$coefficients) - object_names <- names(coefs) - p <- length(coefs) - nobj <- p - length(object$ties) + 1L - # so need to check reference - if (any(ref %in% object_names)) { - ref <- match(ref, object_names) - } else if (is.null(ref)){ - ref <- seq(nobj) - } else if (!all(ref %in% seq(nobj))){ - stop("cannot match `ref` with items") - } - # get setup for equivalent poisson glm/gnm - theLongData <- poisson_rankings(object$rankings, object$weights, - object$adherence, object$ranker, - object$gamma, aggregate = FALSE) - X <- theLongData$X - z <- theLongData$z - y <- theLongData$y - w <- theLongData$w - if (!is.null(object$gamma)){ - a <- theLongData$a - # non-zero values in X cols for log-adherence - A <- as.vector(X[, seq(nobj)] %*% as.vector(coefs)[seq(nobj)]) - } - # Compute the fitted values: - fit <- as.vector(exp(X %*% as.vector(coefs))) - totals <- as.vector(tapply(w * y, z, sum)) - fit <- fit * as.vector(totals/tapply(fit, z, sum))[z] - # Compute the vcov matrix - ## XtWX for worth and tie parameters (and adherence) - WX <- fit * X - XtWX <- as.matrix(crossprod(X, WX)) - if (!is.null(object$gamma)) - AtWX <- rowsum(A*as.matrix(WX[, 1:p]), theLongData$a) - ## covariance of worth and tie with eliminated (and adherence) parameters - ## crossprod(sparse.model.matrix(~as.factor(z) - 1), WX)) - ZtWX <- rowsum(as.matrix(WX), z) - if (!is.null(object$gamma)) ZtWA <- rowsum(fit*A, z)[, 1] - ## diag of (XtWX)^-1 for eliminated - ZtWZinverse <- 1L/totals - ## diag of XtWX for adherence - if (!is.null(object$gamma)) AtWA <- rowsum(fit*A^2, theLongData$a)[,1] - ## components of (generalized) inverse of vcov for worth, tie and adherence - ## this is minus the expectation of the second derivs of the multinomial - ## log-lik i.e. (expected) Fisher info for the multinomial log-likelihood - ### top-left: rows/cols for worth and tie - InfoTL <- XtWX - crossprod(sqrt(ZtWZinverse) * ZtWX) - if (!is.null(object$gamma)){ - ### bottom-left: cross derivs adherence and worth/tie - InfoBL <- AtWX - rowsum(ZtWX * ZtWA * ZtWZinverse, a[!duplicated(z)]) - ### diagonal of bottom-right: rows/cols for adherence - diagInfoBR <- AtWA - rowsum(ZtWA^2 * ZtWZinverse, a[!duplicated(z)])[,1] - } - ## for observed Fisher info, i.e. 
second derivs of the multinomial log-lik - ## add component from (y - mu)*deta/(dbeta1 dbeta2) - only affects adherence - if (type == "observed" & !is.null(object$gamma)){ - ## adjust variance of adherence - diagInfoBR <- diagInfoBR - rowsum((w*y - fit)*A, a)[,1] - ## adjust covariance with worth parameters - adj <- rowsum(as.matrix((w*y - fit) * X[, seq_len(nobj)]), a) - InfoBL[, seq_len(nobj)] <- InfoBL[, seq_len(nobj)] - adj - } - ## Add in component from gamma prior - if (!is.null(object$gamma)){ - ## minus expectation of second derivs of (independent) gamma priors - diagInfoBR <- diagInfoBR + object$gamma$rate * object$adherence * - rowsum(object$weights, object$ranker)[,1] - } - # create vcov of parameters of interest (worth, tie) - if (!is.null(object$normal)){ - ## minus expectation of second derivs of multivariate normal prior - InfoTL[seq_len(nobj), seq_len(nobj)] <- - InfoTL[seq_len(nobj), seq_len(nobj)] + object$normal$invSigma - ## get vcov for worth and tie only - if (!is.null(object$gamma)){ - result <- chol2inv(chol(InfoTL - crossprod(InfoBL/sqrt(diagInfoBR)))) - } else result <- chol2inv(chol(InfoTL)) - dimnames(result) <- list(names(coef), names(coef)) - } else { - ## drop a column and get vcov for worth and tie only - ## if reference is a single item this is equivalent to taking generalised - ## inverse and then re-parameterising - id <- ifelse(length(ref) == 1L, ref, 1L) - result <- array(0.0, dim = c(p, p), - dimnames = list(names(coefs), names(coefs))) - if (!is.null(object$gamma)){ - result[-id, -id] <- - chol2inv(chol(InfoTL[-id, -id] - - crossprod((InfoBL/sqrt(diagInfoBR))[, -id]))) - } else result[-id, -id] <- chol2inv(chol(InfoTL[-id, -id])) - ## so that's all the work done if the reference is a single item - if (length(ref) == 1) return(result) - } - # compute vcov for specified set of contrasts if necessary - ## equiv of D <- diag(p); D[ref, seq(nobj)] <- 1 - 1/n; t(D) %*% V %*% D - n <- length(ref) - r <- rowSums(result[, ref, drop = FALSE]) - result[, seq(nobj)] <- result[, seq(nobj)] - r/n - r <- colSums(result[ref, , drop = FALSE]) - result[seq(nobj), ] <- result[seq(nobj), ] - rep(r/n, each = nobj) - result -} +#' @rdname summaries +#' @export +#' @importFrom stats aggregate +#' @importFrom Matrix crossprod +vcov.PlackettLuce <- function(object, ref = 1L, + type = c("expected", "observed"), ...) 
{ + type <- match.arg(type) + # don't use coef() here, want unconstrained for now + coefs <- log(object$coefficients) + object_names <- names(coefs) + p <- length(coefs) + nobj <- p - length(object$ties) + 1L + # so need to check reference + if (any(ref %in% object_names)) { + ref <- match(ref, object_names) + } else if (is.null(ref)){ + ref <- seq(nobj) + } else if (!all(ref %in% seq(nobj))){ + stop("cannot match `ref` with items") + } + # get setup for equivalent poisson glm/gnm + theLongData <- poisson_rankings(object$rankings, object$weights, + object$adherence, object$ranker, + object$gamma, aggregate = FALSE) + X <- theLongData$X + z <- theLongData$z + y <- theLongData$y + w <- theLongData$w + if (!is.null(object$gamma)){ + a <- theLongData$a + # non-zero values in X cols for log-adherence + A <- as.vector(X[, seq(nobj)] %*% as.vector(coefs)[seq(nobj)]) + } + # Compute the fitted values: + fit <- as.vector(exp(X %*% as.vector(coefs))) + totals <- as.vector(tapply(w * y, z, sum)) + fit <- fit * as.vector(totals/tapply(fit, z, sum))[z] + # Compute the vcov matrix + ## XtWX for worth and tie parameters (and adherence) + WX <- fit * X + XtWX <- as.matrix(crossprod(X, WX)) + if (!is.null(object$gamma)) + AtWX <- rowsum(A*as.matrix(WX[, 1:p]), theLongData$a) + ## covariance of worth and tie with eliminated (and adherence) parameters + ## crossprod(sparse.model.matrix(~as.factor(z) - 1), WX)) + ZtWX <- rowsum(as.matrix(WX), z) + if (!is.null(object$gamma)) ZtWA <- rowsum(fit*A, z)[, 1] + ## diag of (XtWX)^-1 for eliminated + ZtWZinverse <- 1L/totals + ## diag of XtWX for adherence + if (!is.null(object$gamma)) AtWA <- rowsum(fit*A^2, theLongData$a)[,1] + ## components of (generalized) inverse of vcov for worth, tie and adherence + ## this is minus the expectation of the second derivs of the multinomial + ## log-lik i.e. (expected) Fisher info for the multinomial log-likelihood + ### top-left: rows/cols for worth and tie + InfoTL <- XtWX - crossprod(sqrt(ZtWZinverse) * ZtWX) + if (!is.null(object$gamma)){ + ### bottom-left: cross derivs adherence and worth/tie + InfoBL <- AtWX - rowsum(ZtWX * ZtWA * ZtWZinverse, a[!duplicated(z)]) + ### diagonal of bottom-right: rows/cols for adherence + diagInfoBR <- AtWA - rowsum(ZtWA^2 * ZtWZinverse, a[!duplicated(z)])[,1] + } + ## for observed Fisher info, i.e. 
second derivs of the multinomial log-lik + ## add component from (y - mu)*deta/(dbeta1 dbeta2) - only affects adherence + if (type == "observed" & !is.null(object$gamma)){ + ## adjust variance of adherence + diagInfoBR <- diagInfoBR - rowsum((w*y - fit)*A, a)[,1] + ## adjust covariance with worth parameters + adj <- rowsum(as.matrix((w*y - fit) * X[, seq_len(nobj)]), a) + InfoBL[, seq_len(nobj)] <- InfoBL[, seq_len(nobj)] - adj + } + ## Add in component from gamma prior + if (!is.null(object$gamma)){ + ## minus expectation of second derivs of (independent) gamma priors + diagInfoBR <- diagInfoBR + object$gamma$rate * object$adherence * + rowsum(object$weights, object$ranker)[,1] + } + # create vcov of parameters of interest (worth, tie) + if (!is.null(object$normal)){ + ## minus expectation of second derivs of multivariate normal prior + InfoTL[seq_len(nobj), seq_len(nobj)] <- + InfoTL[seq_len(nobj), seq_len(nobj)] + object$normal$invSigma + ## get vcov for worth and tie only + if (!is.null(object$gamma)){ + result <- chol2inv(chol(InfoTL - crossprod(InfoBL/sqrt(diagInfoBR)))) + } else result <- chol2inv(chol(InfoTL)) + dimnames(result) <- list(names(coef), names(coef)) + } else { + ## drop a column and get vcov for worth and tie only + ## if reference is a single item this is equivalent to taking generalised + ## inverse and then re-parameterising + id <- ifelse(length(ref) == 1L, ref, 1L) + result <- array(0.0, dim = c(p, p), + dimnames = list(names(coefs), names(coefs))) + if (!is.null(object$gamma)){ + result[-id, -id] <- + chol2inv(chol(InfoTL[-id, -id] - + crossprod((InfoBL/sqrt(diagInfoBR))[, -id]))) + } else result[-id, -id] <- chol2inv(chol(InfoTL[-id, -id])) + ## so that's all the work done if the reference is a single item + if (length(ref) == 1) return(result) + } + # compute vcov for specified set of contrasts if necessary + ## equiv of D <- diag(p); D[ref, seq(nobj)] <- 1 - 1/n; t(D) %*% V %*% D + n <- length(ref) + r <- rowSums(result[, ref, drop = FALSE]) + result[, seq(nobj)] <- result[, seq(nobj)] - r/n + r <- colSums(result[ref, , drop = FALSE]) + result[seq(nobj), ] <- result[seq(nobj), ] - rep(r/n, each = nobj) + result +} diff --git a/tests/testthat/outputs/print.coef.rds b/tests/testthat/outputs/print.coef.rds index 6e6fbad..8983316 100644 --- a/tests/testthat/outputs/print.coef.rds +++ b/tests/testthat/outputs/print.coef.rds @@ -1,2 +1,2 @@ - apple banana orange tie2 - 0.0000000 0.2867987 -0.4636687 -1.0701129 + apple banana orange tie2 + 0.0000000 0.2867987 -0.4636687 -1.0701129 diff --git a/tests/testthat/outputs/print.rds b/tests/testthat/outputs/print.rds index a1fe66b..e6761f5 100644 --- a/tests/testthat/outputs/print.rds +++ b/tests/testthat/outputs/print.rds @@ -1,5 +1,5 @@ -Call: PlackettLuce(rankings = R) - -Coefficients: - apple banana orange tie2 - 0.0000 0.2868 -0.4637 -1.0701 +Call: PlackettLuce(rankings = R) + +Coefficients: + apple banana orange tie2 + 0.0000 0.2868 -0.4637 -1.0701 diff --git a/tests/testthat/outputs/print.summary.rds b/tests/testthat/outputs/print.summary.rds index 8edcaf3..556bef0 100644 --- a/tests/testthat/outputs/print.summary.rds +++ b/tests/testthat/outputs/print.summary.rds @@ -1,12 +1,12 @@ -Call: PlackettLuce(rankings = R) - -Coefficients: - Estimate Std. 
Error z value Pr(>|z|) -apple 0.0000 NA NA NA -banana 0.2868 1.0129 0.283 0.777 -orange -0.4637 1.0280 -0.451 0.652 -tie2 -1.0701 0.9043 -1.183 0.237 - -Residual deviance: 21.61 on 25 degrees of freedom -AIC: 27.61 -Number of iterations: 5 +Call: PlackettLuce(rankings = R) + +Coefficients: + Estimate Std. Error z value Pr(>|z|) +apple 0.0000 NA NA NA +banana 0.2868 1.0129 0.283 0.777 +orange -0.4637 1.0280 -0.451 0.652 +tie2 -1.0701 0.9043 -1.183 0.237 + +Residual deviance: 21.61 on 25 degrees of freedom +AIC: 27.61 +Number of iterations: 5 diff --git a/tests/testthat/test-adherence.R b/tests/testthat/test-adherence.R index e690bb6..517b8d2 100644 --- a/tests/testthat/test-adherence.R +++ b/tests/testthat/test-adherence.R @@ -1,191 +1,192 @@ -context("implementation [adherence in PlackettLuce]") - -# Get reference agRank implementation -source(system.file(file.path("Reference_Implementations", "sgdPL.R"), - package = "PlackettLuce")) - -# Paired/triple comparisons, no ties -R <- matrix(c(1, 2, 3, 0, - 0, 1, 2, 3, - 2, 1, 0, 3, - 1, 2, 3, 0, - 2, 0, 1, 3, - 1, 0, 3, 2, - 1, 2, 0, 0, - 0, 1, 2, 0), nrow = 8, byrow = TRUE) -colnames(R) <- c("apple", "banana", "orange", "pear") -R <- as.rankings(R) - -test_that("logLik matches agRank, fixed adherence [fake triple comparisons]", { - m <- ncol(R) - n <- nrow(R) - adherence <- seq(0.75, 1.25, length.out = n) - alpha <- seq(0.25, 0.75, length.out = m) - # Non-informative prior on log-worths (large variance) - mu <- log(alpha) - sigma <- diag(1000000, m) - p <- 8 # switch for interactive testing (max = 8) - # Fit model using sgdPL from AgRank with fixed adherence - ## - no iterations, just checking log-likelihood calculations - res <- sgdPL(as.matrix(R[seq(p),]), mu, sigma, rate = 0.1, - adherence = FALSE, maxiter = 0, tol = 1e-12, - start = c(mu, adherence[seq(p)]), decay = 1.001) - # Fit model using PlackettLuce - ## with normal prior to allow low p (BFGS by default) - mod_PL1 <- PlackettLuce(rankings = R[seq(p),], npseudo = 0, maxit = 0, - adherence = adherence[seq(p)], start = alpha, - normal = list(mu = mu, Sigma = sigma)) - ## N.B. 
- can't test L-BFGS with zero iterations - ## - iterative scaling not implemented with adherence - expect_equal(logLik(mod_PL1)[1], -res$value[1]) - # Same, now iterating to convergence - res <- sgdPL(as.matrix(R[seq(p),]), mu, sigma, rate = 0.1, - adherence = FALSE, maxiter = 8000, - tol = 1e-12, start = c(mu, adherence[seq(p)]), decay = 1.001) - mod_PL2 <- PlackettLuce(rankings = R[seq(p),], npseudo = 0, - adherence = adherence[seq(p)], start = alpha, - normal = list(mu = mu, Sigma = sigma)) - expect_equal(mod_PL2$logposterior, -tail(res$value, 1), - tolerance = 1e-5) - if (require(lbfgs)){ - mod_PL3 <- PlackettLuce(rankings = R[seq(p),], npseudo = 0, - method = "L-BFGS", adherence = adherence[seq(p)], - start = alpha, - normal = list(mu = mu, Sigma = sigma)) - expect_equal(mod_PL3$logposterior, -tail(res$value, 1), - tolerance = 1e-5) - } -}) - -# with grouped rankings - -test_that('estimated adherence works for grouped_rankings [fake triples]', { - # each ranking is a separate group - G <- group(R, seq(nrow(R))) - mod1 <- PlackettLuce(rankings = R, npseudo = 0, - gamma = list(shape = 10, rate = 10)) - mod2 <- PlackettLuce(G, npseudo = 0, gamma = list(shape = 10, rate = 10)) - # remove bits we expect to be different - # iter can be different on some platform due to small difference in rowsums - nm <- setdiff(names(mod1), c("call", "iter")) - expect_equal(mod1[nm], mod2[nm]) - expect_equal(mod1$adherence[mod1$ranker], mod2$adherence[mod2$ranker]) - - # results should be same when fix to returned adherence - mod3 <- PlackettLuce(R, npseudo = 0, start = coef(mod1), method = "BFGS", - adherence = mod1$adherence) - mod4 <- PlackettLuce(G, npseudo = 0, start = coef(mod1), method = "BFGS", - adherence = mod2$adherence) - # BFGS always does another iteration to check converegence, - # iterative scaling has different check so would also iterate a little more - expect_equal(coef(mod1), coef(mod3), tolerance = 1e-4, - check.attributes = FALSE) - expect_equal(coef(mod2), coef(mod4), tolerance = 1e-4, - check.attributes = FALSE) - expect_equal(logLik(mod1), logLik(mod3), tolerance = 1e-8, - check.attributes = FALSE) - expect_equal(logLik(mod2), logLik(mod4), tolerance = 1e-8, - check.attributes = FALSE) -}) - -test_that('estimated adherence works w/ npseudo != 0 [fake triples]', { - mod1 <- PlackettLuce(rankings = R, - gamma = list(shape = 100, rate = 100)) - expect_known_value(mod1, - file = test_path("outputs/pl_adherence_pseudo.rds"), - version = 2) -}) - -test_that('default prior for adherence works [fake triples]', { - mod1 <- PlackettLuce(rankings = R, - gamma = list(shape = 10, rate = 10)) - mod2 <- PlackettLuce(rankings = R, gamma = TRUE) - # remove bits we expect to be different - nm <- setdiff(names(mod1), c("call")) - expect_equal(mod1[nm], mod2[nm]) -}) - -test_that('check on prior for adherence works [fake triples]', { - expect_warning(mod1 <- PlackettLuce(rankings = R, - gamma = list(shape = 100, rate = 10))) -}) - -data(beans, package = "PlackettLuce") - -# Fill in the missing ranking -beans$middle <- complete(beans[c("best", "worst")], - items = c("A", "B", "C")) - -# Use these names to decode the orderings of order 3 -order3 <- decode(beans[c("best", "middle", "worst")], - items = beans[c("variety_a", "variety_b", "variety_c")], - code = c("A", "B", "C")) - -# Convert these results to a vector and get the corresponding trial variety -outcome <- unlist(beans[c("var_a", "var_b", "var_c")]) -trial_variety <- unlist(beans[c("variety_a", "variety_b", "variety_c")]) - -# Create a data 
frame of the implied orderings of order 2 -order2 <- data.frame(Winner = ifelse(outcome == "Worse", - "Local", trial_variety), - Loser = ifelse(outcome == "Worse", - trial_variety, "Local"), - stringsAsFactors = FALSE, row.names = NULL) - -# Finally combine the rankings of order 2 and order 3 -R <- rbind(as.rankings(order3, input = "ordering"), - as.rankings(order2, input = "ordering")) - -# Group the rankings by the corresponding farm -G <- group(R, rep(seq_len(nrow(beans)), 4)) - -test_that('fixed adherence works for grouped_rankings [beans]', { - # adherence = 1 is same as no adherence - adherence <- rep(1L, nrow(beans)) - mod1 <- PlackettLuce(G) - mod2 <- PlackettLuce(G, adherence = adherence) - # remove bits we expect to be different - # iter can change on same platforms as rankings not aggregated in mod2 - nm <- setdiff(names(mod1), c("call", "adherence", "iter")) - expect_equal(mod1[nm], mod2[nm]) - # adherence != 1 for grouped same as ungrouped with replicated adherence - ranker_adherence <- seq(0.75, 1.25, length.out = nrow(beans)) - ranking_adherence <- rep(ranker_adherence, 4) - mod1 <- PlackettLuce(R, adherence = ranking_adherence) - mod2 <- PlackettLuce(G, adherence = ranker_adherence) - # remove bits we expect to be different - # iter can be different on some platform due to small difference in rowsums - nm <- setdiff(names(mod1), c("call", "adherence", "ranker", "iter")) - expect_equal(mod1[nm], mod2[nm]) - expect_equal(mod1$adherence[mod1$ranker], mod2$adherence[mod2$ranker]) -}) - - -R <- matrix(c(1, 2, 0, 0, - 4, 1, 2, 3, - 2, 1, 1, 1, - 1, 2, 3, 0, - 2, 1, 1, 0, - 1, 0, 3, 2), nrow = 6, byrow = TRUE) -colnames(R) <- c("apple", "banana", "orange", "pear") -R <- as.rankings(R) - -test_that('estimated adherence works for grouped_rankings [partial + ties]', { - w <- c(3, 2, 5, 4, 3, 7) - # replicates with exactly same adherence (does not converge) - mod1 <- suppressWarnings(PlackettLuce(R, npseudo = 0, method = "BFGS", - weights = w, - gamma = list(shape = 10, rate = 10))) - # replicates grouped together by ranker - G <- group(R[rep(seq(6), w),], index = rep(seq(6), w)) - mod2 <- suppressWarnings(PlackettLuce(rankings = G, npseudo = 0, - method = "BFGS", - gamma = list(shape = 10, rate = 10))) - # remove bits we expect to be different - # iter can be different on some platform due to small difference in rowsums - nm <- setdiff(names(mod1), - c("call", "rankings", "ranker", "weights", "iter")) - expect_equal(mod1[nm], mod2[nm]) -}) - - +context("implementation [adherence in PlackettLuce]") + +# Get reference agRank implementation +source(system.file(file.path("Reference_Implementations", "sgdPL.R"), + package = "PlackettLuce")) + +# Paired/triple comparisons, no ties +R <- matrix(c(1, 2, 3, 0, + 0, 1, 2, 3, + 2, 1, 0, 3, + 1, 2, 3, 0, + 2, 0, 1, 3, + 1, 0, 3, 2, + 1, 2, 0, 0, + 0, 1, 2, 0), nrow = 8, byrow = TRUE) +colnames(R) <- c("apple", "banana", "orange", "pear") +R <- as.rankings(R) + +test_that("logLik matches agRank, fixed adherence [fake triple comparisons]", { + m <- ncol(R) + n <- nrow(R) + adherence <- seq(0.75, 1.25, length.out = n) + alpha <- seq(0.25, 0.75, length.out = m) + # Non-informative prior on log-worths (large variance) + mu <- log(alpha) + sigma <- diag(1000000, m) + p <- 8 # switch for interactive testing (max = 8) + # Fit model using sgdPL from AgRank with fixed adherence + ## - no iterations, just checking log-likelihood calculations + res <- sgdPL(as.matrix(R[seq(p),]), mu, sigma, rate = 0.1, + adherence = FALSE, maxiter = 0, tol = 1e-12, + start 
= c(mu, adherence[seq(p)]), decay = 1.001)
+    # Fit model using PlackettLuce
+    ## with normal prior to allow low p (BFGS by default)
+    mod_PL1 <- PlackettLuce(rankings = R[seq(p),], npseudo = 0, maxit = 0,
+                            adherence = adherence[seq(p)], start = alpha,
+                            normal = list(mu = mu, Sigma = sigma))
+    ## N.B. - can't test L-BFGS with zero iterations
+    ##      - iterative scaling not implemented with adherence
+    expect_equal(logLik(mod_PL1)[1], -res$value[1])
+    # Same, now iterating to convergence
+    res <- sgdPL(as.matrix(R[seq(p),]), mu, sigma, rate = 0.1,
+                 adherence = FALSE, maxiter = 8000,
+                 tol = 1e-12, start = c(mu, adherence[seq(p)]), decay = 1.001)
+    mod_PL2 <- PlackettLuce(rankings = R[seq(p),], npseudo = 0,
+                            adherence = adherence[seq(p)], start = alpha,
+                            normal = list(mu = mu, Sigma = sigma))
+    expect_equal(mod_PL2$logposterior, -tail(res$value, 1),
+                 tolerance = 1e-5)
+    if (require(lbfgs)){
+        mod_PL3 <- PlackettLuce(rankings = R[seq(p),], npseudo = 0,
+                                method = "L-BFGS",
+                                adherence = adherence[seq(p)],
+                                start = alpha,
+                                normal = list(mu = mu, Sigma = sigma))
+        expect_equal(mod_PL3$logposterior, -tail(res$value, 1),
+                     tolerance = 1e-5)
+    }
+})
+
+# with grouped rankings
+
+test_that('estimated adherence works for grouped_rankings [fake triples]', {
+    # each ranking is a separate group
+    G <- group(R, seq(nrow(R)))
+    mod1 <- PlackettLuce(rankings = R, npseudo = 0,
+                         gamma = list(shape = 10, rate = 10))
+    mod2 <- PlackettLuce(G, npseudo = 0, gamma = list(shape = 10, rate = 10))
+    # remove bits we expect to be different
+    # iter can be different on some platforms due to small differences in rowsums
+    nm <- setdiff(names(mod1), c("call", "iter"))
+    expect_equal(mod1[nm], mod2[nm])
+    expect_equal(mod1$adherence[mod1$ranker], mod2$adherence[mod2$ranker])
+
+    # results should be the same when adherence is fixed to the returned values
+    mod3 <- PlackettLuce(R, npseudo = 0, start = coef(mod1), method = "BFGS",
+                         adherence = mod1$adherence)
+    mod4 <- PlackettLuce(G, npseudo = 0, start = coef(mod1), method = "BFGS",
+                         adherence = mod2$adherence)
+    # BFGS always does another iteration to check convergence;
+    # iterative scaling has a different check so would also iterate a little more
+    expect_equal(coef(mod1), coef(mod3), tolerance = 1e-4,
+                 check.attributes = FALSE)
+    expect_equal(coef(mod2), coef(mod4), tolerance = 1e-4,
+                 check.attributes = FALSE)
+    expect_equal(logLik(mod1), logLik(mod3), tolerance = 1e-8,
+                 check.attributes = FALSE)
+    expect_equal(logLik(mod2), logLik(mod4), tolerance = 1e-8,
+                 check.attributes = FALSE)
+})
+
+test_that('estimated adherence works w/ npseudo != 0 [fake triples]', {
+    mod1 <- PlackettLuce(rankings = R,
+                         gamma = list(shape = 100, rate = 100))
+    expect_known_value(mod1,
+                       file = test_path("outputs/pl_adherence_pseudo.rds"),
+                       version = 2)
+})
+
+test_that('default prior for adherence works [fake triples]', {
+    mod1 <- PlackettLuce(rankings = R,
+                         gamma = list(shape = 10, rate = 10))
+    mod2 <- PlackettLuce(rankings = R, gamma = TRUE)
+    # remove bits we expect to be different
+    nm <- setdiff(names(mod1), c("call"))
+    expect_equal(mod1[nm], mod2[nm])
+})
+
+test_that('check on prior for adherence works [fake triples]', {
+    expect_warning(mod1 <- PlackettLuce(rankings = R,
+                                        gamma = list(shape = 100, rate = 10)))
+})
+
+data(beans, package = "PlackettLuce")
+
+# Fill in the missing ranking
+beans$middle <- complete(beans[c("best", "worst")],
+                         items = c("A", "B", "C"))
+
+# Use these names to decode the orderings of order 3
+order3 <- decode(beans[c("best", "middle", "worst")],
+                 items = 
beans[c("variety_a", "variety_b", "variety_c")], + code = c("A", "B", "C")) + +# Convert these results to a vector and get the corresponding trial variety +outcome <- unlist(beans[c("var_a", "var_b", "var_c")]) +trial_variety <- unlist(beans[c("variety_a", "variety_b", "variety_c")]) + +# Create a data frame of the implied orderings of order 2 +order2 <- data.frame(Winner = ifelse(outcome == "Worse", + "Local", trial_variety), + Loser = ifelse(outcome == "Worse", + trial_variety, "Local"), + stringsAsFactors = FALSE, row.names = NULL) + +# Finally combine the rankings of order 2 and order 3 +R <- rbind(as.rankings(order3, input = "ordering"), + as.rankings(order2, input = "ordering")) + +# Group the rankings by the corresponding farm +G <- group(R, rep(seq_len(nrow(beans)), 4)) + +test_that('fixed adherence works for grouped_rankings [beans]', { + # adherence = 1 is same as no adherence + adherence <- rep(1L, nrow(beans)) + mod1 <- PlackettLuce(G) + mod2 <- PlackettLuce(G, adherence = adherence) + # remove bits we expect to be different + # iter can change on same platforms as rankings not aggregated in mod2 + nm <- setdiff(names(mod1), c("call", "adherence", "iter")) + expect_equal(mod1[nm], mod2[nm]) + # adherence != 1 for grouped same as ungrouped with replicated adherence + ranker_adherence <- seq(0.75, 1.25, length.out = nrow(beans)) + ranking_adherence <- rep(ranker_adherence, 4) + mod1 <- PlackettLuce(R, adherence = ranking_adherence) + mod2 <- PlackettLuce(G, adherence = ranker_adherence) + # remove bits we expect to be different + # iter can be different on some platform due to small difference in rowsums + nm <- setdiff(names(mod1), c("call", "adherence", "ranker", "iter")) + expect_equal(mod1[nm], mod2[nm]) + expect_equal(mod1$adherence[mod1$ranker], mod2$adherence[mod2$ranker]) +}) + + +R <- matrix(c(1, 2, 0, 0, + 4, 1, 2, 3, + 2, 1, 1, 1, + 1, 2, 3, 0, + 2, 1, 1, 0, + 1, 0, 3, 2), nrow = 6, byrow = TRUE) +colnames(R) <- c("apple", "banana", "orange", "pear") +R <- as.rankings(R) + +test_that('estimated adherence works for grouped_rankings [partial + ties]', { + w <- c(3, 2, 5, 4, 3, 7) + # replicates with exactly same adherence (does not converge) + mod1 <- suppressWarnings(PlackettLuce(R, npseudo = 0, method = "BFGS", + weights = w, + gamma = list(shape = 10, rate = 10))) + # replicates grouped together by ranker + G <- group(R[rep(seq(6), w),], index = rep(seq(6), w)) + mod2 <- suppressWarnings(PlackettLuce(rankings = G, npseudo = 0, + method = "BFGS", + gamma = list(shape = 10, rate = 10))) + # remove bits we expect to be different + # iter can be different on some platform due to small difference in rowsums + nm <- setdiff(names(mod1), + c("call", "rankings", "ranker", "weights", "iter")) + expect_equal(mod1[nm], mod2[nm]) +}) + + diff --git a/tests/testthat/test-agrank.R b/tests/testthat/test-agrank.R index ec4e37d..8309154 100644 --- a/tests/testthat/test-agrank.R +++ b/tests/testthat/test-agrank.R @@ -1,60 +1,60 @@ -context("implementation [MAP in PlackettLuce]") - -# Get reference agRank implementation -source(system.file(file.path("Reference_Implementations", "sgdPL.R"), - package = "PlackettLuce")) - -# Triple comparisons, no ties -R <- matrix(c(1, 2, 3, 0, - 0, 1, 2, 3, - 2, 1, 0, 3, - 1, 2, 3, 0, - 2, 0, 1, 3, - 1, 0, 3, 2), nrow = 6, byrow = TRUE) -colnames(R) <- c("apple", "banana", "orange", "pear") - -test_that("logLik matches agRank [fake triple comparisons]", { - # Fit model using sgdPL from AgRank - ## non-informative prior on log-worths (large variance) - m 
<- ncol(R) - n <- nrow(R) - mu <- rep(1, m) - sigma <- diag(1000000, m) - ## fit model without adherence parameter (adherence fixed to 1) - res <- sgdPL(R, mu, sigma, rate = 0.1, adherence = FALSE, maxiter = 8000, - tol = 1e-12, start = rep(1, m), decay = 1.001) - ###oscillating behaviour - ###plot(res$value, type = "l") - # Fit Plackett-Luce with standard maximum likelihood - mod_PL <- PlackettLuce(rankings = R, npseudo = 0, method = "BFGS") - ## lowish tolerance as stochastic gradient descent only approximate - expect_equal(logLik(mod_PL)[1], -tail(res$value, 1), - tolerance = 1e-6) -}) - -# Triple comparisons, no ties -prior <- list(mu = c(-0.05, -0.05, -2, -3), - Sigma = matrix(c(1, 0.5, 0.1, -0.5, - 0.5, 1.1, 0.1, 0.1, - 0.1, 0.1, 1.2, 0.2, - -0.5, 0.1, 0.2, 1.3), 4, 4, byrow = TRUE)) - -test_that("logLik matches agRank with normal prior [fake triple comparisons]", { - # Fit model using sgdPL from AgRank - ### N.B. as stochastic gradient, gradients are 1/nobs * full gradient - res <- sgdPL(R, prior$mu, prior$Sigma, rate = 0.1, adherence = FALSE, - maxiter = 8000, - tol = 1e-12, start = prior$mu, decay = 1.001) - ###oscillating behaviour - ###plot(res$value, type = "l") - # Fit Plackett-Luce with standard maximum likelihood (BFGS) - ## check gradient via - ## numDeriv::grad(function(par) obj_common(par), log(c(alpha, delta[-1]))) - mod_PL <- PlackettLuce(rankings = R, npseudo = 0, normal = prior, - start = exp(prior$mu)) - ## lowish tolerance as stochastic gradient descent only approximate - expect_equal(mod_PL$logposterior, -tail(res$value, 1), - tolerance = 1e-5) - expect_equal(mod_PL$logposterior, -tail(res$value, 1), - tolerance = 1e-5) -}) +context("implementation [MAP in PlackettLuce]") + +# Get reference agRank implementation +source(system.file(file.path("Reference_Implementations", "sgdPL.R"), + package = "PlackettLuce")) + +# Triple comparisons, no ties +R <- matrix(c(1, 2, 3, 0, + 0, 1, 2, 3, + 2, 1, 0, 3, + 1, 2, 3, 0, + 2, 0, 1, 3, + 1, 0, 3, 2), nrow = 6, byrow = TRUE) +colnames(R) <- c("apple", "banana", "orange", "pear") + +test_that("logLik matches agRank [fake triple comparisons]", { + # Fit model using sgdPL from AgRank + ## non-informative prior on log-worths (large variance) + m <- ncol(R) + n <- nrow(R) + mu <- rep(1, m) + sigma <- diag(1000000, m) + ## fit model without adherence parameter (adherence fixed to 1) + res <- sgdPL(R, mu, sigma, rate = 0.1, adherence = FALSE, maxiter = 8000, + tol = 1e-12, start = rep(1, m), decay = 1.001) + ###oscillating behaviour + ###plot(res$value, type = "l") + # Fit Plackett-Luce with standard maximum likelihood + mod_PL <- PlackettLuce(rankings = R, npseudo = 0, method = "BFGS") + ## lowish tolerance as stochastic gradient descent only approximate + expect_equal(logLik(mod_PL)[1], -tail(res$value, 1), + tolerance = 1e-6) +}) + +# Triple comparisons, no ties +prior <- list(mu = c(-0.05, -0.05, -2, -3), + Sigma = matrix(c(1, 0.5, 0.1, -0.5, + 0.5, 1.1, 0.1, 0.1, + 0.1, 0.1, 1.2, 0.2, + -0.5, 0.1, 0.2, 1.3), 4, 4, byrow = TRUE)) + +test_that("logLik matches agRank with normal prior [fake triple comparisons]", { + # Fit model using sgdPL from AgRank + ### N.B. 
as stochastic gradient, gradients are 1/nobs * full gradient + res <- sgdPL(R, prior$mu, prior$Sigma, rate = 0.1, adherence = FALSE, + maxiter = 8000, + tol = 1e-12, start = prior$mu, decay = 1.001) + ###oscillating behaviour + ###plot(res$value, type = "l") + # Fit Plackett-Luce with standard maximum likelihood (BFGS) + ## check gradient via + ## numDeriv::grad(function(par) obj_common(par), log(c(alpha, delta[-1]))) + mod_PL <- PlackettLuce(rankings = R, npseudo = 0, normal = prior, + start = exp(prior$mu)) + ## lowish tolerance as stochastic gradient descent only approximate + expect_equal(mod_PL$logposterior, -tail(res$value, 1), + tolerance = 1e-5) + expect_equal(mod_PL$logposterior, -tail(res$value, 1), + tolerance = 1e-5) +}) diff --git a/tests/testthat/test-algorithms.R b/tests/testthat/test-algorithms.R index 0692fe4..218fcb6 100644 --- a/tests/testthat/test-algorithms.R +++ b/tests/testthat/test-algorithms.R @@ -1,41 +1,41 @@ -context("implementation [algorithms]") - -## Using the artificial example in ?PlackettLuce -R <- matrix(c(1, 2, 0, 0, - 4, 1, 2, 3, - 2, 1, 1, 1, - 1, 2, 3, 0, - 2, 1, 1, 0, - 1, 0, 3, 2), nrow = 6, byrow = TRUE) -colnames(R) <- c("apple", "banana", "orange", "pear") - -tol <- 1e-6 - -model_fruits1 <- PlackettLuce(rankings = R) -model_fruits2 <- PlackettLuce(rankings = R, method = "BFGS", - control = list(reltol = 1e-10)) - -test_that("BFGS gives same log-likelihood as iterative scaling", { - expect_equal(logLik(model_fruits1), logLik(model_fruits2), - tolerance = tol) -}) - -test_that("BFGS gives same coef as iterative scaling", { - expect_equal(coef(model_fruits1), coef(model_fruits2), - tolerance = tol, check.attributes = FALSE) -}) - -if (require(lbfgs)){ - model_fruits3 <- PlackettLuce(rankings = R, method = "L-BFGS", - gtol = 0.2) - - test_that("L-BFGS gives same log-likelihood as iterative scaling", { - expect_equal(logLik(model_fruits1), logLik(model_fruits3), - tolerance = tol) - }) - - test_that("L-BFGS gives same coef as iterative scaling", { - expect_equal(coef(model_fruits1), coef(model_fruits3), - tolerance = tol, check.attributes = FALSE) - }) -} +context("implementation [algorithms]") + +## Using the artificial example in ?PlackettLuce +R <- matrix(c(1, 2, 0, 0, + 4, 1, 2, 3, + 2, 1, 1, 1, + 1, 2, 3, 0, + 2, 1, 1, 0, + 1, 0, 3, 2), nrow = 6, byrow = TRUE) +colnames(R) <- c("apple", "banana", "orange", "pear") + +tol <- 1e-6 + +model_fruits1 <- PlackettLuce(rankings = R) +model_fruits2 <- PlackettLuce(rankings = R, method = "BFGS", + control = list(reltol = 1e-10)) + +test_that("BFGS gives same log-likelihood as iterative scaling", { + expect_equal(logLik(model_fruits1), logLik(model_fruits2), + tolerance = tol) +}) + +test_that("BFGS gives same coef as iterative scaling", { + expect_equal(coef(model_fruits1), coef(model_fruits2), + tolerance = tol, check.attributes = FALSE) +}) + +if (require(lbfgs)){ + model_fruits3 <- PlackettLuce(rankings = R, method = "L-BFGS", + gtol = 0.2) + + test_that("L-BFGS gives same log-likelihood as iterative scaling", { + expect_equal(logLik(model_fruits1), logLik(model_fruits3), + tolerance = tol) + }) + + test_that("L-BFGS gives same coef as iterative scaling", { + expect_equal(coef(model_fruits1), coef(model_fruits3), + tolerance = tol, check.attributes = FALSE) + }) +} diff --git a/tests/testthat/test-estfun.R b/tests/testthat/test-estfun.R index 32d032b..d34f468 100644 --- a/tests/testthat/test-estfun.R +++ b/tests/testthat/test-estfun.R @@ -1,62 +1,62 @@ -context("implementation [estfun in PlackettLuce]") 
- -## The artificial example in ?PlackettLuce -R <- matrix(c(1, 2, 0, 0, - 4, 1, 2, 3, - 2, 1, 1, 1, - 1, 2, 3, 0, - 2, 1, 1, 0, - 1, 0, 3, 2), nrow = 6, byrow = TRUE) -colnames(R) <- c("apple", "banana", "orange", "pear") - -test_that("estfun matches agRank, fixed adherence [fake triple comparisons]", { - m <- ncol(R) - n <- nrow(R) - adherence <- seq(0.75, 1.25, length.out = n) - alpha <- seq(0.25, 0.75, length.out = m) - delta <- seq(0.5, 1.5, length.out = 2) - # Fit model using PlackettLuce - mod_PL <- PlackettLuce(rankings = R, npseudo = 0, - adherence = adherence, start = c(alpha, delta)) - # sum over all sets st object i is in selected set adherence/size - A <- c(sum(adherence[c(1, 4, 6)]), - sum(adherence[c(2, 3, 4, 5)]*c(1, 1/3, 1, 1/2)), - sum(adherence[c(2, 3, 5)]*c(1, 1/3, 1/2)), - sum(adherence[c(2, 3, 6)]*c(1, 1/3, 1))) - # number of sets with cardinality d - B <- c(8, 1, 1) - # expectations - R2 <- matrix(c(2, 1, 3, 4, - 1, 4, 3, 2, - 1, 2, 3, 4, - 3, 2, 1, 4, - 1, 2, 3, 4, - 3, 4, 1, 2), nrow = 6, byrow = TRUE) - G <- list(NULL, c(1, 2, 4, 6), c(2, 4, 5, 6), c(2, 3)) - W <- list(NULL, c(1, 1, 1, 1), c(1, 1, 1, 1), c(1, 1)) - par <- mod_PL$coefficients - fit <- expectation(c("alpha", "delta"), par[1:m], c(1.0, par[-(1:m)]), - adherence, m, 1:3, 2:4, R2, G, W) - # score wrt log-parameters - score <- score_common(par = par, N = m, - mu = NULL, Kinv = NULL, A = A, B = B, fit = fit)*par - # score = c(A - fit$expA, B[-1] - fit$expB*par[-(1:m)]) - expect_equal(colSums(estfun(mod_PL, ref = NULL)), score) - # model with weights - w <- c(2, 7, 3, 4, 10, 5) - mod_PL <- PlackettLuce(rankings = R, npseudo = 0, weights = w, - adherence = adherence, start = c(alpha, delta)) - A <- c(sum(w[c(1, 4, 6)] * adherence[c(1, 4, 6)]), - sum(w[c(2, 3, 4, 5)] * adherence[c(2, 3, 4, 5)]*c(1, 1/3, 1, 1/2)), - sum(w[c(2, 3, 5)] * adherence[c(2, 3, 5)]*c(1, 1/3, 1/2)), - sum(w[c(2, 3, 6)] * adherence[c(2, 3, 6)]*c(1, 1/3, 1))) - B <- c(sum(w[c(1, 2, 2, 2, 4, 4, 6, 6)]), w[5], w[3]) - W <- list(NULL, w[G[[2]]], w[G[[3]]], w[G[[4]]]) - par <- mod_PL$coefficients - fit <- expectation(c("alpha", "delta"), par[1:m], c(1.0, par[-(1:m)]), - adherence, m, 1:3, 2:4, R2, G, W) - score <- score_common(par = par, N = m, - mu = NULL, Kinv = NULL, A = A, B = B, fit = fit)*par - - expect_equal(colSums(estfun(mod_PL, ref = NULL)), score) -}) +context("implementation [estfun in PlackettLuce]") + +## The artificial example in ?PlackettLuce +R <- matrix(c(1, 2, 0, 0, + 4, 1, 2, 3, + 2, 1, 1, 1, + 1, 2, 3, 0, + 2, 1, 1, 0, + 1, 0, 3, 2), nrow = 6, byrow = TRUE) +colnames(R) <- c("apple", "banana", "orange", "pear") + +test_that("estfun matches agRank, fixed adherence [fake triple comparisons]", { + m <- ncol(R) + n <- nrow(R) + adherence <- seq(0.75, 1.25, length.out = n) + alpha <- seq(0.25, 0.75, length.out = m) + delta <- seq(0.5, 1.5, length.out = 2) + # Fit model using PlackettLuce + mod_PL <- PlackettLuce(rankings = R, npseudo = 0, + adherence = adherence, start = c(alpha, delta)) + # sum over all sets st object i is in selected set adherence/size + A <- c(sum(adherence[c(1, 4, 6)]), + sum(adherence[c(2, 3, 4, 5)]*c(1, 1/3, 1, 1/2)), + sum(adherence[c(2, 3, 5)]*c(1, 1/3, 1/2)), + sum(adherence[c(2, 3, 6)]*c(1, 1/3, 1))) + # number of sets with cardinality d + B <- c(8, 1, 1) + # expectations + R2 <- matrix(c(2, 1, 3, 4, + 1, 4, 3, 2, + 1, 2, 3, 4, + 3, 2, 1, 4, + 1, 2, 3, 4, + 3, 4, 1, 2), nrow = 6, byrow = TRUE) + G <- list(NULL, c(1, 2, 4, 6), c(2, 4, 5, 6), c(2, 3)) + W <- list(NULL, c(1, 1, 1, 1), c(1, 1, 1, 1), 
c(1, 1)) + par <- mod_PL$coefficients + fit <- expectation(c("alpha", "delta"), par[1:m], c(1.0, par[-(1:m)]), + adherence, m, 1:3, 2:4, R2, G, W) + # score wrt log-parameters + score <- score_common(par = par, N = m, + mu = NULL, Kinv = NULL, A = A, B = B, fit = fit)*par + # score = c(A - fit$expA, B[-1] - fit$expB*par[-(1:m)]) + expect_equal(colSums(estfun(mod_PL, ref = NULL)), score) + # model with weights + w <- c(2, 7, 3, 4, 10, 5) + mod_PL <- PlackettLuce(rankings = R, npseudo = 0, weights = w, + adherence = adherence, start = c(alpha, delta)) + A <- c(sum(w[c(1, 4, 6)] * adherence[c(1, 4, 6)]), + sum(w[c(2, 3, 4, 5)] * adherence[c(2, 3, 4, 5)]*c(1, 1/3, 1, 1/2)), + sum(w[c(2, 3, 5)] * adherence[c(2, 3, 5)]*c(1, 1/3, 1/2)), + sum(w[c(2, 3, 6)] * adherence[c(2, 3, 6)]*c(1, 1/3, 1))) + B <- c(sum(w[c(1, 2, 2, 2, 4, 4, 6, 6)]), w[5], w[3]) + W <- list(NULL, w[G[[2]]], w[G[[3]]], w[G[[4]]]) + par <- mod_PL$coefficients + fit <- expectation(c("alpha", "delta"), par[1:m], c(1.0, par[-(1:m)]), + adherence, m, 1:3, 2:4, R2, G, W) + score <- score_common(par = par, N = m, + mu = NULL, Kinv = NULL, A = A, B = B, fit = fit)*par + + expect_equal(colSums(estfun(mod_PL, ref = NULL)), score) +}) diff --git a/tests/testthat/test-log-likelihood.R b/tests/testthat/test-log-likelihood.R index 5d2fb95..2301cec 100644 --- a/tests/testthat/test-log-likelihood.R +++ b/tests/testthat/test-log-likelihood.R @@ -1,200 +1,200 @@ -context("implementation [log-likelihood and coefficients]") - -## Get the legacy implementation -source_files <- dir(system.file("PlackettLuce0", package = "PlackettLuce"), - full.names = TRUE) - -## N.B. fitted0 requires tibble but unused in tests -for (file0 in source_files) source(file0) - -## Get reference implementations (Davidson, MM algorithms) -source_files <- dir(system.file("Reference_Implementations", - package = "PlackettLuce"), - full.names = TRUE) - -for (file0 in source_files) source(file0) - -coef_tol <- 1e-06 -loglik_tol <- 1e-12 - -## Extractor for the log-likelihood of poisson gnm's adjusting for the -mu part -## which is fixed due to the parameter space restriction to match row totals -logLik_poisson.gnm <- function(x) { - n <- nlevels(x$eliminate) - ll <- logLik(x) + n - attr(ll, "df") <- attr(ll, "df") - n - ll -} - - -## The artificial example in ?PlackettLuce -R <- matrix(c(1, 2, 0, 0, - 4, 1, 2, 3, - 2, 1, 1, 1, - 1, 2, 3, 0, - 2, 1, 1, 0, - 1, 0, 3, 2), nrow = 6, byrow = TRUE) -colnames(R) <- c("apple", "banana", "orange", "pear") -R <- as.rankings(R) -if (require("Matrix") & requireNamespace("igraph") & - requireNamespace("RSpectra")) { - model0 <- PlackettLuce0(rankings = R) - model1 <- PlackettLuce(rankings = R, npseudo = 0) - test_that("coef match legacy code [fake partial rankings with ties]", { - # coefficients - expect_equal(as.vector(coef(model0)), - as.vector(coef(model1)), tolerance = coef_tol) - }) - test_that("logLik matches legacy code [fake partial rankings with ties]", { - # log-likelihood - expect_equal(logLik(model0), logLik(model1), tolerance = loglik_tol) - }) - ## fit null model via glm - dat <- poisson_rankings(R, aggregate = FALSE, as.data.frame = TRUE) - test_that("null deviance matches glm [fake partial rankings with ties]", { - model2 <- glm(y ~ -1 + z, family = poisson, data = dat) - ## null deviance - expect_equal(-2*model1$null.loglik, model2$deviance) - ## null df - expect_equal(model1$df.null, model2$df.residual) - }) -} - - -## simple BT model -M <- matrix(c(1, 2, - 3, 1, - 1, 4, - 2, 1, - 2, 3, - 4, 3), nrow = 6, byrow = TRUE) 
-R <- as.rankings(M, "ordering") -mod1 <- PlackettLuce(R, npseudo = 0) -dat <- poisson_rankings(R, aggregate = FALSE, as.data.frame = TRUE) -if (require(gnm) & require(BradleyTerry2)){ - ## fit loglinear model - mod2 <- gnm(y ~ -1 + X, family = poisson, eliminate = z, data = dat, - constrain = 1) - BT_data <- data.frame(p1 = factor(M[,1]), p2 = factor(M[,2])) - mod3 <- BTm(rep(1, 6), p1, p2, data = BT_data) - test_that("estimates match gnm, BTm [fake paired comparisons]", { - ## coefficients - expect_equal(unname(coef(mod1)[-1]), unname(coef(mod2)[-1]), - tolerance = coef_tol) - expect_equal(unname(coef(mod2)[-1]), unname(coef(mod3)), - tolerance = coef_tol) - }) - test_that("logLik matches gnm, BTm [fake paired comparisons]", { - ## log-likelihood - expect_equal(logLik(mod1), logLik(mod3), check.attributes = FALSE, - tolerance = 1e-12) - expect_equal(attr(logLik(mod3), "df"), - attr(logLik_poisson.gnm(mod2), "df")) - expect_equal(logLik(mod3), logLik_poisson.gnm(mod2), - check.attributes = FALSE,tolerance = 1e-12) - }) - ## fit null model via glm - mod5 <- glm(y ~ -1 + z, family = poisson, data = dat) - test_that("null deviance matches glm, BTm [fake paired comparisons]", { - ## null deviance - expect_equal(-2*mod1$null.loglik, mod3$null.deviance) - expect_equal(-2*mod1$null.loglik, mod5$deviance) - ## null df - expect_equal(mod1$df.null, mod3$df.null) - expect_equal(mod1$df.null, mod5$df.residual) - }) -} - - -## Icehockey data -if (require(BradleyTerry2)){ - ## loglinear model with BTm - icehockey2 <- subset(icehockey, result != 0.5) #remove ties - mod_BT <- BTm(outcome = result, - player1 = visitor, player2 = opponent, - id = "team", data = icehockey2) - ## compare to PlackettLuce - R <- matrix(0, nrow = nrow(icehockey2), - ncol = nlevels(icehockey2$visitor)) - R[cbind(seq_len(nrow(icehockey2)), icehockey2$visitor)] <- - 2 - icehockey2$result - R[cbind(seq_len(nrow(icehockey2)), icehockey2$opponent)] <- - icehockey2$result + 1 - ## needs 170 iterations - mod_PL <- PlackettLuce(R, npseudo = 0) - test_that("estimates match BTm [icehockey]", { - expect_equal(unname(coef(mod_BT)), unname(coef(mod_PL))[-1], - tolerance = coef_tol) - }) - test_that("log-likelihood matches BTm [icehockey]", { - expect_equal(logLik(mod_BT), logLik(mod_PL), check.attributes = FALSE, - tolerance = loglik_tol) - expect_equal(attr(logLik(mod_BT), "df"), attr(logLik(mod_PL), "df")) - }) -} - - -## partial rankings, no ties -M <- matrix(c(1, 2, 0, 0, - 3, 1, 4, 0, - 1, 4, 0, 0, - 2, 1, 4, 3, - 2, 3, 4, 0, - 1, 2, 3, 0), nrow = 6, byrow = TRUE) -## via Hunter's MM (reference implementation) -gamma <- PL(M) -lambda <- log(c(gamma/sum(gamma))) -lambda <- lambda - lambda[1] -## via PlackettLuce -R <- as.rankings(M, "ordering") -mod1 <- PlackettLuce(R, npseudo = 0) -if (require(gnm)){ - ## fit loglinear model - dat <- dat <- poisson_rankings(R, aggregate = FALSE, as.data.frame = TRUE) - mod2 <- gnm(y ~ -1 + X, family = poisson, eliminate = z, data = dat, - constrain = 1) - test_that("coef match Hunter's MM, gnm [fake partial rankings no ties]", { - expect_equal(unname(coef(mod1))[-1], unname(coef(mod2))[-1], - tolerance = coef_tol) - expect_equal(unname(c(coef(mod1))), lambda, tolerance = coef_tol) - }) - test_that("logLik matches gnm [fake partial rankings no ties]", - { - expect_equal(logLik(mod1), logLik_poisson.gnm(mod2), - check.attributes = FALSE, tolerance = loglik_tol) - }) -} - - -## Nascar example from Hunter -if (require(StatRank)){ - ## 36 races. 
Partial rankings of length (42 or 43), ranking 83 drivers in - ## 1st to 83rd place (puts zero for 43rd or 44th to last place). No ties. - data(Data.Nascar) - ## StatRank PL function takes ~10min; not sure how to compare coef - ## a <- Estimation.PL.MLE(Data.Nascar)$Mean - ## via Hunter's MM (reference implementation) - gamma <- PL(Data.Nascar) - lambda <- log(c(gamma/sum(gamma))) - lambda <- lambda - lambda[1] - ## via PlackettLuce - R <- as.rankings(Data.Nascar, input = "ordering") - mod1 <- PlackettLuce(R, npseudo = 0) - ## fairly quick - A. Cameron (driver with ID 1) is fixed at zero - dat <- PlackettLuce:::poisson_rankings(R, aggregate = FALSE, - as.data.frame = TRUE) - mod2 <- gnm(y ~ -1 + X, family = poisson, eliminate = z, data = dat, - constrain = 1) - test_that("coef match Hunter's MM, gnm [nascar]", { - expect_equal(unname(coef(mod1))[-1], unname(coef(mod2))[-1], - tolerance = coef_tol) - expect_equal(unname(c(coef(mod1))), lambda, tolerance = coef_tol, - check.attributes = FALSE) - }) - test_that("logLik matches gnm [nascar]", - { - expect_equal(logLik(mod1), logLik_poisson.gnm(mod2), - check.attributes = FALSE, tolerance = loglik_tol) - }) -} - +context("implementation [log-likelihood and coefficients]") + +## Get the legacy implementation +source_files <- dir(system.file("PlackettLuce0", package = "PlackettLuce"), + full.names = TRUE) + +## N.B. fitted0 requires tibble but unused in tests +for (file0 in source_files) source(file0) + +## Get reference implementations (Davidson, MM algorithms) +source_files <- dir(system.file("Reference_Implementations", + package = "PlackettLuce"), + full.names = TRUE) + +for (file0 in source_files) source(file0) + +coef_tol <- 1e-06 +loglik_tol <- 1e-12 + +## Extractor for the log-likelihood of poisson gnm's adjusting for the -mu part +## which is fixed due to the parameter space restriction to match row totals +logLik_poisson.gnm <- function(x) { + n <- nlevels(x$eliminate) + ll <- logLik(x) + n + attr(ll, "df") <- attr(ll, "df") - n + ll +} + + +## The artificial example in ?PlackettLuce +R <- matrix(c(1, 2, 0, 0, + 4, 1, 2, 3, + 2, 1, 1, 1, + 1, 2, 3, 0, + 2, 1, 1, 0, + 1, 0, 3, 2), nrow = 6, byrow = TRUE) +colnames(R) <- c("apple", "banana", "orange", "pear") +R <- as.rankings(R) +if (require("Matrix") & requireNamespace("igraph") & + requireNamespace("RSpectra")) { + model0 <- PlackettLuce0(rankings = R) + model1 <- PlackettLuce(rankings = R, npseudo = 0) + test_that("coef match legacy code [fake partial rankings with ties]", { + # coefficients + expect_equal(as.vector(coef(model0)), + as.vector(coef(model1)), tolerance = coef_tol) + }) + test_that("logLik matches legacy code [fake partial rankings with ties]", { + # log-likelihood + expect_equal(logLik(model0), logLik(model1), tolerance = loglik_tol) + }) + ## fit null model via glm + dat <- poisson_rankings(R, aggregate = FALSE, as.data.frame = TRUE) + test_that("null deviance matches glm [fake partial rankings with ties]", { + model2 <- glm(y ~ -1 + z, family = poisson, data = dat) + ## null deviance + expect_equal(-2*model1$null.loglik, model2$deviance) + ## null df + expect_equal(model1$df.null, model2$df.residual) + }) +} + + +## simple BT model +M <- matrix(c(1, 2, + 3, 1, + 1, 4, + 2, 1, + 2, 3, + 4, 3), nrow = 6, byrow = TRUE) +R <- as.rankings(M, "ordering") +mod1 <- PlackettLuce(R, npseudo = 0) +dat <- poisson_rankings(R, aggregate = FALSE, as.data.frame = TRUE) +if (require(gnm) & require(BradleyTerry2)){ + ## fit loglinear model + mod2 <- gnm(y ~ -1 + X, family = poisson, 
eliminate = z, data = dat, + constrain = 1) + BT_data <- data.frame(p1 = factor(M[,1]), p2 = factor(M[,2])) + mod3 <- BTm(rep(1, 6), p1, p2, data = BT_data) + test_that("estimates match gnm, BTm [fake paired comparisons]", { + ## coefficients + expect_equal(unname(coef(mod1)[-1]), unname(coef(mod2)[-1]), + tolerance = coef_tol) + expect_equal(unname(coef(mod2)[-1]), unname(coef(mod3)), + tolerance = coef_tol) + }) + test_that("logLik matches gnm, BTm [fake paired comparisons]", { + ## log-likelihood + expect_equal(logLik(mod1), logLik(mod3), check.attributes = FALSE, + tolerance = 1e-12) + expect_equal(attr(logLik(mod3), "df"), + attr(logLik_poisson.gnm(mod2), "df")) + expect_equal(logLik(mod3), logLik_poisson.gnm(mod2), + check.attributes = FALSE,tolerance = 1e-12) + }) + ## fit null model via glm + mod5 <- glm(y ~ -1 + z, family = poisson, data = dat) + test_that("null deviance matches glm, BTm [fake paired comparisons]", { + ## null deviance + expect_equal(-2*mod1$null.loglik, mod3$null.deviance) + expect_equal(-2*mod1$null.loglik, mod5$deviance) + ## null df + expect_equal(mod1$df.null, mod3$df.null) + expect_equal(mod1$df.null, mod5$df.residual) + }) +} + + +## Icehockey data +if (require(BradleyTerry2)){ + ## loglinear model with BTm + icehockey2 <- subset(icehockey, result != 0.5) #remove ties + mod_BT <- BTm(outcome = result, + player1 = visitor, player2 = opponent, + id = "team", data = icehockey2) + ## compare to PlackettLuce + R <- matrix(0, nrow = nrow(icehockey2), + ncol = nlevels(icehockey2$visitor)) + R[cbind(seq_len(nrow(icehockey2)), icehockey2$visitor)] <- + 2 - icehockey2$result + R[cbind(seq_len(nrow(icehockey2)), icehockey2$opponent)] <- + icehockey2$result + 1 + ## needs 170 iterations + mod_PL <- PlackettLuce(R, npseudo = 0) + test_that("estimates match BTm [icehockey]", { + expect_equal(unname(coef(mod_BT)), unname(coef(mod_PL))[-1], + tolerance = coef_tol) + }) + test_that("log-likelihood matches BTm [icehockey]", { + expect_equal(logLik(mod_BT), logLik(mod_PL), check.attributes = FALSE, + tolerance = loglik_tol) + expect_equal(attr(logLik(mod_BT), "df"), attr(logLik(mod_PL), "df")) + }) +} + + +## partial rankings, no ties +M <- matrix(c(1, 2, 0, 0, + 3, 1, 4, 0, + 1, 4, 0, 0, + 2, 1, 4, 3, + 2, 3, 4, 0, + 1, 2, 3, 0), nrow = 6, byrow = TRUE) +## via Hunter's MM (reference implementation) +gamma <- PL(M) +lambda <- log(c(gamma/sum(gamma))) +lambda <- lambda - lambda[1] +## via PlackettLuce +R <- as.rankings(M, "ordering") +mod1 <- PlackettLuce(R, npseudo = 0) +if (require(gnm)){ + ## fit loglinear model + dat <- dat <- poisson_rankings(R, aggregate = FALSE, as.data.frame = TRUE) + mod2 <- gnm(y ~ -1 + X, family = poisson, eliminate = z, data = dat, + constrain = 1) + test_that("coef match Hunter's MM, gnm [fake partial rankings no ties]", { + expect_equal(unname(coef(mod1))[-1], unname(coef(mod2))[-1], + tolerance = coef_tol) + expect_equal(unname(c(coef(mod1))), lambda, tolerance = coef_tol) + }) + test_that("logLik matches gnm [fake partial rankings no ties]", + { + expect_equal(logLik(mod1), logLik_poisson.gnm(mod2), + check.attributes = FALSE, tolerance = loglik_tol) + }) +} + + +## Nascar example from Hunter +if (require(StatRank)){ + ## 36 races. Partial rankings of length (42 or 43), ranking 83 drivers in + ## 1st to 83rd place (puts zero for 43rd or 44th to last place). No ties. 
+ data(Data.Nascar) + ## StatRank PL function takes ~10min; not sure how to compare coef + ## a <- Estimation.PL.MLE(Data.Nascar)$Mean + ## via Hunter's MM (reference implementation) + gamma <- PL(Data.Nascar) + lambda <- log(c(gamma/sum(gamma))) + lambda <- lambda - lambda[1] + ## via PlackettLuce + R <- as.rankings(Data.Nascar, input = "ordering") + mod1 <- PlackettLuce(R, npseudo = 0) + ## fairly quick - A. Cameron (driver with ID 1) is fixed at zero + dat <- PlackettLuce:::poisson_rankings(R, aggregate = FALSE, + as.data.frame = TRUE) + mod2 <- gnm(y ~ -1 + X, family = poisson, eliminate = z, data = dat, + constrain = 1) + test_that("coef match Hunter's MM, gnm [nascar]", { + expect_equal(unname(coef(mod1))[-1], unname(coef(mod2))[-1], + tolerance = coef_tol) + expect_equal(unname(c(coef(mod1))), lambda, tolerance = coef_tol, + check.attributes = FALSE) + }) + test_that("logLik matches gnm [nascar]", + { + expect_equal(logLik(mod1), logLik_poisson.gnm(mod2), + check.attributes = FALSE, tolerance = loglik_tol) + }) +} + diff --git a/tests/testthat/test-pseudo.R b/tests/testthat/test-pseudo.R index f3848e3..6d40df1 100644 --- a/tests/testthat/test-pseudo.R +++ b/tests/testthat/test-pseudo.R @@ -1,195 +1,195 @@ -context("implementation [pseudo-data]") - -## Get the legacy implementation -source_files <- dir(system.file("PlackettLuce0", package = "PlackettLuce"), - full.names = TRUE) - -## N.B. fitted0 requires tibble but unused in tests -for (file0 in source_files) source(file0) - - -coef_tol <- 1e-06 -loglik_tol <- 1e-07 - -## Extractor for the log-likelihood of poisson gnm's adjusting for the -mu part -## which is fixed due to the parameter space restriction to match row totals -logLik_poisson.gnm <- function(x) { - n <- nlevels(x$eliminate) - ll <- logLik(x) + n - attr(ll, "df") <- attr(ll, "df") - n - ll -} - -## Weakly connected network -X <- matrix(c(1, 2, 0, 0, - 2, 1, 3, 0, - 0, 0, 1, 2, - 0, 0, 2, 1), ncol = 4, byrow = TRUE) -X <- as.rankings(X) - -if (require(Matrix)){ - model0 <- PlackettLuce0(rankings = X, network = "pseudodata") - model1 <- PlackettLuce(rankings = X, npseudo = 1) - test_that("coef match legacy code [weakly connected network]", { - # coefficients - expect_equal(as.vector(coef(model0)), - as.vector(coef(model1)), tolerance = coef_tol) - }) - test_that("logLik matches legacy code [weakly connected network]", { - # log-likelihood - expect_equal(logLik(model0), logLik(model1), tolerance = loglik_tol) - }) -} - -G <- group(X, c(1, 1, 2, 2)) -model2 <- PlackettLuce(rankings = G, npseudo = 1) -test_that("pseudodata works with grouped_rankings [weakly connected network]", { - expect_equal(coef(model1), coef(model2), tolerance = coef_tol) - expect_equal(logLik(model1), logLik(model2), tolerance = loglik_tol) -}) - -model3 <- PlackettLuce(rankings = G, npseudo = 0.5) -if (require(gnm) & require(sandwich)){ - ## add pseudodata - N <- ncol(X) - X2 <- cbind(X, "NULL" = 0) - pseudo <- matrix(0, nrow = 2*N, ncol = N + 1) - pseudo[, N + 1] <- 1:2 - pseudo[cbind(seq_len(nrow(pseudo)), - rep(seq_len(N), each = 2))] <- 2:1 - X2 <- rbind(pseudo, X2) - w <- c(rep.int(0.5, nrow(pseudo)), rep.int(1, nrow(X2))) - dat <- PlackettLuce:::poisson_rankings(X2, weights = w, aggregate = FALSE, - as.data.frame = TRUE) - ## fit log-linear model with added pseudodata - model4 <- gnm(y ~ -1 + X, family = poisson, eliminate = z, data = dat, - constrain = 1, weights = w) - test_that("non-integer pseudo data works [weakly connected network]", - { - # coef - expect_equal(as.vector(coef(model3)), - 
as.vector(parameters(model4)[-(N + 1)]), - tolerance = coef_tol) - # likelihood contributions - keep <- !dat$z %in% seq_len(2*N) - expect_equal(unname(colSums(estfun(model3))), - unname(colSums(estfun(model4)[keep, - N])), - tolerance = loglik_tol) - # rank - expect_equal(model3$rank, - model4$rank - nlevels(dat$z) - 1) - # df.residual - # rank - expect_equal(model3$df.residual, - sum(keep) - 2*N) - - }) -} - -model5 <- PlackettLuce(rankings = G, npseudo = 0.5, method = "BFGS", - control = list(reltol = 1e-10)) -test_that("pseudo data works with BFGS [weakly connected network]", - { - # coef - expect_equivalent(coef(model3), coef(model5), - tolerance = coef_tol) - # log-likelihood - expect_equal(logLik(model3), logLik(model5), - tolerance = loglik_tol) - }) - -model6 <- PlackettLuce(rankings = G, npseudo = 0.5, method = "BFGS", - control = list(reltol = 1e-10)) -test_that("pseudo data works with L-BFGS [weakly connected network]", - { - # coef - expect_equivalent(coef(model3), coef(model6), - tolerance = coef_tol) - # log-likelihood - expect_equal(logLik(model3), logLik(model6), - tolerance = loglik_tol) - }) - - - -## simple BT model -R <- matrix(c(1, 2, 0, 0, - 2, 0, 1, 0, - 1, 0, 0, 2, - 2, 1, 0, 0, - 0, 1, 2, 0, - 0, 0, 2, 0), byrow = TRUE, ncol = 4, - dimnames = list(NULL, letters[1:4])) - -test_that("disconnected network causes error [one always loses]", - { - # error for discinnected network - expect_error(mod <- PlackettLuce(R, npseudo = 0)) - # no error for connected subset - expect_error(mod <- PlackettLuce(R[, 1:3], npseudo = 0), NA) - }) - -test_that("disconnected network works with pseudodata [one always loses]", - { - expect_error(mod <- PlackettLuce(R), NA) - }) - -# weakly connected clusters -X <- matrix(c(1, 2, 0, 0, - 2, 1, 3, 0, - 0, 0, 1, 2, - 0, 0, 2, 1), ncol = 4, byrow = TRUE) -R <- as.rankings(X) - -test_that("disconnected network causes error [weakly connected clusters]", - { - expect_error(mod <- PlackettLuce(R, npseudo = 0)) - }) - -# disconnected clusters -X <- matrix(c(1, 2, 0, 0, - 2, 1, 0, 0, - 0, 0, 1, 2, - 0, 0, 2, 1), ncol = 4, byrow = TRUE) -R <- as.rankings(X) - -test_that("disconnected network causes error [disconnected clusters]", - { - expect_error(mod <- PlackettLuce(R, npseudo = 0)) - }) - -test_that("disconnected network works with pseudodata [disconnected clusters]", - { - expect_error(mod <- PlackettLuce(R), NA) - }) - -# two weakly connected items: -# item 1 always loses; item 4 only wins against item 1 -X <- matrix(c(4, 1, 2, 3, - 0, 2, 1, 3), nr = 2, byrow = TRUE) -R <- as.rankings(X) - -test_that("disconnected network causes error [weakly connected items]", - { - expect_error(mod <- PlackettLuce(R, npseudo = 0)) - }) - -test_that("disconnected network works with pseudodata [weakly connected items]", - { - expect_error(mod <- PlackettLuce(R), NA) - }) - -# item 1 always wins; item 4 always loses -X <- matrix(c(1, 2, 3, 4, - 1, 3, 2, 4), nr = 2, byrow = TRUE) -R <- as.rankings(X) - -test_that("disconnected network causes error [1 wins; 4 loses]", - { - expect_error(mod <- PlackettLuce(R, npseudo = 0)) - }) - -test_that("disconnected network works with pseudodata [1 wins; 4 loses]", - { - expect_error(mod <- PlackettLuce(R), NA) - }) +context("implementation [pseudo-data]") + +## Get the legacy implementation +source_files <- dir(system.file("PlackettLuce0", package = "PlackettLuce"), + full.names = TRUE) + +## N.B. 
fitted0 requires tibble but unused in tests +for (file0 in source_files) source(file0) + + +coef_tol <- 1e-06 +loglik_tol <- 1e-07 + +## Extractor for the log-likelihood of poisson gnm's adjusting for the -mu part +## which is fixed due to the parameter space restriction to match row totals +logLik_poisson.gnm <- function(x) { + n <- nlevels(x$eliminate) + ll <- logLik(x) + n + attr(ll, "df") <- attr(ll, "df") - n + ll +} + +## Weakly connected network +X <- matrix(c(1, 2, 0, 0, + 2, 1, 3, 0, + 0, 0, 1, 2, + 0, 0, 2, 1), ncol = 4, byrow = TRUE) +X <- as.rankings(X) + +if (require(Matrix)){ + model0 <- PlackettLuce0(rankings = X, network = "pseudodata") + model1 <- PlackettLuce(rankings = X, npseudo = 1) + test_that("coef match legacy code [weakly connected network]", { + # coefficients + expect_equal(as.vector(coef(model0)), + as.vector(coef(model1)), tolerance = coef_tol) + }) + test_that("logLik matches legacy code [weakly connected network]", { + # log-likelihood + expect_equal(logLik(model0), logLik(model1), tolerance = loglik_tol) + }) +} + +G <- group(X, c(1, 1, 2, 2)) +model2 <- PlackettLuce(rankings = G, npseudo = 1) +test_that("pseudodata works with grouped_rankings [weakly connected network]", { + expect_equal(coef(model1), coef(model2), tolerance = coef_tol) + expect_equal(logLik(model1), logLik(model2), tolerance = loglik_tol) +}) + +model3 <- PlackettLuce(rankings = G, npseudo = 0.5) +if (require(gnm) & require(sandwich)){ + ## add pseudodata + N <- ncol(X) + X2 <- cbind(X, "NULL" = 0) + pseudo <- matrix(0, nrow = 2*N, ncol = N + 1) + pseudo[, N + 1] <- 1:2 + pseudo[cbind(seq_len(nrow(pseudo)), + rep(seq_len(N), each = 2))] <- 2:1 + X2 <- rbind(pseudo, X2) + w <- c(rep.int(0.5, nrow(pseudo)), rep.int(1, nrow(X2))) + dat <- PlackettLuce:::poisson_rankings(X2, weights = w, aggregate = FALSE, + as.data.frame = TRUE) + ## fit log-linear model with added pseudodata + model4 <- gnm(y ~ -1 + X, family = poisson, eliminate = z, data = dat, + constrain = 1, weights = w) + test_that("non-integer pseudo data works [weakly connected network]", + { + # coef + expect_equal(as.vector(coef(model3)), + as.vector(parameters(model4)[-(N + 1)]), + tolerance = coef_tol) + # likelihood contributions + keep <- !dat$z %in% seq_len(2*N) + expect_equal(unname(colSums(estfun(model3))), + unname(colSums(estfun(model4)[keep, - N])), + tolerance = loglik_tol) + # rank + expect_equal(model3$rank, + model4$rank - nlevels(dat$z) - 1) + # df.residual + # rank + expect_equal(model3$df.residual, + sum(keep) - 2*N) + + }) +} + +model5 <- PlackettLuce(rankings = G, npseudo = 0.5, method = "BFGS", + control = list(reltol = 1e-10)) +test_that("pseudo data works with BFGS [weakly connected network]", + { + # coef + expect_equivalent(coef(model3), coef(model5), + tolerance = coef_tol) + # log-likelihood + expect_equal(logLik(model3), logLik(model5), + tolerance = loglik_tol) + }) + +model6 <- PlackettLuce(rankings = G, npseudo = 0.5, method = "BFGS", + control = list(reltol = 1e-10)) +test_that("pseudo data works with L-BFGS [weakly connected network]", + { + # coef + expect_equivalent(coef(model3), coef(model6), + tolerance = coef_tol) + # log-likelihood + expect_equal(logLik(model3), logLik(model6), + tolerance = loglik_tol) + }) + + + +## simple BT model +R <- matrix(c(1, 2, 0, 0, + 2, 0, 1, 0, + 1, 0, 0, 2, + 2, 1, 0, 0, + 0, 1, 2, 0, + 0, 0, 2, 0), byrow = TRUE, ncol = 4, + dimnames = list(NULL, letters[1:4])) + +test_that("disconnected network causes error [one always loses]", + { + # error for discinnected 
network + expect_error(mod <- PlackettLuce(R, npseudo = 0)) + # no error for connected subset + expect_error(mod <- PlackettLuce(R[, 1:3], npseudo = 0), NA) + }) + +test_that("disconnected network works with pseudodata [one always loses]", + { + expect_error(mod <- PlackettLuce(R), NA) + }) + +# weakly connected clusters +X <- matrix(c(1, 2, 0, 0, + 2, 1, 3, 0, + 0, 0, 1, 2, + 0, 0, 2, 1), ncol = 4, byrow = TRUE) +R <- as.rankings(X) + +test_that("disconnected network causes error [weakly connected clusters]", + { + expect_error(mod <- PlackettLuce(R, npseudo = 0)) + }) + +# disconnected clusters +X <- matrix(c(1, 2, 0, 0, + 2, 1, 0, 0, + 0, 0, 1, 2, + 0, 0, 2, 1), ncol = 4, byrow = TRUE) +R <- as.rankings(X) + +test_that("disconnected network causes error [disconnected clusters]", + { + expect_error(mod <- PlackettLuce(R, npseudo = 0)) + }) + +test_that("disconnected network works with pseudodata [disconnected clusters]", + { + expect_error(mod <- PlackettLuce(R), NA) + }) + +# two weakly connected items: +# item 1 always loses; item 4 only wins against item 1 +X <- matrix(c(4, 1, 2, 3, + 0, 2, 1, 3), nr = 2, byrow = TRUE) +R <- as.rankings(X) + +test_that("disconnected network causes error [weakly connected items]", + { + expect_error(mod <- PlackettLuce(R, npseudo = 0)) + }) + +test_that("disconnected network works with pseudodata [weakly connected items]", + { + expect_error(mod <- PlackettLuce(R), NA) + }) + +# item 1 always wins; item 4 always loses +X <- matrix(c(1, 2, 3, 4, + 1, 3, 2, 4), nr = 2, byrow = TRUE) +R <- as.rankings(X) + +test_that("disconnected network causes error [1 wins; 4 loses]", + { + expect_error(mod <- PlackettLuce(R, npseudo = 0)) + }) + +test_that("disconnected network works with pseudodata [1 wins; 4 loses]", + { + expect_error(mod <- PlackettLuce(R), NA) + }) diff --git a/tests/testthat/test-simulate.R b/tests/testthat/test-simulate.R index e395d45..bf64530 100644 --- a/tests/testthat/test-simulate.R +++ b/tests/testthat/test-simulate.R @@ -1,83 +1,84 @@ -context("implementation [simulate.PlackettLuce]") - -## Get the legacy implementation -source_files <- dir(system.file("PlackettLuce0", package = "PlackettLuce"), - full.names = TRUE) - -## N.B. 
fitted0 requires tibble but unused in tests -for (file0 in source_files) source(file0) - -R <- matrix(c(1, 2, 0, 0, - 4, 1, 2, 3, - 2, 1, 1, 1, - 1, 2, 3, 0, - 2, 1, 1, 0, - 1, 0, 3, 2), nrow = 6, byrow = TRUE) -colnames(R) <- c("apple", "banana", "orange", "pear") -mod <- PlackettLuce(R) - -s1 <- simulate(mod, 5, seed = 112) -s2 <- simulate(mod, 4, seed = 112) - -test_that("the seed argument is respected in [simulate.PlackettLuce]", { - expect_identical(s1[1:3], s2[1:3]) -}) - - -## A small simulation study -## repeat simulation for legacy implementation too, to make sure comparable on -## test machine (robust to changes in RNG) -if (require("Matrix")){ - R <- PlackettLuce:::generate_rankings(maxi = 5, n_rankings = 100, tie = 0, - seed = 123) - sim <- function(mod){ - samples <- simulate.PlackettLuce(mod, 100, seed = 123) - fits <- lapply(samples, PlackettLuce, npseudo = 0.5) - coefs <- vapply(fits, function(fit) { - cc <- coef(fit) - if (length(cc) < 9) - c(cc, rep(0, 9 - length(cc))) - else - cc - }, numeric(length(coef(mod)))) - unname(unclass(rowMeans(coefs) - coef(mod))) - } - - result_biases <- sim(PlackettLuce(R, npseudo = 0)) - mod0 <- PlackettLuce0(R) - mod0$ties <- seq_len(mod0$maxTied) - result_biases0 <- sim(mod0) - - test_that("simulation results are consistent to first version", { - expect_equivalent(result_biases, result_biases0, tolerance = 1e-06) - }) -} - -## par(mfrow = c(3, 3)) -## for (j in 1:9) { hist(coefs[j,], main = paste(j)); abline(v = coef(mod1)[j]) } - -## ## ## No ties -## R <- PlackettLuce:::generate_rankings(maxi = 10, n_rankings = 100, -## tie = 1000, seed = 123) -## mod1 <- PlackettLuce(R) - -## ## Using Diaconis (1998). Chapter 9D -## samples <- simulate(mod1, 1000, seed = 123, multinomial = FALSE) -## fits <- mclapply(samples, PlackettLuce, npseudo = 0.5, mc.cores = 4) -## coefs <- sapply(fits, function(fit) { -## cc <- coef(fit) -## cc -## }) -## result_biases <- unname(unclass(rowMeans(coefs) - coef(mod1))) - -## ## Using multinomial sampling -## samples2 <- simulate(mod1, 1000, seed = 123, multinomial = TRUE) -## fits2 <- mclapply(samples2, PlackettLuce, npseudo = 0.5, mc.cores = 4) -## coefs2 <- sapply(fits2, function(fit) { -## cc <- coef(fit) -## cc -## }) -## result_biases2 <- unname(unclass(rowMeans(coefs2) - coef(mod1))) - - - +context("implementation [simulate.PlackettLuce]") + +## Get the legacy implementation +source_files <- dir(system.file("PlackettLuce0", package = "PlackettLuce"), + full.names = TRUE) + +## N.B. 
fitted0 requires tibble but unused in tests +for (file0 in source_files) source(file0) + +R <- matrix(c(1, 2, 0, 0, + 4, 1, 2, 3, + 2, 1, 1, 1, + 1, 2, 3, 0, + 2, 1, 1, 0, + 1, 0, 3, 2), nrow = 6, byrow = TRUE) +colnames(R) <- c("apple", "banana", "orange", "pear") +mod <- PlackettLuce(R) + +s1 <- simulate(mod, 5, seed = 112) +s2 <- simulate(mod, 4, seed = 112) + +test_that("the seed argument is respected in [simulate.PlackettLuce]", { + expect_identical(s1[1:3], s2[1:3]) +}) + + +## A small simulation study +## repeat simulation for legacy implementation too, to make sure comparable on +## test machine (robust to changes in RNG) +if (require("Matrix")){ + R <- PlackettLuce:::generate_rankings(maxi = 5, n_rankings = 100, tie = 0, + seed = 123) + sim <- function(mod){ + samples <- simulate.PlackettLuce(mod, 100, seed = 123) + fits <- lapply(samples, PlackettLuce, npseudo = 0.5) + coefs <- vapply(fits, function(fit) { + cc <- coef(fit) + if (length(cc) < 9) + c(cc, rep(0, 9 - length(cc))) + else + cc + }, numeric(length(coef(mod)))) + unname(unclass(rowMeans(coefs) - coef(mod))) + } + + result_biases <- sim(PlackettLuce(R, npseudo = 0)) + mod0 <- PlackettLuce0(R) + mod0$ties <- seq_len(mod0$maxTied) + result_biases0 <- sim(mod0) + + test_that("simulation results are consistent to first version", { + expect_equivalent(result_biases, result_biases0, tolerance = 1e-06) + }) +} + +## par(mfrow = c(3, 3)) +## for (j in 1:9) { +## hist(coefs[j,], main = paste(j)); abline(v = coef(mod1)[j]) } + +## ## ## No ties +## R <- PlackettLuce:::generate_rankings(maxi = 10, n_rankings = 100, +## tie = 1000, seed = 123) +## mod1 <- PlackettLuce(R) + +## ## Using Diaconis (1998). Chapter 9D +## samples <- simulate(mod1, 1000, seed = 123, multinomial = FALSE) +## fits <- mclapply(samples, PlackettLuce, npseudo = 0.5, mc.cores = 4) +## coefs <- sapply(fits, function(fit) { +## cc <- coef(fit) +## cc +## }) +## result_biases <- unname(unclass(rowMeans(coefs) - coef(mod1))) + +## ## Using multinomial sampling +## samples2 <- simulate(mod1, 1000, seed = 123, multinomial = TRUE) +## fits2 <- mclapply(samples2, PlackettLuce, npseudo = 0.5, mc.cores = 4) +## coefs2 <- sapply(fits2, function(fit) { +## cc <- coef(fit) +## cc +## }) +## result_biases2 <- unname(unclass(rowMeans(coefs2) - coef(mod1))) + + + diff --git a/tests/testthat/test-ties.R b/tests/testthat/test-ties.R index a61b94e..379f7ef 100644 --- a/tests/testthat/test-ties.R +++ b/tests/testthat/test-ties.R @@ -1,142 +1,142 @@ -context("implementation [ties]") - -coef_tol <- 1e-06 -loglik_tol <- 1e-12 - -## Extractor for the log-likelihood of poisson gnm's adjusting for the -mu part -## which is fixed due to the parameter space restriction to match row totals -logLik_poisson.gnm <- function(x) { - n <- nlevels(x$eliminate) - ll <- logLik(x) + n - attr(ll, "df") <- attr(ll, "df") - n - ll -} - -## ties, with some orders of tie not observed -R <- matrix(c(1, 2, 0, 0, - 4, 1, 2, 3, - 2, 1, 1, 1, - 1, 2, 3, 0, - 2, 1, 3, 0, - 1, 0, 3, 2, - 1, 1, 1, 1), nrow = 7, byrow = TRUE) -colnames(R) <- c("apple", "banana", "orange", "pear") -R <- as.rankings(R) - -R2 <- matrix(c(1, 2, 0, 0, - 4, 1, 2, 3, - 2, 1, 1, 0, - 1, 2, 3, 0, - 2, 1, 3, 0, - 1, 0, 3, 2, - 1, 1, 1, 1), nrow = 7, byrow = TRUE) -colnames(R2) <- c("apple", "banana", "orange", "pear") -R2 <- as.rankings(R2) - -# compute components for R2 directly for low-level checks -## (sum over all sets st object i is in selected set)/size -A <- c(sum(1, 1, 1, 1, 1/4), - sum(1, 1/2, 1, 1, 1/4), - sum(1, 1/2, 1/4), - 
sum(1, 1, 1/4)) -## number of sets with cardinality d = c(1, 2, 4) -B <- c(10, 1, 1) -## item id for wins -item_id <- c(1, 2, 3, 4, 2, 3, 1, 2, 2, 1, 1, 4, 1, 2, 3, 4) -## 1/size of winning sets -S <- c(1, 1, 1, 1, 1/2, 1/2, 1, 1, 1, 1, 1, 1, 1/4, 1/4, 1/4, 1/4) -## ranking -ranker_id <- rep(1:7, c(1, 3, 2, 2, 2, 2, 4)) -## reverse orderings (unranked used to fill rows) -R2orderings <- matrix(c(2, 1, 3, 4, - 1, 4, 3, 2, - 1, 2, 3, 4, - 3, 2, 1, 4, - 3, 1, 2, 4, - 3, 4, 1, 2, - 1, 2, 3, 4), nrow = 7, byrow = TRUE) -## rankings which select from sets of size 1:4 -G <- list(NULL, c(1, 2, 4, 5, 6), c(2, 3, 4, 5, 6), c(2, 7)) -## weights corresponding to G (not aggregated) -W <- list(NULL, c(1, 1, 1, 1, 1), c(1, 1, 1, 1, 1), c(1, 1)) -## tie orders -d <- c(1, 2, 4) -## set sizes -P <- 2:4 -N <- ncol(R2) - - -if (require("gnm")){ - test_that("works with missing tie orders [fake partial rankings with ties]", { - # missing lowest tie order - model1 <- PlackettLuce(rankings = R, npseudo = 0) - ## fit model via gnm - dat <- poisson_rankings(R, aggregate = FALSE, as.data.frame = TRUE) - model2 <- gnm(y ~ X, eliminate = z, family = poisson, - data = dat, constrain = 1) - ## coefficients - expect_equal(as.vector(coef(model1)[-1]), - as.vector(coef(model2)[-1]), tolerance = coef_tol) - ## loglik - expect_equal(logLik(model1), logLik_poisson.gnm(model2), - check.attributes = FALSE, tolerance = 1e-12) - expect_equal(attr(logLik(model1), "df"), - attr(logLik_poisson.gnm(model2), "df")) - - # missing intermediate tie order - model1 <- PlackettLuce(rankings = R2, npseudo = 0) - ## fit model via gnm - dat <- poisson_rankings(R2, aggregate = FALSE, - as.data.frame = TRUE) - model2 <- gnm(y ~ X, eliminate = z, family = poisson, - data = dat, constrain = 1) - ## coefficients - expect_equal(as.vector(coef(model1)[-1]), - as.vector(coef(model2)[-1]), tolerance = coef_tol) - ## loglik - expect_equal(logLik(model1), logLik_poisson.gnm(model2), - check.attributes = FALSE, tolerance = 1e-12) - expect_equal(attr(logLik(model1), "df"), - attr(logLik_poisson.gnm(model2), "df")) - ## fitted values - expect_equal(fitted(model1)$fitted, fitted(model2)[dat$y == 1], - check.attributes = FALSE, tolerance = coef_tol) - ## vcov - expect_equal(vcov(model1), unclass(vcov(model2)), - check.attributes = FALSE, tolerance = coef_tol) - ## estfun - par <- model1$coefficients - fit <- expectation(c("alpha", "delta"), par[1:N], c(1.0, par[-(1:N)]), - NULL, N, d, P, R2orderings, G, W) - # score wrt log-parameters - score <- score_common(par = par, N = N, - mu = NULL, Kinv = NULL, A = A, B = B, fit = fit)*par - # score = c(A - fit$expA, B[-1] - fit$expB*par[-(1:m)]) - expect_equal(colSums(estfun(model1, ref = NULL)), score) - }) -} - -test_that("works with adherence missing tie orders", { - # missing intermediate tie order - gamma <- list(shape = 100, rate = 100) - model1 <- PlackettLuce(rankings = R2, npseudo = 0, - gamma = gamma) - alpha <- exp(coef(model1)[1:4]) - delta <- exp(coef(model1)[5:6]) - a <- model1$adherence - wa <- model1$weights - ## compute log-posterior using expectation (tested above) - A <- unname(rowsum(a[ranker_id]*S, item_id)[,1L]) - fit <- expectation("all", alpha, c(1.0, delta), - a, N, d, P, R2orderings, G, W) - res <- -loglik_common(c(alpha, delta), N, NULL, NULL, A, B, fit) - logp <- -res + sum(wa*((gamma$shape - 1L)*log(a) - gamma$rate*a)) - ## compute log-likelihood using normalization - # (only used when estimating adherence) - Z <- unname(rowsum(S*log(alpha[item_id]), ranker_id)[,1L]) - fit2 <- 
normalization(alpha, c(1.0, delta), - a, d, P, R2orderings, G, W) - res2 <- -loglik_adherence(a, gamma$shape, gamma$rate, wa, Z, fit2) - logp2 <- -res2 + sum(B[-1]*log(delta)) - expect_equal(logp, logp2) -}) +context("implementation [ties]") + +coef_tol <- 1e-06 +loglik_tol <- 1e-12 + +## Extractor for the log-likelihood of poisson gnm's adjusting for the -mu part +## which is fixed due to the parameter space restriction to match row totals +logLik_poisson.gnm <- function(x) { + n <- nlevels(x$eliminate) + ll <- logLik(x) + n + attr(ll, "df") <- attr(ll, "df") - n + ll +} + +## ties, with some orders of tie not observed +R <- matrix(c(1, 2, 0, 0, + 4, 1, 2, 3, + 2, 1, 1, 1, + 1, 2, 3, 0, + 2, 1, 3, 0, + 1, 0, 3, 2, + 1, 1, 1, 1), nrow = 7, byrow = TRUE) +colnames(R) <- c("apple", "banana", "orange", "pear") +R <- as.rankings(R) + +R2 <- matrix(c(1, 2, 0, 0, + 4, 1, 2, 3, + 2, 1, 1, 0, + 1, 2, 3, 0, + 2, 1, 3, 0, + 1, 0, 3, 2, + 1, 1, 1, 1), nrow = 7, byrow = TRUE) +colnames(R2) <- c("apple", "banana", "orange", "pear") +R2 <- as.rankings(R2) + +# compute components for R2 directly for low-level checks +## (sum over all sets st object i is in selected set)/size +A <- c(sum(1, 1, 1, 1, 1/4), + sum(1, 1/2, 1, 1, 1/4), + sum(1, 1/2, 1/4), + sum(1, 1, 1/4)) +## number of sets with cardinality d = c(1, 2, 4) +B <- c(10, 1, 1) +## item id for wins +item_id <- c(1, 2, 3, 4, 2, 3, 1, 2, 2, 1, 1, 4, 1, 2, 3, 4) +## 1/size of winning sets +S <- c(1, 1, 1, 1, 1/2, 1/2, 1, 1, 1, 1, 1, 1, 1/4, 1/4, 1/4, 1/4) +## ranking +ranker_id <- rep(1:7, c(1, 3, 2, 2, 2, 2, 4)) +## reverse orderings (unranked used to fill rows) +R2orderings <- matrix(c(2, 1, 3, 4, + 1, 4, 3, 2, + 1, 2, 3, 4, + 3, 2, 1, 4, + 3, 1, 2, 4, + 3, 4, 1, 2, + 1, 2, 3, 4), nrow = 7, byrow = TRUE) +## rankings which select from sets of size 1:4 +G <- list(NULL, c(1, 2, 4, 5, 6), c(2, 3, 4, 5, 6), c(2, 7)) +## weights corresponding to G (not aggregated) +W <- list(NULL, c(1, 1, 1, 1, 1), c(1, 1, 1, 1, 1), c(1, 1)) +## tie orders +d <- c(1, 2, 4) +## set sizes +P <- 2:4 +N <- ncol(R2) + + +if (require("gnm")){ + test_that("works with missing tie orders", { + # missing lowest tie order + model1 <- PlackettLuce(rankings = R, npseudo = 0) + ## fit model via gnm + dat <- poisson_rankings(R, aggregate = FALSE, as.data.frame = TRUE) + model2 <- gnm(y ~ X, eliminate = z, family = poisson, + data = dat, constrain = 1) + ## coefficients + expect_equal(as.vector(coef(model1)[-1]), + as.vector(coef(model2)[-1]), tolerance = coef_tol) + ## loglik + expect_equal(logLik(model1), logLik_poisson.gnm(model2), + check.attributes = FALSE, tolerance = 1e-12) + expect_equal(attr(logLik(model1), "df"), + attr(logLik_poisson.gnm(model2), "df")) + + # missing intermediate tie order + model1 <- PlackettLuce(rankings = R2, npseudo = 0) + ## fit model via gnm + dat <- poisson_rankings(R2, aggregate = FALSE, + as.data.frame = TRUE) + model2 <- gnm(y ~ X, eliminate = z, family = poisson, + data = dat, constrain = 1) + ## coefficients + expect_equal(as.vector(coef(model1)[-1]), + as.vector(coef(model2)[-1]), tolerance = coef_tol) + ## loglik + expect_equal(logLik(model1), logLik_poisson.gnm(model2), + check.attributes = FALSE, tolerance = 1e-12) + expect_equal(attr(logLik(model1), "df"), + attr(logLik_poisson.gnm(model2), "df")) + ## fitted values + expect_equal(fitted(model1)$fitted, fitted(model2)[dat$y == 1], + check.attributes = FALSE, tolerance = coef_tol) + ## vcov + expect_equal(vcov(model1), unclass(vcov(model2)), + check.attributes = FALSE, tolerance = coef_tol) + 
## estfun + par <- model1$coefficients + fit <- expectation(c("alpha", "delta"), par[1:N], c(1.0, par[-(1:N)]), + NULL, N, d, P, R2orderings, G, W) + # score wrt log-parameters + score <- score_common(par = par, N = N, mu = NULL, Kinv = NULL, + A = A, B = B, fit = fit)*par + # score = c(A - fit$expA, B[-1] - fit$expB*par[-(1:m)]) + expect_equal(colSums(estfun(model1, ref = NULL)), score) + }) +} + +test_that("works with adherence missing tie orders", { + # missing intermediate tie order + gamma <- list(shape = 100, rate = 100) + model1 <- PlackettLuce(rankings = R2, npseudo = 0, + gamma = gamma) + alpha <- exp(coef(model1)[1:4]) + delta <- exp(coef(model1)[5:6]) + a <- model1$adherence + wa <- model1$weights + ## compute log-posterior using expectation (tested above) + A <- unname(rowsum(a[ranker_id]*S, item_id)[,1L]) + fit <- expectation("all", alpha, c(1.0, delta), + a, N, d, P, R2orderings, G, W) + res <- -loglik_common(c(alpha, delta), N, NULL, NULL, A, B, fit) + logp <- -res + sum(wa*((gamma$shape - 1L)*log(a) - gamma$rate*a)) + ## compute log-likelihood using normalization + # (only used when estimating adherence) + Z <- unname(rowsum(S*log(alpha[item_id]), ranker_id)[,1L]) + fit2 <- normalization(alpha, c(1.0, delta), + a, d, P, R2orderings, G, W) + res2 <- -loglik_adherence(a, gamma$shape, gamma$rate, wa, Z, fit2) + logp2 <- -res2 + sum(B[-1]*log(delta)) + expect_equal(logp, logp2) +})
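
The adherence test above verifies two internal routes to the same log-posterior (expectation/loglik_common versus normalization/loglik_adherence). For reference, here is a minimal sketch of the corresponding fit through the exported interface only, reusing the R2 rankings and the Gamma(100, 100) adherence prior from that test; no output values are asserted and the snippet is illustrative rather than part of the patch:

library(PlackettLuce)
R2 <- matrix(c(1, 2, 0, 0,
               4, 1, 2, 3,
               2, 1, 1, 0,
               1, 2, 3, 0,
               2, 1, 3, 0,
               1, 0, 3, 2,
               1, 1, 1, 1), nrow = 7, byrow = TRUE,
             dimnames = list(NULL, c("apple", "banana", "orange", "pear")))
R2 <- as.rankings(R2)
## fit with a Gamma(100, 100) prior on each ranker's adherence,
## matching the call used in the test above
fit <- PlackettLuce(rankings = R2, npseudo = 0,
                    gamma = list(shape = 100, rate = 100))
coef(fit)        # log-worths (first item as reference) and tie parameters
fit$adherence    # estimated per-ranker adherence parameters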