Working on pscore and auc

USCbiostats · Oct 24, 2023 · d11fc14 · d11fc14
1 parent 8f0205d
commit d11fc14
Show file tree

Hide file tree

Showing 7 changed files with 163 additions and 35 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -7,7 +7,7 @@ Authors@R: c(person("George", "Vega Yon", role=c("aut","cre"),
          5P01CA196569-02"),
          person("USC Biostatistics", role = "cph")
          )
-Version: 0.3-3
+Version: 0.3-4
 Description: Implements a parsimonious evolutionary model to analyze and
   predict gene-functional annotations in phylogenetic trees as described in Vega
   Yon et al. (2021) <doi:10.1371/journal.pcbi.1007948>. Focusing on

diff --git a/NAMESPACE b/NAMESPACE
@@ -69,6 +69,8 @@ S3method(predict_pre_order,aphylo)
 S3method(predict_pre_order,aphylo_estimates)
 S3method(prediction_score,aphylo_estimates)
 S3method(prediction_score,default)
+S3method(prediction_score,list)
+S3method(prediction_score,matrix)
 S3method(print,aphylo)
 S3method(print,aphylo_auc)
 S3method(print,aphylo_estimates)

diff --git a/R/prediction_score.R b/R/prediction_score.R
@@ -1,13 +1,18 @@
 #' Calculate prediction score (quality of prediction)
 #' 
-#' @param x An object of class [aphylo_estimates] or a numeric matrix.
-#' @param expected Integer vector of length \eqn{n}. Expected values (either 0 or 1).
+#' @param x An object of class [aphylo_estimates] or other numeric vector-like
+#' object (see details).
+#' @param expected Numeric vector-like object length \eqn{n} (see details).
+#' Expected values (either 0 or 1).
 #' @param alpha0,alpha1 Probability of observing a zero an a one, respectively.
 #' @param W A square matrix. Must have as many rows as genes in `expected`.
 #' @param ... Further arguments passed to [predict.aphylo_estimates]
 #' @export
 #' @details In the case of `prediction_score`, `...` are passed to
 #' `predict.aphylo_estimates`.
+#' The function will accept `x` as a numeric vector, list of vectors, or matrix.
+#' Otherwise, it will try to coerce it to a matrix. If it fails, it will throw
+#' an error.
 #' @returns 
 #' A list of class `aphylo_prediction_score`:
 #' -  obs       : Observed 1 - MAE.
@@ -53,7 +58,66 @@ prediction_score <- function(
   ) UseMethod("prediction_score")
 
 #' @export
-#' @rdname prediction_score
+prediction_score.matrix <- function(
+  x,
+  expected,
+  alpha0 = NULL,
+  alpha1 = NULL,
+  W = NULL,
+  ...
+) {
+
+  # Both x and expected should be numeric
+  if (!is.numeric(x) || !is.numeric(expected))
+    stop("`x` and `expected` must be numeric.", call.=FALSE)
+
+  # Checking that it has a single column
+  if (ncol(x) != 1L || ncol(expected) != 1L)
+    stop("`x` and `y` must have a single column.", call.=FALSE)
+
+  # Checking dimensions
+  if (any(dim(x) != dim(expected)))
+    stop("`x` and `expected` differ in length. These must match.", call.=FALSE)
+
+  # Vectorizing
+  x        <- as.vector(x)
+  expected <- as.vector(expected)
+
+  # Passing to default method
+  prediction_score_backend(
+    x        = matrix(x, ncol = 1),
+    expected = matrix(expected, ncol = 1),
+    alpha0   = alpha0,
+    alpha1   = alpha1,
+    W        = W,
+    ...
+  )  
+
+}
+
+#' @export
+prediction_score.list <- function(
+  x,
+  expected,
+  alpha0 = NULL,
+  alpha1 = NULL,
+  W = NULL,
+  ...
+) {
+
+  # Passing to default method
+  prediction_score.matrix(
+    x        = do.call(rbind, x),
+    expected = do.call(rbind, expected),
+    alpha0   = alpha0,
+    alpha1   = alpha1,
+    W        = W,
+    ...
+  )  
+
+}
+
+#' @export
 prediction_score.default <- function(
   x,
   expected,
@@ -62,7 +126,33 @@ prediction_score.default <- function(
   W      = NULL,
   ...
   ) {
-
+
+  x        <- tryCatch(as.matrix(x), error = function(e) e)
+  expected <- tryCatch(as.matrix(expected), error = function(e) e)
+
+  if (inherits(x, "error") || inherits(expected, "error"))
+    stop("If not list or matrix, `x` and `expected` must be numeric.", call.=FALSE)
+
+  prediction_score.matrix(
+    x        = x,
+    expected = expected,
+    alpha0   = alpha0,
+    alpha1   = alpha1,
+    W        = W,
+    ...
+  )
+
+}
+
+prediction_score_backend <- function(
+  x,
+  expected,
+  alpha0 = NULL,
+  alpha1 = NULL,
+  W      = NULL,
+  ...
+  ) {
+
   # Checking dimensions
   if (length(x) != length(expected))
     stop("`x` and `expected` differ in length. These must match.", call.=FALSE)
@@ -81,7 +171,7 @@ prediction_score.default <- function(
   # score.
   if (!length(alpha0))
     alpha0 <- 1 - mean(expected)
-  if (is.null(alpha1))
+  if (!length(alpha1))
     alpha1 <- 1 - alpha0
 
   if (is.null(W))
@@ -227,20 +317,20 @@ prediction_score.aphylo_estimates <- function(
   # Adjusting alphas according to loo logic. To make the benchmark fair, we need
   # to exclude one annotation from each type for the loo
   n <- length(ids)
-  if (is.null(alpha0) && loo) {
+  if (!length(alpha0) && loo) {
 
     alpha0 <- max(sum(expected[ids,] == 0) - ncol(expected), 0)
     alpha0 <- alpha0/((nrow(expected[ids,]) - 1) * ncol(expected))
 
   }
-  if (is.null(alpha1) && loo) {
+  if (!length(alpha1) && loo) {
 
     alpha1 <- max(sum(expected[ids,]) - ncol(expected), 0)
     alpha1 <- alpha1/((nrow(expected[ids,]) - 1) * ncol(expected))
 
   }
 
-  ans <- prediction_score(
+  ans <- prediction_score_backend(
     x        = pred[ids,,drop=FALSE],
     expected = expected[ids,,drop = FALSE],
     alpha0   = alpha0,

diff --git a/inst/tinytest/test-auc.r b/inst/tinytest/test-auc.r
@@ -1,18 +1,52 @@
-# context("Area Under the Curve")
-
-# test_that("Coincides with what the AUC::auc(roc()) function returns.", {
-
-  set.seed(1231)
-  x <- rnorm(100)
-  y <- as.integer((.2*x + rnorm(100)) > 0)
-  p <- stats::predict(stats::glm(y~0+x, family=binomial("probit")), type="response")
-
-
-  ans0 <- auc(p, y, 100)
-  ans1 <- AUC::auc(AUC::roc(p, as.factor(y)))
-
-  expect_equal(ans0$auc, ans1, tol=0.01)
-
-# })
 
+set.seed(1231)
+x <- rnorm(100)
+y <- as.integer((.2*x + rnorm(100)) > 0)
+p <- stats::predict(stats::glm(y~0+x, family=binomial("probit")), type="response")
+
+
+ans0 <- auc(p, y, 100)
+ans1 <- AUC::auc(AUC::roc(p, as.factor(y)))
+
+# Default way
+expect_equal(ans0$auc, ans1, tol=0.01)
+
+# Now using lists
+pscore0 <- prediction_score(
+  as.list(x),
+  as.list(y)
+)
+
+# Now using vectors
+pscore1 <- prediction_score(
+  x,
+  y
+)
+
+pscore2 <- # Now using matrix
+prediction_score(
+  cbind(x),
+  cbind(y)
+)
+
+pscore3 <- # Now using a data frame
+prediction_score(
+  data.frame(x),
+  data.frame(y)
+)
+
+expect_equal(
+  pscore0$auc,
+  pscore1$auc
+)
+
+expect_equal(
+  pscore0$auc,
+  pscore2$auc
+)
+
+expect_equal(
+  pscore0$auc,
+  pscore3$auc
+)
 
diff --git a/man/prediction_score.Rd b/man/prediction_score.Rd
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -158,13 +158,13 @@ BEGIN_RCPP
 END_RCPP
 }
 // auc
-List auc(NumericVector pred, IntegerVector labels, int nc, bool nine_na);
+List auc(const NumericVector& pred, const IntegerVector& labels, int nc, bool nine_na);
 RcppExport SEXP _aphylo_auc(SEXP predSEXP, SEXP labelsSEXP, SEXP ncSEXP, SEXP nine_naSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< NumericVector >::type pred(predSEXP);
-    Rcpp::traits::input_parameter< IntegerVector >::type labels(labelsSEXP);
+    Rcpp::traits::input_parameter< const NumericVector& >::type pred(predSEXP);
+    Rcpp::traits::input_parameter< const IntegerVector& >::type labels(labelsSEXP);
     Rcpp::traits::input_parameter< int >::type nc(ncSEXP);
     Rcpp::traits::input_parameter< bool >::type nine_na(nine_naSEXP);
     rcpp_result_gen = Rcpp::wrap(auc(pred, labels, nc, nine_na));

diff --git a/src/auc.cpp b/src/auc.cpp
@@ -28,8 +28,8 @@ using namespace Rcpp;
 //' plot(ans_auc)
 // [[Rcpp::export]]
 List auc(
-    NumericVector pred,
-    IntegerVector labels,
+    const NumericVector & pred,
+    const IntegerVector & labels,
     int nc = 200,
     bool nine_na = true
 ) {