Update to allow new limma hypparam estimation

jpquast · Nov 2, 2024 · 0ab7714 · 0ab7714
1 parent 77b5010
commit 0ab7714
Show file tree

Hide file tree

Showing 4 changed files with 25 additions and 10 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -16,6 +16,7 @@
 
 ## Additional Changes
 
+* *IMPORTANT!* There has been a change to the hyperparameter estimation of the limma package (3.61.8) in the `eBayes()` function. This leads to a change of results when `method = "moderated_t-test"` is used in the `calculate_diff_abundance()` function. Therefore, we introduced the new argument `limma_legacy_estimation` that allows you to go back to the old method. The default behaviour is the new and improved estimation of parameters.
 * `assign_peptide_type` now takes the `start` argument, containing the start position of a peptide. If a protein does not have any peptide starting at position `1` and there is a peptide starting at position `2`, this peptide will be considered "tryptic" at the N-terminus. This is because the initial Methionine is likely missing due to processing for every copy of the protein and therefore position `2` is the true N-terminus.
 * `extract_metal_binders()` now uses keywords from UniProt as well. In addition, only "enables" GO terms are considered now.
 * `fetch_uniprot()` received another default column "keyword".

diff --git a/R/calculate_diff_abundance.R b/R/calculate_diff_abundance.R
@@ -107,6 +107,11 @@ diff_abundance <-
 #' @param p_adj_method a character value, specifies the p-value correction method. Possible
 #' methods are c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"). Default
 #' method is `"BH"`.
+#' @param limma_legacy_estimation a logical value, specifying if the new or old hyperparameter
+#' estimation of the limma package should be used. See the `legacy` argument of `?limma:eBayes`
+#' for more information. Our default is the same as for the `limma` package, which introduces
+#' a changed and more robust estimation of hyperparameters. This may cause changes to the
+#' results when compared to before August 2024 (limma 3.61.8).
 #' @param retain_columns a vector indicating if certain columns should be retained from the input
 #' data frame. Default is not retaining additional columns `retain_columns = NULL`. Specific
 #' columns can be retained by providing their names (not in quotations marks, just like other
@@ -214,6 +219,7 @@ calculate_diff_abundance <-
            filter_NA_missingness = TRUE,
            method = c("moderated_t-test", "t-test", "t-test_mean_sd", "proDA"),
            p_adj_method = "BH",
+           limma_legacy_estimation = NULL,
            retain_columns = NULL) {
     . <- NULL
     if (!(ref_condition %in% unique(pull(data, {{ condition }}))) & ref_condition != "all") {
@@ -553,7 +559,11 @@ missingness type is assigned.\n The created comparisons are: \n", prefix = "\n",
       message("DONE", appendLF = TRUE)
       message("[6/7] Compute empirical Bayes statistics ... ", appendLF = FALSE)
 
-      moderated_t_test_fit3 <- limma::eBayes(moderated_t_test_fit2)
+      if (packageVersion("limma") < "3.61.8"){
+        moderated_t_test_fit3 <- limma::eBayes(moderated_t_test_fit2)
+      } else {
+        moderated_t_test_fit3 <- limma::eBayes(moderated_t_test_fit2, legacy = limma_legacy_estimation)
+      }
 
       message("DONE", appendLF = TRUE)
       message("[7/7] Create result table ... ", appendLF = FALSE)
@@ -605,6 +615,10 @@ missingness type is assigned.\n The created comparisons are: \n", prefix = "\n",
         dplyr::arrange(.data$adj_pval, .data$pval)
 
       message("DONE", appendLF = TRUE)
+      # Check the limma version
+      if (packageVersion("limma") < "3.61.8" & !missing(limma_legacy_estimation)) {
+        warning("You provided the 'limma_legacy_estimation' argument, but your limma version does not support it. Update to a version higher than 3.61.8.")
+      }
 
       if (!missing(retain_columns)) {
         moderated_t_test_adj_pval <- data %>%

diff --git a/man/calculate_diff_abundance.Rd b/man/calculate_diff_abundance.Rd
diff --git a/tests/testthat/test-workflow.R b/tests/testthat/test-workflow.R
@@ -326,15 +326,8 @@ test_that("calculate_diff_abundance works", {
     expect_equal(ncol(diff_moderated), 13)
     expect_equal(ncol(diff_proDA), 12)
     expect_equal(round(min(diff_mean_sd$adj_pval, na.rm = TRUE), digits = 9), 0.00758761)
-    expect_equal(round(min(diff_moderated$adj_pval, na.rm = TRUE), digits = 9), 5.7616e-05)
+    expect_equal(round(min(diff_moderated$adj_pval, na.rm = TRUE), digits = 9), 3.87e-05)
     expect_equal(round(min(diff_proDA$adj_pval, na.rm = TRUE), digits = 5), 0.00125)
-
-    # For debugging
-    expect_equal(nrow(diff_moderated), 601)
-    expect_equal(round(min(diff_moderated$pval, na.rm = TRUE), digits = 9), 1.53e-07)
-    expect_equal(diff_moderated[1,]$peptide, "peptide_2_1")
-    expect_equal(diff_moderated %>% filter(pval < 0.05) %>% nrow(), 42)
-    expect_equal(diff_moderated %>% filter(adj_pval < 0.05) %>% nrow(), 4)
   }
 })
 
@@ -496,7 +489,7 @@ if (Sys.getenv("TEST_PROTTI") == "true") {
     expect_equal(ncol(diff_moderated_deprecated), 13)
     expect_equal(ncol(diff_proDA_deprecated), 12)
     expect_equal(round(min(diff_mean_sd_deprecated$adj_pval, na.rm = TRUE), digits = 9), 0.00758761)
-    expect_equal(round(min(diff_moderated_deprecated$adj_pval, na.rm = TRUE), digits = 9), 5.7616e-05)
+    expect_equal(round(min(diff_moderated_deprecated$adj_pval, na.rm = TRUE), digits = 9), 3.87e-05)
     expect_equal(round(min(diff_proDA_deprecated$adj_pval, na.rm = TRUE), digits = 5), 0.00125)
   })
 }