Add new calc_eLOD() function

- Added a function that calculates the estimated limit of detection (eLOD) for the SeqId columns of an input `soma_adat` or `data.frame`.
- Included examples in the function documentation that filter an ADAT to buffer samples and that filter by a vector of SampleIds.
SomaLogic · Sep 23, 2024 · 5a0c89c · 5a0c89c
1 parent 9586713
commit 5a0c89c
Show file tree

Hide file tree

Showing 7 changed files with 225 additions and 1 deletion.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -54,4 +54,4 @@ LazyLoad: true
 Config/testthat/edition: 3
 Config/Needs/website: tidyverse/tidytemplate
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -68,6 +68,7 @@ export(anti_join)
 export(antilog)
 export(apt2seqid)
 export(arrange)
+export(calc_eLOD)
 export(checkSomaScanVersion)
 export(cleanNames)
 export(col2rn)
@@ -129,6 +130,7 @@ export(slice_sample)
 export(ungroup)
 export(unite)
 export(write_adat)
+importFrom(dplyr,across)
 importFrom(dplyr,all_of)
 importFrom(dplyr,anti_join)
 importFrom(dplyr,any_of)
@@ -148,6 +150,8 @@ importFrom(dplyr,select)
 importFrom(dplyr,semi_join)
 importFrom(dplyr,slice)
 importFrom(dplyr,slice_sample)
+importFrom(dplyr,starts_with)
+importFrom(dplyr,summarise)
 importFrom(dplyr,ungroup)
 importFrom(lifecycle,deprecate_soft)
 importFrom(lifecycle,deprecate_stop)

diff --git a/R/0-declare-global-variables.R b/R/0-declare-global-variables.R
@@ -12,6 +12,7 @@ utils::globalVariables(
     "array_id",
     "blank_col",
     "Dilution",
+    "eLOD",
     "feature",
     "prefix",
     "rn",

diff --git a/R/calc_eLOD.R b/R/calc_eLOD.R
@@ -0,0 +1,85 @@
+#' Calculate Estimated Limit of Detection (eLOD)
+#'
+#' Calculate the estimated limit of detection (eLOD) for SOMAmer reagent
+#' analytes in the provided input data. The input data should be filtered to
+#' include only buffer samples desired for eLOD calculation. eLOD is calculated
+#' using the following steps:
+#'
+#' 1. For each SOMAmer, the median and median absolute deviation (MAD) are
+#'    calculated. The MAD is scaled by the constant 1.4826 (see [stats::mad()]).
+#' 2. For each SOMAmer, calculate \eqn{eLOD = median + 3.3 * MAD}
+#'
+#' Note: The eLOD is useful for non-core matrices, including cell lysate
+#' and CSF, but should be used carefully for evaluating background signal in
+#' plasma and serum.
+#'
+#' @param data A `soma_adat`, `data.frame`, or `tibble` object including
+#' SeqId columns (`seq.xxxxx.xx`) containing RFU values.
+#' @return A `tibble` object with 2 columns: SeqId and eLOD.
+#' @author Caleb Scheidel
+#' @examples
+#' # filter data frame using vector of SampleId controls
+#' df <- withr::with_seed(101, {
+#'   data.frame(
+#'     SampleType = rep(c("Sample", "Buffer"), each = 10),
+#'     SampleId = paste0("Sample_", 1:20),
+#'     seq.20.1.100 = runif(20, 1, 100),
+#'     seq.21.1.100 = runif(20, 1, 100),
+#'     seq.22.2.100 = runif(20, 1, 100)
+#'   )
+#' })
+#' sample_ids <- paste0("Sample_", 11:20)
+#' selected_samples <- df |> filter(SampleId %in% sample_ids)
+#'
+#' selected_elod <- calc_eLOD(selected_samples)
+#' head(selected_elod)
+#' \dontrun{
+#' # filter `soma_adat` object to buffer samples
+#' buffer_samples <- example_data |> filter(SampleType == "Buffer")
+#'
+#' # calculate eLOD
+#' buffer_elod <- calc_eLOD(buffer_samples)
+#' head(buffer_elod)
+#'
+#' # use eLOD to calculate signal to noise ratio of samples
+#' samples_median <- example_data |> dplyr::filter(SampleType == "Sample") |>
+#'   dplyr::summarise(dplyr::across(dplyr::starts_with("seq"), median)) |>
+#'   tidyr::pivot_longer(dplyr::everything(), names_to = "SeqId",
+#'                       values_to = "median_signal")
+#'
+#' # analytes with signal to noise > 2 (matched on SeqId, not row position)
+#' ratios <- dplyr::left_join(samples_median, buffer_elod, by = "SeqId") |>
+#'   dplyr::mutate(signal_to_noise = median_signal / eLOD) |>
+#'   dplyr::filter(signal_to_noise > 2) |>
+#'   dplyr::arrange(dplyr::desc(signal_to_noise))
+#'
+#' head(ratios)
+#' }
+#' @importFrom dplyr across select summarise starts_with
+#' @importFrom stats mad median
+#' @importFrom tibble as_tibble
+#' @importFrom tidyr pivot_longer
+#' @export
+calc_eLOD <- function(data) {
+
+  # reject known non-buffer sample types when `SampleType` is available
+  if ( "SampleType" %in% names(data) &&
+       any(c("Sample", "Calibrator", "QC") %in% data$SampleType) ) {
+    stop("Input data must include Buffer SampleType only!", call. = FALSE)
+  }
+  # `starts_with("seq")` below matches case-insensitively; mirror that here
+  if ( !any(startsWith(tolower(names(data)), "seq")) ) {
+    stop("No SeqId (`seq.*`) columns found in `data`.", call. = FALSE)
+  }
+
+  # eLOD = median + 3.3 * MAD; 1.4826 is the scale factor passed to `mad()`
+  elod <- function(x) median(x) + 3.3 * mad(x, constant = 1.4826)
+
+  # one eLOD per SeqId column; `names_prefix` strips only the leading "eLOD_"
+  data |>
+    summarise(across(starts_with("seq"), elod, .names = "eLOD_{col}")) |>
+    pivot_longer(starts_with("eLOD_"), names_to = "SeqId", values_to = "eLOD",
+                 names_prefix = "eLOD_") |>
+    select(SeqId, eLOD) |>
+    as_tibble()
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -167,6 +167,11 @@ reference:
     - cleanNames
     - getAdatVersion
 
+  - title: Data Summaries
+    desc: Functions to assist with summarizing SOMAmer RFU values.
+    contents:
+    - calc_eLOD
+
   - title: Data Objects
     desc: Objects provided with `SomaDataIO`.
     contents:

diff --git a/man/calc_eLOD.Rd b/man/calc_eLOD.Rd
diff --git a/tests/testthat/test-calc_eLOD.R b/tests/testthat/test-calc_eLOD.R
@@ -0,0 +1,55 @@
+# Setup ----
+# soma_adat input filtered to "Buffer" samples
+buffer_samples <- example_data |> filter(SampleType == "Buffer")
+
+# drop all but the last 10 analytes so the expected output below stays small
+drop_seqs <- head(getAnalytes(example_data), -10L)
+
+buffer_samples <- buffer_samples |> select(-all_of(drop_seqs))
+
+# data.frame input: 10 "Sample" rows followed by 10 "Buffer" rows
+df <- withr::with_seed(101, {
+  data.frame(
+    SampleType = rep(c("Sample", "Buffer"), each = 10),
+    SampleId = paste0("Sample_", 1:20),
+    seq.20.1.100 = runif(20, 1, 100),
+    seq.21.1.100 = runif(20, 1, 100),
+    seq.22.2.100 = runif(20, 1, 100)
+  )
+})
+sample_ids <- paste0("Sample_", 11:20)
+selected_samples <- df |> filter(SampleId %in% sample_ids)
+
+# Testing ----
+test_that("`calc_eLOD` produces an error when it should", {
+  expect_error(
+    calc_eLOD(example_data),
+    "Input data must include Buffer SampleType only!"
+  )
+})
+
+test_that("`calc_eLOD` works on a soma_adat input filtered to buffer samples", {
+  out <- calc_eLOD(buffer_samples)
+
+  expect_s3_class(out, "tbl_df")
+  expect_equal(dim(out), c(10L, 2L))
+  expect_equal(
+    head(out, 3),
+    tibble(SeqId = c("seq.9981.18", "seq.9983.97", "seq.9984.12"),
+           eLOD  = c(45.08555, 52.98848, 123.02824)),
+    tolerance = 0.00001
+  )
+})
+
+test_that("`calc_eLOD` works on a data.frame input", {
+  out <- calc_eLOD(selected_samples)
+
+  expect_s3_class(out, "tbl_df")
+  expect_equal(dim(out), c(3L, 2L))
+  expect_equal(
+    head(out, 3),
+    tibble(SeqId = c("seq.20.1.100", "seq.21.1.100", "seq.22.2.100"),
+           eLOD  = c(168.0601, 130.7047, 115.9958)),
+    tolerance = 0.0001
+  )
+})