From adb64fdbf752ae951bab30bb38dc3706fdd429e1 Mon Sep 17 00:00:00 2001 From: scheidec Date: Mon, 23 Sep 2024 13:34:53 -0400 Subject: [PATCH] Add new calc_eLOD() function - added function that calculates the estimated limit of detection (eLOD) for SeqId columns of an input `soma_adat` or `data.frame` - included examples in function documentation of filtering an adat to buffer samples as well as filtering based on vector of SampleIds - updated spelling WORDLIST --- NAMESPACE | 5 ++ R/0-declare-global-variables.R | 1 + R/calc_eLOD.R | 93 +++++++++++++++++++++++++++++++++ _pkgdown.yml | 5 ++ inst/WORDLIST | 36 +++++++++---- man/calc_eLOD.Rd | 78 +++++++++++++++++++++++++++ tests/testthat/test-calc_eLOD.R | 62 ++++++++++++++++++++++ 7 files changed, 270 insertions(+), 10 deletions(-) create mode 100644 R/calc_eLOD.R create mode 100644 man/calc_eLOD.Rd create mode 100644 tests/testthat/test-calc_eLOD.R diff --git a/NAMESPACE b/NAMESPACE index 882a549..2afd421 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -68,6 +68,7 @@ export(anti_join) export(antilog) export(apt2seqid) export(arrange) +export(calc_eLOD) export(checkSomaScanVersion) export(cleanNames) export(col2rn) @@ -129,6 +130,7 @@ export(slice_sample) export(ungroup) export(unite) export(write_adat) +importFrom(dplyr,across) importFrom(dplyr,all_of) importFrom(dplyr,anti_join) importFrom(dplyr,any_of) @@ -148,6 +150,8 @@ importFrom(dplyr,select) importFrom(dplyr,semi_join) importFrom(dplyr,slice) importFrom(dplyr,slice_sample) +importFrom(dplyr,starts_with) +importFrom(dplyr,summarise) importFrom(dplyr,ungroup) importFrom(lifecycle,deprecate_soft) importFrom(lifecycle,deprecate_stop) @@ -167,6 +171,7 @@ importFrom(stats,setNames) importFrom(tibble,as_tibble) importFrom(tibble,deframe) importFrom(tibble,enframe) +importFrom(tibble,is_tibble) importFrom(tibble,tibble) importFrom(tidyr,pivot_longer) importFrom(tidyr,separate) diff --git a/R/0-declare-global-variables.R b/R/0-declare-global-variables.R index 
c268c17..cfbd44e 100644
--- a/R/0-declare-global-variables.R
+++ b/R/0-declare-global-variables.R
@@ -12,6 +12,7 @@ utils::globalVariables(
     "array_id",
     "blank_col",
     "Dilution",
+    "eLOD",
     "feature",
     "prefix",
     "rn",
diff --git a/R/calc_eLOD.R b/R/calc_eLOD.R
new file mode 100644
index 0000000..1030b2b
--- /dev/null
+++ b/R/calc_eLOD.R
@@ -0,0 +1,116 @@
+#' Calculate Estimated Limit of Detection (eLOD)
+#'
+#' Calculate the estimated limit of detection (eLOD) for SOMAmer reagent
+#' analytes in the provided input data. The input data should be filtered to
+#' include only buffer samples desired for eLOD calculation.
+#'
+#' eLOD is calculated using the following steps:
+#'
+#' 1. For each SOMAmer, the median and adjusted median absolute
+#'    deviation (\eqn{MAD_{Adjusted}}) are calculated, where
+#'    \deqn{MAD_{Adjusted} = 1.4826 * MAD}
+#'    The 1.4826 is a set constant used to adjust the MAD to be reflective of
+#'    the standard deviation of the normal distribution.
+#' 2. For each SOMAmer, calculate \deqn{eLOD = median + 3.3 * MAD_{Adjusted}}
+#'
+#' Note: The eLOD is useful for non-core matrices, including cell lysate
+#' and CSF, but should be used carefully for evaluating background signal in
+#' plasma and serum.
+#'
+#' @param data A `soma_adat`, `data.frame`, or `tibble` object including
+#' SeqId columns (`seq.xxxxx.xx`) containing RFU values.
+#' @return A `tibble` object with 2 columns: SeqId and eLOD.
+#' @author Caleb Scheidel, Christopher Dimapasok
+#' @examples
+#' # filter data frame using vector of SampleId controls
+#' df <- withr::with_seed(101, {
+#'   data.frame(
+#'     SampleType = rep(c("Sample", "Buffer"), each = 10),
+#'     SampleId = paste0("Sample_", 1:20),
+#'     seq.20.1.100 = runif(20, 1, 100),
+#'     seq.21.1.100 = runif(20, 1, 100),
+#'     seq.22.2.100 = runif(20, 1, 100)
+#'   )
+#' })
+#' sample_ids <- paste0("Sample_", 11:20)
+#' selected_samples <- df |> filter(SampleId %in% sample_ids)
+#'
+#' selected_elod <- calc_eLOD(selected_samples)
+#' head(selected_elod)
+#' \dontrun{
+#' # filter `soma_adat` object to buffer samples
+#' buffer_samples <- example_data |> filter(SampleType == "Buffer")
+#'
+#' # calculate eLOD
+#' buffer_elod <- calc_eLOD(buffer_samples)
+#' head(buffer_elod)
+#'
+#' # use eLOD to calculate signal to noise ratio of samples
+#' samples_median <- example_data |> dplyr::filter(SampleType == "Sample") |>
+#'   dplyr::summarise(across(starts_with("seq"), median, .names = "median_{col}")) |>
+#'   tidyr::pivot_longer(starts_with("median_"), names_to = "SeqId",
+#'                       values_to = "median_signal") |>
+#'   dplyr::mutate(SeqId = gsub("median_seq", "seq", SeqId))
+#'
+#' # analytes with signal to noise > 2
+#' ratios <- samples_median |>
+#'   dplyr::mutate(signal_to_noise = median_signal / buffer_elod$eLOD) |>
+#'   dplyr::filter(signal_to_noise > 2) |>
+#'   dplyr::arrange(desc(signal_to_noise))
+#'
+#' head(ratios)
+#' }
+#' @importFrom dplyr across all_of mutate select summarise starts_with
+#' @importFrom stats mad median
+#' @importFrom tibble as_tibble is_tibble
+#' @importFrom tidyr pivot_longer
+#' @export
+calc_eLOD <- function(data) {
+
+  # scalar, short-circuiting `||`: this is a single yes/no validation,
+  # not an elementwise mask
+  stopifnot(
+    "`data` must be a soma_adat, tibble, or data.frame" =
+      is.soma_adat(data) || is.data.frame(data) || is_tibble(data)
+  )
+
+  # if `SampleType` in adat, check for buffer samples only
+  if ( "SampleType" %in% names(data) &&
+       any(c("Sample", "Calibrator", "QC") %in% data$SampleType) ) {
+    warning("Ensure input data includes buffer samples only!", call. = FALSE)
+  }
+
+  # SeqId columns are named `seq.xxxxx.xx`; match the `seq.` prefix exactly
+  # (case-sensitive) so columns such as `SeqId` or `Sequence` are never
+  # mistaken for RFU data
+  seq_cols <- names(data)[startsWith(names(data), "seq.")]
+
+  if ( length(seq_cols) == 0L ) {
+    stop("`data` must contain at least one SeqId column (`seq.xxxxx.xx`).",
+         call. = FALSE)
+  }
+
+  # fail early and name the offending columns rather than erroring deep
+  # inside `median()` / `mad()`
+  non_numeric <- !vapply(seq_cols, function(col) is.numeric(data[[col]]),
+                         logical(1))
+  if ( any(non_numeric) ) {
+    stop("SeqId columns must be numeric: ",
+         paste(seq_cols[non_numeric], collapse = ", "), call. = FALSE)
+  }
+
+  # formula to calculate eLOD; 1.4826 is the MAD adjustment constant
+  # described in the function documentation
+  elod <- function(x) {
+    median(x) + 3.3 * mad(x, constant = 1.4826)
+  }
+
+  # Calculate eLOD for each SeqId, then reshape to 1 row per SeqId
+  result <- data |>
+    summarise(across(all_of(seq_cols), elod, .names = "eLOD_{col}")) |>
+    pivot_longer(starts_with("eLOD_", ignore.case = FALSE),
+                 names_to = "SeqId", values_to = "eLOD") |>
+    mutate(SeqId = sub("^eLOD_", "", SeqId)) |>
+    select(SeqId, eLOD)
+
+  as_tibble(result)
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 22ed615..48ac6f8 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -167,6 +167,11 @@ reference:
       - cleanNames
       - getAdatVersion
 
+  - title: Data Summaries
+    desc: Functions to assist with summarizing SOMAmer RFU values.
+    contents:
+      - calc_eLOD
+
   - title: Data Objects
     desc: Objects provided with `SomaDataIO`.
     contents:
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 2995cf7..bfbc4c1 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -8,14 +8,17 @@ AptName
 AssayNotes
 Barcode
 Biobase
+Biometrics
 CCC
 CLI
 CMD
+CSF
 CalQcRatio
 CalReference
 Codecov
 ColCheck
 Covance
+Dimapasok
 EDTA
 EID
 EOL
@@ -26,11 +29,11 @@ EntrezGeneSymbol
 ExpressionSet
 ExtIdentifier
 HybControlNormScale
+Kuei
 LF
 Lifecycle
 MERCHANTABILITY
 MacOS
-magrittr
 NormScale
 ORCID
 PII
@@ -39,15 +42,13 @@ PercentDilution
 PlateId
 PlatePosition
 PlateScale
-plex
-proteomic
 QcReference
 README
 RFU
 RFUs
 RUO
 ReferenceRFU
-Rmarkdown
+Reproducibility
 RowCheck
 SELEX
 SG
@@ -61,9 +62,11 @@ SampleMatrix
 SampleNotes
 SampleType
 ScannerID
+Scheidel
 SeqId
 SeqIds
 SeqidVersion
+Setdiff
 SiteId
 SlideId
 SomaId
@@ -76,27 +79,40 @@ Tabacman
 TargetFullName
 TimePoint
 TubeUniqueID
+Un
 UniProt
+YAML
 adat
 aliquot
 analyte
 analytes
 barcode
-bioconductor
-choosealicense
+cli
 dplyr
+eLOD
 eSet
+frac
 funder
-https
+intra
+leftrightarrow
 lifecycle
+lysate
+magrittr
 medNormRef
-mit
 nd
+normals
 pkgdown
+plex
 pre
+proteomic
+readxl
+rightarrow
+rowname
+rsample
 subarray
 tada
 th
 tibble
-tldrlegal
-www
+tidyr
+usethis
+vectorized
diff --git a/man/calc_eLOD.Rd b/man/calc_eLOD.Rd new file mode 100644 index 0000000..84c6518 --- /dev/null +++ b/man/calc_eLOD.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calc_eLOD.R +\name{calc_eLOD} +\alias{calc_eLOD} +\title{Calculate Estimated Limit of Detection (eLOD)} +\usage{ +calc_eLOD(data) +} +\arguments{ +\item{data}{A \code{soma_adat}, \code{data.frame}, or \code{tibble} object including +SeqId columns (\code{seq.xxxxx.xx}) containing RFU values.} +} +\value{ +A \code{tibble} object with 2 columns: SeqId and eLOD. +} +\description{ +Calculate the estimated limit of detection (eLOD) for SOMAmer reagent +analytes in the provided input data. The input data should be filtered to +include only buffer samples desired for eLOD calculation. +} +\details{ +eLOD is calculated using the following steps: +\enumerate{ +\item For each SOMAmer, the median and adjusted median absolute +deviation (\eqn{MAD_{Adjusted}}) are calculated, where +\deqn{MAD_{Adjusted} = 1.4826 * MAD} +The 1.4826 is a set constant used to adjust the MAD to be reflective of +the standard deviation of the normal distribution. +\item For each SOMAmer, calculate \deqn{eLOD = median + 3.3 * MAD_{Adjusted}} +} + +Note: The eLOD is useful for non-core matrices, including cell lysate +and CSF, but should be used carefully for evaluating background signal in +plasma and serum. 
+}
+\examples{
+# filter data frame using vector of SampleId controls
+df <- withr::with_seed(101, {
+  data.frame(
+    SampleType = rep(c("Sample", "Buffer"), each = 10),
+    SampleId = paste0("Sample_", 1:20),
+    seq.20.1.100 = runif(20, 1, 100),
+    seq.21.1.100 = runif(20, 1, 100),
+    seq.22.2.100 = runif(20, 1, 100)
+  )
+})
+sample_ids <- paste0("Sample_", 11:20)
+selected_samples <- df |> filter(SampleId \%in\% sample_ids)
+
+selected_elod <- calc_eLOD(selected_samples)
+head(selected_elod)
+\dontrun{
+# filter `soma_adat` object to buffer samples
+buffer_samples <- example_data |> filter(SampleType == "Buffer")
+
+# calculate eLOD
+buffer_elod <- calc_eLOD(buffer_samples)
+head(buffer_elod)
+
+# use eLOD to calculate signal to noise ratio of samples
+samples_median <- example_data |> dplyr::filter(SampleType == "Sample") |>
+  dplyr::summarise(across(starts_with("seq"), median, .names = "median_{col}")) |>
+  tidyr::pivot_longer(starts_with("median_"), names_to = "SeqId",
+                      values_to = "median_signal") |>
+  dplyr::mutate(SeqId = gsub("median_seq", "seq", SeqId))
+
+# analytes with signal to noise > 2
+ratios <- samples_median |>
+  dplyr::mutate(signal_to_noise = median_signal / buffer_elod$eLOD) |>
+  dplyr::filter(signal_to_noise > 2) |>
+  dplyr::arrange(desc(signal_to_noise))
+
+head(ratios)
+}
+}
+\author{
+Caleb Scheidel, Christopher Dimapasok
+}
diff --git a/tests/testthat/test-calc_eLOD.R b/tests/testthat/test-calc_eLOD.R
new file mode 100644
index 0000000..a753f59
--- /dev/null
+++ b/tests/testthat/test-calc_eLOD.R
@@ -0,0 +1,70 @@
+# Setup ----
+# soma_adat input filtered to "Buffer" samples, keeping the last 10 analytes
+drop_seqs <- head(getAnalytes(example_data), -10L)
+
+buffer_samples <- example_data |>
+  filter(SampleType == "Buffer") |>
+  select(-all_of(drop_seqs))
+
+# data.frame input
+df <- withr::with_seed(101, {
+  data.frame(
+    SampleType = rep(c("Sample", "Buffer"), each = 10),
+    SampleId = paste0("Sample_", 1:20),
+    seq.20.1.100 = runif(20, 1, 100),
+    seq.21.1.100 = runif(20, 1, 100),
+    seq.22.2.100 = runif(20, 1, 100)
+  )
+})
+sample_ids <- paste0("Sample_", 11:20)
+selected_samples <- df |> filter(SampleId %in% sample_ids)
+
+# Testing ----
+test_that("`calc_eLOD` warns when non-buffer samples are present", {
+  expect_warning(
+    calc_eLOD(example_data),
+    "Ensure input data includes buffer samples only!",
+    fixed = TRUE
+  )
+})
+
+test_that("`calc_eLOD` errors when `data` is not a data frame", {
+  expect_error(
+    calc_eLOD(list(SampleId = 1:3, seq.1000.123 = 100:102)),
+    "`data` must be a soma_adat, tibble, or data.frame",
+    fixed = TRUE
+  )
+})
+
+test_that("`calc_eLOD` works on a soma_adat input filtered to buffer samples", {
+  out <- calc_eLOD(buffer_samples)
+
+  expect_s3_class(out, "tbl_df")
+  expect_equal(dim(out), c(10L, 2L))
+  expect_equal(
+    head(out, 3),
+    tibble(SeqId = c("seq.9981.18", "seq.9983.97", "seq.9984.12"),
+           eLOD  = c(45.08555, 52.98848, 123.02824)),
+    tolerance = 0.00001
+  )
+})
+
+test_that("`calc_eLOD` works on a data.frame input", {
+  out <- calc_eLOD(selected_samples)
+
+  expect_s3_class(out, "tbl_df")
+  expect_equal(dim(out), c(3L, 2L))
+  expect_equal(
+    head(out, 3),
+    tibble(SeqId = c("seq.20.1.100", "seq.21.1.100", "seq.22.2.100"),
+           eLOD  = c(168.0601, 130.7047, 115.9958)),
+    tolerance = 0.0001
+  )
+})
+
+test_that("`calc_eLOD` handles tibble input like data.frame input", {
+  expect_equal(
+    calc_eLOD(tibble::as_tibble(selected_samples)),
+    calc_eLOD(selected_samples)
+  )
+})