diff --git a/NAMESPACE b/NAMESPACE
index 987fcd2..0038066 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -106,6 +106,7 @@ export(loadAdatsAsList)
 export(locateSeqId)
 export(matchSeqIds)
 export(meltExpressionSet)
+export(merge_clin)
 export(mutate)
 export(parseHeader)
 export(pivotExpressionSet)
@@ -172,6 +173,7 @@ importFrom(tidyr,unite)
 importFrom(tools,md5sum)
 importFrom(utils,capture.output)
 importFrom(utils,head)
+importFrom(utils,read.csv)
 importFrom(utils,read.delim)
 importFrom(utils,tail)
 importFrom(utils,write.table)
diff --git a/R/merge-clin.R b/R/merge-clin.R
new file mode 100644
index 0000000..cf0dcd7
--- /dev/null
+++ b/R/merge-clin.R
@@ -0,0 +1,84 @@
+#' Merge Clinical Data into Data Frame
+#'
+#' Occasionally, additional clinical data is obtained _after_ samples
+#' have been submitted to SomaLogic, Inc. or even after 'SomaScan'
+#' results have been delivered.
+#' This requires the new clinical variables, i.e. non-proteomic, data to be
+#' merged with 'SomaScan' data into a "new" ADAT prior to analysis.
+#' This wrapper easily merges such clinical variables into an
+#' existing 2 dimensional data frame object, e.g. a `soma_adat`,
+#' and is a simple wrapper around [dplyr::left_join()].
+#'
+#' This is a package export of the `merge_clin.R` command-line tool (R script)
+#' that lives in the `cli/merge` system file directory. Please see:
+#' \itemize{
+#'   \item `dir(system.file("cli/merge", package = "SomaDataIO"), full.names = TRUE)`
+#'   \item `vignette("cli-merge-tool", package = "SomaDataIO")`
+#' }
+#'
+#' @inheritParams params
+#' @param clin_data One of 2 options:
+#'   \itemize{
+#'     \item A data frame containing clinical variables to merge into `x`, or
+#'     \item A path to a file, typically a `*.csv`,
+#'       containing clinical variables to merge into `x`.
+#'   }
+#' @param by A character vector of variables to join by.
+#'   See [dplyr::left_join()] for more details.
+#' @param ... Additional parameters passed to [dplyr::left_join()].
+#' @return An object of the same class as `x` with new clinical
+#'   variables merged.
+#' @author Stu Field
+#' @seealso [dplyr::left_join()]
+#' @examples
+#' # retrieve clinical data
+#' clin_file <- system.file("cli/merge/meta.csv", package = "SomaDataIO", mustWork = TRUE)
+#' clin_file
+#'
+#' # view clinical data to be merged:
+#' #   1) `group`
+#' #   2) `newvar`
+#' clin_df <- read.csv(clin_file, header = TRUE)
+#' clin_df
+#'
+#' # ensure compatible type for `by =`
+#' clin_df$SampleId <- as.character(clin_df$SampleId)
+#'
+#' # create mini-adat
+#' apts <- withr::with_seed(123, sample(getAnalytes(example_data), 3L))
+#' adat <- head(example_data, 10L) |>
+#'   dplyr::select(SampleId, all_of(apts))
+#'
+#' # merge clinical variables
+#' adat_merged <- merge_clin(adat, clin_df, by = "SampleId")
+#' adat_merged
+#'
+#' # Alternative syntax:
+#' #   merge on different variable names
+#' clin_df2 <- system.file("cli/merge/meta2.csv", package = "SomaDataIO",
+#'                         mustWork = TRUE) |> read.csv(header = TRUE)
+#' clin_df2
+#'
+#' clin_df2$ClinKey <- as.character(clin_df2$ClinKey)
+#' adat_merged2 <- merge_clin(adat, clin_df2, by = c(SampleId = "ClinKey"))
+#' adat_merged2
+#' @importFrom utils read.csv
+#' @importFrom dplyr left_join
+#' @export
+merge_clin <- function(x, clin_data, by = NULL, ...) {
+
+  stopifnot("`x` must be a `soma_adat`." = is.soma_adat(x))
+
+  if ( inherits(clin_data, "data.frame") ) {
+    clin_df <- clin_data   # already a data frame: use as-is
+  } else if ( is.character(clin_data) && length(clin_data) == 1L &&
+              file.exists(clin_data) ) {
+    clin_df <- normalizePath(clin_data, mustWork = TRUE) |>
+      utils::read.csv(header = TRUE)   # single existing path: read as CSV
+  } else {
+    stop("Invalid `clin_data` argument: ", .value(class(clin_data)), call. = FALSE)
+  }
+
+  dplyr::left_join(x, clin_df, by = by, ...)   # forward `...` as documented
+}
+
diff --git a/_pkgdown.yml b/_pkgdown.yml
index d82b4aa..acdd335 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -136,6 +136,7 @@ reference:
       - starts_with("getAnalyte")
       - getMeta
       - diffAdats
+      - merge_clin
 
   - title: Transform Between SomaScan Versions
     desc: >
diff --git a/man/merge_clin.Rd b/man/merge_clin.Rd
new file mode 100644
index 0000000..c801261
--- /dev/null
+++ b/man/merge_clin.Rd
@@ -0,0 +1,84 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/merge-clin.R
+\name{merge_clin}
+\alias{merge_clin}
+\title{Merge Clinical Data into Data Frame}
+\usage{
+merge_clin(x, clin_data, by = NULL, ...)
+}
+\arguments{
+\item{x}{A \code{soma_adat} object (with intact attributes),
+typically created using \code{\link[=read_adat]{read_adat()}}.}
+
+\item{clin_data}{One of 2 options:
+\itemize{
+\item A data frame containing clinical variables to merge into \code{x}, or
+\item A path to a file, typically a \verb{*.csv},
+containing clinical variables to merge into \code{x}.
+}}
+
+\item{by}{A character vector of variables to join by.
+See \code{\link[dplyr:mutate-joins]{dplyr::left_join()}} for more details.}
+
+\item{...}{Additional parameters passed to \code{\link[dplyr:mutate-joins]{dplyr::left_join()}}.}
+}
+\value{
+An object of the same class as \code{x} with new clinical
+variables merged.
+}
+\description{
+It is very common that additional clinical variables were either
+not delivered with SomaScan data or they were collected \emph{after}
+the samples were collected, and thus need to be merged/joined
+with existing SomaScan data prior to analysis.
+This wrapper easily merges such clinical variables into an
+existing 2 dimensional data frame object, e.g. a \code{soma_adat},
+and is a simple wrapper around \code{\link[dplyr:mutate-joins]{dplyr::left_join()}}.
+} +\details{ +This is a package export of the \code{merge_clin.R} command-line R script +that lives in the \code{cli/merge} system file directory. Please see: +\itemize{ +\item \code{dir(system.file("cli/merge", package = "SomaDataIO"), full.names = TRUE)} +\item \code{vignette("cli-merge-tool", package = "SomaDataIO")} +} +} +\examples{ +# retrieve clinical data +clin_file <- system.file("cli/merge/meta.csv", package = "SomaDataIO", mustWork = TRUE) +clin_file + +# view clinical data to be merged: +# 1) `group` +# 2) `newvar` +clin_df <- read.csv(clin_file, header = TRUE) +clin_df + +# ensure compatible type for `by =` +clin_df$SampleId <- as.character(clin_df$SampleId) + +# create mini-adat +apts <- withr::with_seed(123, sample(getAnalytes(example_data), 3L)) +adat <- head(example_data, 10L) |> + dplyr::select(SampleId, all_of(apts)) + +# merge clinical variables +adat_merged <- merge_clin(adat, clin_df, by = "SampleId") +adat_merged + +# Alternative syntax: +# merge on different variable names +clin_df2 <- system.file("cli/merge/meta2.csv", package = "SomaDataIO", + mustWork = TRUE) |> read.csv(header = TRUE) +clin_df2 + +clin_df2$ClinKey <- as.character(clin_df2$ClinKey) +adat_merged2 <- merge_clin(adat, clin_df2, by = c(SampleId = "ClinKey")) +adat_merged2 +} +\seealso{ +\code{\link[dplyr:mutate-joins]{dplyr::left_join()}} +} +\author{ +Stu Field +} diff --git a/vignettes/cli-merge-tool.Rmd b/vignettes/cli-merge-tool.Rmd index 13bac86..c946eba 100644 --- a/vignettes/cli-merge-tool.Rmd +++ b/vignettes/cli-merge-tool.Rmd @@ -39,6 +39,9 @@ in the `cli/merge/` directory, which allows one to generate an updated `*.adat` file via the command-line without having to launch an integrated development environment ("IDE"), e.g. `RStudio`. +To use `SomaDataIO`'s exported functionality from _within_ an R session, +please see `merge_clin()`. + ----------------