diff --git a/NEWS.md b/NEWS.md index 1f40890..c3f1a5a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -84,6 +84,14 @@ ### Updates and Improvements :hammer: +* `collapseAdats()` better combines `HEADER` information (#86) + - certain information, e.g. `PlateScale` and `Cal*`, + are better maintained in the final collapsed ADAT + - other entries are combined by pasting into a single string + - should result in less duplication of superfluous entries and + retention of more "useful" `HEADER` information + in the resulting (collapsed) `soma_adat` + * Update `read_annotations()` with `11k` content (#85) * Update `transform()` and `scaleAnalytes()` diff --git a/R/loadAdatsAsList.R b/R/loadAdatsAsList.R index 34fd49a..492c08f 100644 --- a/R/loadAdatsAsList.R +++ b/R/loadAdatsAsList.R @@ -50,7 +50,7 @@ #' #' # Alternatively use `collapse = TRUE` #' \donttest{ -#' loadAdatsAsList(files, collapse = TRUE) +#' loadAdatsAsList(files, collapse = TRUE) #' } #' @importFrom stats setNames #' @export @@ -82,15 +82,47 @@ loadAdatsAsList <- function(files, collapse = FALSE, verbose = interactive(), .. #' @export collapseAdats <- function(x) { is_adat <- vapply(x, is.soma_adat, NA) - stopifnot(all(is_adat)) + stopifnot("All entries in list `x` must be `soma_adat` class." = all(is_adat)) common <- Reduce(intersect, lapply(x, names)) # common df names # rm names so rownames are re-constructed via `rbind()` new <- lapply(unname(x), function(.x) dplyr::select(.x, all_of(common))) new <- do.call(rbind, new) new_header <- lapply(x, attr, which = "Header.Meta") |> - lapply(`[[`, "HEADER") - attributes(new)$Header.Meta$HEADER <- Reduce(`c`, new_header) - nms <- names(attributes(x[[1L]])) # attr order or 1st adat + lapply(`[[`, "HEADER") |> + Reduce(f = combine_header) + new_header$CollapsedAdats <- paste(names(x), collapse = ", ") + attributes(new)$Header.Meta$HEADER <- new_header + nms <- names(attributes(x[[1L]])) # attr order of 1st adat attributes(new) <- attributes(new)[nms] # orig order new } + + +# helper to smartly combine header info +# from multiple ADATs +combine_header <- function(x, y) { + # intersecting entries: to be pasted/merged + keep <- c("AssayRobot", "CreatedDate", "Title", "ExpDate") + keep <- intersect(keep, intersect(names(x), names(y))) + # plate and cal entries: to be pasted/merged + plt <- intersect(grep("^Plate|^Cal", names(x), value = TRUE), + grep("^Plate|^Cal", names(y), value = TRUE)) + # new entries in 'y': to be added + set_yx <- setdiff(names(y), names(x)) + for ( i in c(keep, plt, set_yx) ) { + if ( i %in% names(x) ) { + x[[i]] <- paste_xy(x[[i]], y[[i]]) # maintains attrs of 'x' + } else { + x[[i]] <- y[[i]] # new added entries + } + } + x +} + +# pastes and maintains attrs +paste_xy <- function(x, y, sep = ", ", ...) { + atts <- attributes(x) # maintain attrs of 'x' + x <- paste(x, y, sep = sep, ...) + attributes(x) <- atts + x +} diff --git a/man/loadAdatsAsList.Rd b/man/loadAdatsAsList.Rd index 35ad8aa..7643082 100644 --- a/man/loadAdatsAsList.Rd +++ b/man/loadAdatsAsList.Rd @@ -69,7 +69,7 @@ class(collapsed) # Alternatively use `collapse = TRUE` \donttest{ -loadAdatsAsList(files, collapse = TRUE) + loadAdatsAsList(files, collapse = TRUE) } } \seealso{ diff --git a/tests/testthat/_snaps/loadAdatsAsList.md b/tests/testthat/_snaps/loadAdatsAsList.md index 1160f35..90f0d4a 100644 --- a/tests/testthat/_snaps/loadAdatsAsList.md +++ b/tests/testthat/_snaps/loadAdatsAsList.md @@ -51,7 +51,7 @@ [1] "!AssayVersion" $AssayRobot - [1] "Fluent 1 L-307" + [1] "Fluent 1 L-307, Fluent 1 L-307" attr(,"raw_key") [1] "!AssayRobot" @@ -66,7 +66,7 @@ [1] "!CreatedBy" $CreatedDate - [1] "2020-07-24" + [1] "2020-07-24, 2020-07-25" attr(,"raw_key") [1] "!CreatedDate" @@ -121,7 +121,7 @@ [1] "!StudyOrganism" $Title - [1] "Example Adat Set001, Example Adat Set002" + [1] "Example Adat Set001, Example Adat Set002, Example Adat Set001, Example Adat Set002" attr(,"raw_key") [1] "!Title" @@ -215,64 +215,7 @@ attr(,"raw_key") [1] "PlateTailTest_Example_Adat_Set002" - $AdatId - [1] "GID-1234-56-7890-abcdef" - attr(,"raw_key") - [1] "!AdatId" - - $Version - [1] "1.2" - attr(,"raw_key") - [1] "!Version" - - $AssayType - [1] "PharmaServices" - attr(,"raw_key") - [1] "!AssayType" - - $AssayVersion - [1] "V4" - attr(,"raw_key") - [1] "!AssayVersion" - - $AssayRobot - [1] "Fluent 1 L-307" - attr(,"raw_key") - [1] "!AssayRobot" - - $Legal - [1] "Experiment details and data have been processed to protect Personally Identifiable Information (PII) and comply with existing privacy laws." - attr(,"raw_key") - [1] "!Legal" - - $CreatedBy - [1] "PharmaServices" - attr(,"raw_key") - [1] "!CreatedBy" - - $CreatedDate - [1] "2020-07-25" - attr(,"raw_key") - [1] "!CreatedDate" - - $EnteredBy - [1] "Technician2" - attr(,"raw_key") - [1] "!EnteredBy" - - $GeneratedBy - [1] "Px (Build: : ), Canopy_0.1.1" - attr(,"raw_key") - [1] "!GeneratedBy" - - $StudyMatrix - [1] "EDTA Plasma" - attr(,"raw_key") - [1] "!StudyMatrix" - - $Title - [1] "Example Adat Set001, Example Adat Set002" - attr(,"raw_key") - [1] "!Title" + $CollapsedAdats + [1] "example_data10.adat, single_sample.adat"