From d2a5b6386ae167c4824296990528701e98b80e9d Mon Sep 17 00:00:00 2001 From: Sam Park Date: Fri, 14 Jun 2024 10:20:30 -0400 Subject: [PATCH 1/8] added pullProject(), saveProject(), and use phc login token for private peps --- DESCRIPTION | 4 +- NAMESPACE | 3 + R/config.R | 5 + R/pepr.R | 1 + R/project.R | 793 ++++++++++++++++------------- R/utils.R | 131 +++++ man/dot-listOfListToListOfDT.Rd | 18 + man/dot-loadSubsampleAnnotation.Rd | 4 +- man/dot-mergeAttrs.Rd | 2 +- man/dot-modifySamplesFetched.Rd | 18 + man/fetchPEP.Rd | 20 + man/pullProject.Rd | 21 + man/saveProject.Rd | 23 + 13 files changed, 680 insertions(+), 363 deletions(-) create mode 100644 man/dot-listOfListToListOfDT.Rd create mode 100644 man/dot-modifySamplesFetched.Rd create mode 100644 man/fetchPEP.Rd create mode 100644 man/pullProject.Rd create mode 100644 man/saveProject.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0dc38a9..5e18ab5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: pepr Type: Package Title: Reading Portable Encapsulated Projects -Version: 0.5.0 -Date: 2023-11-16 +Version: 0.5.1 +Date: 2024-06-13 Authors@R: c(person("Nathan", "Sheffield", email = "nathan@code.databio.org", role = c("aut", "cph","cre")),person("Michal","Stolarczyk",email="michal@virginia.edu",role=c("aut"))) Maintainer: Nathan Sheffield diff --git a/NAMESPACE b/NAMESPACE index 4074ce8..cfc45b6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,7 +18,9 @@ export(getSample) export(getSubsample) export(listAmendments) export(makeSectionsAbsolute) +export(pullProject) export(sampleTable) +export(saveProject) exportClasses(Config) exportClasses(Project) exportMethods("$") @@ -27,6 +29,7 @@ exportMethods("[[") import(RCurl) import(pryr) import(stringr) +import(tidyr) import(yaml) importFrom(methods,as) importFrom(methods,callNextMethod) diff --git a/R/config.R b/R/config.R index bc26438..f899585 100644 --- a/R/config.R +++ b/R/config.R @@ -26,6 +26,11 @@ setMethod("initialize", "Config", function(.Object, data) { #' @export #' @rdname Config-class Config = function(file, amendments = NULL) { + ### if the config from the Project constructor is a list and not a filepath, it is from a PEP fetched from PEPhub + if (typeof(file) == 'list') { + config = methods::new("Config", data = file) + return(config) + } message("Loading config file: ", file) cfg_data = .loadConfig(filename = file, amendments = amendments) config = methods::new("Config", data = cfg_data) diff --git a/R/pepr.R b/R/pepr.R index 6d02440..8b1ca95 100644 --- a/R/pepr.R +++ b/R/pepr.R @@ -8,6 +8,7 @@ #' @import yaml #' @import stringr #' @import pryr +#' @import tidyr #' #' @references #' GitHub: \url{https://github.com/pepkit/pepr}, Documentation: \url{https://code.databio.org/pepr/} diff --git a/R/project.R b/R/project.R index 61e7807..3187d1d 100644 --- a/R/project.R +++ b/R/project.R @@ -21,49 +21,65 @@ #' #' @export setClass( - "Project", - slots = c( - file = "character", - samples = "data.frame", - config = "list", - sampleNameAttr = "character", - subSampleNameAttr = "character" - ) + "Project", + slots = c( + file = "character", + samples = "data.frame", + config = "list", + sampleNameAttr = "character", + subSampleNameAttr = "character" + ) ) # Project class methods --------------------------------------------------- - setMethod("initialize", "Project", function(.Object, ...) { - .Object = methods::callNextMethod(.Object) # calls generic initialize - ellipsis <- list(...) - stIndex = ellipsis$sampleTableIndex - sstIndex = ellipsis$subSampleTableIndex - if (!is.null(ellipsis$file)) { - # check if 'file' path provided - if (.isCfg(ellipsis$file)) { - # provided 'file' seems to be a config - .Object@file = .makeAbsPath(ellipsis$file, parent = path.expand(getwd())) - # instantiate config object and stick it in the config slot - .Object@config = Config(.Object@file, ellipsis$amendments) - # determine the effective (sub)sample table indexes - .Object = .getTableIndexes(.Object, stIndex, sstIndex) - sampleTablePath = .getSampleTablePathFromConfig(config = config(.Object)) - .Object@samples = .loadSampleAnnotation(sampleTablePath = sampleTablePath) - .Object = .modifySamples(.Object) - } else { - # provided 'file' seems to be a sample table - # determine the effective (sub)sample table indexes - .Object = .getTableIndexes(.Object, stIndex, sstIndex) - .Object@samples = .loadSampleAnnotation(sampleTablePath = ellipsis$file) - .Object = .autoMergeDuplicatedNames(.Object) - } + .Object = methods::callNextMethod(.Object) # calls generic initialize + ellipsis <- list(...) + stIndex = ellipsis$sampleTableIndex + sstIndex = ellipsis$subSampleTableIndex + if (ellipsis$api) { + # called with pullProject + rawPEP = fetchPEP(ellipsis$file, raw = TRUE) + if (length(rawPEP) < 3) { + # peps contain 3 elements, failed pep lookup will result in 1 element + stop('PEP does not exist in database. Did you spell it correctly?') } - # no 'file' provided, creating an empty object - # determine the effective (sub)sample table indexes + .Object@file = ellipsis$file + .Object@config = Config(rawPEP$config) + .Object = .getTableIndexes(.Object, stIndex, sstIndex) - return(.Object) + sampleTablePath = rawPEP$sample_table + + .Object@samples = data.table::setDT(as.data.frame(do.call(rbind, rawPEP$sample_list))) + subsamples = lapply(rawPEP$subsample_list, .listOfListToListOfDT) + + .Object = .modifySamplesFetched(.Object, subsamples) + + } else if (!is.null(ellipsis$file)) { + if (.isCfg(ellipsis$file)) { + # provided 'file' seems to be a config + .Object@file = .makeAbsPath(ellipsis$file, parent = path.expand(getwd())) + # instantiate config object and stick it in the config slot + .Object@config = Config(.Object@file, ellipsis$amendments) + # determine the effective (sub)sample table indexes + .Object = .getTableIndexes(.Object, stIndex, sstIndex) + sampleTablePath = .getSampleTablePathFromConfig(config = config(.Object)) + .Object@samples = .loadSampleAnnotation(sampleTablePath = sampleTablePath) + .Object = .modifySamples(.Object) + } else { + # provided 'file' seems to be a sample table + # determine the effective (sub)sample table indexes + .Object = .getTableIndexes(.Object, stIndex, sstIndex) + .Object@samples = .loadSampleAnnotation(sampleTablePath = ellipsis$file) + .Object = .autoMergeDuplicatedNames(.Object) + } + } + # no 'file' provided, creating an empty object + # determine the effective (sub)sample table indexes + .Object = .getTableIndexes(.Object, stIndex, sstIndex) + return(.Object) }) @@ -87,31 +103,55 @@ Project = function(file = NULL, amendments = NULL, sampleTableIndex = NULL, subSampleTableIndex = NULL) { - methods::new( - "Project", - file = file, - amendments = amendments, - sampleTableIndex = sampleTableIndex, - subSampleTableIndex = subSampleTableIndex - ) + methods::new( + "Project", + file = file, + api = FALSE, + amendments = amendments, + sampleTableIndex = sampleTableIndex, + subSampleTableIndex = subSampleTableIndex + ) +} + + +#' Pull a PEP from PEPhub API +#' +#' This is a helper that creates the project with empty samples and config slots +#' +#' @param registryPath a string specifying a registry path from PEPhub +#' @return an object of \code{"\linkS4class{Project}"} +#' @examples +#' registryPath = 'databio/example:default' +#' p=pullProject(registryPath) +#' @export +pullProject = function(registryPath = NULL) { + methods::new( + "Project", + file = registryPath, + api = TRUE, + amendments = NULL, + sampleTableIndex = NULL, + subSampleTableIndex = NULL + ) } setMethod( - "show", - signature = "Project", - definition = function(object) { - cat("PEP project object. Class: ", class(object), fill = T) - cat(" file: ", object@file, fill = T) - cat(" samples: ", NROW(object@samples), fill = T) - if (length(object@config) != 0) { - .listAmendments(object@config, style = "cat") - } - invisible(NULL) + "show", + signature = "Project", + definition = function(object) { + cat("PEP project object. Class: ", class(object), fill = T) + cat(" file: ", object@file, fill = T) + cat(" samples: ", NROW(object@samples), fill = T) + if (length(object@config) != 0) { + .listAmendments(object@config, style = "cat") } + invisible(NULL) + } ) + #' Extract \code{"\linkS4class{Project}"} #' #' This method can be used to view the config slot of @@ -129,15 +169,15 @@ setMethod( #' #' @export setGeneric("config", function(object) - standardGeneric("config")) + standardGeneric("config")) #' @describeIn config Extract \code{"\linkS4class{Project}"} of the object of \code{"\linkS4class{Project}"} setMethod( - "config", - signature = "Project", - definition = function(object) { - object@config - } + "config", + signature = "Project", + definition = function(object) { + object@config + } ) @@ -148,22 +188,47 @@ setMethod( #' @return modified Project object #' @keywords internal setGeneric(".modifySamples", function(object) - standardGeneric(".modifySamples")) + standardGeneric(".modifySamples")) setMethod( - ".modifySamples", - signature = "Project", - definition = function(object) { - object = .removeAttrs(object) - object = .appendAttrs(object) - object = .duplicateAttrs(object) - object = .implyAttrs(object) - object = .autoMergeDuplicatedNames(object) - object = .mergeAttrs(object, - .getSubSampleTablePathFromConfig(config(object))) - object = .deriveAttrs(object) - return(object) - } + ".modifySamples", + signature = "Project", + definition = function(object) { + object = .removeAttrs(object) + object = .appendAttrs(object) + object = .duplicateAttrs(object) + object = .implyAttrs(object) + object = .autoMergeDuplicatedNames(object) + object = .mergeAttrs(object, + .getSubSampleTablePathFromConfig(config(object))) + object = .deriveAttrs(object) + return(object) + } +) + + +#' Perform all the sample attribute modifications for fetched PEPs +#' +#' @param object an object of \code{"\linkS4class{Project}"} +#' +#' @return modified Project object +#' @keywords internal +setGeneric(".modifySamplesFetched", function(object, subsamples) + standardGeneric(".modifySamplesFetched")) + +setMethod( + ".modifySamplesFetched", + signature = "Project", + definition = function(object, subsamples) { + # object = .removeAttrs(object) + # object = .appendAttrs(object) + # object = .duplicateAttrs(object) + # object = .implyAttrs(object) + # object = .autoMergeDuplicatedNames(object) + object = .mergeAttrs(object, subsampleAannotationPaths = NULL, subsamplesTables = subsamples) + # object = .deriveAttrs(object) + return(object) + } ) @@ -188,21 +253,21 @@ setMethod( #' getSample(p, sampleName) #' @export setGeneric(name = "getSample", function(.Object, sampleName) - standardGeneric("getSample")) + standardGeneric("getSample")) #' @describeIn getSample extracts the sample from the \code{"\linkS4class{Project}"} object setMethod( - f = "getSample", - signature(.Object = "Project", - sampleName = "character"), - definition = function(.Object, sampleName) { - sampleNames = unlist(.Object@samples[[.Object@sampleNameAttr]]) - rowNumber = which(sampleNames == sampleName) - if (length(rowNumber) == 0) - stop("Such sample name does not exist.") - result = sampleTable(.Object)[rowNumber,] - return(result) - } + f = "getSample", + signature(.Object = "Project", + sampleName = "character"), + definition = function(.Object, sampleName) { + sampleNames = unlist(.Object@samples[[.Object@sampleNameAttr]]) + rowNumber = which(sampleNames == sampleName) + if (length(rowNumber) == 0) + stop("Such sample name does not exist.") + result = sampleTable(.Object)[rowNumber,] + return(result) + } ) @@ -230,41 +295,41 @@ setMethod( #' getSubsample(p, sampleName, subsampleName) #' @export setGeneric(name = "getSubsample", function(.Object, sampleName, subsampleName) - standardGeneric("getSubsample")) + standardGeneric("getSubsample")) #' @describeIn getSubsample extracts the subsamples from the \code{"\linkS4class{Project}"} object setMethod( - f = "getSubsample", - signature( - .Object = "Project", - sampleName = "character", - subsampleName = "character" - ), - definition = function(.Object, sampleName, subsampleName) { - if (is.null(.Object@samples[[.Object@subSampleNameAttr]])) - stop( - "There is no subsample_name attribute in the subsample table, ", - " therefore this method cannot be called." - ) - sampleNames = unlist(.Object@samples[[.Object@sampleNameAttr]]) - rowNumber = which(sampleNames == sampleName) - if (length(rowNumber) == 0) - stop("Such sample name does not exist.") - subsampleNames = .Object@samples[[.Object@subSampleNameAttr]][[rowNumber]] - sampleNumber = which(subsampleNames == subsampleName) - if (length(sampleNumber) == 0) - stop("Such sample and sub sample name combination does not exist.") - result = .Object@samples[1, ] - for (iColumn in names(result)) { - if (length(.Object@samples[[iColumn]][[rowNumber]]) > 1) { - result[[iColumn]] = - .Object@samples[[iColumn]][[rowNumber]][[sampleNumber]] - } else{ - result[[iColumn]] = .Object@samples[[iColumn]][[rowNumber]][[1]] - } - } - return(result) + f = "getSubsample", + signature( + .Object = "Project", + sampleName = "character", + subsampleName = "character" + ), + definition = function(.Object, sampleName, subsampleName) { + if (is.null(.Object@samples[[.Object@subSampleNameAttr]])) + stop( + "There is no subsample_name attribute in the subsample table, ", + " therefore this method cannot be called." + ) + sampleNames = unlist(.Object@samples[[.Object@sampleNameAttr]]) + rowNumber = which(sampleNames == sampleName) + if (length(rowNumber) == 0) + stop("Such sample name does not exist.") + subsampleNames = .Object@samples[[.Object@subSampleNameAttr]][[rowNumber]] + sampleNumber = which(subsampleNames == subsampleName) + if (length(sampleNumber) == 0) + stop("Such sample and sub sample name combination does not exist.") + result = .Object@samples[1, ] + for (iColumn in names(result)) { + if (length(.Object@samples[[iColumn]][[rowNumber]]) > 1) { + result[[iColumn]] = + .Object@samples[[iColumn]][[rowNumber]][[sampleNumber]] + } else{ + result[[iColumn]] = .Object@samples[[iColumn]][[rowNumber]][[1]] + } } + return(result) + } ) @@ -286,16 +351,16 @@ setMethod( #' availAmendemtns = listAmendments(p) #' @export setGeneric("listAmendments", function(.Object) - standardGeneric("listAmendments")) + standardGeneric("listAmendments")) #' @describeIn listAmendments list amendments in a \code{"\linkS4class{Project}"} object setMethod( - f = "listAmendments", - signature = signature(.Object = "Project"), - definition = function(.Object) { - config = config(.Object) - .listAmendments(cfg = config, style = "message") - } + f = "listAmendments", + signature = signature(.Object = "Project"), + definition = function(.Object) { + config = config(.Object) + .listAmendments(cfg = config, style = "message") + } ) @@ -322,24 +387,24 @@ setMethod( #' activateAmendments(p, availAmendments[1]) #' @export setGeneric("activateAmendments", function(.Object, amendments) - standardGeneric("activateAmendments")) + standardGeneric("activateAmendments")) #' @describeIn activateAmendments activate amendments in a \code{"\linkS4class{Project}"} object setMethod( - f = "activateAmendments", - signature = signature(.Object = "Project", amendments = "character"), - definition = function(.Object, amendments) { - .Object@config = .applyAmendments(.Object@config, amendments) - .Object@config = makeSectionsAbsolute( - .Object@config, - c(CFG_SAMPLE_TABLE_KEY, CFG_SUBSAMPLE_TABLE_KEY), - .Object@file - ) - sampleTablePath = .getSampleTablePathFromConfig(config = config(.Object)) - .Object@samples = .loadSampleAnnotation(sampleTablePath = sampleTablePath) - .Object = .modifySamples(.Object) - return(.Object) - } + f = "activateAmendments", + signature = signature(.Object = "Project", amendments = "character"), + definition = function(.Object, amendments) { + .Object@config = .applyAmendments(.Object@config, amendments) + .Object@config = makeSectionsAbsolute( + .Object@config, + c(CFG_SAMPLE_TABLE_KEY, CFG_SUBSAMPLE_TABLE_KEY), + .Object@file + ) + sampleTablePath = .getSampleTablePathFromConfig(config = config(.Object)) + .Object@samples = .loadSampleAnnotation(sampleTablePath = sampleTablePath) + .Object = .modifySamples(.Object) + return(.Object) + } ) @@ -359,15 +424,15 @@ setMethod( #' #' @export setGeneric("sampleTable", function(object) - standardGeneric("sampleTable")) + standardGeneric("sampleTable")) #' @describeIn sampleTable extract sample table from a \code{"\linkS4class{Project}"} setMethod( - "sampleTable", - signature = "Project", - definition = function(object) { - object@samples - } + "sampleTable", + signature = "Project", + definition = function(object) { + object@samples + } ) # Sample modifiers -------------------------------------------------------- @@ -379,21 +444,21 @@ setMethod( #' @return an object of \code{"\linkS4class{Project}"} #' @keywords internal .removeAttrs <- function(.Object) { - if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) - return(.Object) - modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] - if (!CFG_REMOVE_KEY %in% names(modifiers)) - return(.Object) - toRemove = modifiers[[CFG_REMOVE_KEY]] - if (!is.null(toRemove)) { - # get a copy of samples to get the dimensions - for (rem in toRemove) { - if (rem %in% colnames(sampleTable(.Object))) { - .Object@samples[, rem] = NULL - } - } - } + if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) + return(.Object) + modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] + if (!CFG_REMOVE_KEY %in% names(modifiers)) return(.Object) + toRemove = modifiers[[CFG_REMOVE_KEY]] + if (!is.null(toRemove)) { + # get a copy of samples to get the dimensions + for (rem in toRemove) { + if (rem %in% colnames(sampleTable(.Object))) { + .Object@samples[, rem] = NULL + } + } + } + return(.Object) } @@ -404,31 +469,31 @@ setMethod( #' @return an object of \code{\link{Project-class}} #' @keywords internal .appendAttrs <- function(.Object) { - if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) - return(.Object) - modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] - if (!CFG_APPEND_KEY %in% names(modifiers)) - return(.Object) - constants = modifiers[[CFG_APPEND_KEY]] - if (is.list(constants)) { - # get names - constantsNames = names(constants) - # get a copy of samples to get the dimensions - colLen = dim(sampleTable(.Object))[1] - for (iConst in seq_along(constants)) { - # create a one column data.table and append it to the current one - if (!constantsNames[iConst] %in% colnames(sampleTable(.Object))) { - tempDT = data.table::data.table(matrix(matrix( - list(constants[[iConst]]), - ncol = 1, - nrow = colLen - ))) - names(tempDT) = constantsNames[iConst] - .Object@samples = cbind(.Object@samples, tempDT) - } - } - } + if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) return(.Object) + modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] + if (!CFG_APPEND_KEY %in% names(modifiers)) + return(.Object) + constants = modifiers[[CFG_APPEND_KEY]] + if (is.list(constants)) { + # get names + constantsNames = names(constants) + # get a copy of samples to get the dimensions + colLen = dim(sampleTable(.Object))[1] + for (iConst in seq_along(constants)) { + # create a one column data.table and append it to the current one + if (!constantsNames[iConst] %in% colnames(sampleTable(.Object))) { + tempDT = data.table::data.table(matrix(matrix( + list(constants[[iConst]]), + ncol = 1, + nrow = colLen + ))) + names(tempDT) = constantsNames[iConst] + .Object@samples = cbind(.Object@samples, tempDT) + } + } + } + return(.Object) } #' Duplicate a selected attribute across all the samples @@ -438,16 +503,16 @@ setMethod( #' @return an object of \code{"\linkS4class{Project}"} #' @keywords internal .duplicateAttrs <- function(.Object) { - if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) - return(.Object) - modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] - if (!CFG_DUPLICATE_KEY %in% names(modifiers)) - return(.Object) - duplicated = modifiers[[CFG_DUPLICATE_KEY]] - for (oriAttrName in names(duplicated)) { - .Object@samples[, duplicated[[oriAttrName]]] = .Object@samples[, oriAttrName] - } + if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) + return(.Object) + modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] + if (!CFG_DUPLICATE_KEY %in% names(modifiers)) return(.Object) + duplicated = modifiers[[CFG_DUPLICATE_KEY]] + for (oriAttrName in names(duplicated)) { + .Object@samples[, duplicated[[oriAttrName]]] = .Object@samples[, oriAttrName] + } + return(.Object) } #' Imply attributes @@ -457,46 +522,46 @@ setMethod( #' @return an object of \code{"\linkS4class{Project}"} #' @keywords internal .implyAttrs = function(.Object) { - if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) - return(.Object) - modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] - if (!CFG_IMPLY_KEY %in% names(modifiers)) - return(.Object) - implications = modifiers[[CFG_IMPLY_KEY]] - for (implication in implications) { - if (!( - CFG_IMPLY_IF_KEY %in% names(implication) && - CFG_IMPLY_THEN_KEY %in% names(implication) - )) - stop(CFG_IMPLY_KEY, " section is not formatted properly") - implierAttrs = names(implication[[CFG_IMPLY_IF_KEY]]) - implierVals = implication[[CFG_IMPLY_IF_KEY]] - impliedAttrs = names(implication[[CFG_IMPLY_THEN_KEY]]) - impliedVals = as.character(implication[[CFG_IMPLY_THEN_KEY]]) - attrs = colnames(.Object@samples) - if (!all(implierAttrs %in% attrs)) - next - allHitIds = list() - for (i in seq_along(implierAttrs)) { - hitIds = list() - implierStrings = as.character(unlist(implierVals[i])) - for (j in seq_along(implierStrings)) { - hitIds[[j]] = which(.Object@samples[, implierAttrs[i]] == implierStrings[j]) - } - allHitIds[[i]] = Reduce(union, hitIds) - if (length(allHitIds[[i]]) < 1) - break - } - qualIds = Reduce(intersect, allHitIds) - if (length(qualIds) < 1) - next - for (i in seq_along(impliedAttrs)) { - if (!impliedAttrs[i] %in% attrs) - .Object@samples[, impliedAttrs[i]] = "" - .Object@samples[qualIds, impliedAttrs[i]] = impliedVals[i] - } - } + if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) + return(.Object) + modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] + if (!CFG_IMPLY_KEY %in% names(modifiers)) return(.Object) + implications = modifiers[[CFG_IMPLY_KEY]] + for (implication in implications) { + if (!( + CFG_IMPLY_IF_KEY %in% names(implication) && + CFG_IMPLY_THEN_KEY %in% names(implication) + )) + stop(CFG_IMPLY_KEY, " section is not formatted properly") + implierAttrs = names(implication[[CFG_IMPLY_IF_KEY]]) + implierVals = implication[[CFG_IMPLY_IF_KEY]] + impliedAttrs = names(implication[[CFG_IMPLY_THEN_KEY]]) + impliedVals = as.character(implication[[CFG_IMPLY_THEN_KEY]]) + attrs = colnames(.Object@samples) + if (!all(implierAttrs %in% attrs)) + next + allHitIds = list() + for (i in seq_along(implierAttrs)) { + hitIds = list() + implierStrings = as.character(unlist(implierVals[i])) + for (j in seq_along(implierStrings)) { + hitIds[[j]] = which(.Object@samples[, implierAttrs[i]] == implierStrings[j]) + } + allHitIds[[i]] = Reduce(union, hitIds) + if (length(allHitIds[[i]]) < 1) + break + } + qualIds = Reduce(intersect, allHitIds) + if (length(qualIds) < 1) + next + for (i in seq_along(impliedAttrs)) { + if (!impliedAttrs[i] %in% attrs) + .Object@samples[, impliedAttrs[i]] = "" + .Object@samples[qualIds, impliedAttrs[i]] = impliedVals[i] + } + } + return(.Object) } @@ -507,37 +572,37 @@ setMethod( #' @return an object of \code{"\linkS4class{Project}"} #' @keywords internal .deriveAttrs = function(.Object) { - if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) - return(.Object) - parentDir = dirname(.Object@file) - modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] - if (!CFG_DERIVE_KEY %in% names(modifiers)) - return(.Object) - derivations = modifiers[[CFG_DERIVE_KEY]] - if (!all(c(CFG_DERIVE_ATTRS_KEY, CFG_DERIVE_SOURCES_KEY) - %in% names(derivations))) - stop(CFG_DERIVE_KEY, " section is not formatted properly") - for (derivedAttr in derivations[[CFG_DERIVE_ATTRS_KEY]]) { - if (!derivedAttr %in% colnames(.Object@samples)) - stop(paste( - "Failed to derive. Sample attribute not found:", - derivedAttr - )) - derivedSamplesVals = .Object@samples[, derivedAttr] - for (derivedSource in names(derivations[[CFG_DERIVE_SOURCES_KEY]])) { - hitIds = which(derivedSamplesVals == derivedSource) - if (length(hitIds) < 1) - next - for (hitId in hitIds) { - rgx = derivations[[CFG_DERIVE_SOURCES_KEY]][[derivedSource]] - res = .matchesAndRegexes(.strformat(rgx, as.list(sampleTable( - .Object - )[hitId, ]), parentDir)) - .Object@samples[hitId, derivedAttr] = list(unique(unlist(res))) - } - } - } + if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) + return(.Object) + parentDir = dirname(.Object@file) + modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] + if (!CFG_DERIVE_KEY %in% names(modifiers)) return(.Object) + derivations = modifiers[[CFG_DERIVE_KEY]] + if (!all(c(CFG_DERIVE_ATTRS_KEY, CFG_DERIVE_SOURCES_KEY) + %in% names(derivations))) + stop(CFG_DERIVE_KEY, " section is not formatted properly") + for (derivedAttr in derivations[[CFG_DERIVE_ATTRS_KEY]]) { + if (!derivedAttr %in% colnames(.Object@samples)) + stop(paste( + "Failed to derive. Sample attribute not found:", + derivedAttr + )) + derivedSamplesVals = .Object@samples[, derivedAttr] + for (derivedSource in names(derivations[[CFG_DERIVE_SOURCES_KEY]])) { + hitIds = which(derivedSamplesVals == derivedSource) + if (length(hitIds) < 1) + next + for (hitId in hitIds) { + rgx = derivations[[CFG_DERIVE_SOURCES_KEY]][[derivedSource]] + res = .matchesAndRegexes(.strformat(rgx, as.list(sampleTable( + .Object + )[hitId, ]), parentDir)) + .Object@samples[hitId, derivedAttr] = list(unique(unlist(res))) + } + } + } + return(.Object) } @@ -548,12 +613,12 @@ setMethod( #' @return an data.frame with samples; one sample per row #' @keywords internal .loadSampleAnnotation = function(sampleTablePath) { - if (.safeFileExists(sampleTablePath)) { - samples = data.table::fread(sampleTablePath) - } else{ - warning("The sample table does not exist: ", sampleTablePath) - samples = data.frame() - } + if (.safeFileExists(sampleTablePath)) { + samples = data.table::fread(sampleTablePath) + } else{ + warning("The sample table does not exist: ", sampleTablePath) + samples = data.frame() + } } @@ -561,52 +626,56 @@ setMethod( #' #' @param .Object an object of \code{"\linkS4class{Project}"} #' @param path string, a path to the subsample table to read and incorporate +#' @param subsamplesTable data.table, provided from constructor if PEP is fetched from API #' #' @return an object of \code{"\linkS4class{Project}"} #' @keywords internal -.loadSubsampleAnnotation = function(.Object, path) { +.loadSubsampleAnnotation = function(.Object, path = NULL, subsamplesTable = NULL) { + if (is.null(subsamplesTable) & !is.null(path)) { if (.safeFileExists(path)) { - subsamplesTable = data.table::fread(path) + subsamplesTable = data.table::fread(path) } else{ - subsamplesTable = data.table::data.table() + subsamplesTable = data.table::data.table() } - subNames = unique(subsamplesTable[[.Object@sampleNameAttr]]) - samples = sampleTable(.Object) - samples = .listifyDF(samples) - rowNum = nrow(samples) - # Creating a list to be populated in the loop and inserted - # into the samples data.table as a column. This way the "cells" - # in the samples table can consist of multiple elements - for (iName in subNames) { - whichNames = which(subsamplesTable[[.Object@sampleNameAttr]] == iName) - subTable = subsamplesTable[whichNames, ] - dropCol = which(names(subsamplesTable[whichNames, ]) == .Object@sampleNameAttr) - subTable = subset(subTable, select = -dropCol) - colList = vector("list", rowNum) - for (iColumn in seq_len(ncol(subTable))) { - colName = names(subset(subTable, select = iColumn)) - if (!any(names(samples) == colName)) { - # The column doesn't exist, creating - samples[, colName] = NULL - } else{ - colList = samples[[colName]] - } - # The column exists - whichColSamples = which(names(samples) == colName) - whichRowSamples = which(samples[[.Object@sampleNameAttr]] == iName) - if (length(whichRowSamples) < 1) { - warning("No samples named '", iName, "'") - } else { - # Inserting element(s) into the list - colList[[whichRowSamples]] = subTable[[colName]] - # Inserting the list as a column in the data.table - samples[[colName]] = colList - } - } + } + + subNames = unique(subsamplesTable[[.Object@sampleNameAttr]]) + samples = sampleTable(.Object) + samples = .listifyDF(samples) + rowNum = nrow(samples) + # Creating a list to be populated in the loop and inserted + # into the samples data.table as a column. This way the "cells" + # in the samples table can consist of multiple elements + for (iName in subNames) { + whichNames = which(subsamplesTable[[.Object@sampleNameAttr]] == iName) + subTable = subsamplesTable[whichNames, ] + dropCol = which(names(subsamplesTable[whichNames, ]) == .Object@sampleNameAttr) + subTable = subset(subTable, select = -dropCol) + colList = vector("list", rowNum) + for (iColumn in seq_len(ncol(subTable))) { + colName = names(subset(subTable, select = iColumn)) + if (!any(names(samples) == colName)) { + # The column doesn't exist, creating + samples[, colName] = NULL + } else{ + colList = samples[[colName]] + } + # The column exists + whichColSamples = which(names(samples) == colName) + whichRowSamples = which(samples[[.Object@sampleNameAttr]] == iName) + if (length(whichRowSamples) < 1) { + warning("No samples named '", iName, "'") + } else { + # Inserting element(s) into the list + colList[[whichRowSamples]] = subTable[[colName]] + # Inserting the list as a column in the data.table + samples[[colName]] = colList + } } - samples[is.na(samples)] = "" - .Object@samples = samples - return(.Object) + } + samples[is.na(samples)] = "" + .Object@samples = samples + return(.Object) } @@ -618,13 +687,19 @@ setMethod( #' #' @return an object of \code{"\linkS4class{Project}"} #' @keywords internal -.mergeAttrs = function(.Object, subsampleAannotationPaths) { - if (is.null(subsampleAannotationPaths)) - return(.Object) - for (p in subsampleAannotationPaths) { - .Object = .loadSubsampleAnnotation(.Object, p) +.mergeAttrs = function(.Object, subsampleAannotationPaths, subsamplesTables = NULL) { + if (!is.null(subsamplesTables)) { + for (sst in subsamplesTables) { + .Object = .loadSubsampleAnnotation(.Object, subsamplesTable = sst) } return(.Object) + } + if (is.null(subsampleAannotationPaths)) + return(.Object) + for (p in subsampleAannotationPaths) { + .Object = .loadSubsampleAnnotation(.Object, path = p) + } + return(.Object) } @@ -637,39 +712,39 @@ setMethod( #' @return an object of \code{"\linkS4class{Project}"} #' @keywords internal .autoMergeDuplicatedNames = function(.Object) { - s = sampleTable(.Object) - sampleNames = s[[.Object@sampleNameAttr]] - dups = duplicated(sampleNames) - if (!any(dups)) - return(.Object) - duplicatedSampleNames = sampleNames[which(dups)] - if (!is.null(.getSubSampleTablePathFromConfig(config(.Object)))) - stop( - paste0( - "Duplicated sample names were found (", - duplicatedSampleNames, - ") and subsample_table is specified in the config. ", - "You may use either auto-merging or subsample_table-based merging." - ) - ) - rowsWithDuplicates = which(s[[.Object@sampleNameAttr]] %in% duplicatedSampleNames) - dupRows = s[rowsWithDuplicates,] - combinedList = list() - # create a list of list that combined rows with duplicated sample names - for (duplicatedSampleName in duplicatedSampleNames) { - toCombine = s[which(sampleNames == duplicatedSampleName),] - combinedList[[duplicatedSampleName]] = apply(toCombine, 2, function(x) { - return(list(unique(x))) - }) - } - # remove rows that include duplicates - s = s[-rowsWithDuplicates,] - # insert the combined rows - for (combinedRow in combinedList) { - s = rbind(s, combinedRow) - } - .Object@samples = s + s = sampleTable(.Object) + sampleNames = s[[.Object@sampleNameAttr]] + dups = duplicated(sampleNames) + if (!any(dups)) return(.Object) + duplicatedSampleNames = sampleNames[which(dups)] + if (!is.null(.getSubSampleTablePathFromConfig(config(.Object)))) + stop( + paste0( + "Duplicated sample names were found (", + duplicatedSampleNames, + ") and subsample_table is specified in the config. ", + "You may use either auto-merging or subsample_table-based merging." + ) + ) + rowsWithDuplicates = which(s[[.Object@sampleNameAttr]] %in% duplicatedSampleNames) + dupRows = s[rowsWithDuplicates,] + combinedList = list() + # create a list of list that combined rows with duplicated sample names + for (duplicatedSampleName in duplicatedSampleNames) { + toCombine = s[which(sampleNames == duplicatedSampleName),] + combinedList[[duplicatedSampleName]] = apply(toCombine, 2, function(x) { + return(list(unique(x))) + }) + } + # remove rows that include duplicates + s = s[-rowsWithDuplicates,] + # insert the combined rows + for (combinedRow in combinedList) { + s = rbind(s, combinedRow) + } + .Object@samples = s + return(.Object) } #' Set table indexes @@ -688,22 +763,22 @@ setMethod( #' @param sstIndex character string indicating a constructor-specified subsample table index #' @keywords internal .getTableIndexes <- function(.Object, stIndex, sstIndex) { - .getIndexVal <- function(spec, config, default, key) { - if (!is.null(spec)) - return(spec) - if (length(config)) - return(ifelse(is.null(config[[key]]), default, config[[key]])) - return(default) - } - - .Object@sampleNameAttr = .getIndexVal(stIndex, - config(.Object), - SAMPLE_NAME_ATTR, - CFG_SAMPLE_TABLE_INDEX_KEY) - .Object@subSampleNameAttr = .getIndexVal(sstIndex, - config(.Object), - SUBSAMPLE_NAME_ATTR, - CFG_SUBSAMPLE_TABLE_INDEX_KEY) - - return(.Object) + .getIndexVal <- function(spec, config, default, key) { + if (!is.null(spec)) + return(spec) + if (length(config)) + return(ifelse(is.null(config[[key]]), default, config[[key]])) + return(default) + } + + .Object@sampleNameAttr = .getIndexVal(stIndex, + config(.Object), + SAMPLE_NAME_ATTR, + CFG_SAMPLE_TABLE_INDEX_KEY) + .Object@subSampleNameAttr = .getIndexVal(sstIndex, + config(.Object), + SUBSAMPLE_NAME_ATTR, + CFG_SUBSAMPLE_TABLE_INDEX_KEY) + + return(.Object) } diff --git a/R/utils.R b/R/utils.R index 1c7d0eb..4f3e929 100644 --- a/R/utils.R +++ b/R/utils.R @@ -433,3 +433,134 @@ fetchSamples = function(samples, return(FALSE) stop("File path does not point to an annotation or a config: ", filePath) } + + + +#' Fetch a PEP from PEPhub using a registry path (namespace/project:tag) +#' +#' Calls the PEPhub API to fetch PEPs. +#' +#' @param registryPath a string for the PEP registry path (namespace/project:tag) +#' @param raw a boolean for whether to return a raw PEP +#' +#' @return a list, with sublists for config, sample_list, and subsample_list for the fetched PEP +#' @keywords internal +fetchPEP <- function(registryPath, raw = TRUE) { + + BASE_URL <- 'https://pephub-api.databio.org/api/v1/' + reg_split <- strsplit(registryPath, '/|:')[[1]] + query_url <- paste0(BASE_URL, 'projects/', reg_split[[1]], '/', reg_split[[2]], '?tag=', reg_split[[3]], '&raw=', raw) + + jwt_path <- file.path(path.expand('~'), '.pephubclient', 'jwt.txt') + jwt_token <- '' + + if (file.exists(jwt_path)) { + if (difftime(Sys.time(), file.info(jwt_path)$mtime, units = 'days') <= 2) { + jwt_token <- readLines(jwt_path, warn = FALSE) + res <- httr::GET(query_url, httr::add_headers(authorization = jwt_token)) + } else { + warning('Authentication token is more than 2 days old. Generate a new one with PEPhub Client.') + res <- httr::GET(query_url) + } + } else { + warning('No authentication token found. Generate one with PEPhub Client to access private PEPs.') + res <- httr::GET(query_url) + } + + pep <- httr::content(res, as = 'parsed') + + return(pep) +} + + + + + +#' Save a modified PEP Project to a local directory +#' +#' This is a helper that saves a PEP Project to a local output directory +#' +#' @param project a PEP Project +#' @param projectDir a string for the output directory, defaults to current working directory +#' @param overwrite a boolean for whether to overwrite an existing project at the output directory +#' @param sampleTableIndex a string indicating the sample attribute that is used +#' +#' @return a boolean, TRUE if the save was successful and FALSE if otherwise +#' @export +saveProject = function(project = NULL, + outputDir = getwd(), + overwrite = FALSE) { + + saved = FALSE + + if (!file.exists(outputDir)) { + stop('Specified Project directory does not exist.') + } + + pattern = "^[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+:[a-zA-Z0-9_-]+$" + + if (grepl(pattern, project@file, perl = TRUE)) { + # if file is a registry path + project_name = gsub('/|:', '-', project@file) + project_path = file.path(outputDir, project_name) + } else { + project_path = file.path(outputDir, basename(dirname(project@file))) + } + + if ((!dir.exists(project_path) | overwrite)) { + dir.create(project_path, showWarnings = FALSE) + + samples_table <- as.data.frame(project@samples) + + subsample_cols_idx <- unname(which(sapply(samples_table, function(x) any(sapply(x, is.list))))) + subsample_cols_names <- names(which(sapply(samples_table, function(x) any(sapply(x, is.list))))) + sample_name_col_idx <- which(names(samples_table) == project@sampleNameAttr) + + samples_table_raw <- samples_table + subsamples_table_raw <- NULL + if (length(subsample_cols_idx) > 0) { + samples_table_raw <- samples_table[, -subsample_cols_idx] + subsamples_table <- samples_table[, c(sample_name_col_idx, subsample_cols_idx)] + subsamples_table_raw <- tidyr::unnest(subsamples_table, cols = subsample_cols_names) + } + + sample_table_name <- ifelse(CFG_SAMPLE_TABLE_KEY %in% names(project@config), + basename(project@config[[CFG_SAMPLE_TABLE_KEY]]), + paste0(CFG_SAMPLE_TABLE_KEY, '.csv')) + + subsample_table_name <- ifelse(CFG_SUBSAMPLE_TABLE_KEY %in% names(project@config), + basename(project@config[[CFG_SUBSAMPLE_TABLE_KEY]]), + paste0(CFG_SUBSAMPLE_TABLE_KEY, '.csv')) + + project@config[[CFG_SAMPLE_TABLE_KEY]] <- sample_table_name + project@config[[CFG_SUBSAMPLE_TABLE_KEY]] <- subsample_table_name + + yaml::write_yaml(project@config, file = file.path(project_path, 'project_config.yaml')) + if (!is.null(samples_table_raw)) { + data.table::fwrite(samples_table_raw, file = file.path(project_path, sample_table_name)) + } + if (!is.null(subsamples_table_raw)) { + data.table::fwrite(subsamples_table_raw, file = file.path(project_path, subsample_table_name)) + } + saved = TRUE + } else { + stop('Project directory already exists. Use overwrite = TRUE if you would like to overwrite the existing local PEP.') + } + + return(saved) +} + + +#' Dataframify list sublists +#' +#' This function turns each list sublist into a data frame +#' +#' @param list an object of class list +#' @return an object of class data.frame +#' @keywords internal +.listOfListToListOfDT <- function(list) { + data.table::setDT(as.data.frame(do.call(rbind, list))) +} + + + diff --git a/man/dot-listOfListToListOfDT.Rd b/man/dot-listOfListToListOfDT.Rd new file mode 100644 index 0000000..5e81e54 --- /dev/null +++ b/man/dot-listOfListToListOfDT.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.listOfListToListOfDT} +\alias{.listOfListToListOfDT} +\title{Dataframify list sublists} +\usage{ +.listOfListToListOfDT(list) +} +\arguments{ +\item{list}{an object of class list} +} +\value{ +an object of class data.frame +} +\description{ +This function turns each list sublist into a data frame +} +\keyword{internal} diff --git a/man/dot-loadSubsampleAnnotation.Rd b/man/dot-loadSubsampleAnnotation.Rd index 47f80da..23bef76 100644 --- a/man/dot-loadSubsampleAnnotation.Rd +++ b/man/dot-loadSubsampleAnnotation.Rd @@ -4,12 +4,14 @@ \alias{.loadSubsampleAnnotation} \title{Load single subsample annotation} \usage{ -.loadSubsampleAnnotation(.Object, path) +.loadSubsampleAnnotation(.Object, path = NULL, subsamplesTable = NULL) } \arguments{ \item{.Object}{an object of \code{"\linkS4class{Project}"}} \item{path}{string, a path to the subsample table to read and incorporate} + +\item{subsamplesTable}{data.table, provided from constructor if PEP is fetched from API} } \value{ an object of \code{"\linkS4class{Project}"} diff --git a/man/dot-mergeAttrs.Rd b/man/dot-mergeAttrs.Rd index 3410abf..457b1dc 100644 --- a/man/dot-mergeAttrs.Rd +++ b/man/dot-mergeAttrs.Rd @@ -4,7 +4,7 @@ \alias{.mergeAttrs} \title{Merge samples defined in sample table with ones in subsample table(s)} \usage{ -.mergeAttrs(.Object, subsampleAannotationPaths) +.mergeAttrs(.Object, subsampleAannotationPaths, subsamplesTables = NULL) } \arguments{ \item{.Object}{an object of \code{"\linkS4class{Project}"}} diff --git a/man/dot-modifySamplesFetched.Rd b/man/dot-modifySamplesFetched.Rd new file mode 100644 index 0000000..231d785 --- /dev/null +++ b/man/dot-modifySamplesFetched.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/project.R +\name{.modifySamplesFetched} +\alias{.modifySamplesFetched} +\title{Perform all the sample attribute modifications for fetched PEPs} +\usage{ +.modifySamplesFetched(object, subsamples) +} +\arguments{ +\item{object}{an object of \code{"\linkS4class{Project}"}} +} +\value{ +modified Project object +} +\description{ +Perform all the sample attribute modifications for fetched PEPs +} +\keyword{internal} diff --git a/man/fetchPEP.Rd b/man/fetchPEP.Rd new file mode 100644 index 0000000..b409584 --- /dev/null +++ b/man/fetchPEP.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{fetchPEP} +\alias{fetchPEP} +\title{Fetch a PEP from PEPhub using a registry path (namespace/project:tag)} +\usage{ +fetchPEP(registryPath, raw = TRUE) +} +\arguments{ +\item{registryPath}{a string for the PEP registry path (namespace/project:tag)} + +\item{raw}{a boolean for whether to return a raw PEP} +} +\value{ +a list, with sublists for config, sample_list, and subsample_list for the fetched PEP +} +\description{ +Calls the PEPhub API to fetch PEPs. +} +\keyword{internal} diff --git a/man/pullProject.Rd b/man/pullProject.Rd new file mode 100644 index 0000000..4bcab77 --- /dev/null +++ b/man/pullProject.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/project.R +\name{pullProject} +\alias{pullProject} +\title{Pull a PEP from PEPhub API} +\usage{ +pullProject(registryPath = NULL) +} +\arguments{ +\item{registryPath}{a string specifying a registry path from PEPhub} +} +\value{ +an object of \code{"\linkS4class{Project}"} +} +\description{ +This is a helper that creates the project with empty samples and config slots +} +\examples{ +registryPath = 'databio/example:default' +p=pullProject(registryPath) +} diff --git a/man/saveProject.Rd b/man/saveProject.Rd new file mode 100644 index 0000000..193bb3a --- /dev/null +++ b/man/saveProject.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{saveProject} +\alias{saveProject} +\title{Save a modified PEP Project to a local directory} +\usage{ +saveProject(project = NULL, outputDir = getwd(), overwrite = FALSE) +} +\arguments{ +\item{project}{a PEP Project} + +\item{overwrite}{a boolean for whether to overwrite an existing project at the output directory} + +\item{projectDir}{a string for the output directory, defaults to current working directory} + +\item{sampleTableIndex}{a string indicating the sample attribute that is used} +} +\value{ +a boolean, TRUE if the save was successful and FALSE if otherwise +} +\description{ +This is a helper that saves a PEP Project to a local output directory +} From 880a590935448d8c139c5d2017806e1f6d01ea57 Mon Sep 17 00:00:00 2001 From: Sam Park Date: Fri, 14 Jun 2024 15:26:06 -0400 Subject: [PATCH 2/8] updated coding style and docs --- R/config.R | 6 +-- R/constants.R | 3 +- R/project.R | 12 +++--- R/utils.R | 83 +++++++++++++++++------------------- vignettes/gettingStarted.Rmd | 14 +++++- 5 files changed, 62 insertions(+), 56 deletions(-) diff --git a/R/config.R b/R/config.R index f899585..947ee7a 100644 --- a/R/config.R +++ b/R/config.R @@ -63,7 +63,7 @@ setMethod( #' .expandList(x) #' @export #' @keywords internal -.expandList <- function(x) { +.expandList = function(x) { if (is.list(x)) return(lapply(x, .expandList)) if (length(x) > 1) @@ -86,7 +86,7 @@ setMethod( #' .getSubscript(l, 1) == .getSubscript(l, "a") #' @export #' @keywords internal -.getSubscript <- function(lst, i) { +.getSubscript = function(lst, i) { if (is.character(i)) return(grep(paste0("^", i, "$"), names(lst))) return(i) @@ -140,7 +140,7 @@ setMethod("[[", "Config", function(x, i) { }) -.DollarNames.Config <- function(x, pattern = "") +.DollarNames.Config = function(x, pattern = "") grep(paste0("^", pattern), grep(names(x), value = TRUE)) #' @rdname select-config diff --git a/R/constants.R b/R/constants.R index 367578e..4fc750a 100644 --- a/R/constants.R +++ b/R/constants.R @@ -20,4 +20,5 @@ CFG_DUPLICATE_KEY = "duplicate" CFG_REMOVE_KEY = "remove" CFG_LOOPER_KEY = "looper" SAMPLE_NAME_ATTR = "sample_name" -SUBSAMPLE_NAME_ATTR = "subsample_name" \ No newline at end of file +SUBSAMPLE_NAME_ATTR = "subsample_name" +BASE_URL = "https://pephub-api.databio.org/api/v1/" \ No newline at end of file diff --git a/R/project.R b/R/project.R index 3187d1d..ef70cd0 100644 --- a/R/project.R +++ b/R/project.R @@ -36,7 +36,7 @@ setClass( setMethod("initialize", "Project", function(.Object, ...) { .Object = methods::callNextMethod(.Object) # calls generic initialize - ellipsis <- list(...) + ellipsis = list(...) stIndex = ellipsis$sampleTableIndex sstIndex = ellipsis$subSampleTableIndex if (ellipsis$api) { @@ -443,7 +443,7 @@ setMethod( #' #' @return an object of \code{"\linkS4class{Project}"} #' @keywords internal -.removeAttrs <- function(.Object) { +.removeAttrs = function(.Object) { if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) return(.Object) modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] @@ -468,7 +468,7 @@ setMethod( #' #' @return an object of \code{\link{Project-class}} #' @keywords internal -.appendAttrs <- function(.Object) { +.appendAttrs = function(.Object) { if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) return(.Object) modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] @@ -502,7 +502,7 @@ setMethod( #' #' @return an object of \code{"\linkS4class{Project}"} #' @keywords internal -.duplicateAttrs <- function(.Object) { +.duplicateAttrs = function(.Object) { if (!CFG_S_MODIFIERS_KEY %in% names(config(.Object))) return(.Object) modifiers = config(.Object)[[CFG_S_MODIFIERS_KEY]] @@ -762,8 +762,8 @@ setMethod( #' @param stIndex character string indicating a constructor-specified sample table index #' @param sstIndex character string indicating a constructor-specified subsample table index #' @keywords internal -.getTableIndexes <- function(.Object, stIndex, sstIndex) { - .getIndexVal <- function(spec, config, default, key) { +.getTableIndexes = function(.Object, stIndex, sstIndex) { + .getIndexVal = function(spec, config, default, key) { if (!is.null(spec)) return(spec) if (length(config)) diff --git a/R/utils.R b/R/utils.R index 4f3e929..3ff9128 100644 --- a/R/utils.R +++ b/R/utils.R @@ -358,6 +358,7 @@ fetchSamples = function(samples, } } + #' Check for a section existence in a nested list #' #' @param object list to inspect @@ -435,7 +436,6 @@ fetchSamples = function(samples, } - #' Fetch a PEP from PEPhub using a registry path (namespace/project:tag) #' #' Calls the PEPhub API to fetch PEPs. @@ -445,37 +445,32 @@ fetchSamples = function(samples, #' #' @return a list, with sublists for config, sample_list, and subsample_list for the fetched PEP #' @keywords internal -fetchPEP <- function(registryPath, raw = TRUE) { +fetchPEP = function(registryPath, raw = TRUE) { + pathSplit = strsplit(registryPath, '/|:')[[1]] + queryURL = paste0(BASE_URL, 'projects/', pathSplit[[1]], '/', pathSplit[[2]], '?tag=', pathSplit[[3]], '&raw=', raw) - BASE_URL <- 'https://pephub-api.databio.org/api/v1/' - reg_split <- strsplit(registryPath, '/|:')[[1]] - query_url <- paste0(BASE_URL, 'projects/', reg_split[[1]], '/', reg_split[[2]], '?tag=', reg_split[[3]], '&raw=', raw) + jwtPath = file.path(path.expand('~'), '.pephubclient', 'jwt.txt') + jwtToken = '' - jwt_path <- file.path(path.expand('~'), '.pephubclient', 'jwt.txt') - jwt_token <- '' - - if (file.exists(jwt_path)) { - if (difftime(Sys.time(), file.info(jwt_path)$mtime, units = 'days') <= 2) { - jwt_token <- readLines(jwt_path, warn = FALSE) - res <- httr::GET(query_url, httr::add_headers(authorization = jwt_token)) + if (file.exists(jwtPath)) { + if (difftime(Sys.time(), file.info(jwtPath)$mtime, units = 'days') <= 2) { + jwtToken = readLines(jwtPath, warn = FALSE) + res = httr::GET(queryURL, httr::add_headers(authorization = jwtToken)) } else { warning('Authentication token is more than 2 days old. Generate a new one with PEPhub Client.') - res <- httr::GET(query_url) + res = httr::GET(queryURL) } } else { warning('No authentication token found. Generate one with PEPhub Client to access private PEPs.') - res <- httr::GET(query_url) + res = httr::GET(queryURL) } - pep <- httr::content(res, as = 'parsed') + pep = httr::content(res, as = 'parsed') return(pep) } - - - #' Save a modified PEP Project to a local directory #' #' This is a helper that saves a PEP Project to a local output directory @@ -490,7 +485,6 @@ fetchPEP <- function(registryPath, raw = TRUE) { saveProject = function(project = NULL, outputDir = getwd(), overwrite = FALSE) { - saved = FALSE if (!file.exists(outputDir)) { @@ -501,46 +495,46 @@ saveProject = function(project = NULL, if (grepl(pattern, project@file, perl = TRUE)) { # if file is a registry path - project_name = gsub('/|:', '-', project@file) - project_path = file.path(outputDir, project_name) + projectName = gsub('/|:', '-', project@file) + projectPath = file.path(outputDir, projectName) } else { - project_path = file.path(outputDir, basename(dirname(project@file))) + projectPath = file.path(outputDir, basename(dirname(project@file))) } - if ((!dir.exists(project_path) | overwrite)) { - dir.create(project_path, showWarnings = FALSE) + if ((!dir.exists(projectPath) | overwrite)) { + dir.create(projectPath, showWarnings = FALSE) - samples_table <- as.data.frame(project@samples) + samplesTable = as.data.frame(project@samples) - subsample_cols_idx <- unname(which(sapply(samples_table, function(x) any(sapply(x, is.list))))) - subsample_cols_names <- names(which(sapply(samples_table, function(x) any(sapply(x, is.list))))) - sample_name_col_idx <- which(names(samples_table) == project@sampleNameAttr) + subsampleColsIdx = unname(which(sapply(samplesTable, function(x) any(sapply(x, is.list))))) + subsampleColsNames = names(which(sapply(samplesTable, function(x) any(sapply(x, is.list))))) + sampleNameColIdx = which(names(samplesTable) == project@sampleNameAttr) - samples_table_raw <- samples_table - subsamples_table_raw <- NULL - if (length(subsample_cols_idx) > 0) { - samples_table_raw <- samples_table[, -subsample_cols_idx] - subsamples_table <- samples_table[, c(sample_name_col_idx, subsample_cols_idx)] - subsamples_table_raw <- tidyr::unnest(subsamples_table, cols = subsample_cols_names) + samplesTableRaw = samplesTable + subsamplesTableRaw = NULL + if (length(subsampleColsIdx) > 0) { + samplesTableRaw = samplesTable[, -subsampleColsIdx] + subsamplesTable = samplesTable[, c(sampleNameColIdx, subsampleColsIdx)] + subsamplesTableRaw = tidyr::unnest(subsamplesTable, cols = subsampleColsNames) } - sample_table_name <- ifelse(CFG_SAMPLE_TABLE_KEY %in% names(project@config), + sampleTableName = ifelse(CFG_SAMPLE_TABLE_KEY %in% names(project@config), basename(project@config[[CFG_SAMPLE_TABLE_KEY]]), paste0(CFG_SAMPLE_TABLE_KEY, '.csv')) - subsample_table_name <- ifelse(CFG_SUBSAMPLE_TABLE_KEY %in% names(project@config), + subsampleTableName = ifelse(CFG_SUBSAMPLE_TABLE_KEY %in% names(project@config), basename(project@config[[CFG_SUBSAMPLE_TABLE_KEY]]), paste0(CFG_SUBSAMPLE_TABLE_KEY, '.csv')) - project@config[[CFG_SAMPLE_TABLE_KEY]] <- sample_table_name - project@config[[CFG_SUBSAMPLE_TABLE_KEY]] <- subsample_table_name + project@config[[CFG_SAMPLE_TABLE_KEY]] = sampleTableName + project@config[[CFG_SUBSAMPLE_TABLE_KEY]] = subsampleTableName - yaml::write_yaml(project@config, file = file.path(project_path, 'project_config.yaml')) - if (!is.null(samples_table_raw)) { - data.table::fwrite(samples_table_raw, file = file.path(project_path, sample_table_name)) + yaml::write_yaml(project@config, file = file.path(projectPath, 'project_config.yaml')) + if (!is.null(samplesTableRaw)) { + data.table::fwrite(samplesTableRaw, file = file.path(projectPath, sampleTableName)) } - if (!is.null(subsamples_table_raw)) { - data.table::fwrite(subsamples_table_raw, file = file.path(project_path, subsample_table_name)) + if (!is.null(subsamplesTableRaw)) { + data.table::fwrite(subsamplesTableRaw, file = file.path(projectPath, subsampleTableName)) } saved = TRUE } else { @@ -558,9 +552,8 @@ saveProject = function(project = NULL, #' @param list an object of class list #' @return an object of class data.frame #' @keywords internal -.listOfListToListOfDT <- function(list) { +.listOfListToListOfDT = function(list) { data.table::setDT(as.data.frame(do.call(rbind, list))) } - diff --git a/vignettes/gettingStarted.Rmd b/vignettes/gettingStarted.Rmd index 0f75385..8f3393c 100644 --- a/vignettes/gettingStarted.Rmd +++ b/vignettes/gettingStarted.Rmd @@ -43,7 +43,13 @@ Loading your project metadata into R is a single line of code: p = pepr::Project(file=projectConfigFile) ``` -That's it! You've now have a `Project` object, *p*, to interact with in `R`. +That's it! You now have a `Project` object, *p*, to interact with in `R`. You can also use PEPhub registry paths to fetch them online: + +```{r} +p2 = pepr::pullProject(registryPath='databio/example:default') +``` + +To fetch private PEPs, use PEPhub Client to login and generate an authentication token. You should then be able to fetch your private PEPs by registry path. ## Interfacing with your `pepr::Project` object in R @@ -65,4 +71,10 @@ And you can also access the project configuration metadata with the `config()` f config(p) ``` +If you would like to save your project to a new directory, use the `saveProject()` function: + +```{r} +saveProject(p2, outputDir=tempdir()) +``` + Follow the other vignettes for more advanced capabilities of `pepr`. From a2ed4f702b110790a72aa836398d32807f67453f Mon Sep 17 00:00:00 2001 From: Sam Park Date: Tue, 25 Jun 2024 11:05:02 -0400 Subject: [PATCH 3/8] add warning for pullProject invalid registry path and add a few tests --- NAMESPACE | 2 +- R/pepr.R | 2 +- R/utils.R | 3 +++ tests/testthat/test_project.R | 11 +++++++++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index cfc45b6..f2f0ebd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,8 +29,8 @@ exportMethods("[[") import(RCurl) import(pryr) import(stringr) -import(tidyr) import(yaml) importFrom(methods,as) importFrom(methods,callNextMethod) importFrom(methods,is) +importFrom(tidyr,unnest) diff --git a/R/pepr.R b/R/pepr.R index 8b1ca95..708b37f 100644 --- a/R/pepr.R +++ b/R/pepr.R @@ -8,7 +8,7 @@ #' @import yaml #' @import stringr #' @import pryr -#' @import tidyr +#' @importFrom tidyr unnest #' #' @references #' GitHub: \url{https://github.com/pepkit/pepr}, Documentation: \url{https://code.databio.org/pepr/} diff --git a/R/utils.R b/R/utils.R index 3ff9128..8740adf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -447,6 +447,9 @@ fetchSamples = function(samples, #' @keywords internal fetchPEP = function(registryPath, raw = TRUE) { pathSplit = strsplit(registryPath, '/|:')[[1]] + if (length(pathSplit) < 3) { + stop('Invalid registry path.') + } queryURL = paste0(BASE_URL, 'projects/', pathSplit[[1]], '/', pathSplit[[2]], '?tag=', pathSplit[[3]], '&raw=', raw) jwtPath = file.path(path.expand('~'), '.pephubclient', 'jwt.txt') diff --git a/tests/testthat/test_project.R b/tests/testthat/test_project.R index 7834bbb..fe7bc7c 100644 --- a/tests/testthat/test_project.R +++ b/tests/testthat/test_project.R @@ -157,6 +157,17 @@ test_that("Project constructor throws errors if nonexistant init files are provi expect_error(Project(file = "test.yaml")) }) +context("Pull test PEP from PEPhub") + +test_that("Pulling public PEP from PEPhub creates Project object", { + expect_is(pullProject(registryPath = "databio/example:default"), "Project") +}) + +context("Attempt to pull invalid registry path") + +test_that("pullProject throws error if invalid registry path is provided", { + expect_error(pullProject(registryPath = "invalid path")) +}) context("Sample automerging") From cba24eaeb68108c26331054204c43c0cdbe0656c Mon Sep 17 00:00:00 2001 From: Sam Park Date: Tue, 25 Jun 2024 13:24:02 -0400 Subject: [PATCH 4/8] add tidyr to description --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5e18ab5..25f10d7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,8 @@ Imports: pryr, data.table, methods, - RCurl + RCurl, + tidyr Suggests: knitr, testthat, From 66ac9b494ec833ff18a2cf1dd07cabb8d86d9fd4 Mon Sep 17 00:00:00 2001 From: Sam Park Date: Tue, 25 Jun 2024 13:26:26 -0400 Subject: [PATCH 5/8] add httr to description --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 25f10d7..8135673 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,8 @@ Imports: data.table, methods, RCurl, - tidyr + tidyr, + httr Suggests: knitr, testthat, From b8a9e261b4af977c53da00814000351e2414f829 Mon Sep 17 00:00:00 2001 From: Sam Park Date: Tue, 25 Jun 2024 13:36:44 -0400 Subject: [PATCH 6/8] address r cmd check errors --- NAMESPACE | 1 + R/pepr.R | 1 + R/utils.R | 3 +-- man/saveProject.Rd | 6 ++---- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f2f0ebd..2c9fb90 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ exportMethods("$") exportMethods("[") exportMethods("[[") import(RCurl) +import(httr) import(pryr) import(stringr) import(yaml) diff --git a/R/pepr.R b/R/pepr.R index 708b37f..bd09605 100644 --- a/R/pepr.R +++ b/R/pepr.R @@ -8,6 +8,7 @@ #' @import yaml #' @import stringr #' @import pryr +#' @import httr #' @importFrom tidyr unnest #' #' @references diff --git a/R/utils.R b/R/utils.R index 8740adf..469e3ad 100644 --- a/R/utils.R +++ b/R/utils.R @@ -479,9 +479,8 @@ fetchPEP = function(registryPath, raw = TRUE) { #' This is a helper that saves a PEP Project to a local output directory #' #' @param project a PEP Project -#' @param projectDir a string for the output directory, defaults to current working directory +#' @param outputDir a string for the output directory, defaults to current working directory #' @param overwrite a boolean for whether to overwrite an existing project at the output directory -#' @param sampleTableIndex a string indicating the sample attribute that is used #' #' @return a boolean, TRUE if the save was successful and FALSE if otherwise #' @export diff --git a/man/saveProject.Rd b/man/saveProject.Rd index 193bb3a..fa04237 100644 --- a/man/saveProject.Rd +++ b/man/saveProject.Rd @@ -9,11 +9,9 @@ saveProject(project = NULL, outputDir = getwd(), overwrite = FALSE) \arguments{ \item{project}{a PEP Project} -\item{overwrite}{a boolean for whether to overwrite an existing project at the output directory} - -\item{projectDir}{a string for the output directory, defaults to current working directory} +\item{outputDir}{a string for the output directory, defaults to current working directory} -\item{sampleTableIndex}{a string indicating the sample attribute that is used} +\item{overwrite}{a boolean for whether to overwrite an existing project at the output directory} } \value{ a boolean, TRUE if the save was successful and FALSE if otherwise From 108b2bc257f0b75aa1bfe9ce241d36d2f488e35e Mon Sep 17 00:00:00 2001 From: Sam Park Date: Mon, 5 Aug 2024 11:06:07 -0400 Subject: [PATCH 7/8] added saveJWT function to let you write jwt tokens copied from PEPhub --- DESCRIPTION | 4 ++-- NAMESPACE | 1 + R/utils.R | 28 ++++++++++++++++++++++++++++ man/pepr.Rd | 8 ++++++++ man/saveJWT.Rd | 17 +++++++++++++++++ vignettes/gettingStarted.Rmd | 8 +++++++- 6 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 man/saveJWT.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 8135673..c73179a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: pepr Type: Package Title: Reading Portable Encapsulated Projects -Version: 0.5.1 +Version: 0.6.0 Date: 2024-06-13 Authors@R: c(person("Nathan", "Sheffield", email = "nathan@code.databio.org", role = c("aut", "cph","cre")),person("Michal","Stolarczyk",email="michal@virginia.edu",role=c("aut"))) @@ -29,5 +29,5 @@ Suggests: VignetteBuilder: knitr License: BSD_2_clause + file LICENSE BugReports: https://github.com/pepkit/pepr -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index 2c9fb90..729942c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(listAmendments) export(makeSectionsAbsolute) export(pullProject) export(sampleTable) +export(saveJWT) export(saveProject) exportClasses(Config) exportClasses(Project) diff --git a/R/utils.R b/R/utils.R index 469e3ad..fa725e3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -559,3 +559,31 @@ saveProject = function(project = NULL, } + +#' Save or update JWT +#' +#' Save or update local authentication token to fetch private PEPs +#' +#' @param jwt a string for the new jwt to save +#' +#' @return a boolean for whether the authentication token was saved +#' @export +saveJWT = function(jwt) { + if (typeof(jwt) == 'character') { + jwtPath = file.path(path.expand('~'), '.pephubclient', 'jwt.txt') + + if (file.exists(jwtPath)) { + warning('Overwriting existing authentication token...') + } + + cat(jwt, file = jwtPath) + print(paste0('JWT saved to ', jwtPath)) + + return(TRUE) + } else { + warning('Invalid authentication token provided.') + return(FALSE) + } +} + + diff --git a/man/pepr.Rd b/man/pepr.Rd index 0f7739b..223e7ed 100644 --- a/man/pepr.Rd +++ b/man/pepr.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/pepr.R \docType{package} \name{pepr} +\alias{pepr-package} \alias{pepr} \title{pepr} \description{ @@ -9,6 +10,13 @@ Package documentation } \references{ GitHub: \url{https://github.com/pepkit/pepr}, Documentation: \url{https://code.databio.org/pepr/} +} +\seealso{ +Useful links: +\itemize{ + \item Report bugs at \url{https://github.com/pepkit/pepr} +} + } \author{ Michal Stolarczyk, Nathan Sheffield diff --git a/man/saveJWT.Rd b/man/saveJWT.Rd new file mode 100644 index 0000000..b92e3dd --- /dev/null +++ b/man/saveJWT.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{saveJWT} +\alias{saveJWT} +\title{Save or update JWT} +\usage{ +saveJWT(jwt) +} +\arguments{ +\item{jwt}{a string for the new jwt to save} +} +\value{ +a boolean for whether the authentication token was saved +} +\description{ +Save or update local authentication token to fetch private PEPs +} diff --git a/vignettes/gettingStarted.Rmd b/vignettes/gettingStarted.Rmd index 8f3393c..63de8aa 100644 --- a/vignettes/gettingStarted.Rmd +++ b/vignettes/gettingStarted.Rmd @@ -49,7 +49,13 @@ That's it! You now have a `Project` object, *p*, to interact with in `R`. You ca p2 = pepr::pullProject(registryPath='databio/example:default') ``` -To fetch private PEPs, use PEPhub Client to login and generate an authentication token. You should then be able to fetch your private PEPs by registry path. +To fetch private PEPs, use PEPhub Client to login and generate an authentication token. Or, login to PEPhub and generate a new key to save it locally: + +```{r} +pepr::saveJWT('*your authetication token goes here*') +``` + +You should then be able to fetch your private PEPs by registry path. ## Interfacing with your `pepr::Project` object in R From c9640be5f205f431dad0d35ec8a1757a904e2694 Mon Sep 17 00:00:00 2001 From: Sam Park Date: Mon, 5 Aug 2024 11:19:28 -0400 Subject: [PATCH 8/8] update getting started vignette --- DESCRIPTION | 2 +- vignettes/gettingStarted.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c73179a..6542ceb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: pepr Type: Package Title: Reading Portable Encapsulated Projects Version: 0.6.0 -Date: 2024-06-13 +Date: 2024-08-10 Authors@R: c(person("Nathan", "Sheffield", email = "nathan@code.databio.org", role = c("aut", "cph","cre")),person("Michal","Stolarczyk",email="michal@virginia.edu",role=c("aut"))) Maintainer: Nathan Sheffield diff --git a/vignettes/gettingStarted.Rmd b/vignettes/gettingStarted.Rmd index 63de8aa..f5fd9ee 100644 --- a/vignettes/gettingStarted.Rmd +++ b/vignettes/gettingStarted.Rmd @@ -51,7 +51,7 @@ p2 = pepr::pullProject(registryPath='databio/example:default') To fetch private PEPs, use PEPhub Client to login and generate an authentication token. Or, login to PEPhub and generate a new key to save it locally: -```{r} +```{r, eval=FALSE} pepr::saveJWT('*your authetication token goes here*') ```