From 7c03904223efe73e54cf0bd084d2991957d61aeb Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Mon, 26 Jun 2023 10:45:39 +0900
Subject: [PATCH 01/17] Re-activate importPublicData

---
 R/ImportMethods.R | 678 +++++++++++++++++++++++-----------------------
 1 file changed, 338 insertions(+), 340 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 0ace8a9..2dbcf72 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -650,343 +650,341 @@ setMethod( "getCTSS", "CAGEexp"
 "FANTOM5mouseSamples"
 
 
-#' #' importPublicData
-#' #' @noRd
-#' #' @importFrom utils data
-#' #' @export
-#' 
-#' setGeneric(
-#' name="importPublicData",
-#' def=function(source, dataset, group, sample){
-#' 	standardGeneric("importPublicData")
-#' }
-#' )
-#' 
-#' setMethod("importPublicData",
-#' signature(source = "character", dataset = "character", sample = "character"),
-#' function (source, dataset, group, sample){
-#' 	
-#' 	if(source == "ENCODE"){
-#' 		
-#' 		if("ENCODEprojectCAGE" %in% rownames(installed.packages()) == FALSE){
-#' 			stop("Requested CAGE data package is not installed! Please install and load the ENCODEprojectCAGE package, which is available for download from http://promshift.genereg.net/CAGEr/PackageSource/.")
-#' 		}else if(!("package:ENCODEprojectCAGE" %in% search())){
-#' 			stop("Requested CAGE data package is not loaded! Please load the data package by calling 'library(ENCODEprojectCAGE)'")
-#' 		}
-#' 			
-#' 		if(dataset[1] == "ENCODEtissueCAGEfly"){
-#' 			genome.name <- "BSgenome.Dmelanogaster.UCSC.dm3"
-#' 			ENCODEtissueCAGEfly <- NULL
-#' 			data("ENCODEtissueCAGEfly", envir = environment())
-#' 			if(group == "embryo"){
-#' 				if(sample == "mixed_embryos_0-24hr"){
-#' 					
-#' 					ctssTable <- ENCODEtissueCAGEfly[["embryo"]]
-#' 					
-#' 				}else{
-#' 					stop("Specified sample not valid! The dataset 'ENCODEtissueCAGEfly' containes only one sample named 'mixed_embryos_0-24hr'!")
-#' 				}
-#' 			}else{
-#' 				stop("Specified group not valid! The dataset 'ENCODEtissueCAGEfly' containes only one group named 'embryo'!")
-#' 			}
-#' 		
-#' 		}else{
-#' 			genome.name <- "BSgenome.Hsapiens.UCSC.hg19"
-#' 			ENCODEhumanCellLinesSamples <- NULL
-#' 			data("ENCODEhumanCellLinesSamples", envir = environment())
-#' 			info.df <- ENCODEhumanCellLinesSamples
-#' 			
-#' 			if(!(all(dataset %in% info.df$dataset))){
-#' 				stop("Specified dataset(s) not found! Call data(ENCODEhumanCellLinesSamples) and check 'dataset' column for available ENCODE datasets!")
-#' 			}
-#' 			if(length(dataset) == 1){
-#' 				
-#' 				if(!(all(group %in% info.df[info.df$dataset == dataset,"group"]))){
-#' 					stop("Some of the provided groups cannot be found in the specified dataset!")
-#' 				}
-#' 				if(length(group) == 1){
-#' 					if(!(all(sample %in% info.df[info.df$group == group,"sample"]))){
-#' 						stop("Some of the provided samples cannot be found in the specified group!")
-#' 					}
-#' 				}else if(length(group) == length(sample)){
-#' 					if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
-#' 						stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
-#' 					}
-#' 				}else{
-#' 					stop("Number of elements in the 'group' must be either 1 or must match the number of elements in the 'sample'!")
-#' 				}
-#' 								
-#' 								
-#' 			}else if(length(dataset) == length(group)){
-#' 				if(!all(sapply(c(1:length(dataset)), function(x) {group[x] %in% info.df[info.df$dataset == dataset[x],"group"]}))){
-#' 					stop("Provided 'dataset' and 'group' do not match! Some of the provided groups cannot be found in the corresponding datasets!")
-#' 				}
-#' 				if(length(group) == length(sample)){
-#' 					if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
-#' 						stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
-#' 					}
-#' 				}else{
-#' 					stop("Number of elements in the 'group' must match the number of elements in the 'sample'!")
-#' 				}
-#' 			
-#' 				
-#' 			}else{
-#' 				stop("Number of elements in the 'dataset' must be either 1 or must match the number of elements in the 'group' and in the 'sample'!")
-#' 			}
-#' 			
-#' 			data(list = dataset, envir = environment())
-#' 			
-#' 			if(length(unique(dataset))>1){
-#' 				
-#' 				for(i in 1:length(unique(dataset))){
-#' 					dset <- get(unique(dataset)[i])
-#' 					for(j in 1:length(unique(group[which(dataset == unique(dataset)[i])]))){
-#' 						g <- unique(group[which(dataset == unique(dataset)[i])])[j]
-#' 						ctss <- dset[[g]][, c("chr", "pos", "strand", sample[which((group == g) & (dataset == unique(dataset)[i]))])]
-#' 						discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
-#' 						ctss <- data.table(ctss[discard,])
-#' 						setkeyv(ctss, cols = c("chr", "pos", "strand"))
-#' 						if(i == 1 & j == 1){
-#' 							ctssTable <- ctss							
-#' 						}else{
-#' 							ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
-#' 						}
-#' 					}
-#' 				}
-#' 				ctssTable[is.na(ctssTable)] <- 0
-#' 				
-#' 			}else{
-#' 				
-#' 				selected.dataset <- get(dataset)
-#' 				if(length(unique(group))>1){
-#' 					
-#' 					for(i in 1:length(unique(group))){
-#' 						g <- unique(group)[i]
-#' 						ctss <- selected.dataset[[g]][, c("chr", "pos", "strand", sample[which(group == g)])]
-#' 						discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
-#' 						ctss <- data.table(ctss[discard,])
-#' 						setkeyv(ctss, cols = c("chr", "pos", "strand"))
-#' 						if(i == 1){
-#' 							ctssTable <- ctss							
-#' 						}else{
-#' 							ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
-#' 						}						
-#' 					}
-#' 					
-#' 					ctssTable[is.na(ctssTable)] <- 0					
-#' 					
-#' 				}else{
-#' 					ctssTable <- selected.dataset[[group]][,c("chr", "pos", "strand", sample)]
-#' 				}
-#' 			}
-#' 			
-#' 			ctssTable <- data.frame(ctssTable, stringsAsFactors = F, check.names = F)
-#' 		}
-#' 		
-#' 		
-#' 	}else if(source == "FANTOM3and4"){
-#' 		
-#' 		if("FANTOM3and4CAGE" %in% rownames(installed.packages()) == FALSE){
-#' 			stop("Requested CAGE data package is not installed! Please install and load the FANTOM3and4CAGE package available from Bioconductor.")
-#' 		}else if(!("package:FANTOM3and4CAGE" %in% search())){
-#' 			stop("Requested CAGE data package is not loaded! Please load the data package by calling 'library(FANTOM3and4CAGE)'")
-#' 		}
-#' 		FANTOMhumanSamples <- FANTOMmouseSamples <- NULL
-#' 		data("FANTOMhumanSamples", envir = environment())
-#' 		info.df1 <- FANTOMhumanSamples
-#' 		data("FANTOMmouseSamples", envir = environment())
-#' 		info.df2 <- FANTOMmouseSamples
-#' 				
-#' 		if(!(all(dataset %in% info.df1$dataset) | all(dataset %in% info.df2$dataset))){
-#' 			stop("Specified dataset(s) not found! Call data(FANTOMhumanSamples) and data(FANTOMmouseSamples) and check 'dataset' column for available ENCODE datasets!")
-#' 		}
-#' 		if(length(grep("human", dataset))>0){
-#' 			genome.name <- "BSgenome.Hsapiens.UCSC.hg18"
-#' 			info.df <- info.df1
-#' 		}else if(length(grep("mouse", dataset))>0){
-#' 			genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
-#' 			info.df <- info.df2
-#' 		}
-#' 		
-#' 		if(length(dataset) == 1){
-#' 			
-#' 			if(!(all(group %in% info.df[info.df$dataset == dataset,"group"]))){
-#' 				stop("Some of the provided groups cannot be found in the specified dataset!")
-#' 			}
-#' 			if(length(group) == 1){
-#' 				if(!(all(sample %in% info.df[info.df$group == group,"sample"]))){
-#' 					stop("Some of the provided samples cannot be found in the specified group!")
-#' 				}
-#' 			}else if(length(group) == length(sample)){
-#' 				if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
-#' 					stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
-#' 				}
-#' 			}else{
-#' 				stop("Number of elements in the 'group' must be either 1 or must match the number of elements in the 'sample'!")
-#' 			}
-#' 			
-#' 			
-#' 		}else if(length(dataset) == length(group)){
-#' 			if(!all(sapply(c(1:length(dataset)), function(x) {group[x] %in% info.df[info.df$dataset == dataset[x],"group"]}))){
-#' 				stop("Provided 'dataset' and 'group' do not match! Some of the provided groups cannot be found in the corresponding datasets!")
-#' 			}
-#' 			if(length(group) == length(sample)){
-#' 				if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
-#' 					stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
-#' 				}
-#' 			}else{
-#' 				stop("Number of elements in the 'group' must match the number of elements in the 'sample'!")
-#' 			}
-#' 			
-#' 			
-#' 		}else{
-#' 			stop("Number of elements in the 'dataset' must be either 1 or must match the number of elements in the 'group' and in the 'sample'!")
-#' 		}
-#' 		
-#' 		data(list = dataset, envir = environment())
-#' 		
-#' 		if(length(unique(dataset))>1){
-#' 			
-#' 			for(i in 1:length(unique(dataset))){
-#' 				dset <- get(unique(dataset)[i])
-#' 				for(j in 1:length(unique(group[which(dataset == unique(dataset)[i])]))){
-#' 					g <- unique(group[which(dataset == unique(dataset)[i])])[j]
-#' 					ctss <- dset[[g]][, c("chr", "pos", "strand", sample[which(group == g)])]
-#' 					discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
-#' 					ctss <- data.table(ctss[discard,])
-#' 					setnames(ctss, c("chr", "pos", "strand", paste(g, sample[which(group == g)], sep = "__")))
-#' 					setkeyv(ctss, cols = c("chr", "pos", "strand"))
-#' 					if(i == 1 & j == 1){
-#' 						ctssTable <- ctss							
-#' 					}else{
-#' 						ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
-#' 					}
-#' 				}
-#' 			}
-#' 			
-#' 			ctssTable[is.na(ctssTable)] <- 0
-#' 			
-#' 		}else{
-#' 			
-#' 			selected.dataset <- get(dataset)
-#' 			if(length(unique(group))>1){
-#' 				
-#' 				for(i in 1:length(unique(group))){
-#' 					g <- unique(group)[i]
-#' 					ctss <- selected.dataset[[g]][, c("chr", "pos", "strand", sample[which(group == g)])]
-#' 					discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
-#' 					ctss <- data.table(ctss[discard,])
-#' 					setnames(ctss, c("chr", "pos", "strand", paste(g, sample[which(group == g)], sep = "__")))
-#' 					setkeyv(ctss, cols = c("chr", "pos", "strand"))
-#' 					if(i == 1){
-#' 						ctssTable <- ctss							
-#' 					}else{
-#' 						ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
-#' 					}						
-#' 				}
-#' 				
-#' 				ctssTable[is.na(ctssTable)] <- 0					
-#' 				
-#' 			}else{
-#' 				ctssTable <- selected.dataset[[group]][,c("chr", "pos", "strand", sample)]
-#' 				setnames(ctssTable, c("chr", "pos", "strand", paste(group, sample, sep = "__")))
-#' 			}
-#' 		}
-#' 		
-#' 		ctssTable <- data.frame(ctssTable, stringsAsFactors = F, check.names = F)
-#' 	
-#' 			
-#' 		
-#' 	}else if (source == "FANTOM5"){
-#' 		
-#' 		if(length(dataset) != 1){
-#' 			stop("For FANTOM5 only one dataset can be specified and it can be either 'human' or 'mouse'!")
-#' 		}else if(!(dataset %in% c("human", "mouse"))){
-#' 			stop("For FANTOM5, dataset can be either 'human' or 'mouse'!")
-#' 		}
-#' 		if(dataset == "human"){
-#' 		  FANTOM5humanSamples <- NULL
-#' 			data("FANTOM5humanSamples", envir = environment())
-#' 			samples.info <- FANTOM5humanSamples
-#' 			genome.name <- "BSgenome.Hsapiens.UCSC.hg19"
-#' 		}else if(dataset == "mouse"){
-#' 		  FANTOM5mouseSamples <- NULL
-#' 			data("FANTOM5mouseSamples", envir = environment())
-#' 			samples.info <- FANTOM5mouseSamples
-#' 			genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
-#' 		}		
-#' 
-#' 		if(!(all(sample %in% samples.info$sample))){
-#' 			stop(paste("Some sample names cannot be found for the specified dataset! Call data(FANTOM5", dataset, "Samples) and check the 'sample' column for valid sample names!", sep = ""))
-#' 		}
-#' 		
-#' 		
-#' 		
-#' 		for(i in c(1:length(sample))){
-#' 			
-#' 			message("Fetching sample: ", sample[i], "...")
-#' 			sample.url <- samples.info[samples.info$sample == sample[i], "data_url"]
-#' 			con <- gzcon(url(paste(sample.url)))
-#' 			ctss <- scan(con, what = list(character(), NULL, integer(), NULL, integer(), character()))
-#' 			ctss.df <- data.table(chr = ctss[[1]], pos = ctss[[3]], strand = ctss[[6]], tagCount = ctss[[5]])
-#' 			setnames(ctss.df, c("chr", "pos", "strand", sample[i]))
-#' 			setkeyv(ctss.df, cols = c("chr", "pos", "strand"))
-#' 			if(i == 1){
-#' 				ctss.table <- ctss.df
-#' 			}else{
-#' 				message("Adding sample to CTSS table...\n")
-#' 				ctss.table <- merge(ctss.table, ctss.df, all.x = T, all.y = T)
-#' 				ctss.table[is.na(ctss.table)] <- 0
-#' 			}
-#' 			
-#' 		}
-#' 		
-#' 		ctssTable <- data.frame(ctss.table, stringsAsFactors = F, check.names = F)
-#' 		
-#' 		
-#' 	}else if (source == "ZebrafishDevelopment"){
-#' 
-#' 		if("ZebrafishDevelopmentalCAGE" %in% rownames(installed.packages()) == FALSE){
-#' 			stop("Requested CAGE data package is not installed! Please install and load the ZebrafishDevelopmentalCAGE package, which is available for download from http://promshift.genereg.net/CAGEr/PackageSource/.")
-#' 		}else if(!("package:ZebrafishDevelopmentalCAGE" %in% search())){
-#' 			stop("Requested CAGE data package is not loaded! Please load the data package by calling 'library(ZebrafishDevelopmentalCAGE)'")
-#' 		}
-#' 		
-#' 	  ZebrafishSamples <- NULL
-#' 		data("ZebrafishSamples", envir = environment())
-#' 		if(dataset == "ZebrafishCAGE"){
-#' 			if(group == "development"){
-#' 				if(!(all(sample %in% ZebrafishSamples$sample))){
-#' 					stop("Some sample names cannot be found for the specified dataset! Call data(ZebrafishSamples) and check the 'sample' column for valid sample names!")
-#' 				}else{
-#' 					genome.name <- "BSgenome.Drerio.UCSC.danRer7"
-#' 					ZebrafishCAGE <- NULL
-#' 					data("ZebrafishCAGE", envir = environment())
-#' 					ctssTable <- ZebrafishCAGE[["development"]][,c("chr", "pos", "strand", sample)]
-#'                     ctssTable <- ctssTable[apply(ctssTable[,4:ncol(ctssTable),drop=FALSE], 1, function(x) {any(x>0)}),]
-#' 				}
-#' 			}else{
-#' 				stop("Invalid group name! There is only one group in this dataset named 'development'.")
-#' 			}
-#' 		}else{
-#' 			stop("Invalid dataset name! There is only one available dataset named 'ZebrafishCAGE'.")
-#' 		}
-#' 		
-#' 		
-#' 	}else{
-#' 		stop("Currently only the following public CAGE data resources are supported: 'FANTOM5', 'FANTOM3and4', 'ENCODE', 'ZebrafishDevelopment'. Refer to CAGEr vignette on how to use those resources!")
-#' 	}
-#' 	
-#'     rownames(ctssTable) <- c(1:nrow(ctssTable))
-#'     
-#' 	sample.labels <- colnames(ctssTable)[4:ncol(ctssTable)]
-#' 	names(sample.labels) <- rainbow(n = length(sample.labels))
-#' 	myCAGEset <- new("CAGEset", genomeName = genome.name, inputFiles = paste(source, sample.labels, sep = "__"), inputFilesType = source, sampleLabels = sample.labels)
-#' 	myCAGEset@librarySizes <- as.integer(colSums(ctssTable[,4:ncol(ctssTable),drop=FALSE]))
-#' 	myCAGEset@CTSScoordinates <- ctssTable[, c("chr", "pos", "strand")]
-#' 	myCAGEset@tagCountMatrix <- ctssTable[,4:ncol(ctssTable),drop=FALSE]
-#' 	
-#' 	return(myCAGEset)
-#' 	
-#' }
-#' )
+#' importPublicData
+#' @noRd
+#' @importFrom utils data
+#' @export
+
+setGeneric("importPublicData",
+           function(source, dataset, group, sample)
+             standardGeneric("importPublicData"))
+
+setMethod("importPublicData",
+signature(source = "character", dataset = "character", sample = "character"),
+.importPublicData (source, dataset, group, sample))
+
+.importPublicData <- function (source, dataset, group, sample){
+
+	if(source == "ENCODE"){
+
+		if("ENCODEprojectCAGE" %in% rownames(installed.packages()) == FALSE){
+			stop("Requested CAGE data package is not installed! Please install and load the ENCODEprojectCAGE package, which is available for download from http://promshift.genereg.net/CAGEr/PackageSource/.")
+		}else if(!("package:ENCODEprojectCAGE" %in% search())){
+			stop("Requested CAGE data package is not loaded! Please load the data package by calling 'library(ENCODEprojectCAGE)'")
+		}
+
+		if(dataset[1] == "ENCODEtissueCAGEfly"){
+			genome.name <- "BSgenome.Dmelanogaster.UCSC.dm3"
+			ENCODEtissueCAGEfly <- NULL
+			data("ENCODEtissueCAGEfly", envir = environment())
+			if(group == "embryo"){
+				if(sample == "mixed_embryos_0-24hr"){
+
+					ctssTable <- ENCODEtissueCAGEfly[["embryo"]]
+
+				}else{
+					stop("Specified sample not valid! The dataset 'ENCODEtissueCAGEfly' containes only one sample named 'mixed_embryos_0-24hr'!")
+				}
+			}else{
+				stop("Specified group not valid! The dataset 'ENCODEtissueCAGEfly' containes only one group named 'embryo'!")
+			}
+
+		}else{
+			genome.name <- "BSgenome.Hsapiens.UCSC.hg19"
+			ENCODEhumanCellLinesSamples <- NULL
+			data("ENCODEhumanCellLinesSamples", envir = environment())
+			info.df <- ENCODEhumanCellLinesSamples
+
+			if(!(all(dataset %in% info.df$dataset))){
+				stop("Specified dataset(s) not found! Call data(ENCODEhumanCellLinesSamples) and check 'dataset' column for available ENCODE datasets!")
+			}
+			if(length(dataset) == 1){
+
+				if(!(all(group %in% info.df[info.df$dataset == dataset,"group"]))){
+					stop("Some of the provided groups cannot be found in the specified dataset!")
+				}
+				if(length(group) == 1){
+					if(!(all(sample %in% info.df[info.df$group == group,"sample"]))){
+						stop("Some of the provided samples cannot be found in the specified group!")
+					}
+				}else if(length(group) == length(sample)){
+					if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
+						stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
+					}
+				}else{
+					stop("Number of elements in the 'group' must be either 1 or must match the number of elements in the 'sample'!")
+				}
+
+
+			}else if(length(dataset) == length(group)){
+				if(!all(sapply(c(1:length(dataset)), function(x) {group[x] %in% info.df[info.df$dataset == dataset[x],"group"]}))){
+					stop("Provided 'dataset' and 'group' do not match! Some of the provided groups cannot be found in the corresponding datasets!")
+				}
+				if(length(group) == length(sample)){
+					if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
+						stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
+					}
+				}else{
+					stop("Number of elements in the 'group' must match the number of elements in the 'sample'!")
+				}
+
+
+			}else{
+				stop("Number of elements in the 'dataset' must be either 1 or must match the number of elements in the 'group' and in the 'sample'!")
+			}
+
+			data(list = dataset, envir = environment())
+
+			if(length(unique(dataset))>1){
+
+				for(i in 1:length(unique(dataset))){
+					dset <- get(unique(dataset)[i])
+					for(j in 1:length(unique(group[which(dataset == unique(dataset)[i])]))){
+						g <- unique(group[which(dataset == unique(dataset)[i])])[j]
+						ctss <- dset[[g]][, c("chr", "pos", "strand", sample[which((group == g) & (dataset == unique(dataset)[i]))])]
+						discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
+						ctss <- data.table(ctss[discard,])
+						setkeyv(ctss, cols = c("chr", "pos", "strand"))
+						if(i == 1 & j == 1){
+							ctssTable <- ctss
+						}else{
+							ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
+						}
+					}
+				}
+				ctssTable[is.na(ctssTable)] <- 0
+
+			}else{
+
+				selected.dataset <- get(dataset)
+				if(length(unique(group))>1){
+
+					for(i in 1:length(unique(group))){
+						g <- unique(group)[i]
+						ctss <- selected.dataset[[g]][, c("chr", "pos", "strand", sample[which(group == g)])]
+						discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
+						ctss <- data.table(ctss[discard,])
+						setkeyv(ctss, cols = c("chr", "pos", "strand"))
+						if(i == 1){
+							ctssTable <- ctss
+						}else{
+							ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
+						}
+					}
+
+					ctssTable[is.na(ctssTable)] <- 0
+
+				}else{
+					ctssTable <- selected.dataset[[group]][,c("chr", "pos", "strand", sample)]
+				}
+			}
+
+			ctssTable <- data.frame(ctssTable, stringsAsFactors = F, check.names = F)
+		}
+
+
+	}else if(source == "FANTOM3and4"){
+
+		if("FANTOM3and4CAGE" %in% rownames(installed.packages()) == FALSE){
+			stop("Requested CAGE data package is not installed! Please install and load the FANTOM3and4CAGE package available from Bioconductor.")
+		}else if(!("package:FANTOM3and4CAGE" %in% search())){
+			stop("Requested CAGE data package is not loaded! Please load the data package by calling 'library(FANTOM3and4CAGE)'")
+		}
+		FANTOMhumanSamples <- FANTOMmouseSamples <- NULL
+		data("FANTOMhumanSamples", envir = environment())
+		info.df1 <- FANTOMhumanSamples
+		data("FANTOMmouseSamples", envir = environment())
+		info.df2 <- FANTOMmouseSamples
+
+		if(!(all(dataset %in% info.df1$dataset) | all(dataset %in% info.df2$dataset))){
+			stop("Specified dataset(s) not found! Call data(FANTOMhumanSamples) and data(FANTOMmouseSamples) and check 'dataset' column for available ENCODE datasets!")
+		}
+		if(length(grep("human", dataset))>0){
+			genome.name <- "BSgenome.Hsapiens.UCSC.hg18"
+			info.df <- info.df1
+		}else if(length(grep("mouse", dataset))>0){
+			genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
+			info.df <- info.df2
+		}
+
+		if(length(dataset) == 1){
+
+			if(!(all(group %in% info.df[info.df$dataset == dataset,"group"]))){
+				stop("Some of the provided groups cannot be found in the specified dataset!")
+			}
+			if(length(group) == 1){
+				if(!(all(sample %in% info.df[info.df$group == group,"sample"]))){
+					stop("Some of the provided samples cannot be found in the specified group!")
+				}
+			}else if(length(group) == length(sample)){
+				if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
+					stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
+				}
+			}else{
+				stop("Number of elements in the 'group' must be either 1 or must match the number of elements in the 'sample'!")
+			}
+
+
+		}else if(length(dataset) == length(group)){
+			if(!all(sapply(c(1:length(dataset)), function(x) {group[x] %in% info.df[info.df$dataset == dataset[x],"group"]}))){
+				stop("Provided 'dataset' and 'group' do not match! Some of the provided groups cannot be found in the corresponding datasets!")
+			}
+			if(length(group) == length(sample)){
+				if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
+					stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
+				}
+			}else{
+				stop("Number of elements in the 'group' must match the number of elements in the 'sample'!")
+			}
+
+
+		}else{
+			stop("Number of elements in the 'dataset' must be either 1 or must match the number of elements in the 'group' and in the 'sample'!")
+		}
+
+		data(list = dataset, envir = environment())
+
+		if(length(unique(dataset))>1){
+
+			for(i in 1:length(unique(dataset))){
+				dset <- get(unique(dataset)[i])
+				for(j in 1:length(unique(group[which(dataset == unique(dataset)[i])]))){
+					g <- unique(group[which(dataset == unique(dataset)[i])])[j]
+					ctss <- dset[[g]][, c("chr", "pos", "strand", sample[which(group == g)])]
+					discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
+					ctss <- data.table(ctss[discard,])
+					setnames(ctss, c("chr", "pos", "strand", paste(g, sample[which(group == g)], sep = "__")))
+					setkeyv(ctss, cols = c("chr", "pos", "strand"))
+					if(i == 1 & j == 1){
+						ctssTable <- ctss
+					}else{
+						ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
+					}
+				}
+			}
+
+			ctssTable[is.na(ctssTable)] <- 0
+
+		}else{
+
+			selected.dataset <- get(dataset)
+			if(length(unique(group))>1){
+
+				for(i in 1:length(unique(group))){
+					g <- unique(group)[i]
+					ctss <- selected.dataset[[g]][, c("chr", "pos", "strand", sample[which(group == g)])]
+					discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
+					ctss <- data.table(ctss[discard,])
+					setnames(ctss, c("chr", "pos", "strand", paste(g, sample[which(group == g)], sep = "__")))
+					setkeyv(ctss, cols = c("chr", "pos", "strand"))
+					if(i == 1){
+						ctssTable <- ctss
+					}else{
+						ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
+					}
+				}
+
+				ctssTable[is.na(ctssTable)] <- 0
+
+			}else{
+				ctssTable <- selected.dataset[[group]][,c("chr", "pos", "strand", sample)]
+				setnames(ctssTable, c("chr", "pos", "strand", paste(group, sample, sep = "__")))
+			}
+		}
+
+		ctssTable <- data.frame(ctssTable, stringsAsFactors = F, check.names = F)
+
+
+
+	}else if (source == "FANTOM5"){
+
+		if(length(dataset) != 1){
+			stop("For FANTOM5 only one dataset can be specified and it can be either 'human' or 'mouse'!")
+		}else if(!(dataset %in% c("human", "mouse"))){
+			stop("For FANTOM5, dataset can be either 'human' or 'mouse'!")
+		}
+		if(dataset == "human"){
+		  FANTOM5humanSamples <- NULL
+			data("FANTOM5humanSamples", envir = environment())
+			samples.info <- FANTOM5humanSamples
+			genome.name <- "BSgenome.Hsapiens.UCSC.hg19"
+		}else if(dataset == "mouse"){
+		  FANTOM5mouseSamples <- NULL
+			data("FANTOM5mouseSamples", envir = environment())
+			samples.info <- FANTOM5mouseSamples
+			genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
+		}
+
+		if(!(all(sample %in% samples.info$sample))){
+			stop(paste("Some sample names cannot be found for the specified dataset! Call data(FANTOM5", dataset, "Samples) and check the 'sample' column for valid sample names!", sep = ""))
+		}
+
+
+
+		for(i in c(1:length(sample))){
+
+			message("Fetching sample: ", sample[i], "...")
+			sample.url <- samples.info[samples.info$sample == sample[i], "data_url"]
+			con <- gzcon(url(paste(sample.url)))
+			ctss <- scan(con, what = list(character(), NULL, integer(), NULL, integer(), character()))
+			ctss.df <- data.table(chr = ctss[[1]], pos = ctss[[3]], strand = ctss[[6]], tagCount = ctss[[5]])
+			setnames(ctss.df, c("chr", "pos", "strand", sample[i]))
+			setkeyv(ctss.df, cols = c("chr", "pos", "strand"))
+			if(i == 1){
+				ctss.table <- ctss.df
+			}else{
+				message("Adding sample to CTSS table...\n")
+				ctss.table <- merge(ctss.table, ctss.df, all.x = T, all.y = T)
+				ctss.table[is.na(ctss.table)] <- 0
+			}
+
+		}
+
+		ctssTable <- data.frame(ctss.table, stringsAsFactors = F, check.names = F)
+
+
+	}else if (source == "ZebrafishDevelopment"){
+
+		if("ZebrafishDevelopmentalCAGE" %in% rownames(installed.packages()) == FALSE){
+			stop("Requested CAGE data package is not installed! Please install and load the ZebrafishDevelopmentalCAGE package, which is available for download from http://promshift.genereg.net/CAGEr/PackageSource/.")
+		}else if(!("package:ZebrafishDevelopmentalCAGE" %in% search())){
+			stop("Requested CAGE data package is not loaded! Please load the data package by calling 'library(ZebrafishDevelopmentalCAGE)'")
+		}
+
+	  ZebrafishSamples <- NULL
+		data("ZebrafishSamples", envir = environment())
+		if(dataset == "ZebrafishCAGE"){
+			if(group == "development"){
+				if(!(all(sample %in% ZebrafishSamples$sample))){
+					stop("Some sample names cannot be found for the specified dataset! Call data(ZebrafishSamples) and check the 'sample' column for valid sample names!")
+				}else{
+					genome.name <- "BSgenome.Drerio.UCSC.danRer7"
+					ZebrafishCAGE <- NULL
+					data("ZebrafishCAGE", envir = environment())
+					ctssTable <- ZebrafishCAGE[["development"]][,c("chr", "pos", "strand", sample)]
+                    ctssTable <- ctssTable[apply(ctssTable[,4:ncol(ctssTable),drop=FALSE], 1, function(x) {any(x>0)}),]
+				}
+			}else{
+				stop("Invalid group name! There is only one group in this dataset named 'development'.")
+			}
+		}else{
+			stop("Invalid dataset name! There is only one available dataset named 'ZebrafishCAGE'.")
+		}
+
+
+	}else{
+		stop("Currently only the following public CAGE data resources are supported: 'FANTOM5', 'FANTOM3and4', 'ENCODE', 'ZebrafishDevelopment'. Refer to CAGEr vignette on how to use those resources!")
+	}
+
+    rownames(ctssTable) <- c(1:nrow(ctssTable))
+
+	sample.labels <- colnames(ctssTable)[4:ncol(ctssTable)]
+	names(sample.labels) <- rainbow(n = length(sample.labels))
+	myCAGEset <- new("CAGEset", genomeName = genome.name, inputFiles = paste(source, sample.labels, sep = "__"), inputFilesType = source, sampleLabels = sample.labels)
+	myCAGEset@librarySizes <- as.integer(colSums(ctssTable[,4:ncol(ctssTable),drop=FALSE]))
+	myCAGEset@CTSScoordinates <- ctssTable[, c("chr", "pos", "strand")]
+	myCAGEset@tagCountMatrix <- ctssTable[,4:ncol(ctssTable),drop=FALSE]
+
+	return(myCAGEset)
+
+}
\ No newline at end of file

From c9033910d990b826fc2389a26466b8f4708aece9 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Mon, 26 Jun 2023 10:50:54 +0900
Subject: [PATCH 02/17] Use match.arg to ensure valid source.

---
 R/ImportMethods.R | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 2dbcf72..17a86d7 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -656,14 +656,17 @@ setMethod( "getCTSS", "CAGEexp"
 #' @export
 
 setGeneric("importPublicData",
-           function(source, dataset, group, sample)
+           function(source = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"),
+                    dataset,
+                    group,
+                    sample)
              standardGeneric("importPublicData"))
 
-setMethod("importPublicData",
-signature(source = "character", dataset = "character", sample = "character"),
-.importPublicData (source, dataset, group, sample))
+setMethod("importPublicData", signature(source = "character", dataset = "character", sample = "character"),
+          .importPublicData(source, dataset, group, sample))
 
-.importPublicData <- function (source, dataset, group, sample){
+.importPublicData <- function(source = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"), dataset, group, sample) {
+  source <- match.arg(source)
 
 	if(source == "ENCODE"){
 
@@ -972,10 +975,7 @@ signature(source = "character", dataset = "character", sample = "character"),
 		}
 
 
-	}else{
-		stop("Currently only the following public CAGE data resources are supported: 'FANTOM5', 'FANTOM3and4', 'ENCODE', 'ZebrafishDevelopment'. Refer to CAGEr vignette on how to use those resources!")
 	}
-
     rownames(ctssTable) <- c(1:nrow(ctssTable))
 
 	sample.labels <- colnames(ctssTable)[4:ncol(ctssTable)]

From 6bffcac0bfffaf131faad3ddec0dbdc91245925e Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Mon, 26 Jun 2023 13:03:36 +0900
Subject: [PATCH 03/17] Import zebrafish CAGE data as CAGEexp (see #78)

---
 R/ImportMethods.R | 74 +++++++++++++++++++++++++++--------------------
 1 file changed, 43 insertions(+), 31 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 17a86d7..879b2cd 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -946,36 +946,8 @@ setMethod("importPublicData", signature(source = "character", dataset = "charact
 		ctssTable <- data.frame(ctss.table, stringsAsFactors = F, check.names = F)
 
 
-	}else if (source == "ZebrafishDevelopment"){
-
-		if("ZebrafishDevelopmentalCAGE" %in% rownames(installed.packages()) == FALSE){
-			stop("Requested CAGE data package is not installed! Please install and load the ZebrafishDevelopmentalCAGE package, which is available for download from http://promshift.genereg.net/CAGEr/PackageSource/.")
-		}else if(!("package:ZebrafishDevelopmentalCAGE" %in% search())){
-			stop("Requested CAGE data package is not loaded! Please load the data package by calling 'library(ZebrafishDevelopmentalCAGE)'")
-		}
-
-	  ZebrafishSamples <- NULL
-		data("ZebrafishSamples", envir = environment())
-		if(dataset == "ZebrafishCAGE"){
-			if(group == "development"){
-				if(!(all(sample %in% ZebrafishSamples$sample))){
-					stop("Some sample names cannot be found for the specified dataset! Call data(ZebrafishSamples) and check the 'sample' column for valid sample names!")
-				}else{
-					genome.name <- "BSgenome.Drerio.UCSC.danRer7"
-					ZebrafishCAGE <- NULL
-					data("ZebrafishCAGE", envir = environment())
-					ctssTable <- ZebrafishCAGE[["development"]][,c("chr", "pos", "strand", sample)]
-                    ctssTable <- ctssTable[apply(ctssTable[,4:ncol(ctssTable),drop=FALSE], 1, function(x) {any(x>0)}),]
-				}
-			}else{
-				stop("Invalid group name! There is only one group in this dataset named 'development'.")
-			}
-		}else{
-			stop("Invalid dataset name! There is only one available dataset named 'ZebrafishCAGE'.")
-		}
-
-
-	}
+	} else if (source == "ZebrafishDevelopment") .importPublicData_ZF (group = group, sample = sample)
+  
     rownames(ctssTable) <- c(1:nrow(ctssTable))
 
 	sample.labels <- colnames(ctssTable)[4:ncol(ctssTable)]
@@ -987,4 +959,44 @@ setMethod("importPublicData", signature(source = "character", dataset = "charact
 
 	return(myCAGEset)
 
-}
\ No newline at end of file
+}
+
+.importPublicData_ZF <- function(group = "development", sample = NULL) {
+  if (group != "development")
+    stop("Invalid group name! There is only one group in this dataset named 'development'.")
+  if (! requireNamespace("ZebrafishDevelopmentalCAGE"))
+    stop ("This function requires the ", dQuote("ZebrafishDevelopmentalCAGE"),
+          " package; please install it from http://promshift.genereg.net/CAGEr/PackageSource/.")
+  if (! requireNamespace("BSgenome.Drerio.UCSC.danRer7"))
+    stop ("This function requires the ", dQuote("BSgenome.Drerio.UCSC.danRer7"), " package.")
+
+  ZebrafishSamples <- NULL
+  data("ZebrafishSamples", package = "ZebrafishDevelopmentalCAGE", envir = environment())
+  
+  validSampleNames <- levels(ZebrafishSamples$sample)
+  if (is.null(sample )) sample <- validSampleNames
+  if ( ! all(sample %in% validSampleNames))
+    stop("At least one sample name is not valid. ",
+         "Valid sample names are: ", paste(validSampleNames, collapse = ", "), ".")
+  
+  genome.name <- "BSgenome.Drerio.UCSC.danRer7"
+  ZebrafishCAGE <- NULL
+  data("ZebrafishCAGE", package = "ZebrafishDevelopmentalCAGE", envir = environment())
+  # The vignette of ZebrafishDevelopmentalCAGE states that the provided coordinates are 1-based.
+  ctssRanges <- CTSS(ZebrafishCAGE[["development"]]$chr,
+                     ZebrafishCAGE[["development"]]$pos,
+                     ZebrafishCAGE[["development"]]$strand,
+                     seqinfo = seqinfo(BSgenome.Drerio.UCSC.danRer7:::BSgenome.Drerio.UCSC.danRer7))
+  ctssDF <- lapply(ZebrafishCAGE[["development"]][ , sample], Rle) |> DataFrame()
+  ctssSE <- SummarizedExperiment(c(counts = ctssDF), ctssRanges)
+  ctssSE <- ctssSE[rowSums(ZebrafishCAGE[["development"]][ , sample]) > 0,]
+  ctssSE <- sort(ctssSE)
+  ce <- CAGEexp(genomeName = "BSgenome.Drerio.UCSC.danRer7",
+                colData = DataFrame( sampleLabels = sample,
+                                     inputFiles = NA,
+                                     inputFilesType = 'ctss',
+                                     librarySizes = sapply(assay(ctssSE), sum),
+                                     row.names = sample))
+  CTSStagCountSE(ce) <- ctssSE
+  ce
+}

From 14a2396057a022762acaf6f68945d00ad689dae0 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 10:45:32 +0900
Subject: [PATCH 04/17] Support loading data from URLs.

Closes #50
See also #52 and #78
---
 R/ImportMethods.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 879b2cd..8d5f930 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -121,7 +121,8 @@ setGeneric( "getCTSS"
 
 checkFilesExist <- function(paths) {
   for (f in paths)
-    if (! file.exists(f)) stop("Could not locate input file ", f)
+    if (isFALSE(grepl("^http", f)))
+      if (! file.exists(f)) stop("Could not locate input file ", f)
 }
 
 #' toCTSSdt

From 2afbddc8691a08395f63b8a0180aa864e80c24f9 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 10:46:30 +0900
Subject: [PATCH 05/17] Correct the ZF loaded to track BSgenome name.

---
 R/ImportMethods.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 8d5f930..95f7186 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -980,14 +980,14 @@ setMethod("importPublicData", signature(source = "character", dataset = "charact
     stop("At least one sample name is not valid. ",
          "Valid sample names are: ", paste(validSampleNames, collapse = ", "), ".")
   
-  genome.name <- "BSgenome.Drerio.UCSC.danRer7"
   ZebrafishCAGE <- NULL
   data("ZebrafishCAGE", package = "ZebrafishDevelopmentalCAGE", envir = environment())
   # The vignette of ZebrafishDevelopmentalCAGE states that the provided coordinates are 1-based.
   ctssRanges <- CTSS(ZebrafishCAGE[["development"]]$chr,
                      ZebrafishCAGE[["development"]]$pos,
                      ZebrafishCAGE[["development"]]$strand,
-                     seqinfo = seqinfo(BSgenome.Drerio.UCSC.danRer7:::BSgenome.Drerio.UCSC.danRer7))
+                     seqinfo = seqinfo(BSgenome.Drerio.UCSC.danRer7:::BSgenome.Drerio.UCSC.danRer7),
+                     bsgenomeName = "BSgenome.Drerio.UCSC.danRer7")
   ctssDF <- lapply(ZebrafishCAGE[["development"]][ , sample], Rle) |> DataFrame()
   ctssSE <- SummarizedExperiment(c(counts = ctssDF), ctssRanges)
   ctssSE <- ctssSE[rowSums(ZebrafishCAGE[["development"]][ , sample]) > 0,]

From 8dcec188623e55b74fec0ad9e7a07fd3c861c6af Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 10:48:33 +0900
Subject: [PATCH 06/17] Load FANTOM5 data using getCTSS().

Also rename function parameter, because "source" is already a function name.

See also #78
---
 R/ImportMethods.R | 95 +++++++++++++++++++----------------------------
 1 file changed, 38 insertions(+), 57 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 95f7186..0fd1216 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -657,19 +657,16 @@ setMethod( "getCTSS", "CAGEexp"
 #' @export
 
 setGeneric("importPublicData",
-           function(source = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"),
+           function(origin = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"),
                     dataset,
                     group,
                     sample)
              standardGeneric("importPublicData"))
 
-setMethod("importPublicData", signature(source = "character", dataset = "character", sample = "character"),
-          .importPublicData(source, dataset, group, sample))
+.importPublicData <- function(origin = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"), dataset, group, sample) {
+ # origin <- match.arg(origin)
 
-.importPublicData <- function(source = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"), dataset, group, sample) {
-  source <- match.arg(source)
-
-	if(source == "ENCODE"){
+	if(origin == "ENCODE"){
 
 		if("ENCODEprojectCAGE" %in% rownames(installed.packages()) == FALSE){
 			stop("Requested CAGE data package is not installed! Please install and load the ENCODEprojectCAGE package, which is available for download from http://promshift.genereg.net/CAGEr/PackageSource/.")
@@ -787,7 +784,7 @@ setMethod("importPublicData", signature(source = "character", dataset = "charact
 		}
 
 
-	}else if(source == "FANTOM3and4"){
+	}else if(origin == "FANTOM3and4"){
 
 		if("FANTOM3and4CAGE" %in% rownames(installed.packages()) == FALSE){
 			stop("Requested CAGE data package is not installed! Please install and load the FANTOM3and4CAGE package available from Bioconductor.")
@@ -900,60 +897,15 @@ setMethod("importPublicData", signature(source = "character", dataset = "charact
 
 
 
-	}else if (source == "FANTOM5"){
-
-		if(length(dataset) != 1){
-			stop("For FANTOM5 only one dataset can be specified and it can be either 'human' or 'mouse'!")
-		}else if(!(dataset %in% c("human", "mouse"))){
-			stop("For FANTOM5, dataset can be either 'human' or 'mouse'!")
-		}
-		if(dataset == "human"){
-		  FANTOM5humanSamples <- NULL
-			data("FANTOM5humanSamples", envir = environment())
-			samples.info <- FANTOM5humanSamples
-			genome.name <- "BSgenome.Hsapiens.UCSC.hg19"
-		}else if(dataset == "mouse"){
-		  FANTOM5mouseSamples <- NULL
-			data("FANTOM5mouseSamples", envir = environment())
-			samples.info <- FANTOM5mouseSamples
-			genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
-		}
-
-		if(!(all(sample %in% samples.info$sample))){
-			stop(paste("Some sample names cannot be found for the specified dataset! Call data(FANTOM5", dataset, "Samples) and check the 'sample' column for valid sample names!", sep = ""))
-		}
-
-
-
-		for(i in c(1:length(sample))){
-
-			message("Fetching sample: ", sample[i], "...")
-			sample.url <- samples.info[samples.info$sample == sample[i], "data_url"]
-			con <- gzcon(url(paste(sample.url)))
-			ctss <- scan(con, what = list(character(), NULL, integer(), NULL, integer(), character()))
-			ctss.df <- data.table(chr = ctss[[1]], pos = ctss[[3]], strand = ctss[[6]], tagCount = ctss[[5]])
-			setnames(ctss.df, c("chr", "pos", "strand", sample[i]))
-			setkeyv(ctss.df, cols = c("chr", "pos", "strand"))
-			if(i == 1){
-				ctss.table <- ctss.df
-			}else{
-				message("Adding sample to CTSS table...\n")
-				ctss.table <- merge(ctss.table, ctss.df, all.x = T, all.y = T)
-				ctss.table[is.na(ctss.table)] <- 0
-			}
-
-		}
-
-		ctssTable <- data.frame(ctss.table, stringsAsFactors = F, check.names = F)
-
-
-	} else if (source == "ZebrafishDevelopment") .importPublicData_ZF (group = group, sample = sample)
+	}else if (origin == "FANTOM5") {
+	  .importPublicData_F5 (dataset = dataset, group = group, sample = sample)
+	}else if (origin == "ZebrafishDevelopment") .importPublicData_ZF (group = group, sample = sample)
   
     rownames(ctssTable) <- c(1:nrow(ctssTable))
 
 	sample.labels <- colnames(ctssTable)[4:ncol(ctssTable)]
 	names(sample.labels) <- rainbow(n = length(sample.labels))
-	myCAGEset <- new("CAGEset", genomeName = genome.name, inputFiles = paste(source, sample.labels, sep = "__"), inputFilesType = source, sampleLabels = sample.labels)
+	myCAGEset <- new("CAGEset", genomeName = genome.name, inputFiles = paste(origin, sample.labels, sep = "__"), inputFilesType = origin, sampleLabels = sample.labels)
 	myCAGEset@librarySizes <- as.integer(colSums(ctssTable[,4:ncol(ctssTable),drop=FALSE]))
 	myCAGEset@CTSScoordinates <- ctssTable[, c("chr", "pos", "strand")]
 	myCAGEset@tagCountMatrix <- ctssTable[,4:ncol(ctssTable),drop=FALSE]
@@ -962,6 +914,32 @@ setMethod("importPublicData", signature(source = "character", dataset = "charact
 
 }
 
+.importPublicData_F5 <- function(dataset = c("human", "mouse"), group = NULL, sample = NULL) {
+  dataset <- match.arg(dataset)
+  if (dataset == "human") {
+    if (! requireNamespace("BSgenome.Hsapiens.UCSC.hg19"))
+      stop ("This function requires the ", dQuote("BSgenome.Hsapiens.UCSC.hg19"), " package.")
+    FANTOM5humanSamples <- NULL
+    data("FANTOM5humanSamples", package = "CAGEr", envir = environment())
+    samples.info <- FANTOM5humanSamples
+    genome.name <- "BSgenome.Hsapiens.UCSC.hg19"
+    genome.obj <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19
+  } else {
+    if (! requireNamespace("BSgenome.Mmusculus.UCSC.mm9"))
+      stop ("This function requires the ", dQuote("BSgenome.Mmusculus.UCSC.mm9"), " package.")
+    FANTOM5mouseSamples <- NULL
+    data("FANTOM5mouseSamples", envir = environment())
+    samples.info <- FANTOM5mouseSamples
+    genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
+    genome.obj <- BSgenome.Mmusculus.UCSC.mm9::BSgenome.Mmusculus.UCSC.mm9
+  }
+  ce <- CAGEexp(genomeName = genome.name,
+          inputFiles = samples.info[samples.info$sample %in% sample,"data_url"],
+          inputFilesType = "bedScore",
+          sampleLabels = sample)
+  getCTSS(ce)
+}
+
 .importPublicData_ZF <- function(group = "development", sample = NULL) {
   if (group != "development")
     stop("Invalid group name! There is only one group in this dataset named 'development'.")
@@ -1001,3 +979,6 @@ setMethod("importPublicData", signature(source = "character", dataset = "charact
   CTSStagCountSE(ce) <- ctssSE
   ce
 }
+
+setMethod("importPublicData", signature(origin = "character", dataset = "character", sample = "character"),
+          .importPublicData)
\ No newline at end of file

From d2cdd4c28bc7ac521769d20ab0a67459c61b0fa4 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 10:50:40 +0900
Subject: [PATCH 07/17] Return a CAGEexp object.

---
 R/ImportMethods.R | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 0fd1216..60146f4 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -895,23 +895,12 @@ setGeneric("importPublicData",
 
 		ctssTable <- data.frame(ctssTable, stringsAsFactors = F, check.names = F)
 
-
-
-	}else if (origin == "FANTOM5") {
-	  .importPublicData_F5 (dataset = dataset, group = group, sample = sample)
-	}else if (origin == "ZebrafishDevelopment") .importPublicData_ZF (group = group, sample = sample)
-  
-    rownames(ctssTable) <- c(1:nrow(ctssTable))
-
-	sample.labels <- colnames(ctssTable)[4:ncol(ctssTable)]
-	names(sample.labels) <- rainbow(n = length(sample.labels))
-	myCAGEset <- new("CAGEset", genomeName = genome.name, inputFiles = paste(origin, sample.labels, sep = "__"), inputFilesType = origin, sampleLabels = sample.labels)
-	myCAGEset@librarySizes <- as.integer(colSums(ctssTable[,4:ncol(ctssTable),drop=FALSE]))
-	myCAGEset@CTSScoordinates <- ctssTable[, c("chr", "pos", "strand")]
-	myCAGEset@tagCountMatrix <- ctssTable[,4:ncol(ctssTable),drop=FALSE]
-
-	return(myCAGEset)
-
+  } else if (origin == "FANTOM5") {
+    ce <- .importPublicData_F5 (dataset = dataset, group = group, sample = sample)
+  } else if (origin == "ZebrafishDevelopment") {
+    ce <- .importPublicData_ZF (group = group, sample = sample)
+  }
+  ce
 }
 
 .importPublicData_F5 <- function(dataset = c("human", "mouse"), group = NULL, sample = NULL) {
@@ -928,7 +917,7 @@ setGeneric("importPublicData",
     if (! requireNamespace("BSgenome.Mmusculus.UCSC.mm9"))
       stop ("This function requires the ", dQuote("BSgenome.Mmusculus.UCSC.mm9"), " package.")
     FANTOM5mouseSamples <- NULL
-    data("FANTOM5mouseSamples", envir = environment())
+    data("FANTOM5mouseSamples", package = "CAGEr", envir = environment())
     samples.info <- FANTOM5mouseSamples
     genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
     genome.obj <- BSgenome.Mmusculus.UCSC.mm9::BSgenome.Mmusculus.UCSC.mm9

From dceee1efda78e5a0c5e8a3b10641303b11646581 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 11:08:13 +0900
Subject: [PATCH 08/17] Validate sample names.

---
 R/ImportMethods.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 60146f4..733e94e 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -922,6 +922,14 @@ setGeneric("importPublicData",
     genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
     genome.obj <- BSgenome.Mmusculus.UCSC.mm9::BSgenome.Mmusculus.UCSC.mm9
   }
+  
+  validSampleNames <- samples.info$sample
+  if (is.null(sample)) sample <- validSampleNames
+  if ( ! all(sample %in% validSampleNames))
+    stop("At least one sample name is not valid. ",
+         "Call data('FANTOM5humanSamples', package='CAGEr') or ",
+         "data('FANTOM5mouseSamples', package='CAGEr') to check valid names.")
+  
   ce <- CAGEexp(genomeName = genome.name,
           inputFiles = samples.info[samples.info$sample %in% sample,"data_url"],
           inputFilesType = "bedScore",

From 56f1cd5f9ea5217d483fc48038eb664640085b21 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 11:38:09 +0900
Subject: [PATCH 09/17] Factorise code out of .importPublicData_ZF

---
 R/ImportMethods.R | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 733e94e..153f110 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -663,6 +663,18 @@ setGeneric("importPublicData",
                     sample)
              standardGeneric("importPublicData"))
 
+.df2SE <- function(df, sample, genome.name) {
+  ctssRanges <- CTSS(df$chr,
+                     df$pos,
+                     df$strand,
+                     seqinfo = seqinfo(CAGEr:::getRefGenome(genome.name)),
+                     bsgenomeName = genome.name)
+  ctssDF <- lapply(df[ , sample], Rle) |> DataFrame()
+  ctssSE <- SummarizedExperiment(c(counts = ctssDF), ctssRanges)
+  ctssSE <- ctssSE[rowSums(df[ , sample]) > 0,]
+  sort(ctssSE)
+}
+
 .importPublicData <- function(origin = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"), dataset, group, sample) {
  # origin <- match.arg(origin)
 
@@ -958,15 +970,7 @@ setGeneric("importPublicData",
   ZebrafishCAGE <- NULL
   data("ZebrafishCAGE", package = "ZebrafishDevelopmentalCAGE", envir = environment())
   # The vignette of ZebrafishDevelopmentalCAGE states that the provided coordinates are 1-based.
-  ctssRanges <- CTSS(ZebrafishCAGE[["development"]]$chr,
-                     ZebrafishCAGE[["development"]]$pos,
-                     ZebrafishCAGE[["development"]]$strand,
-                     seqinfo = seqinfo(BSgenome.Drerio.UCSC.danRer7:::BSgenome.Drerio.UCSC.danRer7),
-                     bsgenomeName = "BSgenome.Drerio.UCSC.danRer7")
-  ctssDF <- lapply(ZebrafishCAGE[["development"]][ , sample], Rle) |> DataFrame()
-  ctssSE <- SummarizedExperiment(c(counts = ctssDF), ctssRanges)
-  ctssSE <- ctssSE[rowSums(ZebrafishCAGE[["development"]][ , sample]) > 0,]
-  ctssSE <- sort(ctssSE)
+  ctssSE <- .df2SE(ZebrafishCAGE$development, sample, "BSgenome.Drerio.UCSC.danRer7")
   ce <- CAGEexp(genomeName = "BSgenome.Drerio.UCSC.danRer7",
                 colData = DataFrame( sampleLabels = sample,
                                      inputFiles = NA,

From 6413eddb83fff8b908161e50f2c385e15daa19f8 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 13:48:48 +0900
Subject: [PATCH 10/17] Draft import function for FANTOM 3/4

See also #78
---
 R/ImportMethods.R | 164 +++++++++++++++-------------------------------
 1 file changed, 53 insertions(+), 111 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 153f110..4504a52 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -670,6 +670,7 @@ setGeneric("importPublicData",
                      seqinfo = seqinfo(CAGEr:::getRefGenome(genome.name)),
                      bsgenomeName = genome.name)
   ctssDF <- lapply(df[ , sample], Rle) |> DataFrame()
+  colnames(ctssDF) <- make.names(colnames(ctssDF))
   ctssSE <- SummarizedExperiment(c(counts = ctssDF), ctssRanges)
   ctssSE <- ctssSE[rowSums(df[ , sample]) > 0,]
   sort(ctssSE)
@@ -796,117 +797,8 @@ setGeneric("importPublicData",
 		}
 
 
-	}else if(origin == "FANTOM3and4"){
-
-		if("FANTOM3and4CAGE" %in% rownames(installed.packages()) == FALSE){
-			stop("Requested CAGE data package is not installed! Please install and load the FANTOM3and4CAGE package available from Bioconductor.")
-		}else if(!("package:FANTOM3and4CAGE" %in% search())){
-			stop("Requested CAGE data package is not loaded! Please load the data package by calling 'library(FANTOM3and4CAGE)'")
-		}
-		FANTOMhumanSamples <- FANTOMmouseSamples <- NULL
-		data("FANTOMhumanSamples", envir = environment())
-		info.df1 <- FANTOMhumanSamples
-		data("FANTOMmouseSamples", envir = environment())
-		info.df2 <- FANTOMmouseSamples
-
-		if(!(all(dataset %in% info.df1$dataset) | all(dataset %in% info.df2$dataset))){
-			stop("Specified dataset(s) not found! Call data(FANTOMhumanSamples) and data(FANTOMmouseSamples) and check 'dataset' column for available ENCODE datasets!")
-		}
-		if(length(grep("human", dataset))>0){
-			genome.name <- "BSgenome.Hsapiens.UCSC.hg18"
-			info.df <- info.df1
-		}else if(length(grep("mouse", dataset))>0){
-			genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
-			info.df <- info.df2
-		}
-
-		if(length(dataset) == 1){
-
-			if(!(all(group %in% info.df[info.df$dataset == dataset,"group"]))){
-				stop("Some of the provided groups cannot be found in the specified dataset!")
-			}
-			if(length(group) == 1){
-				if(!(all(sample %in% info.df[info.df$group == group,"sample"]))){
-					stop("Some of the provided samples cannot be found in the specified group!")
-				}
-			}else if(length(group) == length(sample)){
-				if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
-					stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
-				}
-			}else{
-				stop("Number of elements in the 'group' must be either 1 or must match the number of elements in the 'sample'!")
-			}
-
-
-		}else if(length(dataset) == length(group)){
-			if(!all(sapply(c(1:length(dataset)), function(x) {group[x] %in% info.df[info.df$dataset == dataset[x],"group"]}))){
-				stop("Provided 'dataset' and 'group' do not match! Some of the provided groups cannot be found in the corresponding datasets!")
-			}
-			if(length(group) == length(sample)){
-				if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
-					stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
-				}
-			}else{
-				stop("Number of elements in the 'group' must match the number of elements in the 'sample'!")
-			}
-
-
-		}else{
-			stop("Number of elements in the 'dataset' must be either 1 or must match the number of elements in the 'group' and in the 'sample'!")
-		}
-
-		data(list = dataset, envir = environment())
-
-		if(length(unique(dataset))>1){
-
-			for(i in 1:length(unique(dataset))){
-				dset <- get(unique(dataset)[i])
-				for(j in 1:length(unique(group[which(dataset == unique(dataset)[i])]))){
-					g <- unique(group[which(dataset == unique(dataset)[i])])[j]
-					ctss <- dset[[g]][, c("chr", "pos", "strand", sample[which(group == g)])]
-					discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
-					ctss <- data.table(ctss[discard,])
-					setnames(ctss, c("chr", "pos", "strand", paste(g, sample[which(group == g)], sep = "__")))
-					setkeyv(ctss, cols = c("chr", "pos", "strand"))
-					if(i == 1 & j == 1){
-						ctssTable <- ctss
-					}else{
-						ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
-					}
-				}
-			}
-
-			ctssTable[is.na(ctssTable)] <- 0
-
-		}else{
-
-			selected.dataset <- get(dataset)
-			if(length(unique(group))>1){
-
-				for(i in 1:length(unique(group))){
-					g <- unique(group)[i]
-					ctss <- selected.dataset[[g]][, c("chr", "pos", "strand", sample[which(group == g)])]
-					discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
-					ctss <- data.table(ctss[discard,])
-					setnames(ctss, c("chr", "pos", "strand", paste(g, sample[which(group == g)], sep = "__")))
-					setkeyv(ctss, cols = c("chr", "pos", "strand"))
-					if(i == 1){
-						ctssTable <- ctss
-					}else{
-						ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
-					}
-				}
-
-				ctssTable[is.na(ctssTable)] <- 0
-
-			}else{
-				ctssTable <- selected.dataset[[group]][,c("chr", "pos", "strand", sample)]
-				setnames(ctssTable, c("chr", "pos", "strand", paste(group, sample, sep = "__")))
-			}
-		}
-
-		ctssTable <- data.frame(ctssTable, stringsAsFactors = F, check.names = F)
-
+  } else if (origin == "FANTOM3and4") {
+    ce <- .importPublicData_F34 (dataset = dataset, group = group, sample = sample)
   } else if (origin == "FANTOM5") {
     ce <- .importPublicData_F5 (dataset = dataset, group = group, sample = sample)
   } else if (origin == "ZebrafishDevelopment") {
@@ -915,6 +807,56 @@ setGeneric("importPublicData",
   ce
 }
 
+.importPublicData_F34 <- function (dataset, group = NULL, sample = NULL) {
+  if (length(unique(dataset))>1) stop("merging datasets not supported yet.")
+  if (! requireNamespace("FANTOM3and4CAGE"))
+    stop ("This function requires the ", dQuote("FANTOM3and4CAGE"), " package.")
+  if (all(dataset %in% c("FANTOMtissueCAGEhuman"  ,   "FANTOMtimecourseCAGEhuman"))) {
+    if (! requireNamespace("BSgenome.Hsapiens.UCSC.hg18"))
+      stop ("This function requires the ", dQuote("BSgenome.Hsapiens.UCSC.hg18"), " package.")
+    FANTOMhumanSamples <- FANTOMtimecourseCAGEhuman <- FANTOMtissueCAGEhuman <- NULL
+      data("FANTOMhumanSamples",        package = "FANTOM3and4CAGE", envir = environment())
+    data("FANTOMtimecourseCAGEhuman", package = "FANTOM3and4CAGE", envir = environment())
+    data("FANTOMtissueCAGEhuman",     package = "FANTOM3and4CAGE", envir = environment())
+    samples.info <- FANTOMhumanSamples
+    genome.name <- "BSgenome.Hsapiens.UCSC.hg18"
+  } else if (all(dataset %in% c("FANTOMtissueCAGEmouse", "FANTOMtimecourseCAGEmouse"))) {
+    if (! requireNamespace("BSgenome.Mmusculus.UCSC.mm9"))
+      stop ("This function requires the ", dQuote("BSgenome.Mmusculus.UCSC.mm9"), " package.")
+    FANTOMmouseSamples <- NULL
+    data("FANTOMmouseSamples", package = "FANTOM3and4CAGE", envir = environment())
+    data("FANTOMtimecourseCAGEmouse", package = "FANTOM3and4CAGE", envir = environment())
+    data("FANTOMtissueCAGEmouse",     package = "FANTOM3and4CAGE", envir = environment())
+    samples.info <- FANTOMmouseSamples
+    genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
+  } else {
+    stop("At least one dataset name is not valid")
+  }
+  
+  validSampleNames <- samples.info$sample
+  if (is.null(sample)) sample <- validSampleNames
+  if ( ! all(sample %in% validSampleNames))
+    stop("At least one sample name is not valid. ",
+         "Call data('FANTOMhumanSamples', package='FANTOM3and4CAGE') or ",
+         "data('FANTOMmouseSamples', package='FANTOM3and4CAGE') to check valid names.")
+  validGroupNames <- unique(samples.info$group)
+  if ( ! all(group %in% validGroupNames))
+    stop("At least one group name is not valid. ",
+         "Call data('FANTOMhumanSamples', package='FANTOM3and4CAGE') or ",
+         "data('FANTOMmouseSamples', package='FANTOM3and4CAGE') to check valid names.")
+  
+  df <- get(dataset)[[group]]
+  se <- .df2SE(df, sample, genome.name)
+  ce <- CAGEexp(genomeName = genome.name,
+                colData = DataFrame( sampleLabels = make.names(sample),
+                                     inputFiles = NA,
+                                     inputFilesType = 'ctss',
+                                     librarySizes = sapply(assay(se), sum),
+                                     row.names = make.names(sample)))
+  CTSStagCountSE(ce) <- se
+  ce
+}
+
 .importPublicData_F5 <- function(dataset = c("human", "mouse"), group = NULL, sample = NULL) {
   dataset <- match.arg(dataset)
   if (dataset == "human") {

From 794225c7ee64b7cf39293d50a1baf8d328a4a4b6 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 15:28:52 +0900
Subject: [PATCH 11/17] Document import of CAGE resources to CAGEexp

---
 vignettes/CAGE_Resources.Rmd | 50 ++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/vignettes/CAGE_Resources.Rmd b/vignettes/CAGE_Resources.Rmd
index 3f31812..f6ee747 100644
--- a/vignettes/CAGE_Resources.Rmd
+++ b/vignettes/CAGE_Resources.Rmd
@@ -28,9 +28,6 @@ set.seed(0xdada)
 Available CAGE data resources
 =============================
 
-Note: this still uses the `CAGEset` class.
-
-
 There are several large collections of CAGE data available that provide single
 base-pair resolution TSSs for numerous human and mouse primary cells, cell lines
 and tissues. Together with several minor datasets for other model organisms
@@ -55,12 +52,12 @@ each TSS detected by CAGE. The list of all available samples for both human and
 mouse (as presented in the Supplementary Table 1 of the publication) has been
 included in _CAGEr_ to facilitate browsing, searching and selecting samples
 of interest. TSSs for selected samples are then fetched directly form the web
-resource and imported into a `CAGEset` object enabling their further
+resource and imported into a `CAGEexp` object enabling their further
 manipulation with _CAGEr_.
 
 ## FANTOM3 and 4
 
-Previous FANTOM projects (3 and 4) [@Consortium:2005kp,@Faulkner:2009fw,@Suzuki:2009gy])
+Previous FANTOM projects (3 and 4) [@Consortium:2005kp] [@Faulkner:2009fw] [@Suzuki:2009gy]
 produced CAGE datasets for multiple human and mouse tissues as well as several
 timecourses, including differentiation of a THP-1 human myeloid leukemia cell
 line. All this TSS data has been grouped into datasets by the organism and
@@ -68,7 +65,7 @@ tissue of origin and has been collected into an R data package named
 _FANTOM3and4CAGE_, which is available from Bioconductor <https://bioconductor.org/packages/FANTOM3and4CAGE>.
 The vignette accompanying the package provides information on available datasets
 and lists of samples. When the data package is installed, _CAGEr_ can import the
-TSSs for selected samples directly into a `CAGEset` object for further
+TSSs for selected samples directly into a `CAGEexp` object for further
 manipulation.
 
 ## ENCODE cell lines
@@ -100,14 +97,12 @@ data has been collected into an R data package named _ZebrafishDevelopmentalCAGE
 which is available for download from CAGEr web site at <http://promshift.genereg.net/CAGEr>.
 As with other data packages mentioned above, once the package is installed
 _CAGEr_ can use it to import stage-specific single base pair TSSs into a
-`CAGEset` object.
+`CAGEexp` object.
 
 Importing public TSS data for manipulation in _CAGEr_
 =====================================================
 
-Note: this still uses the `CAGEset` class.
-
-The data from above mentioned resources can be imported into a `CAGEset` object
+The data from above mentioned resources can be imported into a `CAGEexp` object
 using the `importPublicData()` function.  It function has four arguments: `source`,
 `dataset`, `group` and `sample`. Argument `source` accepts one of the following
 values: `"FANTOM5"`, `"FANTOM3and4"`, `"ENCODE"`, or `"ZebrafishDevelopment"`,
@@ -146,8 +141,8 @@ Provided information facilitates searching for samples of interest, _e.g._ we
 can search for astrocyte samples:
 
 ```{r}
-astrocyteSamples <- FANTOM5humanSamples[grep("Astrocyte", 
-				FANTOM5humanSamples[,"description"]),]
+astrocyteSamples <-
+  FANTOM5humanSamples[grep("Astrocyte", FANTOM5humanSamples[,"description"]),]
 astrocyteSamples
 ```
 
@@ -170,11 +165,11 @@ astrocyteSamples[,"sample"]
 and to import first three samples type:
 
 ```
-astrocyteCAGEset <- importPublicData(source = "FANTOM5", dataset = "human", 
+astrocyteCAGEexp <- importPublicData(origin = "FANTOM5", dataset = "human", 
 					sample = astrocyteSamples[1:3,"sample"])
 ```
 
-The resulting `astrocyteCAGEset` is a `CAGEset` object that can be included in
+The resulting `astrocyteCAGEexp` is a `CAGEexp` object that can be included in
 the _CAGEr_ workflow described above to perform normalisation, clustering,
 visualisation, etc.
 
@@ -205,22 +200,27 @@ values that should be passed to corresponding arguments in `importPublicData()`
 function. For example to import human kidney normal and malignancy samples call:
 
 ```
-kidneyCAGEset <- importPublicData(source = "FANTOM3and4",
+kidneyCAGEexp <- importPublicData(origin = "FANTOM3and4",
 			dataset = "FANTOMtissueCAGEhuman", 
 			group = "kidney", sample = c("kidney", "malignancy"))
 ```
 
-When the samples belong to different groups or different datasets, it is necessary to provide the dataset and group assignment for each sample separately:
+When the samples belong to different groups or different datasets, it is
+necessary to provide the dataset and group assignment for each sample separately:
+
+Note: this functionality is disable for the moment.  Please open an issue if you
+need it.
 
 ```
-mixedCAGEset <- importPublicData(source = "FANTOM3and4",
+mixedCAGEexp <- importPublicData(origin = "FANTOM3and4",
 		dataset = c("FANTOMtissueCAGEmouse", "FANTOMtissueCAGEmouse", 
 		"FANTOMtimecourseCAGEmouse"), group = c("liver", "liver", 
 		"liver_under_constant_darkness"), 
 		sample = c("cloned_mouse", "control_mouse", "4_hr"))
 ```
 
-For more details about datasets available in the \emph{FANTOM3and4CAGE} data package please refer to the vignette accompanying the package.
+For more details about datasets available in the _FANTOM3and4CAGE_ data package
+please refer to the vignette accompanying the package.
 
 ## _ENCODEprojectCAGE_ data package
 
@@ -236,10 +236,16 @@ library(ENCODEprojectCAGE)
 data(ENCODEhumanCellLinesSamples)
 ```
 
-The information provided in this data frame is analogous to the one in previously discussed data package and provides values to be used with `importPublicData()` function. The command to import whole cell CAGE samples for three different cell lines would look like this:
+The information provided in this data frame is analogous to the one in
+previously discussed data package and provides values to be used with
+`importPublicData()` function. The command to import whole cell CAGE samples for
+three different cell lines would look like this:
+
+Note: this functionality is disable for the moment.  Please open an issue if you
+need it.
 
 ```
-ENCODEset <- importPublicData(source = "ENCODE", 
+ENCODEset <- importPublicData(origin = "ENCODE", 
 	dataset = c("A549", "H1-hESC", "IMR90"), 
 	group = c("cell", "cell", "cell"), sample = c("A549_cell_rep1", 
 	"H1-hESC_cell_rep1", "IMR90_cell_rep1"))
@@ -266,7 +272,7 @@ belong to the same group called `development`. To import selected samples from
 this dataset type:
 
 ```
-zebrafishCAGEset <- importPublicData(source = "ZebrafishDevelopment", 
+zebrafishCAGEexp <- importPublicData(origin = "ZebrafishDevelopment", 
 			dataset = "ZebrafishCAGE", group = "development", 
 			sample = c("zf_64cells", "zf_prim6"))
 ```
@@ -275,7 +281,7 @@ For more details please refer to the vignette accompanying the data package.
 
 
 Importing TSS data from any of the four above explained resources results in the
-`CAGEset` object that can be directly included into the workflow provided by
+`CAGEexp` object that can be directly included into the workflow provided by
 _CAGEr_ to perform normalisation, clustering, promoter width analysis,
 visualisation, _etc_. This high-resolution TSS data can then easily be
 integrated with other genomic data types to perform various promoter-centred

From 3edefb78589c2d81e7ea3ac33b68de747dc89157 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 15:29:42 +0900
Subject: [PATCH 12/17] Clean up :: and ::: calls.

---
 R/ImportMethods.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 4504a52..ca5f340 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -667,7 +667,7 @@ setGeneric("importPublicData",
   ctssRanges <- CTSS(df$chr,
                      df$pos,
                      df$strand,
-                     seqinfo = seqinfo(CAGEr:::getRefGenome(genome.name)),
+                     seqinfo = seqinfo(getRefGenome(genome.name)),
                      bsgenomeName = genome.name)
   ctssDF <- lapply(df[ , sample], Rle) |> DataFrame()
   colnames(ctssDF) <- make.names(colnames(ctssDF))
@@ -866,7 +866,6 @@ setGeneric("importPublicData",
     data("FANTOM5humanSamples", package = "CAGEr", envir = environment())
     samples.info <- FANTOM5humanSamples
     genome.name <- "BSgenome.Hsapiens.UCSC.hg19"
-    genome.obj <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19
   } else {
     if (! requireNamespace("BSgenome.Mmusculus.UCSC.mm9"))
       stop ("This function requires the ", dQuote("BSgenome.Mmusculus.UCSC.mm9"), " package.")
@@ -874,7 +873,6 @@ setGeneric("importPublicData",
     data("FANTOM5mouseSamples", package = "CAGEr", envir = environment())
     samples.info <- FANTOM5mouseSamples
     genome.name <- "BSgenome.Mmusculus.UCSC.mm9"
-    genome.obj <- BSgenome.Mmusculus.UCSC.mm9::BSgenome.Mmusculus.UCSC.mm9
   }
   
   validSampleNames <- samples.info$sample

From dbcab6c2989dc6547cc8a89402e50cb36177c26b Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 15:30:07 +0900
Subject: [PATCH 13/17] Enforce sanitised names.

---
 R/ImportMethods.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index ca5f340..be466a3 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -885,7 +885,7 @@ setGeneric("importPublicData",
   ce <- CAGEexp(genomeName = genome.name,
           inputFiles = samples.info[samples.info$sample %in% sample,"data_url"],
           inputFilesType = "bedScore",
-          sampleLabels = sample)
+          sampleLabels = make.names(sample))
   getCTSS(ce)
 }
 

From 35fa520283792cdc3bd1875a5feb496abbbe0e79 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 15:30:27 +0900
Subject: [PATCH 14/17] Limited ENCODE import (no merging).

---
 R/ImportMethods.R | 164 +++++++++++++---------------------------------
 1 file changed, 44 insertions(+), 120 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index be466a3..e88f3a1 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -678,126 +678,9 @@ setGeneric("importPublicData",
 
 .importPublicData <- function(origin = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"), dataset, group, sample) {
  # origin <- match.arg(origin)
-
-	if(origin == "ENCODE"){
-
-		if("ENCODEprojectCAGE" %in% rownames(installed.packages()) == FALSE){
-			stop("Requested CAGE data package is not installed! Please install and load the ENCODEprojectCAGE package, which is available for download from http://promshift.genereg.net/CAGEr/PackageSource/.")
-		}else if(!("package:ENCODEprojectCAGE" %in% search())){
-			stop("Requested CAGE data package is not loaded! Please load the data package by calling 'library(ENCODEprojectCAGE)'")
-		}
-
-		if(dataset[1] == "ENCODEtissueCAGEfly"){
-			genome.name <- "BSgenome.Dmelanogaster.UCSC.dm3"
-			ENCODEtissueCAGEfly <- NULL
-			data("ENCODEtissueCAGEfly", envir = environment())
-			if(group == "embryo"){
-				if(sample == "mixed_embryos_0-24hr"){
-
-					ctssTable <- ENCODEtissueCAGEfly[["embryo"]]
-
-				}else{
-					stop("Specified sample not valid! The dataset 'ENCODEtissueCAGEfly' containes only one sample named 'mixed_embryos_0-24hr'!")
-				}
-			}else{
-				stop("Specified group not valid! The dataset 'ENCODEtissueCAGEfly' containes only one group named 'embryo'!")
-			}
-
-		}else{
-			genome.name <- "BSgenome.Hsapiens.UCSC.hg19"
-			ENCODEhumanCellLinesSamples <- NULL
-			data("ENCODEhumanCellLinesSamples", envir = environment())
-			info.df <- ENCODEhumanCellLinesSamples
-
-			if(!(all(dataset %in% info.df$dataset))){
-				stop("Specified dataset(s) not found! Call data(ENCODEhumanCellLinesSamples) and check 'dataset' column for available ENCODE datasets!")
-			}
-			if(length(dataset) == 1){
-
-				if(!(all(group %in% info.df[info.df$dataset == dataset,"group"]))){
-					stop("Some of the provided groups cannot be found in the specified dataset!")
-				}
-				if(length(group) == 1){
-					if(!(all(sample %in% info.df[info.df$group == group,"sample"]))){
-						stop("Some of the provided samples cannot be found in the specified group!")
-					}
-				}else if(length(group) == length(sample)){
-					if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
-						stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
-					}
-				}else{
-					stop("Number of elements in the 'group' must be either 1 or must match the number of elements in the 'sample'!")
-				}
-
-
-			}else if(length(dataset) == length(group)){
-				if(!all(sapply(c(1:length(dataset)), function(x) {group[x] %in% info.df[info.df$dataset == dataset[x],"group"]}))){
-					stop("Provided 'dataset' and 'group' do not match! Some of the provided groups cannot be found in the corresponding datasets!")
-				}
-				if(length(group) == length(sample)){
-					if(!all(sapply(c(1:length(group)), function(x) {sample[x] %in% info.df[info.df$group == group[x],"sample"]}))){
-						stop("Provided 'group' and 'sample' do not match! Some of the provided samples cannot be found in the corresponding groups!")
-					}
-				}else{
-					stop("Number of elements in the 'group' must match the number of elements in the 'sample'!")
-				}
-
-
-			}else{
-				stop("Number of elements in the 'dataset' must be either 1 or must match the number of elements in the 'group' and in the 'sample'!")
-			}
-
-			data(list = dataset, envir = environment())
-
-			if(length(unique(dataset))>1){
-
-				for(i in 1:length(unique(dataset))){
-					dset <- get(unique(dataset)[i])
-					for(j in 1:length(unique(group[which(dataset == unique(dataset)[i])]))){
-						g <- unique(group[which(dataset == unique(dataset)[i])])[j]
-						ctss <- dset[[g]][, c("chr", "pos", "strand", sample[which((group == g) & (dataset == unique(dataset)[i]))])]
-						discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
-						ctss <- data.table(ctss[discard,])
-						setkeyv(ctss, cols = c("chr", "pos", "strand"))
-						if(i == 1 & j == 1){
-							ctssTable <- ctss
-						}else{
-							ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
-						}
-					}
-				}
-				ctssTable[is.na(ctssTable)] <- 0
-
-			}else{
-
-				selected.dataset <- get(dataset)
-				if(length(unique(group))>1){
-
-					for(i in 1:length(unique(group))){
-						g <- unique(group)[i]
-						ctss <- selected.dataset[[g]][, c("chr", "pos", "strand", sample[which(group == g)])]
-						discard <- apply(ctss[, c(4:ncol(ctss)), drop = F], 1, function(x) {sum(x > 0)}) >= 1
-						ctss <- data.table(ctss[discard,])
-						setkeyv(ctss, cols = c("chr", "pos", "strand"))
-						if(i == 1){
-							ctssTable <- ctss
-						}else{
-							ctssTable <- merge(ctssTable, ctss, all.x = T, all.y = T)
-						}
-					}
-
-					ctssTable[is.na(ctssTable)] <- 0
-
-				}else{
-					ctssTable <- selected.dataset[[group]][,c("chr", "pos", "strand", sample)]
-				}
-			}
-
-			ctssTable <- data.frame(ctssTable, stringsAsFactors = F, check.names = F)
-		}
-
-
-  } else if (origin == "FANTOM3and4") {
+	if (origin == "ENCODE") {
+	  ce <- .importPublicData_ENCODE (dataset = dataset, group = group, sample = sample)
+	} else if (origin == "FANTOM3and4") {
     ce <- .importPublicData_F34 (dataset = dataset, group = group, sample = sample)
   } else if (origin == "FANTOM5") {
     ce <- .importPublicData_F5 (dataset = dataset, group = group, sample = sample)
@@ -807,6 +690,47 @@ setGeneric("importPublicData",
   ce
 }
 
+.importPublicData_ENCODE <- function (dataset, group = NULL, sample = NULL) {
+  if (length(unique(dataset))>1) stop ("Merging datasets not supported yet.")
+  if (! requireNamespace("ENCODEprojectCAGE"))
+    stop ("This function requires the ", dQuote("ENCODEprojectCAGE"), " package.",
+          " package; please install it from http://promshift.genereg.net/CAGEr/PackageSource/.")
+  if (dataset == "ENCODEtissueCAGEfly") {
+    if (group != "embryo") stop("Only 'embryo' is allowed as group for dataset 'ENCODEtissueCAGEfly'.")
+    if (sample != "mixed_embryos_0-24hr") stop("Only 'mixed_embryos_0-24hr' is allowed as sample for dataset 'ENCODEtissueCAGEfly'.")
+    if (! requireNamespace("BSgenome.Dmelanogaster.UCSC.dm3"))
+      stop ("This function requires the ", dQuote("BSgenome.Dmelanogaster.UCSC.dm3"), " package.")
+    genome.name <- "BSgenome.Dmelanogaster.UCSC.dm3"
+    ENCODEtissueCAGEfly <- NULL
+    data("ENCODEtissueCAGEfly", package = "ENCODEprojectCAGE", envir = environment())
+    df <- ENCODEtissueCAGEfly$embryo
+  } else {
+    if (! requireNamespace("BSgenome.Hsapiens.UCSC.hg19"))
+      stop ("This function requires the ", dQuote("BSgenome.Hsapiens.UCSC.hg19"), " package.")
+    genome.name <- "BSgenome.Hsapiens.UCSC.hg19"
+    ENCODEhumanCellLinesSamples <- NULL
+    data("ENCODEhumanCellLinesSamples", package = "ENCODEprojectCAGE", envir = environment())
+    info.df <- ENCODEhumanCellLinesSamples
+    if(!(all(dataset %in% info.df$dataset)))
+      stop("Specified dataset(s) not found! Call data(ENCODEhumanCellLinesSamples) and check 'dataset' column for available ENCODE datasets!")
+    if(!(all(group %in% info.df[info.df$dataset == dataset,"group"])))
+      stop("Some of the provided groups cannot be found in the specified dataset!")
+    if(!(all(sample %in% info.df[,"sample"])))
+      stop("Some of the provided samples cannot be found!")
+    data(list= dataset, package = "ENCODEprojectCAGE", envir = environment())
+    df <- get(dataset)[[group]]
+  }
+  se <- .df2SE(df, sample, genome.name)
+  ce <- CAGEexp(genomeName = genome.name,
+                colData = DataFrame( sampleLabels = make.names(sample),
+                                     inputFiles = NA,
+                                     inputFilesType = 'ctss',
+                                     librarySizes = sapply(assay(se), sum),
+                                     row.names = make.names(sample)))
+  CTSStagCountSE(ce) <- se
+  ce
+} 
+
 .importPublicData_F34 <- function (dataset, group = NULL, sample = NULL) {
   if (length(unique(dataset))>1) stop("merging datasets not supported yet.")
   if (! requireNamespace("FANTOM3and4CAGE"))

From cfb88eb918465466784f8455dfb75790f4585103 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Tue, 27 Jun 2023 18:33:23 +0900
Subject: [PATCH 15/17] Restore the importPublicData function.

See: #78
---
 NAMESPACE | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NAMESPACE b/NAMESPACE
index acb0ee7..3070776 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -45,6 +45,7 @@ export(getExpressionProfiles)
 export(getShiftingPromoters)
 export(hanabi)
 export(hanabiPlot)
+export(importPublicData)
 export(inputFiles)
 export(inputFilesType)
 export(librarySizes)
@@ -191,6 +192,7 @@ importFrom(stats,median)
 importFrom(stats,p.adjust)
 importFrom(stringdist,stringdist)
 importFrom(stringi,stri_sub)
+importFrom(utils,data)
 importFrom(utils,head)
 importFrom(utils,installed.packages)
 importFrom(utils,read.table)

From 8b70db0948b2b0d100efcd6fb187fc2b7b07232e Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Wed, 28 Jun 2023 08:55:56 +0900
Subject: [PATCH 16/17] Restore documentation (removing some redundancy in the
 text).

---
 R/ImportMethods.R          | 131 ++++++++++++++++++++++++++++++-
 man/FANTOM5humanSamples.Rd |   3 +-
 man/FANTOM5mouseSamples.Rd |   3 +-
 man/importPublicData.Rd    | 154 +++++++++++++++++++++++++++++++++++++
 4 files changed, 288 insertions(+), 3 deletions(-)
 create mode 100644 man/importPublicData.Rd

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index e88f3a1..4bd2824 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -652,7 +652,134 @@ setMethod( "getCTSS", "CAGEexp"
 
 
 #' importPublicData
-#' @noRd
+#' 
+#' Imports CAGE data from different sources into a [`CAGEexp`] object.  After
+#' the object has been created the data can be further manipulated and
+#' visualized using other functions available in the _CAGEr_ package and
+#' integrated with other analyses in R.  Available resources include:
+#' 
+#'   - FANTOM5 datasets (Forrest _et al.,_ Nature 2014) for numerous human and
+#'   mouse samples (primary cells, cell lines and tissues), which are fetched
+#'   directly from FANTOM5 online resource at
+#'   \href{https://fantom.gsc.riken.jp/5/data}{https://fantom.gsc.riken.jp/5/data}.
+#'   
+#'   - FANTOM3 and 4 datasets (Carninci _et al., _ Science 2005, Faulkner
+#'   _et al.,_ Nature Genetics 2009, Suzuki _et al._ Nature Genetics 2009) from
+#'   _FANTOM3and4CAGE_ data package available from Bioconductor.
+#'   
+#'   - ENCODE datasets (Djebali _et al._ Nature 2012) for numerous human cell
+#'   lines from _ENCODEprojectCAGE_ data package, which is available for
+#'   download from \href{http://promshift.genereg.net/CAGEr/}{http://promshift.genereg.net/CAGEr/}.
+#'   
+#'   - Zebrafish (_Danio rerio_) developmental timecourse datasets (Nepal _et al._ Genome
+#'   Research 2013) from \emph{ZebrafishDevelopmentalCAGE} data package, which
+#'   is available for download from
+#'   \href{http://promshift.genereg.net/CAGEr/}{http://promshift.genereg.net/CAGEr/}.
+#' 
+#' @param origin Character vector specifying one of the available resources for
+#'        CAGE data (`"FANTOM5"`, `"FANTOM3and4"`, `"ENCODE"` or `"ZebrafishDevelopment"`).
+#'        
+#' @param dataset Character vector specifying one or more of the datasets
+#'        available in the selected resource. For FANTOM5 it can be either
+#'        `"human"` or  `"mouse"`, and only one of them can be specified at a
+#'        time. For other resources please refer to the vignette of the
+#'        corresponding data package for the list of available datasets.
+#'        Multiple datasets mapped to the same genome can be specified to
+#'        combine selected samples from each.
+#'        
+#' @param group Character string specifying one or more groups within specified
+#'        dataset(s), from which the samples should be selected.  The `group`
+#'        argument is used only when importing TSSs from data packages and
+#'        ignored for "FANTOM5".  For available groups in each dataset please
+#'        refer to the vignette of the corresponding data package. Either only
+#'        one group has to be specified (if all selected samples belong to the
+#'        same group) or one group per sample (if samples belong to different
+#'        groups).  In the latter case, the number of elements in `group` must
+#'        match the number of elements in `sample`.
+#'        
+#' @param sample Character string specifying one or more CAGE samples.  Check
+#'        the corresponding data package for available samples within each group
+#'        and their labels. For FANTOM5 resource, list of all human (~1000) and
+#'        mouse (~) samples can be obtained in _CAGEr_ by loading
+#'        `data(FANTOM5humanSamples)` and `data(FANTOM5mouseSamples)`,
+#'        respectively.  Use the names from the \code{sample} column to specify
+#'        which samples should be imported.
+#' 
+#' @return A [`CAGEexp`] object is returned, containing information on library
+#' size, CTSS coordinates and tag count matrix.  The object is ready for _CAGEr_
+#' analysis (normalisation, tag clustering, …).
+#' 
+#' @references 
+#' 
+#'  - Carninci _et al.,_ (2005). _The Transcriptional Landscape of the Mammalian
+#'  Genome_. Science **309**(5740):1559-1563.
+#'  
+#'  - Djebali _et al.,_ (2012). _Landscape of transcription in human cells._
+#'  Nature **488**(7414):101-108.
+#'  
+#'  - Faulkner _et al.,_ (2009). _The regulated retrotransposon transcriptome of
+#'  mammalian cells._, Nature Genetics **41**:563-571.
+#'  
+#'  - Forrest _et al.,_ (2014). _A promoter-level mammalian expression atlas._
+#'  Nature **507**(7493):462-470.
+#'  
+#'  - Nepal _et al.,_ (2013). _Dynamic regulation of the transcription
+#'  initiation landscape at single nucleotide resolution during vertebrate
+#'  embryogenesis_. Genome Research **23**(11):1938-1950.
+#'  
+#'  - Suzuki_et al.,_ (2009). The transcriptional network that controls growth
+#'  arrest and differentiation in a human myeloid leukemia cell line_. Nature
+#'  Genetics **41**:553-562.
+#'  
+#' @author Vanja Haberle
+#' @author Charles Plessy
+#' 
+#' @family FANTOM data
+#' 
+#' @examples 
+#' \dontrun{
+#' ### importing FANTOM5 data
+#' 
+#' # list of FANTOM5 human tissue samples
+#' 
+#' data(FANTOM5humanSamples)
+#' head(subset(FANTOM5humanSamples, type == "tissue"))
+#' 
+#' # import selected samples
+#' f5 <- importPublicData(
+#'   origin="FANTOM5", dataset = "human",
+#'   sample = c("adipose_tissue__adult__pool1", "adrenal_gland__adult__pool1",
+#'              "aorta__adult__pool1"))
+#' 
+#' CTSScoordinatesGR(f5)
+#' 
+#' ### importing FANTOM3/4 data from a data package
+#' 
+#' library(FANTOM3and4CAGE)
+#' 
+#' # list of mouse datasets available in this package
+#' 
+#' data(FANTOMmouseSamples)
+#' unique(FANTOMmouseSamples$dataset)
+#' head(subset(FANTOMmouseSamples, dataset == "FANTOMtissueCAGEmouse"))
+#' head(subset(FANTOMmouseSamples, dataset == "FANTOMtimecourseCAGEmouse"))
+#' 
+#' # import selected samples from two different mouse datasets
+#' 
+#' f34 <- importPublicData(
+#'   origin="FANTOM3and4", dataset = c("FANTOMtissueCAGEmouse", "FANTOMtimecourseCAGEmouse"),
+#'   group = c("brain", "adipogenic_induction"),
+#'   sample = c("CCL-131_Neuro-2a_treatment_for_6hr_with_MPP+", "DFAT-D1_preadipocytes_2days"))
+#' 
+#' f34 <- importPublicData(
+#'   origin="FANTOM3and4", dataset = c("FANTOMtissueCAGEmouse"),
+#'   group = c("brain"),
+#'   sample = c("CCL-131_Neuro-2a_treatment_for_6hr_with_MPP+"))
+#' 
+#' CTSScoordinatesGR(f34)
+#' 
+#' }
+#' 
 #' @importFrom utils data
 #' @export
 
@@ -845,5 +972,7 @@ setGeneric("importPublicData",
   ce
 }
 
+#' @rdname importPublicData
+
 setMethod("importPublicData", signature(origin = "character", dataset = "character", sample = "character"),
           .importPublicData)
\ No newline at end of file
diff --git a/man/FANTOM5humanSamples.Rd b/man/FANTOM5humanSamples.Rd
index 4840553..f29bedc 100644
--- a/man/FANTOM5humanSamples.Rd
+++ b/man/FANTOM5humanSamples.Rd
@@ -18,7 +18,8 @@ libraries.  Its use is described in more details in the vignette
 }
 \seealso{
 Other FANTOM data: 
-\code{\link{FANTOM5mouseSamples}}
+\code{\link{FANTOM5mouseSamples}},
+\code{\link{importPublicData}()}
 }
 \concept{FANTOM data}
 \keyword{datasets}
diff --git a/man/FANTOM5mouseSamples.Rd b/man/FANTOM5mouseSamples.Rd
index 179cd51..011f352 100644
--- a/man/FANTOM5mouseSamples.Rd
+++ b/man/FANTOM5mouseSamples.Rd
@@ -18,7 +18,8 @@ libraries.  Its use is described in more details in the vignette
 }
 \seealso{
 Other FANTOM data: 
-\code{\link{FANTOM5humanSamples}}
+\code{\link{FANTOM5humanSamples}},
+\code{\link{importPublicData}()}
 }
 \concept{FANTOM data}
 \keyword{datasets}
diff --git a/man/importPublicData.Rd b/man/importPublicData.Rd
new file mode 100644
index 0000000..5e2cb52
--- /dev/null
+++ b/man/importPublicData.Rd
@@ -0,0 +1,154 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ImportMethods.R
+\name{importPublicData}
+\alias{importPublicData}
+\alias{importPublicData,character,character,ANY,character-method}
+\title{importPublicData}
+\usage{
+importPublicData(
+  origin = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"),
+  dataset,
+  group,
+  sample
+)
+
+\S4method{importPublicData}{character,character,ANY,character}(
+  origin = c("FANTOM5", "FANTOM3and4", "ENCODE", "ZebrafishDevelopment"),
+  dataset,
+  group,
+  sample
+)
+}
+\arguments{
+\item{origin}{Character vector specifying one of the available resources for
+CAGE data (\code{"FANTOM5"}, \code{"FANTOM3and4"}, \code{"ENCODE"} or \code{"ZebrafishDevelopment"}).}
+
+\item{dataset}{Character vector specifying one or more of the datasets
+available in the selected resource. For FANTOM5 it can be either
+\code{"human"} or  \code{"mouse"}, and only one of them can be specified at a
+time. For other resources please refer to the vignette of the
+corresponding data package for the list of available datasets.
+Multiple datasets mapped to the same genome can be specified to
+combine selected samples from each.}
+
+\item{group}{Character string specifying one or more groups within specified
+dataset(s), from which the samples should be selected.  The \code{group}
+argument is used only when importing TSSs from data packages and
+ignored for "FANTOM5".  For available groups in each dataset please
+refer to the vignette of the corresponding data package. Either only
+one group has to be specified (if all selected samples belong to the
+same group) or one group per sample (if samples belong to different
+groups).  In the latter case, the number of elements in \code{group} must
+match the number of elements in \code{sample}.}
+
+\item{sample}{Character string specifying one or more CAGE samples.  Check
+the corresponding data package for available samples within each group
+and their labels. For FANTOM5 resource, list of all human (~1000) and
+mouse (~) samples can be obtained in \emph{CAGEr} by loading
+\code{data(FANTOM5humanSamples)} and \code{data(FANTOM5mouseSamples)},
+respectively.  Use the names from the \code{sample} column to specify
+which samples should be imported.}
+}
+\value{
+A \code{\link{CAGEexp}} object is returned, containing information on library
+size, CTSS coordinates and tag count matrix.  The object is ready for \emph{CAGEr}
+analysis (normalisation, tag clustering, …).
+}
+\description{
+Imports CAGE data from different sources into a \code{\link{CAGEexp}} object.  After
+the object has been created the data can be further manipulated and
+visualized using other functions available in the \emph{CAGEr} package and
+integrated with other analyses in R.  Available resources include:
+}
+\details{
+\itemize{
+\item FANTOM5 datasets (Forrest \emph{et al.,} Nature 2014) for numerous human and
+mouse samples (primary cells, cell lines and tissues), which are fetched
+directly from FANTOM5 online resource at
+\href{https://fantom.gsc.riken.jp/5/data}{https://fantom.gsc.riken.jp/5/data}.
+\item FANTOM3 and 4 datasets (Carninci _et al., _ Science 2005, Faulkner
+\emph{et al.,} Nature Genetics 2009, Suzuki \emph{et al.} Nature Genetics 2009) from
+\emph{FANTOM3and4CAGE} data package available from Bioconductor.
+\item ENCODE datasets (Djebali \emph{et al.} Nature 2012) for numerous human cell
+lines from \emph{ENCODEprojectCAGE} data package, which is available for
+download from \href{http://promshift.genereg.net/CAGEr/}{http://promshift.genereg.net/CAGEr/}.
+\item Zebrafish (\emph{Danio rerio}) developmental timecourse datasets (Nepal \emph{et al.} Genome
+Research 2013) from \emph{ZebrafishDevelopmentalCAGE} data package, which
+is available for download from
+\href{http://promshift.genereg.net/CAGEr/}{http://promshift.genereg.net/CAGEr/}.
+}
+}
+\examples{
+\dontrun{
+### importing FANTOM5 data
+
+# list of FANTOM5 human tissue samples
+
+data(FANTOM5humanSamples)
+head(subset(FANTOM5humanSamples, type == "tissue"))
+
+# import selected samples
+f5 <- importPublicData(
+  origin="FANTOM5", dataset = "human",
+  sample = c("adipose_tissue__adult__pool1", "adrenal_gland__adult__pool1",
+             "aorta__adult__pool1"))
+
+CTSScoordinatesGR(f5)
+
+### importing FANTOM3/4 data from a data package
+
+library(FANTOM3and4CAGE)
+
+# list of mouse datasets available in this package
+
+data(FANTOMmouseSamples)
+unique(FANTOMmouseSamples$dataset)
+head(subset(FANTOMmouseSamples, dataset == "FANTOMtissueCAGEmouse"))
+head(subset(FANTOMmouseSamples, dataset == "FANTOMtimecourseCAGEmouse"))
+
+# import selected samples from two different mouse datasets
+
+f34 <- importPublicData(
+  origin="FANTOM3and4", dataset = c("FANTOMtissueCAGEmouse", "FANTOMtimecourseCAGEmouse"),
+  group = c("brain", "adipogenic_induction"),
+  sample = c("CCL-131_Neuro-2a_treatment_for_6hr_with_MPP+", "DFAT-D1_preadipocytes_2days"))
+
+f34 <- importPublicData(
+  origin="FANTOM3and4", dataset = c("FANTOMtissueCAGEmouse"),
+  group = c("brain"),
+  sample = c("CCL-131_Neuro-2a_treatment_for_6hr_with_MPP+"))
+
+CTSScoordinatesGR(f34)
+
+}
+
+}
+\references{
+\itemize{
+\item Carninci \emph{et al.,} (2005). \emph{The Transcriptional Landscape of the Mammalian
+Genome}. Science \strong{309}(5740):1559-1563.
+\item Djebali \emph{et al.,} (2012). \emph{Landscape of transcription in human cells.}
+Nature \strong{488}(7414):101-108.
+\item Faulkner \emph{et al.,} (2009). \emph{The regulated retrotransposon transcriptome of
+mammalian cells.}, Nature Genetics \strong{41}:563-571.
+\item Forrest \emph{et al.,} (2014). \emph{A promoter-level mammalian expression atlas.}
+Nature \strong{507}(7493):462-470.
+\item Nepal \emph{et al.,} (2013). \emph{Dynamic regulation of the transcription
+initiation landscape at single nucleotide resolution during vertebrate
+embryogenesis}. Genome Research \strong{23}(11):1938-1950.
+\item Suzuki_et al.,_ (2009). The transcriptional network that controls growth
+arrest and differentiation in a human myeloid leukemia cell line_. Nature
+Genetics \strong{41}:553-562.
+}
+}
+\seealso{
+Other FANTOM data: 
+\code{\link{FANTOM5humanSamples}},
+\code{\link{FANTOM5mouseSamples}}
+}
+\author{
+Vanja Haberle
+
+Charles Plessy
+}
+\concept{FANTOM data}

From a0191418d5b73978bf4313352772a388cc986102 Mon Sep 17 00:00:00 2001
From: Charles Plessy <charles.plessy@oist.jp>
Date: Wed, 28 Jun 2023 09:37:27 +0900
Subject: [PATCH 17/17] Fix function for one-sample cases.

---
 R/ImportMethods.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/ImportMethods.R b/R/ImportMethods.R
index 4bd2824..4089ef4 100644
--- a/R/ImportMethods.R
+++ b/R/ImportMethods.R
@@ -796,10 +796,14 @@ setGeneric("importPublicData",
                      df$strand,
                      seqinfo = seqinfo(getRefGenome(genome.name)),
                      bsgenomeName = genome.name)
-  ctssDF <- lapply(df[ , sample], Rle) |> DataFrame()
+  ctssDF <- lapply(df[ , sample, drop = FALSE], Rle) |> DataFrame()
   colnames(ctssDF) <- make.names(colnames(ctssDF))
   ctssSE <- SummarizedExperiment(c(counts = ctssDF), ctssRanges)
-  ctssSE <- ctssSE[rowSums(df[ , sample]) > 0,]
+  if (ncol(ctssDF) == 1) {
+    ctssSE <- ctssSE[        df[ , sample]  > 0,]
+  } else {
+    ctssSE <- ctssSE[rowSums(df[ , sample]) > 0,]
+  }
   sort(ctssSE)
 }