From a11dec1eda8785273664bdd562de17e209096d79 Mon Sep 17 00:00:00 2001 From: Bastien Job Date: Tue, 28 Jul 2020 23:37:56 +0200 Subject: [PATCH 1/4] Update EaCoN_functions.R Temprary hack to disable winsorization (smoothing) that relies on runmed, which usage is bugged since R>3.6 when used with NA values without the new "na.action" parameter... --- R/EaCoN_functions.R | 85 +++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/R/EaCoN_functions.R b/R/EaCoN_functions.R index bffa066..6effd35 100644 --- a/R/EaCoN_functions.R +++ b/R/EaCoN_functions.R @@ -92,13 +92,13 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt ## Winsorization - if(!is.null(smooth.k)) { - tmsg("Smoothing L2R outliers ...") - cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) - data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE] - rm(list = c("cndf", "cndf.wins")) - } + #if(!is.null(smooth.k)) { + # tmsg("Smoothing L2R outliers ...") + # cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + # cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) + # data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE] + # rm(list = c("cndf", "cndf.wins")) + #} ## BAF filtering tmsg("Filtering BAF...") @@ -200,12 +200,12 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt } else stop(tmsg("Invalid recentering method called !"), call. = FALSE) ## Winsorization (for aesthetics) - tmsg("Smoothing L2R (for plots)...") - cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) - data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE] - colnames(data$data$Tumor_LogR_wins) <- samplename - rm(list = c("cndf", "cndf.wins")) + #tmsg("Smoothing L2R (for plots)...") + #cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + #cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) + #data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE] + #colnames(data$data$Tumor_LogR_wins) <- samplename + #rm(list = c("cndf", "cndf.wins")) ## PELT rescue @@ -353,7 +353,8 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt l2r.value <- data.frame(Chr = l2r.chr, Start = as.integer(data$data$SNPpos$pos), End = as.integer(data$data$SNPpos$pos), - Value = data$data$Tumor_LogR_wins[,1], + #Value = data$data$Tumor_LogR_wins[,1], + Value = data$data$Tumor_LogR[,1], stringsAsFactors = FALSE) baf.value <- data.frame(Chr = l2r.chr, Start = as.integer(data$data$SNPpos$pos), @@ -475,13 +476,13 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC )) ## Winsorization - if(!is.null(smooth.k)) { - tmsg("Smoothing L2R outliers ...") - cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) - data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE] - rm(list = c("cndf", "cndf.wins")) - } + #if(!is.null(smooth.k)) { + # tmsg("Smoothing L2R outliers ...") + # cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + # cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) + # data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE] + # rm(list = c("cndf", "cndf.wins")) + #} ## BAF filtering tmsg("Filtering BAF...") @@ -625,12 +626,12 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC } else stop(tmsg("Invalid recentering method called !"), call. = FALSE) ## Winsorization - tmsg("Smoothing L2R (for plots)...") - cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) - data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE] - colnames(data$data$Tumor_LogR_wins) <- samplename - rm(list = c("cndf", "cndf.wins")) + #tmsg("Smoothing L2R (for plots)...") + #cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + #cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) + #data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE] + #colnames(data$data$Tumor_LogR_wins) <- samplename + #rm(list = c("cndf", "cndf.wins")) ## PELT rescue @@ -781,7 +782,8 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC l2r.value <- data.frame(Chr = l2r.chr, Start = data$data$SNPpos$pos, End = data$data$SNPpos$pos, - Value = data$data$Tumor_LogR_wins[,1], + #Value = data$data$Tumor_LogR_wins[,1], + Value = data$data$Tumor_LogR[,1], stringsAsFactors = FALSE) # baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)]) baf.value <- data.frame(Chr = l2r.chr, @@ -897,13 +899,13 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom )) ## Winsorization - if(!is.null(smooth.k)) { - tmsg("Smoothing L2R outliers ...") - cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) - data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE] - rm(list = c("cndf", "cndf.wins")) - } + #if(!is.null(smooth.k)) { + # tmsg("Smoothing L2R outliers ...") + # cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + # cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) + # data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE] + # rm(list = c("cndf", "cndf.wins")) + #} ## BAF filtering tmsg("Filtering BAF...") @@ -1060,11 +1062,11 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom } else stop(tmsg("Invalid recentering method called !"), call. = FALSE) ## Winsorization - tmsg("Smoothing L2R (for plots)...") - cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) - data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE] - colnames(data$data$Tumor_LogR_wins) <- samplename + #tmsg("Smoothing L2R (for plots)...") + #cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + #cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) + #data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE] + #colnames(data$data$Tumor_LogR_wins) <- samplename rm(list = c("cndf", "cndf.wins")) @@ -1216,7 +1218,8 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom l2r.value <- data.frame(Chr = l2r.chr, Start = data$data$SNPpos$pos, End = data$data$SNPpos$pos, - Value = data$data$Tumor_LogR_wins[,1], + #Value = data$data$Tumor_LogR_wins[,1], + Value = data$data$Tumor_LogR[,1], stringsAsFactors = FALSE) # baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)]) baf.value <- data.frame(Chr = l2r.chr, From 56a5d7cbc248f6f4c3a833bc01daa860a4fed499 Mon Sep 17 00:00:00 2001 From: aoumess Date: Mon, 17 Aug 2020 11:04:04 +0200 Subject: [PATCH 2/4] . Trying a better fix for the NA winsorize bug. --- R/EaCoN_functions.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/EaCoN_functions.R b/R/EaCoN_functions.R index 6effd35..2c6b31d 100644 --- a/R/EaCoN_functions.R +++ b/R/EaCoN_functions.R @@ -207,6 +207,16 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt #colnames(data$data$Tumor_LogR_wins) <- samplename #rm(list = c("cndf", "cndf.wins")) + ## Winsorization (for aesthetics) *FIXATTEMPT* + tmsg("Smoothing L2R (for plots)...") + cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + l2r.nona <- !is.na(data$data$Tumor_LogR[[1]]) + cndf <- cndf[l2r.nona,] + cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) + data$data$Tumor_LogR_wins <- data$data$Tumor_LogR + data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE] + colnames(data$data$Tumor_LogR_wins) <- samplename + rm(list = c("cndf", "cndf.wins", "l2r.nona")) ## PELT rescue if (!is.null(SER.pen)) { From 82964150e5db6b973437fc5c253a735750e4807e Mon Sep 17 00:00:00 2001 From: aoumess Date: Mon, 17 Aug 2020 14:37:18 +0200 Subject: [PATCH 3/4] . Tested fix to the NA handling error . Fixed a desynch between available and reference chromosomes in binned object (using base::droplevels) --- R/EaCoN_functions.R | 16 +++++++++++++++- R/wes_process.R | 4 ++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/R/EaCoN_functions.R b/R/EaCoN_functions.R index 2c6b31d..05ae6be 100644 --- a/R/EaCoN_functions.R +++ b/R/EaCoN_functions.R @@ -20,6 +20,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt # source("~/git_gustaveroussy/EaCoN/R/mini_functions.R") # source("~/git_gustaveroussy/EaCoN/R/plot_functions.R") + `%do%` <- foreach::"%do%" calling.method <- tolower(calling.method) @@ -100,6 +101,17 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt # rm(list = c("cndf", "cndf.wins")) #} + ## Winsorization *FIXATTEMPT* + if(!is.null(smooth.k)) { + tmsg("Smoothing L2R outliers ...") + cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + l2r.nona <- !is.na(data$data$Tumor_LogR[[1]]) + cndf <- cndf[l2r.nona,] + cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) + data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE] + rm(list = c("cndf", "cndf.wins", "l2r.nona")) + } + ## BAF filtering tmsg("Filtering BAF...") if ("Tumor_BAF.unisomy" %in% names(data$data)) { @@ -130,6 +142,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt ## Computing gaps if (!is.null(mingap)) { + # `%do%` <- foreach::"%do%" data$data$chr <- foreach(k = data$data$ch, .combine = "c") %do% { gapz <- which(diff(data$data$SNPpos$pos[k]) >= mingap) return(unname(split(k, findInterval(k, k[gapz+1])))) @@ -209,6 +222,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt ## Winsorization (for aesthetics) *FIXATTEMPT* tmsg("Smoothing L2R (for plots)...") + # str(data) cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) l2r.nona <- !is.na(data$data$Tumor_LogR[[1]]) cndf <- cndf[l2r.nona,] @@ -249,7 +263,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt tmsg(paste0(" Found ", length(rescued), ".")) if (length(rescued) > seg.maxn) tmsg("WARNING : Many small events found, profile may be noisy ! Consider using 'smooth.k', or for WES data, strengthen low depth filtering !") data$meta$eacon[["PELT-nseg"]] <- length(rescued) - `%do%` <- foreach::"%do%" + # `%do%` <- foreach::"%do%" foreach::foreach(re = rescued, .combine = "c") %do% { interv <- mydf$idx.ori[seg.start[re]]:mydf$idx.ori[seg.end[re]] data$data$Tumor_LogR_segmented[interv] <- median(data$data$Tumor_LogR[interv, 1], na.rm = TRUE) diff --git a/R/wes_process.R b/R/wes_process.R index 6f5d921..9b3c32c 100644 --- a/R/wes_process.R +++ b/R/wes_process.R @@ -338,6 +338,10 @@ WES.Bin <- function(testBAM = NULL, refBAM = NULL, BINpack = NULL, samplename = meta.w$SNP.tot.count.ref.summary <- my.summary(SNP.all$tot_count.ref[!is.na(SNP.all$tot_count.ref)]) gc() + ## Cleaning uncovered chr levels + CN.all$chr <- droplevels(CN.all$chr) + SNP.all$chr <- droplevels(SNP.all$chr) + WESobj <- list(RD = CN.all, SNP = SNP.all, meta = list(basic = meta.b, WES = meta.w)) rm(CN.all, SNP.all) gc() From 71d4de2dfe43a481b8bc3ecd9253a35e88d7a23d Mon Sep 17 00:00:00 2001 From: aoumess Date: Mon, 17 Aug 2020 14:57:51 +0200 Subject: [PATCH 4/4] . Added NA 'winsfix' to ASCAT and sequenza segmentation functions. . Fixed a few calls to the '%do%' and '%dopar%' operators without loading it from 'foreach' package. --- DESCRIPTION | 4 +- NEWS | 8 +++- R/EaCoN_functions.R | 105 +++++++++++++++++++++----------------------- 3 files changed, 59 insertions(+), 58 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6398d53..ca480c8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Encoding: UTF-8 Package: EaCoN Type: Package Title: EaCoN : Easy Copy Number ! -Version: 0.3.4-1 -Date: 2018-12-10 +Version: 0.3.5 +Date: 2020-08-17 Author: Bastien Job Authors@R: person("Bastien", "Job", email = "bastien.job@inserm.fr", role = c("aut", "cre")) Depends: R(>= 3.1.0) diff --git a/NEWS b/NEWS index 9aa09ce..3affd5e 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,12 @@ EaCoN ----- +v0.3.5 (20200817)*CloudyMonday* +----------------- +* CORR : Segment.*() : Added a patch to handle the NA behavior in copynumber::winsorize (error raised by new handling of NA values in runmed). The patch consists on applying winsorization on non-NA values only (whereas all values were transmitted in earlier versions). +* CORR : WES.Bin() : Better handling of a possible desynch in chr names (when a canonical chr had no remaining values, its level was kept. This raised a rare error). +* MOD : Many funcs : Fixed calls to the "%do%" and "%dopar" operators without loading it. + v0.3.4-1 (20181210) *PostRoscovite* ----------------- * CORR : SNP6.Process(), CSHD.Process() : Edited code to handle changes in the rcnorm package, to discard the "chromosomes" package dependency. @@ -83,7 +89,7 @@ v0.3.0 (20180724) *PapoQueen* * All : Removed "EaCoN." prefix from most functions (less self-centric...) * All : Took care of vectors and columns that could be converted to factor or integer (to free some RAM up). * All : Added missing support for manual PELT penalty (only asymptotic mode was considered when SER.value was numeric). -* SNP6 : Revamped BAF homozygous calling and rescaling. +* SNP6 : Revamped BAF homozygous calling and rescaling. * Defined the novel sets of default parameters for all supported technologies. * Redacted the README.md diff --git a/R/EaCoN_functions.R b/R/EaCoN_functions.R index 05ae6be..adb6c41 100644 --- a/R/EaCoN_functions.R +++ b/R/EaCoN_functions.R @@ -93,15 +93,6 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt ## Winsorization - #if(!is.null(smooth.k)) { - # tmsg("Smoothing L2R outliers ...") - # cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - # cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) - # data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE] - # rm(list = c("cndf", "cndf.wins")) - #} - - ## Winsorization *FIXATTEMPT* if(!is.null(smooth.k)) { tmsg("Smoothing L2R outliers ...") cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) @@ -143,7 +134,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt ## Computing gaps if (!is.null(mingap)) { # `%do%` <- foreach::"%do%" - data$data$chr <- foreach(k = data$data$ch, .combine = "c") %do% { + data$data$chr <- foreach::foreach(k = data$data$ch, .combine = "c") %do% { gapz <- which(diff(data$data$SNPpos$pos[k]) >= mingap) return(unname(split(k, findInterval(k, k[gapz+1])))) } @@ -213,16 +204,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt } else stop(tmsg("Invalid recentering method called !"), call. = FALSE) ## Winsorization (for aesthetics) - #tmsg("Smoothing L2R (for plots)...") - #cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - #cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) - #data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE] - #colnames(data$data$Tumor_LogR_wins) <- samplename - #rm(list = c("cndf", "cndf.wins")) - - ## Winsorization (for aesthetics) *FIXATTEMPT* tmsg("Smoothing L2R (for plots)...") - # str(data) cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) l2r.nona <- !is.na(data$data$Tumor_LogR[[1]]) cndf <- cndf[l2r.nona,] @@ -377,8 +359,8 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt l2r.value <- data.frame(Chr = l2r.chr, Start = as.integer(data$data$SNPpos$pos), End = as.integer(data$data$SNPpos$pos), - #Value = data$data$Tumor_LogR_wins[,1], - Value = data$data$Tumor_LogR[,1], + Value = data$data$Tumor_LogR_wins[,1], + # Value = data$data$Tumor_LogR[,1], stringsAsFactors = FALSE) baf.value <- data.frame(Chr = l2r.chr, Start = as.integer(data$data$SNPpos$pos), @@ -500,13 +482,15 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC )) ## Winsorization - #if(!is.null(smooth.k)) { - # tmsg("Smoothing L2R outliers ...") - # cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - # cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) - # data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE] - # rm(list = c("cndf", "cndf.wins")) - #} + if(!is.null(smooth.k)) { + tmsg("Smoothing L2R outliers ...") + cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + l2r.nona <- !is.na(data$data$Tumor_LogR[[1]]) + cndf <- cndf[l2r.nona,] + cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) + data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE] + rm(list = c("cndf", "cndf.wins", "l2r.nona")) + } ## BAF filtering tmsg("Filtering BAF...") @@ -649,13 +633,16 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC tmsg("No recentering.") } else stop(tmsg("Invalid recentering method called !"), call. = FALSE) - ## Winsorization - #tmsg("Smoothing L2R (for plots)...") - #cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - #cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) - #data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE] - #colnames(data$data$Tumor_LogR_wins) <- samplename - #rm(list = c("cndf", "cndf.wins")) + ## Winsorization (for aesthetics) + tmsg("Smoothing L2R (for plots)...") + cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + l2r.nona <- !is.na(data$data$Tumor_LogR[[1]]) + cndf <- cndf[l2r.nona,] + cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) + data$data$Tumor_LogR_wins <- data$data$Tumor_LogR + data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE] + colnames(data$data$Tumor_LogR_wins) <- samplename + rm(list = c("cndf", "cndf.wins", "l2r.nona")) ## PELT rescue @@ -806,8 +793,8 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC l2r.value <- data.frame(Chr = l2r.chr, Start = data$data$SNPpos$pos, End = data$data$SNPpos$pos, - #Value = data$data$Tumor_LogR_wins[,1], - Value = data$data$Tumor_LogR[,1], + Value = data$data$Tumor_LogR_wins[,1], + # Value = data$data$Tumor_LogR[,1], stringsAsFactors = FALSE) # baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)]) baf.value <- data.frame(Chr = l2r.chr, @@ -870,6 +857,8 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom calling.method <- tolower(calling.method) + `%do%` <- foreach::"%do%" + if (!is.list(data)) stop(tmsg("data should be a list !"), call. = FALSE) if (!dir.exists(out.dir)) stop(tmsg(paste0("Output directory [", out.dir, "] does not exist !")), call. = FALSE) if (!(calling.method %in% c("mad", "density"))) stop(tmsg("calling.method should be 'MAD' or 'density' !"), call. = FALSE) @@ -923,13 +912,15 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom )) ## Winsorization - #if(!is.null(smooth.k)) { - # tmsg("Smoothing L2R outliers ...") - # cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - # cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) - # data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE] - # rm(list = c("cndf", "cndf.wins")) - #} + if(!is.null(smooth.k)) { + tmsg("Smoothing L2R outliers ...") + cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + l2r.nona <- !is.na(data$data$Tumor_LogR[[1]]) + cndf <- cndf[l2r.nona,] + cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE) + data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE] + rm(list = c("cndf", "cndf.wins", "l2r.nona")) + } ## BAF filtering tmsg("Filtering BAF...") @@ -1085,14 +1076,16 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom tmsg("No recentering.") } else stop(tmsg("Invalid recentering method called !"), call. = FALSE) - ## Winsorization - #tmsg("Smoothing L2R (for plots)...") - #cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) - #cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) - #data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE] - #colnames(data$data$Tumor_LogR_wins) <- samplename - rm(list = c("cndf", "cndf.wins")) - + ## Winsorization (for aesthetics) + tmsg("Smoothing L2R (for plots)...") + cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE) + l2r.nona <- !is.na(data$data$Tumor_LogR[[1]]) + cndf <- cndf[l2r.nona,] + cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE) + data$data$Tumor_LogR_wins <- data$data$Tumor_LogR + data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE] + colnames(data$data$Tumor_LogR_wins) <- samplename + rm(list = c("cndf", "cndf.wins", "l2r.nona")) ## PELT rescue if (!is.null(SER.pen)) { @@ -1125,7 +1118,6 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom tmsg(paste0(" Found ", length(rescued), ".")) if (length(rescued) > seg.maxn) tmsg("WARNING : Many small events found, profile may be noisy ! Consider using 'smooth.k', or for WES data, strengthen low depth filtering !") data$meta$eacon[["PELT-nseg"]] <- length(rescued) - `%do%` <- foreach::"%do%" foreach::foreach(re = rescued, .combine = "c") %do% { interv <- mydf$idx.ori[seg.start[re]]:mydf$idx.ori[seg.end[re]] data$data$Tumor_LogR_segmented[interv] <- median(data$data$Tumor_LogR[interv, 1], na.rm = TRUE) @@ -1242,8 +1234,8 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom l2r.value <- data.frame(Chr = l2r.chr, Start = data$data$SNPpos$pos, End = data$data$SNPpos$pos, - #Value = data$data$Tumor_LogR_wins[,1], - Value = data$data$Tumor_LogR[,1], + Value = data$data$Tumor_LogR_wins[,1], + # Value = data$data$Tumor_LogR[,1], stringsAsFactors = FALSE) # baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)]) baf.value <- data.frame(Chr = l2r.chr, @@ -1361,6 +1353,7 @@ ASCN.ASCAT <- function(data = NULL, gammaRange = c(.35,.95), nsubthread = 1, clu cls <- parallel::makeCluster(spec = nsubthread, type = cluster.type, outfile = "") doParallel::registerDoParallel(cls) gamma <- 0 + `%dopar%` <- foreach::"%dopar%" fit.val <- as.data.frame(foreach::foreach(gamma = gammavec, .combine = "rbind", .inorder = TRUE) %dopar% { tmsg(paste0(" gamma = ", gamma)) odirg <- paste0(odir, "/gamma", sprintf("%.2f", gamma)) @@ -2042,6 +2035,8 @@ Annotate <- function(data = NULL, refGene.table = NULL, targets.table = NULL, re oridir <- getwd() + `%do%` <- foreach::"%do%" + if (!is.list(data)) stop(tmsg("data should be a list !"), call. = FALSE) valid.genomes <- get.valid.genomes()