Skip to content

Commit

Permalink
. Added NA 'winsfix' to ASCAT and sequenza segmentation functions.
Browse files Browse the repository at this point in the history
. Fixed a few calls to the '%do%' and '%dopar%' operators without loading it from 'foreach' package.
  • Loading branch information
aoumess committed Aug 17, 2020
1 parent 8296415 commit 71d4de2
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 58 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Encoding: UTF-8
Package: EaCoN
Type: Package
Title: EaCoN : Easy Copy Number !
Version: 0.3.4-1
Date: 2018-12-10
Version: 0.3.5
Date: 2020-08-17
Author: Bastien Job
Authors@R: person("Bastien", "Job", email = "bastien.job@inserm.fr", role = c("aut", "cre"))
Depends: R(>= 3.1.0)
Expand Down
8 changes: 7 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
EaCoN
-----

v0.3.5 (20200817)*CloudyMonday*
-----------------
* CORR : Segment.*() : Added a patch to handle the NA behavior in copynumber::winsorize (error raised by new handling of NA values in runmed). The patch consists on applying winsorization on non-NA values only (whereas all values were transmitted in earlier versions).
* CORR : WES.Bin() : Better handling of a possible desynch in chr names (when a canonical chr had no remaining values, its level was kept. This raised a rare error).
* MOD : Many funcs : Fixed calls to the "%do%" and "%dopar" operators without loading it.

v0.3.4-1 (20181210) *PostRoscovite*
-----------------
* CORR : SNP6.Process(), CSHD.Process() : Edited code to handle changes in the rcnorm package, to discard the "chromosomes" package dependency.
Expand Down Expand Up @@ -83,7 +89,7 @@ v0.3.0 (20180724) *PapoQueen*
* All : Removed "EaCoN." prefix from most functions (less self-centric...)
* All : Took care of vectors and columns that could be converted to factor or integer (to free some RAM up).
* All : Added missing support for manual PELT penalty (only asymptotic mode was considered when SER.value was numeric).
* SNP6 : Revamped BAF homozygous calling and rescaling.
* SNP6 : Revamped BAF homozygous calling and rescaling.
* Defined the novel sets of default parameters for all supported technologies.
* Redacted the README.md

Expand Down
105 changes: 50 additions & 55 deletions R/EaCoN_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,6 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt


## Winsorization
#if(!is.null(smooth.k)) {
# tmsg("Smoothing L2R outliers ...")
# cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
# cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
# data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE]
# rm(list = c("cndf", "cndf.wins"))
#}

## Winsorization *FIXATTEMPT*
if(!is.null(smooth.k)) {
tmsg("Smoothing L2R outliers ...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
Expand Down Expand Up @@ -143,7 +134,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
## Computing gaps
if (!is.null(mingap)) {
# `%do%` <- foreach::"%do%"
data$data$chr <- foreach(k = data$data$ch, .combine = "c") %do% {
data$data$chr <- foreach::foreach(k = data$data$ch, .combine = "c") %do% {
gapz <- which(diff(data$data$SNPpos$pos[k]) >= mingap)
return(unname(split(k, findInterval(k, k[gapz+1]))))
}
Expand Down Expand Up @@ -213,16 +204,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
} else stop(tmsg("Invalid recentering method called !"), call. = FALSE)

## Winsorization (for aesthetics)
#tmsg("Smoothing L2R (for plots)...")
#cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
#cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
#data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE]
#colnames(data$data$Tumor_LogR_wins) <- samplename
#rm(list = c("cndf", "cndf.wins"))

## Winsorization (for aesthetics) *FIXATTEMPT*
tmsg("Smoothing L2R (for plots)...")
# str(data)
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
Expand Down Expand Up @@ -377,8 +359,8 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
l2r.value <- data.frame(Chr = l2r.chr,
Start = as.integer(data$data$SNPpos$pos),
End = as.integer(data$data$SNPpos$pos),
#Value = data$data$Tumor_LogR_wins[,1],
Value = data$data$Tumor_LogR[,1],
Value = data$data$Tumor_LogR_wins[,1],
# Value = data$data$Tumor_LogR[,1],
stringsAsFactors = FALSE)
baf.value <- data.frame(Chr = l2r.chr,
Start = as.integer(data$data$SNPpos$pos),
Expand Down Expand Up @@ -500,13 +482,15 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC
))

## Winsorization
#if(!is.null(smooth.k)) {
# tmsg("Smoothing L2R outliers ...")
# cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
# cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
# data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE]
# rm(list = c("cndf", "cndf.wins"))
#}
if(!is.null(smooth.k)) {
tmsg("Smoothing L2R outliers ...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE]
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
}

## BAF filtering
tmsg("Filtering BAF...")
Expand Down Expand Up @@ -649,13 +633,16 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC
tmsg("No recentering.")
} else stop(tmsg("Invalid recentering method called !"), call. = FALSE)

## Winsorization
#tmsg("Smoothing L2R (for plots)...")
#cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
#cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
#data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE]
#colnames(data$data$Tumor_LogR_wins) <- samplename
#rm(list = c("cndf", "cndf.wins"))
## Winsorization (for aesthetics)
tmsg("Smoothing L2R (for plots)...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
data$data$Tumor_LogR_wins <- data$data$Tumor_LogR
data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE]
colnames(data$data$Tumor_LogR_wins) <- samplename
rm(list = c("cndf", "cndf.wins", "l2r.nona"))


## PELT rescue
Expand Down Expand Up @@ -806,8 +793,8 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC
l2r.value <- data.frame(Chr = l2r.chr,
Start = data$data$SNPpos$pos,
End = data$data$SNPpos$pos,
#Value = data$data$Tumor_LogR_wins[,1],
Value = data$data$Tumor_LogR[,1],
Value = data$data$Tumor_LogR_wins[,1],
# Value = data$data$Tumor_LogR[,1],
stringsAsFactors = FALSE)
# baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)])
baf.value <- data.frame(Chr = l2r.chr,
Expand Down Expand Up @@ -870,6 +857,8 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom

calling.method <- tolower(calling.method)

`%do%` <- foreach::"%do%"

if (!is.list(data)) stop(tmsg("data should be a list !"), call. = FALSE)
if (!dir.exists(out.dir)) stop(tmsg(paste0("Output directory [", out.dir, "] does not exist !")), call. = FALSE)
if (!(calling.method %in% c("mad", "density"))) stop(tmsg("calling.method should be 'MAD' or 'density' !"), call. = FALSE)
Expand Down Expand Up @@ -923,13 +912,15 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
))

## Winsorization
#if(!is.null(smooth.k)) {
# tmsg("Smoothing L2R outliers ...")
# cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
# cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
# data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE]
# rm(list = c("cndf", "cndf.wins"))
#}
if(!is.null(smooth.k)) {
tmsg("Smoothing L2R outliers ...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE]
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
}

## BAF filtering
tmsg("Filtering BAF...")
Expand Down Expand Up @@ -1085,14 +1076,16 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
tmsg("No recentering.")
} else stop(tmsg("Invalid recentering method called !"), call. = FALSE)

## Winsorization
#tmsg("Smoothing L2R (for plots)...")
#cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
#cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
#data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE]
#colnames(data$data$Tumor_LogR_wins) <- samplename
rm(list = c("cndf", "cndf.wins"))

## Winsorization (for aesthetics)
tmsg("Smoothing L2R (for plots)...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
data$data$Tumor_LogR_wins <- data$data$Tumor_LogR
data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE]
colnames(data$data$Tumor_LogR_wins) <- samplename
rm(list = c("cndf", "cndf.wins", "l2r.nona"))

## PELT rescue
if (!is.null(SER.pen)) {
Expand Down Expand Up @@ -1125,7 +1118,6 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
tmsg(paste0(" Found ", length(rescued), "."))
if (length(rescued) > seg.maxn) tmsg("WARNING : Many small events found, profile may be noisy ! Consider using 'smooth.k', or for WES data, strengthen low depth filtering !")
data$meta$eacon[["PELT-nseg"]] <- length(rescued)
`%do%` <- foreach::"%do%"
foreach::foreach(re = rescued, .combine = "c") %do% {
interv <- mydf$idx.ori[seg.start[re]]:mydf$idx.ori[seg.end[re]]
data$data$Tumor_LogR_segmented[interv] <- median(data$data$Tumor_LogR[interv, 1], na.rm = TRUE)
Expand Down Expand Up @@ -1242,8 +1234,8 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
l2r.value <- data.frame(Chr = l2r.chr,
Start = data$data$SNPpos$pos,
End = data$data$SNPpos$pos,
#Value = data$data$Tumor_LogR_wins[,1],
Value = data$data$Tumor_LogR[,1],
Value = data$data$Tumor_LogR_wins[,1],
# Value = data$data$Tumor_LogR[,1],
stringsAsFactors = FALSE)
# baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)])
baf.value <- data.frame(Chr = l2r.chr,
Expand Down Expand Up @@ -1361,6 +1353,7 @@ ASCN.ASCAT <- function(data = NULL, gammaRange = c(.35,.95), nsubthread = 1, clu
cls <- parallel::makeCluster(spec = nsubthread, type = cluster.type, outfile = "")
doParallel::registerDoParallel(cls)
gamma <- 0
`%dopar%` <- foreach::"%dopar%"
fit.val <- as.data.frame(foreach::foreach(gamma = gammavec, .combine = "rbind", .inorder = TRUE) %dopar% {
tmsg(paste0(" gamma = ", gamma))
odirg <- paste0(odir, "/gamma", sprintf("%.2f", gamma))
Expand Down Expand Up @@ -2042,6 +2035,8 @@ Annotate <- function(data = NULL, refGene.table = NULL, targets.table = NULL, re

oridir <- getwd()

`%do%` <- foreach::"%do%"

if (!is.list(data)) stop(tmsg("data should be a list !"), call. = FALSE)

valid.genomes <- get.valid.genomes()
Expand Down

0 comments on commit 71d4de2

Please sign in to comment.