Skip to content

Commit

Permalink
Merge pull request #21 from gustaveroussy/winsfix
Browse files Browse the repository at this point in the history
Winsfix
  • Loading branch information
aoumess authored Aug 17, 2020
2 parents abb78d9 + 71d4de2 commit 9247c92
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 24 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Encoding: UTF-8
Package: EaCoN
Type: Package
Title: EaCoN : Easy Copy Number !
Version: 0.3.4-1
Date: 2018-12-10
Version: 0.3.5
Date: 2020-08-17
Author: Bastien Job
Authors@R: person("Bastien", "Job", email = "bastien.job@inserm.fr", role = c("aut", "cre"))
Depends: R(>= 3.1.0)
Expand Down
8 changes: 7 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
EaCoN
-----

v0.3.5 (20200817)*CloudyMonday*
-----------------
* CORR : Segment.*() : Added a patch to handle the NA behavior in copynumber::winsorize (error raised by new handling of NA values in runmed). The patch consists on applying winsorization on non-NA values only (whereas all values were transmitted in earlier versions).
* CORR : WES.Bin() : Better handling of a possible desynch in chr names (when a canonical chr had no remaining values, its level was kept. This raised a rare error).
* MOD : Many funcs : Fixed calls to the "%do%" and "%dopar" operators without loading it.

v0.3.4-1 (20181210) *PostRoscovite*
-----------------
* CORR : SNP6.Process(), CSHD.Process() : Edited code to handle changes in the rcnorm package, to discard the "chromosomes" package dependency.
Expand Down Expand Up @@ -83,7 +89,7 @@ v0.3.0 (20180724) *PapoQueen*
* All : Removed "EaCoN." prefix from most functions (less self-centric...)
* All : Took care of vectors and columns that could be converted to factor or integer (to free some RAM up).
* All : Added missing support for manual PELT penalty (only asymptotic mode was considered when SER.value was numeric).
* SNP6 : Revamped BAF homozygous calling and rescaling.
* SNP6 : Revamped BAF homozygous calling and rescaling.
* Defined the novel sets of default parameters for all supported technologies.
* Redacted the README.md

Expand Down
64 changes: 43 additions & 21 deletions R/EaCoN_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
# source("~/git_gustaveroussy/EaCoN/R/mini_functions.R")
# source("~/git_gustaveroussy/EaCoN/R/plot_functions.R")

`%do%` <- foreach::"%do%"

calling.method <- tolower(calling.method)

Expand Down Expand Up @@ -93,11 +94,13 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt

## Winsorization
if(!is.null(smooth.k)) {
tmsg("Smoothing L2R outliers ...")
tmsg("Smoothing L2R outliers ...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE]
rm(list = c("cndf", "cndf.wins"))
data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE]
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
}

## BAF filtering
Expand Down Expand Up @@ -130,7 +133,8 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt

## Computing gaps
if (!is.null(mingap)) {
data$data$chr <- foreach(k = data$data$ch, .combine = "c") %do% {
# `%do%` <- foreach::"%do%"
data$data$chr <- foreach::foreach(k = data$data$ch, .combine = "c") %do% {
gapz <- which(diff(data$data$SNPpos$pos[k]) >= mingap)
return(unname(split(k, findInterval(k, k[gapz+1]))))
}
Expand Down Expand Up @@ -202,11 +206,13 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
## Winsorization (for aesthetics)
tmsg("Smoothing L2R (for plots)...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE]
data$data$Tumor_LogR_wins <- data$data$Tumor_LogR
data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE]
colnames(data$data$Tumor_LogR_wins) <- samplename
rm(list = c("cndf", "cndf.wins"))

rm(list = c("cndf", "cndf.wins", "l2r.nona"))

## PELT rescue
if (!is.null(SER.pen)) {
Expand Down Expand Up @@ -239,7 +245,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
tmsg(paste0(" Found ", length(rescued), "."))
if (length(rescued) > seg.maxn) tmsg("WARNING : Many small events found, profile may be noisy ! Consider using 'smooth.k', or for WES data, strengthen low depth filtering !")
data$meta$eacon[["PELT-nseg"]] <- length(rescued)
`%do%` <- foreach::"%do%"
# `%do%` <- foreach::"%do%"
foreach::foreach(re = rescued, .combine = "c") %do% {
interv <- mydf$idx.ori[seg.start[re]]:mydf$idx.ori[seg.end[re]]
data$data$Tumor_LogR_segmented[interv] <- median(data$data$Tumor_LogR[interv, 1], na.rm = TRUE)
Expand Down Expand Up @@ -354,6 +360,7 @@ Segment.ASCAT <- function(data = NULL, mingap = 5E+06, smooth.k = NULL, BAF.filt
Start = as.integer(data$data$SNPpos$pos),
End = as.integer(data$data$SNPpos$pos),
Value = data$data$Tumor_LogR_wins[,1],
# Value = data$data$Tumor_LogR[,1],
stringsAsFactors = FALSE)
baf.value <- data.frame(Chr = l2r.chr,
Start = as.integer(data$data$SNPpos$pos),
Expand Down Expand Up @@ -475,12 +482,14 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC
))

## Winsorization
if(!is.null(smooth.k)) {
if(!is.null(smooth.k)) {
tmsg("Smoothing L2R outliers ...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE]
rm(list = c("cndf", "cndf.wins"))
data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE]
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
}

## BAF filtering
Expand Down Expand Up @@ -624,13 +633,16 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC
tmsg("No recentering.")
} else stop(tmsg("Invalid recentering method called !"), call. = FALSE)

## Winsorization
## Winsorization (for aesthetics)
tmsg("Smoothing L2R (for plots)...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE]
data$data$Tumor_LogR_wins <- data$data$Tumor_LogR
data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE]
colnames(data$data$Tumor_LogR_wins) <- samplename
rm(list = c("cndf", "cndf.wins"))
rm(list = c("cndf", "cndf.wins", "l2r.nona"))


## PELT rescue
Expand Down Expand Up @@ -782,6 +794,7 @@ Segment.FACETS <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, homoC
Start = data$data$SNPpos$pos,
End = data$data$SNPpos$pos,
Value = data$data$Tumor_LogR_wins[,1],
# Value = data$data$Tumor_LogR[,1],
stringsAsFactors = FALSE)
# baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)])
baf.value <- data.frame(Chr = l2r.chr,
Expand Down Expand Up @@ -844,6 +857,8 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom

calling.method <- tolower(calling.method)

`%do%` <- foreach::"%do%"

if (!is.list(data)) stop(tmsg("data should be a list !"), call. = FALSE)
if (!dir.exists(out.dir)) stop(tmsg(paste0("Output directory [", out.dir, "] does not exist !")), call. = FALSE)
if (!(calling.method %in% c("mad", "density"))) stop(tmsg("calling.method should be 'MAD' or 'density' !"), call. = FALSE)
Expand Down Expand Up @@ -900,9 +915,11 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
if(!is.null(smooth.k)) {
tmsg("Smoothing L2R outliers ...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = smooth.k, tau = 1, verbose = FALSE)
data$data$Tumor_LogR[,1] <- cndf.wins[, 3, drop = FALSE]
rm(list = c("cndf", "cndf.wins"))
data$data$Tumor_LogR[l2r.nona,1] <- cndf.wins[, 3, drop = FALSE]
rm(list = c("cndf", "cndf.wins", "l2r.nona"))
}

## BAF filtering
Expand Down Expand Up @@ -1059,14 +1076,16 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
tmsg("No recentering.")
} else stop(tmsg("Invalid recentering method called !"), call. = FALSE)

## Winsorization
## Winsorization (for aesthetics)
tmsg("Smoothing L2R (for plots)...")
cndf <- data.frame(Chr = rep(unlist(cs$chrom2chr[data$data$chrs]), vapply(data$data$ch, length, 1L)), Position = unlist(data$data$ch), MySample = data$data$Tumor_LogR[[1]], stringsAsFactors = FALSE)
l2r.nona <- !is.na(data$data$Tumor_LogR[[1]])
cndf <- cndf[l2r.nona,]
cndf.wins <- copynumber::winsorize(data = cndf, pos.unit = "bp", method = "mad", k = 5, tau = 1, verbose = FALSE)
data$data$Tumor_LogR_wins <- cndf.wins[, 3, drop = FALSE]
data$data$Tumor_LogR_wins <- data$data$Tumor_LogR
data$data$Tumor_LogR_wins[l2r.nona,] <- cndf.wins[, 3, drop = FALSE]
colnames(data$data$Tumor_LogR_wins) <- samplename
rm(list = c("cndf", "cndf.wins"))

rm(list = c("cndf", "cndf.wins", "l2r.nona"))

## PELT rescue
if (!is.null(SER.pen)) {
Expand Down Expand Up @@ -1099,7 +1118,6 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
tmsg(paste0(" Found ", length(rescued), "."))
if (length(rescued) > seg.maxn) tmsg("WARNING : Many small events found, profile may be noisy ! Consider using 'smooth.k', or for WES data, strengthen low depth filtering !")
data$meta$eacon[["PELT-nseg"]] <- length(rescued)
`%do%` <- foreach::"%do%"
foreach::foreach(re = rescued, .combine = "c") %do% {
interv <- mydf$idx.ori[seg.start[re]]:mydf$idx.ori[seg.end[re]]
data$data$Tumor_LogR_segmented[interv] <- median(data$data$Tumor_LogR[interv, 1], na.rm = TRUE)
Expand Down Expand Up @@ -1217,6 +1235,7 @@ Segment.SEQUENZA <- function(data = NULL, smooth.k = NULL, BAF.filter = .75, hom
Start = data$data$SNPpos$pos,
End = data$data$SNPpos$pos,
Value = data$data$Tumor_LogR_wins[,1],
# Value = data$data$Tumor_LogR[,1],
stringsAsFactors = FALSE)
# baf.chr <- if(length(grep(pattern = "chr", x = names(cs$chrom2chr), ignore.case = TRUE)) > 0) unlist(cs$chrom2chr[paste0("chr", as.character(data$data$SNPpos$chrs))]) else unlist(cs$chrom2chr[as.character(data$data$SNPpos$chrs)])
baf.value <- data.frame(Chr = l2r.chr,
Expand Down Expand Up @@ -1334,6 +1353,7 @@ ASCN.ASCAT <- function(data = NULL, gammaRange = c(.35,.95), nsubthread = 1, clu
cls <- parallel::makeCluster(spec = nsubthread, type = cluster.type, outfile = "")
doParallel::registerDoParallel(cls)
gamma <- 0
`%dopar%` <- foreach::"%dopar%"
fit.val <- as.data.frame(foreach::foreach(gamma = gammavec, .combine = "rbind", .inorder = TRUE) %dopar% {
tmsg(paste0(" gamma = ", gamma))
odirg <- paste0(odir, "/gamma", sprintf("%.2f", gamma))
Expand Down Expand Up @@ -2015,6 +2035,8 @@ Annotate <- function(data = NULL, refGene.table = NULL, targets.table = NULL, re

oridir <- getwd()

`%do%` <- foreach::"%do%"

if (!is.list(data)) stop(tmsg("data should be a list !"), call. = FALSE)

valid.genomes <- get.valid.genomes()
Expand Down
4 changes: 4 additions & 0 deletions R/wes_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,10 @@ WES.Bin <- function(testBAM = NULL, refBAM = NULL, BINpack = NULL, samplename =
meta.w$SNP.tot.count.ref.summary <- my.summary(SNP.all$tot_count.ref[!is.na(SNP.all$tot_count.ref)])
gc()

## Cleaning uncovered chr levels
CN.all$chr <- droplevels(CN.all$chr)
SNP.all$chr <- droplevels(SNP.all$chr)

WESobj <- list(RD = CN.all, SNP = SNP.all, meta = list(basic = meta.b, WES = meta.w))
rm(CN.all, SNP.all)
gc()
Expand Down

0 comments on commit 9247c92

Please sign in to comment.