Skip to content

Commit

Permalink
release v0.8.2
Browse files Browse the repository at this point in the history
Merge branch 'devel' of github.com:broadinstitute/inferCNV


Former-commit-id: 699187c
Former-commit-id: e0ed404
  • Loading branch information
brianjohnhaas committed Nov 8, 2018
2 parents d3ebc5f + 8138feb commit bcceb91
Show file tree
Hide file tree
Showing 15 changed files with 334 additions and 122 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: infercnv
Type: Package
Title: Infer Copy Number Variation from Single-Cell RNA-Seq Data
Version: 0.8.1
Date: 2017-05-25
Version: 0.8.2
Date: 2018-11-08
Authors@R: c( person("Timothy", "Tickle", email = "ttickle@broadinstitute.org", role = c("aut", "cre")), person("Itay", "Tirosh", email = "tirosh@broadinstitute.org", role = "aut"), person("Christophe", "Georgescu", email = "cgeorges@broadinstitute.org", role = "aut"), person("Maxwell", "Brown", email = "mbrown@broadinstitute.org", role = "aut"), person("Brian", "Haas", email = "bhaas@broadinstitute.org", role = "aut"))
Author: Timothy Tickle [aut, cre], Itay Tirosh [aut], Christophe Georgescu [aut], Maxwell Brown [aut], Brian Haas [aut]
Maintainer: Christophe Georgescu <cgeorges@broadinstitute.org>
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ import(RColorBrewer)
import(coin)
import(futile.logger)
importFrom(Matrix,Matrix)
importFrom(Matrix,colSums)
importFrom(Matrix,rowMeans)
importFrom(ape,as.phylo)
importFrom(ape,write.tree)
importFrom(binhf,ansc)
Expand Down
104 changes: 42 additions & 62 deletions R/NextGenHeatMap.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,21 +84,13 @@ Create_NGCHM <- function(infercnv_obj,
}

## set variables
reference_idx = row.names(plot_data[unlist(infercnv_obj@reference_grouped_cell_indices),])
ref_index = infercnv_obj@reference_grouped_cell_indices
ref_groups = names(infercnv_obj@reference_grouped_cell_indices)
reference_idx = row.names(plot_data[unlist(ref_index),])
ref_groups = names(ref_index)

# ---------------------- Import Dendrogram & Order Rows -----------------------------------------------------------------------------------
# IF Cluster By Group is set to TRUE:
# Get the order of the rows (cell lines) from the dendrogram created by infer_cnv
#

## import and read the dendrogram for the observed data created using the ape library
#den_path <- paste(out_dir, "observations_dendrogram.txt", sep=.Platform$file.sep)
#phylo <- ape::read.tree(file = den_path)
# if multiphylo trees, need to iterate to get to labels
#obs_order <- rev(unlist(lapply(1:length(phylo), function(x) phylo[[x]]$tip.label))) # vector holding cell line order taken from the dendrogram


# read the file containing the groupings created by infer_cnv
row_groups_path <- paste(out_dir, "observation_groupings.txt", sep=.Platform$file.sep)
Expand Down Expand Up @@ -213,75 +205,63 @@ Create_NGCHM <- function(infercnv_obj,
display = "visible",
thickness = as.integer(20))

# Covariate bar for annotation groups
# Covariate to identify Reference and Observed data
annotation_col <- as.character(unlist(row_groups["Annotation.Color"])) # group colors
annotation_group <- as.character(unlist(row_groups["Annotation.Group"]))# group number
names(annotation_group) <- cells
names(annotation_col) <- cells
annotation_palette <- get_group_color_palette()(length(unique(annotation_group)))
annotation_unique_group <- unique(annotation_group)
## create color mapping
colMap_annotation <- NGCHM::chmNewColorMap(values = as.vector(annotation_unique_group), # row names are the cells
colors = annotation_palette,
missing.color = "white")
annotation_cov <- NGCHM::chmNewCovariate(fullname = 'Annotation',
values = annotation_group,
value.properties = colMap_annotation,
type = "discrete")
hm <- NGCHM::chmAddCovariateBar(hm, "row", annotation_cov,
display = "visible",
thickness = as.integer(20))
# Covariate to identify Reference and Observed data

cell_type <- replace(row_order, 1:length(row_order) %in% unlist(infercnv_obj@observation_grouped_cell_indices), paste("Observed"))
len <-lengths(ref_index)
ref_bar_labels <- unlist(sapply(1:length(len), function(x){ rep(ref_groups[x],len[x]) }))
names(ref_bar_labels) <- reference_idx

ref_groups = names(infercnv_obj@reference_grouped_cell_indices)
# if you want the exact coloring as the original inferCNV plots
#annotation_palette <- c(get_group_color_palette()(length(ref_index)), get_group_color_palette()(length(annotation_unique_group)))

## Label the references based on index locations
if (length(ref_groups) > 1) {
for(i in 1:length(ref_groups)){
cell_type <- replace(cell_type, infercnv_obj@reference_grouped_cell_indices[[i]], paste("Reference",toString(i),sep = ""))
}
} else {
for(i in 1:length(ref_groups)){
cell_type <- replace(cell_type, 1:length(cell_type) %in% infercnv_obj@reference_grouped_cell_indices[[1]], paste("Reference"))
}
}
# make a new variable for later use that has the cell type and cell ID as the name
## cell ID's need to map to cell types
names(cell_type) <- row_order
# combine reference and observed labels
annotation_group <- c(ref_bar_labels,annotation_group)

# replace the observed group labels in the bar with the group names
observed_data <- infercnv_obj@observation_grouped_cell_indices
lapply(1:length(observed_data), function(x) {
tmp <- names(observed_data[x])
annotation_group <<- replace(annotation_group, observed_data[[x]], tmp) } )
unique_group <- unique(annotation_group)
annotation_palette <- get_group_color_palette()(length(unique_group))

# check if all reference cells are in cell type
if (!(all(reference_idx %in% names(cell_type)))){
missing_refs <- reference_idx[which(!(reference_idx %in% names(cell_type)))]
# check if all reference cells are included
if (!(all(reference_idx %in% names(annotation_group)))){
missing_refs <- reference_idx[which(!(reference_idx %in% names(annotation_group)))]
error_message <- paste("Error: Not all references are accounted for.",
"Make sure the reference names match the names in the data.\n",
"Check the following reference cell lines: ",
paste(missing_refs, collapse = ","))
stop(error_message)
}
if (!is.null(cell_type)){
## unique group names
types <- unique(cell_type)
## create colors for groups
type_palette <- get_group_color_palette()(length(types))
names(type_palette) <- types

colMap_type <- NGCHM::chmNewColorMap(values = types,
names = types,
colors = type_palette,
missing.color = "white",
type = "linear")

type_cov <- NGCHM::chmNewCovariate(fullname = 'Cell Type',
values = cell_type,
value.properties = colMap_type,
type = "discrete")
hm <- NGCHM::chmAddCovariateBar(hm, "row", type_cov,
display = "visible",
thickness = as.integer(20))
# check if all observed cells are included
observed_idx <- row.names(plot_data[unlist(infercnv_obj@observation_grouped_cell_indices),])
if (!(all(observed_idx %in% names(annotation_group)))){
missing_obs <- reference_idx[which(!(observed_idx %in% names(annotation_group)))]
error_message <- paste("Error: Not all observed cell lines are accounted for.",
"Make sure the reference names match the names in the data.\n",
"Check the following reference cell lines: ",
paste(missing_obs, collapse = ","))
stop(error_message)
}

## create color mapping
colMap_annotation <- NGCHM::chmNewColorMap(values = as.vector(unique_group),
colors = annotation_palette,
missing.color = "white")
annotation_cov <- NGCHM::chmNewCovariate(fullname = 'Annotation',
values = annotation_group,
value.properties = colMap_annotation,
type = "discrete")
hm <- NGCHM::chmAddCovariateBar(hm, "row", annotation_cov,
display = "visible",
thickness = as.integer(20))

#---------------------------------------Export the heat map-----------------------------------------------------------------------------------------------------------------------
## adjust the size of the heat map
#hm@width <- as.integer(500)
Expand Down
2 changes: 1 addition & 1 deletion R/inferCNV_constants.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ C_OUTPUT_FORMAT <- c("pdf", "png")
#' @importFrom ape write.tree as.phylo
#' @importFrom fastcluster hclust
#' @import RColorBrewer
#' @importFrom Matrix Matrix
#' @importFrom Matrix Matrix rowMeans colSums
#' @import coin
#' @importFrom dplyr %>% count

Expand Down
2 changes: 1 addition & 1 deletion R/inferCNV_heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ plot_cnv <- function(infercnv_obj,
observation_file_base,
sep=" "))
row.names(obs_data) <- orig_row_names
write.table(obs_data[data_observations$rowInd,data_observations$colInd],
write.table(t(obs_data[data_observations$rowInd,data_observations$colInd]),
file=observation_file_base)
}
}
Expand Down
55 changes: 33 additions & 22 deletions R/inferCNV_ops.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@
#'
#' @param include.spike If true, introduces an artificial spike-in of data at ~0x and 2x for scaling residuals between 0-2. (default: F)
#'
#' @param spike_in_chrs vector listing of chr names to use for modeling spike-ins (default: NULL - uses the two largest chrs. ex. c('chr1', 'chr2') )
#'
#' @param spike_in_multiplier_vec vector of weights matching spike_in_chrs (default: c(0.01, 2.0) for modeling loss/gain of both chrs)
#'
#' @param pseudocount Number of counts to add to each gene of each cell post-filtering of genes and cells and pre-total sum count normalization. (default: 0)
#'
#' @param debug If true, output debug level logging.
Expand Down Expand Up @@ -107,7 +111,7 @@ run <- function(infercnv_obj,
use_zscores=FALSE,
remove_genes_at_chr_ends=FALSE,

mask_nonDE_genes=TRUE,
mask_nonDE_genes=FALSE,
mask_nonDE_pval=0.05,
test.use='wilcoxon',

Expand All @@ -116,7 +120,11 @@ run <- function(infercnv_obj,
debug=FALSE, #for debug level logging

include.spike = FALSE,


# if used, both of the arguments below must be specified, and their vectors must have the same length
spike_in_chrs = NULL, # use defaults
spike_in_multiplier_vec = NULL, # use defaults

pseudocount = 0

) {
Expand Down Expand Up @@ -202,10 +210,14 @@ run <- function(infercnv_obj,
if (include.spike) {
step_count = step_count + 1
flog.info(sprintf("\n\n\tSTEP %02d: Spiking in genes with variation added for tracking\n", step_count))

if (! (is.null(spike_in_chrs) && is.null(spike_in_multiplier_vec)) ) {
infercnv_obj <- spike_in_variation_chrs(infercnv_obj, spike_in_chrs, spike_in_multiplier_vec)
} else {
infercnv_obj <- spike_in_variation_chrs(infercnv_obj)
}

infercnv_obj <- spike_in_variation_chrs(infercnv_obj)

# Plot incremental steps.
# Plot incremental steps.
if (plot_steps){

infercnv_obj_spiked <- infercnv_obj
Expand Down Expand Up @@ -657,9 +669,6 @@ run <- function(infercnv_obj,
output_filename=sprintf("infercnv.%02d_scaled_by_spike", step_count))
}

# remove the spike now
infercnv_obj <- remove_spike(infercnv_obj)

}


Expand Down Expand Up @@ -697,6 +706,12 @@ run <- function(infercnv_obj,
}
}

if (include.spike) {
# remove the spike before making the final plot.
infercnv_obj <- remove_spike(infercnv_obj)
}


save('infercnv_obj', file=file.path(out_dir, "run.final.infercnv_obj"))

flog.info("Making the final infercnv heatmap")
Expand Down Expand Up @@ -1143,7 +1158,7 @@ center_cell_expr_across_chromosome <- function(infercnv_obj, method="mean") { #

#' @title require_above_min_mean_expr_cutoff ()
#'
#' @description Filters out genes that have fewer than the corresponding mean value across the reference cell values.
#' @description Filters out genes whose mean expression value across all cells is below the specified minimum cutoff.
#'
#' @param infercnv_obj infercnv_object
#'
Expand All @@ -1158,10 +1173,8 @@ require_above_min_mean_expr_cutoff <- function(infercnv_obj, min_mean_expr_cutof

flog.info(paste("::above_min_mean_expr_cutoff:Start", sep=""))

# restrict to reference cells:
ref_cells_data <- infercnv_obj@expr.data[ , get_reference_grouped_cell_indices(infercnv_obj) ]

indices <-.below_min_mean_expr_cutoff(ref_cells_data, min_mean_expr_cutoff)
indices <-.below_min_mean_expr_cutoff(infercnv_obj@expr.data, min_mean_expr_cutoff)
if (length(indices) > 0) {
flog.info(sprintf("Removing %d genes from matrix as below mean expr threshold: %g",
length(indices), min_mean_expr_cutoff))
Expand Down Expand Up @@ -1195,7 +1208,7 @@ require_above_min_mean_expr_cutoff <- function(infercnv_obj, min_mean_expr_cutof

#' @title require_above_min_cells_ref()
#'
#' @description Filters out genes that have fewer than specified number of reference cells expressing them.
#' @description Filters out genes that have fewer than the specified number of cells expressing them.
#'
#' @param infercnv_obj infercnv_object
#'
Expand All @@ -1207,15 +1220,11 @@ require_above_min_mean_expr_cutoff <- function(infercnv_obj, min_mean_expr_cutof
#'

require_above_min_cells_ref <- function(infercnv_obj, min_cells_per_gene) {

ref_cell_indices = get_reference_grouped_cell_indices(infercnv_obj)

ref_data = infercnv_obj@expr.data[,ref_cell_indices]

ref_genes_passed = which(apply(ref_data, 1, function(x) { sum(x>0 & ! is.na(x)) >= min_cells_per_gene}))
genes_passed = which(apply(infercnv_obj@expr.data, 1, function(x) { sum(x>0 & ! is.na(x)) >= min_cells_per_gene}))

num_genes_total = dim(ref_data)[1]
num_removed = num_genes_total - length(ref_genes_passed)
num_genes_total = dim(infercnv_obj@expr.data)[1]
num_removed = num_genes_total - length(genes_passed)
if (num_removed > 0) {

flog.info(sprintf("Removed %d genes having fewer than %d min cells per gene = %g %% genes removed here",
Expand All @@ -1229,7 +1238,7 @@ require_above_min_cells_ref <- function(infercnv_obj, min_cells_per_gene) {
}


infercnv_obj <- remove_genes(infercnv_obj, -1 * ref_genes_passed)
infercnv_obj <- remove_genes(infercnv_obj, -1 * genes_passed)


}
Expand Down Expand Up @@ -1904,7 +1913,9 @@ anscombe_transform <- function(infercnv_obj) {

}


#' @keywords internal
#' @noRd
#'
add_pseudocount <- function(infercnv_obj, pseudocount) {

flog.info(sprintf("Adding pseudocount: %g", pseudocount))
Expand Down
Loading

0 comments on commit bcceb91

Please sign in to comment.