-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
62 changed files
with
7,450 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
Package: KlebanoffMR4050 | ||
Type: Package | ||
Title: Single-Cell-Sequencing Data Generated by The Klebanoff Lab for Sample MR4050 | ||
Version: 0.1.1 | ||
Author: c(person("Friederike", "Dündar", email = "frd2007@med.cornell.edu", role = c("aut","cre")), | ||
person("Paul","Zumbo", email="paz2005@med.cornell.edu", role = c("aut","ctb"))) | ||
Maintainer: Friederike Dündar <frd2007@med.cornell.edu> | ||
Description: SingleCellExperiment object and list of differentially expressed genes | ||
as determined using 5'-DGE and V(D)J sequencing of tumor-antigen-specific T cells | ||
and corresponding control cells; all obtained from donor MR4050 (breast | ||
cancer patient MK-02). The tumor-specific antigen is a mutant form of PIK3CA. | ||
Depends: | ||
R (>= 3.5.0) | ||
Imports: | ||
BiocFileCache, | ||
data.table, | ||
EnsDb.Hsapiens.v86, | ||
ggplot2, | ||
magrittr, | ||
scater, | ||
SingleCellExperiment | ||
Suggests: | ||
stringr, | ||
usethis | ||
License: MIT | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 7.1.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(check_columns) | ||
export(extract_markers) | ||
export(load_DE_results) | ||
export(load_MR4050filt) | ||
export(load_MR4050merged) | ||
export(load_MR4050shared) | ||
export(load_RDSdata_from_Box) | ||
export(load_data_from_Box) | ||
export(load_sce) | ||
export(make_long_dt) | ||
export(my_table) | ||
export(plot_tgram) | ||
export(prep_data4tgram) | ||
export(run_DE) | ||
import(data.table) | ||
import(kableExtra) | ||
import(magrittr) | ||
import(scater) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#' Load the list of DE test results (any direction) for all clonotypes present | ||
#' in both conditions ('shared' clonotypes) | ||
#' | ||
#' @details The p- and q-values here represent a two-tailed test for any direction | ||
#' of the logFC. | ||
#' For details on how the DE analysis was done, see the vignette "DE_genes" | ||
#' and the wrapper function \code{\link{run_DE}}. | ||
#' | ||
#' @format Nested list where names correspond to the abbreviated clonotype IDs. | ||
#' For every clonotype, there's a list that contains: | ||
#' \describe{ | ||
#' \item{findMarkers_results:}{A SimpleDataFrameList, i.e. the original result | ||
#' of \code{scran::findMarkers()}, but only for the MUT comparison} | ||
#' \item{marker_IDs:}{a data.table with the genes that passed the FDR threshold; | ||
#' if that is NULL, this implies that there were no DEG for that particular | ||
#' clonotype comparing MUT vs WT} | ||
#' } | ||
#' @usage load_DE_results("MR4050") | ||
#' @examples \dontrun{ | ||
#' library(KlebanoffMR4050) | ||
#' | ||
#' sce.shared <- load_MR4050shared() | ||
#' sce.shared$antigen <- factor(gsub("\\..*","",sce.shared$Sample), | ||
#' levels = c("WT", "MUT"), ordered = TRUE) | ||
#' | ||
#' delist.both <- lapply( unique(sce.shared$id), function(x){ | ||
#' run_DE( | ||
#' sce.shared[, sce.shared$id == x], | ||
#' group_identifier = "antigen", | ||
#' direction = "any", | ||
#' FDR = 0.05, rank = Inf, | ||
#' comp_name = paste0(x, "_")) | ||
#' }) | ||
#' names(delist.both) <- unique(sce.shared$id) | ||
#' } | ||
#' | ||
#'@export | ||
#' | ||
load_DE_results <- function(sample = "MR4050"){ | ||
fn <- system.file("extdata", "delist.both", | ||
package = paste0("Klebanoff", sample), mustWork = TRUE) | ||
|
||
fin <- read.table(fn, stringsAsFactor = FALSE)[[1]] | ||
load_data_from_Box(fin, load_rds = FALSE) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
#' Load the SCE for the batch-corrected merged data set | ||
#' | ||
#' @description Load the SingleCellExperiment object that holds the batch-corrected | ||
#' reduced dimensionality results (MUT/WT = batch). | ||
#' | ||
#' @usage sce.filt <- load_MR4050filt() | ||
#' | ||
#' @seealso \code{\link{load_MR4050shared}}, \code{\link{load_MR4050merged}} | ||
#' @return an SCE object that needs to be assigned to an object in the environment | ||
#' | ||
#' @export | ||
#' | ||
load_MR4050filt <- function(){ | ||
out <- load_sce(which_assays = "all", sample = "MR4050") | ||
|
||
} | ||
|
||
#' Load the SCE for the batch-corrected merged data set | ||
#' | ||
#' @description Load the SingleCellExperiment object that holds the batch-corrected | ||
#' reduced dimensionality results (MUT/WT = batch). | ||
#' | ||
#' @usage sce.merged <- load_MR4050merged() | ||
#' @return an SCE object that needs to be assigned to an object in the environment | ||
#' | ||
#' @export | ||
#' | ||
load_MR4050merged <- function(){ | ||
out <- load_sce(which_assays = "reconstructed", sample = "MR4050Merged") | ||
return(out) | ||
} | ||
|
||
#' Load the SCE for the batch-corrected merged data set | ||
#' | ||
#' @description Load the SingleCellExperiment object that holds the SCE | ||
#' representing cells with clonotypes that are present in both conditions. | ||
#' The UMAP coordinates were re-calculated on the reduced subset after removal | ||
#' of suspected doublets (see the vignette about the filtering). | ||
#' | ||
#' @return an SCE object that needs to be assigned to an object in the environment | ||
#' | ||
#' @usage sce.shared <- load_MR4050shared() | ||
#' | ||
#' @export | ||
load_MR4050shared <- function(){ | ||
out <- load_sce(which_assays = "logcounts", sample = "MR4050Shared") | ||
return(out) | ||
} | ||
|
||
|
||
#' Filtered cells ' ' @format Named list of cell numbers following the filtering steps | ||
#described in this vignette. | ||
"cell_filt" | ||
|
||
#' Filtered genes ' ' @format Named list of gene numbers following the filtering steps | ||
#described in this vignette. | ||
"gene_filt" | ||
|
||
|
||
#' Load the filtered and processed SingleCellExperiment object | ||
#' | ||
#' @description Use this function to load the processed and filtered gene expression | ||
#' data plus the clonotype information stored within one SingleCellExperiment | ||
#' object. | ||
#' | ||
#' | ||
#' @param which_assays can be "all" (default) or individual assays, e.g. c("logcounts", | ||
#' "counts") etc. If space and memory are problematic, definitely limit the selection here! | ||
#' assays that are available are: "counts", "logcounts" | ||
#' @param ... Additional parameters passed on to \code{load_RDSdata_from_Box}, e.g. | ||
#' \code{check_for_updates = TRUE} | ||
#' | ||
#' @details | ||
#' For the entire code of the filtering and processing, see the vignette | ||
#' \code{01_processing.Rmd}. | ||
#' | ||
#' The resulting SCE object contains the usual content: colData with information | ||
#' about individidual cells, rowData with info about individual genes, reducedDims, | ||
#' etc. | ||
#' | ||
#' The \code{colData} includes: | ||
#' | ||
#' \describe{ | ||
#' \item{Barcode:}{Each cell's barcode used for keeping track of its identity during sequencing.} | ||
#' \item{Sample:}{'WT' or 'MUT'} | ||
#' \item{raw_clonotype_id:}{e.g. 'clonotype94'} | ||
#' \item{cdr3s_aa:}{The amino acid sequence of the CDR3 portion, e.g. "TRA:CIARGGGGADGLTF;TRA:CGADRNGNEKLTF;TRB:CASSLTTDREPYEQYF"} | ||
#' \item{multiTRA:}{TRUE/FALSE entries based on whether \code{cdr3s_aa} contained more than one entry for TRA} | ||
#' \item{multiTRB:}{TRUE/FALSE entries based on whether \code{cdr3s_aa} contained more than one entry for TRB} | ||
#' \item{numTRA:}{Number of TRA sequences within \code{cdr3s_aa}} | ||
#' \item{numTRB:}{Number of TRB sequences within \code{cdr3s_aa}} | ||
#' \item{cluster:}{clustering results of all cells} | ||
#' } | ||
#' | ||
#' The object also containes the coordinates from dimensionality reductions (see | ||
#' examples for more details). | ||
#' | ||
#' | ||
#' @usage sce.filt <- load_sce(which_assays = "logcounts", sample = "MR4050") | ||
#' | ||
#' @return A SingleCellExperiment object with cells from 'mutant' samples | ||
#' (stimulation with tumor antigen) and from the 'wt' sample (stimulation | ||
#' with an irrelevant antigen)). | ||
#' | ||
#' | ||
#' @examples \dontrun{ | ||
#' | ||
#' > library(SingleCellExperiment) | ||
#' > sce.MR4050 <- load_sce(which_assays = "all", sample = "MR4050") | ||
#' | ||
#' > reducedDimNames(sce.MR4050) | ||
#' "corrected" "TSNE" "UMAP" | ||
#' | ||
#' > assayNames(sce.MR4050) | ||
#' [1] "counts" "logcounts" | ||
#' } | ||
#' | ||
#' @return SCE object | ||
#' | ||
#' | ||
#' @export | ||
#' | ||
load_sce <- function(which_assays = "all", sample = "Sample", ...){ | ||
|
||
## the Box links are noted in the text file | ||
fl <- system.file("extdata", paste0("sce_storage_", sample, ".txt"), | ||
package = "KlebanoffMR4050") | ||
if(fl == ""){stop(paste("sce_storage_", sample, ".txt does not exist in package 'KlebanoffMR4050'."))} | ||
|
||
inf <- read.table(fl,stringsAsFactors = FALSE) | ||
|
||
if(unique(inf$V3) != sample){stop("The sce_storage.txt file must contain a third column holding the sample name. Which should be the same as the one specified via sample = .")} | ||
|
||
## DOWNLOAD AND CACHE THE FILES FROM THE BOX ============================== | ||
## note: using the default cache of BioC here, we may want to change that | ||
## to something more specific via the `cache_path` option of `load_RDSdata_fromBox()` | ||
|
||
## load colData | ||
cold <- load_RDSdata_from_Box( | ||
shared_link = inf[inf$V1 == "colData",]$V2, data_name = paste0("KlebColData",sample), ...) | ||
|
||
## load rowData | ||
rowd <- load_RDSdata_from_Box( | ||
shared_link = inf[inf$V1 == "rowData",]$V2, data_name = paste0("KlebRowData", sample) , ...) | ||
|
||
## get reducedDims | ||
rdms <- load_RDSdata_from_Box( | ||
shared_link = inf[inf$V1 == "reducedDims",]$V2, data_name = paste0("KlebRedDims", sample), ... ) | ||
|
||
## metadata | ||
metd <- load_RDSdata_from_Box( | ||
shared_link = inf[inf$V1 == "metadata",]$V2, data_name = paste0("KlebMetadata", sample), ... ) | ||
|
||
## get assayData | ||
if(which_assays == "all"){ | ||
## extract corresponding assay entry from the text file | ||
asss <- grep("^assay:", unique(inf$V1), value = TRUE) | ||
}else{ | ||
asss <- unlist(lapply(which_assays, function(x) grep(paste0(":",x,"$"), unique(inf$V1), ignore.case = TRUE, value=TRUE))) | ||
if(length(which_assays) == 0){ | ||
warning("None of the assays you specified are part of the file stored in inst/extdata, i.e. we can't find the links.") | ||
} | ||
} | ||
|
||
assl <- list() | ||
for(i in asss){ | ||
j <- gsub("^assay:","", i) | ||
assl[[j]] <- load_RDSdata_from_Box( | ||
shared_link = inf[inf$V1 == i,]$V2, data_name = paste0("Kleb",j, sample), ...) | ||
} | ||
|
||
## construct the SCE object ============================================= | ||
return(SingleCellExperiment::SingleCellExperiment(assays = assl, | ||
colData = cold, rowData = rowd, | ||
metadata = metd, reducedDims = rdms)) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#' Shared clonotypes | ||
#' | ||
#' @description \code{data.table} of the clonotypes that are found in both | ||
#' conditions MUT and WT in the MR4050 data set. | ||
#' | ||
#' @usage data(shared_clonotypes) | ||
#' | ||
#' @examples \dontrun{ | ||
#' | ||
#' ## count TRA/TRB | ||
#' clono_freq <- colData(sce.filt)[, c("Sample" ,"cdr3s_aa")] %>% | ||
#' as.data.frame %>% data.table(., keep.rownames = TRUE) %>% | ||
#' .[!is.na(cdr3s_aa), .N, c("cdr3s_aa","Sample")] | ||
#' setorder(clono_freq, N) | ||
#' | ||
#' ## formatting the TRA/TRB notations | ||
#' ## will only work if there's just one TRA | ||
#' ct <- dcast(clono_freq, cdr3s_aa ~ Sample, value.var = "N") %>% | ||
#' .[!is.na(MUT.MR4050) & !is.na(WT.MR4050)] | ||
#' | ||
#' ct[, TRA := gsub(";*TRB:[A-Z]+", "", cdr3s_aa)] | ||
#' ct[, TRA := ifelse(TRA == "", NA, TRA)] | ||
#' ct[, TRB := gsub(".*(TRB:[A-Z]+)", "\\1", cdr3s_aa)] | ||
#' ct[, TRB := ifelse(grepl("^TRA", TRB), NA, TRB)] # if only TRB was present, | ||
#' I need to fill in the NA | ||
#' setorder(ct, -MUT.MR4050, -WT.MR4050 ) | ||
#' shared_clonotypes <- copy(ct) | ||
#' } | ||
#' | ||
#' @seealso \code{clonotype_ids} | ||
"shared_clonotypes" | ||
|
||
|
||
|
||
#' Table of customized clonotype IDs for sample MR4050 | ||
#' | ||
#' @description \code{data.table} with our customized clonotype IDs for ease of | ||
#' visualization and comparison. I.e., the CDR3s amino acid sequences are re- | ||
#' placed with arbitrary IDs. Note thate these clonotypes are those that are | ||
#' found in both conditions of patient MR4050, i.e. MUT and WT. | ||
#' | ||
#' @details See section "Adding the clonotype IDs" in the vignette "Filtering and Processing" | ||
#' about how the consolidation and clean up of the TRA/TRB sequences was done. | ||
#' | ||
#' @seealso \code{shared_clonotypes} | ||
"clonotype_ids" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#' Reading in clonotype info per barcode | ||
#' | ||
#' @details This function expects the files "filtered_contig_annotations.csv" and | ||
#' "clonotypes.csv" to be found in the "outs" folder after following \code{cellRanger_result_dir}. | ||
#' These files are used to match the cell barcodes with unique clonotype IDs. | ||
#' | ||
#' @param cellRanger_result_dir e.g. "/scratchLocal/frd2007/2018-11_Smita_Tcells/data/wildtype/". | ||
#' This should be the path to the un-tar'ed directory from cellRanger. | ||
#' | ||
#' @export | ||
#' | ||
reading_in_clonotypes <- function(cellRanger_result_dir){ | ||
|
||
## reading barcodes (including empty ones) | ||
bcs <- fread(paste0(cellRanger_result_dir,"outs/filtered_contig_annotations.csv"), sep = ",") | ||
|
||
#> head(barcode_csv) | ||
#barcode is_cell contig_id high_confidence length | ||
#1 AAACCTGAGTGACATA-1 True AAACCTGAGTGACATA-1_contig_1 True 568 | ||
#2 AAACCTGAGTGACATA-1 True AAACCTGAGTGACATA-1_contig_3 True 567 | ||
#3 AAACCTGAGTGACATA-1 True AAACCTGAGTGACATA-1_contig_4 True 712 | ||
#4 AAACCTGCACCGCTAG-1 True AAACCTGCACCGCTAG-1_contig_1 True 564 | ||
#5 AAACCTGCACCGCTAG-1 True AAACCTGCACCGCTAG-1_contig_2 True 584 | ||
#6 AAACCTGCACTTCTGC-1 True AAACCTGCACTTCTGC-1_contig_1 True 569 | ||
#chain v_gene d_gene j_gene c_gene full_length productive | ||
#1 TRA TRAV19 None TRAJ50 TRAC True True | ||
#2 TRA TRAV13-1 None TRAJ45 TRAC True None | ||
#3 TRB TRBV19 TRBD2 TRBJ2-7 TRBC2 True True | ||
#4 TRB TRBV9 TRBD2 TRBJ2-6 TRBC2 True True | ||
#5 TRA TRAV12-2 None TRAJ44 TRAC True True | ||
#6 TRA TRAV25 None TRAJ54 TRAC True True | ||
#cdr3 cdr3_nt | ||
#1 CALSSMKTSYDKVIF TGTGCTCTGAGTAGCATGAAAACCTCCTACGACAAGGTGATATTT | ||
#2 None None | ||
#3 CATSLALAAYEQYF TGTGCCACGAGTCTGGCGCTAGCGGCGTACGAGCAGTACTTC | ||
#4 CASSGLAGGPVSGANVLTF TGTGCCAGCAGCGGACTAGCGGGAGGGCCCGTTTCTGGGGCCAACGTCCTGACTTTC | ||
#5 CAGNTGTASKLTF TGTGCCGGAAATACCGGCACTGCCAGTAAACTCACCTTT | ||
#6 CAGLQGAQKLVF TGTGCAGGCCTTCAGGGAGCCCAGAAGCTGGTATTT | ||
#reads umis raw_clonotype_id raw_consensus_id | ||
#1 2511 7 clonotype146 clonotype146_consensus_1 | ||
#2 679 2 clonotype146 None | ||
#3 850 4 clonotype146 clonotype146_consensus_2 | ||
#4 10237 36 clonotype1 clonotype1_consensus_1 | ||
#5 3470 10 clonotype1 clonotype1_consensus_2 | ||
#6 1780 7 clonotype147 clonotype147_consensus_1 | ||
|
||
## extract those with a clonotype | ||
bcs.sub <- bcs[ raw_clonotype_id != "None", c("raw_clonotype_id","barcode"), with=FALSE] %>% unique | ||
|
||
## reading clonotypes | ||
clotp <- fread(paste0(cellRanger_result_dir, "outs/clonotypes.csv"), sep = ",") | ||
setnames(clotp, "clonotype_id", "raw_clonotype_id") | ||
|
||
## join the informationbiocon | ||
jnd <- merge(clotp, bcs.sub, by = "raw_clonotype_id", all = TRUE) | ||
|
||
## sanity check | ||
tst <- jnd[raw_clonotype_id == "clonotype1", .N] == unique(jnd[raw_clonotype_id == "clonotype1"]$frequency) | ||
if(isFALSE(tst)){ | ||
warning("The number of rows for clonotype1 does not match the frequency taken from the clonotypes.csv file.") | ||
} | ||
|
||
return(jnd) | ||
} |
Oops, something went wrong.