Skip to content

Commit

Permalink
gsva template
Browse files Browse the repository at this point in the history
  • Loading branch information
abartlett004 committed Sep 10, 2024
1 parent ef85fdc commit 695962c
Show file tree
Hide file tree
Showing 2 changed files with 217 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ tests/*
.DS_Store
.quarto
inst/templates/chipseq/QC/QC.html
*.html
216 changes: 216 additions & 0 deletions inst/templates/rnaseq/DE/GSVA.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
---
title: "GSVA"
author: "Harvard Chan Bioinformatics Core"
date: "`r Sys.Date()`"
output:
html_document:
code_folding: hide
df_print: paged
highlights: pygments
number_sections: true
self_contained: true
theme: default
toc: true
toc_float:
collapsed: true
smooth_scroll: true
editor_options:
chunk_output_type: inline
params:
# set column name and contrasts to be factors of interest
column: "sample_type"
contrasts: !r list(c("sample_type", "tumor", "normal"))
project_file: ../information.R
params_file: params_de-example.R
functions_file: ../libs
# if working on o2, select from gene set repository at /n/app/bcbio/platform/gene_sets/20240904
geneset_fn: ~/Downloads/h.all.v2024.1.Hs.entrez.gmt
---
```{r libraries, message = FALSE, warning=FALSE}
# path to libraries if working on O2
# .libPaths("/n/app/bcbio/R4.3.1_rnaseq/")
## load libraries
library(GSVA)
library(GSEABase)
library(reshape2)
library(ChIPpeakAnno)
library(org.Hs.eg.db)
# library(org.Mm.eg.db)
library(AnnotationDbi)
library(DESeq2)
library(limma)
library(gridExtra)
library(bcbioR)
library(ggprism)
library(knitr)
library(rstudioapi)
library(tidyverse)
colors=cb_friendly_cols(1:15)
ggplot2::theme_set(theme_prism(base_size = 14))
opts_chunk[["set"]](
cache = F,
cache.lazy = FALSE,
dev = c("png", "pdf"),
error = TRUE,
highlight = TRUE,
message = FALSE,
prompt = FALSE,
tidy = FALSE,
warning = FALSE,
echo = T,
fig.height = 4)
# set seed for reproducibility
set.seed(1234567890L)
```

```{r}
# This set up the working directory to this file so all files can be found
setwd(fs::path_dir(getSourceEditorContext()$path))
```

```{r load_params, cache = FALSE, message = FALSE, warning=FALSE}
source(params$project_file)
source(params$params_file)
map(list.files(params$functions_file,pattern = "*.R$",full.names = T),source) %>% invisible()
column=params$column
contrasts=params$contrasts
subset_column=params$subset_column
subset_value=params$subset_value
```

```{r sanitize_datatable}
sanitize_datatable = function(df, ...) {
# remove dashes which cause wrapping
DT::datatable(df, ..., rownames=gsub("-", "_", rownames(df)),
colnames=gsub("-", "_", colnames(df)))
}
```

# Overview

- Project: `r project`
- PI: `r PI`
- Analyst: `r analyst`
- Experiment: `r experiment`
- Aim: `r aim`

```{r load_data}
coldata <- load_coldata(coldata_fn, column,
subset_column, subset_value)
coldata[[contrasts[[1]][1]]] = relevel(as.factor(coldata[[contrasts[[1]][1]]]), contrasts[[1]][3])
coldata$sample=row.names(coldata)
counts <- load_counts(counts_fn)
counts <- counts[,colnames(counts) %in% coldata$sample]
```

# Data

```{r show_coldata}
coldata %>% sanitize_datatable()
```

```{r normalize_data}
dds <- DESeqDataSetFromMatrix(counts,
colData = coldata,
design = ~ 1)
dds <- DESeq(dds)
norm_counts <- counts(dds, normalized=TRUE)
```


```{r ensembl_to_entrez}
## convert ensembl to entrez
entrezIDs_all = convert2EntrezID(IDs=rownames(norm_counts), orgAnn="org.Hs.eg.db",
ID_type="ensembl_gene_id")
entrezid <- mapIds(org.Hs.eg.db, keys = rownames(norm_counts), keytype="ENSEMBL", column = "ENTREZID")
counts_entrez <- norm_counts
stopifnot(nrow(counts_entrez) == length(entrezid))
rownames(counts_entrez) <- entrezid
```


# Prep and run GSVA

```{r load_genesets}
gene_sets = read_table(params$geneset_fn, col_names = F)
names(gene_sets)[1:2] <- c('pathway', 'url')
gene_sets_long = gene_sets %>%
dplyr::select(-url) %>%
pivot_longer(!pathway, names_to = 'column_num', values_to = 'entrez_id') %>%
filter(!is.na(entrez_id))
genes_by_pathway <- split(gene_sets_long$entrez_id, gene_sets_long$pathway)
```


```{r GSVA, message = F, warning = F}
gsvaPar <- GSVA::gsvaParam(counts_entrez, genes_by_pathway, kcdf = "Poisson")
gsva.es <- gsva(gsvaPar, verbose = F)
```

## Test for Significance

```{r limma}
mod <- model.matrix(~ factor(coldata[[column]]))
fit <- lmFit(gsva.es, mod)
fit <- eBayes(fit)
res <- topTable(fit, coef=paste0("factor(coldata[[column]])",contrasts[[1]][2]),number=Inf,sort.by="P")
res %>% sanitize_datatable()
```

## Graph top 5 pathways{.tabset}

```{r graph pathways, results = 'asis'}
scores <- t(gsva.es)
sig <- subset(res, res$adj.P.Val < 0.1)
pathways <- rownames(sig)[1:5]
to_graph = data.frame(scores[,pathways]) %>% rownames_to_column('sample') %>%
pivot_longer(!sample, names_to = 'pathway', values_to = 'enrichment_score')
to_graph <- left_join(to_graph, coldata)
for (single_pathway in pathways) {
cat('### ', single_pathway, '\n')
to_graph_single_pathway <- to_graph %>% filter(pathway == single_pathway)
p <- ggplot(to_graph_single_pathway, aes(x = .data[[column]], y = enrichment_score)) + geom_boxplot() +
geom_point(alpha=0.5) + ggtitle(single_pathway)
print(p)
cat('\n\n')
}
```

# R session

List and version of tools used for the QC report generation.

```{r}
sessionInfo()
```

0 comments on commit 695962c

Please sign in to comment.