Skip to content

Latest commit

 

History

History
302 lines (264 loc) · 9.82 KB

20190423_Fluomics.fig2d.md

File metadata and controls

302 lines (264 loc) · 9.82 KB
title author date output
R code to generate Figure 2D
Slim Fourati
23 April, 2019
github_documents

Load required packages

suppressPackageStartupMessages(library(package = "knitr"))
suppressPackageStartupMessages(library(package = "GSEABase"))
suppressPackageStartupMessages(library(package = "EDASeq"))
suppressPackageStartupMessages(library(package = "biomaRt"))
suppressPackageStartupMessages(library(package = "tidyverse"))

Define session options

workDir <- dirname(getwd())
opts_chunk$set(tidy = FALSE, fig.path = "../figure/")
options(stringsAsFactors  = FALSE,
        readr.num_columns = 0)

Load GSEA output

load(file = file.path(workDir, "output/fluomics.gseaOutput.RData"))

Read MSigDB genesets description and identify FAS-related genesets

xmlFile <- file.path(workDir, "utils/msigdb_v6.2.xml")
msig <- getBroadSets(uri = xmlFile)
descDF <- sapply(msig, FUN = description) %>%
  data.frame(DESC = .) %>%
  mutate(NAME = names(msig))

gsNames <- descDF %>%
  filter((grepl(pattern = "FAS_", NAME)))

Convert mouse genes to human genes

humanGenes <- gseaOutput$LEADING_EDGE %>%
  strsplit(",") %>%
  unlist() %>%
  unique()
human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse <- useMart("ensembl", dataset = "mmusculus_gene_ensembl")
human2mouse <- getLDS(attributes  = "hgnc_symbol",
                      filters     = "hgnc_symbol",
                      values      = humanGenes,
                      mart        = human,
                      attributesL = c("ensembl_gene_id", "mgi_symbol"),
                      martL       = mouse,
                      uniqueRows  = TRUE)

Extract FAS genesets leading edge

leGenes <- gseaOutput %>%
  filter(NAME %in% gsNames$NAME & `FDR q-val` <= 0.25)
leGenes$LEADING_EDGE %>%
  strsplit(split = ",") %>%
  setNames(nm = leGenes$NAME) %>%
  stack() %>%
  filter(!duplicated(values)) -> leGenes
leGenes <- merge(x    = leGenes,
                 y    = human2mouse,
                 by.x = "values",
                 by.y = "HGNC.symbol")

Load SeqExpressionSet

load(file = file.path(workDir, "output/fluomics.seqSet.RData"))

Perform SLEA

normMat <- normCounts(seqSet)
normMat <- t(scale(t(normMat)))
B <- 1000
gs <- intersect(leGenes$Gene.stable.ID, featureNames(seqSet))
ngenes <- length(gs)
mu <- colMeans(normMat[gs, ])
muPermut <- mclapply(1:B, FUN = function(seed) {
  set.seed(seed = seed)
  muHat <- colMeans(normMat[sample.int(nrow(normMat), ngenes), ],
                    na.rm = TRUE)
  return(value = muHat)
})
muPermut <- do.call(what = rbind, args = muPermut)
zscore <- (mu - colMeans(muPermut)) / apply(muPermut, MARGIN = 2, FUN = sd)
sleaDF <- data.frame(slea = zscore)

Load cibersort output

load(file = file.path(workDir, "output/fluomics.cibersort.RData"))

Plot M1 vs FASE

plotDF <- sleaDF %>%
  rownames_to_column() %>%
  merge(y = pData(seqSet), by.x = "rowname", by.y = "Run") %>%
  merge(y = freqDF, by.x = "rowname", by.y = "Column") %>%
  filter(virus %in% c("H1N1", "H5N1", "mock")) %>%
  mutate(`Macrophages M1` = as.numeric(`Macrophages M1`)) %>%
  select(rowname, slea, `Macrophages M1`, virus, time_point,
					biological_replicate)

fit <- cor.test(formula = ~slea+`Macrophages M1`,
		data    = plotDF,
		method  = "spearman")
 
ggplot(data = plotDF,
       mapping = aes(x = `Macrophages M1`, y = slea)) +
  geom_point(mapping = aes(color = time_point, shape = virus), size = 2) +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  labs(x     = "Macrophages M1 (CIBERSORT %)",
       y     = "FAS pathway [SLEA z-score]",
       title = paste0("Spearman cor: rho=",
		      signif(fit$estimate, digits = 3),
		      " p=",
		      signif(fit$p.value, digits = 3))) +
  theme_bw()

plot of chunk plot-m1-fas

Plot M1 and FAS over time

plotDF2 <- plotDF %>%
  rename(`FAS pathway (SLEA z-score)`   = slea,
         `Macrophages M1 (CIBERSORT %)` = `Macrophages M1`) %>%
  gather(cname, value, -rowname, -time_point, -virus,
	 -biological_replicate) %>%
  mutate(time_point = factor(time_point),
	 time_point = relevel(time_point, ref = "12h"),
	 time.num   = as.numeric(time_point),
         cname = factor(cname,
                        levels = rev(unique(cname))))
plotDF3 <- plotDF2 %>%
  group_by(virus, cname, time.num) %>%
  summarize(mu = mean(value))

ggplot() +
  geom_line(data = plotDF3,
            mapping = aes(x = time.num, y = mu, color = virus)) +
  geom_jitter(data = plotDF2, mapping = aes(x = time.num, y = value,
                                            color = virus),
              width = 0.1) +
  facet_wrap(facet = ~cname, scale = "free", ncol = 1) +
  scale_x_continuous(breaks = 1:5, labels = levels(plotDF2$time_point)) +
  labs(x = "Time", y = NULL) +
  theme_bw()

plot of chunk over-time

Compare M1 correlation with FAS and FAS at day + 1

plotDF4 <- plotDF2 %>%
  filter(virus != "mock") %>%
  mutate(time.num = ifelse(test = grepl(pattern = "M1", cname),
                           yes  = time.num + 1,
                           no   = time.num)) %>%
  select(-rowname, -time_point) %>%
  spread(cname, value)
cor.test(formula = ~slea+`Macrophages M1`,
	 data    = filter(plotDF, virus != "mock" &
				    time_point != "12h"),
	 method  = "spearman")
## 
## 	Spearman's rank correlation rho
## 
## data:  slea and Macrophages M1
## S = 562.49, p-value = 1.974e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.7554398
cor.test(formula = ~`FAS pathway (SLEA z-score)` +
	   `Macrophages M1 (CIBERSORT %)`,
         data    = plotDF4,
         use     = "pair",
         method  = "spearman")
## 
## 	Spearman's rank correlation rho
## 
## data:  FAS pathway (SLEA z-score) and Macrophages M1 (CIBERSORT %)
## S = 494.26, p-value = 5.534e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.7851031
ggplot(data = plotDF4,
       mapping = aes(x = `Macrophages M1 (CIBERSORT %)`,
		     y = `FAS pathway (SLEA z-score)`)) +
  geom_point(mapping = aes(shape = virus, color = factor(time.num)),
	     size = 2) +
  stat_smooth(method = "lm", se = FALSE, color = "black") +
  scale_color_discrete(name = "Time",
		       labels = c("12h", "01d", "02d", "03d", "04d")) +
  labs(x = "Macrophages M1 (CIBERSORT %)",
       y = "FAS pathway @ T+1 (SLEA z-score) ") +
  theme_bw()

plot of chunk t-vs-tm1

Print session info

sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-apple-darwin18.2.0 (64-bit)
## Running under: macOS Mojave 10.14.4
## 
## Matrix products: default
## BLAS/LAPACK: /usr/local/Cellar/openblas/0.3.5/lib/libopenblasp-r0.3.5.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] forcats_0.4.0               stringr_1.4.0              
##  [3] dplyr_0.8.0.1               purrr_0.3.2                
##  [5] readr_1.3.1                 tidyr_0.8.3                
##  [7] tibble_2.1.1                ggplot2_3.1.0              
##  [9] tidyverse_1.2.1             biomaRt_2.38.0             
## [11] EDASeq_2.16.3               ShortRead_1.40.0           
## [13] GenomicAlignments_1.18.1    SummarizedExperiment_1.12.0
## [15] DelayedArray_0.8.0          matrixStats_0.54.0         
## [17] Rsamtools_1.34.1            GenomicRanges_1.34.0       
## [19] GenomeInfoDb_1.18.2         Biostrings_2.50.2          
## [21] XVector_0.22.0              BiocParallel_1.16.6        
## [23] GSEABase_1.44.0             graph_1.60.0               
## [25] annotate_1.60.1             XML_3.98-1.19              
## [27] AnnotationDbi_1.44.0        IRanges_2.16.0             
## [29] S4Vectors_0.20.1            Biobase_2.42.0             
## [31] BiocGenerics_0.28.0         knitr_1.22                 
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-137           bitops_1.0-6           lubridate_1.7.4       
##  [4] bit64_0.9-7            RColorBrewer_1.1-2     progress_1.2.0        
##  [7] httr_1.4.0             tools_3.5.3            backports_1.1.3       
## [10] R6_2.4.0               DBI_1.0.0              lazyeval_0.2.2        
## [13] colorspace_1.4-1       withr_2.1.2            tidyselect_0.2.5      
## [16] prettyunits_1.0.2      curl_3.3               bit_1.1-14            
## [19] compiler_3.5.3         cli_1.1.0              rvest_0.3.2           
## [22] xml2_1.2.0             labeling_0.3           rtracklayer_1.42.2    
## [25] scales_1.0.0           genefilter_1.64.0      DESeq_1.34.1          
## [28] digest_0.6.18          R.utils_2.8.0          pkgconfig_2.0.2       
## [31] highr_0.7              rlang_0.3.1            readxl_1.3.1          
## [34] rstudioapi_0.9.0       RSQLite_2.1.1          generics_0.0.2        
## [37] hwriter_1.3.2          jsonlite_1.6           R.oo_1.22.0           
## [40] RCurl_1.95-4.12        magrittr_1.5           GenomeInfoDbData_1.2.0
## [43] Matrix_1.2-15          Rcpp_1.0.1             munsell_0.5.0         
## [46] R.methodsS3_1.7.1      stringi_1.4.3          zlibbioc_1.28.0       
## [49] plyr_1.8.4             grid_3.5.3             blob_1.1.1            
## [52] crayon_1.3.4           lattice_0.20-38        haven_2.1.0           
## [55] splines_3.5.3          GenomicFeatures_1.34.6 hms_0.4.2             
## [58] pillar_1.3.1           geneplotter_1.60.0     glue_1.3.1            
## [61] evaluate_0.13          latticeExtra_0.6-28    modelr_0.1.4          
## [64] cellranger_1.1.0       gtable_0.2.0           assertthat_0.2.0      
## [67] xfun_0.5               aroma.light_3.12.0     xtable_1.8-3          
## [70] broom_0.5.1            survival_2.43-3        memoise_1.1.0