diff --git a/Demonstrations/.Rhistory b/Demonstrations/.Rhistory new file mode 100644 index 0000000..b74126c --- /dev/null +++ b/Demonstrations/.Rhistory @@ -0,0 +1,512 @@ +print("Not enough intervals to test") +return(list(NA, NA, NA, NA)) +} +data_plot_ratio <- data_plot_ratio[intervals_test, ] +x_test <- as.double(data_plot_ratio$Interval) +y1_test <- logit(data_plot_ratio$FC_F0) +y2_test <- logit(data_plot_ratio$FC_F1) +X_augmented = as.matrix(data.frame(c(x_test, x_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +Y_augmented = as.matrix(data.frame(c(y1_test, y2_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +not_exclude <- !is.infinite(Y_augmented[,1]) +if(sum(!not_exclude) / length(not_exclude) > 0.1){ +print("Warning: More than 10% of values are NA") +} +X_augmented_run <- X_augmented[not_exclude, ] +Y_augmented_run <- Y_augmented[not_exclude, ] +tryCatch({ +gp_res_parent_constant = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "conserved") +gp_res_parent_cis_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_static") +gp_res_parent_cis_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_dynamic") +gp_res_parent_trans_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_static") +gp_res_parent_trans_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_dynamic") +return(list(gp_res_parent_constant, gp_res_parent_cis_static, gp_res_parent_cis_dynamic, gp_res_parent_trans_static, +gp_res_parent_trans_dynamic)) +}, error=function(cond) { +return(NA) +}) +} +allelic_ratios +run_coreg_gp_python_expressed_intervals_new("Dnajc2") +allelic_ratios +ratios_all_genes <- readRDS("./Scripts/Demonstrations/normalized_ratios_trans.rds") +ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +ratios_all_genes +lapply(ratios_all_genes, function(x){x[c("Ddt", "Prm1"), ]}) +lapply(ratios_all_genes, function(x){x[c("Ddt", "Tnp2"), ]}) +saveRDS(lapply(ratios_all_genes, function(x){x[c("Ddt", "Tnp2"), ]}), "./test_ratios_trans.rds") +allelic_ratios <- readRDS("./test_ratios_trans.rds") +allelic_ratios +# ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +# saveRDS(lapply(ratios_all_genes, function(x){x[c("Ddt", "Tnp2"), ]}), "./test_ratios_trans.rds") +total_exp <- readRDS("./Data/processed/total_exp.rds") +# ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +# saveRDS(lapply(ratios_all_genes, function(x){x[c("Ddt", "Tnp2"), ]}), "./test_ratios_trans.rds") +total_exp <- readRDS("../../Data/processed/total_exp.rds") +total_exp +ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +saveRDS(lapply(ratios_all_genes, function(x){x[c("Dnajc2", "Ddt", "Tnp2"), ]}), "./test_ratios_trans.rds") +total_exp <- readRDS("../../Data/processed/total_exp.rds") +total_exp <- total_exp[c("Dnajc2", "Ddt", "Tnp2"), ] +saveRDS(total_exp, "./test_counts_trans.rds") +allelic_ratios <- readRDS("./test_ratios_trans.rds") +total_exp <- readRDS("./test_counts_trans.rds") +plot_cis_trans_gene_mean("Dnajc2") +plot_cis_trans_gene_mean +plot_cis_trans_gene_mean("Ddt") +plot_cis_trans_gene_mean("Tnp2") +# Not run -- subset data on genes to show +ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +saveRDS(lapply(ratios_all_genes, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_ratios_trans.rds") +total_exp <- readRDS("../../Data/processed/total_exp.rds") +total_exp <- total_exp[c("Dnajc2", "Ddt", "Tex21"), ] +saveRDS(total_exp, "./test_counts_trans.rds") +allelic_ratios <- readRDS("./test_ratios_trans.rds") +total_exp <- readRDS("./test_counts_trans.rds") +plot_cis_trans_gene_mean("Tex21") +# Not run -- subset data on genes to show +ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +saveRDS(lapply(ratios_all_genes, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_ratios_trans.rds") +total_exp <- readRDS("../../Data/processed/total_exp.rds") +total_exp <- total_exp[c("Dnajc2", "Ddt", "Tex21"), ] +saveRDS(total_exp, "./test_counts_trans.rds") +allelic_ratios <- readRDS("./test_ratios_trans.rds") +total_exp <- readRDS("./test_counts_trans.rds") +plot_cis_trans_gene_mean("Tex21") +allelic_ratios <- readRDS("./test_ratios_trans.rds") +total_exp <- readRDS("./test_counts_trans.rds") +run_coreg_gp_python_expressed_intervals_new <- function(gene, model = "full"){ +rs_f0_noStab <- allelic_ratios[[1]] +rs_f1_noStab <- allelic_ratios[[2]] +data_plot_ratio <- data.frame( +FC_F0 = rs_f0_noStab[gene, ], +FC_F1 = rs_f1_noStab[gene, ], +Interval = 1:ncol(rs_f0_noStab) +) +# get intervals in which gene is expressed +intervals_test <- total_exp[gene, ] > 0.5 +print(table(intervals_test)) +if (sum(intervals_test) < 5){ +print("Not enough intervals to test") +return(list(NA, NA, NA, NA)) +} +data_plot_ratio <- data_plot_ratio[intervals_test, ] +x_test <- as.double(data_plot_ratio$Interval) +y1_test <- logit(data_plot_ratio$FC_F0) +y2_test <- logit(data_plot_ratio$FC_F1) +X_augmented = as.matrix(data.frame(c(x_test, x_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +Y_augmented = as.matrix(data.frame(c(y1_test, y2_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +not_exclude <- !is.infinite(Y_augmented[,1]) +if(sum(!not_exclude) / length(not_exclude) > 0.1){ +print("Warning: More than 10% of values are NA") +} +X_augmented_run <- X_augmented[not_exclude, ] +Y_augmented_run <- Y_augmented[not_exclude, ] +tryCatch({ +gp_res_parent_constant = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "conserved") +gp_res_parent_cis_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_static") +gp_res_parent_cis_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_dynamic") +gp_res_parent_trans_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_static") +gp_res_parent_trans_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_dynamic") +return(list(gp_res_parent_constant, gp_res_parent_cis_static, gp_res_parent_cis_dynamic, gp_res_parent_trans_static, +gp_res_parent_trans_dynamic)) +}, error=function(cond) { +return(NA) +}) +} +allelic_ratios +run_coreg_gp_python_expressed_intervals_new("Ddt") +gp_results <- lapply(rownames(allelic_ratios), run_coreg_gp_python_expressed_intervals_new) +run_coreg_gp <- function(gene, model = "full"){ +rs_f0_noStab <- allelic_ratios[[1]] +rs_f1_noStab <- allelic_ratios[[2]] +data_plot_ratio <- data.frame( +FC_F0 = rs_f0_noStab[gene, ], +FC_F1 = rs_f1_noStab[gene, ], +Interval = 1:ncol(rs_f0_noStab) +) +# get intervals in which gene is expressed +intervals_test <- total_exp[gene, ] > 0.5 +print(table(intervals_test)) +if (sum(intervals_test) < 5){ +print("Not enough intervals to test") +return(list(NA, NA, NA, NA)) +} +data_plot_ratio <- data_plot_ratio[intervals_test, ] +x_test <- as.double(data_plot_ratio$Interval) +y1_test <- logit(data_plot_ratio$FC_F0) +y2_test <- logit(data_plot_ratio$FC_F1) +X_augmented = as.matrix(data.frame(c(x_test, x_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +Y_augmented = as.matrix(data.frame(c(y1_test, y2_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +not_exclude <- !is.infinite(Y_augmented[,1]) +if(sum(!not_exclude) / length(not_exclude) > 0.1){ +print("Warning: More than 10% of values are NA") +} +X_augmented_run <- X_augmented[not_exclude, ] +Y_augmented_run <- Y_augmented[not_exclude, ] +tryCatch({ +gp_res_parent_constant = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "conserved") +gp_res_parent_cis_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_static") +gp_res_parent_cis_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_dynamic") +gp_res_parent_trans_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_static") +gp_res_parent_trans_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_dynamic") +return(list(gp_res_parent_constant, gp_res_parent_cis_static, gp_res_parent_cis_dynamic, gp_res_parent_trans_static, +gp_res_parent_trans_dynamic)) +}, error=function(cond) { +return(NA) +}) +} +allelic_ratios <- readRDS("./test_ratios_trans.rds") +total_exp <- readRDS("./test_counts_trans.rds") +gp_results <- lapply(rownames(allelic_ratios), run_coreg_gp) +gp_results +rownames(allelic_ratios) +gp_results <- lapply(rownames(total_exp), run_coreg_gp) +gp_results +names(gp_results) <- rownames(total_exp) +plot_cis_trans_gene_mean +plot_cis_trans_gene_mean <- function(gene){ +asd <- results_per_gene_expressed_intervals[[gene]] +for_plotting <- lapply(1:length(asd[[5]][[2]]), function(i){ +xx = asd[[5]][[2]][[i]] +xx = data.frame(do.call("cbind", xx)) +xx$Sample = i +xx$X3 <- sqrt(xx$X3) +xx}) +for_plotting_merged <- do.call("rbind", for_plotting) +for_plotting_merged$Sample <- as.factor(for_plotting_merged$Sample) +for_plotting_merged$mean <- for_plotting_merged$X2 +data_plot <- data.frame( +"F0_P" = data_f0_b6[gene, ], +"F0_M" = data_f0_cast[gene, ], +"F1_P" = data_f1_b6[gene, ], +"F1_M" = data_f1_cast[gene, ] +) +norm_factors <- c(sum(colSums(data_f0_b6)), +sum(colSums(data_f0_cast))) +norm_factors <- norm_factors / norm_factors[[1]] +data_plot[,1:2] <- data.frame(t(t(data_plot[,1:2]) / norm_factors)) +data_plot$Interval <- 1:nrow(data_plot) +for_plotting_merged$Sample <- ifelse(for_plotting_merged$Sample == 1, "F0", "F1") +ggplot(for_plotting_merged, aes(X1, y = rev_logit(X2), col = Sample)) + +geom_line(linetype = "dashed", size = 1.5) + +scale_color_manual(values = c("F0" = "black", "F1" = "darkgrey")) + +# geom_ribbon(aes(ymin = rev_logit(error_lower), ymax = rev_logit(error_upper), fill = Sample), +# alpha = 0.2) + +theme(legend.position="top") + +geom_point(data = data_plot, aes(Interval, (F0_P) / (F0_M + F0_P)), col = "black", size = 2) + +geom_point(data = data_plot, aes(Interval, (F1_P) / (F1_M + F1_P)), col = "darkgrey", size = 2) + +xlim(0, 100) + scale_fill_manual(values = c("red", "darkgrey")) + ylim(c(0, 1)) + xlab("Pseudotime") + +ylab("Allelic Imbalance") + theme_classic() + ggtitle(gene) + +geom_hline(yintercept = 0.5, linetype = "dashed") + +theme(text = element_text(size = 30), legend.position = "None") + +scale_x_continuous(breaks = c(0, 50, 100)) + +scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1)) +} +plot_cis_trans_gene_mean("Dnajc2") +plot_cis_trans_gene_mean <- function(gene){ +asd <- gp_results[[gene]] +for_plotting <- lapply(1:length(asd[[5]][[2]]), function(i){ +xx = asd[[5]][[2]][[i]] +xx = data.frame(do.call("cbind", xx)) +xx$Sample = i +xx$X3 <- sqrt(xx$X3) +xx}) +for_plotting_merged <- do.call("rbind", for_plotting) +for_plotting_merged$Sample <- as.factor(for_plotting_merged$Sample) +for_plotting_merged$mean <- for_plotting_merged$X2 +data_plot <- data.frame( +"F0_P" = data_f0_b6[gene, ], +"F0_M" = data_f0_cast[gene, ], +"F1_P" = data_f1_b6[gene, ], +"F1_M" = data_f1_cast[gene, ] +) +norm_factors <- c(sum(colSums(data_f0_b6)), +sum(colSums(data_f0_cast))) +norm_factors <- norm_factors / norm_factors[[1]] +data_plot[,1:2] <- data.frame(t(t(data_plot[,1:2]) / norm_factors)) +data_plot$Interval <- 1:nrow(data_plot) +for_plotting_merged$Sample <- ifelse(for_plotting_merged$Sample == 1, "F0", "F1") +ggplot(for_plotting_merged, aes(X1, y = rev_logit(X2), col = Sample)) + +geom_line(linetype = "dashed", size = 1.5) + +scale_color_manual(values = c("F0" = "black", "F1" = "darkgrey")) + +# geom_ribbon(aes(ymin = rev_logit(error_lower), ymax = rev_logit(error_upper), fill = Sample), +# alpha = 0.2) + +theme(legend.position="top") + +geom_point(data = data_plot, aes(Interval, (F0_P) / (F0_M + F0_P)), col = "black", size = 2) + +geom_point(data = data_plot, aes(Interval, (F1_P) / (F1_M + F1_P)), col = "darkgrey", size = 2) + +xlim(0, 100) + scale_fill_manual(values = c("red", "darkgrey")) + ylim(c(0, 1)) + xlab("Pseudotime") + +ylab("Allelic Imbalance") + theme_classic() + ggtitle(gene) + +geom_hline(yintercept = 0.5, linetype = "dashed") + +theme(text = element_text(size = 30), legend.position = "None") + +scale_x_continuous(breaks = c(0, 50, 100)) + +scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1)) +} +plot_cis_trans_gene_mean("Dnajc2") +plot_cis_trans_gene_mean("Pr1") +plot_cis_trans_gene_mean("Prm1") +plot_cis_trans_gene_mean("Tex21") +setwd("~/Desktop/Projects/ASE_Spermatogenesis_Paper_rerun_Revisions/Scripts/Demonstrations/") +library(ggplot2) +library(tidyverse) +library(viridis) +source("./functions.R") +setwd("~/Desktop/Projects/ASE_Spermatogenesis_Paper_rerun_Revisions/Scripts/Demonstrations/") +library(ggplot2) +library(tidyverse) +library(viridis) +source("./functions.R") +allelic_ratios <- readRDS("./test_ratios_trans.rds") +total_exp <- readRDS("./test_counts_trans.rds") +rm(list = ls()) +allelic_ratios <- readRDS("./test_ratios_trans.rds") +total_exp <- readRDS("./test_counts_trans.rds") +reticulate::repl_python() +run_coreg_gp <- function(gene, model = "full"){ +rs_f0_noStab <- allelic_ratios[[1]] +rs_f1_noStab <- allelic_ratios[[2]] +data_plot_ratio <- data.frame( +FC_F0 = rs_f0_noStab[gene, ], +FC_F1 = rs_f1_noStab[gene, ], +Interval = 1:ncol(rs_f0_noStab) +) +# get intervals in which gene is expressed +intervals_test <- total_exp[gene, ] > 0.5 +print(table(intervals_test)) +if (sum(intervals_test) < 5){ +print("Not enough intervals to test") +return(list(NA, NA, NA, NA)) +} +data_plot_ratio <- data_plot_ratio[intervals_test, ] +x_test <- as.double(data_plot_ratio$Interval) +y1_test <- logit(data_plot_ratio$FC_F0) +y2_test <- logit(data_plot_ratio$FC_F1) +X_augmented = as.matrix(data.frame(c(x_test, x_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +Y_augmented = as.matrix(data.frame(c(y1_test, y2_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +not_exclude <- !is.infinite(Y_augmented[,1]) +if(sum(!not_exclude) / length(not_exclude) > 0.1){ +print("Warning: More than 10% of values are NA") +} +X_augmented_run <- X_augmented[not_exclude, ] +Y_augmented_run <- Y_augmented[not_exclude, ] +tryCatch({ +gp_res_parent_constant = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "conserved") +gp_res_parent_cis_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_static") +gp_res_parent_cis_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_dynamic") +gp_res_parent_trans_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_static") +gp_res_parent_trans_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_dynamic") +return(list(gp_res_parent_constant, gp_res_parent_cis_static, gp_res_parent_cis_dynamic, gp_res_parent_trans_static, +gp_res_parent_trans_dynamic)) +}, error=function(cond) { +return(NA) +}) +} +gp_results <- lapply(rownames(total_exp), run_coreg_gp) +names(gp_results) <- rownames(total_exp) +run_coreg_gp <- function(gene, model = "full"){ +logit <- function(x) {log(x / (1 - x))} +rs_f0_noStab <- allelic_ratios[[1]] +rs_f1_noStab <- allelic_ratios[[2]] +data_plot_ratio <- data.frame( +FC_F0 = rs_f0_noStab[gene, ], +FC_F1 = rs_f1_noStab[gene, ], +Interval = 1:ncol(rs_f0_noStab) +) +# get intervals in which gene is expressed +intervals_test <- total_exp[gene, ] > 0.5 +print(table(intervals_test)) +if (sum(intervals_test) < 5){ +print("Not enough intervals to test") +return(list(NA, NA, NA, NA)) +} +data_plot_ratio <- data_plot_ratio[intervals_test, ] +x_test <- as.double(data_plot_ratio$Interval) +y1_test <- logit(data_plot_ratio$FC_F0) +y2_test <- logit(data_plot_ratio$FC_F1) +X_augmented = as.matrix(data.frame(c(x_test, x_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +Y_augmented = as.matrix(data.frame(c(y1_test, y2_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) +not_exclude <- !is.infinite(Y_augmented[,1]) +if(sum(!not_exclude) / length(not_exclude) > 0.1){ +print("Warning: More than 10% of values are NA") +} +X_augmented_run <- X_augmented[not_exclude, ] +Y_augmented_run <- Y_augmented[not_exclude, ] +tryCatch({ +gp_res_parent_constant = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "conserved") +gp_res_parent_cis_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_static") +gp_res_parent_cis_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_dynamic") +gp_res_parent_trans_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_static") +gp_res_parent_trans_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_dynamic") +return(list(gp_res_parent_constant, gp_res_parent_cis_static, gp_res_parent_cis_dynamic, gp_res_parent_trans_static, +gp_res_parent_trans_dynamic)) +}, error=function(cond) { +return(NA) +}) +} +gp_results <- lapply(rownames(total_exp), run_coreg_gp) +names(gp_results) <- rownames(total_exp) +plot_cis_trans_gene_mean("Tex21") +plot_cis_trans_gene_mean <- function(gene){ +asd <- gp_results[[gene]] +for_plotting <- lapply(1:length(asd[[5]][[2]]), function(i){ +xx = asd[[5]][[2]][[i]] +xx = data.frame(do.call("cbind", xx)) +xx$Sample = i +xx$X3 <- sqrt(xx$X3) +xx}) +for_plotting_merged <- do.call("rbind", for_plotting) +for_plotting_merged$Sample <- as.factor(for_plotting_merged$Sample) +for_plotting_merged$mean <- for_plotting_merged$X2 +data_plot <- data.frame( +"F0_P" = data_f0_b6[gene, ], +"F0_M" = data_f0_cast[gene, ], +"F1_P" = data_f1_b6[gene, ], +"F1_M" = data_f1_cast[gene, ] +) +norm_factors <- c(sum(colSums(data_f0_b6)), +sum(colSums(data_f0_cast))) +norm_factors <- norm_factors / norm_factors[[1]] +data_plot[,1:2] <- data.frame(t(t(data_plot[,1:2]) / norm_factors)) +data_plot$Interval <- 1:nrow(data_plot) +for_plotting_merged$Sample <- ifelse(for_plotting_merged$Sample == 1, "F0", "F1") +ggplot(for_plotting_merged, aes(X1, y = rev_logit(X2), col = Sample)) + +geom_line(linetype = "dashed", size = 1.5) + +scale_color_manual(values = c("F0" = "black", "F1" = "darkgrey")) + +# geom_ribbon(aes(ymin = rev_logit(error_lower), ymax = rev_logit(error_upper), fill = Sample), +# alpha = 0.2) + +theme(legend.position="top") + +geom_point(data = data_plot, aes(Interval, (F0_P) / (F0_M + F0_P)), col = "black", size = 2) + +geom_point(data = data_plot, aes(Interval, (F1_P) / (F1_M + F1_P)), col = "darkgrey", size = 2) + +xlim(0, 100) + scale_fill_manual(values = c("red", "darkgrey")) + ylim(c(0, 1)) + xlab("Pseudotime") + +ylab("Allelic Imbalance") + theme_classic() + ggtitle(gene) + +geom_hline(yintercept = 0.5, linetype = "dashed") + +theme(text = element_text(size = 30), legend.position = "None") + +scale_x_continuous(breaks = c(0, 50, 100)) + +scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1)) +} +plot_cis_trans_gene_mean("Tex21") +# # Not run -- subset data on genes to show +# ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +# saveRDS(lapply(ratios_all_genes, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_ratios_trans.rds") +# total_exp <- readRDS("../../Data/processed/total_exp.rds") +# total_exp <- total_exp[c("Dnajc2", "Ddt", "Tex21"), ] +# saveRDS(total_exp, "./test_counts_trans.rds") +list_data <- readRDS("./Data/processed//binned_data.rds") +saveRDS(lapply(list_data, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]})) +# # Not run -- subset data on genes to show +# ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +# saveRDS(lapply(ratios_all_genes, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_ratios_trans.rds") +# total_exp <- readRDS("../../Data/processed/total_exp.rds") +# total_exp <- total_exp[c("Dnajc2", "Ddt", "Tex21"), ] +# saveRDS(total_exp, "./test_counts_trans.rds") +list_data <- readRDS("../../Data/processed//binned_data.rds") +saveRDS(lapply(list_data, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_counts_trans_per_sample.rds") +list_data <- readRDS("./test_counts_trans_per_sample.rds") +data_f0_b6 <- list_data[[1]] +data_f0_cast <- list_data[[2]] +data_f1_b6 <- list_data[[3]] +data_f1_cast <- list_data[[4]] +plot_cis_trans_gene_mean <- function(gene){ +asd <- gp_results[[gene]] +for_plotting <- lapply(1:length(asd[[5]][[2]]), function(i){ +xx = asd[[5]][[2]][[i]] +xx = data.frame(do.call("cbind", xx)) +xx$Sample = i +xx$X3 <- sqrt(xx$X3) +xx}) +for_plotting_merged <- do.call("rbind", for_plotting) +for_plotting_merged$Sample <- as.factor(for_plotting_merged$Sample) +for_plotting_merged$mean <- for_plotting_merged$X2 +data_plot <- data.frame( +"F0_P" = data_f0_b6[gene, ], +"F0_M" = data_f0_cast[gene, ], +"F1_P" = data_f1_b6[gene, ], +"F1_M" = data_f1_cast[gene, ] +) +norm_factors <- c(sum(colSums(data_f0_b6)), +sum(colSums(data_f0_cast))) +norm_factors <- norm_factors / norm_factors[[1]] +data_plot[,1:2] <- data.frame(t(t(data_plot[,1:2]) / norm_factors)) +data_plot$Interval <- 1:nrow(data_plot) +for_plotting_merged$Sample <- ifelse(for_plotting_merged$Sample == 1, "F0", "F1") +ggplot(for_plotting_merged, aes(X1, y = rev_logit(X2), col = Sample)) + +geom_line(linetype = "dashed", size = 1.5) + +scale_color_manual(values = c("F0" = "black", "F1" = "darkgrey")) + +# geom_ribbon(aes(ymin = rev_logit(error_lower), ymax = rev_logit(error_upper), fill = Sample), +# alpha = 0.2) + +theme(legend.position="top") + +geom_point(data = data_plot, aes(Interval, (F0_P) / (F0_M + F0_P)), col = "black", size = 2) + +geom_point(data = data_plot, aes(Interval, (F1_P) / (F1_M + F1_P)), col = "darkgrey", size = 2) + +xlim(0, 100) + scale_fill_manual(values = c("red", "darkgrey")) + ylim(c(0, 1)) + xlab("Pseudotime") + +ylab("Allelic Imbalance") + theme_classic() + ggtitle(gene) + +geom_hline(yintercept = 0.5, linetype = "dashed") + +theme(text = element_text(size = 30), legend.position = "None") + +scale_x_continuous(breaks = c(0, 50, 100)) + +scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1)) +} +plot_cis_trans_gene_mean("Tex21") +plot_cis_trans_gene_mean <- function(gene){ +rev_logit <- function(x){ +1 / (1 + exp(-x)) +} +asd <- gp_results[[gene]] +for_plotting <- lapply(1:length(asd[[5]][[2]]), function(i){ +xx = asd[[5]][[2]][[i]] +xx = data.frame(do.call("cbind", xx)) +xx$Sample = i +xx$X3 <- sqrt(xx$X3) +xx}) +for_plotting_merged <- do.call("rbind", for_plotting) +for_plotting_merged$Sample <- as.factor(for_plotting_merged$Sample) +for_plotting_merged$mean <- for_plotting_merged$X2 +data_plot <- data.frame( +"F0_P" = data_f0_b6[gene, ], +"F0_M" = data_f0_cast[gene, ], +"F1_P" = data_f1_b6[gene, ], +"F1_M" = data_f1_cast[gene, ] +) +norm_factors <- c(sum(colSums(data_f0_b6)), +sum(colSums(data_f0_cast))) +norm_factors <- norm_factors / norm_factors[[1]] +data_plot[,1:2] <- data.frame(t(t(data_plot[,1:2]) / norm_factors)) +data_plot$Interval <- 1:nrow(data_plot) +for_plotting_merged$Sample <- ifelse(for_plotting_merged$Sample == 1, "F0", "F1") +ggplot(for_plotting_merged, aes(X1, y = rev_logit(X2), col = Sample)) + +geom_line(linetype = "dashed", size = 1.5) + +scale_color_manual(values = c("F0" = "black", "F1" = "darkgrey")) + +# geom_ribbon(aes(ymin = rev_logit(error_lower), ymax = rev_logit(error_upper), fill = Sample), +# alpha = 0.2) + +theme(legend.position="top") + +geom_point(data = data_plot, aes(Interval, (F0_P) / (F0_M + F0_P)), col = "black", size = 2) + +geom_point(data = data_plot, aes(Interval, (F1_P) / (F1_M + F1_P)), col = "darkgrey", size = 2) + +xlim(0, 100) + scale_fill_manual(values = c("red", "darkgrey")) + ylim(c(0, 1)) + xlab("Pseudotime") + +ylab("Allelic Imbalance") + theme_classic() + ggtitle(gene) + +geom_hline(yintercept = 0.5, linetype = "dashed") + +theme(text = element_text(size = 30), legend.position = "None") + +scale_x_continuous(breaks = c(0, 50, 100)) + +scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1)) +} +plot_cis_trans_gene_mean("Tex21") +plot_cis_trans_gene_mean("Dnajc2") +gp_results +gp_results$Dnajc2 +setwd("~/Desktop/Projects/ASE_Spermatogenesis_Paper_rerun_Revisions/Scripts/Demonstrations/") +library(ggplot2) +library(tidyverse) +library(viridis) +source("./functions.R") +# # Not run -- subset data on genes to show +# ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +# saveRDS(lapply(ratios_all_genes, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_ratios_trans.rds") +# total_exp <- readRDS("../../Data/processed/total_exp.rds") +# total_exp <- total_exp[c("Dnajc2", "Ddt", "Tex21"), ] +# saveRDS(total_exp, "./test_counts_trans.rds") +# list_data <- readRDS("../../Data/processed//binned_data.rds") +# saveRDS(lapply(list_data, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_counts_trans_per_sample.rds") +allelic_ratios <- readRDS("./test_ratios_trans.rds") +total_exp <- readRDS("./test_counts_trans.rds") +list_data <- readRDS("./test_counts_trans_per_sample.rds") +data_f0_b6 <- list_data[[1]] +data_f0_cast <- list_data[[2]] +data_f1_b6 <- list_data[[3]] +data_f1_cast <- list_data[[4]] +reticulate::repl_python() diff --git a/Demonstrations/dynamic_allelic_imbalance.Rmd b/Demonstrations/dynamic_allelic_imbalance.Rmd new file mode 100644 index 0000000..99e950c --- /dev/null +++ b/Demonstrations/dynamic_allelic_imbalance.Rmd @@ -0,0 +1,114 @@ +--- +title: "dynamic_allelic_imbalance" +author: "Jasper Panten" +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_depth: 2 +editor_options: + chunk_output_type: console +--- + +```{r setup} +knitr::opts_chunk$set(echo = TRUE, message=FALSE, warning=FALSE) +``` + +```{r load_libraries} + +setwd("~/Desktop/Projects/ASE_Spermatogenesis_Paper_rerun_Revisions/Scripts/Demonstrations/") + +library(ggplot2) +library(tidyverse) +library(viridis) +source("./functions.R") + +``` + +This script demonstrate the analysis of dynamic allelic imbalance during mouse spermatogenesis in F1 hybrid mice, crossed between C57BL6 and CAST/EiJ strains. We quantified gene expression using 10x single-cell RNA-Seq in single germ cells, which can be arranged according to their differentiation stage, represented by a pseudotemporal ordering. Finally, we use heterozygous variants within reads to quantify expression from the B6 or CAST haplotypes. + +For this demonstration, we load allele-specific counts for two selected genes, including their celltype and pseudotime-annotations: + +```{r load_data} + +allelic_counts <- readRDS("./test_counts_allelic.rds") + +colour_vector <- c("SG" = "#046735", + "SC" = "#D31281", + "RS" = "#282B69", + "ES" = "grey60") + +allelic_counts@colData %>% data.frame() %>% + ggplot(aes(x = rank(Pseudotime), y = Pseudotime, col = CellType)) + geom_point() + theme_bw(base_size = 20) + + xlab("Cells ordered by pseudotime") + ylab("Pseudotime") + scale_color_manual(values = colour_vector) + +``` + +We then plot the allelic expression across differentiation, either by plotting the haplotype-specific expression level or the allelic bias B6 / (B6 + CAST), where 0.5 represents equal allelic contribution: + +```{r plot_allelic_counts_ddt} + +plot_gene(allelic_counts, "Ddt") + +``` + +In contrast, for most genes there is no obvious difference in allelic ratios over time: + +```{r plot_allelic_counts_tex21} + +plot_gene(allelic_counts, "Tex21") + +``` + +The dynamic allelic imbalance arises from genetic effects acting in a celltype-specific manner on gene expression. Since both alleles are measured in the same nuclear environment, these genetic effects are only acting in cis. +We can discover two kinds of allelic imbalance changes driven by cis-effects. First, we can ask whether the allelic ratios are significantly different to 0.5, but without taking cell type into account. The statistical models are implemented in the scDALI package (Heinen et al, https://github.com/PMBio/scdali), and are formulated as a generalized linear mixed model, where the counts follow a beta-binomial distribution and the cell state is given by a random effect. + +```{r test_persistent_imbalance} + +A <- t(counts_reference(allelic_counts["Ddt", ])) +D <- t(counts_reference(allelic_counts["Ddt", ]) + + counts_alternative(allelic_counts["Ddt", ])) + +pvals_persistent <- test_mean_R(A, D, mean_mu = .5) + +pvals_persistent$Mean # average allelic imbalance +pvals_persistent$pval # average allelic imbalance + +``` + +Ddt shows strong allelic bias (towards the CAST allele). + +We can now test whether this bias is significantly variable by using the pseudotime-aware test: + + +```{r test_dynamic_imbalance} +pseudotime <- allelic_counts$Pseudotime + +results_dynamic <- test_regions_R(A, D, pseudotime) +results_dynamic + +``` + +The allelic imbalance, that is, the cis-action on Ddt during spermatogenesis is strongly celltype-dependent. +To get an estimate of the strength of the variability, we can interpolate the estimate over time using gaussian process regression on the allelic rates: + +```{r interpolate_allelic_rate} + +gp_results <- run_gp_R(A, D, cell_state = pseudotime, kernel = "RBF") + +plot_gene_GP(allelic_counts, "Ddt", gp_results) + +names(gp_results) # this returns a function with variance estimate (of the interpolated trajectory) + +``` + +A useful quantification is the magnitude of the allelic balance shift, for example as the quantile difference between the 10% cells with highest and lowest allelic imbalance: + +```{r quantile_difference_allelic_rate} + +quantile(gp_results[[1]], 0.9) - quantile(gp_results[[1]], 0.1) # 0.2688 + +``` + +This corresponds to a shift in allelic imbalance of ~0.25, that is, from around balanced (0.5) to 3-fold overexpression of the CAST allele (0.25). + diff --git a/Demonstrations/dynamic_allelic_imbalance.html b/Demonstrations/dynamic_allelic_imbalance.html new file mode 100644 index 0000000..1a04250 --- /dev/null +++ b/Demonstrations/dynamic_allelic_imbalance.html @@ -0,0 +1,497 @@ + + + + + + + + + + + + + + + +dynamic_allelic_imbalance + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
knitr::opts_chunk$set(echo = TRUE, message=FALSE, warning=FALSE)
+
setwd("~/Desktop/Projects/ASE_Spermatogenesis_Paper_rerun_Revisions/Scripts/Demonstrations/")
+
+library(ggplot2)
+library(tidyverse)
+library(viridis)
+source("./functions.R")
+

This script demonstrate the analysis of dynamic allelic imbalance +during mouse spermatogenesis in F1 hybrid mice, crossed between C57BL6 +and CAST/EiJ strains. We quantified gene expression using 10x +single-cell RNA-Seq in single germ cells, which can be arranged +according to their differentiation stage, represented by a +pseudotemporal ordering. Finally, we use heterozygous variants within +reads to quantify expression from the B6 or CAST haplotypes.

+

For this demonstration, we load allele-specific counts for two +selected genes, including their celltype and pseudotime-annotations:

+
allelic_counts <- readRDS("./test_counts_allelic.rds")
+
+colour_vector <- c("SG" = "#046735",
+                   "SC" = "#D31281",
+                   "RS" = "#282B69",
+                   "ES" = "grey60")
+
+allelic_counts@colData %>% data.frame() %>%
+  ggplot(aes(x = rank(Pseudotime), y = Pseudotime, col = CellType)) + geom_point() + theme_bw(base_size = 20) + 
+    xlab("Cells ordered by pseudotime") + ylab("Pseudotime") + scale_color_manual(values = colour_vector)
+

+

We then plot the allelic expression across differentiation, either by +plotting the haplotype-specific expression level or the allelic bias B6 +/ (B6 + CAST), where 0.5 represents equal allelic contribution:

+
plot_gene(allelic_counts, "Ddt")
+
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+##    0.00   19.05   54.27   54.95   91.07  101.59
+

+

In contrast, for most genes there is no obvious difference in allelic +ratios over time:

+
plot_gene(allelic_counts, "Tex21")
+
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+##    0.00   19.05   54.27   54.95   91.07  101.59
+

+

The dynamic allelic imbalance arises from genetic effects acting in a +celltype-specific manner on gene expression. Since both alleles are +measured in the same nuclear environment, these genetic effects are only +acting in cis. We can discover two kinds of allelic imbalance changes +driven by cis-effects. First, we can ask whether the allelic ratios are +significantly different to 0.5, but without taking cell type into +account. The statistical models are implemented in the scDALI package +(Heinen et al, https://github.com/PMBio/scdali), and are formulated as +a generalized linear mixed model, where the counts follow a +beta-binomial distribution and the cell state is given by a random +effect.

+
A <- t(counts_reference(allelic_counts["Ddt", ]))
+D <- t(counts_reference(allelic_counts["Ddt", ]) + 
+       counts_alternative(allelic_counts["Ddt", ]))
+
+pvals_persistent <- test_mean_R(A, D, mean_mu = .5)
+
+pvals_persistent$Mean # average allelic imbalance
+
## [1] 0.4204326
+
pvals_persistent$pval # average allelic imbalance
+
## [1] 0
+

Ddt shows strong allelic bias (towards the CAST allele).

+

We can now test whether this bias is significantly variable by using +the pseudotime-aware test:

+
pseudotime <- allelic_counts$Pseudotime
+
+results_dynamic <- test_regions_R(A, D, pseudotime)
+results_dynamic
+
## [1] 1.594737e-249
+

The allelic imbalance, that is, the cis-action on Ddt during +spermatogenesis is strongly celltype-dependent. To get an estimate of +the strength of the variability, we can interpolate the estimate over +time using gaussian process regression on the allelic rates:

+
gp_results <- run_gp_R(A, D, cell_state = pseudotime, kernel = "RBF")
+
+plot_gene_GP(allelic_counts,  "Ddt", gp_results)
+
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+##    0.00   19.05   54.27   54.95   91.07  101.59
+

+
names(gp_results) # this returns a function with variance estimate (of the interpolated trajectory)
+
## [1] "posterior_mean" "posterior_var"
+

A useful quantification is the magnitude of the allelic balance +shift, for example as the quantile difference between the 10% cells with +highest and lowest allelic imbalance:

+
quantile(gp_results[[1]], 0.9) - quantile(gp_results[[1]], 0.1) # 0.2688
+
##       90% 
+## 0.2688179
+

This corresponds to a shift in allelic imbalance of ~0.25, that is, +from around balanced (0.5) to 3-fold overexpression of the CAST allele +(0.25).

+ + + + +
+ + + + + + + + + + + + + + + diff --git a/Demonstrations/dynamic_trans_effects.Rmd b/Demonstrations/dynamic_trans_effects.Rmd new file mode 100644 index 0000000..94f59d9 --- /dev/null +++ b/Demonstrations/dynamic_trans_effects.Rmd @@ -0,0 +1,261 @@ +--- +title: "dynamic_allelic_imbalance" +author: "Jasper Panten" +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_depth: 2 +editor_options: + chunk_output_type: console +--- + +```{r setup} +knitr::opts_chunk$set(echo = TRUE, message=FALSE, warning=FALSE) +``` + +```{r load_libraries} + +setwd("~/Desktop/Projects/ASE_Spermatogenesis_Paper_rerun_Revisions/Scripts/Demonstrations/") + +library(ggplot2) +library(tidyverse) +library(viridis) +source("./functions.R") + +``` + +This script demonstrate the analysis of dynamic trans effects during mouse spermatogenesis in F1 hybrid mice. The F1 model allows to distinguish transcriptional changes between the strains caused by cis-effects and trans-effects, because in the F1 hybrid, all the trans-environment is the same, so all differences are cis-driven, whereas allelic differences between the F0s are driven by cis + trans effects. The trans-effect is therefore the residual between allelic imbalance in F0 and F1. +The single-cell resolution of our dataset now allows to discover dynamic trans effects, where this residual varies across differentiation. To this end, we use non-parametric regression models (based on gaussian processes) to discover genes where the two allelic trajectories diverge significantly. We set up different models to distinguish the following cases: +- both F0 and F1 follow the same trajectory: there is no trans, but possibly a (dynamic) cis-effect +- the difference in F0 - F1 trajectories is constant: there is a persistent trans-effect, invariable across differentiation +- F0 - F1 varies over time: there is a dynamic trans-effect + +For this demonstration, we load allele-specific counts for three selected genes in the F0 and F1, which have previously been aggregated across intervals in the pseudotemporal ordering: + +```{r load_data} + +# # Not run -- subset data on genes to show +# ratios_all_genes <- readRDS("./normalized_ratios_trans.rds") +# saveRDS(lapply(ratios_all_genes, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_ratios_trans.rds") +# total_exp <- readRDS("../../Data/processed/total_exp.rds") +# total_exp <- total_exp[c("Dnajc2", "Ddt", "Tex21"), ] +# saveRDS(total_exp, "./test_counts_trans.rds") +# list_data <- readRDS("../../Data/processed//binned_data.rds") +# saveRDS(lapply(list_data, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_counts_trans_per_sample.rds") + +allelic_ratios <- readRDS("./test_ratios_trans.rds") +total_exp <- readRDS("./test_counts_trans.rds") + +list_data <- readRDS("./test_counts_trans_per_sample.rds") + +data_f0_b6 <- list_data[[1]] +data_f0_cast <- list_data[[2]] +data_f1_b6 <- list_data[[3]] +data_f1_cast <- list_data[[4]] + +``` + +We use gpflow to set up the different gaussian process models. The different models are encoded by varying mean and covariance functions, and we use the difference in likelihood to the fit data to evaluate the significance of the result (see paper for details): + +```{python asd} + +import gpflow +import numpy as np +import matplotlib.pyplot as plt +import tensorflow as tf +from gpflow.utilities import print_summary, set_trainable, to_default_float +from gpflow.mean_functions import * +import matplotlib.pyplot as plt +import pandas as pd + +def plot(m): + Xtest = np.linspace(0, 100, 100)[:, None] + mu, var = m.predict_f(np.hstack((Xtest, np.zeros_like(Xtest)))) + out1 = [Xtest, mu.numpy(), var.numpy()] + mu, var = m.predict_f(np.hstack((Xtest, np.ones_like(Xtest)))) + out2 = [Xtest, mu.numpy(), var.numpy()] + return([out1, out2]) + +def fit_vanilla_4(X, Y, model = "trans_dynamic", n_iterations = 100): + # + X = np.array(X) + Y = np.array(Y) + # + X = X.astype("float64") + Y = Y.astype("float64") + # + output_dim = 2 # Number of outputs + rank = 1 # Rank of W + # + k = gpflow.kernels.Matern52(active_dims=[0]) + k_const = gpflow.kernels.Constant(active_dims=[0]) + # + # Coregion kernel + coreg = gpflow.kernels.Coregion(output_dim=output_dim, rank=rank, active_dims=[1]) + if ((model == "conserved") | (model == "cis_static")): + kern = k_const * coreg + if ((model == "cis_dynamic") | (model == "trans_static") | (model == "trans_dynamic")): + kern = k * coreg + lik = gpflow.likelihoods.SwitchedLikelihood([gpflow.likelihoods.Gaussian(), gpflow.likelihoods.Gaussian()]) + # + if ((model == "conserved") | (model == "cis_static") | (model == "cis_dynamic")): + mean_function = Constant() + if ((model == "trans_static") | (model == "trans_dynamic")): + mean_function = SwitchedMeanFunction([Constant(), Constant()]) + m = gpflow.models.VGP((X, Y), kernel=kern, likelihood = lik, mean_function = mean_function) + # + if ((model == "conserved") | (model == "cis_static") | (model == "cis_dynamic") | ( model == "trans_static")): + m.kernel.kernels[1].W.assign(np.array([[1], [1]])) + m.kernel.kernels[1].kappa.assign(np.array([0.000001, 0.000001])) + set_trainable(m.kernel.kernels[1].kappa, False) + set_trainable(m.kernel.kernels[1].W, False) + # + # + if (model == "conserved"): + m.mean_function.c.assign([0.5]) + set_trainable(m.mean_function.c, False) + # fit the covariance function parameters + gpflow.optimizers.Scipy().minimize( + m.training_loss, m.trainable_variables, options=dict(maxiter=n_iterations), method="L-BFGS-B", + ) + predictions = plot(m) + covariance = m.kernel.kernels[1].output_covariance().numpy() + likelihood = m.maximum_log_likelihood_objective().numpy() + return([m, predictions, covariance, likelihood]) + + +``` + +We now fit the different models to the data for each gene. Since we don't account for the binomial noise of the data here, we restrict the analysis to regions of differentiation where the gene is sufficiently expressed: + +```{r asdasdasdasda} + +run_coreg_gp <- function(gene, model = "full"){ + + logit <- function(x) {log(x / (1 - x))} + + rs_f0_noStab <- allelic_ratios[[1]] + rs_f1_noStab <- allelic_ratios[[2]] + + data_plot_ratio <- data.frame( + FC_F0 = rs_f0_noStab[gene, ], + FC_F1 = rs_f1_noStab[gene, ], + Interval = 1:ncol(rs_f0_noStab) + ) + + # get intervals in which gene is expressed + intervals_test <- total_exp[gene, ] > 0.5 + + print(table(intervals_test)) + + if (sum(intervals_test) < 5){ + print("Not enough intervals to test") + return(list(NA, NA, NA, NA)) + } + + data_plot_ratio <- data_plot_ratio[intervals_test, ] + + x_test <- as.double(data_plot_ratio$Interval) + + y1_test <- logit(data_plot_ratio$FC_F0) + y2_test <- logit(data_plot_ratio$FC_F1) + + X_augmented = as.matrix(data.frame(c(x_test, x_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) + Y_augmented = as.matrix(data.frame(c(y1_test, y2_test), c(rep(0, length(x_test)), rep(1, length(x_test))))) + + not_exclude <- !is.infinite(Y_augmented[,1]) + + if(sum(!not_exclude) / length(not_exclude) > 0.1){ + print("Warning: More than 10% of values are NA") + } + + X_augmented_run <- X_augmented[not_exclude, ] + Y_augmented_run <- Y_augmented[not_exclude, ] + + tryCatch({ + gp_res_parent_constant = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "conserved") + gp_res_parent_cis_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_static") + gp_res_parent_cis_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_dynamic") + gp_res_parent_trans_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_static") + gp_res_parent_trans_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_dynamic") + return(list(gp_res_parent_constant, gp_res_parent_cis_static, gp_res_parent_cis_dynamic, gp_res_parent_trans_static, + gp_res_parent_trans_dynamic)) + }, error=function(cond) { + return(NA) + }) +} + +gp_results <- lapply(rownames(total_exp), run_coreg_gp) +names(gp_results) <- rownames(total_exp) + +``` + +We can now visualize the results. For, Dnajc2 shows strong divergence in the allelic trajectories later in differentiation, and shows strong evidence for a dynamic trans effect: + +```{r asdas} + +plot_cis_trans_gene_mean <- function(gene){ + + rev_logit <- function(x){ + 1 / (1 + exp(-x)) + } + + asd <- gp_results[[gene]] + + for_plotting <- lapply(1:length(asd[[5]][[2]]), function(i){ + xx = asd[[5]][[2]][[i]] + xx = data.frame(do.call("cbind", xx)) + xx$Sample = i + xx$X3 <- sqrt(xx$X3) + xx}) + + for_plotting_merged <- do.call("rbind", for_plotting) + for_plotting_merged$Sample <- as.factor(for_plotting_merged$Sample) + for_plotting_merged$mean <- for_plotting_merged$X2 + + data_plot <- data.frame( + "F0_P" = data_f0_b6[gene, ], + "F0_M" = data_f0_cast[gene, ], + "F1_P" = data_f1_b6[gene, ], + "F1_M" = data_f1_cast[gene, ] + ) + + norm_factors <- c(sum(colSums(data_f0_b6)), + sum(colSums(data_f0_cast))) + norm_factors <- norm_factors / norm_factors[[1]] + + data_plot[,1:2] <- data.frame(t(t(data_plot[,1:2]) / norm_factors)) + data_plot$Interval <- 1:nrow(data_plot) + for_plotting_merged$Sample <- ifelse(for_plotting_merged$Sample == 1, "F0", "F1") + + ggplot(for_plotting_merged, aes(X1, y = rev_logit(X2), col = Sample)) + + geom_line(linetype = "dashed", size = 1.5) + + scale_color_manual(values = c("F0" = "black", "F1" = "darkgrey")) + + # geom_ribbon(aes(ymin = rev_logit(error_lower), ymax = rev_logit(error_upper), fill = Sample), + # alpha = 0.2) + + theme(legend.position="top") + + geom_point(data = data_plot, aes(Interval, (F0_P) / (F0_M + F0_P)), col = "black", size = 2) + + geom_point(data = data_plot, aes(Interval, (F1_P) / (F1_M + F1_P)), col = "darkgrey", size = 2) + + xlim(0, 100) + scale_fill_manual(values = c("red", "darkgrey")) + ylim(c(0, 1)) + xlab("Pseudotime") + + ylab("Allelic Imbalance") + theme_classic() + ggtitle(gene) + + geom_hline(yintercept = 0.5, linetype = "dashed") + + theme(text = element_text(size = 30), legend.position = "None") + + scale_x_continuous(breaks = c(0, 50, 100)) + + scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1)) +} + +plot_cis_trans_gene_mean("Dnajc2") +gp_results$Dnajc2[[5]][[4]] - gp_results$Dnajc2[[4]][[4]] # high log-likelihood ratio compared to persistent only model + +``` + +Meanwhile, genes like Ddt, which show strong dynamic cis-effects, do not show evidence for trans-effects + +```{r asdasdasda} + +plot_cis_trans_gene_mean("Ddt") +gp_results$Ddt[[5]][[4]] - gp_results$Ddt[[4]][[4]] # both models fit equally well + +``` + diff --git a/Demonstrations/dynamic_trans_effects.html b/Demonstrations/dynamic_trans_effects.html new file mode 100644 index 0000000..d31f0f1 --- /dev/null +++ b/Demonstrations/dynamic_trans_effects.html @@ -0,0 +1,654 @@ + + + + + + + + + + + + + + + +dynamic_allelic_imbalance + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
knitr::opts_chunk$set(echo = TRUE, message=FALSE, warning=FALSE)
+
setwd("~/Desktop/Projects/ASE_Spermatogenesis_Paper_rerun_Revisions/Scripts/Demonstrations/")
+
+library(ggplot2)
+library(tidyverse)
+library(viridis)
+source("./functions.R")
+

This script demonstrate the analysis of dynamic trans effects during +mouse spermatogenesis in F1 hybrid mice. The F1 model allows to +distinguish transcriptional changes between the strains caused by +cis-effects and trans-effects, because in the F1 hybrid, all the +trans-environment is the same, so all differences are cis-driven, +whereas allelic differences between the F0s are driven by cis + trans +effects. The trans-effect is therefore the residual between allelic +imbalance in F0 and F1. The single-cell resolution of our dataset now +allows to discover dynamic trans effects, where this residual varies +across differentiation. To this end, we use non-parametric regression +models (based on gaussian processes) to discover genes where the two +allelic trajectories diverge significantly. We set up different models +to distinguish the following cases: - both F0 and F1 follow the same +trajectory: there is no trans, but possibly a (dynamic) cis-effect - the +difference in F0 - F1 trajectories is constant: there is a persistent +trans-effect, invariable across differentiation - F0 - F1 varies over +time: there is a dynamic trans-effect

+

For this demonstration, we load allele-specific counts for three +selected genes in the F0 and F1, which have previously been aggregated +across intervals in the pseudotemporal ordering:

+
# # Not run -- subset data on genes to show
+# ratios_all_genes <- readRDS("./normalized_ratios_trans.rds")
+# saveRDS(lapply(ratios_all_genes, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_ratios_trans.rds")
+# total_exp <- readRDS("../../Data/processed/total_exp.rds")
+# total_exp <- total_exp[c("Dnajc2", "Ddt", "Tex21"), ]
+# saveRDS(total_exp, "./test_counts_trans.rds")
+# list_data <- readRDS("../../Data/processed//binned_data.rds")
+# saveRDS(lapply(list_data, function(x){x[c("Dnajc2", "Ddt", "Tex21"), ]}), "./test_counts_trans_per_sample.rds")
+
+allelic_ratios <- readRDS("./test_ratios_trans.rds")
+total_exp <- readRDS("./test_counts_trans.rds")
+
+list_data <- readRDS("./test_counts_trans_per_sample.rds")
+
+data_f0_b6 <- list_data[[1]]
+data_f0_cast <- list_data[[2]]
+data_f1_b6 <- list_data[[3]]
+data_f1_cast <- list_data[[4]]
+

We use gpflow to set up the different gaussian process models. The +different models are encoded by varying mean and covariance functions, +and we use the difference in likelihood to the fit data to evaluate the +significance of the result (see paper for details):

+

+import gpflow
+import numpy as np
+import matplotlib.pyplot as plt
+import tensorflow as tf
+from gpflow.utilities import print_summary, set_trainable, to_default_float
+from gpflow.mean_functions import *
+import matplotlib.pyplot as plt
+import pandas as pd
+
+def plot(m):
+    Xtest = np.linspace(0, 100, 100)[:, None]
+    mu, var = m.predict_f(np.hstack((Xtest, np.zeros_like(Xtest))))
+    out1 = [Xtest, mu.numpy(), var.numpy()]
+    mu, var = m.predict_f(np.hstack((Xtest, np.ones_like(Xtest))))
+    out2 = [Xtest, mu.numpy(), var.numpy()]
+    return([out1, out2])
+
+def fit_vanilla_4(X, Y, model = "trans_dynamic",  n_iterations = 100):
+  #
+  X = np.array(X)
+  Y = np.array(Y)
+  #
+  X = X.astype("float64")
+  Y = Y.astype("float64")
+  #
+  output_dim = 2  # Number of outputs
+  rank = 1  # Rank of W
+  #
+  k = gpflow.kernels.Matern52(active_dims=[0])
+  k_const = gpflow.kernels.Constant(active_dims=[0])
+  #
+  # Coregion kernel
+  coreg = gpflow.kernels.Coregion(output_dim=output_dim, rank=rank, active_dims=[1])
+  if ((model == "conserved") | (model == "cis_static")):
+    kern = k_const * coreg
+  if ((model == "cis_dynamic") | (model == "trans_static") | (model == "trans_dynamic")):
+    kern = k * coreg
+  lik = gpflow.likelihoods.SwitchedLikelihood([gpflow.likelihoods.Gaussian(), gpflow.likelihoods.Gaussian()])
+  #
+  if ((model == "conserved") | (model == "cis_static") | (model == "cis_dynamic")):
+    mean_function = Constant()
+  if ((model == "trans_static") | (model == "trans_dynamic")):
+    mean_function = SwitchedMeanFunction([Constant(), Constant()])
+  m = gpflow.models.VGP((X, Y), kernel=kern, likelihood = lik, mean_function = mean_function)
+  #
+  if ((model == "conserved") | (model == "cis_static") | (model == "cis_dynamic") | ( model == "trans_static")):
+    m.kernel.kernels[1].W.assign(np.array([[1], [1]]))
+    m.kernel.kernels[1].kappa.assign(np.array([0.000001, 0.000001]))
+    set_trainable(m.kernel.kernels[1].kappa, False)
+    set_trainable(m.kernel.kernels[1].W, False)
+  #
+  #
+  if (model == "conserved"):
+    m.mean_function.c.assign([0.5])
+    set_trainable(m.mean_function.c, False)
+  # fit the covariance function parameters
+  gpflow.optimizers.Scipy().minimize(
+    m.training_loss, m.trainable_variables, options=dict(maxiter=n_iterations), method="L-BFGS-B",
+  )
+  predictions = plot(m)
+  covariance = m.kernel.kernels[1].output_covariance().numpy()
+  likelihood = m.maximum_log_likelihood_objective().numpy()
+  return([m, predictions, covariance, likelihood])
+
+

We now fit the different models to the data for each gene. Since we +don’t account for the binomial noise of the data here, we restrict the +analysis to regions of differentiation where the gene is sufficiently +expressed:

+
run_coreg_gp <- function(gene, model = "full"){
+  
+  logit <- function(x) {log(x / (1 - x))}
+  
+  rs_f0_noStab <- allelic_ratios[[1]]
+  rs_f1_noStab <- allelic_ratios[[2]]
+  
+  data_plot_ratio <- data.frame(
+    FC_F0 = rs_f0_noStab[gene, ],
+    FC_F1 = rs_f1_noStab[gene, ],
+    Interval = 1:ncol(rs_f0_noStab)
+  )
+  
+  # get intervals in which gene is expressed
+  intervals_test <- total_exp[gene, ] > 0.5
+  
+  print(table(intervals_test))
+  
+  if (sum(intervals_test) < 5){
+    print("Not enough intervals to test")
+    return(list(NA, NA, NA, NA))
+  }
+  
+  data_plot_ratio <- data_plot_ratio[intervals_test, ]
+  
+  x_test <- as.double(data_plot_ratio$Interval)
+  
+  y1_test <- logit(data_plot_ratio$FC_F0)
+  y2_test <- logit(data_plot_ratio$FC_F1)
+  
+  X_augmented = as.matrix(data.frame(c(x_test, x_test), c(rep(0, length(x_test)), rep(1, length(x_test)))))
+  Y_augmented = as.matrix(data.frame(c(y1_test, y2_test), c(rep(0, length(x_test)), rep(1, length(x_test)))))
+  
+  not_exclude <- !is.infinite(Y_augmented[,1])
+  
+  if(sum(!not_exclude) / length(not_exclude) > 0.1){
+    print("Warning: More than 10% of values are NA")
+  }
+  
+  X_augmented_run <- X_augmented[not_exclude, ]
+  Y_augmented_run <- Y_augmented[not_exclude, ]
+  
+  tryCatch({
+    gp_res_parent_constant = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "conserved")
+    gp_res_parent_cis_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_static")
+    gp_res_parent_cis_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "cis_dynamic")
+    gp_res_parent_trans_static = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_static")
+    gp_res_parent_trans_dynamic = py$fit_vanilla_4(X_augmented_run, Y_augmented_run, model = "trans_dynamic")
+    return(list(gp_res_parent_constant, gp_res_parent_cis_static, gp_res_parent_cis_dynamic, gp_res_parent_trans_static,
+                gp_res_parent_trans_dynamic))
+  }, error=function(cond) {
+    return(NA)
+  })
+}
+
+gp_results <- lapply(rownames(total_exp), run_coreg_gp)
+
## intervals_test
+## FALSE  TRUE 
+##     8    92 
+## intervals_test
+## TRUE 
+##  100 
+## intervals_test
+## FALSE  TRUE 
+##    42    58
+
names(gp_results) <- rownames(total_exp)
+

We can now visualize the results. For, Dnajc2 shows strong divergence +in the allelic trajectories later in differentiation, and shows strong +evidence for a dynamic trans effect:

+
plot_cis_trans_gene_mean <- function(gene){
+  
+  rev_logit <- function(x){
+  1 / (1 + exp(-x))
+  }
+  
+  asd <- gp_results[[gene]]
+  
+  for_plotting <- lapply(1:length(asd[[5]][[2]]), function(i){
+    xx = asd[[5]][[2]][[i]]
+    xx = data.frame(do.call("cbind", xx))
+    xx$Sample = i
+    xx$X3 <- sqrt(xx$X3)
+    xx})
+  
+  for_plotting_merged <- do.call("rbind", for_plotting)
+  for_plotting_merged$Sample <- as.factor(for_plotting_merged$Sample)
+  for_plotting_merged$mean <- for_plotting_merged$X2
+
+  data_plot <- data.frame(
+    "F0_P" = data_f0_b6[gene, ],
+    "F0_M" = data_f0_cast[gene, ],
+    "F1_P" = data_f1_b6[gene, ],
+    "F1_M" = data_f1_cast[gene, ]
+  )
+  
+  norm_factors <- c(sum(colSums(data_f0_b6)), 
+                    sum(colSums(data_f0_cast)))
+  norm_factors <- norm_factors / norm_factors[[1]]
+  
+  data_plot[,1:2] <- data.frame(t(t(data_plot[,1:2]) / norm_factors))
+  data_plot$Interval <- 1:nrow(data_plot)
+  for_plotting_merged$Sample <- ifelse(for_plotting_merged$Sample == 1, "F0", "F1")
+  
+  ggplot(for_plotting_merged, aes(X1, y = rev_logit(X2), col = Sample)) + 
+    geom_line(linetype = "dashed", size = 1.5) + 
+    scale_color_manual(values = c("F0" = "black", "F1" = "darkgrey")) + 
+    # geom_ribbon(aes(ymin = rev_logit(error_lower), ymax = rev_logit(error_upper), fill = Sample), 
+    #             alpha = 0.2) + 
+    theme(legend.position="top") + 
+    geom_point(data = data_plot, aes(Interval, (F0_P) / (F0_M + F0_P)), col = "black", size = 2) + 
+    geom_point(data = data_plot, aes(Interval, (F1_P) / (F1_M + F1_P)), col = "darkgrey", size = 2) + 
+    xlim(0, 100) + scale_fill_manual(values = c("red", "darkgrey")) + ylim(c(0, 1)) + xlab("Pseudotime") + 
+    ylab("Allelic Imbalance") + theme_classic() + ggtitle(gene) + 
+    geom_hline(yintercept = 0.5, linetype = "dashed") + 
+    theme(text = element_text(size = 30), legend.position = "None") + 
+    scale_x_continuous(breaks = c(0, 50, 100)) + 
+    scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1))
+}
+
+plot_cis_trans_gene_mean("Dnajc2")
+

+
gp_results$Dnajc2[[5]][[4]] - gp_results$Dnajc2[[4]][[4]] # high log-likelihood ratio compared to persistent only model
+
## [1] 42.58957
+

Meanwhile, genes like Ddt, which show strong dynamic cis-effects, do +not show evidence for trans-effects

+
plot_cis_trans_gene_mean("Ddt")
+

+
gp_results$Ddt[[5]][[4]] - gp_results$Ddt[[4]][[4]] # both models fit equally well
+
## [1] -0.4489531
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/Demonstrations/functions.R b/Demonstrations/functions.R new file mode 100644 index 0000000..827c5b8 --- /dev/null +++ b/Demonstrations/functions.R @@ -0,0 +1,241 @@ +counts_reference <- function(sce){ + assays(sce)[["counts_reference"]] +} + +counts_alternative <- function(sce){ + assays(sce)[["counts_alternative"]] +} + +allelic_ratios <- function(sce){ + assays(sce)[["allelic_ratios"]] +} + +# initialize python path -- this should point +library(reticulate) +use_python("/Users/jasper/Library/r-miniconda/bin/python", required = T) +dali_path = "~/Desktop/PhD/Projects/ASE_Spermatogenesis/Scripts/dali/" + +# define R wrappers around scDALI functions +test_regions_R <- function(A, D, + cell_state, + n_cores = 1L) +{ + source_python(paste0(dali_path, "/dali/my_test_regions.py")) + test_regions(np_array(A), + np_array(D), + np_array(cell_state), + n_cores = n_cores) +} + +test_mean_R <- function(a, d, mean_mu = 0.5) +{ + source_python(paste0(dali_path, "/dali/my_test_mean.py")) + res = test_mean(np_array(a), + np_array(d), + mean_mu = mean_mu) + names(res) = c("Mean", "Theta", "NLL_null", "NLL_alt", "pval") + res +} + +run_gp_R <- function(A, D, + cell_state, + kernel = "Linear", + n_cores = 1L) +{ + source_python(paste0(dali_path, "/dali/my_run_gp.py")) + res = run_gp(np_array(A), + np_array(D), + np_array(cell_state), + kernel = kernel, + n_cores = n_cores) + res +} + +# This function takes a cell x genes matrix and a pseudotemporal ordering vector and +# partitions the matrix into n_partitions intervals with even width in pseudotime space, +# computing average values per gene +make_pseudotime_smooth <- function(data, pt, n_partitions = 100){ + pseudotime_here <- pt + pseudotime_here <- pseudotime_here - min(pseudotime_here) + pseudotime_here <- pseudotime_here / max(pseudotime_here) # between 0 and 1 + intervals <- c(0, 1:n_partitions / n_partitions) + + expression_smoothed <- do.call("cbind", + lapply(1:n_partitions, function(i){ + p_lower <- intervals[i] + p_upper <- intervals[i + 1] + ix <- which(p_lower <= pseudotime_here & pseudotime_here <= p_upper) + rowMeans(data[,ix]) + })) + expression_smoothed +} + +### plotting functions +plot_gene_GP <- + function(sce, gene, latent_ase, remove_zero = F, scale_var = 1.96, textsize = 20){ + data_test <- data.frame( + pt_here = sce$Pseudotime, + exp_ref = as.numeric(counts_reference(sce[gene,])), + exp_alt = as.numeric(counts_alternative(sce[gene,])), + #ase = as.numeric(allelic_ratios(sce[gene,])), + latent_ase = latent_ase$posterior_mean, + latent_var_lower = latent_ase$posterior_mean - scale_var * sqrt(latent_ase$posterior_var), + latent_var_upper = latent_ase$posterior_mean + scale_var * sqrt(latent_ase$posterior_var) + ) + data_test$ase <- data_test$exp_ref / (data_test$exp_alt + data_test$exp_ref) + + if (remove_zero){ + data_test <- data_test[data_test$exp_ref + data_test$exp_alt > 0,] + } + + data_test$exp_total = data_test$exp_ref + data_test$exp_alt + data_test <- data_test[order(data_test$pt),] + + print(summary(data_test$pt_here)) + + p1 <- ggplot(data_test) + + geom_jitter(aes(pt_here, exp_ref, color = "B6"), alpha = 0.2, size = 0.2, height = 0.01) + + geom_jitter(aes(pt_here, exp_alt, color = "Cast"), alpha = 0.2, size = 0.2, height = 0.01) + + geom_smooth(aes(pt_here, exp_ref, color = "B6"), size = 1) + + geom_smooth(aes(pt_here, exp_alt, color = "Cast"), size = 1) + + theme_classic() + xlim(c(0, max(data_test$pt_here + 1))) + xlab("") + ylab("Expression") + + theme(legend.position="top") + + scale_y_log10(expand = c(0, 0)) + + scale_color_manual(values = c("B6" = "black", "Cast" = "chocolate")) + + theme(text = element_text(size = textsize)) + + theme(axis.line.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + axis.title.x = element_blank()) + + ggtitle(gene) + p1 + + p2 <- ggplot(data_test, aes(pt_here, ase)) + + ylim(c(0, 1)) + geom_point(width = 0.1, height = 0.02, size = 0.1) + + scale_color_viridis() + + geom_line(aes(pt_here, latent_ase), color = "green") + + geom_ribbon(aes(x = pt_here, ymin = latent_var_lower, ymax = latent_var_upper), + color = "green", alpha = 0.2) + + theme_classic() + xlim(c(0, max(data_test$pt_here + 1))) + + xlab("Pseudotime") + ylab("Allelic Bias") + + theme(text = element_text(size = textsize)) + + scale_y_continuous(breaks = c(0, 0.5, 1), expand = c(0, 0)) + + theme(axis.line.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + axis.title.x = element_blank()) + + + cowplot::plot_grid(p1, p2, + nrow = 2, align = "v", + rel_heights = c(0.4, 0.6)) + } + +plot_gene <- + function(sce, gene, remove_zero = F, scale_var = 1.96, textsize = 20){ + data_test <- data.frame( + pt_here = sce$Pseudotime, + exp_ref = as.numeric(counts_reference(sce[gene,])), + exp_alt = as.numeric(counts_alternative(sce[gene,])) + ) + data_test$ase <- data_test$exp_ref / (data_test$exp_alt + data_test$exp_ref) + + if (remove_zero){ + data_test <- data_test[data_test$exp_ref + data_test$exp_alt > 0,] + } + + data_test$exp_total = data_test$exp_ref + data_test$exp_alt + data_test <- data_test[order(data_test$pt),] + + print(summary(data_test$pt_here)) + + p1 <- ggplot(data_test) + + geom_jitter(aes(pt_here, exp_ref, color = "B6"), alpha = 0.2, size = 0.2, height = 0.01) + + geom_jitter(aes(pt_here, exp_alt, color = "Cast"), alpha = 0.2, size = 0.2, height = 0.01) + + geom_smooth(aes(pt_here, exp_ref, color = "B6"), size = 1) + + geom_smooth(aes(pt_here, exp_alt, color = "Cast"), size = 1) + + theme_classic() + xlim(c(0, max(data_test$pt_here + 1))) + xlab("") + ylab("Expression") + + theme(legend.position="top") + + scale_y_log10(expand = c(0, 0)) + + scale_color_manual(values = c("B6" = "black", "Cast" = "chocolate")) + + theme(text = element_text(size = textsize)) + + theme(axis.line.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + axis.title.x = element_blank()) + + ggtitle(gene) + p1 + + p2 <- ggplot(data_test, aes(pt_here, ase)) + + ylim(c(0, 1)) + geom_point(width = 0.1, height = 0.02, size = 0.1) + + scale_color_viridis() + + theme_classic() + xlim(c(0, max(data_test$pt_here + 1))) + + xlab("Pseudotime") + ylab("Allelic Bias") + + theme(text = element_text(size = textsize)) + + scale_y_continuous(breaks = c(0, 0.5, 1), expand = c(0, 0)) + + theme(axis.line.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank(), + axis.title.x = element_blank()) + + + cowplot::plot_grid(p1, p2, + nrow = 2, align = "v", + rel_heights = c(0.4, 0.6)) + } + + + +#### + + + +plot_cis_trans_gene_mean <- function(gene){ + + rev_logit <- function(x){ + 1 / (1 + exp(-x)) + } + + asd <- results_per_gene_expressed_intervals[[gene]] + + for_plotting <- lapply(1:length(asd[[5]][[2]]), function(i){ + xx = asd[[5]][[2]][[i]] + xx = data.frame(do.call("cbind", xx)) + xx$Sample = i + xx$X3 <- sqrt(xx$X3) + xx}) + + for_plotting_merged <- do.call("rbind", for_plotting) + for_plotting_merged$Sample <- as.factor(for_plotting_merged$Sample) + for_plotting_merged$mean <- for_plotting_merged$X2 + + data_plot <- data.frame( + "F0_P" = data_f0_b6[gene, ], + "F0_M" = data_f0_cast[gene, ], + "F1_P" = data_f1_b6[gene, ], + "F1_M" = data_f1_cast[gene, ] + ) + + norm_factors <- c(sum(colSums(data_f0_b6)), + sum(colSums(data_f0_cast))) + norm_factors <- norm_factors / norm_factors[[1]] + + data_plot[,1:2] <- data.frame(t(t(data_plot[,1:2]) / norm_factors)) + data_plot$Interval <- 1:nrow(data_plot) + for_plotting_merged$Sample <- ifelse(for_plotting_merged$Sample == 1, "F0", "F1") + + ggplot(for_plotting_merged, aes(X1, y = rev_logit(X2), col = Sample)) + + geom_line(linetype = "dashed", size = 1.5) + + scale_color_manual(values = c("F0" = "black", "F1" = "darkgrey")) + + # geom_ribbon(aes(ymin = rev_logit(error_lower), ymax = rev_logit(error_upper), fill = Sample), + # alpha = 0.2) + + theme(legend.position="top") + + geom_point(data = data_plot, aes(Interval, (F0_P) / (F0_M + F0_P)), col = "black", size = 2) + + geom_point(data = data_plot, aes(Interval, (F1_P) / (F1_M + F1_P)), col = "darkgrey", size = 2) + + xlim(0, 100) + scale_fill_manual(values = c("red", "darkgrey")) + ylim(c(0, 1)) + xlab("Pseudotime") + + ylab("Allelic Imbalance") + theme_classic() + ggtitle(gene) + + geom_hline(yintercept = 0.5, linetype = "dashed") + + theme(text = element_text(size = 30), legend.position = "None") + + scale_x_continuous(breaks = c(0, 50, 100)) + + scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1)) +} \ No newline at end of file diff --git a/Demonstrations/test_counts_trans.rds b/Demonstrations/test_counts_trans.rds new file mode 100644 index 0000000..5076326 Binary files /dev/null and b/Demonstrations/test_counts_trans.rds differ diff --git a/Demonstrations/test_counts_trans_per_sample.rds b/Demonstrations/test_counts_trans_per_sample.rds new file mode 100644 index 0000000..ef2189e Binary files /dev/null and b/Demonstrations/test_counts_trans_per_sample.rds differ diff --git a/Demonstrations/test_ratios_trans.rds b/Demonstrations/test_ratios_trans.rds new file mode 100644 index 0000000..1fc8f85 Binary files /dev/null and b/Demonstrations/test_ratios_trans.rds differ