Skip to content

Commit

Permalink
update case studies
Browse files Browse the repository at this point in the history
  • Loading branch information
linquynus committed Sep 16, 2019
1 parent 320be6f commit 9606d62
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 50 deletions.
121 changes: 75 additions & 46 deletions inst/case_study/case_study_of_CEBPB.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ library(gplots)
# CEBPB motifs in TFregulomeR
CEBPB_record <- dataBrowser(tf = "CEBPB")

# (Meth)Motif logo for all CEBPB in TFregulomeR compendium for Figure S1
# (Meth)Motif logo for all CEBPB in TFregulomeR compendium for Supplementary Figure 1
for (id in CEBPB_record$ID){
motif_matrix <- searchMotif(id = id)
plotLogo(MM_object = motif_matrix)
Expand All @@ -31,18 +31,18 @@ for (i in seq(1,16,1)){
}
K562_CEBPB_all_peaks$sum <- apply(K562_CEBPB_all_peaks[,CEBPB_record$cell_tissue_name],1,sum)

# Peak numbers, MethMotif logs and read enrichments for 16 K562 CEBPB sub-ensembles for Figure S2 and Figure 1A
# Peak numbers, MethMotif logs and read enrichments for 16 K562 CEBPB sub-ensembles for Supplementary Figure 2 and Figure 1A
sub_ensemble_peak_num <- c()
sub_ensemble_read_score <- as.data.frame(matrix(nrow = nrow(K562_CEBPB_all_peaks),
sub_ensemble_read_score <- as.data.frame(matrix(nrow = nrow(K562_CEBPB_all_peaks),
ncol = 2))
colnames(sub_ensemble_read_score) <- c("cell_type_num", "read_fold_change")
sub_ensemble_read_score$cell_type_num <- K562_CEBPB_all_peaks$sum
for (i in seq(1,16,1)){
peak_subset_i <- K562_CEBPB_all_peaks[which(K562_CEBPB_all_peaks$sum==i),]
sub_ensemble_peak_num <- c(sub_ensemble_peak_num, nrow(peak_subset_i))
sub_ensemble_read_score[which(sub_ensemble_read_score$cell_type_num==i),'read_fold_change'] <-
sub_ensemble_read_score[which(sub_ensemble_read_score$cell_type_num==i),'read_fold_change'] <-
peak_subset_i$tag_fold_change

# MethMotif logo
common_peak_i <- commonPeaks(user_target_peak_list = list(peak_subset_i[,1:5]),
user_target_peak_id = "MM1_HSA_K562_CEBPB",
Expand All @@ -56,7 +56,7 @@ for (i in seq(1,16,1)){
sub_ensemble_read_score$cell_type_num <- factor(sub_ensemble_read_score$cell_type_num,
levels = seq(1,16,1))
pdf("read_enrichment_scores_across_16_K562_CEBPB_sub-ensembles.pdf")
boxplot(read_fold_change~cell_type_num,sub_ensemble_read_score,
boxplot(read_fold_change~cell_type_num,sub_ensemble_read_score,
xlab = "number of cell types sharing the peaks",
ylab = "Read enrichment score", outline=FALSE)
dev.off()
Expand Down Expand Up @@ -84,19 +84,19 @@ cofactor_16_subsets_matrix <- cofactor_16_subsets_res$intersection_matrix
cofactor_16_subsets_matrix_t <- as.data.frame(t(cofactor_16_subsets_matrix))
# filter out cofactor whose binding percents are less than 5 in all sub-ensembles
cofactor_16_subsets_matrix_filtered <- cofactor_16_subsets_matrix_t[!(cofactor_16_subsets_matrix_t$user_peak_x1<=5 &
cofactor_16_subsets_matrix_t$user_peak_x2<=5 &
cofactor_16_subsets_matrix_t$user_peak_x3<=5 &
cofactor_16_subsets_matrix_t$user_peak_x2<=5 &
cofactor_16_subsets_matrix_t$user_peak_x3<=5 &
cofactor_16_subsets_matrix_t$user_peak_x4<=5 &
cofactor_16_subsets_matrix_t$user_peak_x5<=5 &
cofactor_16_subsets_matrix_t$user_peak_x6<=5 &
cofactor_16_subsets_matrix_t$user_peak_x7<=5 &
cofactor_16_subsets_matrix_t$user_peak_x8<=5 &
cofactor_16_subsets_matrix_t$user_peak_x5<=5 &
cofactor_16_subsets_matrix_t$user_peak_x6<=5 &
cofactor_16_subsets_matrix_t$user_peak_x7<=5 &
cofactor_16_subsets_matrix_t$user_peak_x8<=5 &
cofactor_16_subsets_matrix_t$user_peak_x9<=5 &
cofactor_16_subsets_matrix_t$user_peak_x10<=5 &
cofactor_16_subsets_matrix_t$user_peak_x11<=5 &
cofactor_16_subsets_matrix_t$user_peak_x12<=5 &
cofactor_16_subsets_matrix_t$user_peak_x13<=5 &
cofactor_16_subsets_matrix_t$user_peak_x14<=5 &
cofactor_16_subsets_matrix_t$user_peak_x10<=5 &
cofactor_16_subsets_matrix_t$user_peak_x11<=5 &
cofactor_16_subsets_matrix_t$user_peak_x12<=5 &
cofactor_16_subsets_matrix_t$user_peak_x13<=5 &
cofactor_16_subsets_matrix_t$user_peak_x14<=5 &
cofactor_16_subsets_matrix_t$user_peak_x15<=5 &
cofactor_16_subsets_matrix_t$user_peak_x16<=5),]
color <- colorRampPalette(c("white","#D46A6A", "#801515", "#550000"))
Expand Down Expand Up @@ -133,50 +133,50 @@ for (i in seq(1,16,1)){
tag_density_value = "median",
return_methylation_profile = TRUE,
angle_of_methylation_profile = "x")

meth_matrix_i <- cofactor_in_subsets_res_i$methylation_profile_matrix
CEBPB_CEBPD_meth_i <- meth_matrix_i["MM1_HSA_K562_CEBPB","MM1_HSA_K562_CEBPD"][[1]]
CEBPB_CEBPD_meth_value_i <- sum(CEBPB_CEBPD_meth_i[9:10])*100/sum(CEBPB_CEBPD_meth_i)
CEBPB_CEBPD_meth_value <- c(CEBPB_CEBPD_meth_value, CEBPB_CEBPD_meth_value_i)

CEBPB_ATF4_meth_i <- meth_matrix_i["MM1_HSA_K562_CEBPB","MM1_HSA_K562_ATF4"][[1]]
CEBPB_ATF4_meth_value_i <- sum(CEBPB_ATF4_meth_i[9:10])*100/sum(CEBPB_ATF4_meth_i)
CEBPB_ATF4_meth_value <- c(CEBPB_ATF4_meth_value, CEBPB_ATF4_meth_value_i)

tag_density_median_i <- cofactor_in_subsets_res_i$tag_density_matrix
CEBPB_CEBPD_tag_median <- c(CEBPB_CEBPD_tag_median,
CEBPB_CEBPD_tag_median <- c(CEBPB_CEBPD_tag_median,
tag_density_median_i["MM1_HSA_K562_CEBPB",
"MM1_HSA_K562_CEBPD"])
CEBPB_ATF4_tag_median <- c(CEBPB_ATF4_tag_median,
CEBPB_ATF4_tag_median <- c(CEBPB_ATF4_tag_median,
tag_density_median_i["MM1_HSA_K562_CEBPB",
"MM1_HSA_K562_ATF4"])

cofactor_tag_q1_i <- intersectPeakMatrixResult(intersectPeakMatrix = cofactor_in_subsets_i,
return_tag_density = TRUE,
angle_of_tag_density = "x",
tag_density_value = "quartile_25")
tag_density_q1_i <- cofactor_tag_q1_i$tag_density_matrix
CEBPB_CEBPD_tag_q1 <- c(CEBPB_CEBPD_tag_q1,
CEBPB_CEBPD_tag_q1 <- c(CEBPB_CEBPD_tag_q1,
tag_density_q1_i["MM1_HSA_K562_CEBPB",
"MM1_HSA_K562_CEBPD"])
CEBPB_ATF4_tag_q1 <- c(CEBPB_ATF4_tag_q1,
CEBPB_ATF4_tag_q1 <- c(CEBPB_ATF4_tag_q1,
tag_density_q1_i["MM1_HSA_K562_CEBPB",
"MM1_HSA_K562_ATF4"])
cofactor_tag_q3_i <- intersectPeakMatrixResult(intersectPeakMatrix = cofactor_in_subsets_i,
return_tag_density = TRUE,
tag_density_value = "quartile_75",
angle_of_tag_density = "x")
tag_density_q3_i <- cofactor_tag_q3_i$tag_density_matrix
CEBPB_CEBPD_tag_q3 <- c(CEBPB_CEBPD_tag_q3,
CEBPB_CEBPD_tag_q3 <- c(CEBPB_CEBPD_tag_q3,
tag_density_q3_i["MM1_HSA_K562_CEBPB",
"MM1_HSA_K562_CEBPD"])
CEBPB_ATF4_tag_q3 <- c(CEBPB_ATF4_tag_q3,
CEBPB_ATF4_tag_q3 <- c(CEBPB_ATF4_tag_q3,
tag_density_q3_i["MM1_HSA_K562_CEBPB",
"MM1_HSA_K562_ATF4"])
}
pdf("mCG_percentage_of_CEBPB-CEPBD_and_CEBPB-ATF4_in_16_subsets.pdf")
plot(x = seq(1,16,1),
y = CEBPB_CEBPD_meth_value,
y = CEBPB_CEBPD_meth_value,
type="l", ylim = c(0,30), xlim = c(0,17), ylab = "5mC percentage (%)",
xlab = "number of shared cell types", col="blue")
lines(x = seq(1,16,1),
Expand All @@ -185,7 +185,7 @@ dev.off()

pdf("read_enrichments_of_CEBPB-CEPBD_and_CEBPB-ATF4_in_16_subsets.pdf")
plot(x = seq(1,16,1),
y = CEBPB_CEBPD_tag_median,
y = CEBPB_CEBPD_tag_median,
type="l", ylim = c(0,100), xlim = c(0,17), ylab = "tag density median",
xlab = "number of shared cell types", col="blue")
points(x = seq(1,16,1),
Expand Down Expand Up @@ -216,13 +216,13 @@ K562_exclusivePeak_output <- exclusivePeaks(target_peak_id = "MM1_HSA_K562_CEBPB
motif_only_for_target_peak = TRUE,
excluded_peak_id = CEBPB_record_ID_noK562,
motif_only_for_excluded_peak = TRUE)
K562_exclusivePeak_result <- exclusivePeakResult(exclusivePeaks = K562_exclusivePeak_output,
K562_exclusivePeak_result <- exclusivePeakResult(exclusivePeaks = K562_exclusivePeak_output,
return_exclusive_peak_sites = TRUE)
K562_exclusivePeak_peak <- K562_exclusivePeak_result$exclusive_peak_list$MM1_HSA_K562_CEBPB_exclusive_peaks

K562_exclusivePeak_with_ATF4_output <- commonPeaks(user_target_peak_list = list(K562_exclusivePeak_peak),
user_target_peak_id = "MM1_HSA_K562_CEBPB",
compared_peak_id = "MM1_HSA_K562_ATF4",
compared_peak_id = "MM1_HSA_K562_ATF4",
motif_only_for_compared_peak = TRUE)
K562_exclusivePeak_with_ATF4_res <- commonPeakResult(commonPeaks = K562_exclusivePeak_with_ATF4_output,
return_common_peak_sites = TRUE,
Expand All @@ -232,7 +232,7 @@ K562_exclusivePeak_without_ATF4_peaks <- K562_exclusivePeak_peak[!(K562_exclusiv

K562_exclusivePeak_without_ATF4_output <- commonPeaks(user_target_peak_list = list(K562_exclusivePeak_without_ATF4_peaks),
user_target_peak_id = "MM1_HSA_K562_CEBPB",
compared_peak_id = "MM1_HSA_K562_CEBPB",
compared_peak_id = "MM1_HSA_K562_CEBPB",
motif_only_for_compared_peak = TRUE)
commonPeakResult(commonPeaks = K562_exclusivePeak_without_ATF4_output,
save_MethMotif_logo = TRUE)
Expand All @@ -258,51 +258,51 @@ K562_commonPeak_without_CEBPB_peaks <- K562_commonPeak_peak[!(K562_commonPeak_pe

K562_commonPeak_without_ATF4_output <- commonPeaks(user_target_peak_list = list(K562_commonPeak_without_CEBPB_peaks),
user_target_peak_id = "MM1_HSA_K562_CEBPB",
compared_peak_id = "MM1_HSA_K562_CEBPB",
compared_peak_id = "MM1_HSA_K562_CEBPB",
motif_only_for_compared_peak = TRUE)
commonPeakResult(commonPeaks = K562_commonPeak_without_ATF4_output,
save_MethMotif_logo = TRUE)


# motif in shared and exclusive CEBPB targets in all other 15 cell types (Figure S3)
# motif in shared and exclusive CEBPB targets in all other 15 cell types (Supplementary Figure 3)
for (i in CEBPB_record$ID){
common_i <- commonPeaks(target_peak_id = i,
motif_only_for_target_peak = TRUE,
motif_only_for_target_peak = TRUE,
compared_peak_id = CEBPB_record$ID,
motif_only_for_compared_peak = TRUE)
common_i_res <- commonPeakResult(commonPeaks = common_i,
save_MethMotif_logo = TRUE)

# exclude ID i from all CEBPB IDs
cebpd_id_no_i <- CEBPB_record$ID[!(CEBPB_record$ID %in% i)]
exclusive_i <- exclusivePeaks(target_peak_id = i,
motif_only_for_target_peak = TRUE,
excluded_peak_id = cebpd_id_no_i,
excluded_peak_id = cebpd_id_no_i,
motif_only_for_excluded_peak = TRUE)
exclusive_i_res <- exclusivePeakResult(exclusivePeaks = exclusive_i,
exclusive_i_res <- exclusivePeakResult(exclusivePeaks = exclusive_i,
save_MethMotif_logo = TRUE)
}


# functions of CEBPB/CEBPD and CEBPB/ATF4 targets in K562 (Figure S4)
# functions of CEBPB/CEBPD and CEBPB/ATF4 targets in K562 (Supplementary Figure 4)
# load required package for GREAT annotation
library(rGREAT)
# load required package for genomic conversion from hg38 to hg19
library(liftOver)
# all CEBPB-CEBPD co-binding regions in K562
K562_CEBPB_CEBPD <- commonPeaks(target_peak_id = "MM1_HSA_K562_CEBPB",
motif_only_for_target_peak = TRUE,
compared_peak_id = "MM1_HSA_K562_CEBPD",
compared_peak_id = "MM1_HSA_K562_CEBPD",
motif_only_for_compared_peak = TRUE)
K562_CEBPB_CEBPD_res <- commonPeakResult(commonPeaks = K562_CEBPB_CEBPD,
return_common_peak_sites = TRUE,
save_MethMotif_logo = TRUE,
return_common_peak_sites = TRUE,
save_MethMotif_logo = TRUE,
return_summary = TRUE)
K562_CEBPB_CEBPD_res$peak_summary
#> percentage_in_original_inputs(%)
#> MM1_HSA_K562_CEBPB_common_peaks 6.532727
K562_CEBPB_CEBPD_peak <- K562_CEBPB_CEBPD_res$common_peak_list$MM1_HSA_K562_CEBPB_common_peaks
K562_CEBPB_CEBPD_great <- greatAnnotate(peaks = K562_CEBPB_CEBPD_peak,
K562_CEBPB_CEBPD_great <- greatAnnotate(peaks = K562_CEBPB_CEBPD_peak,
return_annotation = TRUE)
K562_CEBPB_CEBPD_great_bp <- K562_CEBPB_CEBPD_great[which(K562_CEBPB_CEBPD_great$category=="BP"),]

Expand All @@ -311,9 +311,9 @@ K562_CEBPB_ATF4 <- commonPeaks(target_peak_id = "MM1_HSA_K562_CEBPB",
motif_only_for_target_peak = TRUE,
compared_peak_id = "MM1_HSA_K562_ATF4",
motif_only_for_compared_peak = TRUE)
K562_CEBPB_ATF4_res <- commonPeakResult(commonPeaks = K562_CEBPB_ATF4,
return_common_peak_sites = TRUE,
save_MethMotif_logo = TRUE,
K562_CEBPB_ATF4_res <- commonPeakResult(commonPeaks = K562_CEBPB_ATF4,
return_common_peak_sites = TRUE,
save_MethMotif_logo = TRUE,
return_summary = TRUE)
K562_CEBPB_ATF4_res$peak_summary
#> percentage_in_original_inputs(%)
Expand All @@ -323,3 +323,32 @@ K562_CEBPB_ATF4_great <- greatAnnotate(peaks = K562_CEBPB_ATF4_peak, return_anno
K562_CEBPB_ATF4_great_bp <- K562_CEBPB_ATF4_great[which(K562_CEBPB_ATF4_great$category=="BP"),]



# identify CEBPB motif in K562 ATF4 peaks(Supplementary Figure 5A)
# 1) plot ATF4 overal motif logo
ATF4_motif <- searchMotif(id="MM1_HSA_K562_ATF4")
plotLogo(MM_object = ATF4_motif)
# 2) get ATF4 peaks co-bound by CEBPB
ATF4_with_CEBPB <- commonPeaks(target_peak_id = "MM1_HSA_K562_ATF4",
motif_only_for_target_peak = TRUE,
compared_peak_id = "MM1_HSA_K562_CEBPB",
motif_only_for_compared_peak = TRUE)
ATF4_with_CEBPB_res <- commonPeakResult(commonPeaks = ATF4_with_CEBPB,
return_common_peak_sites = TRUE,
save_MethMotif_logo = TRUE)
ATF4_peaks_with_CEBPB = ATF4_with_CEBPB_res$common_peak_list$MM1_HSA_K562_ATF4_common_peaks

K562_TFBS <- dataBrowser(cell_tissue_name = "K562")
# 3) get all K562 PWM IDs except CEBPB and ATF4
K562_TFBS_no_CEBPB_ATF4 <- K562_TFBS$ID[(K562_TFBS$ID != "MM1_HSA_K562_ATF4" &
K562_TFBS$ID != "MM1_HSA_K562_CEBPB")]
# 4) filter out the peaks also co-bound by other TFs in the ATF4-CEBPB co-binding peaks obtained at step 2.
ATF4_peaks_with_CEBPB_no_other <- exclusivePeaks(user_target_peak_list = list(ATF4_peaks_with_CEBPB),
user_target_peak_id = "MM1_HSA_K562_ATF4",
excluded_peak_id = K562_TFBS_no_CEBPB_ATF4,
motif_only_for_excluded_peak = TRUE)

ATF4_peaks_with_CEBPB_no_other_res <- exclusivePeakResult(exclusivePeaks = ATF4_peaks_with_CEBPB_no_other,
save_MethMotif_logo = TRUE)


Loading

0 comments on commit 9606d62

Please sign in to comment.