Skip to content
This repository has been archived by the owner on Apr 13, 2024. It is now read-only.

Commit

Permalink
Add Count_score for maxMAFstats.R
Browse files Browse the repository at this point in the history
  • Loading branch information
wanyuac committed Jul 18, 2018
1 parent c7a79fb commit f5afcdf
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
4 changes: 2 additions & 2 deletions unicity_assessment/maxMAF_vs_readDepth.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# Author: Yu Wan <wanyuac@gmail.com>
# Copyright 2018 Yu Wan
# Licensed under the Apache License, Version 2.0
# First edition: 9 Mar 2018; the latest edition: 17 July 2018
# First edition: 9 Mar 2018; the latest edition: 18 July 2018

# Read arguments from the command line ###############
library(optparse)
Expand Down Expand Up @@ -142,7 +142,7 @@ pl <- ggplot(data = tab, mapping = aes(x = "", y = maxMAF)) +
# drawn at a log scale.
pb <- ggplot(data = tab, mapping = aes(x = "", y = AG_ratio)) +
geom_boxplot(outlier.size = opts$point_size) +
labs(x = NULL, y = "Allele-genome depth ratio") +
labs(x = NULL, y = "Allele-genome read depth ratio") +
scale_y_continuous(trans = "log2") + coord_flip() + theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(size = 10), axis.title.x = element_text(size = 12),
Expand Down
22 changes: 16 additions & 6 deletions unicity_assessment/maxMAFstats.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# Author: Yu Wan <wanyuac@gmail.com>
# Copyright 2018 Yu Wan
# Licensed under the Apache License, Version 2.0
# First edition: 31 Jan 2018, the latest edition: 17 Jul 2018
# First edition: 31 Jan 2018, the latest edition: 18 Jul 2018

# Define functions ###############
searchAlleleID <- function(strain, allele, mapping) {
Expand Down Expand Up @@ -149,17 +149,27 @@ summariseMaxMAF <- function(m, pam) {
# m: a matrix of maxMAF; pam: an allelic PAM, which is used for counting the number of occurrences
# Initialisation
alleles <- colnames(pam)
sumry <- data.frame(Allele = character(0), Count = integer(0),

# Construct the summary table
# Count_score: number of alleles having scores and maxMAF information. Count_score <= Count.
sumry <- data.frame(Allele = character(0), Count = integer(0), Count_score = integer(0),
Max = numeric(0), P75 = numeric(0), Median = numeric(0),
P25 = numeric(0), Min = numeric(0), stringsAsFactors = FALSE) # the summary table
P25 = numeric(0), Min = numeric(0), stringsAsFactors = FALSE)

# Compute summary statistics
for (a in alleles) {
n <- sum(pam[, a]) # number of occurrence events, given 1 for presence and 0 for absence
maxmaf_qu <- as.numeric(quantile(m[, a], probs = c(0, 0.25, 0.5, 0.75, 1),
na.rm = TRUE))
maxmafs <- m[, a]
maxmafs <- maxmafs[!is.na(maxmafs)]
k <- length(maxmafs)
if (k > 0) {
maxmaf_qu <- as.numeric(quantile(maxmafs, probs = c(0, 0.25, 0.5, 0.75, 1)))
} else { # no maxmaf information for this allele at all
maxmaf_qu <- rep(NA, times = 5)
}

sumry <- rbind.data.frame(sumry,
data.frame(Allele = a, Count = n,
data.frame(Allele = a, Count = n, Count_score = k,
Max = maxmaf_qu[5], P75 = maxmaf_qu[4],
Median = maxmaf_qu[3], P25 = maxmaf_qu[2],
Min = maxmaf_qu[1], stringsAsFactors = FALSE),
Expand Down

0 comments on commit f5afcdf

Please sign in to comment.