Skip to content

Commit

Permalink
Merge pull request #44 from alephnull7/master
Browse files Browse the repository at this point in the history
Updates to tabular stats and paper figures
  • Loading branch information
michaelgruenstaeudl authored May 25, 2024
2 parents 3cac42c + 1689c2d commit 3d70111
Show file tree
Hide file tree
Showing 26 changed files with 218 additions and 146 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
CHANGELOG
---------

#### Version 1.1.1 (2024.05.20)
#### Version 1.1.2 (2024.05.24)
* Updates to summary tabular statistics files
* Inclusion of unpartitioned statistics for coding and noncoding regions summaries
* Standardize name of summary groups to `Chromosome`
* Update WRSD metric to be expressed in terms of kilobases
* Updates to `scripts_for_figures_tables`
* `Figure1` updated to use manual jittering for boxplot with outlier labels
* Include `Figure1b` in output file
* Changes to value exactness in some tables

#### Version 1.1.1 (2024.05.21)
* Minor fixes to tabular statistics file creation
* A coverage of 0 for a window is always labeled as low coverage

Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: PACVr
Version: 1.1.1
Date: 2024-05-20
Version: 1.1.2
Date: 2024-05-24
Title: Plastome Assembly Coverage Visualization
Authors@R: c(person("Gregory", "Smith", role=c("ctb")),
person("Nils", "Jenke", role=c("ctb")),
Expand Down
22 changes: 4 additions & 18 deletions R/AnalysisSpecs.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

AnalysisSpecs <- R6Class("AnalysisSpecs",
public = list(
Expand All @@ -10,9 +10,9 @@ AnalysisSpecs <- R6Class("AnalysisSpecs",
isIRCheck = FALSE,
windowSize = 250,
isSyntenyLine = FALSE,
regions_name = "Source",
regions_start = "srcStart",
regions_end = "srcEnd",
regions_name = "Chromosome",
regions_start = "chromStart",
regions_end = "chromEnd",

# constructor
initialize = function(IRCheck = NA,
Expand All @@ -26,7 +26,6 @@ AnalysisSpecs <- R6Class("AnalysisSpecs",
private$setSyntenyLineType(IRCheck)
private$setIsIRCheck(IRCheck)
private$setIsSyntenyLine()
private$setCovFields()
}
),

Expand Down Expand Up @@ -59,19 +58,6 @@ AnalysisSpecs <- R6Class("AnalysisSpecs",
# precondition: `syntenyLineType` is set
setIsSyntenyLine = function() {
self$isSyntenyLine <- !is.null(self$syntenyLineType)
},

# precondition: `isIRCheck` is set
setCovFields = function() {
if (self$isIRCheck) {
self$regions_name <- "Chromosome"
self$regions_start <- "chromStart"
self$regions_end <- "chromEnd"
} else {
self$regions_name <- "Source"
self$regions_start <- "srcStart"
self$regions_end <- "srcEnd"
}
}
)
)
Expand Down
2 changes: 1 addition & 1 deletion R/GBKData.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

GBKData <- R6Class("GBKData",
public = list(
Expand Down
2 changes: 1 addition & 1 deletion R/IROps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

checkIREquality <- function(gbkData,
analysisSpecs) {
Expand Down
2 changes: 1 addition & 1 deletion R/OutputSpecs.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

OutputSpecs <- R6Class("OutputSpecs",
public = list(
Expand Down
2 changes: 1 addition & 1 deletion R/PACVr.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

PACVr.read.gb <- function(gbkFile) {
gbkRaw <- getGbkRaw(gbkFile)
Expand Down
2 changes: 1 addition & 1 deletion R/RCircosOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"


# The following R functions were taken from the R package RCircos and then modified.
Expand Down
2 changes: 1 addition & 1 deletion R/compileStats.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

PACVr.compileCovStats <- function(gbkData,
coverageRaw,
Expand Down
91 changes: 56 additions & 35 deletions R/coverageCalcOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

CovCalc <- function(coverageRaw,
windowSize = 250,
Expand Down Expand Up @@ -159,41 +159,58 @@ setLowCoverage <- function(covDataField, regions_name = NULL) {
# adapted from `nilsj9/PlastidSequenceCoverage`
getCovSummaries <- function(covData,
analysisSpecs) {
regions_name <- analysisSpecs$regions_name

covData <- filterCovData(covData,
analysisSpecs)
covSummaries <- getCovDepths(covData,
regions_name)
covSummaries <- updateRegionsSummary(covSummaries,
covData$ir_regions,
regions_name)
analysisSpecs$regions_name)
covSummaries <- updateSummaries(covSummaries,
covData,
analysisSpecs)
return(covSummaries)
}

# Passes each of the three summary tables in `covSummaries` (regions, genes,
# noncoding) through `updateCovSummary()` together with the matching
# `covData` field, and returns the updated `covSummaries`.
updateSummaries <- function(covSummaries,
                            covData,
                            analysisSpecs) {
  # Only the regions table passes TRUE as the fourth argument; the genes and
  # noncoding tables rely on that argument's default.
  regionsUpdated <- updateCovSummary(
    covSummaries$regions_summary,
    covData$ir_regions,
    analysisSpecs,
    TRUE
  )
  genesUpdated <- updateCovSummary(
    covSummaries$genes_summary,
    covData$ir_genes,
    analysisSpecs
  )
  noncodingUpdated <- updateCovSummary(
    covSummaries$noncoding_summary,
    covData$ir_noncoding,
    analysisSpecs
  )

  covSummaries$regions_summary <- regionsUpdated
  covSummaries$genes_summary <- genesUpdated
  covSummaries$noncoding_summary <- noncodingUpdated
  covSummaries
}

updateRegionsSummary <- function(covSummaries,
covDataRegions,
regions_name) {
covSumRegions <- covSummaries$regions_summary
regions_evenness <- getCovEvenness(covDataRegions,
regions_name)
if (regions_name == "Source") {
covSumRegions[regions_name] <- "Complete_genome"
regions_evenness[regions_name] <- "Complete_genome"
updateCovSummary <- function(covSummary,
covDataField,
analysisSpecs,
isRegions = FALSE) {
regions_name <- analysisSpecs$regions_name
isIRCheck <- analysisSpecs$isIRCheck

cov_evenness <- getCovEvenness(covDataField,
regions_name)
if (!isIRCheck) {
unpartName <- ifelse(isRegions, "Complete_genome", "Unpartitioned")
covSummary[regions_name] <- unpartName
cov_evenness[regions_name] <- unpartName
}
covSumRegions <- dplyr::full_join(covSumRegions,
regions_evenness,
regions_name)

if (regions_name != "Source") {
genome_summary <- getGenomeSummary(covDataRegions,
regions_name)
covSumRegions <- dplyr::bind_rows(covSumRegions,
genome_summary)
covSummary <- dplyr::full_join(covSummary,
cov_evenness,
regions_name)

if (isIRCheck) {
genome_summary <- getGenomeSummary(covDataField,
regions_name,
isRegions)
covSummary <- dplyr::bind_rows(covSummary,
genome_summary)
}

covSummaries$regions_summary <- covSumRegions
return(covSummaries)
return(covSummary)
}

filterCovData <- function(covData,
Expand Down Expand Up @@ -256,7 +273,7 @@ getCovDepth <- function(covDataField, regions_name = NULL) {
}
covDepth <- covDataField %>%
calcCovDepth()
if (!is.null(regions_name) && regions_name == "Source") {
if (nrow(covDepth) == 1) {
covDepth[regions_name] <- "Unpartitioned"
}
return(covDepth)
Expand All @@ -275,19 +292,19 @@ calcCovDepth <- function(df) {
lowCovWin_abs = sum(lowCoverage == "*", na.rm = TRUE),
regionLen = sum(length, na.rm = TRUE),
.groups = "drop") %>%
addLowCovWin_relToRegionLen()
addLowCovWin_perKilobase()
)
}

addLowCovWin_relToRegionLen <- function(df) {
addLowCovWin_perKilobase <- function(df) {
lowCovWin_abs <-
lowCovWin_relToRegionLen <-
lowCovWin_perKilobase <-
regionLen <-
NULL

return (
df %>%
dplyr::mutate(lowCovWin_relToRegionLen = lowCovWin_abs / regionLen)
dplyr::mutate(lowCovWin_perKilobase = lowCovWin_abs / regionLen * 1000)
)
}

Expand Down Expand Up @@ -326,13 +343,17 @@ evennessScore <- function(coverage) {
return(E)
}

getGenomeSummary <- function(covDataField, regions_name) {
getGenomeSummary <- function(covDataField, regions_name, isRegions) {
genome_depth <- getCovDepth(covDataField)
genome_evenness <- getCovEvenness(covDataField)

genome_summary <- genome_depth %>%
dplyr::bind_cols(genome_evenness)
genome_summary[regions_name] <- "Complete_genome"
if (isRegions) {
genome_summary[regions_name] <- "Complete_genome"
} else {
genome_summary[regions_name] <- "Unpartitioned"
}
return(genome_summary)
}

2 changes: 1 addition & 1 deletion R/customRead.gb.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

read.gbWithHandling <- function(gbkRaw, count=0) {
gbkData <- tryCatch({
Expand Down
2 changes: 1 addition & 1 deletion R/helpers.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

HistCol <- function(cov, threshold, relative, logScale) {
# Function to generate color vector for histogram data
Expand Down
2 changes: 1 addition & 1 deletion R/parsingOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

PACVr.parseGenes <- function (gbkSeqFeatures) {
# Function to extract gene information from Genbank flatfile data
Expand Down
2 changes: 1 addition & 1 deletion R/quadripOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

FilterByKeywords <- function(allRegions, where) {
# Function to filter list based on genomic keywords
Expand Down
2 changes: 1 addition & 1 deletion R/readingOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

read.gbSeqFeaturesAdapt <- function(gbkData, analysisSpecs) {
gbkSeqFeatures <- read.gbSeqFeatures(gbkData,
Expand Down
2 changes: 1 addition & 1 deletion R/visualisationOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

vizWithRCircos <- function(gbkData,
coverage,
Expand Down
2 changes: 1 addition & 1 deletion R/writingOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.05.20.2131"
#version="2024.05.24.2053"

writeCovTables <- function(covData, sample_name, dir) {
writeStatsTable(covData$ir_genes, sample_name, dir, "coverage.genes")
Expand Down
Loading

0 comments on commit 3d70111

Please sign in to comment.