Skip to content

Commit

Permalink
Merge pull request #39 from alephnull7/master
Browse files Browse the repository at this point in the history
Version 1.0.11
  • Loading branch information
michaelgruenstaeudl authored Apr 8, 2024
2 parents b102b44 + e76876b commit de6eebf
Show file tree
Hide file tree
Showing 17 changed files with 174 additions and 129 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
CHANGELOG
---------

#### Version 1.0.10 (2024.03.22)
#### Version 1.0.11 (2024.03.27)
* Minor fixes to tabular statistics file creation
* A coverage of 0 for a window is always labeled as low coverage

#### Version 1.0.10 (2024.03.23)
* Dynamic changes to `IRCheck` as `PACVr.complete()` progresses
* If no samples from `gbkFile` contain the qualifiers needed for the specified analysis, continues with `IRCheck = NA`
* If no IR regions are identified, continues with `IRCheck = NA`
* If there are missing IR regions needed for the synteny test, continues with `IRCheck = 0`
* If no synteny is found, continues with `IRCheck = 0`
* When `tabularCovStats = TRUE`, if IR mismatches are identified, continues with `IRCheck = NA`
* For `tabularCovStats = TRUE`, summary data corresponding to the complete genome is included in `<sampleName>_summary.regions`
* `<sampleName>_coverage.summary.regions` has been renamed `<sampleName>_summary.regions`
* The number of ambiguous nucleotides is always counted, named `N_count`
* When synteny testing is indicated by `IRCheck` and possible, as detailed above, the number of mismatches between the IRs is counted as `IR_mismatches`
* The evenness metric for `tabularCovStats` has been renamed `E_score`

#### Version 1.0.9 (2024.03.01)
* Improving variable and file names
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: PACVr
Version: 1.0.10
Date: 2024-03-22
Version: 1.0.11
Date: 2024-03-27
Title: Plastome Assembly Coverage Visualization
Authors@R: c(person("Gregory", "Smith", role=c("ctb")),
person("Nils", "Jenke", role=c("ctb")),
Expand Down
6 changes: 5 additions & 1 deletion R/AnalysisSpecs.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

AnalysisSpecs <- R6Class("AnalysisSpecs",
public = list(
Expand Down Expand Up @@ -67,6 +67,10 @@ AnalysisSpecs <- R6Class("AnalysisSpecs",
self$regions_name <- "Chromosome"
self$regions_start <- "chromStart"
self$regions_end <- "chromEnd"
} else {
self$regions_name <- "Source"
self$regions_start <- "srcStart"
self$regions_end <- "srcEnd"
}
}
)
Expand Down
2 changes: 1 addition & 1 deletion R/GBKData.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

GBKData <- R6Class("GBKData",
public = list(
Expand Down
4 changes: 2 additions & 2 deletions R/IROps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

checkIREquality <- function(gbkData,
analysisSpecs) {
Expand All @@ -24,7 +24,7 @@ checkIREquality <- function(gbkData,
IR_mismatches <- 0
}

if (IR_mismatches > 0) {
if (IR_mismatches < 0) {
logger::log_warn(
"Proceeding with coverage depth visualization, but without quadripartite genome structure ..."
)
Expand Down
2 changes: 1 addition & 1 deletion R/OutputSpecs.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

OutputSpecs <- R6Class("OutputSpecs",
public = list(
Expand Down
2 changes: 1 addition & 1 deletion R/PACVr.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

PACVr.read.gb <- function(gbkFile) {
gbkRaw <- getGbkRaw(gbkFile)
Expand Down
2 changes: 1 addition & 1 deletion R/RCircosOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"


# The following R functions were taken from the R package RCircos and then modified.
Expand Down
2 changes: 1 addition & 1 deletion R/compileStats.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

PACVr.compileCovStats <- function(gbkData,
coverageRaw,
Expand Down
66 changes: 34 additions & 32 deletions R/coverageCalcOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

CovCalc <- function(coverageRaw,
windowSize = 250,
Expand Down Expand Up @@ -126,39 +126,34 @@ filter_IR_regions <- function(coverageRaw, seqnames, covData, analysisSpecs) {
return(covData)
}

setLowCoverage <- function(covData, analysisSpecs) {
# ir_regions
ir_regions <- covData$ir_regions
regions_name <- analysisSpecs$regions_name
aggFormula <- stats::as.formula(paste("coverage ~", regions_name))
cov_regions <-
aggregate(
aggFormula,
data = ir_regions,
FUN = function(x)
ceiling(mean(x) - sd(x))
)
ir_regions$lowCoverage <- ir_regions$coverage < cov_regions$coverage[match(ir_regions[[regions_name]],
cov_regions[[regions_name]])]
ir_regions$lowCoverage[ir_regions$lowCoverage == TRUE] <- "*"
ir_regions$lowCoverage[ir_regions$lowCoverage == FALSE] <- ""
covData$ir_regions <- ir_regions

# ir_genes
ir_genes <- covData$ir_genes
ir_genes$lowCoverage <- ir_genes$coverage < mean(ir_genes$coverage) - sd(ir_genes$coverage)
ir_genes$lowCoverage[ir_genes$lowCoverage == TRUE] <- "*"
ir_genes$lowCoverage[ir_genes$lowCoverage == FALSE] <- ""
covData$ir_genes <- ir_genes
setLowCoverages <- function(covData, analysisSpecs) {
  # Function to add low-coverage labels to every coverage table in `covData`
  # ARGS:
  #   covData: container holding the tables `ir_regions`, `ir_genes` and
  #            `ir_noncoding`
  #   analysisSpecs: object whose `regions_name` names the grouping column
  #                  of `ir_regions`
  # RETURNS:
  #   `covData` with each of the three tables passed through setLowCoverage()

  # Only the region table is handed a grouping column; the gene and
  # noncoding tables are labelled with setLowCoverage()'s default.
  groupColumn <- analysisSpecs$regions_name
  covData$ir_regions <- setLowCoverage(
    covData$ir_regions,
    groupColumn
  )

  covData$ir_genes <- setLowCoverage(
    covData$ir_genes
  )

  covData$ir_noncoding <- setLowCoverage(
    covData$ir_noncoding
  )

  covData
}

# ir_noncoding
ir_noncoding <- covData$ir_noncoding
ir_noncoding$lowCoverage <- ir_noncoding$coverage < mean(ir_noncoding$coverage) - sd(ir_noncoding$coverage)
ir_noncoding$lowCoverage[ir_noncoding$lowCoverage == TRUE] <- "*"
ir_noncoding$lowCoverage[ir_noncoding$lowCoverage == FALSE] <- ""
covData$ir_noncoding <- ir_noncoding
setLowCoverage <- function(covDataField, regions_name = NULL) {
  # Function to label the low-coverage rows of a single coverage table
  # ARGS:
  #   covDataField: data frame with a numeric `coverage` column
  #   regions_name: optional name of a grouping column in `covDataField`;
  #                 when given, the threshold is computed per group
  # RETURNS:
  #   `covDataField` with an added column `lowCoverage` that holds "*" for
  #   low-coverage rows and "" otherwise

  if (!is.null(regions_name)) {
    # Per-group threshold: ceiling(mean - sd) of the coverage in each group
    aggFormula <- stats::as.formula(paste("coverage ~", regions_name))
    cov_regions <-
      stats::aggregate(
        aggFormula,
        data = covDataField,
        FUN = function(x)
          ceiling(mean(x) - stats::sd(x))
      )
    # Expand the per-group thresholds back to one value per row
    groupIndex <- match(covDataField[[regions_name]],
                        cov_regions[[regions_name]])
    lowThreshold <- cov_regions$coverage[groupIndex]
  } else {
    # Single threshold for the whole table (not rounded, unlike the
    # grouped case above)
    lowThreshold <- mean(covDataField$coverage) -
      stats::sd(covDataField$coverage)
  }

  # A row is low if it falls below the threshold; a coverage of 0 is
  # always low, even when the threshold itself is 0 or negative.
  isLow <- (covDataField$coverage < lowThreshold) |
    (covDataField$coverage == 0)
  covDataField$lowCoverage <- ifelse(isLow, "*", "")
  return(covDataField)
}

# adapted from `nilsj9/PlastidSequenceCoverage`
Expand All @@ -182,6 +177,10 @@ updateRegionsSummary <- function(covSummaries,
covSumRegions <- covSummaries$regions_summary
regions_evenness <- getCovEvenness(covDataRegions,
regions_name)
if (regions_name == "Source") {
covSumRegions[regions_name] <- "Complete_genome"
regions_evenness[regions_name] <- "Complete_genome"
}
covSumRegions <- dplyr::full_join(covSumRegions,
regions_evenness,
regions_name)
Expand Down Expand Up @@ -257,6 +256,9 @@ getCovDepth <- function(covDataField, regions_name = NULL) {
}
covDepth <- covDataField %>%
calcCovDepth()
if (!is.null(regions_name) && regions_name == "Source") {
covDepth[regions_name] <- "Unpartitioned"
}
return(covDepth)
}

Expand Down
2 changes: 1 addition & 1 deletion R/customRead.gb.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

read.gbWithHandling <- function(gbkRaw, count=0) {
gbkData <- tryCatch({
Expand Down
2 changes: 1 addition & 1 deletion R/helpers.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

HistCol <- function(cov, threshold, relative, logScale) {
# Function to generate color vector for histogram data
Expand Down
2 changes: 1 addition & 1 deletion R/parsingOps.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2024.03.22.1637"
#version="2024.03.27.0311"

PACVr.parseGenes <- function (gbkSeqFeatures) {
# Function to extract gene information from Genbank flatfile data
Expand Down
Loading

0 comments on commit de6eebf

Please sign in to comment.