Skip to content

Commit

Permalink
Update to version 1.0.6
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelgruenstaeudl committed Jan 8, 2024
1 parent 23939ee commit fa24f71
Show file tree
Hide file tree
Showing 15 changed files with 156 additions and 244 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
CHANGELOG
---------

#### Version 1.0.6 (2024.01.07)
* Moved all functions that pertain to inverted repeats or the quadripartite genome structure into a separate file and made their use optional

#### Version 1.0.5 (2023.12.07)
* Minor bug fixes
* New version submitted to CRAN

#### Version 1.0.4 (2023.12.06)
* Implemented a logger
* Several bug fixes (e.g., error caused by function masking)
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: PACVr
Version: 1.0.5
Date: 2023-12-07
Version: 1.0.6
Date: 2024-01-07
Title: Plastome Assembly Coverage Visualization
Authors@R: c(person("Gregory", "Smith", role=c("ctb")),
person("Nils", "Jenke", role=c("ctb")),
Expand Down
23 changes: 5 additions & 18 deletions R/IRoperations.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/R
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2023.12.18.2100"
#version="2024.01.07.2200"

checkIREquality <- function(gbkData, regions, dir, sample_name) {
gbkSeq <- read.gbSeq(gbkData)
Expand All @@ -13,18 +13,8 @@ checkIREquality <- function(gbkData, regions, dir, sample_name) {
IR_diff_gaps <- c()
if (repeatB[2] - repeatB[1] != repeatA[2] - repeatA[1]) {
message("WARNING: Inverted repeats differ in sequence length")
message(paste(
"The IRb has a total lengths of: ",
repeatB[2] - repeatB[1],
" bp",
sep = ""
))
message(paste(
"The IRa has a total lengths of: ",
repeatA[2] - repeatA[1],
" bp",
sep = ""
))
message(paste("The IRb has a total lengths of: ", repeatB[2] - repeatB[1], " bp", sep = ""))
message(paste("The IRa has a total lengths of: ", repeatA[2] - repeatA[1], " bp", sep = ""))
}
if (gbkSeq[[1]][repeatB[1]:repeatB[2]] != Biostrings::reverseComplement(gbkSeq[[1]][repeatA[1]:repeatA[2]])) {
IRa_seq <- Biostrings::DNAString(gbkSeq[[1]][repeatB[1]:repeatB[2]])
Expand All @@ -33,10 +23,7 @@ checkIREquality <- function(gbkData, regions, dir, sample_name) {
IRb_seq <- split(IRb_seq, ceiling(seq_along(IRb_seq) / 10000))

for (i in 1:min(length(IRa_seq), length(IRb_seq))) {
subst_mat <-
Biostrings::nucleotideSubstitutionMatrix(match = 1,
mismatch = -3,
baseOnly = TRUE)
subst_mat <- Biostrings::nucleotideSubstitutionMatrix(match = 1, mismatch = -3, baseOnly = TRUE)
globalAlign <- tryCatch({
Biostrings::pairwiseAlignment(
IRa_seq[[i]],
Expand Down
24 changes: 12 additions & 12 deletions R/PACVr.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/R
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2023.11.23.1530"
#version="2024.01.07.2200"

PACVr.parseName <- function (gbkData) {
return(read.gbSampleName(gbkData))
Expand Down Expand Up @@ -123,15 +123,15 @@ PACVr.visualizeWithRCircos <- function(gbkData,
#' package="PACVr")
#' outFile <- paste(tempdir(), "/NC_045072__all_reads.pdf", sep="")
#' PACVr.complete(gbkFile=gbkFile, bamFile=bamFile, windowSize=250, logScale=FALSE,
#' threshold=0.5, syntenyLineType=3, relative=TRUE, textSize=0.5,
#' threshold=0.5, syntenyLineType=1, relative=TRUE, textSize=0.5,
#' regionsCheck=FALSE, verbose=FALSE, output=outFile
#' }
PACVr.complete <- function(gbkFile,
bamFile,
windowSize=250,
logScale=FALSE,
threshold=0.5,
syntenyLineType=3,
syntenyLineType=1,
relative=TRUE,
textSize=0.5,
regionsCheck=FALSE,
Expand All @@ -144,35 +144,35 @@ PACVr.complete <- function(gbkFile,

###################################
if (regionsCheck) {
logger::log_info('Parsing different genome regions')
logger::log_info('Parsing the different genome regions')
regions <- PACVr.parseRegions(gbkData,
gbkDataDF)
} else {
regions <- PACVr.parseSource(gbkDataDF)
}

###################################
logger::log_info('Parsing different genes')
logger::log_info('Parsing the different genes')
genes <- PACVr.parseGenes(gbkDataDF)

###################################
logger::log_info('Calculating sequencing coverage')
logger::log_info('Calculating the sequencing coverage')
coverage <- PACVr.calcCoverage(bamFile,
windowSize)

###################################
linkData <- NULL
IRCheck <- regionsCheck && isSyntenyLineType(syntenyLineType)
if (IRCheck) {
logger::log_info('Inferring IR regions and genes within IRs')
logger::log_info('Inferring the IR regions and the genes within the IRs')
linkData <- PACVr.generateIRGeneData(genes,
regions,
syntenyLineType)
}

###################################
if (regionsCheck && verbose) {
logger::log_info('Generating statistical information on sequencing coverage')
logger::log_info('Generating statistical information on the sequencing coverage')
PACVr.verboseInformation(gbkData,
bamFile,
genes,
Expand All @@ -182,7 +182,7 @@ PACVr.complete <- function(gbkFile,

###################################
if (!is.na(output)) {
logger::log_info('Generating visualization of sequencing coverage')
logger::log_info('Generating a visualization of the sequencing coverage')
pdf(output, width=10, height=10)
PACVr.visualizeWithRCircos(
gbkData,
Expand All @@ -198,7 +198,7 @@ PACVr.complete <- function(gbkFile,
textSize
)
dev.off()
logger::log_info('Saved visualization including coverage as `{output}`')
logger::log_info('Visualization (including coverage) saved as `{output}`')
} else {
logger::log_info('No coverage data inferred; generating empty visualization')
PACVr.visualizeWithRCircos(
Expand All @@ -215,7 +215,7 @@ PACVr.complete <- function(gbkFile,
textSize
)
dev.off()
logger::log_info('Saved visualization excluding coverage as `{output}`')
logger::log_info('Visualization (excluding coverage) saved as `{output}`')
}
######################################################################
logger::log_success('Done.')
Expand Down
72 changes: 28 additions & 44 deletions R/customizedRCircos.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/R
#!/usr/bin/env RScript
#contributors=c("Gregory Smith", "Nils Jenke", "Michael Gruenstaeudl")
#email="m_gruenstaeudl@fhsu.edu"
#version="2023.12.07.1830"
#version="2024.01.07.2200"


# The following R functions were taken from the R package RCircos and then modified.
Expand All @@ -18,7 +18,7 @@ PACVr.Get.Start.End.Locations <- function(plot.data, plot.width) {
for (aChr in seq_len(length(chromosomes))) {
cyto.rows <- which(cyto.chroms == chromosomes[aChr])
chr.start <- min(RCircos.Cyto$StartPoint[cyto.rows])
chr.end <- max(RCircos.Cyto$EndPoint[cyto.rows])
chr.end <- max(RCircos.Cyto$EndPoint[cyto.rows])
data.rows <- which(dataChroms == chromosomes[aChr])
start.outliers <- which(locations[data.rows, 1] < chr.start)
which(locations[data.rows, 1] < chr.start)
Expand All @@ -45,19 +45,15 @@ PACVr.Histogram.Plot <- function(hist.data = NULL,
warning("Genomic data missing in input.")
stop()
}
boundary <-
RCircos::RCircos.Get.Plot.Boundary(track.num, side, inside.pos,
outside.pos, FALSE)
boundary <- RCircos::RCircos.Get.Plot.Boundary(track.num, side, inside.pos, outside.pos, FALSE)
outerPos <- boundary[1]
innerPos <- boundary[2]
if (is.null(genomic.columns) ||
genomic.columns < 2 || genomic.columns > 3) {
innerPos <- boundary[2]
if (is.null(genomic.columns) || genomic.columns < 2 || genomic.columns > 3) {
warning("Number of columns for genomic position incorrect.")
stop()
}
if (is.null(data.col) || data.col <= genomic.columns) {
warning(paste("Number of input columns must be > ", genomic.columns, ".", sep =
""))
warning(paste("Number of input columns must be > ", genomic.columns, ".", sep=""))
stop()
}
RCircos.Pos <- RCircos::RCircos.Get.Plot.Positions()
Expand Down Expand Up @@ -113,9 +109,7 @@ PACVr.Chromosome.Ideogram.Plot <- function(tick.interval = 0) {

##### tick length, text size, text orientation #####
PACVr.Ideogram.Tick.Plot <-
function(tick.num = 10,
track.for.ticks = 3,
add.text.size = 0)
function(tick.num=10, track.for.ticks=3, add.text.size=0)
{
RCircos.Pos <- RCircos::RCircos.Get.Plot.Positions()
RCircos.Par <- RCircos::RCircos.Get.Plot.Parameters()
Expand All @@ -124,7 +118,7 @@ PACVr.Ideogram.Tick.Plot <-
(RCircos.Pos[1:(nrow(RCircos.Pos) / 2), 3] + 270) %% 360
RCircos.Pos[(nrow(RCircos.Pos) / 2 + 1):nrow(RCircos.Pos), 3] <-
(RCircos.Pos[(nrow(RCircos.Pos) / 2 + 1):nrow(RCircos.Pos), 3] + 90) %% 360

endchr <- RCircos.Cyto$ChromEnd[length(RCircos.Cyto$ChromEnd)]
tick.interval <- endchr / tick.num / 1000

Expand All @@ -134,8 +128,8 @@ PACVr.Ideogram.Tick.Plot <-
# names use two tracks. There will be a total of 6 tracks needed.
# ===================================================================
track.height <- RCircos.Par$track.height
tick.height <- track.height * track.for.ticks
ticks.span <- RCircos.Par$chr.ideo.pos + tick.height * 2
tick.height <- track.height * track.for.ticks
ticks.span <- RCircos.Par$chr.ideo.pos + tick.height * 2

if (RCircos.Par$plot.radius < ticks.span)
{
Expand All @@ -156,17 +150,17 @@ PACVr.Ideogram.Tick.Plot <-
mid.pos <- RCircos.Pos[, 1:2] * (start.pos + track.height / 4)
the.interval <- tick.interval * 1000
short.tick <- round(the.interval / RCircos.Par$base.per.unit, digits = 0)
long.tick <- round(the.interval / RCircos.Par$base.per.unit, digits = 0)
long.tick <- round(the.interval / RCircos.Par$base.per.unit, digits = 0)
#short.tick <- round(the.interval/RCircos.Par$base.per.unit, digits=0);
#long.tick <- short.tick*2;
#long.tick <- short.tick*2;

lab.pos <- RCircos.Pos[, 1:2] * (start.pos + tick.height / 2)
lab.pos <- RCircos.Pos[, 1:2] * (start.pos + tick.height / 2)
chroms <- unique(RCircos.Cyto$Chromosome)
for (aChr in seq_len(length(chroms)))
{
the.chr <- RCircos.Cyto[RCircos.Cyto[, 1] == chroms[aChr],]
the.chr <- RCircos.Cyto[RCircos.Cyto[, 1] == chroms[aChr],]
chr.start <- the.chr$StartPoint[1]
chr.end <- the.chr$EndPoint[nrow(the.chr)]
chr.end <- the.chr$EndPoint[nrow(the.chr)]

total.ticks <- tick.num
for (a.tick in seq_len(total.ticks))
Expand All @@ -178,8 +172,7 @@ PACVr.Ideogram.Tick.Plot <-
c(innerPos[tick.pos, 2], outerPos[tick.pos, 2]),
col = the.chr$ChrColor[1])

lab.text <-
paste0(round((a.tick - 1) * tick.interval, 1), "kb")
lab.text <- paste0(round((a.tick - 1) * tick.interval, 1), "kb")

graphics::text(
lab.pos[tick.pos, 1] ,
Expand All @@ -205,7 +198,7 @@ PACVr.Ideogram.Tick.Plot <-
# parameter, direct work with RCircosEnvironment is needed.
# =======================================================
old.name.pos <- RCircos.Par$chr.name.pos
old.out.pos <- RCircos.Par$track.out.start
old.out.pos <- RCircos.Par$track.out.start
old.distance <- old.out.pos - old.name.pos

RCircos.Par$chr.name.pos <- ticks.span
Expand Down Expand Up @@ -245,12 +238,9 @@ PACVr.Gene.Name.Plot <- function(gene.data = NULL,
# Convert raw data to plot data. The raw data will be validated
# first during the conversion
# =============================================================
boundary <- RCircos::RCircos.Get.Plot.Boundary(track.num, side, inside.pos,
outside.pos, FALSE)
gene.data <- RCircos::RCircos.Get.Single.Point.Positions(gene.data,
genomic.columns)
gene.data <- RCircos::RCircos.Get.Gene.Label.Locations(gene.data, genomic.columns,
is.sorted)
boundary <- RCircos::RCircos.Get.Plot.Boundary(track.num, side, inside.pos, outside.pos, FALSE)
gene.data <- RCircos::RCircos.Get.Single.Point.Positions(gene.data, genomic.columns)
gene.data <- RCircos::RCircos.Get.Gene.Label.Locations(gene.data, genomic.columns, is.sorted)

# Label positions
# =============================================================
Expand All @@ -261,7 +251,7 @@ PACVr.Gene.Name.Plot <- function(gene.data = NULL,
textSide <- rep(4, nrow(gene.data))
textSide[thePoints <= rightSide] <- 2
} else {
labelPos <- boundary[2] - correction
labelPos <- boundary[2] - correction
textSide <- rep(2, nrow(gene.data))
textSide[thePoints <= rightSide] <- 4
}
Expand Down Expand Up @@ -296,8 +286,7 @@ PACVr.Gene.Connector.Plot <- function(genomic.data = NULL,
if (is.null(genomic.data))
stop("Genomic data missing for RCircos.Gene.Connector.Plot().\n")

boundary <- RCircos::RCircos.Get.Plot.Boundary(track.num, side, inside.pos,
outside.pos, erase.area = FALSE)
boundary <- RCircos::RCircos.Get.Plot.Boundary(track.num, side, inside.pos, outside.pos, erase.area=FALSE)
outerPos <- boundary[1]
innerPos <- boundary[2]
RCircos.Pos <- RCircos::RCircos.Get.Plot.Positions()
Expand Down Expand Up @@ -388,20 +377,16 @@ PACVr.Line.Plot <-
{
if (is.null(line.data))
stop("Genomic data missing in RCircos.Line.Plot().\n")
boundary <-
RCircos::RCircos.Get.Plot.Boundary(track.num, side, inside.pos,
outside.pos, FALSE)
boundary <- RCircos::RCircos.Get.Plot.Boundary(track.num, side, inside.pos, outside.pos, FALSE)
outerPos <- boundary[1]
innerPos <- boundary[2]
if (is.null(genomic.columns))
stop("Missing number of columns for genomic position.\n")
if (is.null(data.col) || data.col <= genomic.columns)
stop("Line data column must be ", genomic.columns +
1, " or bigger.\n")
stop("Line data column must be ", genomic.columns + 1, " or bigger.\n")
RCircos.Pos <- RCircos::RCircos.Get.Plot.Positions()
RCircos.Par <- RCircos::RCircos.Get.Plot.Parameters()
line.data <- RCircos::RCircos.Get.Single.Point.Positions(line.data,
genomic.columns)
line.data <- RCircos::RCircos.Get.Single.Point.Positions(line.data, genomic.columns)
pointValues <- as.numeric(line.data[, data.col])
if (is.null(min.value) || is.null(max.value)) {
min.value <- min(pointValues)
Expand All @@ -415,8 +400,7 @@ PACVr.Line.Plot <-
min.value,
max.value,
plot.type = "points",
outerPos -
innerPos)
outerPos - innerPos)
pointHeight <- pointHeight + innerPos
line.colors <- RCircos::RCircos.Get.Plot.Colors(line.data, RCircos.Par$line.color)
#RCircos.Track.Outline(outerPos, innerPos, RCircos.Par$sub.tracks)
Expand Down Expand Up @@ -490,7 +474,7 @@ PACVr.Reset.Plot.Parameters <- function (new.params = NULL)
new.params$highlight.pos <- old.params$highlight.pos + differ
new.name.pos <- old.params$chr.name.pos + differ
if (new.params$chr.name.pos < new.name.pos)
new.params$chr.name.pos <- new.name.pos
new.params$chr.name.pos <- new.name.pos

}

Expand Down
Loading

0 comments on commit fa24f71

Please sign in to comment.