From 96a2220b51c108e163f5beade087958339927813 Mon Sep 17 00:00:00 2001 From: michaelgruenstaeudl Date: Mon, 5 Feb 2024 11:52:30 -0600 Subject: [PATCH] Improvement of variable names --- R/PACVr.R | 80 ++++++------ R/helpers.R | 12 +- R/quadripartiteOperations.R | 140 +++++++++++---------- README.md | 4 +- inst/extdata/PACVr_Rscript.R | 20 +-- inst/extdata/README_USAGE/Plastid_Genome.R | 4 +- man/PACVr.complete.Rd | 20 +-- 7 files changed, 143 insertions(+), 137 deletions(-) diff --git a/R/PACVr.R b/R/PACVr.R index 71fc63a0..ec5f7da0 100644 --- a/R/PACVr.R +++ b/R/PACVr.R @@ -16,10 +16,10 @@ PACVr.parseName <- function (gbkData) { return(read.gbSampleName(gbkData)) } -PACVr.parseRegions <- function (gbkData, gbkDataDF) { - raw_regions <- ExtractAllRegions(gbkDataDF) - regions <- fillDataFrame(gbkData, raw_regions) - return(regions) +PACVr.parseQuadripRegions <- function (gbkData, gbkDataDF) { + raw_quadripRegions <- ParseQuadripartiteStructure(gbkDataDF) + quadripRegions <- fillDataFrame(gbkData, raw_quadripRegions) + return(quadripRegions) } PACVr.parseSource <- function(gbkDataDF) { @@ -38,11 +38,11 @@ PACVr.calcCoverage <- return(coverage) } -PACVr.generateIRGeneData <- function(genes, regions, +PACVr.generateIRGeneData <- function(genes, quadripRegions, syntenyLineType) { # Parse GenBank file - if ("IRb" %in% regions[, 4] && - "IRa" %in% regions[, 4]) { + if ("IRb" %in% quadripRegions[, 4] && + "IRa" %in% quadripRegions[, 4]) { linkData <- GenerateIRSynteny(genes, syntenyLineType) return(linkData) } @@ -52,7 +52,7 @@ PACVr.generateIRGeneData <- function(genes, regions, PACVr.verboseInformation <- function(gbkData, bamFile, genes, - regions, + quadripRegions, IRCheck, output) { sampleName <- read.gbSampleName(gbkData) @@ -74,15 +74,15 @@ PACVr.verboseInformation <- function(gbkData, dir.create(tmpDir) } # Step 3. 
Write output - writeTables(regions, bamFile, genes, tmpDir, sampleName) + writeTables(quadripRegions, bamFile, genes, tmpDir, sampleName) if (IRCheck) { - checkIREquality(gbkData, regions, tmpDir, sampleName) + checkIREquality(gbkData, quadripRegions, tmpDir, sampleName) } } PACVr.visualizeWithRCircos <- function(gbkData, genes, - regions, + quadripRegions, coverage, windowSize, logScale, @@ -97,7 +97,7 @@ PACVr.visualizeWithRCircos <- function(gbkData, visualizeWithRCircos( plotTitle, genes, - regions, + quadripRegions, coverage, windowSize, threshold, @@ -123,22 +123,24 @@ PACVr.visualizeWithRCircos <- function(gbkData, #' log-transformed before visualizing it #' @param threshold a numeric value that specifies the threshold for plotting #' coverage depth bars in red as opposed to the default black -#' @param regionsCheck a numeric value that specifies if region analysis of -#' genome should be performed, and if performed, the type of line for -#' visualizing gene synteny; -#' 0 = region analysis and no line, 1 = region analysis and ribbon lines, -#' 2 = region analysis and solid lines, otherwise = no analysis and no line +#' @param IRCheck a numeric value that specifies if tests +#' for IRs of input genome should be performed, and - if IRs are present - +#' which line type to be used for visualizing gene synteny between IRs; +#' 0 = IR presence test but no synteny visualization, +#' 1 = IR presence test and synteny visualization, with ribbon lines between IRs, +#' 2 = IR presence test and synteny visualization, with solid lines between IRs, +#' otherwise = neither IR presence test nor synteny visualization #' @param relative a boolean that specifies whether the threshold is a relative #' value of the average coverage instead of an absolute value #' @param textSize a numeric value that specifies the relative font size of the #' text element in the visualization #' @param verbose a boolean, that when TRUE, generates additional files with #' detailed genomic region 
information; -#' requires a `regionsCheck` value that will perform region analysis +#' requires a `IRCheck` value that will perform region analysis #' @param output a character string that specifies the name of, and path to, #' the output file #' @return A file in pdf format containing a circular visualization of the -#' submitted plastid sample. +#' input plastid genome and its sequence reads. #' As a function, returns 0 in case of visualization success. #' @export #' @examples @@ -147,7 +149,7 @@ PACVr.visualizeWithRCircos <- function(gbkData, #' bamFile <- system.file("extdata", "NC_045072/NC_045072_subsampled.bam", package="PACVr") #' outFile <- paste(tempdir(), "/NC_045072__all_reads.pdf", sep="") #' PACVr.complete(gbkFile=gbkFile, bamFile=bamFile, windowSize=250, logScale=FALSE, -#' threshold=0.5, regionsCheck=1, relative=TRUE, textSize=0.5, +#' threshold=0.5, IRCheck=1, relative=TRUE, textSize=0.5, #' verbose=FALSE, output=outFile) #' } #' \dontrun{ @@ -155,7 +157,7 @@ PACVr.visualizeWithRCircos <- function(gbkData, #' bamFile <- system.file("extdata", "MG936619/MG936619_subsampled.bam", package="PACVr") #' outFile <- paste(tempdir(), "/MG936619_CoverageViz.pdf", sep="") #' PACVr.complete(gbkFile=gbkFile, bamFile=bamFile, windowSize=50, logScale=FALSE, -#' threshold=0.5, regionsCheck=NA, relative=TRUE, textSize=0.5, +#' threshold=0.5, IRCheck=NA, relative=TRUE, textSize=0.5, #' verbose=FALSE, output=outFile) #' } @@ -164,27 +166,27 @@ PACVr.complete <- function(gbkFile, windowSize=250, logScale=FALSE, threshold=0.5, - regionsCheck=NA, + IRCheck=NA, relative=TRUE, textSize=0.5, verbose=FALSE, output=NA) { ###################################################################### gbkData <- PACVr.read.gb(gbkFile) - isRegionsCheck <- getIsRegionsCheck(regionsCheck) - gbkDataDF <- read.gb2DF(gbkData, isRegionsCheck) + isIRCheck <- getIsIRCheck(IRCheck) + gbkDataDF <- read.gb2DF(gbkData, isIRCheck) if (is.null(gbkDataDF)) { logger::log_error(paste("No usable data to 
perform specified analysis")) return(NULL) } ################################### - if (isRegionsCheck) { + if (isIRCheck) { logger::log_info('Parsing the different genome regions') - regions <- PACVr.parseRegions(gbkData, - gbkDataDF) + quadripRegions <- PACVr.parseQuadripRegions(gbkData, + gbkDataDF) } else { - regions <- PACVr.parseSource(gbkDataDF) + quadripRegions <- PACVr.parseSource(gbkDataDF) } ################################### @@ -198,26 +200,26 @@ PACVr.complete <- function(gbkFile, ################################### linkData <- NULL - IRCheck <- isSyntenyLineType(regionsCheck) - if (IRCheck) { + isSyntenyLine <- isSyntenyLineType(IRCheck) + if (isSyntenyLine) { logger::log_info('Inferring the IR regions and the genes within the IRs') linkData <- PACVr.generateIRGeneData(genes, - regions, - regionsCheck) + quadripRegions, + IRCheck) } ################################### - if (isRegionsCheck && verbose) { + if (isIRCheck && verbose) { logger::log_info('Generating statistical information on the sequencing coverage') PACVr.verboseInformation(gbkData, bamFile, genes, - regions, - IRCheck, + quadripRegions, + isSyntenyLine, output) } else if (verbose) { - logger::log_warn(paste0('Verbose output requires `regionsCheck` in ', - '`', deparse(getRegionsCheckTypes()), '`')) + logger::log_warn(paste0('Verbose output requires `IRCheck` in ', + '`', deparse(getIRCheckTypes()), '`')) } ################################### @@ -227,14 +229,14 @@ PACVr.complete <- function(gbkFile, PACVr.visualizeWithRCircos( gbkData, genes, - regions, + quadripRegions, coverage, windowSize, threshold, logScale, relative, linkData, - regionsCheck, + IRCheck, textSize ) dev.off() @@ -244,14 +246,14 @@ PACVr.complete <- function(gbkFile, PACVr.visualizeWithRCircos( gbkData, genes, - regions, + quadripRegions, coverage, windowSize, threshold, logScale, relative, linkData, - regionsCheck, + IRCheck, textSize ) dev.off() diff --git a/R/helpers.R b/R/helpers.R index b98b4875..ad3f846a 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ 
-3,10 +3,10 @@ #email="m_gruenstaeudl@fhsu.edu" #version="2024.02.01.1736" -read.gb2DF <- function(gbkData, regionsCheck) { +read.gb2DF <- function(gbkData, IRPresenceAndSyntenyCheck) { fileDF <- data.frame() for (sample in gbkData) { - sampleDF <- parseFeatures(sample$FEATURES, regionsCheck) + sampleDF <- parseFeatures(sample$FEATURES, IRPresenceAndSyntenyCheck) if (!is.null(sampleDF)) { fileDF <- dplyr::bind_rows(fileDF, sampleDF) } @@ -18,7 +18,7 @@ read.gb2DF <- function(gbkData, regionsCheck) { return(fileDF) } -parseFeatures <- function(features, regionsCheck) { +parseFeatures <- function(features, IRPresenceAndSyntenyCheck) { sampleDF <- data.frame() for (feature in features) { feature <- parseFeature(feature) @@ -27,7 +27,7 @@ parseFeatures <- function(features, regionsCheck) { } } # check if can we can use the sample - subsetCols <- checkFeatureQualifiers(sampleDF, regionsCheck) + subsetCols <- checkFeatureQualifiers(sampleDF, IRPresenceAndSyntenyCheck) if (is.null(subsetCols)) { return(NULL) } @@ -372,9 +372,9 @@ validateColors <- function(colorsToValidate) { } } -checkFeatureQualifiers <- function(sampleDF, regionsCheck) { +checkFeatureQualifiers <- function(sampleDF, IRPresenceAndSyntenyCheck) { subsetCols <- c("gene", "note", "type") - if (regionsCheck) { + if (IRPresenceAndSyntenyCheck) { subsetCols <- c(subsetCols, "standard_name") } missingCols <- subsetCols[!(subsetCols %in% colnames(sampleDF))] diff --git a/R/quadripartiteOperations.R b/R/quadripartiteOperations.R index a58cfccf..7ccca9c4 100644 --- a/R/quadripartiteOperations.R +++ b/R/quadripartiteOperations.R @@ -23,15 +23,16 @@ FilterByKeywords <- function(allRegions, where) { return(out) } -ExtractAllRegions <- function(gbkDataDF) { - # Function to extract specific region information from Genbank flatfile data +ParseQuadripartiteStructure <- function(gbkDataDF) { + # Function to extract the quadripartite region information from + # Genbank flatfile data # ARGS: # gbkDataDF (resulting data 
frame from parsing read.gb object) # RETURNS: # regions in data frame format logger::log_info(' Extracting information on genomic regions') allRegions <- read.gbOther(gbkDataDF) - regions <- tryCatch( + quadripRegions <- tryCatch( tryCatch( FilterByKeywords(allRegions, "note"), warning = function(w) @@ -52,105 +53,106 @@ ExtractAllRegions <- function(gbkDataDF) { ) } ) - regions <- regions[, c("start", "end", "note")] - colnames(regions) <- c("chromStart", "chromEnd", "Band") - regions$Chromosome <- "" - regions$Stain <- "gpos100" - regions <- regions[c("Chromosome", "chromStart", "chromEnd", "Band", "Stain")] - regions <- regions[order(regions[, 3], decreasing=FALSE),] - regions$Band[which(grepl("LSC|large|long", regions$Band, ignore.case=TRUE) == TRUE)] <- "LSC" - regions$Band[which(grepl("SSC|small|short", regions$Band, ignore.case=TRUE) == TRUE)] <- "SSC" - regions$Band[which(grepl("IRa|\\sa|IR1|\\s1", regions$Band, ignore.case=TRUE) == TRUE)] <- "###A" - regions$Band[which(grepl("IRb|\\sb|IR2|\\s2", regions$Band, ignore.case=TRUE) == TRUE)] <- "###B" - regions$Band[which(grepl("IR|invert|repeat", regions$Band, ignore.case=TRUE) == TRUE)] <- "IR" - regions$Band[which(grepl("###A", regions$Band, ignore.case=TRUE) == TRUE)] <- "IRa" - regions$Band[which(grepl("###B", regions$Band, ignore.case=TRUE) == TRUE)] <- "IRb" - row.names(regions) <- 1:nrow(regions) - regions <- regions[order(regions[, 3], decreasing=FALSE),] - return(regions) + quadripRegions <- quadripRegions[, c("start", "end", "note")] + colnames(quadripRegions) <- c("chromStart", "chromEnd", "Band") + quadripRegions$Chromosome <- "" + quadripRegions$Stain <- "gpos100" + quadripRegions <- quadripRegions[c("Chromosome", "chromStart", "chromEnd", "Band", "Stain")] + quadripRegions <- quadripRegions[order(quadripRegions[, 3], decreasing=FALSE),] + quadripRegions$Band[which(grepl("LSC|large|long", quadripRegions$Band, ignore.case=TRUE) == TRUE)] <- "LSC" + quadripRegions$Band[which(grepl("SSC|small|short", 
quadripRegions$Band, ignore.case=TRUE) == TRUE)] <- "SSC" + quadripRegions$Band[which(grepl("IRa|\\sa|IR1|\\s1", quadripRegions$Band, ignore.case=TRUE) == TRUE)] <- "###A" + quadripRegions$Band[which(grepl("IRb|\\sb|IR2|\\s2", quadripRegions$Band, ignore.case=TRUE) == TRUE)] <- "###B" + quadripRegions$Band[which(grepl("IR|invert|repeat", quadripRegions$Band, ignore.case=TRUE) == TRUE)] <- "IR" + quadripRegions$Band[which(grepl("###A", quadripRegions$Band, ignore.case=TRUE) == TRUE)] <- "IRa" + quadripRegions$Band[which(grepl("###B", quadripRegions$Band, ignore.case=TRUE) == TRUE)] <- "IRb" + row.names(quadripRegions) <- 1:nrow(quadripRegions) + quadripRegions <- quadripRegions[order(quadripRegions[, 3], decreasing=FALSE),] + return(quadripRegions) } -fillDataFrame <- function(gbkData, regions) { - # Function to annotate plastid genome with quadripartite regions based on their position within the genome +fillDataFrame <- function(gbkData, quadripRegions) { + # Function to annotate plastid genome with quadripartite regions + # based on their position within the genome # ARGS: # gbkData (i.e., GenBank flatfile data as parsed by read.gb()) # RETURNS: # ... 
logger::log_info(' Annotating plastid genome with quadripartite regions') seqLength <- read.gbLengths(gbkData) - if ((nrow(regions) == 0) || (regions[1, 2] == -1)) { - regions[1,] <- c("", as.numeric(1), as.numeric(seqLength), "NA", "gpos100") - regions[, 2] <- as.numeric(regions[, 2]) - regions[, 3] <- as.numeric(regions[, 3]) - return(regions) + if ((nrow(quadripRegions) == 0) || (quadripRegions[1, 2] == -1)) { + quadripRegions[1,] <- c("", as.numeric(1), as.numeric(seqLength), "NA", "gpos100") + quadripRegions[, 2] <- as.numeric(quadripRegions[, 2]) + quadripRegions[, 3] <- as.numeric(quadripRegions[, 3]) + return(quadripRegions) } else { start <- 1 - for (i in 1:nrow(regions)) { - if (regions[i, 2] > start) { - regions[nrow(regions) + 1,] <- c("", start, as.numeric(regions[i, 2]) - 1, "NA", "gpos100") + for (i in 1:nrow(quadripRegions)) { + if (quadripRegions[i, 2] > start) { + quadripRegions[nrow(quadripRegions) + 1,] <- c("", start, as.numeric(quadripRegions[i, 2]) - 1, "NA", "gpos100") } - start <- as.numeric(regions[i, 3]) + 1 + start <- as.numeric(quadripRegions[i, 3]) + 1 } if (start - 1 < seqLength) { - regions[nrow(regions) + 1,] <- c("", start, seqLength, "NA", "gpos100") + quadripRegions[nrow(quadripRegions) + 1,] <- c("", start, seqLength, "NA", "gpos100") } - regions <- regions[order(as.numeric(regions[, 2]), decreasing=FALSE),] - row.names(regions) <- 1:nrow(regions) - regions[, 2] <- as.numeric(regions[, 2]) - regions[, 3] <- as.numeric(regions[, 3]) + quadripRegions <- quadripRegions[order(as.numeric(quadripRegions[, 2]), decreasing=FALSE),] + row.names(quadripRegions) <- 1:nrow(quadripRegions) + quadripRegions[, 2] <- as.numeric(quadripRegions[, 2]) + quadripRegions[, 3] <- as.numeric(quadripRegions[, 3]) - regionAvail <- boolToDeci(c("LSC", "IRb", "SSC", "IRa") %in% regions[, 4]) - regions[, 6] <- regions[, 3] - regions[, 2] + regionAvail <- boolToDeci(c("LSC", "IRb", "SSC", "IRa") %in% quadripRegions[, 4]) + quadripRegions[, 6] <- 
quadripRegions[, 3] - quadripRegions[, 2] if (regionAvail == 5) { # only IRa and IRb - regions[which(regions[, 4] != "NA"), 6] <- 0 - regions[which(regions[, 6] == max(regions[, 6])), 4] <- "LSC" - regions[which(regions[, 4] != "NA"), 6] <- 0 - regions[which(regions[, 6] == max(regions[, 6])), 4] <- "SSC" + quadripRegions[which(quadripRegions[, 4] != "NA"), 6] <- 0 + quadripRegions[which(quadripRegions[, 6] == max(quadripRegions[, 6])), 4] <- "LSC" + quadripRegions[which(quadripRegions[, 4] != "NA"), 6] <- 0 + quadripRegions[which(quadripRegions[, 6] == max(quadripRegions[, 6])), 4] <- "SSC" message("Annotation for LSC and SSC were automatically added") } else if (regionAvail == 7) { # only IRa, SSC and IRb - regions[which(regions[, 4] != "NA"), 6] <- 0 - regions[which(regions[, 6] == max(regions[, 6])), 4] <- "LSC" + quadripRegions[which(quadripRegions[, 4] != "NA"), 6] <- 0 + quadripRegions[which(quadripRegions[, 6] == max(quadripRegions[, 6])), 4] <- "LSC" message("Annotation for LSC was automatically added") } else if (regionAvail == 10) { # only LSC and SSC - IRs <- data.frame(table(regions[which(regions[, 4] == "NA"), 6]), stringsAsFactors=FALSE) + IRs <- data.frame(table(quadripRegions[which(quadripRegions[, 4] == "NA"), 6]), stringsAsFactors=FALSE) IRs <- IRs[IRs$Freq == 2, 1] if (length(IRs) >= 1) { IRs <- max(as.numeric(as.character(IRs))) - regions[which(regions[, 6] == IRs), 4] <- c("IRb", "IRa") + quadripRegions[which(quadripRegions[, 6] == IRs), 4] <- c("IRb", "IRa") message("Annotation for IRb and IRa were automatically added") } } else if (regionAvail == 11) { # only LSC, SSC and IRa - regions[which(regions[, 4] == "NA" & regions[, 6] == regions[which(regions[, 4] == "IRa"), 6]), 4] <- "IRb" + quadripRegions[which(quadripRegions[, 4] == "NA" & quadripRegions[, 6] == quadripRegions[which(quadripRegions[, 4] == "IRa"), 6]), 4] <- "IRb" message("Annotation for IRb was automatically added") } else if (regionAvail == 13) { # only LSC, IRb and IRa - 
regions[which(regions[, 4] != "NA"), 6] <- 0 - regions[which(regions[, 6] == max(regions[, 6])), 4] <- "SSC" + quadripRegions[which(quadripRegions[, 4] != "NA"), 6] <- 0 + quadripRegions[which(quadripRegions[, 6] == max(quadripRegions[, 6])), 4] <- "SSC" message("Annotation for SSC was automatically added") } else if (regionAvail == 14) { # only LSC, IRb and SSC - regions[which(regions[, 4] == "NA" & regions[, 6] == regions[which(regions[, 4] == "IRb"), 6]), 4] <- "IRa" + quadripRegions[which(quadripRegions[, 4] == "NA" & quadripRegions[, 6] == quadripRegions[which(quadripRegions[, 4] == "IRb"), 6]), 4] <- "IRa" message("Annotation for IRa was automatically added") } - regions <- regions[-6] - regions$Stain[which(regions$Band == "LSC")] <- "gpos75" - regions$Stain[which(regions$Band == "SSC")] <- "gpos50" - regions$Stain[which(regions$Band == "IRa")] <- "gpos25" - regions$Stain[which(regions$Band == "IRb")] <- "gpos25" - return(regions) + quadripRegions <- quadripRegions[-6] + quadripRegions$Stain[which(quadripRegions$Band == "LSC")] <- "gpos75" + quadripRegions$Stain[which(quadripRegions$Band == "SSC")] <- "gpos50" + quadripRegions$Stain[which(quadripRegions$Band == "IRa")] <- "gpos25" + quadripRegions$Stain[which(quadripRegions$Band == "IRb")] <- "gpos25" + return(quadripRegions) } } -plotAverageLines <- function(regions, coverage, windowSize, positions) { +plotAverageLines <- function(quadripRegions, coverage, windowSize, positions) { averageLines <- c() - for (i in 1:nrow(regions)) { - lineData <- GenerateHistogramData(regions[i,], coverage, windowSize, (i == nrow(regions))) - averageLines <- c(averageLines, paste(regions[i, 4], ": ", trunc(lineData[1, 4]), "X", sep = "")) + for (i in 1:nrow(quadripRegions)) { + lineData <- GenerateHistogramData(quadripRegions[i,], coverage, windowSize, (i == nrow(quadripRegions))) + averageLines <- c(averageLines, paste(quadripRegions[i, 4], ": ", trunc(lineData[1, 4]), "X", sep = "")) PACVr.Line.Plot( line.data = lineData, 
data.col = 4, @@ -167,9 +169,9 @@ plotAverageLines <- function(regions, coverage, windowSize, positions) { return(averageLines) } -plotRegionNames <- function(regions) { +plotRegionNames <- function(quadripRegions) { PACVr.Gene.Name.Plot( - gene.data = regions, + gene.data = quadripRegions, name.col = 4, track.num = 1, side = "out", @@ -179,16 +181,16 @@ plotRegionNames <- function(regions) { ) } -isRealRegions <- function(regions) { - return(nrow(regions) > 1) +isRealRegions <- function(quadripRegions) { + return(nrow(quadripRegions) > 1) } -getIsRegionsCheck <- function(regionsCheck) { - regionsCheckTypes <- getRegionsCheckTypes() - return(regionsCheck %in% regionsCheckTypes) +getIsIRCheck <- function(IRCheck) { + IRCheckTypes <- getIRCheckTypes() + return(IRCheck %in% IRCheckTypes) } -getRegionsCheckTypes <- function() { - regionsCheckTypes <- c(0, 1, 2) - return(regionsCheckTypes) +getIRCheckTypes <- function() { + IRCheckTypes <- c(0, 1, 2) + return(IRCheckTypes) } diff --git a/README.md b/README.md index 6d3f7371..00a0a61e 100644 --- a/README.md +++ b/README.md @@ -34,12 +34,12 @@ PACVr.complete(gbkFile, bamFile, windowSize=250, logScale=FALSE, ## COVERAGE VALUES PLUS REGION INDICATORS ## PACVr.complete(gbkFile, bamFile, windowSize=250, logScale=FALSE, threshold=0.5, relative=TRUE, textSize=0.5, - regionsCheck=0, output=outFile) + IRCheck=0, output=outFile) ## COVERAGE VALUES PLUS REGION INDICATORS PLUS IR SYNTENY LINES ## PACVr.complete(gbkFile, bamFile, windowSize=250, logScale=FALSE, threshold=0.5, relative=TRUE, textSize=0.5, - regionsCheck=1, output=outFile) + IRCheck=1, output=outFile) ``` ### Mitochondrial Genome diff --git a/inst/extdata/PACVr_Rscript.R b/inst/extdata/PACVr_Rscript.R index e8f6a9ca..f4022882 100644 --- a/inst/extdata/PACVr_Rscript.R +++ b/inst/extdata/PACVr_Rscript.R @@ -44,17 +44,17 @@ CmdLineArgs <- function() { dest = "threshold", help = "a numeric value that specifies the threshold for plotting coverage depth bars in red as 
opposed to the default black [default = %default]", metavar = "integer"), - make_option(opt_str = c("-rc","--regionsCheck"), + make_option(opt_str = c("-irc","--IRCheck"), type = "numeric", default = 1, - dest = "regionsCheck", - help = paste("a numeric value that specifies if region analysis of", - "genome should be performed, and if performed, the type of line for", - "visualizing gene synteny;", - "0 = region analysis and no line,", - "1 = region analysis and ribbon lines,", - "2 = region analysis and solid lines,", - "otherwise = no analysis and no line", + dest = "IRCheck", + help = paste("a numeric value that specifies if tests for IRs of input genome", + "should be performed, and - if IRs are present - which line type", + "to be used for visualizing gene synteny between IRs;", + "0 = IR presence test but no synteny visualization,", + "1 = IR presence test and synteny visualization, with ribbon lines between IRs,", + "2 = IR presence test and synteny visualization, with solid lines between IRs,", + "otherwise = neither IR presence test nor synteny visualization", "[default = %default]"), metavar = "integer"), make_option(opt_str = c("-r","--relative"), @@ -109,7 +109,7 @@ PACVr.complete(gbkFile = opt$gbkFile, windowSize = opt$windowSize, logScale = opt$logScale, threshold = opt$threshold, - regionsCheck = opt$regionsCheck, + IRCheck = opt$IRCheck, relative = opt$relative, textSize = opt$textSize, verbose = opt$verbose, diff --git a/inst/extdata/README_USAGE/Plastid_Genome.R b/inst/extdata/README_USAGE/Plastid_Genome.R index f7947522..0254bde4 100644 --- a/inst/extdata/README_USAGE/Plastid_Genome.R +++ b/inst/extdata/README_USAGE/Plastid_Genome.R @@ -18,11 +18,11 @@ exitStatusVec <- c(exitStatusVec, exitStatus) ## COVERAGE VALUES PLUS REGION INDICATORS ## exitStatus <- PACVr.complete(gbkFile, bamFile, windowSize=250, logScale=FALSE, threshold=0.5, relative=TRUE, textSize=0.5, - regionsCheck=0, output=outFile) + IRCheck=0, output=outFile) exitStatusVec <- 
c(exitStatusVec, exitStatus) ## COVERAGE VALUES PLUS REGION INDICATORS PLUS IR SYNTENY LINES ## exitStatus <- PACVr.complete(gbkFile, bamFile, windowSize=250, logScale=FALSE, threshold=0.5, relative=TRUE, textSize=0.5, - regionsCheck=1, output=outFile) + IRCheck=1, output=outFile) exitStatusVec <- c(exitStatusVec, exitStatus) diff --git a/man/PACVr.complete.Rd b/man/PACVr.complete.Rd index c22e2308..517384f9 100644 --- a/man/PACVr.complete.Rd +++ b/man/PACVr.complete.Rd @@ -10,7 +10,7 @@ PACVr.complete( windowSize = 250, logScale = FALSE, threshold = 0.5, - regionsCheck = NA, + IRCheck = NA, relative = TRUE, textSize = 0.5, verbose = FALSE, @@ -33,11 +33,13 @@ log-transformed before visualizing it} \item{threshold}{a numeric value that specifies the threshold for plotting coverage depth bars in red as opposed to the default black} -\item{regionsCheck}{a numeric value that specifies if region analysis of -genome should be performed, and if performed, the type of line for -visualizing gene synteny; -0 = region analysis and no line, 1 = region analysis and ribbon lines, -2 = region analysis and solid lines, otherwise = no analysis and no line} +\item{IRCheck}{a numeric value that specifies if tests +for IRs of input genome should be performed, and - if IRs are present - +which line type to be used for visualizing gene synteny between IRs; +0 = IR presence test but no synteny visualization, +1 = IR presence test and synteny visualization, with ribbon lines between IRs, +2 = IR presence test and synteny visualization, with solid lines between IRs, +otherwise = neither IR presence test nor synteny visualization} \item{relative}{a boolean that specifies whether the threshold is a relative value of the average coverage instead of an absolute value} @@ -47,7 +49,7 @@ text element in the visualization} \item{verbose}{a boolean, that when TRUE, generates additional files with detailed genomic region information; -requires a `regionsCheck` value that will perform region 
analysis} +requires a `IRCheck` value that will perform region analysis} \item{output}{a character string that specifies the name of, and path to, the output file} @@ -67,7 +69,7 @@ gbkFile <- system.file("extdata", "NC_045072/NC_045072.gb", package="PACVr") bamFile <- system.file("extdata", "NC_045072/NC_045072_subsampled.bam", package="PACVr") outFile <- paste(tempdir(), "/NC_045072__all_reads.pdf", sep="") PACVr.complete(gbkFile=gbkFile, bamFile=bamFile, windowSize=250, logScale=FALSE, - threshold=0.5, regionsCheck=1, relative=TRUE, textSize=0.5, + threshold=0.5, IRCheck=1, relative=TRUE, textSize=0.5, verbose=FALSE, output=outFile) } \dontrun{ @@ -75,7 +77,7 @@ gbkFile <- system.file("extdata", "MG936619/MG936619.gb", package="PACVr") bamFile <- system.file("extdata", "MG936619/MG936619_subsampled.bam", package="PACVr") outFile <- paste(tempdir(), "/MG936619_CoverageViz.pdf", sep="") PACVr.complete(gbkFile=gbkFile, bamFile=bamFile, windowSize=50, logScale=FALSE, - threshold=0.5, regionsCheck=NA, relative=TRUE, textSize=0.5, + threshold=0.5, IRCheck=NA, relative=TRUE, textSize=0.5, verbose=FALSE, output=outFile) } }