diff --git a/R/QA.R b/R/QA.R index f431b66..efab542 100644 --- a/R/QA.R +++ b/R/QA.R @@ -1,7 +1,6 @@ -QA = function(known, isoscape, bySite = TRUE, - valiStation = 1, valiTime = 50, by = 2, - prior = NULL, mask = NULL, setSeed = TRUE, - name = NULL){ +QA = function(known, isoscape, bySite = TRUE, valiStation = 1, + valiTime = 50, recal = TRUE, by = 2, prior = NULL, + mask = NULL, setSeed = TRUE, name = NULL){ #space to handle messages and warnings mstack = wstack = character(0) @@ -123,6 +122,21 @@ QA = function(known, isoscape, bySite = TRUE, stop("invalid object provided for known") } + #check recal + if(class(recal) != "logical"){ + stop("recal must be logical") + } + if(!recal){ + valiTime = nrow(known) + valiStation = 1 + bySite = FALSE + } + + #check valiTime + if(valiTime < 2){ + stop("valiTime must be an integer greater than 1") + } + #check valiStation if(bySite){ if(valiStation > (length(unique(known$Site_ID)) - 3)){ @@ -136,11 +150,6 @@ QA = function(known, isoscape, bySite = TRUE, } } - #check valiTime - if(valiTime < 2){ - stop("valiTime must be an integer greater than 1") - } - #check by if(!(as.integer(by) == by) || by < 1 || by > 25){ stop("by must be an integer between 1 and 25") @@ -157,7 +166,6 @@ QA = function(known, isoscape, bySite = TRUE, if(!is.logical(setSeed)){ stop("setSeed must be logical") } - if(setSeed){ set.seed(100) } @@ -169,11 +177,15 @@ QA = function(known, isoscape, bySite = TRUE, rowLength = nrow(known) ids = seq_len(rowLength) } - val_stations = sort(sample(ids, valiStation, replace = FALSE)) - for (i in seq_len(valiTime)[-1]){ - val_stations = rbind(val_stations, - sort(sample(ids, valiStation, - replace = FALSE))) + if(recal){ + val_stations = sort(sample(ids, valiStation, replace = FALSE)) + for (i in seq_len(valiTime)[-1]){ + val_stations = rbind(val_stations, + sort(sample(ids, valiStation, + replace = FALSE))) + } + } else{ + val_stations = matrix(ids, nrow = rowLength) } xx = seq(1, 101, by) @@ -197,28 +209,32 @@ QA = 
function(known, isoscape, bySite = TRUE, m = known[-val_stations[i,],] } - if(ni > 1){ - rescales = list() - for(j in 1:ni){ - m_sub = m - m_sub@data = m_sub@data[,(j * 2 - 1):(j * 2)] - class(m_sub) = "QAData" - rescales[[j]] = withCallingHandlers( + if(recal){ + if(ni > 1){ + rescales = list() + for(j in 1:ni){ + m_sub = m + m_sub@data = m_sub@data[,(j * 2 - 1):(j * 2)] + class(m_sub) = "QAData" + rescales[[j]] = withCallingHandlers( + message = addm, + warning = addw, + calRaster(m_sub, isoscape[[j]], mask, genplot = FALSE, + verboseLM = FALSE)[[1]] + ) + } + rescale = isoStack(rescales) + } else{ + class(m) = "QAData" + rescale = withCallingHandlers( message = addm, warning = addw, - calRaster(m_sub, isoscape[[j]], mask, genplot = FALSE, - verboseLM = FALSE)[[1]] + calRaster(m, isoscape, mask, genplot = FALSE, + verboseLM = FALSE) ) - } - rescale = isoStack(rescales) + } } else{ - class(m) = "QAData" - rescale = withCallingHandlers( - message = addm, - warning = addw, - calRaster(m, isoscape, mask, genplot = FALSE, - verboseLM = FALSE) - ) + rescale = isoscape } pd = withCallingHandlers( diff --git a/Rmarkdown/assignR.Rmd b/Rmarkdown/assignR.Rmd index 5453608..b6bc4b4 100644 --- a/Rmarkdown/assignR.Rmd +++ b/Rmarkdown/assignR.Rmd @@ -1,8 +1,7 @@ --- -title: "assignR Examples" +title: "assignR Examples - Dev Release" author: "Gabe Bowen, Chao Ma" -params: - date: +date: "`r format(Sys.Date(), '%B %d, %Y')`" output: html_document: toc: true @@ -320,7 +319,9 @@ qtlRaster(Ll_up, threshold = 0.1) ***** # Quality Analysis -How good are the geographic assignments? What area or probability threshold should be used? Is it better to use isoscape *A* or *B* for my analysis? These questions can be answered through split-sample validation using `QA`. +How good are the geographic assignments? What area or probability threshold should be used? Is it better to use isoscape *A* or *B* for my analysis? The `QA` function is designed to help answer these questions. 
+ +`QA` uses known-origin data to test the quality of isotope-based assignments and returns a set of metrics from this test. The default method conducts a split-sample test, iteratively splitting the dataset and using part to calibrate the isoscape(s) and the rest to evaluate assignment quality. The option `recal = FALSE` allows `QA` to be run without the `calRaster` calibration step. This provides a less complete assessment of methodological error but allows evaluation of assignments to tissue isoscapes made outside of the `QA` function, for example those calibrated using a different known-origin dataset or made directly through spatial modeling of tissue data. We will run quality assessment on the Loggerhead shrike known-origin dataset and precipitation isoscape. These analyses take some time to run, depending on the number of stations and iterations used. diff --git a/Rmarkdown/renderVignette.R b/Rmarkdown/renderVignette.R index d8233b1..22424f1 100644 --- a/Rmarkdown/renderVignette.R +++ b/Rmarkdown/renderVignette.R @@ -1,5 +1,4 @@ rmarkdown::render( "Rmarkdown/assignR.Rmd", - params = list(date = format(Sys.Date(), "%B %d, %Y")), output_file = "../docs/index.html" ) diff --git a/docs/index.html b/docs/index.html index 5af55ce..d0fb3bd 100644 --- a/docs/index.html +++ b/docs/index.html @@ -11,8 +11,9 @@ + -assignR Examples +assignR Examples - Dev Release @@ -240,8 +241,9 @@ -

assignR Examples

+

assignR Examples - Dev Release

Gabe Bowen, Chao Ma

+

February 17, 2022

@@ -658,7 +660,8 @@

Summarization

Quality Analysis

-

How good are the geographic assignments? What area or probability threshold should be used? Is it better to use isoscape A or B for my analysis? These questions can be answered through split-sample validation using QA.

+

How good are the geographic assignments? What area or probability threshold should be used? Is it better to use isoscape A or B for my analysis? The QA function is designed to help answer these questions.

+

QA uses known-origin data to test the quality of isotope-based assignments and returns a set of metrics from this test. The default method conducts a split-sample test, iteratively splitting the dataset and using part to calibrate the isoscape(s) and the rest to evaluate assignment quality. The option recal = FALSE allows QA to be run without the calRaster calibration step. This provides a less complete assessment of methodological error but allows evaluation of assignments to tissue isoscapes made outside of the QA function, for example those calibrated using a different known-origin dataset or made directly through spatial modeling of tissue data.

We will run quality assessment on the Loggerhead shrike known-origin dataset and precipitation isoscape. These analyses take some time to run, depending on the number of stations and iterations used.

qa1 = QA(Ll_d, d2h_lrNA, valiStation = 8, valiTime = 4, by = 5, mask = naMap, name = "normal")
## known was reprojected
diff --git a/man/QA.rd b/man/QA.rd index d968538..f68cc66 100644 --- a/man/QA.rd +++ b/man/QA.rd @@ -7,12 +7,13 @@ Quality assessment of geographic assignments } \description{ -How well does a given isoscape and known origin data set constrain the geographic origin of samples? Uses iterative re-sampling of the known origin data set to evaluate sample assignments and reports a suite of quality metrics. +How well does a given isoscape and/or known origin data set constrain the geographic origin of samples? Uses iterative re-sampling of known origin data to evaluate sample assignments and reports a suite of quality metrics. } \usage{ QA(known, isoscape, bySite = TRUE, valiStation = 1, valiTime = 50, - by = 2, prior = NULL, mask = NULL, setSeed = TRUE, name = NULL) + recal = TRUE, by = 2, prior = NULL, mask = NULL, setSeed = TRUE, + name = NULL) } \arguments{ @@ -24,8 +25,9 @@ subOrigData, list of subOrigData, or SpatialPointsDataFrame. Known-origin tissue \item{bySite}{logical. Resample known by site (TRUE) or by sample (FALSE)?} \item{valiStation}{numeric. How many sites or samples from known are withheld for validation? Must be two or more smaller than the length of \code{known}. } - \item{valiTime}{numeric. How many times do you want to randomly draw validation samples and run the validation? Must be an integer equal to or greater than one. + \item{valiTime}{numeric. How many times do you want to randomly draw validation samples and run the validation? Must be an integer greater than one. } + \item{recal}{logical. Recalibrate the isoscape(s) using the known-origin data? If FALSE, \code{isoscape} should be a calibrated product appropriate to the samples, and a single iteration is run for each sample in \code{known}; parameters \code{bySite}, \code{valiStation}, and \code{valiTime} are ignored.} \item{by}{integer. Threshold increment to use in evaluating assignment performance. Must be between 1 and 25.} \item{prior}{raster. 
Optional raster layer with prior probabilities, which has the same projection, resolution and extent as \code{isoscape}. }