Updates and fixes

SPATIAL-Lab · Jan 27, 2022 · 35463c6 · 35463c6
1 parent 966a378
commit 35463c6
Show file tree

Hide file tree

Showing 10 changed files with 250 additions and 16 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -14,17 +14,19 @@ export(unionP)
 export(wDist)
 S3method(plot,QA)
 S3method(plot,isoStack)
+S3method(plot,wDist)
 S3method(c,wDist)
 import(rgdal)
 import(maptools)
 importFrom(sp, proj4string, spTransform, spplot, plot, identicalCRS, merge)
 importFrom(raster, crs, "crs<-", crop, plot, values, getValues, setValues, minValue, 
             maxValue, extract, stack, ncell, nlayers, writeRaster, cellStats, 
            overlay, projectRaster, compareRaster, res, extent, compareCRS,
-           raster, resample, mask, intersect)
-importFrom(grDevices, dev.off, pdf, png)
-importFrom(graphics, abline, boxplot, legend, lines, par, text, title)
-importFrom(stats, coef, cov, cor, dnorm, lm, median, na.omit, rnorm, sd, var)
+           raster, resample, mask, intersect, rasterToPoints)
+importFrom(grDevices, dev.off, pdf, png, col2rgb, rgb)
+importFrom(graphics, abline, boxplot, legend, lines, par, text, title, polygon)
+importFrom(stats, coef, cov, cor, dnorm, lm, median, na.omit, rnorm, sd, var,
+           density, weighted.mean)
 importFrom(utils, data, download.file, setTxtProgressBar, txtProgressBar, 
            tail, unzip)
 importFrom(mvnfast, dmvn)

diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,9 @@
 # assignR news
 
 ## assignR 2.1.1.9000
+* Add wDist function and c and plot methods for summarizing weighted distance and bearing distributions using sample collection locations and posterior probability maps
 * Bug fixes
+* Documentation edits
 
 ## assignR 2.1.1
 * Bug fixes

diff --git a/R/wDist.R b/R/wDist.R
@@ -109,7 +109,7 @@ c.wDist = function(...){
   a = list(...)
 
   if(class(a[[1]])[1] != "wDist"){
-    stop("x must be one or more wDist objects")
+    stop("... must be one or more wDist objects")
   }
 
   n = 0
@@ -143,3 +143,129 @@ c.wDist = function(...){
 
   return(s)
 }
+
+plot.wDist = function(x, ..., bin = 20, pty = "both"){
+
+  if(class(x) != "wDist"){
+    stop("x must be one or more wDist objects")
+  }
+
+  n = length(x)
+  if(n == 0){
+    stop("x is empty")
+  }
+  if(n > 5){
+    message("x length >5, only the first 5 samples will be plotted")
+    n = 5
+  }
+
+  if(!is.numeric(bin)){
+    stop("bin must be numeric")
+  }
+  if(length(bin) > 1){
+    stop("bin must be length 1")
+  }
+  if(bin <=0 | bin > 90){
+    stop("bin must be a value between 0 and 90")
+  }
+  if(360 %% bin != 0){
+    stop("bin should be a factor of 360")
+  }
+
+  if(!(pty %in% c("both", "dist", "bear"))){
+    stop("pty not valid for plot.xist")
+  }
+
+  if(pty %in% c("both", "dist")){
+    #Distance
+    d.xmax = d.ymax = 0
+    d.dens = list()
+    for(i in seq_len(n)){
+      d.dens[[i]] = x[[i]]$d.dens
+      d.xmax = max(d.xmax, max(d.dens[[i]]$x))
+      d.ymax = max(d.ymax, max(d.dens[[i]]$y))
+    }
+
+    plot(d.dens[[1]], xlim = c(0, d.xmax), ylim = c(0, d.ymax),
+         main = "", ylab = "Probability density", xlab = "Distance (m)")
+    for(i in seq_len(n-1)){
+      lines(d.dens[[i+1]], col = i+1, )
+    }
+    legend("topright", legend = names(x), lty = 1, col = seq_len(n), 
+           inset = 0.01)    
+  }
+
+  if(pty %in% c("both", "bear")){
+    #Bearing
+    b.dens = list()
+    for(i in seq_len(n)){
+      b.dens[[i]] = x[[i]]$b.dens
+    }
+
+    arc = function(a1, a2, b){
+      a = seq(a1, a2, by = 0.5)
+      r = 2 * pi * a / 360
+      x = sin(r) * b
+      y = cos(r) * b
+      return(cbind(x, y))
+    }
+
+    wedge = function(a1, a2, b){
+      xy = arc(a1, a2, b)
+      xy = rbind(c(0,0), xy, c(0,0))
+      return(xy)
+    }
+
+    bins = seq(-180, 179.9, by = bin)
+    vals = numeric(length(bins))
+
+    p = par(no.readonly = TRUE)
+    on.exit(par(p))
+    if(n > 3){
+      mfr = 2
+      if(n == 5){
+        mfc = 3
+      }else{
+        mfc = 2
+      }
+    } else{
+      mfr = 1
+      mfc = n
+    }
+    par(mfrow = c(mfr, mfc), mar = c(1,1,3,1))
+
+    for(i in seq_len(n)){
+      b = b.dens[[i]]$x
+      for(j in seq_along(b)){
+        if(b[j] < -180){
+          b[j] = b[j] + 360
+        } else if(b[j] >= 180){
+          b[j] = b[j] - 360
+        }
+      }
+      y = b.dens[[i]]$y
+      for(j in seq_along(bins)){
+        vals[j] = sum(y[b >= bins[j] & b < bins[j] + bin])
+      }
+
+      b.max = max(vals)
+      xy = arc(-180, 180, b.max)
+      plot(xy, type = "l", col = "dark grey", axes = FALSE,
+           xlab = "", ylab = "", asp = 1, main = names(x)[i])
+      text(xy[600,1], xy[600,2], signif(b.max, 2), col = "dark grey", pos = 1,
+           offset = 2, adj = c(0,1))
+      lines(arc(-180, 180, b.max/2), col = "dark grey")
+      for(j in c(-180, -90, 0, 90)){
+        lines(wedge(j, j, b.max * 1.05), col = "dark grey")
+      }
+      for(j in seq_along(bins)){
+        xy = wedge(bins[j], bins[j] + bin, vals[j])
+        c = col2rgb(i)
+        polygon(xy, col = rgb(c[1], c[2], c[3],
+                              alpha = 200, maxColorValue = 255))
+      }
+    }
+  }
+
+  return()
+}
diff --git a/man/c.wDist.Rd b/man/c.wDist.Rd
@@ -50,7 +50,7 @@ un = data.frame(id,d2H)
 pp = pdRaster(r, unknown = un, mask = naMap)
 
 # random collection locations
-sites = d$data[sample(seq(length(d$data)), 5),]
+sites = d$data[sample(seq(length(d$data)), 4),]
 
 # generate a wDist object
 wd = wDist(pp, sites)

diff --git a/man/oddsRatio.Rd b/man/oddsRatio.Rd
@@ -19,7 +19,7 @@ oddsRatio(pdR, inputP)
 RasterStack or RasterBrick of probability density maps, e.g., as produced by \code{pdRaster}.
 }
   \item{inputP}{
-SpatialPoints or SpatialPolygons (or *DataFrame equivalent) of length 2
+SpatialPoints* or SpatialPolygons* object of length 2
 }
 }
 

diff --git a/man/plot.wDist.Rd b/man/plot.wDist.Rd
@@ -0,0 +1,72 @@
+\name{plot.wDist}
+
+\alias{plot.wDist}
+
+\title{
+Plot weighted distance and bearing distributions
+}
+
+\description{
+Plot the output from \code{\link{wDist}}, including weighted kernel density distributions for distance and bearing of travel.
+}
+
+\usage{
+\method{plot}{wDist}(x, ..., bin = 20, pty = "both")
+}
+
+\arguments{
+  \item{x}{
+    A wDist object
+  }
+  \item{...}{
+    Other arguments to be passed to plot
+  }
+  \item{bin}{
+    numeric. Bin width used to generate rose plot of travel bearings, in degrees. Must be a factor of 360.
+  }
+  \item{pty}{
+    character. Type of plot to produce. Must be one of \dQuote{dist}, \dQuote{bear}, or \dQuote{both}.
+  }
+}
+
+\details{
+For the default \code{pty}, two plot panels will be printed to the active graphical device showing the distance and bearing distributions for (up to) the first five samples in \code{wd}. If more than five items exist in \code{wd}, those beyond the fifth will be ignored and a message returned.
+}
+
+\seealso{
+\code{\link{wDist}}
+}
+
+\examples{
+library(sp)
+
+# load North America boundary and global isoscape
+data("naMap")
+data("d2h_lrNA")
+
+# load hydrogen isotope data for human hair in North America
+d = subOrigData(group = "Modern human", mask = naMap, niter = 100)
+
+# rescale from environmental isoscape to tissue isoscape
+r = calRaster(known = d, isoscape = d2h_lrNA, mask = naMap)
+
+# four unknown-origin examples
+id = c("A", "B", "C", "D")
+d2H = c(-110, -90, -105, -102)
+un = data.frame(id,d2H)
+
+# posterior probabilities
+pp = pdRaster(r, unknown = un, mask = naMap)
+
+# random collection locations
+sites = d$data[sample(seq(length(d$data)), 4),]
+
+# generate a wDist object
+wd = wDist(pp, sites)
+
+# plot distributions
+plot(wd)
+
+# plot only bearing distriubtions with a finer bin size
+plot(wd, bin = 5, pty = "bear")
+}
diff --git a/man/wDist.Rd b/man/wDist.Rd
@@ -28,18 +28,18 @@ SpatialPoints* object containing the collection locations for the n samples repr
 
 Distances and bearings are calculated on the WGS84 geoid using functions from the \pkg{geosphere} package.
 
-Bearing values correspond to the intial bearing from source to collection location, and are reported on a scale of -180 to +180 degrees. The statistical metrics are rectified so that values for distributions spanning due south are reported correctly. Both weighted bearing and distance distributions are often multimodal, and it is recommended to review the distribution densities to assess the representativeness of the statitics (e.g., using \code{\link{plot.wDist}}). 
+Bearing values correspond to the initial bearing from source to collection location, and are reported on a scale of -180 to +180 degrees. The statistical metrics are rectified so that values for distributions spanning due south are reported correctly. Both weighted bearing and distance distributions are often multimodal, and it is recommended to review the distribution densities to assess the representativeness of the statistics (e.g., using \code{\link{plot.wDist}}). 
 }
 
 \value{
 Returns an object of class \dQuote{wDist}, a list of length n.
   \item{stats}{
 named number. Statistics for the distance (dist, meters) and bearing (bear, degrees) between source and collection locations, including the weighted mean (wMean) and quantile (wXX) values.}
   \item{d.density}{
-density. Weighted kernal density for the distance between source and collection locations (meters). See \code{\link{stats::density()}}. 
+density. Weighted kernel density for the distance between source and collection locations (meters). See \code{\link[stats]{density}}. 
   }
   \item{s.density}{
-density. Weighted kernal density for the bearing between source and collection locations (degrees). See \code{\link{stats::density()}}.
+density. Weighted kernel density for the bearing between source and collection locations (degrees). See \code{\link[stats]{density}}.
   }
 }
 
@@ -65,12 +65,12 @@ un = data.frame(id,d2H)
 pp = pdRaster(r, unknown = un, mask = naMap)
 
 # random collection locations
-sites = d$data[sample(seq(length(d$data)), 5),]
+sites = d$data[sample(seq(length(d$data)), 4),]
 
 # generate a wDist object
 wd = wDist(pp, sites)
 
-# combine stats and print
-c(wd)
+# structure of the wDist object
+str(wd[[1]])
 }
 
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -2,5 +2,4 @@ library(testthat)
 library(assignR)
 library(raster)
 
-
-test_check("assignR")
+test_check("assignR")
diff --git a/tests/testthat/test_pdRaster.R b/tests/testthat/test_pdRaster.R
@@ -5,6 +5,9 @@ suppressWarnings({
   d2H = -80
   d2H.sd = 2
   un = data.frame(id, d2H, d2H.sd, d2H_cal = "UT_H_1")
+  site = d$data[1,]
+  pdr = pdRaster(r, unknown = un, mask = naMap, genplot = FALSE)
+  wd = wDist(pdr, site)
 
   r2 = r
   r2[[1]][[1]] = r2[[1]][[1]] + rnorm(ncell(r2[[1]][[1]]), 0, 10)
@@ -31,6 +34,10 @@ test_that("pdRaster can correctly calculate posterior probabilities of origin
             expect_is(suppressWarnings(pdRaster(rmulti, list(un2, un2), mask = naMap, 
                                genplot = FALSE)), "RasterLayer")
             expect_is(pdRaster(r3, un3), "RasterLayer")
+            expect_is(wd, "wDist")
+            expect_is(c(wd), "data.frame")
+            expect_error(plot(wd, bin = 11))
+            expect_silent(plot(wd))
             expect_error(pdRaster(r$lm.model, unknown = un))
             expect_error(pdRaster(r$isoscape.rescale$mean, unknown = un))
             expect_error(pdRaster(stack(r$isoscape.rescale,r$isoscape.rescale), un))

diff --git a/vignettes/assignR.Rmd b/vignettes/assignR.Rmd
@@ -188,7 +188,7 @@ Dp_pd_Honly = pdRaster(Dp_multi[[1]], Dp_unk[,-3])
 We see pretty clear distinctions between the two samples, driven by a strong SW-NE gradient in the tissue isoscape H values across the state.
 
 *****
-What if we add the Sr information to the analysis? The syntax for running `pdRaster` is the same, but now we provide our isoStack object in place of the single isoscape. The function will use the spatial covariance of the isoscape values to approximate the error covariance for the two (or more) markers and return poterior probabilities based on the multivariate normal probability density function evaluated at each grid cell.
+What if we add the Sr information to the analysis? The syntax for running `pdRaster` is the same, but now we provide our isoStack object in place of the single isoscape. The function will use the spatial covariance of the isoscape values to approximate the error covariance for the two (or more) markers and return posterior probabilities based on the multivariate normal probability density function evaluated at each grid cell.
 
 ```{r Dp_multi, fig.width=5, fig.asp=0.8, out.width='45%'}
 Dp_pd_multi = pdRaster(Dp_multi, Dp_unk)
@@ -237,6 +237,32 @@ oddsRatio(Ll_prob, pp12)
 
 The odds of the first point being the location of origin are pretty high for each sample, and much higher than for the second point.
 
+## Distance and Direction of Movement
+
+A common goal in movement research is to characterize the distance or direction of movement for individuals. The `wDist` tool and it's helper methods are designed to leverage the information in the posterior probability surfaces for this purpose. 
+
+The analyses conducted in **assignR** cannot determine a single unique location of origin for a given sample, but the do give the probability that each location on the map is the location of origin. If we know the collection location for a sample, we can calculate the distance and direction between each possible location of origin and the collection site, and weighting these by their posterior probability generate a distribution (and statistics for that distribution) describing the distance and direction of travel.
+
+Let's do a weighted distance analysis for our first two unknown origin loggerhead shrike samples. Since these are pretend samples, we'll pretend that the two point locations we defined above for the `oddsRatio` analysis are the locations at which these samples were collected. Here are those locations plotted with the corresponding posterior probability maps. Then we'll use the functions `c` and `plot` to view the summary statistics and distributions returned by `wDist`.
+
+```{r wDist1, fig.width=5, fig.asp=0.8, out.width='45%'}
+# View the data
+plot(Ll_prob[[1]], main = names(Ll_prob)[1])
+points(pp12[1])
+plot(Ll_prob[[2]], main = names(Ll_prob)[2])
+points(pp12[2])
+```
+
+Now let's run the analysis and use the functions `c` and `plot` to view the summary statistics and distributions returned by `wDist`.
+
+```{r wDist2, fig.width=5, fig.asp=0.8, out.width='45%'}
+wd = wDist(Ll_prob[[1:2]], pp12)
+c(wd)[c(1,2,4,6,8,10,12,14,16)] #only showing select columns for formatting!
+plot(wd)
+```
+
+Comparing these statistics and plots with the data shows how the `wDist` metrics nicely summarize the direction and distance of movement. Both individuals almost certainly moved south from their location of origin to the collection location. Individual a's migration may have been a little bit longer than b's, and in a more southwesterly direction, patterns that are dominated more by the difference in collection locations than the probability surfaces for location of origin. Also notice the multi-modal distance distribution for individual b...these can be common in `wDist` summaries so it's a good ideal to look at the distributions themselves before choosing and interpreting summary statistics.
+
 ## Assignment
 
 Researchers often want to classify their study area in to regions that are and are not likely to be the origin of the sample (effectively 'assigning' the sample to a part of the area). This requires choosing a subjective threshold to define how much of the study domain is represented in the assignment region. `qtlRaster` offers two choices.