Skip to content

Commit

Permalink
mu-comparison: 4.0.3
Browse files Browse the repository at this point in the history
 - fix report so it can run with no "continuous" inputs
  • Loading branch information
brownag committed Dec 27, 2023
1 parent 307fc62 commit 01260c8
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 30 deletions.
44 changes: 21 additions & 23 deletions inst/reports/region2/mu-comparison/report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -232,18 +232,16 @@ if (cache.samples & file.exists('cached-samples.Rda')) {
d.continuous <- subset(sampling.res$raster.samples, variable.type == 'continuous')
d.continuous$variable <- factor(d.continuous$variable, levels = names(raster.list$continuous))
d.circ <- subset(sampling.res$raster.samples, variable.type == 'circular')
# subset and enable aspect summary
if (nrow(d.circ) > 0) { ## TODO: could there ever be more than one type of circular variable?
do.aspect <- TRUE
} else do.aspect <- FALSE
d.cat <- subset(sampling.res$raster.samples, variable.type == 'categorical')
### SUBSET CATEGORICAL VARIABLEs
#d.cat is derived from the sample object resulting from running cLHS_sample_by_polygon.R
d.cat <- subset(sampling.res$raster.samples, variable.type == 'categorical')
d.cat[[mu.col]] <- factor(d.cat[[mu.col]], levels = mu.set)
d.cat.sub <- list()
do.continuous <- isTRUE(nrow(d.continuous) > 0)
do.aspect <- isTRUE(nrow(d.circ) > 0)
do.categorical <- isTRUE(nrow(d.cat) > 0)
### FORMATTING
## figure out reasonable figure heights for bwplots and density plots
# baseline height for figure margins, axes, and legend
Expand Down Expand Up @@ -314,7 +312,7 @@ Whiskers extend from the 5th to 95th [percentiles](https://en.wikipedia.org/wiki
* Non-overlapping boxes are a strong indication that the central tendencies (of select raster data) differ.
* Distribution shape is difficult to infer from box and whisker plots, remember to cross-reference with density plots below.

```{r, echo=FALSE, fig.width=8, fig.height=dynamic.fig.height}
```{r, echo=FALSE, fig.width=8, fig.height=dynamic.fig.height, eval=do.continuous}
tps <- list(box.rectangle = list(col = 'black'),
box.umbrella = list(col = 'black', lty = 1),
box.dot = list(cex = 0.75),
Expand Down Expand Up @@ -360,7 +358,7 @@ These plots are a smooth alternative ([density estimation](https://en.wikipedia.
+ narrow peaks vs. wide "mounds"
+ short vs. long "tails"

```{r, echo=FALSE, fig.width=8, fig.height=dynamic.fig.height}
```{r, echo=FALSE, fig.width=8, fig.height=dynamic.fig.height, eval=do.continuous}
## TODO: consider variable line width proportional to area
## https://github.com/ncss-tech/soilReports/issues/81
# var.lwd <- scales::rescale(log(sampling.res$area.stats$`Total Area`), to=c(0.5, 2.5))
Expand Down Expand Up @@ -401,7 +399,7 @@ rm(density.plot.data)

Table of select [percentiles](https://en.wikipedia.org/wiki/Percentile), by variable. In these tables, headings like "Q5" can be interpreted as the the "5th percentile"; 5% of the data are less than this value. The 50th percentile ("Q50") is the median.

```{r, echo=FALSE, results='asis'}
```{r, echo=FALSE, results='asis', eval=do.continuous}
# summarize raster data for tabular output
mu.stats <- ddply(d.continuous, c('variable', mu.col), f.summary, p = p.quantiles)
Expand All @@ -414,7 +412,7 @@ kable(mu.stats.wide, row.names = FALSE, caption = 'Median Values',
align = 'r', digits = , col.names = c(mu.col, names(mu.stats.wide)[-1]))
```

```{r, echo=FALSE, results='asis'}
```{r, echo=FALSE, results='asis', eval=do.continuous}
muv <- split(mu.stats, mu.stats$variable)
l <- lapply(seq_along(muv), function(i) {
# remove variable column
Expand Down Expand Up @@ -492,7 +490,7 @@ res <- ldply(d.circ.list, function(i) {
kable(res, align = 'r', col.names = c(mu.col, names(res)[-1]))
```

```{r, echo=FALSE, fig.width=12, fig.height=6, results="asis"}
```{r, echo=FALSE, fig.width=12, fig.height=6, results="asis", eval=do.categorical}
# make categorical summaries for all categorical variables
d.cat.list <- split(d.cat, f = d.cat$variable)
Expand Down Expand Up @@ -520,7 +518,7 @@ This plot displays the similarity of the map units across the set of environment
* Comment-out raster data sources with low variability off if the multivariate summary is not displayed.


```{r, results='hide', warning=FALSE, echo=FALSE, fig.width=9, fig.height=9}
```{r, results='hide', warning=FALSE, echo=FALSE, fig.width=9, fig.height=9, eval=do.continuous}
## TODO:
# 1. combine median of continuous, geomorphons proportions, and NLCD proportions for dendrogram
Expand Down Expand Up @@ -625,7 +623,7 @@ if (length(d.mu.wide.vars) < 2) {
}
```

```{r, echo=FALSE}
```{r, echo=FALSE, eval=do.continuous}
if (multivariate.summary == FALSE)
print('Cannot create ordination plot: >1 rasters required or not enough variance within each map unit.')
```
Expand All @@ -639,7 +637,7 @@ The following figure highlights shared information among raster data sources bas
* Look for clustered sets of raster data: typically PRISM-derived and elevation data are closely correlated.
* Highly correlated raster data sources reduce the reliability of the "raster data importance" figure.

```{r, echo=FALSE, fig.width=10, fig.height=8, eval=multivariate.summary}
```{r, echo=FALSE, fig.width=10, fig.height=8, eval=do.continuous && multivariate.summary}
par(mar = c(2,5,2,2))
## note that we don't load the Hmisc package as it causes many NAMESPACE conflicts
## This requires 3 or more variables
Expand All @@ -659,7 +657,7 @@ The following figure ranks raster data sources in terms of how accurately each c
* Highly correlated raster data sources will "compete" for positions in this figure. For example, if *elevation* and *mean annual air temperature* are highly correlated, then their respective "importance" values are interchangeable.


```{r, echo=FALSE, fig.width=8, fig.height=6, eval=multivariate.summary}
```{r, echo=FALSE, fig.width=8, fig.height=6, eval=do.continuous && multivariate.summary}
# this will only work with >= 2 map units and >= 2 variables
# reset factor levels so that empty classes are not passed on to randomForest() (will cause error if empty/low-variance MUs are present)
Expand Down Expand Up @@ -709,7 +707,7 @@ if (length(levels(d.sub[[mu.col]])) >= 2) {
## Polygon Summaries


```{r, echo=FALSE}
```{r, echo=FALSE, eval=do.continuous}
# apply across raster values, to all polygons
#calculates prop outside range for _all_ polys
Expand All @@ -729,7 +727,7 @@ names(polygons.to.check)[1] <- mu.col
```


```{r, echo=FALSE}
```{r, echo=FALSE, eval=do.continuous}
#save a SHP file with prop.outside.range for each polygon and raster data source combination
if (nrow(polygons.to.check) > 0) {
try(writeVector(mu.check, paste0(file.path('output', shp.qc.fname), ".shp"), overwrite = TRUE))
Expand All @@ -741,7 +739,7 @@ if (nrow(polygons.to.check) > 0) {
```


```{r echo=FALSE}
```{r echo=FALSE, eval=do.continuous}
# save SHP with any un-sampled polygons
if (length(sampling.res$unsampled.ids) > 0) {
try(terra::writeVector(mu[sampling.res$unsampled.ids, ],
Expand Down Expand Up @@ -801,13 +799,13 @@ try(terra::writeVector(mu, paste0(file.path('output', shp.stats.fname), ".shp"),

A variety of output files are created by this report (in `output` folder). The output file names can be adjusted in `config.R`. The quantiles for each continuous variable by mapunit symbol are written to a CSV file that looks like this (first 6 columns):

```{r, echo=FALSE, results='asis'}
kable(head(mucol.stats.wide[,1:pmin(6, length(names(mucol.stats.wide)))]), row.names = FALSE)
```{r, echo=FALSE, results='asis', eval=do.continuous}
kable(head(mucol.stats.wide[, 1:pmin(6, length(names(mucol.stats.wide)))]), row.names = FALSE)
```

A shapefile is generated ("polygons-with-stats-XXX" where "XXX" is the set of map units symbols listed in `config.R`) that contains summaries computed by polygon. Polygons are uniquely identified by the `pID` column. Median raster values are given, with data source names abbreviated to conform to the limitations of DBF files:

```{r, echo=FALSE, results='asis'}
```{r, echo=FALSE, results='asis', eval=do.continuous}
kable(head(poly.stats.wide), row.names = FALSE)
```

Expand Down Expand Up @@ -839,7 +837,7 @@ Delineations with more than 10 - 15% of samples outside the range for a data sou

Here is a sample output table for a few polygons. Sorting these columns by magnitude in your GIS software offers a quick way to find potentially problematic areas.

```{r, echo=FALSE, results='asis'}
```{r, echo=FALSE, results='asis', eval=do.continuous}
kable(head(mu.check[complete.cases(as.data.frame(mu.check)),], row.names = FALSE))
```

Expand Down
53 changes: 46 additions & 7 deletions inst/reports/region2/mu-comparison/setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,59 @@
## version number

.report.name <- 'mu-comparison'
.report.version <- '4.0.2'
.report.version <- '4.0.3'
.report.description <- 'compare stack of raster data, sampled from polygons associated with 1-8 map units'

.paths.to.copy <- c('report.Rmd','custom.R','config.R','categorical_definitions.R',
'README.md','changes.txt','create-NASIS-import-files.R','clip-and-mask-rasters.R')
.paths.to.copy <-
c(
'report.Rmd',
'custom.R',
'config.R',
'categorical_definitions.R',
'README.md',
'changes.txt',
'create-NASIS-import-files.R',
'clip-and-mask-rasters.R'
)

.update.paths.to.copy <- c('report.Rmd','custom.R', 'categorical_definitions.R',
'README.md','changes.txt','create-NASIS-import-files.R','clip-and-mask-rasters.R')
.update.paths.to.copy <-
c(
'report.Rmd',
'custom.R',
'categorical_definitions.R',
'README.md',
'changes.txt',
'create-NASIS-import-files.R',
'clip-and-mask-rasters.R'
)

.packages.to.get <- c('knitr', 'rmarkdown', "MASS", "terra", "exactextractr", "plyr", "reshape2", "latticeExtra", "cluster", "clhs", "randomForest", "aqp", "sharpshootR", "RColorBrewer", "spdep")

.packages.to.get <-
c(
'knitr',
'rmarkdown',
"MASS",
"terra",
"exactextractr",
"plyr",
"reshape2",
"latticeExtra",
"cluster",
"clhs",
"randomForest",
"aqp",
"sharpshootR",
"RColorBrewer",
"spdep"
)

## packages from GH
# dependencies should be satisfied by installing CRAN version first
.gh.packages.to.get <- c('ncss-tech/sharpshootR', 'ncss-tech/soilDB', 'ncss-tech/aqp')
.gh.packages.to.get <- c(
'ncss-tech/sharpshootR',
'ncss-tech/soilDB',
'ncss-tech/aqp'
)

# flag to indicate that shiny.Rmd and not report.Rmd is the primary markdown file; convention should probably be that shiny.Rmd has option to generate a static report.Rmd-type output.
# currently the pedon summary demo using Shiny supports this option and uses a file called report.Rmd as the template for the static output
Expand Down

0 comments on commit 01260c8

Please sign in to comment.