Big update to pedon-summary report

ncss-tech · Oct 7, 2024 · ccad76d · ccad76d
1 parent e8b4081
commit ccad76d
Show file tree

Hide file tree

Showing 7 changed files with 547 additions and 205 deletions.
diff --git a/inst/reports/region2/pedon-summary/NEWS.md b/inst/reports/region2/pedon-summary/NEWS.md
@@ -0,0 +1,4 @@
+# pedon-summary v1.0 
+- Summarize Low-RV-High values for pedons based on Generalized Horizon Labels
+- Updated to use sf and terra internally
+- Renamed and reorganized configuration file
diff --git a/inst/reports/region2/pedon-summary/README.md b/inst/reports/region2/pedon-summary/README.md
@@ -0,0 +1,25 @@
+To install and run this report, run the following commands:
+
+1. Install latest version of soilReports from GitHub: 
+
+```r
+remotes::install_github("ncss-tech/soilReports")
+```
+
+2. Install required packages for `"pedon-summary"` report:
+
+```r
+soilReports::reportSetup('region2/pedon-summary')
+```
+
+3. Create a fresh instance of `"pedon-summary"` in path `"C:/workspace2/pedon-summary"`: 
+
+```r
+soilReports::reportInit('region2/pedon-summary', "C:/workspace2/pedon-summary")
+```
+
+4. Navigate to `"C:/workspace2/pedon-summary"` (or selected output folder from #3) and open _config.R_
+
+5. In _config.R_ update subset rules, regular expression patterns, generalized horizon label rules, and spatial data sources.
+
+6. Open _report.Rmd_ and click "Knit" button in RStudio
diff --git a/inst/reports/region2/pedon-summary/config.R b/inst/reports/region2/pedon-summary/config.R
@@ -1,12 +1,50 @@
 ## this is where you define input data sources
 
-
 ## determine subsetting rule:
-## pattern: matching based on the component name specified in report-rules.R
-## pedon.id.list: matching based on list of pedon IDs specified in report-rules.R
-subset.rule <- 'musymtaxon'
+## pattern: matching based on the component name 
+## pedon.id.list: matching based on list of pedon IDs 
+## musym: matching based on mapunit symbol
+## musymtaxon: match based on taxon name, taxon kind and mapunit symbol
+SUBSET_RULE <- 'musymtaxon' 
 ## subset.rule <- 'pedon.id.list'
 
+# Target Mapunit Symbol (regular expression)
+MUSYM_PATTERN <- '.'
+TAXONNAME_PATTERN <- '.'
+TAXONKIND_PATTERN <- '.'
+
+# use NASIS selected set? (TRUE) or whole local database (FALSE)
+SELECTED_SET <- TRUE
+
+# Generalized Horizon Label rules file
+GENHZ_RULES <- "genhz-rules.R"
+
+# path to spatial datasource name (a folder or geodatabase)
+SPATIAL_DSN <- "SSURGO" # either "SSURGO", "STATSGO" or path to shapefile/File Geodatabase
+# Shapefile/feature class should contain attributes: "AREASYMBOL", "MUSYM"
+# spatial layer 
+
+# spatial layer within SPATIAL_DSN
+SPATIAL_LAYER <- NULL # NULL/ignored for SPATIAL_DSN = "SSURGO" or "STATSGO"
+# Shapefile name without .SHP extension (if SPATIAL_DSN is a folder)
+# Feature class name in File Geodatabase (if SPATIAL_DSN is a .gdb)
+# use "sapolygon" for soil survey area polygons
+
+# # define raster variable names and data sources in a list
+# # prefix variable names with gis_
+# # need not share the same CRS
+RASTER_LIST <- list(
+  gis_ppt = 'F:/Geodata/project_data/MUSUM_Prism/final_MAP_mm_800m.tif',
+  gis_tavg = 'F:/Geodata/project_data/MUSUM_Prism/final_MAAT_800m.tif',
+  gis_ffd = 'F:/Geodata/project_data/MUSUM_Prism/ffd_50_pct_800m.tif',
+  gis_gdd = 'F:/Geodata/project_data/MUSUM_Prism/gdd_mean_800m.tif'#,
+  # gis_elev = 'F:/Geodata/project_data/MUSum_30m_SSR2/DEM_30m_SSR2.tif',
+  # gis_solar = 'F:/Geodata/project_data/ssro2_ann_beam_rad_int.tif',
+  # gis_twi = 'F:/Geodata/project_data/ssro2_saga_twi_int.tif',
+  # gis_slope = 'F:/Geodata/project_data/MUSum_30m_SSR2/Slope_30m_SSR2.tif',
+  # gis_geomorphons = 'F:/Geodata/project_data/MUSUM_Geomorphon/forms30_region2.tif'
+)
+
 ## report details:
 # probabilities for low-rv-high calculations
 p.low.rv.high <- c(0.05, 0.5, 0.95)
@@ -17,35 +55,3 @@ q.type <- 7
 # ML profile smoothing
 ml.profile.smoothing <- 0.65
 
-## GIS data details
-# map unit linework 
-mu.dsn <- 'L:/NRCS/MLRAShared/CA792/ca792_spatial/FG_CA792_OFFICIAL.gdb'
-mu.layer <- 'ca792_a'
-mu.sym <- '.'
-
-# define raster variable names and data sources, store in a list
-# prefix variable names with gis_
-# these should all share the same CRS
-r <- list(
-  gis_ppt=raster('C:/workspace/R_reports/Geodata/climate/final_MAP_mm_800m.tif'),
-  gis_tavg=raster('C:/workspace/R_reports/Geodata/climate/final_MAAT_800m.tif'),
-  gis_ffd=raster('C:/workspace/R_reports/Geodata/climate/ffd_mean_800m.tif'),
-  gis_gdd=raster('C:/workspace/R_reports/Geodata/climate/gdd_mean_800m.tif'),
-  gis_elev=raster('C:/workspace/R_reports/Geodata/elev10.tif'),
-  gis_solar=raster('C:/workspace/R_reports/Geodata/ssro2_ann_beam_rad_int.tif'),
-  gis_slope=raster('C:/workspace/R_reports/Geodata/MUSum_10m_SSR2/SSR2_Slope10m_AEA.tif'),
-  gis_geomorphons=raster('C:/workspace/R_reports/Geodata/MUSum_Geomorphon/forms10_region2.tif')
-)
-
-## map unit data: load the official version
-mu <-  readOGR(dsn=mu.dsn, layer=mu.layer, encoding='encoding', stringsAsFactors=FALSE)
-
-# convert: character -> integer -> character
-# drops all bogus or undefined map units
-
-mu$MUSYM <- as.character(mu$MUSYM)
-#(as.integer(as.character(mu$MUSYM)))
-
-
-#as.character(as.integer(as.character("6074b"))
-
diff --git a/inst/reports/region2/pedon-summary/custom.R b/inst/reports/region2/pedon-summary/custom.R
@@ -6,28 +6,26 @@
 ## many different summaries have low-rv-high percentiles hard-coded
 ##
 
-
 options(stringsAsFactors=FALSE)
 options(width=140)
 
 ##
 ## plotting styles:
 ##
-tps.standard <- list(plot.symbol=list(col=1, cex=1, pch=1), 
-										 plot.line=list(col=1, lwd=2, alpha=0.75), 
-										 strip.background=list(col=grey(c(0.85,0.75))), 
-										 layout.heights=list(strip=1.5),
-										 box.umbrella=list(col=1, lty=1), 
-										 box.rectangle=list(col=1), 
-										 box.dot=list(col=1, cex=0.5)
-										 )
-
+tps.standard <- list(
+  plot.symbol = list(col = 1, cex = 1, pch = 1),
+  plot.line = list(col = 1, lwd = 2, alpha = 0.75),
+  strip.background = list(col = grey(c(0.85, 0.75))),
+  layout.heights = list(strip = 1.5),
+  box.umbrella = list(col = 1, lty = 1),
+  box.rectangle = list(col = 1),
+  box.dot = list(col = 1, cex = 0.5)
+)
 
 ##
 ## functions 
 ##
 
-
 # return a table of proportions, including missing data, along with sample size as data.frame
 # 'x' must be a factor with levels set in logical order
 categorical.prop.table <- function(x, digits=2) {
@@ -72,9 +70,6 @@ dend.by.musym <- function(f.i, v=c('clay','total_frags_pct')) {
 
 }
 
-
-
-
 ## TODO: which quantile "type" is most appropriate?
 ##    see: http://stackoverflow.com/questions/95007/explain-the-quantile-function-in-r
 ##    test with: x <- sample(1:100, size=20, replace=TRUE) ; round(t(sapply(1:9, function( i ) quantile(x, type=i))), 2)
@@ -272,18 +267,25 @@ summarize.component <- function(f.i) {
 	# summary by variable / generalized hz label
 	s.i <- ddply(h.i.long, c('variable', 'genhz'), .fun=conditional.l.rv.h.summary, p=getOption('p.low.rv.high'), qt=getOption('q.type'))
 
-	## tables
-	# long -> wide
-	prop.by.genhz.table <- dcast(s.i, genhz ~ variable, value.var='range')
-	names(prop.by.genhz.table) <- var.names
-
-	# texture class tables
-	texture.table <- ddply(h.i, c('genhz'), summarize.texture.class)
-  names(texture.table) <- c('Generalized HZ', 'Texture Classes')
-
-	# diagnostic hz tables
-	diag.hz.table <- ddply(diagnostic_hz(f.i), c('featkind'), .fun=diagnostic.hz.summary, p=getOption('p.low.rv.high'), qt=getOption('q.type'))
-	names(diag.hz.table) <- c('kind', 'N', 'top', 'bottom', 'thick')
+	if (nrow(s.i) > 0) {
+  	## tables
+  	# long -> wide
+  	prop.by.genhz.table <- dcast(s.i, genhz ~ variable, value.var='range')
+  	names(prop.by.genhz.table) <- var.names
+
+  	# texture class tables
+  	texture.table <- ddply(h.i, c('genhz'), summarize.texture.class)
+    names(texture.table) <- c('Generalized HZ', 'Texture Classes')
+
+  	# diagnostic hz tables
+  	diag.hz.table <- ddply(diagnostic_hz(f.i), c('featkind'), .fun=diagnostic.hz.summary, p=getOption('p.low.rv.high'), qt=getOption('q.type'))
+  	names(diag.hz.table) <- c('kind', 'N', 'top', 'bottom', 'thick')
+	} else {
+	  prop.by.genhz.table <- NULL
+	  texture.table <- NULL
+	  diag.hz.table <- NULL
+	  f.i$genhz <- "<not-used>"
+	}
 
 	## ML-horizonation
 	# aggregate
@@ -350,13 +352,29 @@ summarize.component <- function(f.i) {
   h.i.long.sub <- h.i.long[grep('R|Cr|Cd', h.i.long$genhz, ignore.case=TRUE, invert=TRUE), ]
 
   # bwplot
-  prop.by.musym.and.genhz <- bwplot(musym ~ value | variable + genhz, data=h.i.long.sub, as.table=TRUE, xlab='', scales=list(x=list(relation='free')), drop.unused.levels=TRUE, par.settings=tps.standard, stats=custom.bwplot, par.strip.text=list(cex=0.75), panel=function(...) {
-    panel.grid(-1, -1)
-    panel.bwplot(...)
-  })
-
-  # convert second paneling dimension to outer strips
-  prop.by.musym.and.genhz <- useOuterStrips(prop.by.musym.and.genhz)
+  if (nrow(h.i.long.sub) > 0) {
+    prop.by.musym.and.genhz <- bwplot(
+      musym ~ value | variable + genhz,
+      data = h.i.long.sub,
+      as.table = TRUE,
+      xlab = '',
+      scales = list(x = list(relation = 'free')),
+      drop.unused.levels = TRUE,
+      par.settings = tps.standard,
+      stats = custom.bwplot,
+      par.strip.text = list(cex = 0.75),
+      panel = function(...) {
+        panel.grid(-1, -1)
+        panel.bwplot(...)
+      }
+    )
+
+    # convert second paneling dimension to outer strips
+    prop.by.musym.and.genhz <- useOuterStrips(prop.by.musym.and.genhz)
+  } else {
+    prop.by.musym.and.genhz <- bwplot(musym ~ value,
+                                      data = h.i.long.sub)
+  }
 
 	# pack into list and return
 	return(list(n=length(f.i), pg=pedon.gis.table, mgz=missing.genhz.IDs, ct=gen.hz.classification.table, rt=prop.by.genhz.table, dt=diag.hz.table, tt=texture.table, ml.hz=gen.hz.ml, ml.hz.plot=gen.hz.aggregate.plot, sf=pedon.surface.frags.table, pmg=prop.by.musym.and.genhz))

diff --git a/inst/reports/region2/pedon-summary/genhz-rules.R b/inst/reports/region2/pedon-summary/genhz-rules.R
@@ -53,6 +53,10 @@ gen.hz.rules <- list(
 	  n=c('Oi','A','BA','Bw1','Bw2','Bt1','Bt2','BC','Cr','R'),
 	  p=c('Oi', 'A', 'AB|BA', 'Bw1','Bw[23]', 'Bt|Bt[12]', 'Bt[345]', 'BC|CB|C', 'Cr', 'R')
 	),
+	'loafercreek'=list(
+	  n=c('Oi','A','BA','Bw1','Bw2','Bt1','Bt2','BC','Cr','R'),
+	  p=c('Oi', 'A', 'AB|BA', 'Bw1','Bw[23]', 'Bt|Bt[12]', 'Bt[345]', 'BC|CB|C', 'Cr', 'R')
+	),
 	'hetchy'=list(
 	  n=c('Oi', 'A','AB','Bw','Bt1','Bt2','Bt3','BC','C','Cr', 'R'),
 	  p=c('O', 'A','AB|BA|ABt','Bw','Bt1|^Bt','Bt2','Bt[34]','^BC|^2BC','^C$|^2C$','Cr', 'R')