-
Notifications
You must be signed in to change notification settings - Fork 54
Seasonal model
Many regions have monitoring data or opportunistic/process-research collections spanning a wide seasonal window. In these cases, it is helpful to account for both:
- among year variability and
- within year variability in spatial configuration.
In these cases, analysts can use the formula interface to specify covariates that represent the main effect of seasonal and annual variation, and then treat a new ordinal "year-season" variable as t_i
within VAST. These main effects for season and year might be linear offsets, spatially varying terms or both, and model selection/exploration is typically needed to determine the best option.
We explore this concept in detail elsewhere, and illustrate a simplified example below. The use of the formula interface for seasonal models was developed by Andrew Allyn.
# load package and example data (requires VAST release >= 3.7.0)
library(VAST)
example = load_example( data_set="NWA_yellowtail_seasons" )
# Load data and quick exploration of structure
# Set of years and seasons. The DFO spring survey usually occurs before the NOAA NEFSC spring survey, so ordering accordingly.
year_set = sort(unique(example$sampling_data[,'year']))
season_set = c("DFO", "SPRING", "FALL")
# Create a grid with all unique combinations of seasons and years and then combine these into one "year_season" variable
yearseason_grid = expand.grid("season" = season_set, "year" = year_set)
yearseason_levels = apply(yearseason_grid[,2:1], MARGIN = 1, FUN = paste, collapse = "_")
yearseason_labels = round(yearseason_grid[,'year'] + (as.numeric(factor(yearseason_grid[,'season'], levels = season_set))-1)/length(season_set), digits=1)
# Similar process, but for the observations
yearseason_i = apply(example$sampling_data[,c("year","season")], MARGIN = 1, FUN = paste, collapse = "_")
yearseason_i = factor(yearseason_i, levels = yearseason_levels)
# Add the year_season factor column to our sampling_data data set
example$sampling_data$year_season = yearseason_i
example$sampling_data$season = factor(example$sampling_data$season, levels = season_set)
# Some last processing steps
example$sampling_data = example$sampling_data[, c("year", "season", "year_season", "latitude", "longitude", "swept", "weight")]
# Make dummy observation for each season-year combination
dummy_data = data.frame(
year = yearseason_grid[,'year'],
season = yearseason_grid[,'season'],
year_season = yearseason_levels,
latitude = mean(example$sampling_data[,'latitude']),
longitude = mean(example$sampling_data[,'longitude']),
swept = mean(example$sampling_data[,'swept']),
weight = 0,
dummy = TRUE)
# Combine with sampling data
full_data = rbind(cbind(example$sampling_data, dummy = FALSE), dummy_data)
# Create sample data
samp_dat = data.frame(
"year_season" = as.numeric(full_data$year_season)-1,
"Lat" = full_data$latitude,
"Lon" = full_data$longitude,
"weight" = full_data$weight,
"Swept" = full_data$swept,
"Dummy" = full_data$dummy )
# Covariate data. Note here, case sensitive!
cov_dat = data.frame(
"Year" = as.numeric(full_data$year_season)-1,
"Year_Cov" = factor(full_data$year, levels = year_set),
"Season" = full_data$season,
"Lat" = full_data$latitude,
"Lon" = full_data$longitude )
# Inspect
table("year_season"=cov_dat$Year, "Actual_year"=cov_dat$Year_Cov)
table("year_season"=cov_dat$Year, "Actual_season"=cov_dat$Season)
#####
## Model settings
#####
# Make settings
settings = make_settings(n_x = 100,
Region = example$Region,
strata.limits = example$strata.limits,
purpose = "index2",
FieldConfig = c("Omega1" = 1, "Epsilon1" = 1, "Omega2" = 1, "Epsilon2" = 1),
RhoConfig = c("Beta1" = 3, "Beta2" = 3, "Epsilon1" = 4, "Epsilon2" = 4),
ObsModel = c(1, 1),
bias.correct = FALSE,
Options = c('treat_nonencounter_as_zero' = TRUE) )
# Creating model formula
formula_use = ~ Season + Year_Cov
# Implement corner constraint for linear effect but not spatially varying effect:
# * one level for each term is 2 (just spatially varying)
# * all other levels for each term is 3 (spatialy varying plus linear effect)
X1config_cp_use = matrix( c(2, rep(3,nlevels(cov_dat$Season)-1), 2, rep(3,nlevels(cov_dat$Year_Cov)-1) ), nrow=1 )
X2config_cp_use = matrix( c(2, rep(3,nlevels(cov_dat$Season)-1), 2, rep(3,nlevels(cov_dat$Year_Cov)-1) ), nrow=1 )
#####
## Model fit -- make sure to use new functions
#####
fit_orig = fit_model("settings" = settings,
"Lat_i" = samp_dat[, 'Lat'],
"Lon_i" = samp_dat[, 'Lon'],
"t_i" = samp_dat[, 'year_season'],
"b_i" = samp_dat[, 'weight'],
"a_i" = samp_dat[, 'Swept'],
"X1config_cp" = X1config_cp_use,
"X2config_cp" = X2config_cp_use,
"covariate_data" = cov_dat,
"X1_formula" = formula_use,
"X2_formula" = formula_use,
"X_contrasts" = list(Season = contrasts(cov_dat$Season, contrasts = FALSE), Year_Cov = contrasts(cov_dat$Year_Cov, contrasts = FALSE)),
"run_model" = FALSE,
"PredTF_i" = samp_dat[, 'Dummy'] )
# Adjust mapping for log_sigmaXi and fitting final model -- pool variance for all seasons and then set year's to NA
Map_adjust = fit_orig$tmb_list$Map
# Pool variances for each term to a single value
Map_adjust$log_sigmaXi1_cp = factor(c(rep(as.numeric(Map_adjust$log_sigmaXi1_cp[1]), nlevels(cov_dat$Season)),
rep(as.numeric(Map_adjust$log_sigmaXi1_cp[nlevels(cov_dat$Season)+1]), nlevels(cov_dat$Year_Cov))))
Map_adjust$log_sigmaXi2_cp = factor(c(rep(as.numeric(Map_adjust$log_sigmaXi2_cp[1]), nlevels(cov_dat$Season)),
rep(as.numeric(Map_adjust$log_sigmaXi2_cp[nlevels(cov_dat$Season)+1]), nlevels(cov_dat$Year_Cov))))
# Fit final model with new mapping
fit = fit_model("settings" = settings,
"Lat_i" = samp_dat[, 'Lat'],
"Lon_i" = samp_dat[, 'Lon'],
"t_i" = samp_dat[, 'year_season'],
"b_i" = samp_dat[, 'weight'],
"a_i" = samp_dat[, 'Swept'],
"X1config_cp" = X1config_cp_use,
"X2config_cp" = X2config_cp_use,
"covariate_data" = cov_dat,
"X1_formula" = formula_use,
"X2_formula" = formula_use,
"X_contrasts" = list(Season = contrasts(cov_dat$Season, contrasts = FALSE), Year_Cov = contrasts(cov_dat$Year_Cov, contrasts = FALSE)),
"newtonsteps" = 1,
"PredTF_i" = samp_dat[, 'Dummy'],
"Map" = Map_adjust )
#
plot( fit,
projargs='+proj=natearth +lon_0=-68 +units=km',
country = "united states of america",
year_labels = yearseason_labels )
Example applications:
- Index standardization
- Empirical Orthogonal Functions
- Ordination using joint species distribution model
- End-of-century projections
- Expand length and age-composition samples
- Combine condition and biomass data
- Expand stomach content samples
- Combine presence/absence, counts, and biomass data
- Seasonal and annual variation
- Combine acoustic and bottom trawl data
- Surplus production models
- Multispecies model of biological interactions
- Stream network models
Usage demos:
- Adding covariates
- Visualize covariate response
- Percent deviance explained
- Create a new extrapolation grid
- Custom maps using ggplot
- Modify axes for distribution metrics
- K-fold crossvalidation
- Simulating new data
- Modify defaults for advanced users
Project structure and utilities: