From 9d00993551e627cea544a69be0ad5b6c22fb6f6d Mon Sep 17 00:00:00 2001 From: Maia Kapur <39382798+mkapur-noaa@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:06:24 -0800 Subject: [PATCH] Create simulated_example.Rmd --- vignettes/simulated_example.Rmd | 151 ++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 vignettes/simulated_example.Rmd diff --git a/vignettes/simulated_example.Rmd b/vignettes/simulated_example.Rmd new file mode 100644 index 0000000..bd2dbbc --- /dev/null +++ b/vignettes/simulated_example.Rmd @@ -0,0 +1,151 @@ +--- +title: "Simulated Data Example" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Simulated Data Example} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup, include = FALSE} +library(growthbreaks) +library(MASS) +library(dplyr) +library(ggplot2) +library(sf) +library(rnaturalearth) +``` + + + +# Simulated length-at-age data +A spatial process gives rise to heterogeneity in fish length-at-age with a predominant north-south cline, as shown in the figure below: + +```{r, echo = FALSE} +## based on code by G.D. Adams +## Specify data size +set.seed(731) +G=6 +nsamples = 500 +group <- sample(1:6, nsamples, replace = TRUE) # Group for individual X +N <- nsamples # Total number of samples + + +## Mu VBGM hyperparameters +mu.Linf = 500 +mu.k = 0.3 +mut.t0 = -0.5 +mu.parms <- c(mu.Linf, mu.k, mut.t0) +sigma = 10 # Observation error + + +## Group level random effects +sigma.group = c(0.3, 0.05, 0.2) +rho = 0.3 # Correlation between group level parameters +cor.group.mat = matrix(rho, 3, 3) +diag(cor.group.mat) <- 1 +cov.group.mat <- diag(sigma.group) %*% cor.group.mat %*% diag(sigma.group) # Get covariance + + +## Simulate parameters for groups---- +# - Empty matrix and vectors to fill with parameters and data, respectively +group.param.mat <- group.re.mat <- matrix(NA,G,3,byrow = T) + +# - Random effects +colnames(group.re.mat) <- c("log.Linf.group.re", "log.k.group.re", "t0.group.re") + +# - On VBGF scale +colnames(group.param.mat) <- c("Linf.group", "k.group", "t0.group") + + +# - Simulate group level parameters +for(i in 1:G){ + # - Sim from mvnorm + group.re.mat[i,] <- mvrnorm(1, rep(0,3), cov.group.mat) + + # - Convert to parameter space + group.param.mat[i,1:2] <- mu.parms[1:2] * exp(group.re.mat[i,1:2]) # Log to natural scale + group.param.mat[i,3] <- mu.parms[3] + group.re.mat[i,3] +} + +group.param.mat <- group.param.mat %>% + data.frame() %>% + arrange(Linf.group) + + +## Simulate length-at-age data ---- +ages = seq(from=1,to=20, by = 1) +age = c() +length = c() +for(j in 1:N) { + age[j] = sample(ages, 1) # Sample random age from age range + length[j] = (group.param.mat[group[j],1] * (1 - exp(-group.param.mat[group[j],2]*(age[j]-group.param.mat[group[j],3])))) + rnorm(1,0,sigma) # Add normally distributed random error +} + + +# Assign data to data frame and fill spatial info +dat = data.frame(age = age, length = length, group = as.factor(group)) +dat <- dat[which(dat$length > 0),] # Make sure all lengths are positive +dat <- dat %>% arrange(group, age, length) + +dat$long <- runif(nrow(dat),-180, -135) + +sample_value <- function(group) { + weights <- seq(50, 68, length.out = 50) + sample(seq(50, 68, length.out = 50), 1, prob = weights^(2*group)) +} +dat$lat <- sapply(as.numeric(dat$group), sample_value) +# Plot the data +cols <- c("#86BBD8","#2F4858", "#F6AE2D", "#F26419", "#E86A92", "#57A773") +p1 <- ggplot(dat, aes(x = age, y = length, colour = group)) + + geom_point(size = 2) + + scale_colour_manual(values=cols) + + theme_minimal()+ + theme(legend.position = 'none')+ + labs(title = 'length at age observations') + +p2 <- ggplot(dat, aes(x = long, y = lat, colour = group, size= length)) + + geom_point(alpha = 0.5) + + scale_colour_manual(values=cols) + + theme_minimal()+ + labs(title = 'spatial length-at-age') + +# dat <- dat %>%select(year = group, age, length, latitude = lat, longitude = long) + +# Create a dataframe with latitude and longitude columns + +df <- dat %>% + mutate(meanl = mean(length), .by = c('age')) %>% + mutate(resid = length-meanl) + +# Convert the dataframe to an sf object +sf_df <- st_as_sf(df, coords = c("long", "lat"), crs = 4326) + +# Obtain a map of the US +us <- ne_countries(scale = "medium", returnclass = "sf") %>% + filter(admin == "United States of America" | admin == 'Canada') + +# Perform spatial operation to remove points in the dataframe that overlap with the US polygon +sf_df_clipped <- sf_df[!st_within(sf_df, st_union(us), sparse = FALSE), ] +``` + +```{r, echo = FALSE} +# Plot the US polygon and the points outside of it using ggplot2 +ggplot() + + geom_sf(data = us, fill = NA, color = 'black') + + geom_sf(data = sf_df_clipped, aes( color =resid, size = length), alpha = 0.9) + + scale_y_continuous(limits = c(50,71)) + + scale_x_continuous(limits = c(-185,-130))+ + guides(size = 'none')+ + theme_minimal() + + scale_color_gradient2(low = "blue", mid = "grey90", high = "red", midpoint = 0) + + labs(color = 'length residual') +``` +