diff --git a/vignettes/simulated_example.Rmd b/vignettes/simulated_example.Rmd deleted file mode 100644 index fd2f1d4..0000000 --- a/vignettes/simulated_example.Rmd +++ /dev/null @@ -1,166 +0,0 @@ ---- -title: "Simulated Data Example" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Simulated Data Example} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup, include = FALSE} -library(growthbreaks) -library(MASS) -library(dplyr) -library(ggplot2) -library(sf) -library(rnaturalearth) -``` - - - -# Simulated length-at-age data -A spatial process gives rise to heterogeneity in fish length-at-age with a predominant north-south cline, as shown in the figure below: - -```{r, echo = FALSE} -## based on code by G.D. Adams -## Specify data size -set.seed(731) -G= 50 ## number of years -nsamples = 5000 -group <- sample(1:G, nsamples, replace = TRUE) # Group for individual X -N <- nsamples # Total number of samples - - -## Mu VBGM hyperparameters -mu.Linf = 50 -mu.k = 0.3 -mut.t0 = -0.5 -mu.parms <- c(mu.Linf, mu.k, mut.t0) -sigma = 0.1*mu.Linf # Observation error - - -## Group level random effects -sigma.group = c(0.3, 0.05, 0.2) -rho = 0.3 # Correlation between group level parameters -cor.group.mat = matrix(rho, 3, 3) -diag(cor.group.mat) <- 1 -cov.group.mat <- diag(sigma.group) %*% cor.group.mat %*% diag(sigma.group) # Get covariance - - -## Simulate parameters for groups---- -# - Empty matrix and vectors to fill with parameters and data, respectively -group.param.mat <- group.re.mat <- matrix(NA,G,3,byrow = T) - -# - Random effects -colnames(group.re.mat) <- c("log.Linf.group.re", "log.k.group.re", "t0.group.re") - -# - On VBGF scale -colnames(group.param.mat) <- c("Linf.group", "k.group", "t0.group") - - -# - Simulate group level parameters -for(i in 1:G){ - # - Sim from mvnorm - group.re.mat[i,] <- mvrnorm(1, rep(0,3), cov.group.mat) - - # - Convert to parameter space - group.param.mat[i,1:2] <- mu.parms[1:2] * exp(group.re.mat[i,1:2]) # Log to natural scale - group.param.mat[i,3] <- mu.parms[3] + group.re.mat[i,3] -} - -group.param.mat <- group.param.mat %>% - data.frame() %>% - arrange(Linf.group) - - -## Simulate length-at-age data ---- -ages = seq(from=1,to=20, by = 1) -age = c() -length = c() -for(j in 1:N) { - age[j] = sample(ages, 1) # Sample random age from age range - length[j] = (group.param.mat[group[j],1] * (1 - exp(-group.param.mat[group[j],2]*(age[j]-group.param.mat[group[j],3])))) + rnorm(1,0,sigma) # Add normally distributed random error -} - - -# Assign data to data frame and fill spatial info -dat <- data.frame(age = age, length = length, year = as.numeric(group)) -dat <- dat[which(dat$length > 0),] # Make sure all lengths are positive -dat <- dat %>% arrange(year, age, length) - -dat$long <- runif(nrow(dat),-180, -135) - -sample_value <- function(group) { - weights <- seq(50, 68, length.out = 50) - sample(seq(50, 68, length.out = 50), 1, prob = weights^(2*group)) -} -dat$lat <- sapply(as.numeric(dat$year), sample_value) -# Plot the data -cols <- c("#86BBD8","#2F4858", "#F6AE2D", "#F26419", "#E86A92", "#57A773") -p1 <- ggplot(dat, aes(x = age, y = length, colour = group)) + - geom_point(size = 2) + - # scale_colour_manual(values=cols) + - theme_minimal()+ - theme(legend.position = 'none')+ - labs(title = 'length at age observations') - -p2 <- ggplot(dat, aes(x = long, y = lat, colour = group, size= length)) + - geom_point(alpha = 0.5) + - # scale_colour_manual(values=cols) + - theme_minimal()+ - labs(title = 'spatial length-at-age') - -# Create a dataframe with latitude and longitude columns -df <- dat %>% - mutate(meanl = mean(length), .by = c('age')) %>% - mutate(resid = length-meanl) - -# Convert the dataframe to an sf object -sf_df <- st_as_sf(df, coords = c("long", "lat"), crs = 4326) - -# Obtain a map of the US -us <- ne_countries(scale = "medium", returnclass = "sf") %>% - filter(admin == "United States of America" | admin == 'Canada') - -# Perform spatial operation to remove points in the dataframe that overlap with the US polygon -sf_df_clipped <- sf_df[!st_within(sf_df, st_union(us), sparse = FALSE), ] - -## a few add'l steps to save this into data/ -simulated_data <- sf_df_clipped %>% - tidyr::extract(geometry, c('long', 'lat'), '\\((.*), (.*)\\)', convert = TRUE) %>% - select(year, age, length, lat, long) - -usethis::use_data(simulated_data,overwrite = TRUE) - -# dat<- simulated_data -``` - -```{r, echo = FALSE} -# Plot the US polygon and the points outside of it using ggplot2 -p1 <- ggplot( ) + - geom_point(aes(x = age, y = length, color = group), size = 2)+ - scale_color_gradient2(low = "blue", mid = "grey90", high = "red", midpoint = 3) + - guides(color = 'none')+ - theme_minimal() - -p2 <- ggplot() + - geom_sf(data = us, fill = NA, color = 'black') + - geom_sf(data = sf_df_clipped, aes( color =resid, size = length), alpha = 0.9) + - scale_y_continuous(limits = c(50,71)) + - scale_x_continuous(limits = c(-185,-130))+ - guides(size = 'none')+ - theme_minimal() + - scale_color_gradient2(low = "blue", mid = "grey90", high = "red", midpoint = 0) + - labs(color = 'length residual') - -p1 -p2 -``` -