-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cb5ba2c
commit 9d00993
Showing
1 changed file
with
151 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
--- | ||
title: "Simulated Data Example" | ||
output: rmarkdown::html_vignette | ||
vignette: > | ||
%\VignetteIndexEntry{Simulated Data Example} | ||
%\VignetteEngine{knitr::rmarkdown} | ||
%\VignetteEncoding{UTF-8} | ||
--- | ||
|
||
```{r, include = FALSE} | ||
knitr::opts_chunk$set( | ||
collapse = TRUE, | ||
comment = "#>" | ||
) | ||
``` | ||
|
||
```{r setup, include = FALSE} | ||
library(growthbreaks) | ||
library(MASS) | ||
library(dplyr) | ||
library(ggplot2) | ||
library(sf) | ||
library(rnaturalearth) | ||
``` | ||
|
||
|
||
|
||
# Simulated length-at-age data | ||
A spatial process gives rise to heterogeneity in fish length-at-age with a predominant north-south cline, as shown in the figure below: | ||
|
||
```{r, echo = FALSE} | ||
## based on code by G.D. Adams | ||
## Specify data size | ||
set.seed(731) | ||
G=6 | ||
nsamples = 500 | ||
group <- sample(1:6, nsamples, replace = TRUE) # Group for individual X | ||
N <- nsamples # Total number of samples | ||
## Mu VBGM hyperparameters | ||
mu.Linf = 500 | ||
mu.k = 0.3 | ||
mut.t0 = -0.5 | ||
mu.parms <- c(mu.Linf, mu.k, mut.t0) | ||
sigma = 10 # Observation error | ||
## Group level random effects | ||
sigma.group = c(0.3, 0.05, 0.2) | ||
rho = 0.3 # Correlation between group level parameters | ||
cor.group.mat = matrix(rho, 3, 3) | ||
diag(cor.group.mat) <- 1 | ||
cov.group.mat <- diag(sigma.group) %*% cor.group.mat %*% diag(sigma.group) # Get covariance | ||
## Simulate parameters for groups---- | ||
# - Empty matrix and vectors to fill with parameters and data, respectively | ||
group.param.mat <- group.re.mat <- matrix(NA,G,3,byrow = T) | ||
# - Random effects | ||
colnames(group.re.mat) <- c("log.Linf.group.re", "log.k.group.re", "t0.group.re") | ||
# - On VBGF scale | ||
colnames(group.param.mat) <- c("Linf.group", "k.group", "t0.group") | ||
# - Simulate group level parameters | ||
for(i in 1:G){ | ||
# - Sim from mvnorm | ||
group.re.mat[i,] <- mvrnorm(1, rep(0,3), cov.group.mat) | ||
# - Convert to parameter space | ||
group.param.mat[i,1:2] <- mu.parms[1:2] * exp(group.re.mat[i,1:2]) # Log to natural scale | ||
group.param.mat[i,3] <- mu.parms[3] + group.re.mat[i,3] | ||
} | ||
group.param.mat <- group.param.mat %>% | ||
data.frame() %>% | ||
arrange(Linf.group) | ||
## Simulate length-at-age data ---- | ||
ages = seq(from=1,to=20, by = 1) | ||
age = c() | ||
length = c() | ||
for(j in 1:N) { | ||
age[j] = sample(ages, 1) # Sample random age from age range | ||
length[j] = (group.param.mat[group[j],1] * (1 - exp(-group.param.mat[group[j],2]*(age[j]-group.param.mat[group[j],3])))) + rnorm(1,0,sigma) # Add normally distributed random error | ||
} | ||
# Assign data to data frame and fill spatial info | ||
dat = data.frame(age = age, length = length, group = as.factor(group)) | ||
dat <- dat[which(dat$length > 0),] # Make sure all lengths are positive | ||
dat <- dat %>% arrange(group, age, length) | ||
dat$long <- runif(nrow(dat),-180, -135) | ||
sample_value <- function(group) { | ||
weights <- seq(50, 68, length.out = 50) | ||
sample(seq(50, 68, length.out = 50), 1, prob = weights^(2*group)) | ||
} | ||
dat$lat <- sapply(as.numeric(dat$group), sample_value) | ||
# Plot the data | ||
cols <- c("#86BBD8","#2F4858", "#F6AE2D", "#F26419", "#E86A92", "#57A773") | ||
p1 <- ggplot(dat, aes(x = age, y = length, colour = group)) + | ||
geom_point(size = 2) + | ||
scale_colour_manual(values=cols) + | ||
theme_minimal()+ | ||
theme(legend.position = 'none')+ | ||
labs(title = 'length at age observations') | ||
p2 <- ggplot(dat, aes(x = long, y = lat, colour = group, size= length)) + | ||
geom_point(alpha = 0.5) + | ||
scale_colour_manual(values=cols) + | ||
theme_minimal()+ | ||
labs(title = 'spatial length-at-age') | ||
# dat <- dat %>%select(year = group, age, length, latitude = lat, longitude = long) | ||
# Create a dataframe with latitude and longitude columns | ||
df <- dat %>% | ||
mutate(meanl = mean(length), .by = c('age')) %>% | ||
mutate(resid = length-meanl) | ||
# Convert the dataframe to an sf object | ||
sf_df <- st_as_sf(df, coords = c("long", "lat"), crs = 4326) | ||
# Obtain a map of the US | ||
us <- ne_countries(scale = "medium", returnclass = "sf") %>% | ||
filter(admin == "United States of America" | admin == 'Canada') | ||
# Perform spatial operation to remove points in the dataframe that overlap with the US polygon | ||
sf_df_clipped <- sf_df[!st_within(sf_df, st_union(us), sparse = FALSE), ] | ||
``` | ||
|
||
```{r, echo = FALSE} | ||
# Plot the US polygon and the points outside of it using ggplot2 | ||
ggplot() + | ||
geom_sf(data = us, fill = NA, color = 'black') + | ||
geom_sf(data = sf_df_clipped, aes( color =resid, size = length), alpha = 0.9) + | ||
scale_y_continuous(limits = c(50,71)) + | ||
scale_x_continuous(limits = c(-185,-130))+ | ||
guides(size = 'none')+ | ||
theme_minimal() + | ||
scale_color_gradient2(low = "blue", mid = "grey90", high = "red", midpoint = 0) + | ||
labs(color = 'length residual') | ||
``` | ||
|