Skip to content

Commit

Permalink
update data loading and posterior predictions
Browse files Browse the repository at this point in the history
  • Loading branch information
FredericBlum committed Apr 11, 2024
1 parent 384b97a commit 6dd41b6
Show file tree
Hide file tree
Showing 18 changed files with 200 additions and 115 deletions.
36 changes: 19 additions & 17 deletions scripts/00_setup.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
install.packages("groundhog", repos="https://ftp.fau.de/cran/")
library("groundhog")
set.groundhog.folder("./R_groundhog/")
pkgs <- c("brms","viridis", "readr", "posterior", "dplyr", "ggplot2", "ggdist",
"gghalves", "patchwork", "bayesplot", "tidybayes", "xtable", "ggrepel",
"rnaturalearth", "rnaturalearthdata", "tidyr", "stringr", "ape")
groundhog.library(pkgs, "2023-12-03", force.install=TRUE)
groundhog.library('github::stan-dev/cmdstanr', "2023-08-01", force.install=TRUE)
install.packages('groundhog', repos='https://ftp.fau.de/cran/')
library('groundhog')
set.groundhog.folder('./R_groundhog/')
pkgs <- c('brms','viridis', 'readr', 'posterior', 'dplyr', 'ggplot2', 'ggdist',
'gghalves', 'patchwork', 'bayesplot', 'tidybayes', 'xtable', 'ggrepel',
'rnaturalearth', 'rnaturalearthdata', 'tidyr', 'stringr', 'ape',
'geostan', 'geodist', 'gridExtra')
groundhog.library(pkgs, '2023-04-01', force.install=TRUE)
groundhog.library('github::stan-dev/cmdstanr', '2023-08-01', force.install=TRUE)

source("01_DataExplorations.R")
source("02_PriorDistributions.R")
source("03_PriorModel.R")
source('01_DataExplorations.R')
source('02_PriorDistributions.R')
source('03_PriorModel.R')

# The models will take a long time to compile, feel free to run, or to use
# our provided models via OSF.
# source("04_FinalModel.R")
# The models and posterior predictions will take a long time to compile,
# feel free to run, or to use our provided models via OSF.
# source('04_FinalModel.R')
# source('05_ModelConvergence.R')
# source('test_moran.R)

source("05_ModelConvergence.R")
source("06_BayesViz.R")
source("07_utils.R")
source('06_BayesViz.R')
source('07_utils.R')
13 changes: 5 additions & 8 deletions scripts/01_DataExplorations.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,11 @@ library(viridis)
###################################
### Data ###
###################################
data <- read_tsv('data.tsv') %>%
mutate(sound_class=paste(voicing, sound_class),
word_initial=as.factor(word_initial),
utt_initial=as.factor(utt_initial),
initial=ifelse(
utt_initial==1, "utterance-initial", ifelse(
word_initial==1, "word-initial", "other"
)))
data <- read_tsv('data.tsv') %>% mutate(
initial=ifelse(
utt_initial==1, "utterance-initial", ifelse(
word_initial==1, "word-initial", "other"
)))

langs <- data %>% group_by(Language) %>% count() %>% arrange(Language)
phons <- data %>% group_by(Language, Value) %>% count() %>% arrange(n)
Expand Down
106 changes: 53 additions & 53 deletions scripts/02_PriorDistributions.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,90 +5,90 @@ library(ggdist)
library(patchwork)
library(viridis)

n = 1e5
n=1e5
set.seed(42)

#########################################
### influence of predictors ###
#########################################

predictors <- tibble(x = c(rnorm(n, 0, 0.3))) %>%
mutate(group = 'beta%~% Normal(0, 0.3)') %>%
ggplot(aes(x = x)) +
geom_density(aes(fill = group)) +
scale_fill_viridis(discrete = T, alpha = 0.7, end = 0.7) +
scale_y_continuous(breaks = NULL) +
predictors <- tibble(x=c(rnorm(n, 0, 0.3))) %>%
mutate(group='beta%~% Normal(0, 0.3)') %>%
ggplot(aes(x=x)) +
geom_density(aes(fill=group)) +
scale_fill_viridis(discrete=T, alpha=0.7, end=0.7) +
scale_y_continuous(breaks=NULL) +
ylab("Density of values") +
scale_x_continuous(name = "Predictor values on log-scale",
limits = c(-1.25, 1.25),
breaks = seq(from = -1, to = 1, by = 0.5)) +
theme(legend.position = "none",
plot.title = element_text(size = 14)) +
labs(title = "β ~ Normal(0, 0.3)")
scale_x_continuous(name="Predictor values on log-scale",
limits=c(-1.25, 1.25),
breaks=seq(from=-1, to=1, by=0.5)) +
theme(legend.position="none",
plot.title=element_text(size=14)) +
labs(title="β ~ Normal(0, 0.3)")

#########################################
### Intercept priors ###
#########################################

# values for intercept
int_vals <- c(rnorm(n, mean = 4.5, sd = 0.1))
int_vals %>% tibble() %>% ggplot(aes(x = .)) + geom_density()
int_vals <- c(rnorm(n, mean=4.5, sd=0.1))
int_vals %>% tibble() %>% ggplot(aes(x=.)) + geom_density()

# sigma for intercept
sigma1 <- rexp(n, rate = 12)
sigma1 <- rexp(n, rate=12)
sigma1 %>% tibble() %>% ggplot(aes(x=.))+ geom_density()

# combination of both
sample_ints <- tibble(x = c(rlnorm(n,
meanlog = int_vals,
sdlog = sigma1))) %>%
mutate(group = 'alpha%~% logn( Normal(4.5, 0.1), exp(12) )') %>%
ggplot(aes(fill = group)) +
geom_density(aes(x = x)) +
sample_ints <- tibble(x=c(rlnorm(n,
meanlog=int_vals,
sdlog=sigma1))) %>%
mutate(group='alpha%~% logn( Normal(4.5, 0.1), exp(12) )') %>%
ggplot(aes(fill=group)) +
geom_density(aes(x=x)) +
scale_x_log10(limits= c(15, 320),
breaks = c(10, 20, 30, 50, 100, 200, 300),
name = "Prior distribution for the intercept") +
scale_y_continuous(breaks = NULL,
name = "Density of values") +
scale_fill_viridis(discrete = T, alpha = 0.7, end = 0.7) +
theme(legend.position = "none",
plot.title = element_text(size = 14)) +
labs(title = "α ~ logn(Normal(4.5, 0.1), Exp(12))")
breaks=c(10, 20, 30, 50, 100, 200, 300),
name="Prior distribution for the intercept") +
scale_y_continuous(breaks=NULL,
name="Density of values") +
scale_fill_viridis(discrete=T, alpha=0.7, end=0.7) +
theme(legend.position="none",
plot.title=element_text(size=14)) +
labs(title="α ~ logn(Normal(4.5, 0.1), Exp(12))")

#########################################
### sigma2 ###
#########################################
sigma2 <- rexp(n, rate = 12) %>%
sigma2 <- rexp(n, rate=12) %>%
tibble() %>%
mutate(group = 'sigma%~% exp(12)') %>%
mutate(group='sigma%~% exp(12)') %>%
ggplot(aes(x=.)) +
geom_density(aes(fill = group)) +
scale_y_continuous(breaks = NULL,
name = "Density of values") +
scale_x_continuous(breaks = seq(from = 0, to = 1.2, by = 0.2),
limits = c(0, 1.2),
name = "Standard deviation of varying intercepts on log-scale") +
scale_fill_viridis(discrete = T, alpha = 0.7, end = 0.7) +
theme(legend.position = "none",
plot.title = element_text(size = 14)) +
labs(title = "σ ~ Exp(12)")
geom_density(aes(fill=group)) +
scale_y_continuous(breaks=NULL,
name="Density of values") +
scale_x_continuous(breaks=seq(from=0, to=1.2, by=0.2),
limits=c(0, 1.2),
name="Standard deviation of varying intercepts on log-scale") +
scale_fill_viridis(discrete=T, alpha=0.7, end=0.7) +
theme(legend.position="none",
plot.title=element_text(size=14)) +
labs(title="σ ~ Exp(12)")

#########################################
### varying slopes matrix ###
#########################################

lkjcorr <- rlkjcorr_marginal(n, K = 2, eta = 5) %>% tibble(x = .) %>%
mutate(group = 'R%~% LKJcorr(5)') %>%
ggplot(aes(x = x, fill = group)) +
lkjcorr <- rlkjcorr_marginal(n, K=2, eta=5) %>% tibble(x=.) %>%
mutate(group='R%~% LKJcorr(5)') %>%
ggplot(aes(x=x, fill=group)) +
geom_density() +
scale_y_continuous(breaks = NULL) +
scale_x_continuous(name = "Correlation of varying intercepts and slopes",
breaks = c(-1, -0.5, 0, 0.5, 1)) +
scale_fill_viridis(discrete = T, alpha = 0.7, end = 0.7) +
theme(legend.position = "none",
plot.title = element_text(size = 14)) +
scale_y_continuous(breaks=NULL) +
scale_x_continuous(name="Correlation of varying intercepts and slopes",
breaks=c(-1, -0.5, 0, 0.5, 1)) +
scale_fill_viridis(discrete=T, alpha=0.7, end=0.7) +
theme(legend.position="none",
plot.title=element_text(size=14)) +
ylab("Density of values") +
labs(title = "R ~ LKJcorr(5)")
labs(title="R ~ LKJcorr(5)")

all_priors <- (sample_ints + predictors) / (sigma2 + lkjcorr)
ggsave("images/prior_all.png", all_priors, scale = 1)
ggsave("images/prior_all.png", all_priors, scale=1)
17 changes: 5 additions & 12 deletions scripts/04_FinalModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,10 @@ library(ape)
langs <- read_csv('languages.csv')
data <- read_tsv('data.tsv') %>%
mutate(
word_initial = as.factor(word_initial),
utt_initial = as.factor(utt_initial),
cluster = as.factor(ifelse(InCluster==0, 'single', ifelse(
ClusterInitial==1, 'initial', 'nonInitial')
))
) %>%
left_join(langs, by = join_by(Language==ID)) %>%
rename(
IPA = CLTS,
Family = name_macro_family
)
))) %>%
left_join(langs, by = join_by(Language==ID))

# Read in phylogenetic control
df_phylo <- read_rds("df-phylo.rds")
Expand All @@ -36,19 +29,19 @@ model <-
formula=Duration ~ 1 + utt_initial + word_initial + cluster +
(1 + utt_initial + word_initial + cluster | Language) +
(1 | Speaker) + (1 | gr(phylo, cov=phylo)) +
(1 + word_initial + utt_initial | IPA) +
(1 + word_initial + utt_initial | CLTS) +
z_num_phones + z_word_freq + z_speech_rate,
prior=c(prior(normal(4.5, 0.1), class=Intercept),
prior(normal(6, 0.5), class=shape),
prior(normal(0, 0.3), class=b),
prior(exponential(12), class=sd),
prior(lkj(5), class=cor)
),
iter=2000, warmup=1000, chains=4, cores=4,
iter=20, warmup=10, chains=4, cores=4,
control=list(adapt_delta=0.80, max_treedepth=10),
thread=threading(3),
seed=1,
silent=0,
file="models/cl_hierarchical",
file="models/cl_final",
backend="cmdstanr"
)
Loading

0 comments on commit 6dd41b6

Please sign in to comment.