code/06_CRI_Multiverse.Rmd

---
title: "06. Multiverse/Dredge"
output: html_document
---

This code has two purposes. 

The first is to determine how much variance we should reasonably expect to explain using a 'static' multiverse approach where we use a single worked up data set from one of the teams and then apply a range of possible model specifications to it. Then we predict the outcomes using the model specifications. 

The second is to run an algorithmic analysis of the real data and real model specifications to see if we 'missed' anything or combinations of things that could explain the outcomes.

```{r setup, include=FALSE}
# pacman supplies p_load(), used below to attach every package in one call.
library("pacman")

# A very large scipen penalty makes R prefer fixed over scientific notation
# when numbers are printed or converted to text.
options(scipen = 999)

# Packages attached for this document.
# NOTE(review): later chunks also call here::here() and sjPlot::tab_model()
# via `::`; those two packages are not listed here, so they presumably have to
# be installed separately — confirm.
pacman::p_load("ggplot2","tidyverse","MuMIn","ggpubr","ragg","factoextra","lme4","dplyr","lmerTest","margins","rvest","nnet")
```

## Expected Explained Variance (a simulation)

We take a team whose results were derived using R and who used three waves of the ISSP - Team 18. Their DV is dichotomized, which has pluses and minuses for our multiverse analysis.

```{r team18_sim, warning = FALSE, message = FALSE}
# Team 18's data set for the multiverse simulation
t18m <- read.csv(here::here("data/team18_multi.csv"), header = TRUE)

# the team file lacks some macro-indicators; they are read from a second file
cri_macro <- read.csv(here::here("data/cri_macro.csv"), header = TRUE)

# attach the macro-indicators, matching rows on country and year
t18m <- left_join(t18m, cri_macro, by = c("iso_country", "year"))

# coerce the macro-indicators to numeric
macro_cols <- c(
  "wdi_unempilo", "socx_oecd", "gdp_oecd",
  "ginid_solt", "migstock_un", "netmigpct"
)
t18m[macro_cols] <- lapply(t18m[macro_cols], as.numeric)

# keep only the columns used by the models below
t18m <- dplyr::select(
  t18m,
  year, female:health_c, migstock_un, netmigpct,
  wdi_unempilo, socx_oecd, gdp_oecd, iso_country
)
```

### Data Frame Prep

```{r models, warning = FALSE}
# Outcome names: `dvs` is used by the logit chunks, `dvs_c` by the OLS and
# lmer chunks.
dvs <- c(
  "jobs", "unemployed", "reduce_income_diff",
  "old_age_care", "housing", "health"
)

dvs_c <- c(
  "jobs_c", "unemp_c", "incdiff_c",
  "oldage_c", "housing_c", "health_c"
)

# Control-variable formula fragments for the stock models (migstock_un is the
# test variable there, so netmigpct appears here as an optional control).
# Every non-empty fragment ends in " + " so it can be pasted into a formula.
vars <- c(
  "",
  "wdi_unempilo + ",
  "socx_oecd + ",
  "gdp_oecd + ",
  "netmigpct + ",
  "wdi_unempilo + socx_oecd + ",
  "wdi_unempilo + gdp_oecd + ",
  "wdi_unempilo + socx_oecd + gdp_oecd + ",
  "socx_oecd + gdp_oecd + ",
  "netmigpct + wdi_unempilo + ",
  "netmigpct + socx_oecd + ",
  "netmigpct + gdp_oecd + ",
  "netmigpct + wdi_unempilo + socx_oecd + ",
  "netmigpct + wdi_unempilo + gdp_oecd + ",
  "netmigpct + socx_oecd + gdp_oecd + ",
  "netmigpct + wdi_unempilo + socx_oecd + gdp_oecd + "
)

# Flow models use the same 16 fragments with migstock_un in place of netmigpct
vars2 <- sub("netmigpct", "migstock_un", vars, fixed = TRUE)

# names(vars) <- c(1:16)

# Four empty result matrices (576 rows x 17 named columns), filled row by row
# in the model chunks
result_cols <- c(
  "mname", "dv", "AME", "p", "mator", "sample",
  "unemp", "socx", "gdp", "stock", "netmig",
  "AME_type", "twowayfe", "ctry_dummies", "year_dummies", "glm", "logit"
)
m1_results <- matrix(nrow = 576, ncol = 17, dimnames = list(NULL, result_cols))
m2_results <- m1_results
m3_results <- m1_results
m4_results <- m1_results
```

## OLS Models

### Stock Models, OLS, All years

```{r models1, warning = FALSE}
# OLS with year and country dummies on the full data; migstock_un is the test
# variable. Fills rows 1-96 of m1_results (6 outcomes x 16 control sets).
i <- 1
for (dv in dvs_c) {
  for (var in seq_along(vars)) {
    ctrl <- vars[var]
    model <- paste0("m1_", dv, "_", ctrl)
    x <- lm(
      paste0(dv, " ~ migstock_un + ", ctrl, "factor(year) + factor(iso_country)"),
      data = t18m, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m1_results[i, 1:2] <- c(model, dv)
    # row 2 is the first term after the intercept, i.e. the test variable:
    # column 1 = estimate, column 4 = p-value
    m1_results[i, 3] <- round(est[2, 1], 5)
    m1_results[i, 4] <- round(est[2, 4], 5)
    m1_results[i, 5:6] <- c("ols", "All")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m1_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      TRUE,
      grepl("netmig", ctrl, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m1_results[i, 12:17] <- c("Stock", 1, 0, 0, 0, 0)
    i <- i + 1
  }
}
```

### Net Models, OLS, All years

```{r models2, warning = FALSE}
# OLS with year and country dummies on the full data; netmigpct is the test
# variable. Fills rows 97-192 of m1_results (6 outcomes x 16 control sets).
i <- 97
for (dv in dvs_c) {
  for (var in seq_along(vars2)) {
    ctrl <- vars2[var]
    # label with vars2, the control set actually entered in the formula
    # (labelling with vars would report netmigpct where migstock_un is used)
    model <- paste0("m1_", dv, "_", ctrl)
    x <- lm(
      paste0(dv, " ~ netmigpct + ", ctrl, "factor(year) + factor(iso_country)"),
      data = t18m, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m1_results[i, 1:2] <- c(model, dv)
    # row 2 is the first term after the intercept, i.e. the test variable:
    # column 1 = estimate, column 4 = p-value
    m1_results[i, 3] <- round(est[2, 1], 5)
    m1_results[i, 4] <- round(est[2, 4], 5)
    m1_results[i, 5:6] <- c("ols", "All")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m1_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      grepl("stock", ctrl, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m1_results[i, 12:17] <- c("Flow", 1, 0, 0, 0, 0)
    i <- i + 1
  }
}
```

### Stock Models, OLS, 1996 & 2006 only

```{r models3, warning = FALSE}
# drop the 2016 wave
t18m_9606 <- subset(t18m, subset = year != 2016)

# OLS with year and country dummies on the reduced data; migstock_un is the
# test variable. Fills rows 1-96 of m2_results.
i <- 1
for (dv in dvs_c) {
  for (var in seq_along(vars)) {
    y <- vars[var]
    model <- paste0("m1_", dv, "_", y)
    x <- lm(
      paste0(dv, " ~ migstock_un + ", y, "factor(year) + factor(iso_country)"),
      data = t18m_9606, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m2_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 4 = p-value
    m2_results[i, 3] <- round(est[2, 1], 5)
    m2_results[i, 4] <- round(est[2, 4], 5)
    m2_results[i, 5:6] <- c("ols", "w96_06")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m2_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      TRUE,
      grepl("netmig", y, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m2_results[i, 12:17] <- c("Stock", 1, 0, 0, 0, 0)
    i <- i + 1
  }
}
```

### Net Models, OLS, 1996 & 2006 only

```{r models4, warning = FALSE}

# OLS with year and country dummies on t18m_9606; netmigpct is the test
# variable. Fills rows 97-192 of m2_results.
i <- 97
for (dv in dvs_c) {
  for (var in seq_along(vars2)) {
    y <- vars2[var]
    model <- paste0("m1_", dv, "_", y)
    x <- lm(
      paste0(dv, " ~ netmigpct + ", y, "factor(year) + factor(iso_country)"),
      data = t18m_9606, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m2_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 4 = p-value
    m2_results[i, 3] <- round(est[2, 1], 5)
    m2_results[i, 4] <- round(est[2, 4], 5)
    m2_results[i, 5:6] <- c("ols", "w96_06")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m2_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      grepl("stock", y, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m2_results[i, 12:17] <- c("Flow", 1, 0, 0, 0, 0)
    i <- i + 1
  }
}
```

### Stock Models, OLS, 2006 & 2016 only

```{r models5, warning = FALSE}
# drop the 1996 wave
t18m_0616 <- subset(t18m, subset = year != 1996)

# OLS with year and country dummies on the reduced data; migstock_un is the
# test variable. Fills rows 1-96 of m3_results.
i <- 1
for (dv in dvs_c) {
  for (var in seq_along(vars)) {
    y <- vars[var]
    model <- paste0("m1_", dv, "_", y)
    x <- lm(
      paste0(dv, " ~ migstock_un + ", y, "factor(year) + factor(iso_country)"),
      data = t18m_0616, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m3_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 4 = p-value
    m3_results[i, 3] <- round(est[2, 1], 5)
    m3_results[i, 4] <- round(est[2, 4], 5)
    m3_results[i, 5:6] <- c("ols", "w06_16")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m3_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      TRUE,
      grepl("netmig", y, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m3_results[i, 12:17] <- c("Stock", 1, 0, 0, 0, 0)
    i <- i + 1
  }
}
```

### Net Models, OLS, 2006 & 2016 only

```{r models6, warning = FALSE}

# OLS with year and country dummies on t18m_0616; netmigpct is the test
# variable. Fills rows 97-192 of m3_results.
i <- 97
for (dv in dvs_c) {
  for (var in seq_along(vars2)) {
    y <- vars2[var]
    model <- paste0("m1_", dv, "_", y)
    x <- lm(
      paste0(dv, " ~ netmigpct + ", y, "factor(year) + factor(iso_country)"),
      data = t18m_0616, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m3_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 4 = p-value
    m3_results[i, 3] <- round(est[2, 1], 5)
    m3_results[i, 4] <- round(est[2, 4], 5)
    m3_results[i, 5:6] <- c("ols", "w06_16")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m3_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      grepl("stock", y, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m3_results[i, 12:17] <- c("Flow", 1, 0, 0, 0, 0)
    i <- i + 1
  }
}
```

### Stock Models, OLS, All years, Year dummies only (no country FE)

```{r models7, warning = FALSE}
# OLS with year dummies only (no country dummies) on the full data;
# migstock_un is the test variable. Fills rows 1-96 of m4_results.
i <- 1
for (dv in dvs_c) {
  for (var in seq_along(vars)) {
    ctrl <- vars[var]
    model <- paste0("m1_", dv, "_", ctrl)
    x <- lm(
      paste0(dv, " ~ migstock_un + ", ctrl, "factor(year)"),
      data = t18m, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m4_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 4 = p-value
    m4_results[i, 3] <- round(est[2, 1], 5)
    m4_results[i, 4] <- round(est[2, 4], 5)
    m4_results[i, 5:6] <- c("ols", "All")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m4_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      TRUE,
      grepl("netmig", ctrl, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m4_results[i, 12:17] <- c("Stock", 0, 0, 1, 0, 0)
    i <- i + 1
  }
}
```

### Net Models, OLS, All years, Year dummies only (no country FE)

```{r models8, warning = FALSE}
# OLS with year dummies only (no country dummies) on the full data; netmigpct
# is the test variable. Fills rows 97-192 of m4_results.
i <- 97
for (dv in dvs_c) {
  for (var in seq_along(vars2)) {
    ctrl <- vars2[var]
    # label with vars2, the control set actually entered in the formula
    # (labelling with vars would report netmigpct where migstock_un is used)
    model <- paste0("m1_", dv, "_", ctrl)
    x <- lm(
      paste0(dv, " ~ netmigpct + ", ctrl, "factor(year)"),
      data = t18m, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m4_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 4 = p-value
    m4_results[i, 3] <- round(est[2, 1], 5)
    m4_results[i, 4] <- round(est[2, 4], 5)
    m4_results[i, 5:6] <- c("ols", "All")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m4_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      grepl("stock", ctrl, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m4_results[i, 12:17] <- c("Flow", 0, 0, 1, 0, 0)
    i <- i + 1
  }
}
```

## Logit Models

### Stock Models, logit, All years

```{r model9, warning = FALSE}
# Logit (binomial glm) with year and country dummies on the full data;
# migstock_un is the test variable. Fills rows 385-480 of m1_results.
i <- 385
for (dv in dvs) {
  for (var in seq_along(vars)) {
    ctrl <- vars[var]
    model <- paste0("m1_", dv, "_", ctrl)
    x <- glm(
      paste0(dv, " ~ migstock_un + ", ctrl, "factor(year) + factor(iso_country)"),
      data = t18m, na.action = "na.omit", family = "binomial"
    )
    ame <- summary(margins(x, variables = "migstock_un"))

    m1_results[i, 1:2] <- c(model, dv)
    # element 2 of the margins summary goes into the AME column; the p-value
    # is taken from the glm coefficient table (row 2, column 4)
    m1_results[i, 3] <- round(as.numeric(ame[2]), 5)
    m1_results[i, 4] <- round(summary(x)$coefficients[2, 4], 5)
    m1_results[i, 5:6] <- c("logit", "All")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m1_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      TRUE,
      grepl("netmig", ctrl, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m1_results[i, 12:17] <- c("Stock", 1, 0, 0, 0, 1)
    i <- i + 1
  }
}
```

### Net Models, logit, All years

```{r models10, warning = FALSE}
# Logit (binomial glm) with year and country dummies on the full data;
# netmigpct is the test variable. Fills rows 481-576 of m1_results.
i <- 481
for (dv in dvs) {
  for (var in seq_along(vars2)) {
    ctrl <- vars2[var]
    # label with vars2, the control set actually entered in the formula
    # (labelling with vars would report netmigpct where migstock_un is used)
    model <- paste0("m1_", dv, "_", ctrl)
    x <- glm(
      paste0(dv, " ~ netmigpct + ", ctrl, "factor(year) + factor(iso_country)"),
      data = t18m, na.action = "na.omit", family = "binomial"
    )
    ame <- summary(margins(x, variables = "netmigpct"))

    m1_results[i, 1:2] <- c(model, dv)
    # element 2 of the margins summary goes into the AME column; the p-value
    # is taken from the glm coefficient table (row 2, column 4)
    m1_results[i, 3] <- round(as.numeric(ame[2]), 5)
    m1_results[i, 4] <- round(summary(x)$coefficients[2, 4], 5)
    m1_results[i, 5:6] <- c("logit", "All")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m1_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      grepl("stock", ctrl, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m1_results[i, 12:17] <- c("Flow", 1, 0, 0, 0, 1)
    i <- i + 1
  }
}
```

### Stock Models, logit, 1996 & 2006 only

```{r models11, warning = FALSE}
# drop the 2016 wave
t18m_9606 <- subset(t18m, subset = year != 2016)

# Logit (binomial glm) with year and country dummies on the reduced data;
# migstock_un is the test variable. Fills rows 385-480 of m2_results.
i <- 385
for (dv in dvs) {
  for (var in seq_along(vars)) {
    y <- vars[var]
    model <- paste0("m1_", dv, "_", y)
    x <- glm(
      paste0(dv, " ~ migstock_un + ", y, "factor(year) + factor(iso_country)"),
      data = t18m_9606, na.action = "na.omit", family = "binomial"
    )
    ame <- summary(margins(x, variables = "migstock_un"))

    m2_results[i, 1:2] <- c(model, dv)
    # element 2 of the margins summary goes into the AME column; the p-value
    # is taken from the glm coefficient table (row 2, column 4)
    m2_results[i, 3] <- round(as.numeric(ame[2]), 5)
    m2_results[i, 4] <- round(summary(x)$coefficients[2, 4], 5)
    m2_results[i, 5:6] <- c("logit", "w96_06")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m2_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      TRUE,
      grepl("netmig", y, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m2_results[i, 12:17] <- c("Stock", 1, 0, 0, 0, 1)
    i <- i + 1
  }
}
```

### Net Models, logit, 1996 & 2006 only

```{r models12, warning = FALSE}
# Logit (binomial glm) with year and country dummies on t18m_9606; netmigpct
# is the test variable. Fills rows 481-576 of m2_results.
i <- 481
for (dv in dvs) {
  for (var in seq_along(vars2)) {
    y <- vars2[var]
    model <- paste0("m1_", dv, "_", y)
    x <- glm(
      paste0(dv, " ~ netmigpct + ", y, "factor(year) + factor(iso_country)"),
      data = t18m_9606, na.action = "na.omit", family = "binomial"
    )
    ame <- summary(margins(x, variables = "netmigpct"))

    m2_results[i, 1:2] <- c(model, dv)
    # element 2 of the margins summary goes into the AME column; the p-value
    # is taken from the glm coefficient table (row 2, column 4)
    m2_results[i, 3] <- round(as.numeric(ame[2]), 5)
    m2_results[i, 4] <- round(summary(x)$coefficients[2, 4], 5)
    m2_results[i, 5:6] <- c("logit", "w96_06")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m2_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      grepl("stock", y, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m2_results[i, 12:17] <- c("Flow", 1, 0, 0, 0, 1)
    i <- i + 1
  }
}
```

### Stock Models, logit, 2006 & 2016 only

```{r models13, warning = FALSE}
# drop the 1996 wave
t18m_0616 <- subset(t18m, subset = year != 1996)

# Logit (binomial glm) with year and country dummies on the reduced data;
# migstock_un is the test variable. Fills rows 385-480 of m3_results.
i <- 385
for (dv in dvs) {
  for (var in seq_along(vars)) {
    y <- vars[var]
    model <- paste0("m1_", dv, "_", y)
    x <- glm(
      paste0(dv, " ~ migstock_un + ", y, "factor(year) + factor(iso_country)"),
      data = t18m_0616, na.action = "na.omit", family = "binomial"
    )
    ame <- summary(margins(x, variables = "migstock_un"))

    m3_results[i, 1:2] <- c(model, dv)
    # element 2 of the margins summary goes into the AME column; the p-value
    # is taken from the glm coefficient table (row 2, column 4)
    m3_results[i, 3] <- round(as.numeric(ame[2]), 5)
    m3_results[i, 4] <- round(summary(x)$coefficients[2, 4], 5)
    m3_results[i, 5:6] <- c("logit", "w06_16")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m3_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      TRUE,
      grepl("netmig", y, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m3_results[i, 12:17] <- c("Stock", 1, 0, 0, 0, 1)
    i <- i + 1
  }
}
```

### Net Models, logit, 2006 & 2016 only

```{r models14, warning = FALSE}
# Logit (binomial glm) with year and country dummies on t18m_0616; netmigpct
# is the test variable. Fills rows 481-576 of m3_results.
i <- 481
for (dv in dvs) {
  for (var in seq_along(vars2)) {
    y <- vars2[var]
    model <- paste0("m1_", dv, "_", y)
    x <- glm(
      paste0(dv, " ~ netmigpct + ", y, "factor(year) + factor(iso_country)"),
      data = t18m_0616, na.action = "na.omit", family = "binomial"
    )
    ame <- summary(margins(x, variables = "netmigpct"))

    m3_results[i, 1:2] <- c(model, dv)
    # element 2 of the margins summary goes into the AME column; the p-value
    # is taken from the glm coefficient table (row 2, column 4)
    m3_results[i, 3] <- round(as.numeric(ame[2]), 5)
    m3_results[i, 4] <- round(summary(x)$coefficients[2, 4], 5)
    m3_results[i, 5:6] <- c("logit", "w06_16")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m3_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      grepl("stock", y, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m3_results[i, 12:17] <- c("Flow", 1, 0, 0, 0, 1)
    i <- i + 1
  }
}
```

### Stock Models, logit, All years, Year dummies only (no country FE)

```{r models15, warning = FALSE}
# Logit (binomial glm) with year dummies only on the full data; migstock_un is
# the test variable. Fills rows 385-480 of m4_results.
i <- 385
for (dv in dvs) {
  for (var in seq_along(vars)) {
    ctrl <- vars[var]
    model <- paste0("m1_", dv, "_", ctrl)
    x <- glm(
      paste0(dv, " ~ migstock_un + ", ctrl, "factor(year)"),
      data = t18m, na.action = "na.omit", family = "binomial"
    )
    ame <- summary(margins(x, variables = "migstock_un"))

    m4_results[i, 1:2] <- c(model, dv)
    # element 2 of the margins summary goes into the AME column; the p-value
    # is taken from the glm coefficient table (row 2, column 4)
    m4_results[i, 3] <- round(as.numeric(ame[2]), 5)
    m4_results[i, 4] <- round(summary(x)$coefficients[2, 4], 5)
    m4_results[i, 5:6] <- c("logit", "All")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m4_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      TRUE,
      grepl("netmig", ctrl, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m4_results[i, 12:17] <- c("Stock", 0, 0, 1, 0, 1)
    i <- i + 1
  }
}
```

### Net Models, logit, All years, Year dummies only (no country FE)

```{r models16, warning = FALSE}
# Logit (binomial glm) with year dummies only on the full data; netmigpct is
# the test variable. Fills rows 481-576 of m4_results.
i <- 481
for (dv in dvs) {
  for (var in seq_along(vars2)) {
    ctrl <- vars2[var]
    # label with vars2, the control set actually entered in the formula
    # (labelling with vars would report netmigpct where migstock_un is used)
    model <- paste0("m1_", dv, "_", ctrl)
    x <- glm(
      paste0(dv, " ~ netmigpct + ", ctrl, "factor(year)"),
      data = t18m, na.action = "na.omit", family = "binomial"
    )
    ame <- summary(margins(x, variables = "netmigpct"))

    m4_results[i, 1:2] <- c(model, dv)
    # element 2 of the margins summary goes into the AME column; the p-value
    # is taken from the glm coefficient table (row 2, column 4)
    m4_results[i, 3] <- round(as.numeric(ame[2]), 5)
    m4_results[i, 4] <- round(summary(x)$coefficients[2, 4], 5)
    m4_results[i, 5:6] <- c("logit", "All")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m4_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      grepl("stock", ctrl, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m4_results[i, 12:17] <- c("Flow", 0, 0, 1, 0, 1)
    i <- i + 1
  }
}
```

## GLM Models

### Stock Models, GLM, All years, country-level, year dummy

```{r models17, warning = FALSE}
# lmer with year dummies and a random intercept per country on the full data;
# migstock_un is the test variable. Fills rows 193-288 of m1_results.
i <- 193
for (dv in dvs_c) {
  for (var in seq_along(vars)) {
    ctrl <- vars[var]
    model <- paste0("m1_", dv, "_", ctrl)
    x <- lmer(
      paste0(dv, " ~ migstock_un + ", ctrl, "factor(year) + (1|iso_country)"),
      data = t18m, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m1_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 5 is stored as p
    m1_results[i, 3] <- round(est[2, 1], 5)
    m1_results[i, 4] <- round(est[2, 5], 5)
    m1_results[i, 5:6] <- c("glm", "All")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m1_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      TRUE,
      grepl("netmig", ctrl, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m1_results[i, 12:17] <- c("Stock", 0, 0, 1, 1, 0)
    i <- i + 1
  }
}
```

### Net Models, GLM, All years, country-level, year dummy

```{r models18, warning = FALSE}
# lmer with year dummies and a random intercept per country on the full data;
# netmigpct is the test variable. Fills rows 289-384 of m1_results.
i <- 289
for (dv in dvs_c) {
  for (var in seq_along(vars2)) {
    ctrl <- vars2[var]
    # label with vars2, the control set actually entered in the formula
    # (labelling with vars would report netmigpct where migstock_un is used)
    model <- paste0("m1_", dv, "_", ctrl)
    x <- lmer(
      paste0(dv, " ~ netmigpct + ", ctrl, "factor(year) + (1 | iso_country)"),
      data = t18m, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m1_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 5 is stored as p
    m1_results[i, 3] <- round(est[2, 1], 5)
    m1_results[i, 4] <- round(est[2, 5], 5)
    m1_results[i, 5:6] <- c("glm", "All")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m1_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      grepl("stock", ctrl, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m1_results[i, 12:17] <- c("Flow", 0, 0, 1, 1, 0)
    i <- i + 1
  }
}
```

### Stock Models, GLM, country-level, year dummy, 1996 & 2006 only

```{r models19, warning = FALSE}
# drop the 2016 wave
t18m_9606 <- subset(t18m, subset = year != 2016)

# lmer with year dummies and a random intercept per country on the reduced
# data; migstock_un is the test variable. Fills rows 193-288 of m2_results.
i <- 193
for (dv in dvs_c) {
  for (var in seq_along(vars)) {
    y <- vars[var]
    model <- paste0("m1_", dv, "_", y)
    x <- lmer(
      paste0(dv, " ~ migstock_un + ", y, "factor(year) + (1 | iso_country)"),
      data = t18m_9606, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m2_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 5 is stored as p
    m2_results[i, 3] <- round(est[2, 1], 5)
    m2_results[i, 4] <- round(est[2, 5], 5)
    m2_results[i, 5:6] <- c("glm", "w96_06")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m2_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      TRUE,
      grepl("netmig", y, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m2_results[i, 12:17] <- c("Stock", 0, 0, 1, 1, 0)
    i <- i + 1
  }
}
```

### Net Models, GLM, country-level, year dummy, 1996 & 2006 only

```{r models20, warning = FALSE}
# lmer with year dummies and a random intercept per country on t18m_9606;
# netmigpct is the test variable. Fills rows 289-384 of m2_results.
i <- 289
for (dv in dvs_c) {
  for (var in seq_along(vars2)) {
    y <- vars2[var]
    model <- paste0("m1_", dv, "_", y)
    x <- lmer(
      paste0(dv, " ~ netmigpct + ", y, "factor(year) + (1 | iso_country)"),
      data = t18m_9606, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m2_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 5 is stored as p
    m2_results[i, 3] <- round(est[2, 1], 5)
    m2_results[i, 4] <- round(est[2, 5], 5)
    m2_results[i, 5:6] <- c("glm", "w96_06")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m2_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      grepl("stock", y, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m2_results[i, 12:17] <- c("Flow", 0, 0, 1, 1, 0)
    i <- i + 1
  }
}
```

### Stock Models, GLM, country-level, year dummy, 2006 & 2016 only

```{r models21, warning = FALSE}
# drop the 1996 wave
t18m_0616 <- subset(t18m, subset = year != 1996)

# lmer with year dummies and a random intercept per country on the reduced
# data; migstock_un is the test variable. Fills rows 193-288 of m3_results.
i <- 193
for (dv in dvs_c) {
  for (var in seq_along(vars)) {
    y <- vars[var]
    model <- paste0("m1_", dv, "_", y)
    x <- lmer(
      paste0(dv, " ~ migstock_un + ", y, "factor(year) + (1 | iso_country)"),
      data = t18m_0616, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m3_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 5 is stored as p
    m3_results[i, 3] <- round(est[2, 1], 5)
    m3_results[i, 4] <- round(est[2, 5], 5)
    m3_results[i, 5:6] <- c("glm", "w06_16")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m3_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      TRUE,
      grepl("netmig", y, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m3_results[i, 12:17] <- c("Stock", 0, 0, 1, 1, 0)
    i <- i + 1
  }
}
```

### Net Models, GLM, country-level, year dummy, 2006 & 2016 only

```{r models22, warning = FALSE}
# lmer with year dummies and a random intercept per country on t18m_0616;
# netmigpct is the test variable. Fills rows 289-384 of m3_results.
i <- 289
for (dv in dvs_c) {
  for (var in seq_along(vars2)) {
    y <- vars2[var]
    model <- paste0("m1_", dv, "_", y)
    x <- lmer(
      paste0(dv, " ~ netmigpct + ", y, "factor(year) + (1 | iso_country)"),
      data = t18m_0616, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m3_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 5 is stored as p
    m3_results[i, 3] <- round(est[2, 1], 5)
    m3_results[i, 4] <- round(est[2, 5], 5)
    m3_results[i, 5:6] <- c("glm", "w06_16")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m3_results[i, 7:11] <- as.integer(c(
      grepl("unemp", y, fixed = TRUE),
      grepl("socx", y, fixed = TRUE),
      grepl("gdp", y, fixed = TRUE),
      grepl("stock", y, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m3_results[i, 12:17] <- c("Flow", 0, 0, 1, 1, 0)
    i <- i + 1
  }
}
```

### Stock Models, GLM, All years, no year dummy

```{r models23, warning = FALSE}
# lmer with a random intercept per country and no year dummies on the full
# data; migstock_un is the test variable. Fills rows 193-288 of m4_results.
i <- 193
for (dv in dvs_c) {
  for (var in seq_along(vars)) {
    ctrl <- vars[var]
    model <- paste0("m1_", dv, "_", ctrl)
    x <- lmer(
      paste0(dv, " ~ migstock_un + ", ctrl, "(1 | iso_country)"),
      data = t18m, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m4_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 5 is stored as p
    m4_results[i, 3] <- round(est[2, 1], 5)
    m4_results[i, 4] <- round(est[2, 5], 5)
    m4_results[i, 5:6] <- c("glm", "All")
    # indicators: unemp, socx, gdp, stock (always in the model), netmig
    m4_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      TRUE,
      grepl("netmig", ctrl, fixed = TRUE)
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m4_results[i, 12:17] <- c("Stock", 0, 0, 0, 1, 0)
    i <- i + 1
  }
}
```

### Net Models, GLM, All years, no year dummy

```{r models24, warning = FALSE}
# lmer with a random intercept per country and no year dummies on the full
# data; netmigpct is the test variable. Fills rows 289-384 of m4_results.
i <- 289
for (dv in dvs_c) {
  for (var in seq_along(vars2)) {
    ctrl <- vars2[var]
    # label with vars2, the control set actually entered in the formula
    # (labelling with vars would report netmigpct where migstock_un is used)
    model <- paste0("m1_", dv, "_", ctrl)
    x <- lmer(
      paste0(dv, " ~ netmigpct + ", ctrl, "(1 | iso_country)"),
      data = t18m, na.action = "na.omit"
    )
    est <- summary(x)$coefficients

    m4_results[i, 1:2] <- c(model, dv)
    # row 2 = test variable; column 1 = estimate, column 5 is stored as p
    m4_results[i, 3] <- round(est[2, 1], 5)
    m4_results[i, 4] <- round(est[2, 5], 5)
    m4_results[i, 5:6] <- c("glm", "All")
    # indicators: unemp, socx, gdp, stock, netmig (always in the model)
    m4_results[i, 7:11] <- as.integer(c(
      grepl("unemp", ctrl, fixed = TRUE),
      grepl("socx", ctrl, fixed = TRUE),
      grepl("gdp", ctrl, fixed = TRUE),
      grepl("stock", ctrl, fixed = TRUE),
      TRUE
    ))
    # AME_type, twowayfe, ctry_dummies, year_dummies, glm, logit
    m4_results[i, 12:17] <- c("Flow", 0, 0, 0, 1, 0)
    i <- i + 1
  }
}
```

## Explaining Variance in Multiverse

```{r dataprep, warning = FALSE}
# Stack the four result matrices into one data frame (one row per model).
# The matrices are character, so stringsAsFactors = FALSE is set explicitly:
# where the default would create factors (R < 4.0), as.numeric(AME) would
# return factor level codes instead of the stored estimates.
multi_df <- as.data.frame(
  rbind(m1_results, m2_results, m3_results, m4_results),
  stringsAsFactors = FALSE
)

multi_df <- multi_df %>%
  mutate(
    # 1 when the estimator label is "ols"
    ols = ifelse(mator == "ols", 1, 0),
    sample = factor(sample),
    # 1 when both migstock_un and netmigpct are in the model
    otherdv = ifelse(stock == 1 & netmig == 1, 1, 0),
    AME = as.numeric(AME)
  )

save(multi_df, file = here::here("results/multi_df.Rda"))
```

```{r multimodels, warning = FALSE}
# reload the stacked multiverse results written by the data-prep chunk
load(here::here("results/multi_df.Rda"))

# Nested OLS models of the AME on model-specification characteristics
# m0 <- lm(AME ~ 1, data = multi_df)
m1 <- lm(
  AME ~ factor(dv) + factor(AME_type),
  data = multi_df
)
m2 <- lm(
  AME ~ factor(dv) + factor(AME_type) + factor(sample),
  data = multi_df
)
m3 <- lm(
  AME ~ factor(dv) + factor(AME_type) + factor(sample) +
    unemp + socx + gdp + otherdv,
  data = multi_df
)
m4 <- lm(
  AME ~ factor(dv) + factor(AME_type) + factor(sample) +
    unemp + socx + gdp + otherdv + factor(mator),
  data = multi_df
)
m5 <- lm(
  AME ~ factor(dv) + factor(AME_type) + factor(dv)*factor(AME_type) +
    factor(sample) + unemp + socx + gdp + otherdv + factor(mator),
  data = multi_df
)

# R-squared of each model, in order m1..m5
r2 <- vapply(
  list(m1, m2, m3, m4, m5),
  function(fit) summary(fit)[["r.squared"]],
  numeric(1)
)

# Table of explained variance: model id, description, R-squared.
# NOTE(review): otherdv already enters in m3, while the m4 label mentions
# "other DV"; the labels may be shifted by one term — confirm intent.
expvar <- matrix(
  c(
    c("m1", "m2", "m3", "m4", "m5"),
    c("DVs + AME type", "+ sample", "+ IVs", "+ other DV + Estimator", "DV*Type interaction"),
    r2
  ),
  nrow = 5, ncol = 3
)

write.csv(expvar, file = here::here("results/TblS8.csv"))
```

```{r plotms}
# Regression table for m3, m4 and m5: confidence intervals hidden, p-values
# shown as stars
sjPlot::tab_model(
  m3, m4, m5,
  show.ci = FALSE,
  p.style = "stars"
)
```

## Dredge Function

Given computing times, we start with smaller sets of variables and then eliminate those that do not appear in the top fitting models (AIC/LL). 

We also import results from the dredge script to reduce reproduction computing times from [`dredge.R`](../code/script/). 

And for the R&R suggestion of running all interactions from [`dredge_v2.R`](../code/script/)

```{r loaddredge}
# Load the objects saved in code/script/dredge.Rdata into the workspace.
# NOTE(review): `cri_ml`, the data used by the final-model chunk, is not
# created anywhere in the visible part of this file — presumably it comes from
# this file; confirm.
load(file = here::here("code/script/dredge.Rdata"))
```


## Final Model

### First Iteration

Used to explain variance reduction - compare with Fig 3 in the main text and Table S6. These variables were selected from iterating dredge.

```{r final, warning = FALSE}

# Main results: mixed model of AME_Z with a random intercept per team
m_dredged <- lmer(
  AME_Z ~ Jobs + IncDiff + House + Stock + ChangeFlow + Stock*IncDiff +
    w2016 + orig13 + eeurope + twowayfe + level_cyear +
    twowayfe*Jobs + twowayfe*Stock + Jobs*Stock + (1 | team),
  data = cri_ml
)

# Main results combined with the m13 terms, to test for a better model
m_dredged_m13 <- lmer(
  AME_Z ~ Jobs + IncDiff + House + logit + ols + Stock + ChangeFlow +
    Stock*IncDiff + w1996 + w2006 + w2016 + w2006*w2016 + orig13 +
    eeurope + allavailable + twowayfe + level_cyear + total_score +
    twowayfe*Jobs + twowayfe*Stock + Jobs*Stock + (1 | team),
  data = cri_ml
)

# These models feed the new TblS10, together with the results of the
# all-interactions dredge (v2, see below)


```

### Reviewer suggested - try all interactions

Here we calculated every interaction pair, leading to 5,565 interactions; this was reduced to 2,637 after removing those that had no variance. For example, teams that used a Bayes estimator did not use OLS, so a Bayes\*OLS interaction produced all zeros. We see 7 variables as a maximum for testing the dredge function; again, this function runs every possible combination of all 7 variables and all subsets. We tried 10, but too many matrices were rank deficient (more evidence that it is too many variables). The reader may think that more than 7 variables to explain the outcomes is warranted, but **none** of our models showed good fit statistics with more than two interaction variables in the model. Next, we had a random seed select sets of 7 variables out of the 2,637 to test. We iterated this 1000 times, leading to 1,028,000 models. We then selected the two best-fitting models based on AIC from each of the 1000 iterations. From these we extracted all variables used in all those best-fitting models. After removing duplicates, this resulted in 19 variables. We then re-ran the same process using only these 19 variables and 100 iterations, and the best-fitting models from these iterations contained 4 variables. In the next code chunk we add these 4 variables to our existing 'best' models to see if they can explain more variance without reducing model fit. We should also point out here that most of these interactions produce unique identifications of teams' models (i.e., they are used by only one team and so are not fit for putting in a regression, although we do it anyway for thoroughness and to make the reviewer happy).

**But with caution**. While we understand the impetus to consider as much information as possible, we are only cautiously following this particular suggestion because potentially real information about how choices impact the outcomes ends up unidentified for statistical reasons. Many of our ‘substantive’ decision variables uniquely identify teams, which would amount to adding a fixed-effect for specific teams in the level-2 part of the equation. This would ‘steal’ the unexplained variance from that team at level-2 and put it into the explained variance. This violates the fundamental principle of general linear modeling that the residual (unexplained variance) is uncorrelated with the predictors in the model (see multilevel textbooks such as Hox 2010). In addition, the central limit theorem suggests that with 107 candidate variables for inclusion as predictors in our models, we should have at least 10 cases per variable to recover a measure of central tendency (most multilevel modelers suggest 30 or even 50 for a single level-2 test variable or interaction; see the review in Bryan and Jenkins 2016). That is a bare minimum. So that already accounts for 1,070 models. For us it is statistically inappropriate to include more variables, as this leads to overinflation of the variance explained or undermines the reliability of our results. Our algorithm starts to drop variables as we get over 25 variables in the model because of underidentification.
 
Bryan, Mark L., and Stephen P. Jenkins. 2016. ‘Multilevel Modelling of Country Effects: A Cautionary Tale’. **European Sociological Review** 32(1):3–22. doi: 10.1093/esr/jcv059.
 
Hox, Joop J. 2010. **Multilevel Analysis: Techniques and Applications**. 2nd Edition. New York: Routledge.


Results:

It identified one interaction that could improve fit and explained variance slightly. As this interaction occurs in more than one team, we elect to keep it. 

The interactions identified are:

- First dredge:
    - `Stock*Jobs`
    - `twowayfe*Stock`
- Second dredge:
    - `cluster_any*gdp_ivC`
    - `ols*unbalpanel`
    - `Unemp*publice_iv`
    - `w1996*conservatism_ivC`


```{r dredge_v2a}
# Read the saved dredge output from disk.
# NOTE(review): dredge_v2 is not referenced again in this chunk.
dredge_v2 <- read_csv(here::here("code", "script", "dredge_best_auto.csv"))

# The four models below share the same fixed-effect terms and team random
# intercept; each adds exactly one of the second-dredge interactions.

# + cluster_any*gdp_ivC
m_dredged1 <- lmer(
  AME_Z ~ Jobs + IncDiff + House + Stock + ChangeFlow + Stock*IncDiff +
    w2016 + orig13 + eeurope + twowayfe + level_cyear + twowayfe*Jobs +
    twowayfe*Stock + Jobs*Stock + cluster_any*gdp_ivC + (1 | team),
  data = cri_ml
)

# + ols*unbalpanel
m_dredged2 <- lmer(
  AME_Z ~ Jobs + IncDiff + House + Stock + ChangeFlow + Stock*IncDiff +
    w2016 + orig13 + eeurope + twowayfe + level_cyear + twowayfe*Jobs +
    twowayfe*Stock + Jobs*Stock + ols*unbalpanel + (1 | team),
  data = cri_ml
)

# + Unemp*publice_iv
m_dredged3 <- lmer(
  AME_Z ~ Jobs + IncDiff + House + Stock + ChangeFlow + Stock*IncDiff +
    w2016 + orig13 + eeurope + twowayfe + level_cyear + twowayfe*Jobs +
    twowayfe*Stock + Jobs*Stock + Unemp*publice_iv + (1 | team),
  data = cri_ml
)

# + w1996*conservatism_ivC
m_dredged4 <- lmer(
  AME_Z ~ Jobs + IncDiff + House + Stock + ChangeFlow + Stock*IncDiff +
    w2016 + orig13 + eeurope + twowayfe + level_cyear + twowayfe*Jobs +
    twowayfe*Stock + Jobs*Stock + w1996*conservatism_ivC + (1 | team),
  data = cri_ml
)

# from Main Analysis (for comparison)
m13 <- lmer(
  AME_Z ~ Jobs + IncDiff + House + logit + ols + Stock + ChangeFlow +
    w1996 + w2006 + w2016 + w2006*w2016 + orig13 + eeurope + allavailable +
    twowayfe + level_cyear + total_score + (1 | team),
  data = cri_ml
)

# extract unexplained variance (variance components of each fitted model);
# m_dredged and m_dredged_m13 are fitted in earlier chunks
md_1 <- as.data.frame(VarCorr(m13))
md_1_13 <- as.data.frame(VarCorr(m_dredged_m13))
md_2 <- as.data.frame(VarCorr(m_dredged))
md_3 <- as.data.frame(VarCorr(m_dredged1))
md_4 <- as.data.frame(VarCorr(m_dredged2))
md_5 <- as.data.frame(VarCorr(m_dredged3))
md_6 <- as.data.frame(VarCorr(m_dredged4))

# Write the comparison table to disk; the file is read back when Table S10 is
# assembled, so the model order here fixes the column order there.
sjPlot::tab_model(
  m13, m_dredged, m_dredged_m13, m_dredged1, m_dredged2, m_dredged3,
  m_dredged4,
  digits = 3, p.style = "stars", show.ci = FALSE, show.loglik = TRUE,
  show.aic = TRUE, file = here::here("results/reg_dredge_v2.htm")
)

# Same table again without `file`, so it is rendered in the knitted document.
sjPlot::tab_model(
  m13, m_dredged, m_dredged_m13, m_dredged1, m_dredged2, m_dredged3,
  m_dredged4,
  digits = 3, p.style = "stars", show.ci = FALSE, show.loglik = TRUE,
  show.aic = TRUE
)

#knitr::include_graphics(here::here("results","reg_dredge_v2.htm"))
```

### Table S10
```{r dredge_v2b, warning = FALSE}
# Assemble Table S10: read back the regression table written to
# results/reg_dredge_v2.htm, then insert residual-variance and
# explained-variance rows computed from the fitted models' variance components.

m_d_out <- as.data.frame(read_html(here::here("results/reg_dredge_v2.htm")) %>% html_table(fill=TRUE))

# Every column after the first (label) column; the code below writes seven
# values per row into these, one per model in the order
# m13, m_dredged, m_dredged_m13, m_dredged1, ..., m_dredged4.
model_cols <- 2:ncol(m_d_out)

# Rows 34-37 are overwritten with the residual-variance section (labels now,
# values filled in further down)
m_d_out[34:37, 1] <- c("Residual Variance", "Team-Level", "Model-Level", "Total")
m_d_out[34:37, model_cols] <- ""

m_d_out <- m_d_out[-40, ]

# NOTE(review): m_d_outz is not referenced anywhere else in this chunk.
m_d_outz <- as.data.frame(matrix(nrow = 4, ncol = ncol(m_d_out)))

# Final table: make room for an explained-variance section in rows 38-41 and
# shift the rows that previously sat there further down.
m_d_out_f <- m_d_out
m_d_out_f[38:41, 1] <- c("Explained Variance", "Team-Level", "Model-Level", "Total")
m_d_out_f[38:41, model_cols] <- ""

# NOTE(review): this originally read `m_d_out[c(38:43),]`, i.e. six source rows
# for five target rows. R truncated the replacement to its first five rows
# (with a warning), so rows 38:42 are what was actually copied. Confirm whether
# row 43 should also be carried over (into row 47).
m_d_out_f[42:46, ] <- m_d_out[38:42, ]

# calculate residual variances
# (column 4 of as.data.frame(VarCorr(.)) is the variance; row 1 is the team
# random intercept, row 2 the residual, labelled "Model-Level" here)
## Team Level
m_d_out_f[35, model_cols] <- round(
  as.numeric(c(md_1[1, 4], md_2[1, 4], md_1_13[1, 4], md_3[1, 4],
               md_4[1, 4], md_5[1, 4], md_6[1, 4])),
  6
)

## Model Level
m_d_out_f[36, model_cols] <- round(
  as.numeric(c(md_1[2, 4], md_2[2, 4], md_1_13[2, 4], md_3[2, 4],
               md_4[2, 4], md_5[2, 4], md_6[2, 4])),
  6
)

## Total
m_d_out_f[37, model_cols] <- as.numeric(m_d_out_f[35, model_cols]) +
  as.numeric(m_d_out_f[36, model_cols])

# calculate explained variance from empty model
m0 <- lmer(AME_Z ~ (1 | team), data = cri_ml)
m0v <- as.data.frame(VarCorr(m0))

team_v0 <- m0v[1, 4]
model_v0 <- m0v[2, 4]
totalv <- team_v0 + model_v0

# Proportional reduction relative to the empty model. The baseline is a scalar
# and is recycled across all model columns, so no hard-coded model count is
# needed.
## Team
m_d_out_f[39, model_cols] <- (team_v0 - as.numeric(m_d_out_f[35, model_cols])) / team_v0

## Model
m_d_out_f[40, model_cols] <- (model_v0 - as.numeric(m_d_out_f[36, model_cols])) / model_v0

## Total
m_d_out_f[41, model_cols] <- (totalv - as.numeric(m_d_out_f[37, model_cols])) / totalv


# Save table
write.csv(m_d_out_f, file = here::here("results/TblS10.csv"))

```


The best computer-selected model (based on AICc and log-likelihood) can explain slightly more model-level variance but less team-level variance. It cannot explain more of the overall variance.

## Colophon

This file is part of [https://github.com/nbreznau/CRI](https://github.com/nbreznau/CRI), the reproduction materials for [*Observing Many Researchers using the Same Data and Hypothesis Reveals a Hidden Universe of Uncertainty*](https://doi.org/10.31222/osf.io/cd5j9).

```{r colophon, echo=FALSE}
# Print the R version, platform and attached/loaded packages for this run
utils::sessionInfo()
```