-
Notifications
You must be signed in to change notification settings - Fork 5
/
ch7_interactions.Rmd
632 lines (516 loc) · 22.1 KB
/
ch7_interactions.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
---
title: "Chapter 7. Interactions"
output: github_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, comment = "#>", dpi = 500)
library(glue)
library(broom)
library(patchwork)
library(rethinking)
library(tidyverse)
library(conflicted)
conflict_prefer("filter", "dplyr")
conflict_prefer("select", "dplyr")
conflict_prefer("rename", "dplyr")
theme_set(theme_minimal())
source_scripts()
set.seed(0)
```
- so far, we have assumed that each predictor has an independent association with the mean of the outcome
* now we will look at conditioning this estimate on another predictor using *interactions*
- fitting a model with interactions is easy, but understanding them can be harder
## 7.1 Building an interaction
- for examples, we will use information on the economy countries and geographic properties
```{r}
data("rugged")
d <- rugged
d$log_gdp <- log(d$rgdppc_2000)
dd <- d[complete.cases(d$rgdppc_2000), ]
dd <- as_tibble(dd)
```
- one peculiarity is how the GDP of a country is associated to the ruggedness of the terrain.
* the association is opposite for African and non-African countries
```{r}
dd %>%
mutate(is_africa = ifelse(cont_africa == 1, "Africa", "non-Africa")) %>%
ggplot(aes(x = rugged, y = log_gdp)) +
facet_wrap(~ is_africa, nrow = 1, scales = "free") +
geom_smooth(method = "lm", formula = "y ~ x", color = "black", lty = 2) +
geom_point(color = "lightblue4")
```
### 7.1.1 Adding a dummy variable doesn't work
- two models to begin with:
1. linear regression of log-GDP on ruggedness
2. the same model with a dummy variable for the African nations
```{r}
m7_3 <- quap(
alist(
log_gdp ~ dnorm(mu, sigma),
mu <- a + bR*rugged,
a ~ dnorm(8, 100),
bR ~ dnorm(0, 1),
sigma ~ dunif(0, 10)
),
data = dd
)
m7_4 <- quap(
alist(
log_gdp ~ dnorm(mu, sigma),
mu <- a + bR*rugged + bA*cont_africa,
a ~ dnorm(8, 100),
bR ~ dnorm(0, 1),
bA ~ dnorm(0, 1),
sigma ~ dunif(0, 10)
),
data = dd
)
precis(m7_3)
precis(m7_4)
```
```{r}
compare(m7_3, m7_4)
```
- plot the posterior distribution mean and intervals for African countries and the rest
* we can see that the WAIC for the model with the dummy variable was lower because African countries tend to have lower GDP, not because it fit the different slope
```{r}
rugged_seq <- seq(-1, 8, 0.25)
mu_notafrica <- link(m7_4, data = data.frame(cont_africa = 0,
rugged = rugged_seq))
mu_africa <- link(m7_4, data = data.frame(cont_africa = 1,
rugged = rugged_seq))
mu_notafrica_mean <- apply(mu_notafrica, 2, mean)
mu_notafrica_pi <- apply(mu_notafrica, 2, PI, prob = 0.97) %>% pi_to_df()
mu_africa_mean <- apply(mu_africa, 2, mean)
mu_africa_pi <- apply(mu_africa, 2, PI, prob = 0.97) %>% pi_to_df()
bind_rows(
tibble(cont_africa = "not Africa",
rugged = rugged_seq,
mu = mu_notafrica_mean) %>%
bind_cols(mu_notafrica_pi),
tibble(cont_africa = "Africa",
rugged = rugged_seq,
mu = mu_africa_mean) %>%
bind_cols(mu_africa_pi)
) %>%
ggplot(aes(x = rugged)) +
geom_ribbon(aes(ymin = x2_percent, ymax = x98_percent, fill = cont_africa),
alpha = 0.2, color = NA) +
geom_line(aes(y = mu, color = cont_africa), size = 1.1) +
geom_point(data = dd, aes(y = log_gdp), size = 1, color = "lightblue4") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme(legend.position = c(0.8, 0.8)) +
labs(x = "terrain ruggedness index",
y = "log GDP year 2000",
color = NULL, fill = NULL,
title = "Model of GDP by ruggedness with a dummy variable for continent",
subtitle = "One line for in and out of Africa with the 97% interval.")
```
### 7.1.2 Adding a linear interaction does work
- we have just used the model below:
$$
Y_i \sim \text{Normal}(\mu_i, \sigma) \\
\mu_i = \alpha + \beta_R R_i + \beta_A A_i
$$
- now we want to allow the relationship of $Y$ and $R$ to vary as a function of $A$
* add in $\gamma$ as a placeholder for another linear function that defines the slope between GDP and ruggedness
* this is the *linear interaction effect*
* explicitly modeling that the slope between GDP and ruggedness is *conditional upon* whether or not a nation is in Africa
$$
Y_i \sim \text{Normal}(\mu_i, \sigma) \\
\mu_i = \alpha + \gamma_i R_i + \beta_A A_i \\
\gamma_i = \beta_R + \beta_{AR}A_i
$$
- rearranging the above formula results in the following
$$
\mu_i = \alpha + \beta_R R_i + \beta_{AR} A_i R_i + \beta_A A_i
$$
- fit the model using `quap()` like normal
```{r}
m7_5 <- quap(
alist(
log_gdp ~ dnorm(mu, sigma),
mu <- a + gamma*rugged + bA*cont_africa,
gamma <- bR + bAR*cont_africa,
a ~ dnorm(8, 100),
c(bA, bR, bAR) ~ dnorm(0, 1),
sigma ~ dunif(0, 10)
),
data = dd
)
precis(m7_5)
plot(precis(m7_5))
```
- the new model is far better than the previous two
```{r}
compare(m7_3, m7_4, m7_5)
```
### 7.1.3 Plotting the interaction
- nothing new, just make two plots, one for African and one for non-African
```{r}
rugged_seq <- seq(-1, 8, 0.25)
mu_africa <- link(m7_5,
data = data.frame(cont_africa = 1, rugged = rugged_seq))
mu_africa_mean <- apply(mu_africa$mu, 2, mean)
mu_africa_pi <- apply(mu_africa$mu, 2, PI, prob = 0.97) %>% pi_to_df()
mu_notafrica <- link(m7_5,
data = data.frame(cont_africa = 0, rugged = rugged_seq))
mu_notafrica_mean <- apply(mu_notafrica$mu, 2, mean)
mu_notafrica_pi <- apply(mu_notafrica$mu, 2, PI, prob = 0.97) %>% pi_to_df()
bind_rows(
tibble(cont_africa = "not Africa",
rugged = rugged_seq,
mu = mu_notafrica_mean) %>%
bind_cols(mu_notafrica_pi),
tibble(cont_africa = "Africa",
rugged = rugged_seq,
mu = mu_africa_mean) %>%
bind_cols(mu_africa_pi)
) %>%
ggplot(aes(x = rugged)) +
geom_ribbon(aes(ymin = x2_percent, ymax = x98_percent, fill = cont_africa),
alpha = 0.2, color = NA) +
geom_line(aes(y = mu, color = cont_africa), size = 1.1) +
geom_point(data = dd, aes(y = log_gdp), size = 1, color = "lightblue4") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme(legend.position = c(0.8, 0.8)) +
labs(x = "terrain ruggedness index",
y = "log GDP year 2000",
color = NULL, fill = NULL,
title = "Model of GDP by ruggedness with an interaction term for continent",
subtitle = "One line for in and out of Africa with the 97% interval.")
```
### 7.1.4 Interpreting the interaction estimate
- helpful to plot implied predictions
- often only numbers are reported, though they are difficult to interpret because:
* the parameters have different meanings because they are no longer independent
* it is very difficult to propagate the uncertainty when trying to understand multiple parameters simultaneously
- remember that the interaction term $y$ is a distribution
* we can sample from it for African and non-African countries
```{r}
post <- extract.samples(m7_5)
gamma_africa <- post$bR + post$bAR*1
gamma_notafrica <- post$bR + post$bAR*0
tibble(Africa = gamma_africa,
`not Africa` = gamma_notafrica) %>%
pivot_longer(everything()) %>%
ggplot(aes(x = value)) +
geom_density(aes(fill = name, color = name), alpha = 0.3) +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
labs(x = "posterior estimates of gamma",
y = "probability density",
title = "Probability distribution for the estimates of gamma",
subtitle = "Gamma is the slope of the interaction term")
```
- can use these estimates like normal:
* e.g.: what is the probability that the slope within Africa is less than the slope outside of Africa
```{r}
# The probability that the slope within Africa is less than that outside.
sum(gamma_africa < gamma_notafrica) / length(gamma_africa)
```
## 7.2 Symmetry of the linear interaction
- the interaction term we have fit has two different phrasings:
1. "How much does the influence of ruggedness (on GDP) depend upon whether the nation is in Africa?"
2. How much does the influence of being in Africa (on GDP) depend upon ruggedness?
- the model interprets these as the same statement
7.2.1 Buridan's interaction
- the model's formula can be rearranged
* the same model can be reformulated to group the $A_i$ terms together
* shows that *linear interactions are symmetric*
$$
Y_i \sim \text{Normal}(\mu_i, \sigma) \\
\mu_i = \alpha + \gamma_i R_i + \beta_A A_i \\
\gamma_i = \beta_R + \beta_{AR}A_i \\
\ \\
\mu_i = \alpha + (\beta_R + \beta_{AR}A_i) R_i + \beta_A A_i \\
\mu_i = \alpha + \beta_R R_i + \beta_{AR}A_i R_i + \beta_A A_i \\
\ \\
\mu_i = \alpha + \beta_R R_i + (\beta_{AR} R_i + \beta_A) A_i \\
$$
### 7.2.2 Africa depends upon ruggedness
- below is a plot of the reverse interpretation of the interaction term
* the x-axis is now whether the country is in Africa
* the points are the ruggedness separated by high and low (using the median as the cut-off)
* the blue slope is the expected reduction in log GDP for a non-rugged terrain if it was moved to Africa
* for countries in very rugged terrains, the continent has little effect
```{r}
q_rugged <- range(dd$rugged)
mu_ruggedlo <- link(m7_5,
data = data.frame(rugged = q_rugged[1],
cont_africa = 0:1))
mu_ruggedlo_mean <- apply(mu_ruggedlo$mu, 2, mean)
mu_ruggedlo_pi <- apply(mu_ruggedlo$mu, 2, PI) %>% pi_to_df()
mu_ruggedhi <- link(m7_5,
data = data.frame(rugged = q_rugged[2],
cont_africa = 0:1))
mu_ruggedhi_mean <- apply(mu_ruggedhi$mu, 2, mean)
mu_ruggedhi_pi <- apply(mu_ruggedhi$mu, 2, PI) %>% pi_to_df()
bind_rows(
tibble(rugged = "low",
cont_africa = 0:1,
name = "low",
mu_rugged = mu_ruggedlo_mean) %>%
bind_cols(mu_ruggedlo_pi),
tibble(rugged = "high",
cont_africa = 0:1,
mu_rugged = mu_ruggedhi_mean) %>%
bind_cols(mu_ruggedhi_pi)
) %>%
ggplot(aes(x = cont_africa)) +
geom_ribbon(aes(ymin = x5_percent, ymax = x94_percent,
fill = factor(rugged)), alpha = 0.2) +
geom_line(aes(y = mu_rugged, color = factor(rugged))) +
geom_jitter(
data = dd,
aes(x = cont_africa, y = log_gdp,
color = ifelse(rugged < median(rugged), "low", "high")),
alpha = 0.4, width = 0.03
) +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
scale_x_continuous(breaks = c(0, 1), labels = c("not Africa", "Africa")) +
labs(x = NULL,
y = "log GDP year 2000",
fill = "ruggedness", color = "ruggedness",
title = "Symmetric interpretation of the interaction term")
```
- it is simultaneously true that:
1. the influence of ruggedness depends on the continent
2. the influence of continent depends in the ruggedness
## 7.3 Continuous interactions
- interaction effects are difficult to interpret
* especially with only tables of posterior means and std. devs.
* we will look at using a *triptych plot* to understand these effects
- it is very important to center and standardize the predictors when using interactions
### 7.3.1 The data
- for examples we will use sizes of blooms from beds of tulips grown in greenhouses with different soil and light
* we are interested in the interaction of light and water
```{r}
data("tulips")
d <- as_tibble(tulips)
str(d)
```
### 7.3.2 The un-centered models
- the author demonstrates fitting and interpreting the models without centering the data
### 7.3.3 Center and re-estimate
```{r}
d$shade_c <- d$shade - mean(d$shade)
d$water_c <- d$water - mean(d$water)
```
- fit two models for predicting number of blooms with water and shade
* the second model includes their interactions
* starting places for the fitting are provided because the priors are very flat
```{r}
m7_8 <- quap(
alist(
blooms ~ dnorm(mu, sigma),
mu <- a + bW*water_c + bS*shade_c,
a ~ dnorm(130, 100),
c(bW, bS) ~ dnorm(0, 100),
sigma ~ dunif(0, 100)
),
data = d,
start = list(a = mean(d$blooms),
bW = 0, bS = 0,
sigma = sd(d$blooms))
)
m7_9 <- quap(
alist(
blooms ~ dnorm(mu, sigma),
mu <- a + bW*water_c + bS*shade_c + bWS*water_c*shade_c,
a ~ dnorm(130, 100),
c(bW, bS, bWS) ~ dnorm(0, 100),
sigma ~ dunif(0, 100)
),
data = d,
start = list(a = mean(d$blooms),
bW = 0, bS = 0, bWS = 0,
sigma = sd(d$blooms))
)
plot(precis(m7_8))
plot(precis(m7_9))
coeftab(m7_8, m7_9)
compare(m7_8, m7_9)
```
- the main effects are the same between the two models
* if the predictors were not centered, the coefficient for shade in the model with the interaction would have been positive
- with the data centered, the intercept has meaning - it is the mean number of blooms
- interpretations of the estimates for the model with the interaction term `m7_9`:
* $\alpha$: the expected value of blooms when both water and shade are at their average values (0 from centering)
* $\beta_W$: the expected change in blooms when water increases by 1 unit and shade is at its average value
* $\beta_S$: the expected change in blooms when shade increases by 1 unit and water is at its average value
* $\beta_{WS}$: the interaction term has multiple interpretations:
- the expected change in the influence of water on blooms when increasing shade by one unit
- the expected change in the influence of shade on blooms when increasing water by one unit
### 7.3.4 Plotting implied predictions
- make a *triptych plot* to help understand interactions:
* plot the bivariate relationship between shade and blooms
* each of the 3 plots will show predictions for different values of water
```{r}
shade_seq <- seq(-1, 1)
purrr::map(seq(-1, 1), function(w) {
dt <- d[d$water_c == w, ]
pred_data <- tibble(water_c = w, shade_c = shade_seq)
mu <- link(m7_9, data = pred_data)
pred_data %>%
mutate(mu_mean = apply(mu, 2, mean)) %>%
bind_cols(apply(mu, 2, PI) %>% pi_to_df())
}) %>%
bind_rows() %>%
ggplot(aes(x = shade_c)) +
facet_wrap(~ water_c) +
geom_ribbon(aes(ymin = x5_percent, ymax = x94_percent), alpha = 0.2) +
geom_line(aes(y = mu_mean)) +
geom_point(data = d, aes(y = blooms)) +
labs(x = "shade (centered)",
y = "blooms",
title = "Effect of amount of light on impact of shade on blooms",
subtitle = "Each panel is a different level of light.")
```
- this makes the interaction of the effects of shade and water easily understandable
* plot on the left: when there is little water, the impact of shade is negligible
* plot on the right: when there is plenty of water, the shade has a larger impact on the number of blooms
7.4 Interactions in design formulas
- mathematical formula with an interaction between $x$ and $z$:
$$
y_i \sim \text{Normal}(\mu_i, \sigma) \\
\mu_i = \alpha + \beta_x x_i + \beta_z z_i + \beta_{xz} x_i z_i
$$
```r
# Two equivalent model specifications.
m7_x <- lm(y ~ x + z + x*z, data = d)
m7_x <- lm(y ~ x*z, data = d)
```
- can also remove main effects by subtracting them out
* useful when we know *a priori* there is know effect of $z$ on $y$
```r
m7_x <- lm(y ~ x + x*z - z, data = d)
```
- can use the following trick to see how R is interpreting a formula
```{r}
x <- z <- w <- 1
colnames(model.matrix(~ x*z*w))
```
## 7.6 Practice
### Hard.
**7H1. Return to the data(tulips) example in the chapter. Now include the bed variable as a predictor in the interaction model. Don’t interact bed with the other predictors; just include it as a main effect. Note that bed is categorical. So to use it properly, you will need to either construct dummy variables or rather an index variable, as explained in Chapter 6.**
```{r}
# Add bed as a set of dummy variables.
# Bed "a" will be part of the intercept.
beds_dummy <- model.matrix(~ bed, data = d) %>%
as.matrix() %>%
as.data.frame() %>%
set_names(paste0("bed_", letters[1:3]))
dd <- bind_cols(d, beds_dummy)
m_7h1 <- quap(
alist(
blooms ~ dnorm(mu, sigma),
mu <- a + bW*water_c + bS*shade_c + bWS*water_c*shade_c + bb*bed_b + bc*bed_c,
a ~ dnorm(130, 100),
c(bW, bS, bWS, bb, bc) ~ dnorm(0, 50),
sigma ~ dunif(0, 100)
),
data = dd,
start = list(a = mean(d$blooms),
bW = 0, bS = 0, bWS = 0,
bb = 0, bc = 0,
sigma = sd(d$blooms))
)
precis(m_7h1)
```
**7H2. Use WAIC to compare the model from 7H1 to a model that omits bed. What do you infer from this comparison? Can you reconcile the WAIC results with the posterior distribution of the bed coefficients?**
```{r}
compare(m_7h1, m7_9)
```
It seems that the model that includes the bed information produced a better model.
However, the improvement was not overwhelming as the difference in WAIC value is less than the standard error of the WAIC values and the weights are split about 75:25.
The beds seem to hold some information, perhaps some sort of batch effect.
From the standard deviation and 89% interval of these coefficients, they are not very informative, given the other variables.
**7H3. Consider again the data(rugged) data on economic development and terrain ruggedness, examined in this chapter. One of the African countries in that example, Seychelles, is far outside the cloud of other nations, being a rare country with both relatively high GDP and high ruggedness. Seychelles is also unusual, in that it is a group of islands far from the coast of mainland Africa, and its main economic activity is tourism.**
**One might suspect that this one nation is exerting a strong influence on the conclusions. In this problem, I want you to drop Seychelles from the data and re-evaluate the hypothesis that the relationship of African economies with ruggedness is different from that on other continents.**
**(a) Begin by using map to fit just the interaction model:**
$$
y_i \sim \text{Normal}(\mu_i, \sigma) \\
\mu_i = \alpha + \beta_A A_i + \beta_R R_i + \beta_{AR} A_i R_i
$$
**where $y$ is log GDP per capita in the year 2000 (log of `rgdppc_2000`); $A$ is `cont_africa`, the dummy variable for being an African nation; and $R$ is the variable `rugged`. Choose your own priors. Compare the inference from this model fit to the data without Seychelles to the same model fit to the full data. Does it still seem like the effect of ruggedness depends upon continent? How much has the expected relationship changed?**
```{r}
d <- rugged
d$log_gdp <- log(d$rgdppc_2000)
dd <- d[complete.cases(d$rgdppc_2000), ]
dd <- as_tibble(dd)
m7h3_1 <- quap(
alist(
log_gdp ~ dnorm(mu, sigma),
mu <- a + bA*cont_africa + bR*rugged + bAR*cont_africa*rugged,
a ~ dnorm(0, 20),
c(bA, bR, bAR) ~ dnorm(0, 10),
sigma ~ dunif(0, 10)
),
data = dd
)
dd_noS <- dd %>% filter(country != "Seychelles")
m7h3_2 <- quap(
alist(
log_gdp ~ dnorm(mu, sigma),
mu <- a + bA*cont_africa + bR*rugged + bAR*cont_africa*rugged,
a ~ dnorm(0, 20),
c(bA, bR, bAR) ~ dnorm(0, 10),
sigma ~ dunif(0, 10)
),
data = dd_noS
)
precis(m7h3_1)
precis(m7h3_2)
```
Removing Seychelles from the data reduced the MAP for the coefficient for continent $\beta_A$ and the interaction term $\beta_{AR}$.
This suggests that Seychelles did have a relatively large impact on the effect from being in Africa and the interaction between being in Africa and terrain ruggedness.
**(b) Now plot the predictions of the interaction model, with and without Seychelles. Does it still seem like the effect of ruggedness depends upon continent? How much has the expected relationship changed?**
```{r}
rugged_seq <- seq(-1, 8, 0.25)
plot_effect_of_africa <- function(mdl, original_data) {
mu_africa <- link(mdl,
data = data.frame(cont_africa = 1,
rugged = rugged_seq))
mu_africa_mean <- apply(mu_africa, 2, mean)
mu_africa_pi <- apply(mu_africa, 2, PI, prob = 0.97) %>% pi_to_df()
mu_notafrica <- link(mdl,
data = data.frame(cont_africa = 0,
rugged = rugged_seq))
mu_notafrica_mean <- apply(mu_notafrica, 2, mean)
mu_notafrica_pi <- apply(mu_notafrica, 2, PI, prob = 0.97) %>% pi_to_df()
bind_rows(
tibble(cont_africa = "not Africa",
rugged = rugged_seq,
mu = mu_notafrica_mean) %>%
bind_cols(mu_notafrica_pi),
tibble(cont_africa = "Africa",
rugged = rugged_seq,
mu = mu_africa_mean) %>%
bind_cols(mu_africa_pi)
) %>%
ggplot(aes(x = rugged)) +
geom_ribbon(aes(ymin = x2_percent, ymax = x98_percent,
fill = cont_africa),
alpha = 0.2, color = NA) +
geom_line(aes(y = mu, color = cont_africa), size = 1.1) +
geom_point(data = original_data, aes(y = log_gdp),
size = 1, color = "lightblue4") +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
theme(legend.position = c(0.8, 0.8)) +
labs(x = "terrain ruggedness index",
y = "log GDP year 2000",
color = NULL, fill = NULL,
title = "Model of GDP by ruggedness with an interaction term for continent",
subtitle = "One line for in and out of Africa with the 97% interval.")
}
plot_effect_of_africa(m7h3_1, dd)
plot_effect_of_africa(m7h3_2, dd) +
labs(subtitle = "One line for in and out of Africa with the 97% interval; without Seychelles.")
```
The angle of the red line decreases when Seychelles is removed.
This suggests that Seychelles increased the impact of being in Africa as terrain ruggedness increased (and similarly for the symmetric interpretation of the interaction term).