forked from dgrtwo/data-screencasts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bird-collisions.Rmd
117 lines (97 loc) · 3.32 KB
/
bird-collisions.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
---
title: "Bird Collisions"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## R Markdown
```{r}
library(tidyverse)
theme_set(theme_light())
mp_light <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-04-30/mp_light.csv")
bird_collisions <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-04-30/bird_collisions.csv") %>%
left_join(mp_light, by = "date")
```
```{r}
bird_collisions %>%
ggplot(aes(date, fill = locality)) +
geom_histogram()
```
```{r}
bird_collisions %>%
gather(category, value, -date, -light_score) %>%
count(category, value, light_score_missing = is.na(light_score)) %>%
group_by(category) %>%
top_n(16, n) %>%
ungroup() %>%
mutate(value = fct_reorder(value, n, sum),
category = fct_reorder(category, n, length)) %>%
ggplot(aes(value, n, fill = light_score_missing)) +
geom_col() +
facet_wrap(~ category, scales = "free_y") +
coord_flip() +
labs(x = "# of collisions",
y = "",
fill = "Light score missing",
title = "Category breakdowns of collisions")
```
```{r}
bird_collisions %>%
filter(!is.na(light_score)) %>%
count(date, locality) %>%
ggplot(aes(n, color = locality)) +
geom_density() +
scale_x_log10() +
labs(x = "# of collisions per night")
bird_collisions %>%
filter(!is.na(light_score)) %>%
distinct(date, light_score) %>%
ggplot(aes(light_score)) +
geom_histogram()
geom_mean <- function(x) {
exp(mean(log(x + 1)) - 1)
}
by_day_mp <- bird_collisions %>%
filter(!is.na(light_score)) %>%
group_by(date, locality) %>%
summarize(collisions = n()) %>%
ungroup() %>%
complete(date, locality, fill = list(collisions = 0)) %>%
right_join(mp_light %>% crossing(locality = c("CHI", "MP")), by = c("date", "locality")) %>%
filter(date <= "2016-11-13") %>%
replace_na(list(collisions = 0)) %>%
mutate(locality = ifelse(locality == "CHI", "Greater Chicago", "McCormick Place"))
bootstrap_cis <- by_day_mp %>%
bootstraps(times = 1000) %>%
unnest(map(splits, as.data.frame)) %>%
group_by(light_score, locality, id) %>%
summarize(avg_collisions = geom_mean(collisions)) %>%
summarize(bootstrap_low = quantile(avg_collisions, .025),
bootstrap_high = quantile(avg_collisions, .975))
by_day_mp %>%
group_by(light_score, locality) %>%
summarize(avg_collisions = geom_mean(collisions),
nights = n()) %>%
ggplot(aes(light_score, color = locality)) +
geom_line(aes(y = avg_collisions)) +
geom_ribbon(aes(ymin = bootstrap_low, ymax = bootstrap_high),
data = bootstrap_cis,
alpha = .25) +
expand_limits(y = 0) +
labs(x = "Light score at McCormick place (higher means more lights on)",
y = "Geometric mean of the number of collisions",
title = "Brighter lights at McCormick place correlate with more bird collisions there, and not with Chicago overall",
subtitle = "Ribbon shows 95% bootstrapped percentile confidence interval",
color = "")
```
### Look at confounders
```{r}
library(lubridate)
bird_collisions %>%
filter(date >= "2005-01-01") %>%
count(month = month(date, label = TRUE), #floor_date(date, "month"),
locality) %>%
ggplot(aes(month, n, color = locality, group = locality)) +
geom_line()
```