index.Rmd

---
title: "Chicago Community Investment Exploration"
author: Loren Hinkson
output:
    html_document:
      df_print: paged
      code_folding: hide
---

```{r Housekeeping, echo=TRUE, message=FALSE, warning=FALSE, include=FALSE}
# Housekeeping (clear workspace, start output file, load packages)
# rm(list=ls())

# data cleaning, augmenting, manipulation
library(tidyverse)
library(dplyr)
library(haven)
# library(stargazer)
library(xtable)
library(readxl)
# library(astsa)
library(Hmisc)
library(tidycensus)
library(lubridate)
library(here)

# plotting tools
library(gridExtra)
library(ggplot2)
library(grid)
library(ggrepel)
library(gghighlight)
library(ggalt)
library(scales)
library(extrafont)
# font_import()
# loadfonts()
library(swatches)
library(ggbeeswarm)
library(cowplot)

# mapping tools
library(raster)
library(rgdal)
library(rgeos)
library(maptools)
library(tigris)
library(ggmap)
library(sf)
```

```{r setup, include=FALSE}
# Global chunk defaults: echo chunk source and cache chunk results.
knitr::opts_chunk$set(echo = TRUE, cache=TRUE)
```


```{r include = FALSE}
# Six purple shades, ordered dark to light; "#c6c3ff" gives the ramp its name.
monochromatic_C6C3FF <- c(
  "#7d77ff", "#9690ff", "#aeaaff",
  "#c6c3ff", "#deddff", "#f6f6ff"
)

# Named colours used to build the report's palettes: four teals, four warm
# tones and two greys.
diverging_teal <- c(
  # navy       = "#2d3d61",
  darkteal     = "#2d4f5a",
  forest       = "#2d615f",
  plankton     = "#2d706e",
  ocean        = "#318791",
  mauve        = "#996768",
  cocoa        = "#b5725a",
  honeymustard = "#d59349",
  goldenrod    = "#edb834",
  darkgray     = "#B5B5B5",
  palegray     = "#E5E5E5"
)


```


```{r include=FALSE}
# make it easy to select colors by name, thanks Dr. Simon J!
# Attribution: https://drsimonj.svbtle.com/creating-corporate-colour-palettes-for-ggplot2
# Look up colours in `diverging_teal` by name.
#   ...: colour names (character). With no arguments the whole named
#        vector is returned.
get_dt_cols <- function(...) {
  requested <- c(...)
  if (!is.null(requested)) {
    return(diverging_teal[requested])
  }
  diverging_teal
}
```

```{r include=FALSE}
# set groupings of colors as palettes
# Named palettes; each is an ordered selection from `diverging_teal`, looked
# up by colour name through get_dt_cols().
# NOTE(review): "mixed" lists "honeymustard" twice -- confirm this is intended.
dt_palettes <- list(
  main = get_dt_cols("ocean", "mauve", "darkteal", "goldenrod"),
  diverging = get_dt_cols(
    "darkteal", "goldenrod", "forest", "honeymustard", "ocean", "mauve"
  ),
  cool = get_dt_cols("darkteal", "forest", "plankton", "ocean"),
  desert = get_dt_cols("mauve", "goldenrod", "cocoa", "honeymustard"),
  mixed = get_dt_cols(
    "mauve", "ocean", "honeymustard", "forest",
    "cocoa", "plankton", "honeymustard"
  ),
  grey = get_dt_cols("palegray", "darkgray")
)
```


```{r include=FALSE}
# function for utilizing palettes
# Build a colour-interpolation function from one of the named palettes.
#
# Args:
#   palette: name of an entry in `dt_palettes`.
#   reverse: if TRUE, reverse the colour order before interpolating.
#   shift:   number of leading colours to drop from the palette.
#   ...:     forwarded to colorRampPalette() (e.g. alpha).
#
# Returns: the function created by colorRampPalette(); call it with the
#   number of colours wanted.
dt_pal <- function(palette = "main", reverse = FALSE, shift = 0, ...) {
  pal <- dt_palettes[[palette]]
  if (is.null(pal)) {
    stop("Unknown palette: ", palette, call. = FALSE)
  }

  if (shift > 0) {
    if (shift >= length(pal)) {
      stop("`shift` must be smaller than the number of colours in the palette",
           call. = FALSE)
    }
    # `1 + shift:length(pal)` parses as `1 + (shift:length(pal))`, which
    # indexes one past the end and appends an NA colour. Dropping the first
    # `shift` entries is what was meant.
    pal <- pal[-seq_len(shift)]
  }

  if (reverse) {
    pal <- rev(pal)
  }

  # pass alpha, other color arguments to colorRampPalette
  colorRampPalette(pal, ...)
}

# function for applying palettes to borders and fills in ggplot2
# Colour (outline) scale built from a named palette.
#   palette, reverse, shift: forwarded to dt_pal().
#   discrete: TRUE for a discrete scale, FALSE for a 256-step gradient.
#   ...: forwarded to discrete_scale() or scale_color_gradientn().
scale_color_dt <- function(palette = "main", discrete = TRUE, reverse = FALSE, shift=0, ...) {
  ramp <- dt_pal(palette = palette, reverse = reverse, shift = shift)

  if (!discrete) {
    return(
      scale_color_gradientn(colours = ramp(256), guide = "colourbar", ...)
    )
  }

  discrete_scale("colour", paste0("dt_", palette), palette = ramp, ...)
}

# Fill scale built from a named palette; counterpart of scale_color_dt().
#
# Args:
#   palette:  name of an entry in `dt_palettes`.
#   discrete: TRUE for a discrete scale, FALSE for a 256-step gradient.
#   reverse:  if TRUE, reverse the palette order.
#   shift:    number of leading palette colours to skip (as in
#             scale_color_dt()).
#   ...:      forwarded to discrete_scale() or scale_fill_gradientn().
scale_fill_dt <- function(palette = "main", discrete = TRUE, reverse = FALSE,
                          shift = 0, ...) {
  pal <- dt_pal(palette = palette, reverse = reverse, shift = shift)

  if (discrete) {
    # No forced guide here: a colourbar guide only works with continuous
    # scales, so the discrete branch keeps ggplot2's default legend (callers
    # can still pass `guide =` through `...`).
    discrete_scale("fill", paste0("dt_", palette), palette = pal, ...)
  } else {
    scale_fill_gradientn(colours = pal(256), ...)
  }
}
```


```{r set_theme, message=FALSE, warning=FALSE, include=FALSE}
# Base ggplot2 theme for the report: theme_minimal() with the overrides below
# (cream plot background, grey grid lines, "Ledger" for titles and strips).
#
# Args:
#   base_size:      base font size.
#   base_family:    base font family.
#   base_line_size: base size for line elements.
#   base_rect_size: base size for rect elements.
#
# Returns: a complete ggplot2 theme object.
theme_modest <- function(base_size = 16,
                         base_family = "Roboto",
                         base_line_size = base_size / 25,
                         base_rect_size = base_size / 25) {
  # base_rect_size used to be accepted but silently ignored; forward it.
  theme_minimal(base_size = base_size,
                base_family = base_family,
                base_line_size = base_line_size,
                base_rect_size = base_rect_size) %+replace%
    theme(
      axis.title = element_text(size = rel(0.75)),
      axis.title.y = element_text(margin = unit(c(0, 0, 0, 2), "lines"),
                                  size = rel(1.25)),
      axis.title.x = element_text(margin = unit(c(2, 0, 0, 0), "lines")),
      axis.text = element_text(size = rel(0.75)),
      axis.ticks = element_line(colour = "grey", size = 0.5),
      panel.grid.major = element_line(colour = "grey", size = 0.4),
      panel.grid.minor = element_line(colour = "lightgrey", size = 0.25),
      legend.box = NULL,
      legend.title = element_text(size = rel(1)),
      legend.text = element_text(size = rel(0.5)),
      legend.key.height = NULL,
      legend.key.width = NULL,
      legend.key.size = unit(20, "pt"),
      legend.key = element_rect(fill = NA, colour = NA),
      legend.background = element_rect(colour = NA, fill = NA),
      legend.position = "right",
      legend.justification = "center",
      plot.background = element_rect(colour = NA, fill = "#fffbe6"),
      plot.margin = unit(c(3, 3, 3, 3), "lines"),
      # hjust must be a number in [0, 1], not the string "0.5"
      plot.title = element_text(face = "bold", hjust = 0.5,
                                margin = unit(c(0, 0, 1, 0), "lines"),
                                family = "Ledger"),
      plot.subtitle = element_text(size = rel(0.9), hjust = 0.5,
                                   margin = unit(c(0, 0, 2, 0), "lines"),
                                   family = "Ledger"),
      plot.caption = element_text(size = rel(0.75), hjust = 1,
                                  margin = unit(c(2, 0, 0, 0), "lines")),
      strip.background = element_rect(colour = NA, fill = NA),
      strip.text = element_text(face = "bold", size = rel(0.65),
                                family = "Ledger"),
      panel.spacing = unit(15, "lines"),
      panel.border = element_blank(),
      complete = TRUE
    )
}
  
  
# Map variant of theme_modest(): blanks axis titles, text, ticks and most
# grid lines, removes panel spacing, and widens the plot margin.
#   base_size: base font size handed to theme_modest().
theme_map_modest <- function(base_size = 12) {
  require(grid)

  map_overrides <- theme(
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    panel.spacing = unit(0, "lines"),
    plot.margin = unit(c(10, 10, 10, 10), "lines")
  )

  theme_modest(base_size) %+replace% map_overrides
}

# Make theme_modest() (12 pt base size) the default theme for later plots.
theme_set(theme_modest(base_size = 12))
```


```{r import-licenses, message=FALSE, warning=FALSE, include=FALSE}
# import business licenses data
# bus_licenses <- read_csv(here::here("data", "Business_Licenses.csv"))
# Load the saved licence data and give the two "Far ..." areas the same
# "... Side" suffix as the other SIDE labels.
bus_licenses <- readRDS(here::here("data", "bus_licenses.Rda")) %>%
  mutate(
    SIDE = ifelse(
      SIDE == "Far Southeast", "Far Southeast Side",
      ifelse(SIDE == "Far Southwest", "Far Southwest Side", SIDE)
    )
  )


# view problems report
# problems(bus_licenses)


# investigate nonstandard zip codes flagged -- looks like int'l businesses operating in Chicago
# opting not to remove them
# bind_cols(bus_licenses[unlist(problems(bus_licenses)[, 'row']), c('LICENSE ID', 'LEGAL NAME', 'DOING BUSINESS AS NAME', 'ADDRESS', 'CITY', 'STATE', 'BUSINESS ACTIVITY')], problems(bus_licenses)[, 'actual']) %>% arrange(ADDRESS) %>% distinct(`DOING BUSINESS AS NAME`, `BUSINESS ACTIVITY`, ADDRESS, CITY, STATE, actual)

# add columns calculating license status and term 
# bus_licenses <- mutate(bus_licenses, 
#                        active=ifelse(grepl("AAI",`LICENSE STATUS`),1,0),
#                        app_review = difftime(mdy(`LICENSE APPROVED FOR ISSUANCE`,tz="America/Chicago"), mdy(`APPLICATION REQUIREMENTS COMPLETE`,tz="America/Chicago"),unit="days"),
#                        license_term = difftime(mdy(`LICENSE TERM EXPIRATION DATE`,tz="America/Chicago"), mdy(`LICENSE TERM START DATE`,tz="America/Chicago"),unit="days"),
#                        activity_date = mdy(ifelse(is.na(`LICENSE STATUS CHANGE DATE`), `DATE ISSUED`, `LICENSE STATUS CHANGE DATE`),tz="America/Chicago"),
                      # activity_month = lubridate::as_date(
                      #  cut(activity_date, breaks = "month", start.on.monday = FALSE, origin = lubridate::origin)
                      #  ),
#                        activity_yr = lubridate::year(activity_date)
#                        )
```


```{r add-ward-sides, include=FALSE}


# add ward information
# bus_licenses <- bus_licenses %>% mutate(activity_wk = lubridate::as_date(cut(activity_date, breaks = "week", start.on.monday = FALSE, origin = lubridate::origin)))
# bus_licenses <- left_join(bus_licenses, wardSides, by="WARD")
# saveRDS(bus_licenses, file = here::here("data", "bus_licenses.Rda"))
```


```{r pre-2015-lic, include=FALSE, eval=FALSE}
# data exploration
# Count licence records per activity date and ward, keep dates before 2015
# with a known ward, and draw one line per ward.
bus_licenses %>%
  group_by(activity_date, WARD) %>%
  summarise(business_count = n()) %>%
  filter(
    activity_date < mdy("1/1/2015"),
    !is.na(WARD)
  ) %>%
  ggplot() +
  geom_line(
    aes(x = activity_date, y = business_count, colour = as.factor(WARD))
  ) +
  gghighlight(min(business_count), max_highlight = 3L) +
  labs(
    y = "Business Count",
    x = "Activity Date",
    colour = "Ward",
    title = "Chicago Ward Business Presence",
    caption = "Data Source: Chicago Open Data Portal"
  ) +
  theme_modest() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  scale_color_dt()
```

```{r check-2012-spike, include=FALSE}
# look into spike around beginning of 2013
# bus_licenses %>% group_by(activity_date, WARD) %>% summarise(bus_count = n()) %>% summarise(max_activity = max(bus_count)) %>% arrange(desc(max_activity))

# bus_licenses %>% filter(active==1, activity_date < mdy("1/1/2015"), !is.na(WARD), !(activity_date== mdy("12/29/2012"))) %>%
#   group_by(activity_date, WARD) %>% summarise(business_count = n()) %>% 
#   ggplot() + geom_line(aes(x=activity_date, y=business_count, colour=as.factor(WARD))) +
#   gghighlight(min(business_count), max_highlight = 3L) +
#   labs(y="Business Count", x="Activity Date", colour="Ward", 
#        title="Chicago Ward Business Presence (2002-2015)", 
#        caption="Data Source: Chicago Open Data Portal") + 
#   theme_modest() + 
#   theme(panel.grid.major.x = element_blank(),
#         panel.grid.minor = element_blank()) +
#   scale_color_dt() 
```


```{r import-former-wards, include=FALSE, eval=FALSE}
# wardsPre2015 <- sf::st_read(here::here("data","Geofiles - Chicago Zip Code and Neighborhood", "Boundaries - Wards (2003-2015)","geo_export_fa687596-87b6-4059-afac-b28ce789a674.shp"))
# 
# wardsPre2015 <-wardsPre2015 %>% mutate(lon=map_dbl(geometry, ~st_centroid(.x)[[1]]), # add centroid values for labels
#                                     lat=map_dbl(geometry, ~st_centroid(.x)[[2]])) # add centroid values for labels
```


```{r include=FALSE}
# run API key
# Register the Census API key with tidycensus; the key is read from the
# CENSUS_API_KEY environment variable instead of being written in this file.
tidycensus::census_api_key(Sys.getenv("CENSUS_API_KEY"))

# cache census shapefiles
# tigris_cache_dir(here::here("data", "tigris_cache"))
# options(tigris_use_cache = TRUE)
# readRenviron('~/.Renviron')
```


```{r  include=FALSE}
# want to know: how population changing/ not changing across tracts - race, age, income, poverty status, safety net takeup
# Pull tract-level ACS estimates for Cook County, IL via tidycensus.
#
# Args:
#   acs_year: ACS year passed to tidycensus::get_acs().
#   geoms:    if TRUE, request tract geometry along with the estimates.
#
# Returns: whatever tidycensus::get_acs() returns for these variables.
getDemos <- function(acs_year, geoms = TRUE) {
  acs_variables <- c(
    medianIncome = "B19013_001",
    perCapitaIncome = "B19301_001",
    population = "B02001_001",
    belowPovertyPop = "B17001_002",
    whitePop = "B02001_002",
    blackPop = "B02001_003",
    latinxPop = "B03001_003",
    asianPop = "B02001_005",
    citizenshipPop = "B05001_001",
    naturalized = "B05001_005",
    noncitizen = "B05001_006",
    mobilityPop = "B07001_001",
    # NOTE(review): this line used to carry the same "hipster age" comment as
    # the next one; presumably it is the all-ages total -- confirm.
    movedInsideCounty = "B07001_033",
    movedInside20to24 = "B07001_021", # hipster age movement inside Cook County
    movedInside25to29 = "B07001_022",
    medAgemovedInside = "B07002_003",
    renterPop = "B07013_001",
    owners = "B07013_002",
    renters = "B07013_003",
    transportPop = "B08122_001",
    publicTransport = "B08122_013",
    households = "B22001_001",
    householdsSNAP = "B22001_002",
    householdsKidsSNAP = "B22002_003",
    kidsHealthIns = "B27001_007",
    medicarePop = "C27006_001",
    maleChildMedicare = "C27006_004",
    maleMedicare18_64 = "C27006_007",
    maleMedicare65_up = "C27006_010",
    femaleMedicare18_64 = "C27006_017",
    femaleMedicare65_up = "C27006_020",
    femaleChildMedicare = "C27006_014",
    medicaidPop = "C27007_001",
    maleChildMedicaid = "C27007_004",
    maleMedicaid18_64 = "C27007_007",
    maleMedicaid65_up = "C27007_010",
    femaleMedicaid18_64 = "C27007_017",
    femaleMedicaid65_up = "C27007_020",
    femaleChildMedicaid = "C27007_014"
  )

  # FIPS codes are zero-padded strings; the numeric literal 031 is read by R
  # as 31, so pass them as text.
  tidycensus::get_acs(
    geography = "tract",
    variables = acs_variables,
    year = acs_year,
    state = "17",   # IL = 17
    county = "031", # Cook County = 031 FIPS
    geometry = geoms
  )
}
```


```{r  include=FALSE}

# Reshape one year of ACS estimates to one row per tract, add share columns
# (`*_pct`), and tag each tract with its largest racial/ethnic share.
#
# Args:
#   acs_year: ACS year handed to getDemos().
#   geoms:    if TRUE, keep tract geometry.
#
# Returns: the wide table with the `*_pct` columns plus `predominant_race`
#   and `max_pct` (the share behind that label).
demoConversion <- function(acs_year, geoms = TRUE) {
  # long (tract x variable) -> wide (one column per variable)
  demo_df <- getDemos(acs_year = acs_year, geoms = geoms) %>%
    dplyr::select(-one_of("moe")) %>%
    spread(variable, estimate)

  demo_df <- demo_df %>%
    mutate(
      asian_pct = asianPop / population,
      black_pct = blackPop / population,
      latinx_pct = latinxPop / population,
      white_pct = whitePop / population,
      # residual share; kept only when positive, otherwise NA
      other_pct = ifelse(
        (population - blackPop - latinxPop - whitePop - asianPop) /
          population > 0,
        (population - blackPop - latinxPop - whitePop - asianPop) /
          population,
        NA
      ),
      below_poverty_pct = belowPovertyPop / population,
      # Medicaid counts are divided by the Medicaid universe (this used to be
      # medicarePop, unlike takeupMedicaid_pct below)
      childMedicaid_pct = (femaleChildMedicaid + maleChildMedicaid) /
        medicaidPop,
      childMedicare_pct = (femaleChildMedicare + maleChildMedicare) /
        medicarePop,
      takeupMedicare_pct = (maleChildMedicare + maleMedicare18_64 +
        maleMedicare65_up + femaleChildMedicare + femaleMedicare18_64 +
        femaleMedicare65_up) / medicarePop,
      takeupMedicaid_pct = (maleChildMedicaid + maleMedicaid18_64 +
        maleMedicaid65_up + femaleChildMedicaid + femaleMedicaid18_64 +
        femaleMedicaid65_up) / medicaidPop,
      households_kidsSNAP_pct = householdsKidsSNAP / households,
      takeupSNAP_pct = householdsSNAP / households,
      YAmovesInChi_pct = (movedInside20to24 + movedInside25to29) / mobilityPop,
      movesInChi_pct = movedInsideCounty / mobilityPop,
      immigrant_pct = (naturalized + noncitizen) / citizenshipPop,
      renting_pct = renters / renterPop,
      public_transport_pct = publicTransport / transportPop
    ) %>%
    dplyr::select(GEOID, NAME, medianIncome, perCapitaIncome, population,
                  medAgemovedInside, contains("_pct"), everything())

  # Label lookup replaces a nested ifelse() whose innermost call had no `no`
  # argument.
  race_labels <- c(
    black_pct = "Black",
    white_pct = "White",
    asian_pct = "Asian",
    latinx_pct = "Latinx",
    other_pct = "Other"
  )

  # per tract: the group with the largest share and that share
  predominant <- as.data.frame(demo_df) %>%
    dplyr::select(GEOID, NAME, black_pct, latinx_pct, asian_pct,
                  white_pct, other_pct) %>%
    group_by(GEOID, NAME) %>%
    gather(group_name, pct, -GEOID, -NAME) %>%
    slice(which.max(pct)) %>%
    mutate(predominant_race = unname(race_labels[group_name])) %>%
    ungroup() %>%
    dplyr::select(GEOID, NAME, predominant_race, max_pct = pct)

  left_join(demo_df, predominant, by = c("GEOID", "NAME"))
}
```


```{r echo=TRUE, include=FALSE, eval=FALSE}
# test that census calls working
# Manual smoke test: one 2017 pull with the default geoms = TRUE.
getDemos(2017)
```


```{r fortify-mapping, message=FALSE, include=FALSE, eval=FALSE}
# import ward boundaries as sf object
# ward -> side lookup; WARD is converted to a factor before the join
wardSides <- read_csv(here::here("data", "wardSides.csv")) %>%
  mutate(WARD = as.factor(WARD))

# read the 2015 ward boundaries, add centroid coordinates for labels, then
# attach the side columns
wards.2015 <- sf::st_read(
  here::here(
    "data",
    "Geofiles - Chicago Zip Code and Neighborhood",
    "Boundaries - Wards (2015-)",
    "geo_export_0bb2e9fd-20ca-415b-a96a-7722d72c1b41.shp"
  )
) %>%
  mutate(
    long = map_dbl(geometry, ~st_centroid(.x)[[1]]),
    lat = map_dbl(geometry, ~st_centroid(.x)[[2]])
  ) %>%
  left_join(wardSides, by = c(ward = "WARD"))

saveRDS(wards.2015, file = here::here("data", "wards2015_sf.Rda"))
```


```{r message=FALSE,  include = FALSE, eval=FALSE}
years <- list(2012, 2013, 2015, 2017)

# one demographics table per ACS year, each tagged with its year in `id`
multi_year <- map(
  years,
  ~ mutate(demoConversion(.x, geoms = TRUE), id = .x)
)

# stack the years, then match the census sf object to the Chicago wards
# projection
# all_demos <- reduce(multi_year, rbind)
# all_demos <- st_transform(all_demos, crs = st_crs(wards.2015))
all_demos_range <- multi_year %>%
  reduce(rbind) %>%
  st_transform(crs = st_crs(wards.2015))

# saveRDS(all_demos, file = here::here("data", "all_demos_chi_proj.Rda"))
saveRDS(all_demos_range, file = here::here("data", "range_demos_chi_proj.Rda"))
```

```{r include = FALSE}
# Attribute-geometry relationship ("agr") label for every column of the
# demographics table. Building the vector from two name lists guarantees each
# column gets a name and a valid value; the literal version had a misspelt
# "identiy" for NAME and an unnamed "householdsKidsSNAP" entry.
arg_values_demos <- local({
  identity_cols <- c("GEOID", "NAME", "id")

  aggregate_cols <- c(
    "medianIncome", "perCapitaIncome", "population", "medAgemovedInside",
    "asian_pct", "black_pct", "latinx_pct", "white_pct", "other_pct",
    "below_poverty_pct", "childMedicaid_pct", "childMedicare_pct",
    "takeupMedicare_pct", "takeupMedicaid_pct", "households_kidsSNAP_pct",
    "takeupSNAP_pct", "YAmovesInChi_pct", "movesInChi_pct", "immigrant_pct",
    "renting_pct", "public_transport_pct", "asianPop", "belowPovertyPop",
    "blackPop", "citizenshipPop", "femaleChildMedicaid",
    "femaleChildMedicare", "femaleMedicaid18_64", "femaleMedicaid65_up",
    "femaleMedicare18_64", "femaleMedicare65_up", "households",
    "householdsKidsSNAP", "householdsSNAP", "kidsHealthIns", "latinxPop",
    "maleChildMedicaid", "maleChildMedicare", "maleMedicaid18_64",
    "maleMedicaid65_up", "maleMedicare18_64", "maleMedicare65_up",
    "medicaidPop", "medicarePop", "mobilityPop", "movedInside20to24",
    "movedInside25to29", "movedInsideCounty", "naturalized", "noncitizen",
    "owners", "publicTransport", "renterPop", "renters", "transportPop",
    "whitePop", "predominant_race", "max_pct"
  )

  c(
    setNames(rep("identity", length(identity_cols)), identity_cols),
    setNames(rep("aggregate", length(aggregate_cols)), aggregate_cols)
  )
})
```
There is a colloquially accepted theory of gentrification in Chicago which holds that gentrification happens differently across racial and ethnic communities. Under this theory, wealthier populations are more likely to settle in predominantly Latinx communities, eventually pushing out current residents. In Black neighborhoods, however, gentrification happens after significant periods of neglect and disenfranchisement make an area unlivable, forcing residents to move elsewhere: "freeing it up" for different demographics and consequent investment catering to these new residents. 

I seek to evaluate this claim by exploring the movement of racial and ethnic populations within the city, business activity as measured by business license issuances, and investment in education determined by average spend per student across different socioeconomic groups. I focus on the years immediately following the recent recession, as I am interested in the varying post-crisis resilience of neighborhoods as a factor in the theory described above.

## Population Displacement in Chicago Communities
Since the Great Recession, underrepresented minority populations in Chicago have been increasingly relegated to lower income areas in the South and West of the City. Some communities have maintained a foothold in their respective neighborhoods, while others are priced out as wealthier populations move into the areas they have historically called home. 
```{r facet_wrap_demo_info, message=FALSE, warning=FALSE, fig.width=30, fig.height=24}
all_demos_range <- readRDS(here::here("data", "range_demos_chi_proj.Rda"))
wards.2015 <- readRDS(here::here("data", "wards2015_sf.Rda"))

# intersect census tracts with Chicago wards
ward_range <- st_intersection(wards.2015, all_demos_range)

# One map per ACS year (2015 excluded) of predominantly Latinx tracts.
ward_range %>%
  filter((predominant_race == "Latinx") & (id != 2015)) %>%
  ggplot() +
  # tracts shaded by the share of residents below the poverty line
  geom_sf(aes(fill = below_poverty_pct), lwd = 2, color = "#6d7d53") +
  scale_fill_gradient(low = "#dae0e2", high = "#2d4f5a") +
  # outline Chicago wards over data
  geom_sf(data = wards.2015, color = "#292929", lwd = 1, fill = NA) +
  coord_sf(datum = NA) +
  theme_map_modest() +
  theme(
    # hjust is numeric (0.5 = centred), not the string "0.5"
    plot.title = element_text(face = "bold", hjust = 0.5, family = "Ledger",
                              size = rel(2.5)),
    plot.subtitle = element_text(margin = unit(c(0, 0, 1, 0), "lines"),
                                 size = rel(2)),
    plot.caption = element_text(hjust = 1, size = rel(1.75)),
    strip.text.x = element_text(family = "Ledger", size = rel(2.25)),
    legend.position = "bottom",
    legend.box = "vertical",
    legend.title = element_text(size = rel(2), family = "Ledger"),
    legend.text = element_text(size = rel(1.1)),
    legend.key.size = unit(45, "pt"),
    panel.spacing = unit(1, "lines"),
    plot.margin = unit(c(2, 4, 2, 2), "lines")
  ) +
  facet_wrap(~ id) +
  # guides(fill = guide_colourbar(title.position="top", title.hjust = 0.5)) +
  # only `fill` is mapped, so only `fill` gets a legend label
  labs(
    title = "Latinx Populations in Chicago Pushed to South, West Neighborhoods",
    subtitle = "Chicago racial and ethnic group movement by census tract since 2012 (5 year averages) show that Latinx communities are being\ndisplaced from the city center.",
    caption = "Data Source: U.S. Census Bureau",
    fill = "Tract Percentage Below Federal Poverty Line"
  )


```

While there are myriad factors contributing to the way communities move over time, race is an undeniable component. There is a clear shift of Latinx populations from desirable neighborhoods close to the center and north of the city to more western and southern neighborhoods.
```{r facet-wrap-black, message=FALSE, warning=FALSE, fig.width=30, fig.height=24}
# One map per ACS year (2015 excluded) of predominantly Black tracts.
ward_range %>%
  filter((predominant_race == "Black") & (id != 2015)) %>%
  ggplot() +
  # tracts shaded by the share of residents below the poverty line
  # geom_sf(aes(alpha=below_poverty_pct), lwd = 0, fill=get_dt_cols("ocean")) +
  geom_sf(aes(fill = below_poverty_pct), lwd = 2, color = "#9d8e64") +
  # scale_alpha(range = c(0.15, 1)) +
  scale_fill_gradient(low = "#dae0e2", high = "#2d4f5a") +
  # outline Chicago wards over data
  geom_sf(data = wards.2015, color = "#292929", lwd = 1, fill = NA) +
  coord_sf(datum = NA) +
  theme_map_modest() +
  theme(
    # hjust is numeric (0.5 = centred), not the string "0.5"
    plot.title = element_text(face = "bold", hjust = 0.5, family = "Ledger",
                              size = rel(2.5)),
    plot.subtitle = element_text(margin = unit(c(0, 0, 1, 0), "lines"),
                                 size = rel(2)),
    plot.caption = element_text(hjust = 1, size = rel(1.75)),
    strip.text.x = element_text(family = "Ledger", size = rel(2.25)),
    legend.position = "bottom",
    legend.box = "vertical",
    legend.title = element_text(size = rel(2), family = "Ledger"),
    legend.text = element_text(size = rel(1.1)),
    legend.key.size = unit(45, "pt"),
    panel.spacing = unit(1, "lines"),
    plot.margin = unit(c(2, 4, 2, 2), "lines")
  ) +
  facet_wrap(~ id) +
  # guides(fill = guide_colourbar(title.position="top", title.hjust = 0.5)) +
  # only `fill` is mapped, so only `fill` gets a legend label
  labs(
    title = "In Contrast, Black Communities Stay in Place in Increasingly Poor Neighborhoods ",
    subtitle = "Black communities are not pushed out of their neighborhoods, however, the percentage of residents below the federal poverty line\nin predominantly Black tracts has increased city-wide.",
    caption = "Data Source: U.S. Census Bureau",
    fill = "Tract Percentage Below Federal Poverty Line"
  )

```


```{r include=FALSE, eval=FALSE}
# Join each ACS year's tracts to wards; `largest = TRUE` is passed so each
# tract is matched to a single ward.
# NOTE(review): `agr` is not a named st_join() argument and is passed through
# `...` -- confirm it has the intended effect.
demos_2012 <- st_join(all_demos_range %>% filter(as.numeric(id) == 2012),
                      wards.2015, largest = TRUE, agr = arg_values_demos)
demos_2013 <- st_join(all_demos_range %>% filter(as.numeric(id) == 2013),
                      wards.2015, largest = TRUE, agr = arg_values_demos)
demos_2015 <- st_join(all_demos_range %>% filter(as.numeric(id) == 2015),
                      wards.2015, largest = TRUE, agr = arg_values_demos)
demos_2017 <- st_join(all_demos_range %>% filter(as.numeric(id) == 2017),
                      wards.2015, largest = TRUE, agr = arg_values_demos)

# Inspect the key 2012 columns. A dangling `spread()` (no pipe, no arguments)
# after this call was a syntax error and has been removed.
demos_2012 %>%
  dplyr::select(GEOID, id, medianIncome, predominant_race, max_pct,
                population, whitePop, white_pct, blackPop, black_pct,
                asianPop, asian_pct, latinx_pct, below_poverty_pct,
                renterPop, renters, owners, renting_pct)

```

```{r include=FALSE, eval=FALSE}
# NOTE(review): this chunk (eval=FALSE) looks like leftover example code.
# `all_demos` is only assigned in commented-out lines of this file, and the
# columns used below (num_drivers, perc_speeding, state, ...) are not among
# the ones demoConversion() creates. scale_fill_gradientn() is also called
# without colours. Confirm whether the chunk should be adapted or removed.
library(fivethirtyeight)

# Standardise the measures, reshape to long form, and draw a tile heatmap of
# measure by state.
all_demos %>%
  mutate(`Number of Drivers` = scale(num_drivers),
         `Percent Speeding` = scale(perc_speeding),
         `Percent Alcohol` = scale(perc_alcohol),
         `Percent Not Distracted` = scale(perc_not_distracted),
         `Percent No Previous` = scale(perc_no_previous),
         state = factor(state, levels = rev(state))
         ) %>%
  select(-insurance_premiums, -losses, -(num_drivers:losses)) %>%
  gather(`Number of Drivers`:`Percent No Previous`, key = "measure", value = "SD's from Mean") %>%
  ggplot(aes(measure, state)) +
    geom_tile(aes(fill = `SD's from Mean`)) +
    labs(title = "Drivers Involved in Fatal Collisions By Behavior",
      subtitle = "As a share of scaled fatal collisions per billion miles, 2009",
      caption = "Source: fivethirtyeight R package",
      x = NULL,
      y = NULL) + 
    scale_fill_gradientn() +
    theme(legend.position = "right",
      legend.direction = "vertical",
      axis.text.x = element_text(angle = 45))
```


## Business Activity across Chicago Areas
The neighborhoods housing these minority communities are those with distinctly lower levels of economic activity. The majority of the city's "bouncing back" after the recession happens in the Central, North, and West sides of the city.
```{r bus-stagnation-bar, fig.height=12, fig.width=15, message=FALSE, warning=FALSE}

all_wards_all_dates <- readRDS(here::here("data", "all_wards_all_dates.Rda"))

# Complete the side x ward x application type x date grid, then bucket each
# date into the week, month and quarter it falls in.
allDatesCount.df <- all_wards_all_dates %>%
  expand(SIDE_CLEAN, WARD, `APPLICATION TYPE`, count_date) %>%
  full_join(all_wards_all_dates) %>%
  arrange(SIDE_CLEAN, WARD, `APPLICATION TYPE`, count_date) %>%
  mutate(
    activity_wk = lubridate::as_date(
      cut(count_date, breaks = "week", start.on.monday = FALSE,
          origin = lubridate::origin)
    ),
    activity_month = lubridate::as_date(
      cut(count_date, breaks = "month", start.on.monday = FALSE,
          origin = lubridate::origin)
    ),
    activity_qtr = lubridate::as_date(
      cut(count_date, breaks = "quarter", start.on.monday = FALSE,
          origin = lubridate::origin)
    )
  )


library(lemon)

# Quarterly issued/renewed licence totals per side, one facet per side.
allDatesCount.df %>%
  filter(`APPLICATION TYPE` %in% c("ISSUE", "RENEW")) %>%
  group_by(SIDE, activity_qtr, `APPLICATION TYPE`) %>%
  summarise(active_businesses = sum(active_businesses)) %>%
  # drop the leftover grouping so the facet order below is computed over the
  # whole table, not inside each SIDE/quarter group
  ungroup() %>%
  arrange(activity_qtr, desc(active_businesses), SIDE) %>%
  mutate(
    # rename on plain character first: ifelse() with a factor in the `no`
    # branch returns integer codes instead of labels
    SIDE_CLEAN = as.character(SIDE),
    SIDE_CLEAN = ifelse(
      SIDE_CLEAN == "Far Southwest", "Far Southwest Side",
      ifelse(SIDE_CLEAN == "Far Southeast", "Far Southeast Side", SIDE_CLEAN)
    ),
    # facet order: reverse of first appearance after the arrange() above
    SIDE_CLEAN = factor(SIDE_CLEAN, levels = rev(unique(SIDE_CLEAN)))
  ) %>%
  ggplot(aes(x = activity_qtr, y = active_businesses)) +
  geom_bar(
    aes(x = activity_qtr, y = active_businesses, fill = `APPLICATION TYPE`),
    stat = "identity", position = "dodge"
  ) +
  scale_fill_dt(
    labels = c("    Newly Issued Licenses    ", "    License Renewals    ")
  ) +
  # geom_text(aes(label=active_businesses),
  #           size = 3, position = position_stack(vjust = 0.5), color="white") +
  scale_y_continuous(labels = scales::comma) +
  scale_x_date(
    date_labels = "%b %y",
    date_breaks = "1 year",
    limits = c(ymd("2012-01-01"), ymd("2018-12-31"))
  ) +
  theme_modest() +
  theme(
    legend.direction = "horizontal",
    legend.key.size = unit(25, "pt"),
    legend.title = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.spacing = unit(2, "lines"),
    strip.text = element_text(family = "Ledger", size = rel(1.2)),
    axis.title.y = element_text(size = rel(1.5),
                                margin = unit(c(0, 0, 2, 2), "lines"),
                                angle = 90, family = "Ledger"),
    axis.text.y = element_text(size = rel(1),
                               margin = unit(c(0, 0, 2, 2), "lines")),
    axis.text.x = element_text(hjust = 0, angle = -45),
    legend.position = "bottom",
    axis.title.x = element_blank(),
    plot.margin = unit(c(2, 4, 2, 2), "lines")
  ) +
  facet_rep_wrap(~ SIDE_CLEAN, repeat.tick.labels = TRUE) +
  labs(
    y = "Number of Business Licenses Issued or Renewed",
    colour = "Chicago Council Ward",
    caption = "Data Source: Chicago Open Data Portal",
    title = "New Business Never Returns to Far Southeast and Far Southwest Post-Recession",
    subtitle = "Steady decline in new business entry post-recession in Chicago areas with lowest economic activity pre-recession",
    fill = "Chicago Area"
  )
  # annotate("text", x=lubridate::ymd("2013-01-01"), y = 850000, label="Business renewals dipping in 2013, were balanced by spikes in new business due to EDGE tax credits.")
```
Immediately post-recession in 2013, the amount of [Economic Development for a Growing Economy (EDGE) tax credits granted by Illinois nearly doubled from the prior year](https://www.chicagobusiness.com/article/20171006/NEWS02/171009915/corporate-tax-incentives-back-in-favor-in-illinois), causing a brief spike in new business license issuances for every area of the city beyond Central (Loop area), which has experienced fairly constant new business entry since the recession. Thus far, only the West Side of Chicago is approaching 2012 levels of new business, with most other areas of the city remaining fairly constant after EDGE excitement tapered off. 

In the Far Southeast and Far Southwest Sides, economic activity (in terms of number of active businesses) was about half that of the next lowest areas, even before EDGE-induced business dropped off. One must wonder how much growth a neighborhood can experience with a monthly rate of new business entry that has hovered around 500 businesses for years. For context, the mean number of quarterly new licenses issued in the Central area of the city is just under 700,000, and the mean number of quarterly licenses issued in the Northwest Side is just over 85,000.

```{r include=FALSE, echo=TRUE, eval=FALSE}
# Open questions to follow up on:
#   * How does new business relate to population? The number of new
#     businesses per capita could be more meaningful than raw numbers.
#   * Is population in the Far SE and Far SW also going down?

# Total issuances and renewals per Side and activity_qtr, then the spread of
# those totals (min / median / mean / max) for each Side and license type.
allDatesCount.df %>%
  filter(`APPLICATION TYPE` %in% c("ISSUE", "RENEW")) %>%
  group_by(SIDE, activity_qtr, `APPLICATION TYPE`) %>%
  summarise(active_businesses = sum(active_businesses)) %>%
  group_by(SIDE, `APPLICATION TYPE`) %>%
  summarise(
    min_ct    = min(active_businesses),
    median_ct = median(active_businesses),
    mean_ct   = mean(active_businesses, na.rm = TRUE),
    max_ct    = max(active_businesses)
  )
```

## Mapping Chicago's Forgotten Economies

There is unquestionable variation in the level of economic activity within each Side. However, in areas that are _predominantly_ Black and Latinx, positive variance from the norm actually appears to be concentrated in the parts of these communities that border areas with larger Asian, White, and Other racial/ethnic populations.
```{r ggmap-prep, include=FALSE}

# Location of the Chicago ward boundary shapefile (2015- boundaries)
wards.shp.2015 <- here::here(
  "data",
  "Geofiles - Chicago Zip Code and Neighborhood",
  "Boundaries - Wards (2015-)",
  "geo_export_0bb2e9fd-20ca-415b-a96a-7722d72c1b41.shp"
)

# Read the ward polygons and flatten them into a plain data frame:
#   1. give every ward an `id` equal to its row name,
#   2. fortify the polygons into points keyed by that id,
#   3. merge the ward attributes back onto the points.
wards2015 <- shapefile(wards.shp.2015)
wards2015@data <- wards2015@data %>% mutate(id = rownames(.))
wards2015.points <- fortify(wards2015, region = "id")
wards2015.df <- merge(x = wards2015.points, y = wards2015@data, by = "id")

# Display names for the two license application types
license_types <- c(
  ISSUE = "New Business Licenses",
  RENEW = "Business License Renewals"
)

# Parse the license expiration column from month/day/year text into dates
bus_licenses <- bus_licenses %>%
  mutate(
    `LICENSE TERM EXPIRATION DATE` =
      lubridate::mdy(`LICENSE TERM EXPIRATION DATE`)
  )
```


```{r include=FALSE, eval=FALSE}

# Load the saved daily_bus_by_type object (presumably daily business counts
# by license type, judging by the file name -- confirm against the prep script)
daily_bus_by_type <- here::here("data", "daily_bus_by_type.Rda") %>%
  readRDS()
```


```{r include= FALSE}
# Axis-label helper, with thanks to agstudy on Stack Overflow for the idea:
# https://stackoverflow.com/questions/20123147/add-line-break-to-axis-labels-and-ticks-in-ggplot
#
# Replaces every whitespace character in `x` with a newline, so a multi-word
# label is drawn with one word per line.
#   x   : labels to reformat (anything gsub() can treat as character)
#   ... : accepted for call compatibility; not used
# Returns a character vector with one element per element of `x`.
addline_format <- function(x, ...) {
  gsub(pattern = "\\s", replacement = "\n", x = x)
}
```


```{r avg-monthly-lic, warning=FALSE, fig.width=15, fig.height=12, message=FALSE}
# filter for business license issuances and renewals, and create a monthly
# count for every ward (one row per ward per month from 2012 onward)
bus_licenses %>%
  filter(!is.na(WARD), active == 1, activity_date >= mdy("1/1/2012")) %>%
  group_by(activity_month, activity_yr, WARD, SIDE) %>%
  summarise(business_count = n()) %>%
  group_by(WARD, SIDE) %>%
  arrange(desc(business_count)) %>%

  # plot one boxplot of monthly issuances and renewals for each ward;
  # the x axis holds the Sides, ordered by median monthly count
  ggplot(aes(
    x = reorder(reorder(reorder(SIDE, business_count, FUN = median), WARD), business_count, FUN = median),
    y = business_count
  )) +
  # add comparison line for lowest levels of monthly ward business activity
  geom_hline(yintercept = 25, linetype = "dotted") +
  # draw boxplots with color and order determined by Chicago Side area
  geom_boxplot(
    aes(group = reorder(as.factor(WARD), SIDE), fill = as.factor(SIDE)),
    show.legend = FALSE
  ) +
  scale_fill_dt() +

  scale_y_continuous(
    breaks = sort(c(seq(0, 750, 150), 25)),
    minor_breaks = seq(0, 750, 75)
  ) +
  # show only 0-750 (one Loop ward's outliers extend ~1000 above other wards);
  # still very clearly the highest even without all outliers visible.
  # The window is set on the coordinate system, not via scale limits: scale
  # limits drop every count above 750 *before* the boxplot statistics are
  # computed, which would shrink that ward's box instead of merely cropping it.
  coord_cartesian(ylim = c(0, 750)) +
  scale_x_discrete(
    breaks = unique(bus_licenses$SIDE),
    labels = addline_format(as.factor(unique(bus_licenses$SIDE)))
  ) +
  labs(
    x = "Chicago Council Ward",
    y = "Average Monthly Business Count",
    caption = "Data Source: Chicago Open Data Portal",
    title = "Least New & Surviving Businesses in Far South, Southwest Wards for 5+ Years",
    subtitle = "Since 2012, Far South wards average 26 or less monthly business license issuances and renewals",
    fill = "Chicago Area"
  ) +
  theme_modest() +
  theme(
    panel.grid.major.x = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(family = "Ledger", size = rel(1)),
    axis.text.y = element_text(margin = unit(c(0, 2, 2, 2), "lines"), size = rel(1)),
    axis.title.y = element_text(margin = unit(c(0, 0, 2, 2), "lines"), size = rel(1.2), angle = 90, family = "Ledger"),
    plot.caption = element_text(hjust = 1)
  ) +
  # call-outs; x positions are indices along the ordered Side axis
  annotate("text", x = 7, y = 390, label = "Most West Side business\noccurs in Wards 2 and 27,\nwhich border the Loop.", size = rel(4)) +
  annotate("text", x = 8.9, y = 745, label = "The Central area is comprised\nof Ward 42, the Loop area.", size = rel(4)) +
  annotate("text", x = 5, y = 335, label = "South Side business activity\nis concentrated in Kenwood,\n Fuller Park areas bordering\nthe University of Chicago.", size = rel(4)) +
  annotate("text", x = 3, y = 305, label = "Ward 12 on the Southwest\nSide, which neighbors Chinatown, is\ncharacterized by higher rates\nof business activity than\nits neighbors.", size = rel(4))
  
```


```{r include=FALSE, eval=FALSE}
# Median and maximum monthly license count for every ward since 2012,
# listed Side by Side
bus_licenses %>%
  filter(!is.na(WARD), active == 1, activity_date >= mdy("1/1/2012")) %>%
  group_by(activity_month, activity_yr, WARD, SIDE) %>%
  summarise(business_count = n()) %>%
  group_by(SIDE, WARD) %>%
  summarise(
    med_ward_bus_ct = median(business_count),
    max_ward_bus_ct = max(business_count)
  ) %>%
  arrange(SIDE, WARD)
```


## Investing in the Future: Trends in Chicago Public Schools
While economic activity is definitively a factor in a neighborhood's ability to thrive, just as important to future success is the quality of education that students in the area are able to receive. Here I explore the variation in Chicago Public School spending for students of different socioeconomic backgrounds.
```{r quasi-random, fig.height=12, fig.width=15, message=FALSE, warning=FALSE}
# read in combined school report card and budget dataset
school_data <- readRDS(here::here("data_preparation", "data", "combined_school_data.Rda"))

# columns to keep at the front of the data frame after each modification
front_cols <- c("YEAR", "CPS_id_num", "WARD", "SIDE", "SCHOOL_NAME", "NAME_WARD_JOIN", "CY_BUDGET", "SCHOOL_TOTAL_ENROLLMENT")


# bin enrollment and spend values for comparison.
# right = FALSE makes each bin [lower, upper), which is what the labels
# describe: a school with exactly 500 students belongs in "500-999".
school_data <- school_data %>%
  mutate(
    ENROLLMENT_RANGE = cut(
      SCHOOL_TOTAL_ENROLLMENT,
      breaks = c(0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, Inf),
      labels = c("0-499", "500-999", "1,000-1,499", "1,500-1,999",
                 "2,000-2,499", "2,500-2,999", "3,000-3,499",
                 "3,500-3,999", "4,000-4,499", "4,500-4,999", "5,000+"),
      right = FALSE,
      ordered_result = TRUE
    ),
    SPEND_RANGE = cut(
      AVG_SPEND_PER_STUDENT,
      # fourth break is 7500: a 75000 here gets sorted to the end by cut(),
      # which attaches every label from "5,000-7,499" up to the wrong interval
      breaks = c(0, 2500, 5000, 7500, 10000, 12500, 15000, Inf),
      labels = c("0-2,499", "2,500-4,999", "5,000-7,499",
                 "7,500-9,999", "10,000-12,499", "12,500-14,999",
                 "15,000+"),
      right = FALSE,
      ordered_result = TRUE
    )
  ) %>%
  dplyr::select(one_of(front_cols), ENROLLMENT_RANGE, SPEND_RANGE, everything())

# enable mapping by majority race in school: average each group's share per
# school across all rows, then keep the group with the largest average share
school_data <- left_join(
  school_data,
  school_data %>%
    group_by(`SCHOOL ID (R-C-D-T-S)`) %>%
    summarise(
      White = mean(`SCHOOL - WHITE %`, na.rm = TRUE),
      Black = mean(`SCHOOL - BLACK %`, na.rm = TRUE),
      Asian = mean(`SCHOOL - ASIAN %`, na.rm = TRUE),
      Latinx = mean(`SCHOOL - HISPANIC %`, na.rm = TRUE)
    ) %>%
    gather(group_name, pct, -`SCHOOL ID (R-C-D-T-S)`) %>%
    group_by(`SCHOOL ID (R-C-D-T-S)`) %>%
    # sanity check to make sure all races in output to start before slicing
    # arrange(`SCHOOL ID (R-C-D-T-S)`)  %>%
    slice(which.max(pct)) %>%
    dplyr::select(`SCHOOL ID (R-C-D-T-S)`, group_name, pct),
  by = c("SCHOOL ID (R-C-D-T-S)")
) %>%
  dplyr::select(front_cols, ENROLLMENT_RANGE, SPEND_RANGE,
    predominant_race = group_name, max_pct = pct, everything())


# NOTE: the object built above is replaced by the previously saved copy on the
# next line, so the binning above only reaches the plots once the saveRDS()
# call is re-run to regenerate binned_school_data.Rda.
# saveRDS(school_data, file = here::here("data_preparation", "data", "binned_school_data.Rda"))
school_data <- readRDS(here::here("data_preparation", "data", "binned_school_data.Rda"))

#########################
##  Quasi-Random Plot  ##
#########################
# one point per school: its average low-income percentage, in the column of
# the school's predominant race
school_data %>%
  group_by(`SCHOOL ID (R-C-D-T-S)`, predominant_race) %>%
  summarise(
    avg_LI_pct = mean(`LOW-INCOME SCHOOL %`, na.rm = TRUE),
    school_count = n()
  ) %>%
  ggplot(aes(predominant_race, avg_LI_pct, color = factor(predominant_race))) +
  geom_quasirandom(varwidth = TRUE, size = 1.5, show.legend = FALSE) +
  scale_color_dt("diverging", reverse = TRUE, na.value = get_dt_cols("palegray")) +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  # labels are positional, so they rely on the alphabetical level order
  # Asian, Black, Latinx, White
  scale_x_discrete(
    position = "top",
    labels = c("Majority Asian Schools", "Majority Black Schools",
               "Majority Latinx Schools", "Majority White Schools")
  ) +
  theme_modest() +
  theme(
    axis.text.x.top = element_text(size = rel(1.25), vjust = -8, face = "bold"),
    axis.text.y = element_text(size = rel(1.25), margin = unit(c(0, 0, 0, 3), "lines")),
    axis.title.y = element_text(size = rel(1.25), angle = 90, family = "Ledger"),
    axis.title.x = element_blank(),
    # legend.title = element_text(size=rel(2)),
    # legend.key.size =  unit(35, "pt"),
    # legend.text = element_text(size=rel(1.5))
    plot.title = element_text(size = rel(1.15))
  ) +
  labs(
    title = "Majority Black and Majority Latinx Schools Have Much Higher Percentages\nof Low Income Students",
    x = "Predominant Race in School",
    y = "Percentage of Low Income Students",
    caption = "Data Source: Illinois State Board of Education"
  ) +
  # call-outs and pointer segments. Text layers take `fontface`, not `face`;
  # an unknown `face` argument is ignored and the text is drawn plain.
  annotate("text", y = 30, x = 2.4, label = "A handful of Near West Side\nand Wicker Park schools buck\nthe overall trend.", hjust = 0.5, size = rel(4), fontface = "bold") +
  annotate("segment", x = 2.55, xend = 2.975, y = 34, yend = 40) +
  annotate("segment", x = 2.8, xend = 2.975, y = 30, yend = 31.5) +
  annotate("segment", x = 2.05, xend = 2.15, y = 27, yend = 28) +
  annotate("text", x = 2.45, y = 18, label = "Keller Gifted Magnet Elementary School*", hjust = 0.5, size = rel(4), fontface = "bold") +
  annotate("text", x = 1.3, y = 53, label = "Sheridan Math & Science\nAcademy in Chinatown", hjust = 0.5, size = rel(4), fontface = "bold") +
  annotate("text", x = 1.45, y = 26, label = "Rates at South Loop Elementary School\nand Lenart Regional Gifted Center\nElementary School located six blocks from\nthe University of Chicago are likely\nreflective of overall higher median\nincomes in the schools' neighborhoods.", hjust = 0.5, size = rel(4), fontface = "bold") +
  annotate("segment", x = 1.875, xend = 1.975, y = 30, yend = 33) +
  annotate("segment", x = 1.875, xend = 1.975, y = 30, yend = 30.5) +
  annotate("text", x = 1.60, y = 38, label = "Sutherland Elementary School*", hjust = 0.5, size = rel(4), fontface = "bold") +
  annotate("text", x = 3.55, y = 80, label = "Schools near Humboldt Park and\nO'Hare have comparably high rates\nof low-income White students.", hjust = 0.5, size = rel(4), fontface = "bold") +
  annotate("text", x = 1.4, y = 5, label = "*Keller Gifted Magnet and Sutherland Elementary Schools in the neighboring Mt. Greenwood and Beverly communities on the Far\nSouthwest Side have two of the smallest differences between white and non-white student percentages across all non-white schools.", hjust = 0, size = rel(4), fontface = "bold")
```
On average over the last 5 school years, non-white schools have served significantly higher percentages of low-income students, as determined by free and reduced lunch designation. 

```{r sanity-check-quasi, echo=TRUE, include=FALSE, eval=FALSE}
# sanity check: number of distinct schools under each predominant race
# (the majority white count seems low out of 721)
school_data %>%
  group_by(predominant_race) %>%
  distinct(predominant_race, `SCHOOL ID (R-C-D-T-S)`) %>%
  arrange(predominant_race, `SCHOOL ID (R-C-D-T-S)`) %>%
  summarise(schools = n())
```


```{r include=FALSE}
# Rank schools by low-income share separately within each school year
# (2013-2019) and attach the resulting percentile and decile to every
# school-year row. ntile() ranks ascending, so bucket 1 holds the schools
# with the smallest low-income share.
# The per-year ranking is written once and mapped over the years instead of
# being repeated for each year; each year's rows are filtered, sorted and
# ranked on their own, exactly as before.
school_data <- left_join(
  school_data,
  bind_rows(
    lapply(2013:2019, function(yr) {
      school_data %>%
        filter(YEAR == yr) %>%
        dplyr::select(YEAR, `SCHOOL ID (R-C-D-T-S)`, `LOW-INCOME SCHOOL %`) %>%
        # sort first: ntile() breaks ties by row order
        arrange(desc(`LOW-INCOME SCHOOL %`)) %>%
        mutate(
          low_income_percentile = ntile(`LOW-INCOME SCHOOL %`, 100),
          low_income_decile = ntile(`LOW-INCOME SCHOOL %`, 10)
        ) %>%
        dplyr::select(YEAR, `SCHOOL ID (R-C-D-T-S)`, low_income_percentile, low_income_decile)
    })
  ),
  by = c(YEAR = "YEAR", `SCHOOL ID (R-C-D-T-S)` = "SCHOOL ID (R-C-D-T-S)")
) %>%
  dplyr::select(front_cols, ENROLLMENT_RANGE, SPEND_RANGE, predominant_race, low_income_percentile, low_income_decile, everything())
  
```

It seems to follow that these schools would provide more resources for students; however, over the same time period, average spend per student remains roughly the same across schools' low-income percentages.
```{r area-chart, fig.height=12, fig.width=15, message=FALSE, warning=FALSE}
# Median spend per student for each low-income decile, year by year,
# drawn as an area chart filled by decile
school_data %>%
  group_by(YEAR, low_income_decile) %>%
  summarise(median_spend = median(AVG_SPEND_PER_STUDENT, na.rm = TRUE)) %>%
  ggplot() +
  geom_area(
    aes(
      x = as.factor(YEAR),
      y = median_spend,
      group = as.factor(low_income_decile),
      fill = low_income_decile
    )
  ) +
  scale_fill_dt(discrete = FALSE) +
  scale_y_continuous(labels = scales::dollar) +
  theme_modest() +
  theme(
    legend.text = element_text(size = rel(1)),
    legend.title = element_text(size = rel(1), family = "Ledger"),
    plot.caption = element_text(hjust = 1, size = rel(0.75)),
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      angle = 90, family = "Ledger", size = rel(1.25),
      margin = unit(c(0, 0, 2, 2), "lines")
    ),
    axis.text.y = element_text(margin = unit(c(0, 2, 2, 2), "lines"))
  ) +
  # colour bar title centred above the bar
  guides(fill = guide_colourbar(title.position = "top", title.hjust = 0.5)) +
  labs(
    y = "Median Spend per Student",
    x = "Year",
    caption = "Data Source: Chicago Public Schools & Illinois State Board of Education",
    title = "Average Spending per Student Largely Similar Across Schools by Low-Income Deciles",
    fill = "Low Income Decile"
  )
```
Schools with, on average, a large proportion of low-income students do tend to be on the lower end of the spectrum in terms of spend per student, based on the breakdown of low-income student percentages from the Illinois State Board of Education:
```{r fig.height=18, fig.width=15, message=FALSE, warning=FALSE}
library(waffle)

# Build one summary row per spend bin (rows with a missing SPEND_RANGE are
# dropped). The summarise() step takes the mean enrollment and the mean
# percentage of each group; the mutate() step then turns each percentage into
# a head count out of that mean enrollment, and derives `Not Low-Income` as
# the remainder.
# Resulting column order (relied on by position further down):
#   1 SPEND_RANGE, 2 mean_enrollment, 3 Black, 4 White, 5 Asian, 6 Latinx,
#   7 Low-Income, 8 Not Low-Income
parts_all <- school_data %>% filter(!is.na(SPEND_RANGE)) %>% group_by(SPEND_RANGE) %>% summarise(
  mean_enrollment = round(mean(SCHOOL_TOTAL_ENROLLMENT, na.rm=T),0),
  Black =round( mean(`SCHOOL - BLACK %`, na.rm=T)),
  White = round(mean(`SCHOOL - WHITE %`, na.rm=T)),
  Asian = round(mean(`SCHOOL - ASIAN %`, na.rm=T)),
  Latinx = round(mean(`SCHOOL - HISPANIC %`, na.rm=T)),
  `Low-Income` = round(mean(`LOW-INCOME SCHOOL %`, na.rm=T))
  )  %>% 
mutate(
  Black = round((Black/100) * mean_enrollment),
  White = round((White/100) * mean_enrollment),
  Asian = round((Asian/100) * mean_enrollment),
  Latinx = round((Latinx/100) * mean_enrollment),
  `Low-Income` = round((`Low-Income`/100) * mean_enrollment),
  `Not Low-Income` = mean_enrollment - `Low-Income`
  )

# Split the summary into a list of one-row data frames, one per row of
# parts_all, named by parts_all's row names.
parts_all.list <- setNames(split(parts_all, seq(nrow(parts_all))), rownames(parts_all))

# school waffles by low-income status
# One waffle per list element 1-6, laid out as a 3-column grid. For each one:
#   * elements 7:8 of the row (`Low-Income`, `Not Low-Income`) are the two
#     parts drawn; dividing by 1 via mapply() leaves the values unchanged and
#     collapses them into a named vector for waffle()
#   * element [[2]] is mean_enrollment; pad = (800 - mean_enrollment) / 40
#     pairs with rows=40
# NOTE(review): the titles are hard-coded, so they assume list elements 1-6
# are the spend bins in ascending order -- confirm if the bins change. Any
# element beyond the sixth is not plotted.
waffles <- plot_grid(
waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[1]])[7:8], 1)), colors = c("#986769", "#ebb742"), pad = (800 - as.list(parts_all.list[[1]])[[2]]) / 40,
       # pad = (800 - as.list(parts_all.list[[1]])[[2]]),
       rows=40, size=0.5, title="Under $2,500", xlab = "1 Square == 1 Student") +  
  theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.text = element_blank(), text = element_text(family="Roboto")),

waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[2]])[7:8], 1)), colors = c("#986769", "#ebb742"), pad = (800 - as.list(parts_all.list[[2]])[[2]]) / 40,
       rows=40, size=0.5, title="$2,500-4,999", xlab = "1 Square == 1 Student") + 
  theme(panel.grid=element_blank(), axis.text = element_blank(), text = element_text(family="Roboto")),

waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[3]])[7:8], 1)), colors = c("#986769", "#ebb742"), pad = (800 - as.list(parts_all.list[[3]])[[2]]) / 40,
       rows=40, size=0.5, title="$5,000-7,499", xlab = "1 Square == 1 Student") +  
  theme(panel.grid=element_blank(), axis.text = element_blank(), text = element_text(family="Roboto")),
# three empty cells form the thin middle row (relative height 0.05 below)
# that separates the two rows of waffles
NULL, NULL, NULL,
waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[4]])[7:8], 1)), colors = c("#986769", "#ebb742"), pad = (800 - as.list(parts_all.list[[4]])[[2]]) / 40,
       rows=40, size=0.5, title="$7,500-9,999", xlab = "1 Square == 1 Student") + 
  theme(panel.grid=element_blank(), axis.text = element_blank(), text = element_text(family="Roboto")),

waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[5]])[7:8], 1)), colors = c("#986769", "#ebb742"), pad = (800 - as.list(parts_all.list[[5]])[[2]]) / 40,
       rows=40, size=0.5, title="$10,000-12,499", xlab = "1 Square == 1 Student") + 
  theme(panel.grid=element_blank(), axis.text = element_blank(), text = element_text(family="Roboto")),

waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[6]])[7:8], 1)), colors = c("#986769", "#ebb742"), pad = (800 - as.list(parts_all.list[[6]])[[2]]) / 40,
       rows=40, size=0.5, title="$12,500-14,999", xlab = "1 Square == 1 Student") +  
  theme(panel.grid=element_blank(), axis.text = element_blank(), text = element_text(family="Roboto")), 
ncol = 3, nrow=3, rel_heights =  c(1, 0.05, 1), align = 'v') + theme_modest() + theme(panel.grid=element_blank(), axis.text = element_blank())

# Title and caption are drawn as separate label-only panels so they can be
# stacked above and below the waffle grid.
title <- ggdraw() + draw_label("More Low-Income Students in Schools with Low- to Mid-Range Spend", fontface='bold', size=20, fontfamily="Ledger") + theme_modest() + theme(panel.grid=element_blank(), axis.text = element_blank(), axis.ticks=element_blank())
caption <- ggdraw() + draw_label("Data Source: Illinois State Board of Education",  size=12, hjust = 0) + theme_modest() + theme(panel.grid=element_blank(), axis.text = element_blank(), axis.ticks=element_blank())

# Final figure: title, waffle grid and caption in a single column.
plot_grid(title, waffles, caption, ncol=1, align="v", rel_heights = c(0.10, 0.75, 0.09))
```


```{r include=FALSE, eval=FALSE}
# school waffles by race
# Same layout idea as the low-income waffles, but each waffle draws elements
# 3:6 of a parts_all.list row (Black, White, Asian, Latinx head counts, going
# by the column order in which parts_all is built) with one colour per group.
# Element [[2]] (mean_enrollment) again sets the padding.
# Depends on parts_all.list from the preceding waffle chunk.
# NOTE(review): iron() is not defined in this file -- presumably waffle::iron(),
# which combines several waffle plots; confirm.
iron(
waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[1]])[3:6], 1)), colors = c("#9d8e64", "#30505a",  "#986769", "#6d7d53"), pad = (800 - as.list(parts_all.list[[1]])[[2]]) / 40,
       # pad = (800 - as.list(parts_all.list[[1]])[[2]]),
       rows=40, size=0.5, title="Under $2,500") +  theme_modest() +
  theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.text = element_blank()),

waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[2]])[3:6], 1)), colors = c("#9d8e64", "#30505a",  "#986769", "#6d7d53"), pad = (800 - as.list(parts_all.list[[2]])[[2]]) / 40,
       rows=40, size=0.5, title="$2,500-4,999") +  theme_modest() +
  theme(panel.grid=element_blank(), axis.text = element_blank()),

waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[3]])[3:6], 1)), colors = c("#9d8e64", "#30505a",  "#986769", "#6d7d53"), pad = (800 - as.list(parts_all.list[[3]])[[2]]) / 40,
       rows=40, size=0.5, title="$5,000-7,499") +  theme_modest() +
  theme(panel.grid=element_blank(), axis.text = element_blank()),

waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[4]])[3:6], 1)), colors = c("#9d8e64", "#30505a",  "#986769", "#6d7d53"), pad = (800 - as.list(parts_all.list[[4]])[[2]]) / 40,
       rows=40, size=0.5, title="$7,500-9,999") + theme_modest() +
  theme(panel.grid=element_blank(), axis.text = element_blank()),

waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[5]])[3:6], 1)), colors = c("#9d8e64", "#30505a",  "#986769", "#6d7d53"), pad = (800 - as.list(parts_all.list[[5]])[[2]]) / 40,
       rows=40, size=0.5, title="$10,000-12,499") +  theme_modest() +
  theme(panel.grid=element_blank(), axis.text = element_blank()),

# only the last waffle carries the "1 Square == 1 Student" x label
waffle(round(mapply(FUN = `/`, as.list(parts_all.list[[6]])[3:6], 1)), colors = c("#9d8e64", "#30505a",  "#986769", "#6d7d53"), pad = (800 - as.list(parts_all.list[[6]])[[2]]) / 40,
       rows=40, size=0.5, title="$12,500-14,999", xlab = "1 Square == 1 Student") +  theme_modest() +
  theme(panel.grid=element_blank(), axis.text = element_blank()) 
)
```


```{r include= FALSE}
# To Look Into: is attendance higher in majority low income schools (>50%) in areas with higher SNAP takeup? 
# how to combine census data set to compare over time...
```

However, in a perhaps reassuring trend, the [newest Evidence-Based Funding Formula](https://cps.edu/News/Press_releases/Pages/PR1_10_5_17.aspx) used by CPS appears to have increased the number of predominantly non-white schools spending more per student.
```{r gganimate_budget, fig.height=12, fig.width=15, message=FALSE, warning=FALSE}
 
library(gganimate)

# Animated scatter: non-white student percentage against average spend per
# student, one point per school, sized by average class size and coloured by
# the school's predominant race, stepping through the school years.
school_data %>%
  mutate(
    NONWHITE_PCT = (100 - as.numeric(`SCHOOL - WHITE %`)),
    # gap between the non-white and white shares of the school
    DIFF_NONWHITE = NONWHITE_PCT - as.numeric(`SCHOOL - WHITE %`)
  ) %>%
  arrange(desc(predominant_race), DIFF_NONWHITE) %>%
  dplyr::select(front_cols, predominant_race, NONWHITE_PCT, DIFF_NONWHITE, everything()) %>%
  ggplot(aes(NONWHITE_PCT, AVG_SPEND_PER_STUDENT, frame = as.integer(YEAR))) +
  geom_point(
    alpha = 0.5,
    aes(
      size = `OVERALL AVERAGE CLASS SIZE - SCHOOL`,
      colour = predominant_race,
      # grouping by school id gives each school one point across years
      group = `SCHOOL ID (R-C-D-T-S)`
    )
  ) +
  scale_size(range = c(2, 12)) +
  scale_color_dt("diverging", reverse = TRUE, na.value = get_dt_cols("palegray")) +
  theme_modest() +
  theme(
    axis.text.y = element_text(margin = unit(c(0, 0, 0, 2), "lines")),
    axis.text.x = element_text(margin = unit(c(2, 0, 0, 0), "lines")),
    panel.grid.minor.y = element_blank(),
    axis.text = element_text(size = rel(0.75)),
    axis.title.x = element_text(size = rel(1.25), family = "Ledger"),
    plot.subtitle = element_text(size = rel(0.9)),
    legend.text = element_text(size = rel(0.75)),
    legend.direction = "horizontal",
    # stack the colour and size legends; legend.box is a theme setting and
    # has no effect when passed to guide_legend()
    legend.box = "vertical",
    legend.key.size = unit(45, "pt"),
    legend.position = "bottom",
    # plot.title = element_text(size=rel(1.5), hjust = .5),
    plot.title = element_text(hjust = .5),
    plot.caption = element_text(size = rel(0.75)),
    axis.title.y = element_text(size = rel(1.25), angle = 90, family = "Ledger")
  ) +
  # {frame_time} in the subtitle is filled in with the year being shown
  labs(
    subtitle = 'Average Spend per student by Class Size, School Predominant Race in Year: {frame_time}',
    x = 'Non-White Student Percentage',
    y = 'Average Spend per Student',
    title = "Spread of Spend per Student Increases for Predominantly Non-White Schools\nafter 2018 School Funding Formula Changes",
    caption = "Data Source: Illinois State Board of Education",
    color = "Predominant Race",
    size = "Average Class Size"
  ) +
  # add animation: each school moves through its own yearly values
  transition_components(
    as.integer(YEAR),
    enter_length = as.integer(2),
    exit_length = as.integer(2),
    range = c(as.integer(2012), as.integer(2019))
  ) +
  # gganimate::transition_time(time=as.integer(YEAR)) +
  # points fade in and out (declared once; these were previously added twice)
  enter_fade() +
  exit_fade() +
  ease_aes('sine-in-out') +
  guides(
    colour = guide_legend(
      title.position = "top", title.hjust = 0.5,
      # legend keys drawn more opaque than the plotted points
      override.aes = list(alpha = .75)
    ),
    size = guide_legend(title.position = "top", title.hjust = 0.5)
  )

```


## Exploring Voter Activation in Deinvested Wards

In light of fairly clear de-investment in certain parts of Chicago, I hypothesized that voters in affected wards would be encouraged to vote in aldermen who would help improve their interests. I was interested to see that the Far Southwest Side, arguably the area most impacted by de-investment, had the highest voter turnout in both of the two most recent elections. 

As it turns out, the highest voter activation in this area occurred in Ward 19, the Beverly and Mt. Greenwood area, of which over 70% of the population is white, in comparison to the rest of the area, which has a white population of less than 1%, and had turnout of 35-40%. It is perhaps interesting that in a community that is largely black, another population group has the loudest voices in terms of voter input. 

```{r get-voter-stats, include=FALSE, eval=FALSE}
# Mean and median white_pct for each Far Southwest Side ward
# (rows whose id is 2015)
demos_2015 %>%
  filter(as.integer(id) == 2015, SIDE == "Far Southwest Side") %>%
  group_by(SIDE, ward) %>%
  summarise(
    avg_white = mean(white_pct, na.rm = TRUE),
    med_white = median(white_pct)
  ) %>%
  as.data.frame()

# The same summary for the Side as a whole, leaving out ward 19
demos_2015 %>%
  filter(as.integer(id) == 2015, SIDE == "Far Southwest Side", ward != "19") %>%
  group_by(SIDE) %>%
  summarise(
    avg_white = mean(white_pct, na.rm = TRUE),
    med_white = median(white_pct)
  ) %>%
  as.data.frame()

# Turnout rows for the Far Southwest Side
turnoutDiff %>%
  filter(SIDE == "Far Southwest Side")

```

```{r message=FALSE, warning=FALSE, include=FALSE}
# read in 2015 turnout data and format columns for numeric manipulations, add column for year

# voter_turnout15 <- read_csv(here::here("data", "voter_turnout_city_council_2015.csv"), col_names = FALSE, skip=8)
# voter_turnout15 <- voter_turnout15 %>% mutate(WARD = ifelse(grepl("WARD", voter_turnout15$X1), as.numeric(gsub("WARD ", "", X1)), NA),
#                                               X4 = as.numeric(gsub("%", "", X4)),
#                                               YEAR = 2015)
# voter_turnout15 <- voter_turnout15 %>% fill(WARD)


# double-check columns that look empty
# voter_turnout15 %>% dplyr::select(X5, X6) %>% distinct()


# filter out subtitles and intermediate headings

# voter_turnout15 <- voter_turnout15 %>% dplyr::select(-c(X5, X6))
# names(voter_turnout15) <- sapply(c("Precinct","Registered","Ballots","Turnout", "WARD", "YEAR"), toupper)
# voter_turnout15 <- voter_turnout15 %>% filter(PRECINCT !="Precinct", !grepl("WARD", PRECINCT), !is.na(PRECINCT))


# read in 2011 turnout data and format columns for numeric manipulations, add column for year

# voter_turnout11 <- read_csv(here::here("data", "voter_turnout_city_council_2011.csv"), col_names = FALSE, skip=8)
# voter_turnout11 <- voter_turnout11 %>% mutate(WARD = ifelse(grepl("WARD", voter_turnout11$X1), as.numeric(gsub("WARD ", "", X1)), NA),
#                                               X4 = as.numeric(gsub("%", "", X4)),
#                                               YEAR=2011)
# voter_turnout11 <- voter_turnout11 %>% fill(WARD)


# double-check columns that look empty
# voter_turnout11 %>% dplyr::select(X5, X6) %>% distinct()


# filter out subtitles and intermediate headings

# voter_turnout11 <- voter_turnout11 %>% dplyr::select(-c(X5, X6))
# names(voter_turnout11) <- sapply(c("Precinct","Registered","Ballots","Turnout", "WARD", "YEAR"), toupper)
# voter_turnout11 <- voter_turnout11 %>% filter(PRECINCT !="Precinct", !grepl("WARD", PRECINCT), !is.na(PRECINCT))


# combine 2011 and 2015 data into one dataset

# voterTurnout <- bind_rows(voter_turnout11, voter_turnout15)
# voterTurnout <- voterTurnout %>% 
#   mutate(TURNOUT = round((as.numeric(gsub(",", "", BALLOTS)) / as.numeric(gsub(",", "", REGISTERED))) * 100,1))
# rm(voter_turnout11, voter_turnout15)


# consolidate turnout and change in turnout by Chicago area

# turnoutSides <- merge(voterTurnout, wardSides, by="WARD") %>% filter(PRECINCT=="Total") %>% 
#   group_by(YEAR, SIDE) %>% summarise(MEAN_TURNOUT = round(mean(TURNOUT, na.rm = TRUE), 1)) %>% 
#   spread(YEAR, MEAN_TURNOUT) %>% mutate(DIFFERENCE = `2015` - `2011`) %>% 
#   gather("YEAR", "TURNOUT", -SIDE, -DIFFERENCE) %>% 
#   mutate(DIFFERENCE = ifelse(YEAR==2011, NA, DIFFERENCE)) %>% 
#   dplyr::select(YEAR, everything()) %>% 
#   arrange(SIDE, YEAR)
# 
# saveRDS(turnoutSides, file = here::here("data", "sides_turnout.Rda"))                                  
 

# consolidate turnout and change in turnout by Chicago ward

# turnoutDiff <- voterTurnout %>%filter(PRECINCT=="Total", YEAR %in% c(2011, 2015)) %>% dplyr::select(WARD, TURNOUT, YEAR) %>% 
#   group_by(YEAR, WARD) %>%
#   spread(YEAR, TURNOUT) %>% mutate(DIFFERENCE = `2015` - `2011`) %>% 
#   gather("YEAR", "TURNOUT", -WARD, -DIFFERENCE) %>% 
#   mutate(DIFFERENCE = ifelse(YEAR==2011, NA, DIFFERENCE)) %>% 
#   dplyr::select(YEAR, WARD, TURNOUT, DIFFERENCE) %>% 
#   arrange(YEAR, WARD) %>% group_by(WARD) %>% merge(., wardSides, by="WARD")

# saveRDS(turnoutDiff, file = here::here("data", "wards_turnout.Rda"))
``` 


```{r slopegraph, fig.height=12, fig.width=15}
# Load the saved turnout tables: one summarised per Chicago Side and one per
# ward. Both carry YEAR, TURNOUT and SIDE columns (the ward table also WARD).
turnoutSides <- readRDS(here::here("data", "sides_turnout.Rda"))
turnoutDiff <- readRDS(here::here("data", "wards_turnout.Rda"))

# Ad-hoc check kept for reference: highest turnout per ward in the Far Southwest
# turnoutDiff %>% group_by(SIDE, WARD) %>% summarise(z = max(TURNOUT)) %>% filter(SIDE == "Far Southwest")

# Slopegraph of turnout in 2011 vs. 2015, one bold line per Chicago Side.
# Layers are added in drawing order; scales, labels and themes follow.
ggplot(
  data = turnoutSides,
  mapping = aes(x = as.factor(YEAR), y = TURNOUT, group = as.factor(SIDE))
) +

  # Side-level trendlines
  geom_line(
    mapping = aes(colour = SIDE),
    size = 1.5,
    show.legend = FALSE
  ) +
  # Alternative kept for reference:
  # geom_line(data = filter(turnoutSides, (DIFFERENCE > -0)||(is.na(DIFFERENCE))), aes(colour=SIDE), size=1.5, show.legend=FALSE) +

  # Thin, semi-transparent ward-level lines showing the spread within each Side
  geom_line(
    data = turnoutDiff,
    mapping = aes(group = as.factor(WARD), color = SIDE),
    size = 0.5,
    alpha = 0.35,
    show.legend = FALSE
  ) +

  # Side name and turnout value next to the 2011 end of each line;
  # labels may only be repelled vertically and are confined by xlim
  geom_label_repel(
    data = filter(turnoutSides, YEAR == 2011),
    mapping = aes(label = paste0(SIDE, " - ", TURNOUT, "%"), color = SIDE),
    family = "Ledger",
    fontface = "bold",
    size = rel(5),
    fill = NA,
    label.size = 0,
    hjust = "left",
    nudge_x = -0.15,
    xlim = c(-0.5, 0.965),
    direction = "y",
    force = 6,
    point.padding = 0.75,
    show.legend = FALSE
  ) +

  # Same labelling next to the 2015 end of each line
  geom_label_repel(
    data = filter(turnoutSides, YEAR == 2015),
    mapping = aes(label = paste0(SIDE, " - ", TURNOUT, "%"), color = SIDE),
    family = "Ledger",
    fontface = "bold",
    size = rel(5),
    fill = NA,
    label.size = 0,
    hjust = "right",
    nudge_x = 0.25,
    xlim = c(2.05, 3),
    direction = "y",
    force = 5,
    point.padding = 0.75,
    show.legend = FALSE
  ) +

  # Footnote explaining the faint ward lines
  annotate(
    "text",
    x = 1,
    y = 25,
    label = "*Underlying ward trends visualized\nbehind Side trendlines",
    size = rel(4)
  ) +

  # Left-aligned call-out text placed at y = 70
  annotate(
    "text",
    x = 0.7,
    y = 70,
    label = "Ward 19 on the Far Southwest\nSide, which contains the Beverly\nand Morgan Park neighborhoods,\nhad voter turnout of nearly 75%\n in the 2011 City Council Election.",
    size = rel(4),
    hjust = 0,
    color = get_dt_cols("mauve")
  ) +

  # Project colour scale; year labels are moved to the top of the panel
  scale_color_dt() +
  scale_x_discrete(position = "top") +
  # coord_cartesian(ylim=c(23.5, 60)) +

  labs(
    title = "Amid City-Wide Turnout Dropoff, Significantly Higher Voter Activation in Far Southwest Side",
    subtitle = "Voters in the Far Southwest, which has faced de-investment as determined by a number of factors, far exceeded other areas'\nparticipation in the last two City Council elections.",
    caption = "Source: Chicago Board of Election Commissioners",
    color = "Chicago Area"
  ) +

  # Project theme first, then overrides: hide axis titles, y-axis text and
  # all grid lines, and restyle the year labels on the top axis
  theme_modest() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x.top = element_text(
      family = "Ledger",
      face = "bold",
      size = rel(1.25),
      vjust = -8
    ),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
    # plot.title = element_text(size=rel(1.5)),
    # plot.subtitle = element_text(hjust=0.35)
  )
```

The disparities explored above give all Chicagoans good reason to exercise their right to choose leaders who will work to build a more equitable Chicago.

### Chicago City Council Election Day is Tuesday, February 26, 2019!

Find your ward's [early voting site](https://chicagoelections.com/en/early-voting.html) to vote before Monday, February 25, 2019.

Not registered for the upcoming Chicago elections? [Register here](https://www.uchivotes.com/voting-in-chicago), or [double-check your registration status](https://chicagoelections.com/en/your-voter-information.html).

Want to learn about the candidates? Visit the City Council election page on [Ballotpedia](https://ballotpedia.org/City_council_elections_in_Chicago,_Illinois_(2019)).


### Resources:

Illinois State Board of Education, [Report Card Data Library](https://www.isbe.net/Pages/Illinois-State-Report-Card-Data.aspx) 2013-2019

Chicago Public Schools, [Find Your School Budget](https://biportal.cps.edu/analytics/saw.dll?Dashboard) 2013-2019

Chicago Public Schools, [School Demographic Data](https://cps.edu/SchoolData/Pages/SchoolData.aspx) 2013-2019

Chicago Department of Business Affairs and Consumer Protection, [Business Licenses Dataset](https://data.cityofchicago.org/Community-Economic-Development/Business-Licenses/r5kz-chrr) 2012-2018

U.S. Census Bureau, ACS 5-Year Tables 2012-2017