WFH_Models.qmd

---
title: "Models for Paper"
subtitle: "Work From Home: Who Can and Who Does?"
format:
  html:
    df-print: paged
    toc: true
---


```{r setup, warning=FALSE, message=FALSE}

library(scales)
library(reldist)
library(pollster)
library(labelled)
library(weights)
library(tigris)
library(ipumsr)
library(srvyr)
library(survey)
library(tidyverse)
library(naniar)
library(gmodels)
library(gtsummary)
library(quarto)
library(huxtable) # for summ() and regression output formatting
library(jtools)
library(modelsummary)
library(car)

# Suppress warnings and messages in every later chunk of the rendered document.
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

# Load the prepared workspace; the rest of this file relies on a data frame
# named `joined` being among the loaded objects.
load("./data/WFH.RData")
# hist(joined$INCEARN)

# Fix the factor level order explicitly. With R's default treatment contrasts
# the first level ("White", "No WFH") is the reference group in the models.
joined$race_cat <- factor(joined$race_cat,
                          levels = c("White", "Asian", "Black", "Other"))
joined$CanWorkFromHome <- factor(joined$CanWorkFromHome,
                                 levels = c("No WFH", "Some WFH", "Can WFH"))
table(joined$CanWorkFromHome)

# Earned income expressed in units of $10,000 so that regression coefficients
# are per $10,000 rather than per dollar.
joined <- joined %>% mutate(tenKdollars = INCEARN / 10000)

# High-speed internet access. factor() ignores names on `labels` and assigns
# labels purely by the sorted order of the values it finds, so the code ->
# label mapping is made explicit through `levels`. Any value other than 10 or
# 20 becomes NA instead of silently shifting the labels (or erroring).
# NOTE(review): assumes CIHISPEED is coded 10 = has access, 20 = lacks access,
# as the original named labels indicate -- confirm against the codebook.
joined$CIHISPEED <- factor(joined$CIHISPEED,
                           levels = c(10, 20),
                           labels = c("Has Access", "Lacks Access"))

# hist(joined$INCEARN)
```

```{r include=FALSE}
# Plot theme used for the paper's figures.
#
# Starts from theme_classic() and replaces selected elements: gridlines, axis
# ticks and axis tick labels are blanked, and the title, subtitle, caption and
# axis-title text are restyled. Takes no arguments; returns a ggplot2 theme.
# The legend is deliberately left alone because it usually needs per-plot
# tweaking.
Alea_theme <- function() {
  overrides <- theme(
    # Text elements
    plot.title    = element_text(size = 14, face = 'bold', hjust = 0, vjust = 2),
    plot.subtitle = element_text(size = 14),
    plot.caption  = element_text(size = 9, hjust = 1),
    axis.title    = element_text(size = 10),
    # NOTE(review): axis.text.x / axis.text.y are blanked below, so this size
    # presumably has no visible effect -- confirm the intent.
    axis.text     = element_text(size = 9),

    # Grid and axis elements. The axis lines themselves are not touched here,
    # so whatever theme_classic() defines for them is kept.
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks       = element_blank(),
    axis.text.x      = element_blank(),
    axis.text.y      = element_blank()
  )

  # %+replace% swaps each listed element wholesale instead of merging it with
  # the corresponding theme_classic() element.
  theme_classic() %+replace% overrides
}
```


# Regression Models


Regression for 2019 using survey object `dstrata2019` & regression for 2021 using survey object `dstrata2021`.


## Subset: Gender & Household Dynamics

Model supporting statements made in "Gender and Household Dynamics" section of Working From Home in Illinois paper.

### Logit Model

Model created with a subset of the survey data. It only uses workers who had Management-type occupations, could feasibly WFH, and were under the age of 45.

```{r}
# Make sure the occupation labels and the WFH-feasibility measure are factors.
joined$occ_2dig_labels <- as.factor(joined$occ_2dig_labels)
joined$CanWorkFromHome <- as.factor(joined$CanWorkFromHome)

#gmodels::CrossTable(joined$CanWorkFromHome, joined$occ_2dig_labels, chisq = TRUE, prop.r = FALSE, prop.c = FALSE, prop.t = FALSE, prop.chisq = FALSE )

# Pooled design (both survey years): clusters, strata and person weights are
# declared, the design is converted to a srvyr object, and an earned-income
# decile (ntile of INCEARN) is added.
dstrata <- survey::svydesign(
  id = ~CLUSTER,
  strata = ~STRATA,
  weights = ~PERWT,
  data = joined
)
dstrata <- as_survey(dstrata)
dstrata <- mutate(dstrata, decile = ntile(INCEARN, 10))

# 2019 only: keep the 2019 rows, then build the design on them.
dstrata2019 <- filter(joined, YEAR == 2019)
dstrata2019 <- survey::svydesign(
  id = ~CLUSTER,
  strata = ~STRATA,
  weights = ~PERWT,
  data = dstrata2019
)
dstrata2019 <- as_survey(dstrata2019)
dstrata2019 <- mutate(dstrata2019, decile = ntile(INCEARN, 10))

# 2021 only: same construction on the 2021 rows.
dstrata2021 <- filter(joined, YEAR == 2021)
dstrata2021 <- survey::svydesign(
  id = ~CLUSTER,
  strata = ~STRATA,
  weights = ~PERWT,
  data = dstrata2021
)
dstrata2021 <- as_survey(dstrata2021)
dstrata2021 <- mutate(dstrata2021, decile = ntile(INCEARN, 10))
```


```{r eval=FALSE, include = FALSE}

## Pooled logit on both survey years. YEAR enters only as a main effect; there
## are NO year interactions in this model.
## not wanted but wasn't ready to delete yet.
both <- svyglm(
  did_wfh ~ INCEARN + race_cat + sex_cat + age_cat + factor(CIHISPEED) +
    county_pop_type + NCHILD + NCHLT5 + factor(YEAR),
  design = dstrata,
  family = quasibinomial(),
  subset = AGE < 45 & CanWorkFromHome == "Can WFH" &
    occ_2dig_labels == "Management, Business, Science, Arts"
)

# summary(both)

# Exponentiated coefficients (adjusted odds ratios) next to their exponentiated
# confidence limits; [-1, ] drops the intercept row.
OR.CI_Both <- cbind(
  "AOR" = exp(coef(both)),
  exp(confint(both, df.resid = degf(both$survey.design)))
)[-1, ]

# Table 1: coefficients on the log-odds scale, intercept shown, p-values hidden.
t1 <- tbl_regression(
  both,
  intercept = TRUE,
  # style_number rounds to 2 digits (e.g., 1.27);
  # style_sigfig(x, digits = 2) would keep 2 significant digits (e.g., 1.3).
  estimate_fun = function(x) style_number(x, digits = 2),
  label = list('(Intercept)' ~ "Intercept")
)
t1 <- modify_column_hide(t1, p.value)
t1 <- modify_caption(t1, "Both Survey Years Together - Logit Model")

# Table 2: odds ratios (OR = exp(B)), no intercept row, with global F-test
# p-values added per variable.
t2 <- tbl_regression(
  both,
  intercept = FALSE,
  exponentiate = TRUE,
  estimate_fun = function(x) style_number(x, digits = 2),
  pvalue_fun = function(x) style_pvalue(x, digits = 3)
  # label  = list(
  #               AGE ~ "Age (years)",
  #              )
)
t2 <- add_global_p(t2, keep = TRUE, test.statistic = "F")

# Coefficients and odds ratios side by side.
tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**Regression Coefficients**", "**Adjusted Odds Ratio**")
)

```


A note on the p-value: the p-value is a test of significance for the null hypothesis H0 that

-   there is no difference in the log-odds of the outcome between the reference group (captured by the intercept) and the explanatory variable (or one of its categories), or that the difference between the two groups equals zero: H0:b1=0 and Ha:b1≠0

If p \< 0.05, we reject H0, as we have evidence to suggest that the difference between the two groups does not equal zero.

Log-odds are not the most intuitive to interpret. Instead of discussing the change in the log-odds, we can calculate the odds ratio for a given variable by exponentiating the coefficient. 

__Odds ratio is read "have x times the odds of the outcome of interest compared to those in the reference group".__

Reference group of the outcome variable: by default, R uses the lowest coded group as the reference. The reference category can be changed with `relevel()`.

Relationship between Odds and Probabilities:

Odds=P/(1-P)   

P=odds/(1+odds) 

Odds=exp(log-odds) 

P=exp(log-odds)/(1+exp(log-odds))   


```{r eval=FALSE, include=FALSE}
#| column: page

#### Both Years in Same Model (with Year interactions)

# Pooled logit in which every predictor is interacted with survey year, fitted
# on the under-45 / "Can WFH" / management-occupation subset of the pooled
# design.
both <- svyglm(
  did_wfh ~ sex_cat * factor(YEAR) + age_cat * factor(YEAR) +
    NCHILD * factor(YEAR) + NCHLT5 * factor(YEAR) +
    factor(CIHISPEED) * factor(YEAR) + county_pop_type * factor(YEAR) +
    race_cat * factor(YEAR),
  design = dstrata,
  family = quasibinomial(),
  subset = AGE < 45 & CanWorkFromHome == "Can WFH" &
    occ_2dig_labels == "Management, Business, Science, Arts"
)

# Adjusted odds ratios with exponentiated confidence limits; [-1, ] drops the
# intercept row.
OR.CI_Both <- cbind(
  "AOR" = exp(coef(both)),
  exp(confint(both, df.resid = degf(both$survey.design)))
)[-1, ]

# Table 1: log-odds coefficients with the intercept, p-value column hidden and
# significance stars added.
t1 <- tbl_regression(
  both,
  intercept = TRUE,
  # style_number rounds to 2 digits (e.g., 1.27);
  # style_sigfig(x, digits = 2) would keep 2 significant digits (e.g., 1.3).
  estimate_fun = function(x) style_number(x, digits = 2)
)
t1 <- modify_column_hide(t1, p.value)
t1 <- add_significance_stars(t1)
t1 <- modify_caption(t1, "Both years together with Interactions - Logit Model")

# Table 2: odds ratios (OR = exp(B)) without the intercept, stars shown
# together with p-values, plus global F-test p-values.
t2 <- tbl_regression(
  both,
  intercept = FALSE,
  exponentiate = TRUE,
  estimate_fun = function(x) style_number(x, digits = 2),
  pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
t2 <- add_significance_stars(t2, hide_p = FALSE)
t2 <- add_global_p(t2, keep = TRUE, test.statistic = "F")

tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**Regression Coefficients**", "**Adjusted Odds Ratio**")
)

# # Same output, different table format: # #

# modelsummary(both, exponentiate = TRUE, # does standard and exponentiated models together
#              statistic = c("conf.int",
#                            "s.e. = {std.error}",
#                            "p = {p.value}"),
#              stars = TRUE, shape = term ~  statistic,
#              notes = list('Subset of ACS Survey Data for 2019 and 2021',
#                           'Odds Ratios Shown in Table'),
#              title = 'Predictions for WFH in 2019 vs 2021')
```

#### Separate models for each year

> Be careful when subsetting survey data. Standard errors are wrong if `filter()` is used to select the subset instead of the `subset =` option for survey objects.

```{r}
#| column: page

# Survey-weighted logit models of did_wfh, one per survey year, restricted via
# svyglm's `subset` argument to workers under 45 who can WFH and hold a
# management-type occupation.
#
# Each model is fitted ONCE and then listed twice: the "Logged Odds" entry is
# printed on the log-odds scale and the "Odds Ratio" entry is exponentiated by
# modelsummary() below. (Previously the identical svyglm() call was run twice
# per year.) The list names and order are unchanged.
m <- list(
  "2019 Logged Odds" = svyglm(did_wfh ~ sex_cat + age_cat + NCHILD + NCHLT5 + CIHISPEED + county_pop_type + race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2019),

  "2021 Logged Odds" = svyglm(did_wfh ~ sex_cat + age_cat + NCHILD + NCHLT5 + CIHISPEED + county_pop_type + race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2021)
)

# Reuse the fitted objects for the odds-ratio columns.
m[["2019 Odds Ratio"]] <- m[["2019 Logged Odds"]]
m[["2021 Odds Ratio"]] <- m[["2021 Logged Odds"]]

#
# modelsummary(m, exponentiate = TRUE, # does standard and exponentiated models together
#              statistic = c("conf.int",
#                            "s.e. = {std.error}",
#                            "p = {p.value}"),
#              stars = TRUE, shape = term ~ model + statistic,
#              notes = list('Subset of ACS Survey Data for 2019 and 2021',
#                           'Odds Ratios Shown in Table'),
#              title = 'Predictions for WFH in 2019 vs 2021',
#              output = "table.docx")

# `exponentiate` is given per model, in list order: the first two columns stay
# on the log-odds scale, the last two are shown as odds ratios.
modelsummary(m,
             exponentiate = c(FALSE, FALSE, TRUE, TRUE),
             statistic = c("s.e. = {std.error}",
                           "p = {p.value}"),
             stars = TRUE, shape = term ~ model + statistic,
             notes = list('Subset of ACS Survey Data for 2019 and 2021',
                          'Odds Ratios Shown in Table'),
             title = 'Predictions for WFH in 2019 vs 2021')


```


```{r eval=FALSE, include = FALSE}
# Coefficient plot for the models stored in `m`, drawn with facet = TRUE.
modelplot(models = m, facet = TRUE)
```


```{r eval=FALSE, include = FALSE}

## Same subset and model family as the logit models above, with incdecile_w
## added as a predictor, and printed through export_summs() instead.
m4_2019 <- svyglm(
  did_wfh ~ sex_cat + age_cat + NCHILD + NCHLT5 + CIHISPEED +
    county_pop_type + incdecile_w + race_cat,
  design = dstrata2019,
  family = quasibinomial(),
  subset = AGE < 45 & CanWorkFromHome == "Can WFH" &
    occ_2dig_labels == "Management, Business, Science, Arts"
)

m4_2021 <- svyglm(
  did_wfh ~ sex_cat + age_cat + NCHILD + NCHLT5 + CIHISPEED +
    county_pop_type + incdecile_w + race_cat,
  design = dstrata2021,
  family = quasibinomial(),
  subset = AGE < 45 & CanWorkFromHome == "Can WFH" &
    occ_2dig_labels == "Management, Business, Science, Arts"
)

# Side-by-side summary of the two yearly fits.
export_summs(
  m4_2019, m4_2021,
  model.names = c("Subset 2019 Logit", " Subset 2021 Logit")
)


```


### OLS Model


```{r}
#| column: page

# Linear models of did_wfh on the same under-45 / "Can WFH" / management
# subset, one per survey year. No `family` is passed, so svyglm() falls back
# to its default family.
ols_fit_2019 <- svyglm(
  did_wfh ~ sex_cat + age_cat + NCHLT5 + CIHISPEED + county_pop_type + race_cat,
  design = dstrata2019,
  subset = AGE < 45 & CanWorkFromHome == "Can WFH" &
    occ_2dig_labels == "Management, Business, Science, Arts"
)

ols_fit_2021 <- svyglm(
  did_wfh ~ sex_cat + age_cat + NCHLT5 + CIHISPEED + county_pop_type + race_cat,
  design = dstrata2021,
  subset = AGE < 45 & CanWorkFromHome == "Can WFH" &
    occ_2dig_labels == "Management, Business, Science, Arts"
)

m <- list("2019 OLS" = ols_fit_2019, "2021 OLS" = ols_fit_2021)
rm(ols_fit_2019, ols_fit_2021)  # keep only `m`, as before

#
# modelsummary(m, exponentiate = TRUE, # does standard and exponentiated models together
#              statistic = c("conf.int",
#                            "s.e. = {std.error}",
#                            "p = {p.value}"),
#              stars = TRUE, shape = term ~ model + statistic,
#              notes = list('Subset of ACS Survey Data for 2019 and 2021',
#                           'Odds Ratios Shown in Table'),
#              title = 'Predictions for WFH in 2019 vs 2021',
#              output = "table.docx")

# Coefficients are reported as estimated (no exponentiate = TRUE here), with
# the standard error and p-value of every term in separate columns.
modelsummary(
  m,
  statistic = c("s.e. = {std.error}", "p = {p.value}"),
  stars = TRUE,
  shape = term ~ model + statistic,
  notes = list('Subset of ACS Survey Data for 2019 and 2021'),
  title = 'OLS - Separate Models for WFH in 2019 vs 2021'
)


```


```{r include=FALSE, eval=FALSE}
#| echo: false
# Coefficient plot for the models stored in `m`, drawn with facet = TRUE.
modelplot(models = m, facet = TRUE)
```


```{r}
#| column: page

# Subset linear models (under 45, "Can WFH", management occupations) that also
# control for earned income in $10,000 units (tenKdollars), one per year.
# The cell option above is written "#| " with a space, matching every other
# chunk in this document.
m4_2019 <- svyglm(did_wfh ~ sex_cat + age_cat + NCHLT5 + CIHISPEED + county_pop_type + tenKdollars + race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2019)

m4_2021 <- svyglm(did_wfh ~ sex_cat + age_cat + NCHLT5 + CIHISPEED + county_pop_type + tenKdollars + race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2021)


# export_summs(m4_2019, m4_2021, model.names = c("Subset 2019 OLS", " Subset 2021 OLS"))


# 2019 table: intercept row included, estimates rounded to 2 digits
# (e.g., 1.27), confidence intervals hidden in favour of significance stars,
# and a global F-test p-value added per variable.
t1 <- m4_2019 %>%
  tbl_regression(intercept = TRUE,
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = TRUE, test.statistic = "F")

# 2021 table: same layout as 2019 (the intercept row IS included here too).
t2 <- m4_2021 %>%
  tbl_regression(intercept = TRUE,
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = TRUE, test.statistic = "F")


# Both years side by side.
tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Subset**", "**OLS 2021 Subset**")
)
```


## Full Sample - Models with all Workers 

### OLS

```{r}
#| column: page

# Full-sample linear models of did_wfh (no `subset`): one pooled model with a
# year indicator and one model per survey year.
both <- svyglm(
  did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5 +
    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED +
    occ_2dig_labels + factor(YEAR),
  design = dstrata
)

ols2019 <- svyglm(
  did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5 +
    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED +
    occ_2dig_labels,
  design = dstrata2019
)

ols2021 <- svyglm(
  did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5 +
    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED +
    occ_2dig_labels,
  design = dstrata2021
)

#summary(ols2021)

# export_summs(both, ols2021, model.names = c("Both Years OLS", " Only 2021 OLS"))

# One table per year: intercept shown, estimates rounded to 2 digits
# (e.g., 1.27), p-values to 3 digits, confidence intervals replaced by
# significance stars, global F-test p-values added.
t1 <- tbl_regression(
  ols2019,
  intercept = TRUE,
  estimate_fun = function(x) style_number(x, digits = 2),
  pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
t1 <- add_significance_stars(t1, hide_ci = TRUE)
t1 <- add_global_p(t1, keep = TRUE, test.statistic = "F")

t2 <- tbl_regression(
  ols2021,
  intercept = TRUE,
  estimate_fun = function(x) style_number(x, digits = 2),
  pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
t2 <- add_significance_stars(t2, hide_ci = TRUE)
t2 <- add_global_p(t2, keep = TRUE, test.statistic = "F")

tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Full Model**", "**OLS 2021 Full Model**")
)
```

### Logit

```{r fullmodel-logit, eval=FALSE, include=FALSE}


# Full-sample pooled logit of did_wfh on both survey years, with a year
# indicator, fitted on the pooled design.
both <- svyglm(did_wfh ~ CanWorkFromHome + occ_2dig_labels + factor(YEAR) + CIHISPEED +
                 county_pop_type + NCHILD + NCHLT5 + tenKdollars + race_cat + sex_cat
               + age_cat,
        family = quasibinomial(), design = dstrata)


# Adjusted odds ratios with exponentiated confidence limits; [-1, ] drops the
# intercept row.
OR.CI_Both <- cbind("AOR" = exp(   coef(both)),
                       exp(confint(both,
                           df.resid=degf(both$survey.design))))[-1,]
#round(OR.CI_2019, 3)

# Table 1: coefficients on the log-odds scale, intercept included, estimates
# rounded to 2 digits (e.g., 1.27).
t1 <- both %>%
  tbl_regression(intercept = TRUE,
                 estimate_fun = function(x) style_number(x, digits = 2)) %>%
  modify_column_hide(p.value) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  modify_caption("Logit - 2019 & 2021 ACS Data Together")

# Table 2: odds ratios (OR = exp(B)) without the intercept row, plus global
# F-test p-values.
t2 <- both %>%
  tbl_regression(intercept = FALSE,
                 exponentiate = TRUE,
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = TRUE, test.statistic = "F")

# The spanner labels are Markdown: "**" must touch the text on both sides,
# otherwise the label is not rendered in bold.
tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**Logit Coefficients**", "**Odds Ratio**")
)

```


### OLS - Drop WFH Feasibility

```{r}
#| column: page

# Full-sample linear models per year WITHOUT the CanWorkFromHome predictor;
# this specification uses `spouse` and does not include NCHILD.
ols2019 <- svyglm(
  did_wfh ~ occ_2dig_labels + county_pop_type + NCHLT5 + spouse +
    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED,
  design = dstrata2019
)

ols2021 <- svyglm(
  did_wfh ~ occ_2dig_labels + county_pop_type + NCHLT5 + spouse +
    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED,
  design = dstrata2021
)

#summary(ols2021)

# export_summs(both, ols2021, model.names = c("Both Years OLS", " Only 2021 OLS"))

# One table per year: intercept shown, estimates rounded to 2 digits
# (e.g., 1.27), p-values to 3 digits, confidence intervals replaced by
# significance stars, global F-test p-values added.
t1 <- tbl_regression(
  ols2019,
  intercept = TRUE,
  estimate_fun = function(x) style_number(x, digits = 2),
  pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
t1 <- add_significance_stars(t1, hide_ci = TRUE)
t1 <- add_global_p(t1, keep = TRUE, test.statistic = "F")

t2 <- tbl_regression(
  ols2021,
  intercept = TRUE,
  estimate_fun = function(x) style_number(x, digits = 2),
  pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
t2 <- add_significance_stars(t2, hide_ci = TRUE)
t2 <- add_global_p(t2, keep = TRUE, test.statistic = "F")

tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Full Model**", "**OLS 2021 Full Model**")
)
```


### OLS - Drop Occupations

```{r}
#| column: page

# Full-sample linear models per year WITHOUT the occupation predictor
# (occ_2dig_labels); CanWorkFromHome stays in.
ols2019 <- svyglm(
  did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5 +
    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED,
  design = dstrata2019
)

ols2021 <- svyglm(
  did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5 +
    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED,
  design = dstrata2021
)

#summary(ols2021)

# export_summs(both, ols2021, model.names = c("Both Years OLS", " Only 2021 OLS"))

# One table per year: intercept shown, estimates rounded to 2 digits
# (e.g., 1.27), p-values to 3 digits, confidence intervals replaced by
# significance stars, global F-test p-values added.
t1 <- tbl_regression(
  ols2019,
  intercept = TRUE,
  estimate_fun = function(x) style_number(x, digits = 2),
  pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
t1 <- add_significance_stars(t1, hide_ci = TRUE)
t1 <- add_global_p(t1, keep = TRUE, test.statistic = "F")

t2 <- tbl_regression(
  ols2021,
  intercept = TRUE,
  estimate_fun = function(x) style_number(x, digits = 2),
  pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
t2 <- add_significance_stars(t2, hide_ci = TRUE)
t2 <- add_global_p(t2, keep = TRUE, test.statistic = "F")

tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Full Model**", "**OLS 2021 Full Model**")
)
```


## Additional Models


```{r}
#| code-fold: true


#svyglm(did_wfh~ INCEARN + race_cat+ SEX+ CIHISPEED+CINETHH + occ_2dig_labels+CanWorkFromHome+county_pop_type, design = dstrata2021) %>% summary()


### All variations of family = "" had same results 
# m1_2019 <- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#       family = "binomial", design = dstrata2019)
# 
# m1_2021<- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#       family = "binomial", design = dstrata2021)
#   
# export_summs(m1_2019, m1_2021, 
#              model.names = c("2019 Logit", "2021 Logit"), 
#             robust = "HC3", statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))
# 
# 
# m1_2019 <- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#        family = quasibinomial(), design = dstrata2019)
# 
# m1_2021<- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#     family=quasibinomial(),   design = dstrata2021)
#   
# 
# export_summs(m1_2019, m1_2021, 
#              model.names = c("2019 QuasiBi", "2021 QuasiBi"), 
#             robust = "HC3", statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))


# Full-sample logit models of did_wfh, one per survey year, with an identical
# specification. Income enters BOTH models as tenKdollars (earned income in
# $10,000 units): the 2019 model previously used raw INCEARN, which put the
# two income coefficients on different scales and in different rows of the
# merged 2019-vs-2021 table.
m1_2019 <- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5 + CIHISPEED + tenKdollars + race_cat + sex_cat + age_cat,
        family = quasibinomial(), design = dstrata2019)

m1_2021 <- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5 + CIHISPEED + tenKdollars + race_cat + sex_cat + age_cat,
        family = quasibinomial(), design = dstrata2021)

# export_summs(m1_2019, m1_2021, 
#              model.names = c("2019 Logit", "2021 Logit"), 
#             robust = "HC3", statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))
```


```{r}
#| column: page
#| code-fold: true

# Adjusted odds ratios with exponentiated confidence limits for each yearly
# logit; [-1, ] drops the intercept row.
OR.CI_2019 <- cbind("AOR" = exp(   coef(m1_2019)),
                       exp(confint(m1_2019,
                           df.resid=degf(m1_2019$survey.design))))[-1,]

OR.CI_2021 <- cbind("AOR" = exp(   coef(m1_2021)),
                       exp(confint(m1_2021,
                           df.resid=degf(m1_2021$survey.design))))[-1,]

library(gtsummary)
library(car)

# Four tables, two per year: log-odds coefficients (intercept shown, p-value
# column hidden) and odds ratios (OR = exp(B), no intercept, global F-test
# p-values). Estimates are rounded to 2 digits (e.g., 1.27).
# The tbl_regression() calls no longer end in a dangling comma, which passed
# an empty extra argument to the function.

# 2019, log-odds scale
t1 <- m1_2019 %>%
  tbl_regression(intercept = TRUE,
                 estimate_fun = function(x) style_number(x, digits = 2)) %>%
  modify_column_hide(p.value) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  modify_caption("2019  vs 2021 ACS Data - Logit Models")

# 2019, odds ratios
t2 <- m1_2019 %>%
  tbl_regression(intercept = FALSE,
                 exponentiate = TRUE,
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = TRUE, test.statistic = "F")

# tbl_merge(
#   tbls = list(t1, t2),
#   tab_spanner = c("**2019 Regression Coefficients**", "** 2019 Adjusted Odds Ratio**")
# )


# 2021, log-odds scale
t3 <- m1_2021 %>%
  tbl_regression(intercept = TRUE,
                 estimate_fun = function(x) style_number(x, digits = 2)) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  modify_column_hide(p.value) %>%
  modify_caption("2021 ACS")

# 2021, odds ratios
t4 <- m1_2021 %>%
  tbl_regression(intercept = FALSE,
                 exponentiate = TRUE,
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = TRUE, test.statistic = "F")

# tbl_merge(
#   tbls = list(t3, t4),
#   tab_spanner = c("**Regression Coefficients**", "**Adjusted Odds Ratio**")
# )


# All four tables side by side.
tbl_merge(
  tbls = list(t1, t2, t3, t4),
  tab_spanner = c("**2019 Logged Odds**", "**2019 Odds Ratio**", "**2021 Logged Odds**", "**2021 Odds Ratio**")
)


```


### Prediction

[Interpretation examples](https://www.bookdown.org/rwnahhas/RMPH/survey-logistic.html)

What is the predicted probability (and 95% CI) that someone who was male and between 25 and 34 years old worked from home in 2019?

```{r }
# Always include the intercept for prediction.
# Specify a 1 for the intercept, a # for each continuous predictor
# and a 1 for each non-reference level of a categorical variable.
# If a predictor is at its reference level, specify a 0 or exclude it.
#install.packages("faraway")
library(faraway)

# Linear predictor (log-odds) for the chosen profile, with its design-based
# standard error. Every predictor not listed is held at 0 / its reference
# level.
# NOTE(review): no sex_cat term is listed, so this is the "male" prediction
# only if male is the reference level of sex_cat -- confirm.
pred_logit <- svycontrast(m1_2019, c("(Intercept)" = 1,
                                     "age_cat25to34" = 1))

# 95% confidence limits on the log-odds scale, using the design degrees of
# freedom as elsewhere in this document.
pred_ci <- confint(pred_logit, df = degf(m1_2019$survey.design))

# Back-transform the estimate and both limits to probabilities. Only plain
# numbers are passed to ilogit(): transforming the svycontrast object itself
# would print a logit-scale SE next to the probability and give no interval.
ilogit(cbind(probability = coef(pred_logit), pred_ci))
```

```{r include = FALSE, eval=FALSE}
# Linear models of did_wfh per survey year (no `family` given). M1 is the base
# specification; M2 additionally includes factor(CIHISPEED) and NCHLT5.
m1_2019 <- svyglm(
  did_wfh ~ INCEARN + race_cat + sex_cat + age_cat + CanWorkFromHome +
    county_pop_type + NCHILD,
  design = dstrata2019
)

m2_2019 <- svyglm(
  did_wfh ~ INCEARN + race_cat + sex_cat + age_cat + factor(CIHISPEED) +
    CanWorkFromHome + county_pop_type + NCHILD + NCHLT5,
  design = dstrata2019
)

m1_2021 <- svyglm(
  did_wfh ~ INCEARN + race_cat + sex_cat + age_cat + CanWorkFromHome +
    county_pop_type + NCHILD,
  design = dstrata2021
)

m2_2021 <- svyglm(
  did_wfh ~ INCEARN + race_cat + sex_cat + age_cat + factor(CIHISPEED) +
    CanWorkFromHome + county_pop_type + NCHILD + NCHLT5,
  design = dstrata2021
)

# All four fits in one summary table.
export_summs(
  m1_2019, m2_2019, m1_2021, m2_2021,
  model.names = c("M1 Linear 2019", "M2 Linear 2019", "M1 Linear 2021", "M2 Linear 2021"),
  robust = "HC3",
  statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared")
)


```


```{r othermodels,  eval=FALSE, include = FALSE}
# M3: linear models per year with occupation x WFH-feasibility,
# CIHISPEED x CINETHH and NCHILD x NCHLT5 interactions.
m3_2019 <- svyglm(
  did_wfh ~ occ_2dig_labels * CanWorkFromHome + race_cat + sex_cat + age_cat +
    CIHISPEED * CINETHH + county_pop_type + NCHILD * NCHLT5,
  design = dstrata2019
)
m3_2021 <- svyglm(
  did_wfh ~ occ_2dig_labels * CanWorkFromHome + race_cat + sex_cat + age_cat +
    CIHISPEED * CINETHH + county_pop_type + NCHILD * NCHLT5,
  design = dstrata2021
) # %>% summary()

export_summs(
  m3_2019, m3_2021,
  model.names = c("2019", "2021"),
  robust = "HC3",
  statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared")
)

# M4: linear models per year using marital status (MARST) and CINETHH as
# factors, alongside young children, sex, occupation and county type.
m4_2019 <- svyglm(
  did_wfh ~ NCHLT5 + sex_cat + factor(MARST) + factor(CINETHH) +
    occ_2dig_labels + county_pop_type,
  design = dstrata2019
) # %>% summary()

m4_2021 <- svyglm(
  did_wfh ~ NCHLT5 + sex_cat + factor(MARST) + factor(CINETHH) +
    occ_2dig_labels + county_pop_type,
  design = dstrata2021
) # %>% summary()

export_summs(
  m4_2019, m4_2021,
  model.names = c("M4 2019", "M4 2021"),
  robust = "HC3",
  statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared")
)

# M4 and M3 together in one table.
export_summs(
  m4_2019, m4_2021, m3_2019, m3_2021,
  robust = "HC3",
  statistics = c(N = "nobs", R2 = "r.squared")
)

```

> Recode marital status and add back into regression