-
Notifications
You must be signed in to change notification settings - Fork 1
/
WA_Can_HCA_DtaMngmt_Backup.Rmd
375 lines (293 loc) · 12.3 KB
/
WA_Can_HCA_DtaMngmt_Backup.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
---
title: "WA_Can_HCA_DtaMngmt"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
# Use the project folder as the working directory for all following chunks.
# knitr restores the working directory after every chunk, so a setwd() call
# here would not carry over to the chunks that read and write "data/...".
# The root.dir option is the supported way to set it for the whole document.
knitr::opts_knit$set(root.dir = "C:/wa-cannabis-hca")
# Clear workspace
rm(list = ls())
# Load required libraries
library(survey)
library(epiR)
library(tidyverse)
library(foreign)
library(haven)
library(table1)
library(kableExtra)
library(gtsummary)
library(gt)
library(broom)
```
```{r}
#### Load in the WA-BRFSS Data ####
# NOTE: the .dta -> .csv conversion in this chunk is only necessary if the
# data are not already available in CSV format.
# Import the Stata file, then keep survey years 2015-2019 (the years of
# interest given variable availability).
temp <- read_dta('data/MAS_2011_2020_v2.dta')
temp <- subset(temp, year > 2014 & year < 2020)
# Share of the retained observations contributed by each survey year
tc <- prop.table(table(temp$year))
# tc <- table(temp$sex) %>%
# prop.table() # get the proportion of each year's observations
#
# # Creating new Race/Eth. variable
# temp$race[temp$"_mrace1" == 4 & temp$"_hispanc" != 1] <- 1 # Non-hispanic Asian
# temp$race[temp$"_mrace1" == 2 & temp$"_hispanc" != 1] <- 2 # Non-hispanic Black or African American
# temp$race[temp$"_hispanc" == 1] <- 3 # Hispanic/Latino/a
# temp$race[temp$"_mrace1" == 7 & temp$"_hispanc" != 1] <- 4 # Non-hispanic Multiracial
# temp$race[temp$"_mrace1" == 6 & temp$"_hispanc" != 1] <- 5 # Non-hispanic Native American
# temp$race[temp$"_mrace1" == 3 & temp$"_hispanc" != 1] <- 6 # Non-hispanic other race
# temp$race[temp$"_mrace1" == 5 & temp$"_hispanc" != 1] <- 7 # Non-hispanic Pacific Islander
# temp$race[temp$"_mrace1" == 1 & temp$"_hispanc" != 1] <- 8 # Non-hispanic White
# temp$race[is.na(temp$"_mrace1") & is.na(temp$"_hispanc")] <- NA
#
# tr <- table(temp$race) %>%
# prop.table()
#
# ta <- table(temp$"_age_g") %>%
# prop.table()
# Re-weighting: multiply each year's `_llcpwt` by that year's share of the
# pooled observations (`tc`, computed above).
# Years are looked up by name rather than by position, so each weight is scaled
# by its own year's share even if a year is absent from the data.
# `tc[[yr]]` returns a plain number; multiplying a longer vector by the
# length-one table `tc[1]` relies on recycling a length-one array, which R
# reports as deprecated.
for (yr in names(tc)) {
  in_year <- temp$year == as.numeric(yr)
  temp$`_llcpwt`[in_year] <- temp$`_llcpwt`[in_year] * tc[[yr]]
}
# Re-write the file as a csv
write.csv(temp, 'data/WA_2015_2019_BRFSS.csv')
# Remove the imported dta data from memory
rm(temp)
# Read the newly created csv file, keep only the desired variables, and
# restrict to age groups below 6.
# read.csv() prefixes column names that start with "_" with "X", so `_llcpwt`
# from the .dta file appears here as X_llcpwt (likewise X_ststr, X_age_g, ...).
# TRUE/FALSE are spelled out: the shorthands T and F are ordinary variables
# that can be reassigned.
full <- read.csv("data/WA_2015_2019_BRFSS.csv", stringsAsFactors = FALSE) %>%
  select(
    year,
    zipcode,
    zipcode1,
    X_ststr,
    X_llcpwt,
    sex,
    X_age_g,
    X_mrace1,
    X_hispanc,
    educa,
    employ1,
    income_wa,
    X_smoker3,
    alcday5,
    X_ment14d,
    hlthpln1,
    medcost,
    mjpast30,
    mj30day
  ) %>%
  subset(X_age_g < 6) # Removing individuals over 65 due to Medicare eligibility
# *Optional* Write to new file for memory efficiency
write.csv(full, "data/WA_2015_2019_BRFSS_Sub.csv")
# Everything in the workspace is dropped before the subset is read back in
rm(list = ls())
# Same string handling as the read above, regardless of R version
df <- read.csv("data/WA_2015_2019_BRFSS_Sub.csv", stringsAsFactors = FALSE)
# write.csv() stored the row names as an unnamed first column, which
# read.csv() imports as `X`; it carries no data, so drop it
df$X <- NULL
# summary(df)
```
```{r}
#### Assigning NA Values ####
# Each entry names a column of `df` and the response codes that are replaced
# with NA in that column.
# NOTE(review): these are presumably the "don't know" / "refused" codes --
# confirm against the codebook.
na_codes <- list(
  sex       = c(7, 9),     # sex
  X_mrace1  = c(77, 99),   # race
  X_hispanc = 9,           # ethnicity
  educa     = 9,           # education
  employ1   = 9,           # employment
  income_wa = c(77, 99),   # income
  X_ment14d = 9,           # mental health
  hlthpln1  = c(7, 9),     # insurance
  medcost   = c(7, 9),     # medical cost
  X_smoker3 = 9,           # smoking
  alcday5   = c(777, 999), # alcohol
  mj30day   = c(7, 9),     # cannabis (past-30-day use)
  mjpast30  = c(77, 99)    # cannabis (mjpast30)
)
for (var in names(na_codes)) {
  df[[var]][df[[var]] %in% na_codes[[var]]] <- NA
}
# Remove the helper objects so only `df` is left behind
rm(na_codes, var)
# summary(df)
# barplot(table(df$menthlth))
# barplot(table(df$mjpast30))
#
# testdf <- subset(df, mjpast30 != 88)
# barplot(table(testdf$mjpast30))
#
# testdf <- subset(df, menthlth != 88)
# barplot(table(testdf$menthlth))
```
```{r}
#### Dropping All Observations Where MJ or HCA is NA ####
# Keep only rows with non-missing cannabis use (mj30day), mental health
# (X_ment14d), medical cost (medcost) and health plan (hlthpln1), then merge
# the two zipcode columns into a single `zipcode` column placed first.
df <- df %>%
  filter(
    !is.na(mj30day),
    !is.na(X_ment14d),
    !is.na(medcost),
    !is.na(hlthpln1)
  ) %>%
  mutate(zipcode = coalesce(zipcode, zipcode1)) %>% # zipcode, falling back to zipcode1 when missing
  select(-zipcode1) %>%                             # the second zipcode column is no longer needed
  select(zipcode, everything())                     # move zipcode to the beginning of the dataframe
# summary(df)
# Derive the analysis variables. In every case_when() below, a row that
# matches none of the listed conditions (including rows where the source value
# is NA) is left as NA.
df <- df %>%
  mutate(
    # Mental health comparison: 14+ days (1) vs. 0-13 days (2)
    mentComp = case_when(
      X_ment14d == 3 ~ 1,
      X_ment14d == 1 | X_ment14d == 2 ~ 2
    ),
    # Tobacco use: current smoker (1) vs. former smoker or never smoked (2)
    curSmk = case_when(
      X_smoker3 == 1 | X_smoker3 == 2 ~ 1,
      X_smoker3 == 3 | X_smoker3 == 4 ~ 2
    ),
    # Alcohol use: past week (1), past month (2), none in past 30 days (3)
    curAlc = case_when(
      alcday5 > 100 & alcday5 < 108 ~ 1,
      alcday5 > 200 & alcday5 < 231 ~ 2,
      alcday5 == 888 ~ 3
    ),
    # Any current smoking or alcohol use (1) vs. neither (2);
    # NA whenever either component is missing
    curAlcSmk = case_when(
      is.na(curAlc) | is.na(curSmk) ~ NA_real_,
      curSmk == 2 & curAlc == 3 ~ 2,
      curSmk == 1 | curAlc == 1 | curAlc == 2 ~ 1
    ),
    # Education: did not graduate high school (1), graduated high school (2),
    # some college or tech school (3), graduated from college/tech school (4)
    edu = case_when(
      educa < 4 ~ 1,
      educa == 4 ~ 2,
      educa == 5 ~ 3,
      educa == 6 ~ 4
    ),
    # Race/ethnicity: Hispanic respondents are coded 3 regardless of race;
    # all other codes require a non-missing, non-Hispanic ethnicity.
    # NOTE(review): X_mrace1 == 6 is treated as Native American and
    # X_mrace1 == 3 as "other race" -- confirm these two codes against the
    # dataset's codebook.
    race = case_when(
      X_hispanc == 1 ~ 3,                 # Hispanic/Latino/a
      X_mrace1 == 4 & X_hispanc != 1 ~ 1, # Non-hispanic Asian
      X_mrace1 == 2 & X_hispanc != 1 ~ 2, # Non-hispanic Black or African American
      X_mrace1 == 7 & X_hispanc != 1 ~ 4, # Non-hispanic Multiracial
      X_mrace1 == 6 & X_hispanc != 1 ~ 5, # Non-hispanic Native American
      X_mrace1 == 3 & X_hispanc != 1 ~ 6, # Non-hispanic other race
      X_mrace1 == 5 & X_hispanc != 1 ~ 7, # Non-hispanic Pacific Islander
      X_mrace1 == 1 & X_hispanc != 1 ~ 8  # Non-hispanic White
    ),
    # Income, collapsed to three groups; this overwrites income_wa in place,
    # so running the step twice on the same data would recode the 1/2/3
    # values again.
    # NOTE(review): the lowest group was originally described as "less than
    # $25,000", while the factor labels applied later read "< $35,000" --
    # confirm the cut-point for code 5 against the codebook.
    income_wa = case_when(
      income_wa <= 5 ~ 1,
      income_wa > 5 & income_wa < 8 ~ 2,
      income_wa >= 8 ~ 3 # Income >$75,000
    ),
    # Difficulty accessing care (1) when medcost is 1 or hlthpln1 is 2;
    # no difficulty (2) when medcost is 2 and hlthpln1 is 1
    dHCA = case_when(
      medcost == 1 | hlthpln1 == 2 ~ 1,
      medcost == 2 & hlthpln1 == 1 ~ 2
    )
  )
```
```{r}
#### Factoring Variables ####
# Turn the coded gender and education columns into labelled factors.
df <- df %>%
  mutate(
    # Gender
    sex = factor(
      sex,
      levels = c(1, 2),
      labels = c("Male", "Female"),
      exclude = NULL
    ),
    # Education (uses the recoded `edu` variable)
    edu = factor(
      edu,
      levels = c(1, 2, 3, 4),
      labels = c(
        "Did not graduate high school",
        "High school graduate",
        "Some college or tech school",
        "Graduated college or tech school"
      ),
      exclude = NULL
    )
  )
# Race Factor
# `levels` lists the numeric race codes in display order and `labels` must
# line up with them position by position.
# The recode step assigns code 4 to multiracial and code 6 to "other race"
# respondents, so 6 is paired with "other race" and 4 with "two or more races"
# (the two codes were previously attached to each other's label).
df$race <- factor(df$race,
                  levels = c(5, 1, 2, 3, 7, 6, 4, 8),
                  labels = c("Non-Hispanic AIAN",
                             "Non-Hispanic Asian",
                             "Non-Hispanic Black or African American",
                             "Hispanic, Latino/a, or Spanish origin",
                             "Non-Hispanic NHPI",
                             "Non-Hispanic other race",
                             "Non-Hispanic two or more races",
                             "Non-Hispanic White"),
                  exclude = NULL)
# Convert the remaining coded columns to labelled factors. `levels` gives the
# numeric codes in display order and `labels` the text shown for each code.
df <- df %>%
  mutate(
    # Age group
    X_age_g = factor(
      X_age_g,
      levels = c(1, 2, 3, 4, 5),
      labels = c("18-24", "25-34", "35-44", "45-54", "55-64"),
      exclude = NULL
    ),
    # Employment
    employ1 = factor(
      employ1,
      levels = c(1, 2, 3, 4, 5, 6, 7, 8),
      labels = c(
        "Employed for wages", "Self-employed",
        "Out of work for 1 year or more", "Out of work < 1 year",
        "A homemaker", "A student", "Retired", "Unable to work"
      ),
      exclude = NULL
    ),
    # Income (the three-group recode stored in income_wa)
    income_wa = factor(
      income_wa,
      levels = c(1, 2, 3),
      labels = c("< $35,000", "$35,000-$75,000", "> $75,000"),
      exclude = NULL
    ),
    # Current smoker
    curSmk = factor(
      curSmk,
      levels = c(1, 2),
      labels = c("Current smoker", "Non-smoker"),
      exclude = NULL
    ),
    # Current alcohol use
    curAlc = factor(
      curAlc,
      levels = c(1, 2, 3),
      labels = c("Within past week", "Within past month", "None in past month"),
      exclude = NULL
    ),
    # Mental health
    mentComp = factor(
      mentComp,
      levels = c(1, 2),
      labels = c("14+ days", "0-13 days"),
      exclude = NULL
    ),
    # Health insurance: code 2 is listed first and labelled "Yes",
    # code 1 is labelled "No"
    hlthpln1 = factor(
      hlthpln1,
      levels = c(2, 1),
      labels = c("Yes", "No"),
      exclude = NULL
    ),
    # Medical cost
    medcost = factor(
      medcost,
      levels = c(1, 2),
      labels = c("Yes", "No"),
      exclude = NULL
    ),
    # Difficulty accessing care
    dHCA = factor(
      dHCA,
      levels = c(1, 2),
      labels = c("Yes", "No"),
      exclude = NULL
    ),
    # Cannabis use
    mj30day = factor(
      mj30day,
      levels = c(1, 2),
      labels = c("Cannabis use in past month", "No cannabis use in past month"),
      exclude = NULL
    )
  )
```
```{r}
#### Creating Labels for each variable ####
# Display label for each analysis variable, keyed by its column name in `df`.
var_labels <- c(
  X_age_g   = "Age group (yrs)",
  sex       = "Gender",
  race      = "Race/Ethnicity",
  edu       = "Education",
  income_wa = "Household Income",
  hlthpln1  = "No health coverage",
  medcost   = "Delay medical care due to cost in past year",
  mentComp  = "Poor mental health in past month",
  curAlc    = "Alcohol consumption",
  curSmk    = "Tobacco smoking status",
  mj30day   = "Cannabis use in past month",
  dHCA      = "Difficulty accessing healthcare"
)
# Attach each label to its column; [[ ]] extracts the bare string without
# the element name
for (col in names(var_labels)) {
  label(df[[col]]) <- var_labels[[col]]
}
# Remove the helper objects so only `df` is left behind
rm(var_labels, col)
# Store the cleaned data frame on disk.
# NOTE(review): save() writes an object that is restored with load(), not
# readRDS(), despite the ".Rds" extension -- confirm how downstream scripts
# read this file before changing either the function or the extension.
save(df, file = "data/WA_2015_2019_BRFSS_Clean.Rds")
```