-
Notifications
You must be signed in to change notification settings - Fork 0
/
ABCD Inference Stats.R
122 lines (99 loc) · 4.24 KB
/
ABCD Inference Stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# Importing the Excel file into R ----
pacman::p_load(dplyr, tidyr)
# Read the workbook from the user's home directory.
data <- readxl::read_xlsx("~/ABCD.xlsx")
# Drop the first data row.
# NOTE(review): presumably a secondary header / question-text row in the
# export -- confirm against the spreadsheet.
# Negative indexing is used because `2:nrow(data)` counts backwards (2, 1)
# when the table has fewer than two rows.
data <- data[-1, ]
# Prepend a sequential ID column (this also converts the tibble returned by
# read_xlsx() into a base data.frame). seq_len() gives an empty vector for an
# empty table, whereas `1:nrow(data)` would give c(1, 0).
data <- data.frame(ID = seq_len(nrow(data)), data)
# Recoding variables in the data frame ----
# Collapse race into two groups. The result goes into a new column,
# Race_Grouped, which is the name the chi-square table and the manager model
# read; the detailed Race column is left untouched because it is re-levelled
# with its original categories further down.
# Rows with a missing Race stay NA (neither condition is TRUE for NA).
data <- data %>%
  mutate(
    Race_Grouped = case_when(
      Race == "White" ~ "Non-Hispanic White",
      Race != "White" ~ "POC"
    )
  )
# Bin Turnover into four bands. The bands are stored in a new column,
# Turnover_Grouped, which is the name the turnover model reads; the raw
# Turnover values are kept as they are.
# Only whole numbers from 0 to 40 match a band; anything else (missing,
# fractional, or above 40) becomes NA.
data <- data %>%
  mutate(
    Turnover_Grouped = case_when(
      Turnover %in% 0:10 ~ "0-10",
      Turnover %in% 11:20 ~ "11-20",
      Turnover %in% 21:30 ~ "21-30",
      Turnover %in% 31:40 ~ "31-40"
    )
  )
# Replace Time with a binary indicator:
#   2015-2017 -> 0
#   2021-2022 -> 1
# Every other value (including NA) matches no condition and becomes NA.
data <- mutate(
  data,
  Time = case_when(
    Time %in% 2015:2017 ~ 0,
    Time %in% 2021:2022 ~ 1
  )
)
# Categorizing individuals by role and compensation status ----
# For instance, PIs/CoIs who are compensated are labelled
# "Compensated PIs/CoIs" in the Finance_Groups column.
# The column is named Finance_Groups (plural) because that is the name the
# chi-square section selects and groups by.
# Any Roles/Finance combination not listed below (including missing values,
# and "I’m not sure" for roles other than Trainees) is left as NA.
data <- data %>%
  mutate(
    Finance_Groups = case_when(
      Roles == "PIs/CoIs" & Finance == "Compensated" ~ "Compensated PIs/CoIs",
      Roles == "PIs/CoIs" & Finance == "Volunteers" ~ "Volunteer PIs/CoIs",
      Roles == "RAs" & Finance == "Compensated" ~ "Compensated RAs",
      Roles == "RAs" & Finance == "Volunteers" ~ "Volunteer RAs",
      Roles == "RECOVER" & Finance == "Compensated" ~ "Compensated RECOVER",
      Roles == "RECOVER" & Finance == "Volunteers" ~ "Volunteer RECOVER",
      Roles == "Trainees" & Finance == "Compensated" ~ "Compensated Trainees",
      Roles == "Trainees" & Finance == "Volunteers" ~ "Volunteer Trainees",
      Roles == "Trainees" & Finance == "I’m not sure" ~ "Unsure Trainees"
    )
  )
# Categorizing PIs/CoIs by manager status ----
# PIs/CoIs who are non-managers are coded 0 and those who are managers are
# coded 1 in the new Manager_Groups column, which is the outcome of the
# logistic regression below. Every other respondent is left as NA.
# The result is written to its own column so that it does not overwrite the
# role-by-compensation grouping created earlier.
data <- data %>%
  mutate(
    Manager_Groups = case_when(
      Roles == "PIs/CoIs" & Manage == "Non-managers" ~ 0,
      Roles == "PIs/CoIs" & Manage == "Managers" ~ 1
    )
  )
# Chi-square test: race group by finance group ----
# Keep the two grouping columns and drop respondents missing either value.
# Filtering the NAs up front means no placeholder category has to be added and
# then removed again by row position.
race_finance_rows <- data[c("Finance_Groups", "Race_Grouped")]
race_finance_rows <- race_finance_rows[complete.cases(race_finance_rows), ]
# Contingency table of counts: one row per race group, one column per finance
# group. table() takes its labels from the data, so the counts do not depend
# on the exact spelling of the race labels.
Race_Finance <- table(
  race_finance_rows$Race_Grouped,
  race_finance_rows$Finance_Groups
)
# Omnibus chi-square test. The p-value comes from Monte Carlo simulation, so
# it varies slightly between runs unless the RNG seed is fixed beforehand.
chisq.test(Race_Finance, simulate.p.value = TRUE)
# Post hoc pairwise comparisons with Bonferroni correction ----
n_groups <- ncol(Race_Finance)
# One comparison per unordered pair of finance groups (28 when there are 8).
bonferroni <- 0.05 / choose(n_groups, 2)
if (n_groups >= 2) {
  # Each column of group_pairs holds the two column indices of one pair.
  group_pairs <- combn(n_groups, 2)
  for (k in seq_len(ncol(group_pairs))) {
    pair <- group_pairs[, k]
    # `[, pair]` selects whole columns; a single index vector on a matrix
    # would pick individual cells instead.
    test <- chisq.test(
      Race_Finance[, pair, drop = FALSE],
      simulate.p.value = TRUE
    )
    # Only print pairs that meet the corrected threshold. isTRUE() skips a
    # pair whose p-value is NA instead of stopping the script.
    if (isTRUE(test$p.value < bonferroni)) {
      print(sprintf(
        "Columns %s & %s",
        colnames(Race_Finance)[pair[1]],
        colnames(Race_Finance)[pair[2]]
      ))
      print(test)
    }
  }
}
# Set explicit level orders on the categorical columns used in the regressions.
# The first level listed becomes the baseline under R's default treatment
# contrasts; any value not listed in `levels` is turned into NA.
data$Race <- factor(
  data$Race,
  levels = c(
    "White",
    "Hispanic or Latino/a/x",
    "Asian or Pacific Islander",
    "Black or African American",
    "Multiracial or Biracial",
    "Middle Eastern/North African",
    "Other race/ethnicity not listed here"
  )
)
data$Roles <- factor(
  data$Roles,
  levels = c("PIs/CoIs", "RAs", "RECOVER", "Trainees"),
  exclude = NA
)
data$ABCD_Roles <- factor(
  data$ABCD_Roles,
  levels = c(
    "PIs/CoIs",
    "RAs",
    "RECOVER Coordinators",
    "Project Coordinators",
    "Site Coordinators",
    "Research Coordinators",
    "Site Clinicians",
    "Medical Professionals",
    "Trainees"
  ),
  exclude = NA
)
# Manager status by race group (logistic regression) ----
# Variables are written bare in the formulas so that they are looked up in the
# `data` argument. Writing `data$x` inside a formula bypasses that argument and
# produces coefficient names prefixed with "data$".
model_2 <- glm(Manager_Groups ~ Race_Grouped, data = data, family = "binomial")
summary(model_2)
# Average work hours ----
# NOTE(review): Time is recoded to a 0/1 indicator earlier in this script, so
# this fits a linear model to a binary outcome -- confirm that this is the
# intended response for the "average work hours" analysis.
model_3 <- lm(Time ~ Roles, data = data)
summary(model_3)
# Turnover group by role ----
# NOTE(review): with a response that has more than two categories, a binomial
# glm treats the first level as "failure" and every other level as "success"
# (see ?family). Confirm that contrast is the one intended for the turnover
# bands.
model_4 <- glm(Turnover_Grouped ~ Roles, data = data, family = "binomial")
summary(model_4)