-
Notifications
You must be signed in to change notification settings - Fork 0
/
Descriptive_Example.R
151 lines (116 loc) · 6.75 KB
/
Descriptive_Example.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#Script to create the Dummy_Education_Plot_Example.png
#These are the type of plots that were generated from the ABCD Descriptive script
#Import excel sheet
pacman::p_load(dplyr, ggplot2, tidyr)
# Read the survey export, then drop the first row of the sheet and add a
# running ID. (NOTE(review): the dropped row is presumably a second header /
# question-text row of the export -- confirm against the spreadsheet.)
data <- readxl::read_xlsx("~/Documents/ABCD Demographics Survey.xlsx")
# tail(-1) instead of data[2:nrow(data), ]: 2:nrow() counts backwards when the
# sheet has fewer than two rows.
data <- utils::tail(data, -1)
data <- data.frame(ID = seq_len(nrow(data)), data)

# Renaming variables in dataframe
# Q6_4: "0" vs. anything else; Q5_4: binned into 10-wide bands from 0 to 40.
# Values matching no condition become NA and are turned into "Filler" below.
data <- data %>%
  mutate(
    Q6_4_Grouped = case_when(
      Q6_4 == "0" ~ "Does not work w/ participants",
      Q6_4 != "0" ~ "Works w/ Participants"
    ),
    Q5_4_Grouped = case_when(
      Q5_4 %in% 0:10 ~ "0-10",
      Q5_4 %in% 11:20 ~ "11-20",
      Q5_4 %in% 21:30 ~ "21-30",
      Q5_4 %in% 31:40 ~ "31-40"
    )
  )

# For each question, count data is grouped into a longform dataframe for plotting
# Every missing cell is replaced by the placeholder string "Filler"
# (create_dataframe() excludes that value when it collects categories).
data[is.na(data)] <- "Filler"

#############################
# Plotting for specific groups
# Creating group data with all the necessary groups to join with rbind
# NOTE(review): Q2_Grouped, Q4_Grouped, Q30_Grouped, Q8_Simplified and
# Q11_Simplified are not created in this script; they are assumed to already
# exist as columns of the spreadsheet -- confirm.
data <- data[, c(
  "Q2_Grouped", "Q4_Grouped", "Q6_4_Grouped", "Q30_Grouped",
  "Q8_Simplified", "Q9", "Q10", "Q11_Simplified", "Q5_4_Grouped"
)]

# Q2 and Q4 answers can hold several comma-separated values, so they are
# split into one row per value before being used as the grouping label.
Q2_seperated_data <- separate_rows(data, "Q2_Grouped", sep = ",") %>%
  mutate(Group = Q2_Grouped)
Q4_seperated_data <- separate_rows(data, "Q4_Grouped", sep = ",") %>%
  mutate(Group = Q4_Grouped)

# Q6_4 and Q30 are used as-is, one row per respondent.
Q6_4_seperated_data <- data %>% mutate(Group = Q6_4_Grouped)
# Built from `data`, not from Q2_seperated_data: the Q2 table already contains
# one row per Q2 value, which would count a respondent with several Q2 values
# more than once in the Q30 group.
Q30_seperated_data <- data %>% mutate(Group = Q30_Grouped)

# Rows of `df` whose Group equals `label` (which() drops NA comparisons).
pick_group <- function(df, label) {
  df[which(df$Group == label), ]
}

# Stack the groups of interest; a respondent can appear under several groups.
combined_data <- rbind(
  pick_group(Q2_seperated_data, "PIs/CoIs"),
  pick_group(Q2_seperated_data, "RECOVER"),
  pick_group(Q2_seperated_data, "Trainees"),
  pick_group(Q2_seperated_data, "RAs"),
  pick_group(Q6_4_seperated_data, "Works w/ Participants"),
  pick_group(Q30_seperated_data, "Managers"),
  pick_group(Q4_seperated_data, "Volunteers"),
  pick_group(Q4_seperated_data, "Compensated")
)

# Keep the grouping label plus the five plotted variables, under readable names
combined_data <- data.frame(
  combined_data[c("Group", "Q8_Simplified", "Q9", "Q10", "Q11_Simplified", "Q5_4_Grouped")]
)
colnames(combined_data) <- c(
  "Group", "Education", "Gender", "Race_Ethnicity", "Group_Membership", "Working_Hours"
)

# Fix the order of the groups through explicit factor levels
combined_data$Group <- factor(
  combined_data$Group,
  levels = c(
    "Volunteers",
    "Compensated",
    "Managers",
    "Works w/ Participants",
    "RECOVER",
    "RAs",
    "Trainees",
    "PIs/CoIs"
  )
)
# Function to create dataframe for plotting
#
# Builds a long-form table with one row per (group, category) combination,
# holding the number of rows of `data` that fall into that combination.
#
# Arguments:
#   data        data frame with a "Group" column and the column named by
#               `column_name`.
#   column_name name (single string) of the column whose categories are counted.
#   elongate    "Yes" to first split comma-separated entries of `column_name`
#               into one row per entry; any other value leaves `data` unchanged.
#
# Returns:
#   data.frame with columns Group (character), Variable (character) and
#   Percentage (integer). Percentage holds raw row counts at this point, not
#   percentages. Groups follow the level order of `Group`, categories the level
#   order of the counted column; "Filler" and NA are not treated as categories.
#   Empty input yields a 0-row data frame with the same three columns.
create_dataframe <- function(data, column_name, elongate) {
  if (isTRUE(elongate == "Yes")) {
    # all_of() marks column_name as a character vector of names; passing the
    # bare variable to a tidyselect argument is ambiguous and deprecated.
    data <- data.frame(separate_rows(data, all_of(column_name), sep = ","))
  }

  # [[ ]] always extracts a plain vector (data[, col] does not for tibbles)
  group_values <- data[["Group"]]
  variable_values <- data[[column_name]]

  # Retrieve names of factors
  group_factor_levels <- levels(factor(group_values))
  variable_factor_levels <- levels(factor(variable_values, exclude = c("Filler", NA)))

  # Every group paired with every category: groups vary slowest
  group_vector <- rep(group_factor_levels, each = length(variable_factor_levels))
  variable_names <- rep(variable_factor_levels, times = length(group_factor_levels))

  # Count the rows matching each (group, category) pair; na.rm = TRUE ignores
  # comparisons that evaluate to NA
  counts <- vapply(
    seq_along(group_vector),
    function(i) {
      sum(
        group_values == group_vector[i] & variable_values == variable_names[i],
        na.rm = TRUE
      )
    },
    integer(1)
  )

  data.frame(
    Group = group_vector,
    Variable = variable_names,
    Percentage = counts,
    stringsAsFactors = FALSE
  )
}
# Count table for the Education column of combined_data (no row splitting)
Education <- create_dataframe(
  data = combined_data,
  column_name = "Education",
  elongate = "No"
)

# Generate random data
# Distinct group labels (as character) and distinct education categories
groups <- unique(as.character(Education[["Group"]]))
education_groups <- unique(Education$Variable)

for (group in groups) {
  # Draw one rounded uniform value per education category and redraw until the
  # values for this group sum to strictly more than 60 and strictly less than
  # 100, so that every group gets its own plausible-looking distribution.
  # Each draw is capped (before rounding) at 100 / number of categories.
  repeat {
    prob <- round(
      runif(
        length(education_groups),
        min = 0,
        max = 100 / length(unique(education_groups))
      ),
      0
    )
    if (sum(prob) < 100 && sum(prob) > 60) {
      break
    }
  }
  # Overwrite the counts of this group with the simulated values
  Education[Education$Group == group, "Percentage"] <- prob
}

# Shorten the free-text answer label for display
Education$Variable[Education$Variable %in% "Other:______________"] <- "Other"
# Colour palette for the plots, used by create_plot().
# Education can be replaced with the Working_Hours, Gender, Race_Ethnicity,
# and Group_Membership dataframes to create different plots.
colors <- c(
  "#6639B5", "#9B25AE", "#1F95F1", "#009586",
  "#4AAE4F", "#FEC006", "#FE5521", "#E81C62"
)
# Starts out empty (NULL)
new_colours <- NULL
# Function to create plots
#
# Draws one lollipop chart per group (a segment from 0 to the value, a point
# at the value and a white "<value>%" label on the point), arranged in facets
# four columns wide, and saves it as a 7 x 3 inch, 300 dpi PNG.
#
# Arguments:
#   file_name     path of the PNG file to write.
#   dataframe     data frame with the columns Group, Variable and Percentage.
#   group_colours character vector of colours, one per group; defaults to the
#                 top-level `colors` palette. Groups take colours in order of
#                 first appearance in `dataframe`; the palette is recycled when
#                 there are more groups than colours.
#
# Returns:
#   The value of ggsave().
create_plot <- function(file_name, dataframe, group_colours = colors) {
  stopifnot(is.character(group_colours), length(group_colours) > 0)

  # Colour vector needs to match the rows in the Group column.
  # It is derived from each row's own group rather than from the row count of
  # one hard-coded group, so it always has exactly nrow(dataframe) entries.
  group_index <- match(dataframe$Group, unique(dataframe$Group))
  row_colours <- group_colours[(group_index - 1L) %% length(group_colours) + 1L]

  figure <- ggplot(
    dataframe,
    aes(Percentage, Variable, label = paste0(Percentage, "%"))
  ) +
    geom_segment(
      aes(x = 0, y = Variable, xend = Percentage, yend = Variable),
      show.legend = FALSE,
      color = row_colours
    ) +
    theme_bw() +
    facet_wrap(~Group, ncol = 4) +
    theme(
      plot.title = element_text(hjust = 0.5),
      text = element_text(size = 10),
      panel.grid.major = element_blank(),
      axis.title.y = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_text(size = 8),
      axis.text.y = element_text(size = 8),
      strip.background = element_rect(colour = "black", fill = "grey95")
    ) +
    geom_point(size = 5, show.legend = FALSE, color = row_colours) +
    # Keep the x axis spanning the full 0-100 range in every plot
    expand_limits(x = c(0, 100)) +
    geom_text(color = "white", size = 2)

  ggsave(
    filename = file_name,
    plot = figure,
    width = 7,
    height = 3,
    dpi = 300,
    units = "in",
    device = "png"
  )
}
# Write the Education plot to disk
create_plot(
  file_name = "Education_Dummy_Plot.png",
  dataframe = Education
)