-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathteam_selection.R
145 lines (123 loc) · 5.27 KB
/
team_selection.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
library(kbtbr)
library(readr)
library(stringr)
library(dplyr)
library(tidyr)
library(janitor)
source("utils.R") # custom functions
# please refer to the readme for what the project id is
# this value should be used as a label within the kobo form - for both questions
PROJECT_ID <- "2022-04-LAU" # REPLACE THIS EXAMPLE VALUE
# all outputs of this script (csv files, report) are written into this folder
PROJECT_FOLDER <- here::here("projects/", PROJECT_ID)
# create folder for project
# recursive = TRUE also creates the parent "projects" directory when it is
# missing; without it dir.create() fails if the parent does not exist yet
if (!dir.exists(PROJECT_FOLDER)) {
  dir.create(PROJECT_FOLDER, recursive = TRUE)
}
# kobo instance (client for kobo.correlaid.org) and the surveys it exposes
kobo <- kbtbr::Kobo$new("kobo.correlaid.org")
all_surveys <- kobo$get_surveys()
# get survey id: look the application survey up by its name and take its uid
survey_id <- dplyr::pull(
  dplyr::filter(all_surveys, name == "Applications for CorrelAid Projects"),
  uid
)
# fetch the submissions of that survey
applications <- kobo$get_submissions(survey_id)
# Kobotoolbox handles data very "wide format-y", so we have to do quite a bit of data wrangling
# this is code from the clean_kobo function in projectutils: https://github.com/CorrelAid/projectutils/blob/cd118871ae5d50c5116fd86935aa11e95a4edf25/R/applications.R#L29
# it is copied here to allow for easier modifications to the code
# rename
applications <- applications %>%
  dplyr::rename(
    applicant_id = `_id`,
    motivation_why_involved = motivation_why
  )
# if the gender self identification variable does not exist, then create it
# filled with NA. A character NA is used (not the logical NA) so the column
# has the same type as `gender` when the two are coalesced further below.
if (!"gender_self_identification" %in% colnames(applications)) {
  applications$gender_self_identification <- NA_character_
}
# people can apply to multiple projects at once, data is not stored in separate rows by KoboToolbox
# --> pull "applied to" information into separate rows and data frame
project_ids_df <- dplyr::select(applications, applicant_id, project_id)
# keep the unsplit answer in applied_to before project_id is split up
project_ids_df <- dplyr::mutate(project_ids_df, applied_to = project_id)
# one row per space-separated project id
project_ids_df <- tidyr::separate_rows(project_ids_df, project_id, sep = " ")
# bring the ids into one common format (helper from utils.R)
project_ids_df <- dplyr::mutate(
  project_ids_df,
  project_id = unify_project_id_formats(project_id)
)
project_ids_df <- dplyr::distinct(project_ids_df)
# project role: each project has its own column --> make into long data frame
project_roles_df <- dplyr::select(
  applications,
  applicant_id,
  dplyr::starts_with("project_role")
)
project_roles_df <- tidyr::pivot_longer(
  project_roles_df,
  dplyr::starts_with("project_role"),
  names_to = "project_id_unclean",
  values_to = "project_role"
)
# derive the project id from the original kobo column name (helper from utils.R)
project_roles_df <- dplyr::mutate(
  project_roles_df,
  project_id = extract_ids_from_kobo_columnnames(project_id_unclean)
)
# drop "DNA" entries (presumably "did not apply" - verify against the form);
# filter() also discards rows where project_role is NA
project_roles_df <- dplyr::filter(project_roles_df, project_role != "DNA")
project_roles_df <- dplyr::distinct(project_roles_df)
project_roles_df <- dplyr::select(project_roles_df, -project_id_unclean)
# personal information and skills
# select variables and rename columns
personal_info_df <- applications %>%
  dplyr::select(
    applicant_id,
    dplyr::starts_with("gender"),
    first_name,
    last_name,
    email = email_address,
    german_skills,
    dplyr::starts_with("rating"),
    dplyr::starts_with("motivation"),
    consent_privacy_policy,
    dplyr::starts_with("past_")
  ) %>%
  dplyr::distinct() %>%
  janitor::clean_names() %>%
  # rating_technologies_tools* columns become skills*
  dplyr::rename_with(
    .fn = function(nm) {
      stringr::str_replace_all(nm, "rating_technologies_tools", "skills")
    },
    .cols = dplyr::starts_with("rating_technologies_tools")
  ) %>%
  # the remaining rating_* columns just lose the "rating_" part
  dplyr::rename_with(
    .fn = function(nm) {
      stringr::str_replace_all(nm, "rating_", "")
    },
    .cols = dplyr::starts_with("rating")
  )
# gender
# step 1: blank out the "self_identification" answer
# step 2: fill missing gender values from the gender_self_identification column
personal_info_df <- dplyr::mutate(
  personal_info_df,
  gender = dplyr::if_else(
    gender == "self_identification",
    NA_character_,
    gender
  ),
  gender = dplyr::coalesce(gender, gender_self_identification)
)
# join the data frames: start from the applicant/project pairs and attach the
# role for that project and the applicant's personal information
cleaned_df <- project_ids_df %>%
  dplyr::left_join(project_roles_df, by = c("applicant_id", "project_id")) %>%
  dplyr::left_join(personal_info_df, by = "applicant_id")
# now finally filter for our project!
# .env$ makes explicit that PROJECT_ID is the script-level constant, not a column
cleaned_df_filtered <- cleaned_df %>%
  dplyr::filter(project_id == .env$PROJECT_ID)
# warn (but continue) when nothing matched; the message must interpolate the
# script-level PROJECT_ID - there is no `project_id` variable at this level
if (nrow(cleaned_df_filtered) == 0) {
  usethis::ui_warn(glue::glue("No applicants present after filtering for {PROJECT_ID}. Did you specify the PROJECT_ID in the correct format?"))
}
# reorder columns: identifiers and project columns first, then past
# applications, skills, techniques and topics; all other columns keep their
# order behind them
cleaned_df_filtered <- dplyr::relocate(
  cleaned_df_filtered,
  applicant_id,
  gender,
  email,
  dplyr::ends_with("name"),
  dplyr::starts_with("project"),
  applied_to,
  dplyr::starts_with("past"),
  dplyr::starts_with("skills"),
  dplyr::starts_with("techniques"),
  dplyr::starts_with("topics")
)
# anonymize and save: drop email and name columns before writing
anon_path <- here::here(PROJECT_FOLDER, "applications_anon.csv")
appl_anon <- dplyr::select(
  cleaned_df_filtered,
  -c(email, first_name, last_name)
)
readr::write_csv(appl_anon, anon_path)
# mapping from email / name to applicant_id
mapping_path <- here::here(PROJECT_FOLDER, "mapping.csv")
mapping <- dplyr::select(
  cleaned_df_filtered,
  applicant_id,
  email,
  first_name,
  last_name
)
readr::write_csv(mapping, mapping_path)
# google sheets upload: this script only writes the table as a csv file
gs_main_table_path <- here::here(PROJECT_FOLDER, "google_sheets_main_table.csv")
gs_main_table <- dplyr::select(
  cleaned_df_filtered,
  project_id,
  applicant_id,
  gender,
  applied_as = project_role,
  past_applications
)
readr::write_csv(gs_main_table, gs_main_table_path)
# knit report: render the template into the project folder, passing the
# project id and the path of the anonymized csv as report parameters
rmarkdown::render(
  input = here::here("templates/template_applications_report.Rmd"),
  output_dir = PROJECT_FOLDER,
  params = list(
    project_id = PROJECT_ID,
    anon_path = anon_path
  )
)