-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathr_label_helper.R
131 lines (125 loc) · 5.16 KB
/
r_label_helper.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
########################################################################################
# Title: UK LLC R value and variable labelling helper
#
# This uses the expss package to apply variable and value labels to datasets, this can
# be done on individual datasets or multiple datasets at once. Due to performance issues
# it is not recommended to load and apply to all datasets at once (if you have a large
# number available as part of your project) as the loading will be slow
#
# ACTIONS: 1) UPDATE project number at ACTION1
# ACTIONS: 2) Decide which method (A,B,C) to use to label data:
# A: one table at a time - AND UPDATE data_source and table variables
# B: one data_source at a time - AND UPDATE data_source variable
# C: all project data in one go - NO UPDATE required
#
# Date: 13/01/2023 (version 1.1)
#######################################################################################
#################################
# ACTION1: enter/update project number in form "LLC_0001"
#################################
proj_no = "LLC_0009"
##############################################################
# define function to check and install packages if required
##############################################################
load_install_packages <-function(x){
for(i in x){
# returns true if package loads
if(!require(i, character.only = T)){
# if package not able to be loaded (re)install
install.packages(i, dependencies = T)
# load package following install
require(i, character.only = T)
}
}
}
##############
# packages
#############
packages = c("tidyr", "dplyr", "RODBC", "expss", "stringr", "tidyr", "rstudioapi")
# run function to load and install packages
load_install_packages(packages)
####################################################################################################################
# set the dir to where this script is save and source the labelling function (assumes both scripts in same location)
####################################################################################################################
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
source("r_label_functions.R")
#########################################
# DB connection and project information
#########################################
# setup connection
conn <- odbcConnect(dsn="LLC_DB")
# Get all table/viewnames avail in connection
allviews <- sqlTables(conn)
# just take ones attached to project schema
viewnames <- tolower(allviews[allviews$TABLE_SCHEM %in% proj_no,"TABLE_NAME"])
# and print to sense-check
print(viewnames)
#
# ######################################################################
# # METHOD A: label one table at a time
# # ACTION2a: define/update data_source AND table name to label (use lowercase)
# #######################################################################
# data_source <- "genscot"
# table <- "covidlife1_v0001_20211101"
# # run labelling function
# data_t1 <- lab_func(proj_no,data_source,table)
# # create df name
# name_t1 <- paste0(data_source,"_",table)
# # assign name to dataset
# assign(name_t1, data_t1)
#
# ######################################################################
# # METHOD B: separate one study/source and label multiple tables at a time
# # ACTION2b: define/update data_source to label (use lowercase)
# ######################################################################
# data_source <- "genscot"
# # get all views names belonging to data source/study
# one_stud_viewnames <- grep(paste0("^",data_source), viewnames, value=TRUE)
# # new
# one_stud_views_split <- str_split_fixed(one_stud_viewnames,"_",2)
# # create empty list to populate
# one_stud_dfs_w_labs <- list()
# # go through each dataframe and apply values and variable labels
# for (i in 1:length(one_stud_viewnames)){
# # define schema and tables names for function to run
# data_source <- one_stud_views_split[i,1]
# table <- one_stud_views_split[i,2]
# # run labelling function
# data <- lab_func(proj_no,data_source,table)
# # create df name
# name_t1 <- paste0(data_source,"_",table)
# # add dataframe to list
# one_stud_dfs_w_labs[[name_t1]] = data.frame(data)
# }
#
# ####################################################################
# # METHOD C: label all tables at same time - WARNING - SLOW TO RETRIVE ALL DATA
# ####################################################################
# # create empty list to populate
# all_dfs_w_labs <- list()
# # separate study name from descriptive part of table name
# all_views_split <- str_split_fixed(viewnames,"_",2)
# # go through each dataframe and apply values and variable labels
# for (i in 1:length(viewnames)){
# # define schema and tables names for function to run
# data_source <- all_views_split[i,1]
# table <- all_views_split[i,2]
# # run labelling function
# data <- lab_func(proj_no,data_source,table)
# # create df name
# name_t1 <- paste0(data_source,"_",table)
# # add dataframe to list
# all_dfs_w_labs[[name_t1]] = data.frame(data)
# }
#
#
#
#
#
#
#
#
#
#
#
#