-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01_correct_omissions_errors.R
101 lines (72 loc) · 4.94 KB
/
01_correct_omissions_errors.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Calcs and correct errors/omissions in data extraction
# 6.4.2020 updated 6.17.20
# load required packages
library(here)
# load source functions
#source(here::here('scr', 'conversion_functions.R'))
# set hard-coded variables
file <- 'covidence.csv'
# load data
dt <- read.csv(here::here('output', file))
# Errors ####
# Fix Study ID for Green 1994
dt[which(dt$Study.Identifier == 'GREEN 1994'), which(colnames(dt) == 'Study.Identifier')] <- 'Green 1994'
# Fix Tschernegg 2015 measure - correlation is taken from %SS decisions, not model parameters
dt[which(dt$Study.Identifier == 'Tschernegg 2015'), which(colnames(dt) == 'Measure')] <- 'proportion'
# Fix Boyle 2012 design
dt[which(dt$Study.Identifier == 'Boyle 2012'), which(colnames(dt) == 'Design')] <- 'continuous age'
# Fix Loceknhoff 2011 design
dt[which(dt$Study.Identifier == 'Löckenhoff 2011'), which(colnames(dt) == 'Design')] <- 'continuous age'
# Fix Buono 2015 design
dt[which(dt$Study.Identifier == 'Buono 2015'), which(colnames(dt) == 'Design')] <- 'extreme group'
#added in revision
# Fix Samanez-Larkin 2011 sample size for older
dt[which(dt$Study.Identifier == 'Samanez-Larkin 2011' & dt$Intervention == 'Older'), which(colnames(dt) == 'n')] <- 13
# Fix Liu magnitude of delay and age range
dt[which(dt$Study.Identifier == 'Liu 2016'), which(colnames(dt) == 'Magnitude.of.Time.Delay')] <- 'months'
dt[which(dt$Study.Identifier == 'Liu 2016' & dt$Intervention == 'Older'), which(colnames(dt) == 'age_range')] <- '61-87'
# Fix Study ID and measure in Sparrow 2019 and measure in Sparrow 2018
dt[which(dt$correct_data_extracted == 'No mean or sd but we can extract from barplot'),
which(colnames(dt) == 'Study.Identifier')] <- 'Sparrow 2019'
#dt[which(dt$Study.Identifier == 'Sparrow 2018'), which(colnames(dt) == 'Study.Identifier')] <- 'Sparrow 2019'
dt[which(dt$Study.Identifier == 'Sparrow 2019'), which(colnames(dt) == 'Measure')] <- 'proportion'
dt[which(dt$Study.Identifier == 'Sparrow 2018'), which(colnames(dt) == 'Measure')] <- 'proportion'
# Fix Hampton 2018 delay
dt[which(dt$Study.Identifier == 'Hampton 2018'), which(colnames(dt) == 'Magnitude.of.Time.Delay')] <- 'years'
# Fix Halfmann 2016 incentive
dt[which(dt$Study.Identifier == 'Halfmann 2016'), which(colnames(dt) == 'Incentive')] <- 'hypothetical'
# Fix Buono 2015 delay
dt[which(dt$Study.Identifier == 'Buono 2015'), which(colnames(dt) == 'Magnitude.of.Time.Delay')] <- 'years'
# Fix Bickel 2014 age SD
dt[which(dt$Study.Identifier == 'Bickel 2014'), which(colnames(dt) == 'age_sd')] <- 11.25
# Fix Sheffer 2016 incentive
dt[which(dt$Study.Identifier == 'Sheffer 2016'), which(colnames(dt) == 'Incentive')] <- 'hypothetical'
# Omissions ####
# Fix Sanchez-Rodrigue 2018 incentive
dt[which(dt$Study.Identifier == 'Sanchez-Roige 2018'), which(colnames(dt) == 'Incentive')] <- 'hypothetical'
# Age Mean for Gollner 2018 (Data taken from Table 1) - note they claim 96 participants, but Table 1 adds up to 101
dt[which(dt$Study.Identifier == 'Gollner 2018'), which(colnames(dt) == 'age_mean')] <- (11.3*18 + 21.8*25 + 42.3*18 + 73.4*25 + 90.0*15)/(18+25+18+25+15)
# Age Mean for Johnson (Data taken from Table 5 - nonsmokers)
dt[which(dt$Study.Identifier == 'Johnson 2015'), which(colnames(dt) == 'age_mean')] <- (35.6*55 + 35.5*59 + 35.6*61)/(55+59+61)
# Age mean for Kirby 2002 - Table 4 27.4 Median - make a star in table with exception
dt[which(dt$Study.Identifier == 'Kirby 2002'), which(colnames(dt) == 'age_mean')] <- 27.4
# Age mean for Tschernegg 2015
dt[which(dt$Study.Identifier == 'Tschernegg 2015'), which(colnames(dt) == 'age_mean')] <- (24.5*51 + 27.15*19)/(51+19)
# Add Liu 2016 middle age group
liuma <- dt[which(dt$Study.Identifier == 'Liu 2016' & dt$Intervention == 'Older'),]
liuma$Intervention <- 'Middle'
liuma$age_mean <- 43.27
liuma$age_sd <- 6.74
liuma$age_range <- '31-60'
liuma$n <- 745
liuma$mean <- c(-4.22, -3.93, -3.93, -3.64)
liuma$sd <- c(1.92, 1.76, 1.88, 1.80)
dt <- rbind(dt, liuma)
# Estimated means ####
# add assumed means for Buono 2015 & Garza 2016
dt[which(dt$Study.Identifier == 'Buono 2015'), ]$age_mean <- c(mean(seq(18,27)), mean(seq(45,55)))
dt[which(dt$Study.Identifier == 'Garza 2016'), ]$age_mean <- c(mean(seq(21,34)), mean(seq(35,49)), mean(seq(50,64)), mean(seq(65,76)))
dt[which(dt$Study.Identifier == 'Jimura 2011' & dt$Intervention == 'Younger'), ]$age_mean <- 20
# write data file
write.csv(dt, here::here('output', 'corrected.csv'), row.names = FALSE)
rm(dt, file, liuma)