-
Notifications
You must be signed in to change notification settings - Fork 1
/
12_Costing_primary_care.R
214 lines (157 loc) · 11 KB
/
12_Costing_primary_care.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Project: English results for the International Collaborative on Costs, Outcomes & Needs in Care (ICCONIC)
# Purpose: Process and cost CPRD primary care data - consultations and therapies
# Author: Dr Will Parry
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Setup - load packages, source folder locations ----
pkgs <- c('tidyverse', 'here')
sapply(pkgs, require, character.only = TRUE)
#__________________
#Consultations ----
cohorts <- readRDS(here('../Data', 'cohorts.rds')) #read in cohorts
cons <- readRDS(here('../Data', 'consultation.rds')) %>% select(-sysdate, -consid) #read in consultations and remove unnecessary columns
staff <- readRDS(here('../Data', 'staff.rds')) %>% #read in staff dataset. NB: it was duplicated in our raw data...
group_by(staffid, role) %>% tally() %>% select(staffid, role) #...so we group and summarise it here
cot <- readRDS(here('../Data', 'cot.rds')) #read in CPRD reference data: consultation codes
rol <- readRDS(here('../Data', 'rol.rds')) #read in CPRD reference data: staff role codes
deflators <- readRDS(here('Reference_data', 'deflators.rds')) #NB: not using this for primary care visits at the moment
# Face to face consultation codes: Home consultation codes: Telephone consultation codes:
# -------------------------------- ------------------------ -----------------------------
# Clinic,1, Home Visit,27, Telephone call from a patient,10,
# Follow-up/routine visit,3, Hotel Visit,28, Telephone call to a patient,21,
# Night visit , practice,6, Nursing Home Visit,30, Triage,33,
# Out of hours, Practice,7, Residential Home Visit,31, Telephone Consultation,55,
# Surgery consultation,9, Twilight Visit,32,
# Acute visit,11, Night Visit,50,
# Emergency Consultation,18,
# Initial Post Discharge Review,48,
#
# GP codes from rol: Nurse codes from rol: Other clinician codes from rol:
# ------------------ --------------------- -------------------------------
# Senior Partner,1, Practice Nurse,11, Physiotherapist,26,
# Partner,2, Other Nursing & Midwifery,54 Other Health Care Professional,33
# Assistant,3,
# Associate,4,
# Locum,7,
# GP Registrar,8,
# Sole Practitioner,10,
# Salaried Partner,47,
# GP Retainer,50,
# Other Students,53
#PSSRU unit costs of health and social care are difficult to use due to many changes over the years.
#The overall estimates do not change massively over time, so we use:
# - £56 per patient contact hour (with quals) for nurses
# - £216 per patient contact hour (with quals) for GPs
dat1 <- cohorts %>% left_join(cons) %>% #join consultations onto cohort to reduce it down (cons is very large)
mutate(heartpre = eventdate >= (heart - 365) & eventdate < heart, #add indicators for cohort pre/post relevance...
heartpost = eventdate < (heart + 365) & eventdate >= heart,
hippre = eventdate >= (hip - 365) & eventdate < hip,
hippost = eventdate < (hip + 365) & eventdate >= hip,
dempre = eventdate >= (dementia - 365) & eventdate < dementia,
dempost = eventdate < (dementia + 365) & eventdate >= dementia) %>%
filter(heartpre | heartpost | hippre | hippost | dempre | dempost) %>% #filter out rows with TRUE in cohort identifiers
left_join(staff) %>% left_join(rol, by = c('role' = 'Code')) %>% #join on staff info
left_join(cot, by = c('constype' = 'Code')) %>% #categorise consultation type
mutate(role_cat = case_when( #categorise staff types
role %in% c(11, 54) ~ 'Nurse',
role %in% c(1, 2, 3, 4, 7, 8, 10, 47, 50, 53) ~ 'GP',
TRUE ~ 'other'
),
cons_cat = case_when( #categorise consultation types
constype %in% c(1, 3, 6, 7, 9, 11, 18, 48) ~ 'F2F',
constype %in% c(27, 28, 30, 31, 32, 50) ~ 'Home',
constype %in% c(10, 21, 33, 55) ~ 'Telphn',
TRUE ~ 'other'
)) %>%
filter(role_cat %in% c('Nurse', 'GP'), cons_cat %in% c('F2F', 'Home')) %>% #ICCONIC only cost nurse/GP, face-to-face/home visits
mutate(year = as.integer(format(eventdate, '%Y')), #add a year as integer
month = as.integer(format(eventdate, '%m')), #add a month as integer
HRGyear = as.character(ifelse(month <= 3, year - 1, year))) %>% #work out the HRG year (e.g. 2014 => 2014/15)
#left_join(deflators) %>% #ignoring deflation for now, because PSSRU costs don't seem to change reliably over time
mutate(minute_cost = ifelse(role_cat == 'GP', 216/60, 56/60), #get cost per minute (e.g. £216 / 60 mins for a GP)
cons_dur = ifelse(duration == 0, 0.5, ifelse(duration > 60, 60, duration)), #set min time 0.5 minutes, max time 60 minutes
cost = cons_dur * minute_cost) #calculate cost of consultation
saveRDS(dat1, here('../Data', 'PCvisits.rds')) #save to file
#____________________
#Therapy records ----
dmdprices <- readRDS(here('Reference_data', 'dmd_prices.rds')) #read in drug price data
ther <- readRDS(here('../Data', 'therapy.rds')) %>% #read in therapy records
select(patid, eventdate, prodcode, qty) #reduce to required columns (it is a very large dataset)
dat2 <- cohorts %>% left_join(ther) %>% left_join(dmdprices) %>% #join data onto cohorts
mutate(heartpre = eventdate >= (heart - 365) & eventdate < heart, #add indicators for cohort pre/post relevance...
heartpost = eventdate < (heart + 365) & eventdate >= heart,
hippre = eventdate >= (hip - 365) & eventdate < hip,
hippost = eventdate < (hip + 365) & eventdate >= hip,
dempre = eventdate >= (dementia - 365) & eventdate < dementia,
dempost = eventdate < (dementia + 365) & eventdate >= dementia) %>%
filter(heartpre | heartpost | hippre | hippost | dempre | dempost) #filter out rows with TRUE in cohort identifiers
#Summarise success of pricing attempt
dat2 %>% summarise(perc_unmatched = sum(is.na(drugsubstance))/length(patid)) #therapy records not matched to drug substances: ~7%
#Need to use 2020 deflator as a multiple because dm+d prices are from 2020 NHSBSA dm+d dataset
#(NB: actually prices each have a date associated with them in dm+d, but these are the most uptodate from TRUD at 2020)
def2020 <- deflators[deflators$HRGyear == 2020, 'def_mult']
dat3 <- dat2 %>% filter(!is.na(drugsubstance) & !is.na(price)) %>% #remove unmatched/unpriced records
mutate(cost = qty * price * !!def2020) %>% #multiply price by quantity prescribed and deflator for 2020 -> 2016
select(patid, gender, dod, hip:indem, heartpre:dempost, eventdate, drugsubstance, cost) #reduce dataset to required columns
saveRDS(dat3, here('../Data', 'PCdrugs_recs.rds')) #save to file
dat4 <- dat3 %>% select(-eventdate) %>% group_by(across(-cost)) %>% #remove eventdate and group by everything except the cost
summarise(countrecs = n(), drugcost = sum(cost, na.rm = TRUE)) #summarise to get total cost by patid and drugsubstance
#NB: countrecs stores the count (prescriptions) of the specific drug substance, but is not required by ICCONIC
saveRDS(dat4, here('../Data', 'PCdrugs.rds')) #save to file
#_________________________
#Create summary stats ----
cohorts <- readRDS(here('../Data', 'cohorts.rds')) %>% filter(inhip | inheart | indem)
pcvisits <- readRDS(here('../Data', 'PCvisits.rds'))
pcdrugs <- readRDS(here('../Data', 'PCdrugs.rds'))
source(here('R', 'Output_functions.R'))
#Set cohorts to use
hipcats <- c('hip_op_total','hip_op_partial','hip_op_osteopin') #hip cats to include
hip_cohort <- cohorts %>% filter(inhip, hip_cat %in% hipcats)
heartd_cohort <- cohorts %>% filter(inheartd)
heart_cohort <- cohorts %>% filter(inheart)
heartdc_cohort <- cohorts %>% filter(inheartdc)
heartnodc_cohort <- cohorts %>% filter(inheartnodc)
#Hip cohort
hipstats1 <- stats_func(hip_cohort, pcvisits, 'cost', 'hippost', 'hip_agegroup', hipcats)
hipstats2 <- stats_func(hip_cohort, pcvisits, 'cost', 'hippost', 'hip_agegroup', hipcats, extra_cat = 'role_cat')
hipstats3 <- stats_func(hip_cohort, pcdrugs, 'drugcost', 'hippost', 'hip_agegroup', hipcats)
write_csv(hipstats1, here('../Outputs', 'hip_pc_visits_stats1.csv'))
write_csv(hipstats2, here('../Outputs', 'hip_pc_visits_stats2.csv'))
write_csv(hipstats3, here('../Outputs', 'hip_pc_drugs_stats1.csv'))
#Heart and diabetes cohort
heartstats1 <- stats_func(heartd_cohort, pcvisits, 'cost', 'heartpost', 'heart_agegroup')
heartstats2 <- stats_func(heartd_cohort, pcvisits, 'cost', 'heartpost', 'heart_agegroup', hip_cats = NULL, extra_cat = 'role_cat')
heartstats3 <- stats_func(heartd_cohort, pcdrugs, 'drugcost', 'heartpost', 'heart_agegroup')
write_csv(heartstats1, here('../Outputs', 'heartd_pc_visits_stats1.csv'))
write_csv(heartstats2, here('../Outputs', 'heartd_pc_visits_stats2.csv'))
write_csv(heartstats3, here('../Outputs', 'heartd_pc_drugs_stats1.csv'))
#__________________
#Lookback year ----
hipstats1 <- stats_func(hip_cohort, pcvisits, 'cost', 'hippre', 'hip_agegroup', hipcats)
hipstats2 <- stats_func(hip_cohort, pcvisits, 'cost', 'hippre', 'hip_agegroup', hipcats, extra_cat = 'role_cat')
hipstats3 <- stats_func(hip_cohort, pcdrugs, 'drugcost', 'hippre', 'hip_agegroup', hipcats)
write_csv(hipstats1, here('../Outputs', 'lookback_hip_pc_visits_stats1.csv'))
write_csv(hipstats2, here('../Outputs', 'lookback_hip_pc_visits_stats2.csv'))
write_csv(hipstats3, here('../Outputs', 'lookback_hip_pc_drugs_stats1.csv'))
heartstats1 <- stats_func(heartd_cohort, pcvisits, 'cost', 'heartpre', 'heart_agegroup')
heartstats2 <- stats_func(heartd_cohort, pcvisits, 'cost', 'heartpre', 'heart_agegroup', hip_cats = NULL, extra_cat = 'role_cat')
heartstats3 <- stats_func(heartd_cohort, pcdrugs, 'drugcost', 'heartpre', 'heart_agegroup')
write_csv(heartstats1, here('../Outputs', 'lookback_heart_pc_visits_stats1.csv'))
write_csv(heartstats2, here('../Outputs', 'lookback_heart_pc_visits_stats2.csv'))
write_csv(heartstats3, here('../Outputs', 'lookback_heart_pc_drugs_stats1.csv'))
#_____________________________________
#Additional work on heart cohorts ----
heartstats_all <- stats_func(heart_cohort, pcvisits, 'cost', 'heartpost', 'heart_agegroup')
heartstats_dc <- stats_func(heartdc_cohort, pcvisits, 'cost', 'heartpost', 'heart_agegroup')
heartstats_nodc <- stats_func(heartnodc_cohort, pcvisits, 'cost', 'heartpost', 'heart_agegroup')
write_csv(heartstats_all, here('../Outputs', 'heart_pc_visits_stats.csv'))
write_csv(heartstats_dc, here('../Outputs', 'heartdc_pc_visits_stats.csv'))
write_csv(heartstats_nodc, here('../Outputs', 'heartnodc_pc_visits_stats.csv'))
#_______________________________________________________
#Additional work on cumulative statistics by decile ----
dec_hipall_visits <- decile_func(hip_cohort, pcvisits, 'hippost', 'cost')
dec_heartd_visits <- decile_func(heartd_cohort, pcvisits, 'heartpost', 'cost')
dec_hipall_drugs <- decile_func(hip_cohort, pcdrugs, 'hippost', 'drugcost')
dec_heartd_drugs <- decile_func(heartd_cohort, pcdrugs, 'heartpost', 'drugcost')
rbind(dec_hipall_visits, dec_heartd_visits, dec_hipall_drugs, dec_heartd_drugs) %>% #bind the results together
write_csv(here('../Outputs', 'pc_visits_drugs_deciles.csv')) #write to file