-
Notifications
You must be signed in to change notification settings - Fork 1
/
Example-Analysis-for-Figures.Rmd
553 lines (465 loc) · 29.4 KB
/
Example-Analysis-for-Figures.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
---
title: "Running an Example Analysis"
author: "Ariel Nikas"
date: "7/26/2022"
output: html_document
---
```{r setup, include=FALSE}
# Echo the code of every chunk in the rendered document by default
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
# Attach the survival-analysis packages used throughout this document.
# (survminer is attached first, matching the original load order, so that
# survival's functions win any masking conflicts.)
library(survminer)  # plotting helpers built on top of survival
library(survival)   # coxph(), cox.zph(), survSplit(), Surv()
```
##--For each of the following chunks, we consider our main scenario of interest which is waning vaccination from 100-0% protection over 60 days while vaccination occurs over 30 days. While in the paper most figures unless otherwise noted were generated using partitions with 100 event and 1 day minimums, to facilitate quicker figure generation in this example the min days and events are increased so that figures can be generated in at most a few minutes.
##--Equivalent to Figure 1: Here we consider the individual level of protection from 100-0% waning over 60 days as well as the average protection in the at risk vaccinated population. The population average is the "True" value plotted on all graphs for 30 day spread
```{r}
# Read in the simulated epidemic (may need to change the file path)
Scenario <- read.csv("Batch1-30Day23.csv")

## Individual-level waning: VE declines linearly from 100% to 0% over 60 days
## after vaccination, then stays at 0% for the rest of the 180-day epidemic.
time1 <- seq_len(60)
Wane <- 1 - (1 / 60) * (time1 - 1)
Wane <- append(Wane, rep(0, 120))
plot(1:180, Wane * 100, col = "black", type = "l", lwd = 2, lty = 2,
     ylab = "VE %", xlab = "Day of Epidemic",
     main = "1 Day vs. 30 Day Spread Waning")

## Population-average waning when vaccination is spread over 30 days.
## sus.groups[i, k]  = count vaccinated on day k and still at risk on day i
## sus.groups2[i, k] = that count weighted by individual protection on day i
sus.groups <- sus.groups2 <- matrix(data = NA, nrow = 180, ncol = 30)
for (k in 1:30) {
  for (i in 1:180) {
    sus.groups[i, k] <- length(which(Scenario$vac_time == k & Scenario$vac_time <= i & Scenario$infected_time > i))
  }
}
sus.groups2[, 1] <- sus.groups[, 1] * Wane
for (k in 2:30) {
  # Shift the waning curve so that protection begins on vaccination day k
  Wane2 <- append(rep(NA, (k - 1)), Wane)
  Wane2 <- Wane2[1:180]
  sus.groups2[, k] <- sus.groups[, k] * Wane2
}
# Zero counts become NA so empty vaccination-day groups drop out of the average
sus.groups[which(sus.groups == 0)] <- NA
## Average protection across all vaccinated people still at risk on each day;
## this is the "True" curve plotted on all 30-day-spread graphs
SpreadWane <- rep(NA, 180)
for (j in 1:180) {
  SpreadWane[j] <- sum(sus.groups2[j, ], na.rm = TRUE) / sum(sus.groups[j, ], na.rm = TRUE)
}
lines(1:180, SpreadWane * 100, lwd = 2, col = "purple")
legend("topright", legend = c("Individual", "Population Average"),
       col = c("black", "purple"), lwd = 2, lty = c(2, 1), bty = "n")
```
##--Figure 2 (D) & Figure 3: Comparing the 3 Main Levels (as based on Halloran '97 & '99). Here we consider our main scenario of 100-0% waning over 60 days with 30 days of vaccination. Figure 2's Example gives Level 3 SR while Figure 3's Example gives Level 3 TVI. Run time <5min. Please note:
#1)If vaccination occurs on only 1 day, change spread parameter to 1
#2)In this example mindays is set to 7 to reduce computation time, however in the paper figures it is set to 1 so while results will be similar they will not exactly match unless this parameter value is changed (will take >10 minutes depending on computer)
#3)If considering constant at 80%, the appropriate lines are commented out
#4)Depending on bin size & number of events, you may get the R warning message "Loglik converged before variable; beta may be infinite." This is not a fatal error and is to be expected when running the Cox model with low/no individuals in a given group (vaccinated/unvaccinated)
```{r}
#Parameters
spread=30 #the number of days vaccination is spread over (Change to 1 if not spread)
mindays=7 #Sets the minimum number of days a bin can be (in the paper this is 1 but depending on computing ability may take >15 minutes to run set this way)
minevents=100 #Sets the minimum number of events a bin must have (if last bin too small it merges with the one before it) Note: If this number is small, like 100, the code can again take a while to run
D=180 #how many days in the simulation?
#For 100-0% Waning
an=Scenario3=Scenario=read.csv("Batch1-30Day23.csv") #This File Tracks Events & Status Per Person
ScenarioEXP=read.csv("Batch1-30Day123.csv") #This File Tracks Exposures/Infections Per Day
# #For 80% Constant
# an<-Scenario3<-Scenario<-read.csv("80Leaky-500Seed-30Day9.csv")
# ScenarioEXP<-read.csv(file="80Leaky-500Seed-30Day109.csv")
##--First: Reorganize Data, Find Expected Value, and Create 100 Event Bins
#Drop the seed infectors: they start the epidemic already infected
an=subset(an,original_infectors==0)
an$vac_status2=an$original_infectors=NULL
#--Rearranging Information for Unvaccinated
#Counting-process (start, stop] format for coxph; the +0.5 pushes each event
#time past the integer day (presumably to break ties with vaccination days
#-- NOTE(review): confirm intent)
x1=subset(an,vac_status==0)
x1$time1=1; x1$time2=x1$infected_time+0.5; x1$event=x1$ever_infected; x1$vac=0
#Rearranging Information for Those Vaccinated After Day 1
#These people contribute unvaccinated person-time from day 1 until vaccination
if( length(which(an$vac_status==1&an$vac_time>1))>=1){
x2=subset(an,vac_status==1&vac_time>1)
x2$time1=1; x2$time2=x2$vac_time; x2$event=0; x2$vac=0
}
#Rearranging Information For Those Vaccinated
x3=subset(an,vac_status==1)
x3$time1=x3$vac_time; x3$time2=x3$infected_time+0.5; x3$event=x3$ever_infected; x3$vac=1
if(length(which(an$vac_status==1&an$vac_time>1))>=1){
x=rbind(x1,x2,x3)}else{x=rbind(x1,x3)}
x=x[order(x$id,x$time1),]
x$time3=1; x$time4=x$time2-x$time1+1
x$vac_status=x$ever_infected=x$infected_time=x$vac_time=NULL #x is dataset rearranged to work with coxph
#!--Finding Expected Value
#rt[i] = average hazard ratio (1-VE) across vaccinated people still at risk
#on day i, under linear 100-0% waning over 60 days; expected is the true VE%
rt=sapply(1:D,function(i){s=subset(x3,time1<=i&time2>i); mean(pmin(1,(i-s$time1)/60))})
expected<-100*(1-rt)
#Overall Cox model; cox.zph with the identity transform yields per-event
#scaled Schoenfeld residuals (zph$x = event times, zph$y = residuals)
fit=coxph(Surv(time1,time2,event)~vac,x)
zph=cox.zph(fit,'identity')
###--Make Bins: Because the Cox Model is Based on Events This is Relatively Straightforward
#Greedy left-to-right partition of the unique event times: each bin must span
#at least `mindays` days AND contain at least `minevents` events; whatever is
#left at the end is merged into a final open-ended bin (upper bound Inf)
ut=c(unique(zph$x),Inf); LL=ut[1]; i=1; j=length(ut); mind=mindays; minn=minevents
while(TRUE){
if(ut[j]-ut[i]<mind|sum(ut[i]<=zph$x)<minn){LL[length(LL)]=Inf; break}
for(newi in i:j) if(ut[newi]-ut[i]>=mind&sum(ut[i]<=zph$x&zph$x<ut[newi])>=minn) break
i=newi; LL=c(LL,ut[newi])
}
bins=cut(zph$x,LL,FALSE,right=FALSE); nbins=length(LL)-1
#Day boundaries reused by the Level 1/Level 2 sums below (+0.1 keeps each cut
#just past the bin's first event time)
time.cuts<-append(1,LL[2:nbins]+0.1)
##--Find Level 3 (this was mostly done in the last few steps anyway)
#Level 3 SR: per-bin mean of scaled Schoenfeld residuals -> log hazard ratio
day<-tapply(zph$x,bins,mean); event.level3.SR<-(1-exp(tapply(zph$y,bins,mean)))*100
##--FIND LEVEL ONE
#Level 1 uses exposures: per bin,
#VE = 1 - (vacc infections per exposure)/(unvacc infections per exposure)
y.level1<-rep(NA,D) #NOTE(review): y.level1 appears unused in the rest of this chunk
#Note: Columns are named by who gives to whom (i.e. unvac2vac_inf means an unvaccinated person infected a vaccinated person)
v.exp<-ScenarioEXP$vac2vac_inf+ScenarioEXP$unv2vac_inf+ScenarioEXP$vac2vac_exp+ScenarioEXP$unv2vac_exp
u.exp<-ScenarioEXP$unv2unv_inf+ScenarioEXP$vac2unv_inf+ScenarioEXP$unv2unv_exp+ScenarioEXP$vac2unv_exp
v.inf<-ScenarioEXP$vac2vac_inf+ScenarioEXP$unv2vac_inf
u.inf<-ScenarioEXP$unv2unv_inf+ScenarioEXP$vac2unv_inf
event.level1<-rep(NA,length(time.cuts))
#One estimate per bin; left:right indexes daily vectors (the fractional .1
#cut values are truncated when used as indices -- NOTE(review): confirm that
#the resulting day ranges are as intended)
for(j in 1:(length(time.cuts)-1)){
left<-time.cuts[j] #Find the left cutoff day
right<-time.cuts[(j+1)] #Find the right cut off day
#Calculate the VE value between the cut off points
event.uexp<-sum(u.exp[left:right])
event.vexp<-sum(v.exp[left:right])
event.uinf1<-sum(u.inf[left:right])
event.vinf1<-sum(v.inf[left:right])
event.level1[j]<-100*(1-((event.vinf1/event.vexp)/(event.uinf1/event.uexp)))
}
#Final Level 1 Time Cut (last bin runs to the end of the epidemic, day D)
left<-time.cuts[length(time.cuts)]
right<-D
event.uexp<-sum(u.exp[left:right])
event.vexp<-sum(v.exp[left:right])
event.uinf1<-sum(u.inf[left:right])
event.vinf1<-sum(v.inf[left:right])
event.level1[length(time.cuts)]<-100*(1-((event.vinf1/event.vexp)/(event.uinf1/event.uexp)))
##--FIND LEVEL 2
#Level 2 uses person-time at risk: build daily counts of at-risk people and
#new infections in each arm, then aggregate over the same bins as Level 1
v2.people<-u2.people<-v2.inf<-u2.inf<-rep(NA,D)
for(i in 1:D){
v2.people[i]<-length(which(Scenario$vac_time<=i & Scenario$vac_status==1 & Scenario$infected_time>i)) #people who are vaccinated by today, marked as being vaccinated, not yet infected
if(spread==1){
u2.people[i]<-length(which(Scenario$vac_status==0 & Scenario$infected_time>i)) #If everyone vaccinated on day one, no worries
}else{ #worries, add in not yet vaccinated people to unvaccinated group to add into unvac time
u2.people[i]<-length(which(Scenario$vac_status==0 & Scenario$infected_time>i))+length(which(Scenario$vac_time>i & Scenario$vac_status==1 & Scenario$infected_time>i))
}
v2.inf[i]<-length(which(Scenario$vac_status==1 & Scenario$infected_time==i))
u2.inf[i]<-length(which(Scenario$vac_status==0 & Scenario$infected_time==i)) #adding in not yet vac doesn't matter here because they cannot get infected
}
event.level2<-rep(NA,length(time.cuts))#-Break up into 100 event sections
#Per bin: VE = 1 - (vacc infections per person-day)/(unvacc infections per person-day)
for(j in 1:(length(time.cuts)-1)){
left<-time.cuts[j]
right<-time.cuts[(j+1)]
event.upt<-sum(u2.people[left:right])
event.vpt<-sum(v2.people[left:right])
event.uinf<-sum(u2.inf[left:right])
event.vinf<-sum(v2.inf[left:right])
event.level2[j]<-100*(1-((event.vinf/event.vpt)/(event.uinf/event.upt)))
}
#Final Level 2 Bin (runs to the end of the epidemic)
left<-time.cuts[length(time.cuts)]
right<-D
event.upt<-sum(u2.people[left:right])
event.vpt<-sum(v2.people[left:right])
event.uinf<-sum(u2.inf[left:right])
event.vinf<-sum(v2.inf[left:right])
event.level2[length(time.cuts)]<-100*(1-((event.vinf/event.vpt)/(event.uinf/event.upt)))
#Level 3 variants: SR, SR2, TSA, SCM, TVI -- all per-bin hazard-ratio
#estimators derived from the single Cox fit (or refits on split data)
day<-tapply(zph$x,bins,mean)
SR1<-exp(tapply(zph$y,bins,mean))
ru=residuals(fit,'schoenfeld') #unscaled schoenfeld residuals
tk=as.numeric(names(ru)); Bm=coef(fit); rh=exp(Bm); V=(ru>0)+0 #tk is infection times, V indicates whether infected was vaccinated (0=unvaccinated, 1=vaccinated)
Z=sapply(unique(rank(tk,ties.method='min')),function(i) optimize(function(Z) (V[i]-exp(Bm)*Z/(exp(Bm)*Z+1-Z) - ru[i])^2 ,c(0,1),tol=1e-15)$min )
Z=Z[as.numeric(factor(tk))] #proportion of never infected that are vaccinated (Z can easily be calculated directly but that does not account for ties)
rs = (exp(Bm)*Z+1-Z)^2*ru/(exp(Bm)*Z*(1-Z)) + Bm #scaled schoenfeld residuals
SR2<-exp(tapply(rs,bins,mean)) #hazard ratio estimated using schoenfeld residuals without constant variance approximation
TSA=sapply(unique(bins),function(i){
Z=Z[bins==i]; V=V[bins==i]
optimize(function(ER) sum(rh*Z/(rh*Z+1-Z)+rh*Z*(1-Z)/(rh*Z+1-Z)^2*(log(ER)-Bm) - V)^2 ,c(0,2),tol=1e-15)$min
})
y=survSplit(Surv(time1,time2,event)~.,x,cut=tapply(zph$x,bins,max)[-nbins],zero=1,episode='tcat') #dataset with time category
vzm=sapply(1:nbins,function(i) coxph(Surv(time1,time2,event)~vac,subset(y,tcat==i))$coef[[1]] ) #fitting a separate cox model for each time category
SCM<-exp(vzm)
fit2=coxph(Surv(time1,time2,event)~vac:factor(tcat),y)
TVI<-exp(coef(fit2))#hazard ratio estimates using time vaccine interaction term
#From here on `bins` is an integer index (1..nbins), used to subset Z and V
bins=as.numeric(factor(bins))
#Sanity check: each estimator should (approximately) zero its own estimating
#equation within every bin, so the printed values should all be near 0
sapply(unique(bins),function(i){
Z=Z[bins==i]; V=V[bins==i]
eq.TVI=sum(TVI[i]*Z/(TVI[i]*Z+1-Z))-sum(V) #equals 0 for TVI method, maximizes likelihood
eq.TSA=sum(rh*Z/(rh*Z+1-Z)+rh*Z*(1-Z)/(rh*Z+1-Z)^2*(log(TSA)[i]-Bm))-sum(V) #equals 0 for TSA method, taylor series approximation of eq.TVI
eq.SR2=sum((rh*Z+1-Z)/(rh*Z*(1-Z))*rh*Z+log(SR2)[i]-Bm)-sum((rh*Z+1-Z)^2/(rh*Z*(1-Z))*V) #equals 0 for SR2 method, weighting of eq.TSA
eq.SR1=sum(rh*Z/(rh*Z+1-Z)+(log(SR1)[i]-Bm)/fit$var[1,1]/fit$nevent)-sum(V) #equals 0 for SR1 method, replaces time varying 'variance' of V, rh*Z*(1-Z)/(rh*Z+1-Z)^2, in either eq.TSA or eq.SR2 with 1/fit$var[1,1]/fit$nevent
round(c(eq.TVI=eq.TVI,eq.TSA=eq.TSA,eq.SR2=eq.SR2,eq.SR1=eq.SR1),3)
})
event.level3.TVI<-(1-TVI)*100
#######----------------------------------Just Plotting
#Figure 2 equivalent: true VE vs. the three levels, with Level 3 estimated
#via the scaled Schoenfeld residual (SR) method
plot(1:180, expected, lwd=2, col="black", main="Example Figure 2: Comparing Different Levels with SR", ylab="VE %", xlab="Day of Epidemic", type="l")
lines(day, event.level1, col="red", lwd=2, type="b")
lines(day, event.level2, col="blue", lwd=2, type="b")
lines(day, event.level3.SR, col="green", lwd=2, type="b")
legend("topright", legend=c("True", "Level 1", "Level 2", "Level 3 (SR)"), col=c("black", "red", "blue", "green"), lwd=2, lty=1,bty="n")
#Figure 3 equivalent: same levels, Level 3 estimated via the time-vaccine
#interaction (TVI) method
plot(1:180, expected, lwd=2, col="black", main="Example Figure 3: Comparing Different Levels with TVI", ylab="VE %", xlab="Day of Epidemic", type="l")
lines(day, event.level1, col="red", lwd=2, type="b")
lines(day, event.level2, col="blue", lwd=2, type="b")
lines(day, event.level3.TVI, col="dodgerblue", lwd=2, type="b")
legend("topright", legend=c("True", "Level 1", "Level 2", "Level 3 (TVI)"), col=c("black", "red", "blue", "dodgerblue"), lwd=2, lty=1,bty="n")
# #If Constant at 80% (the "True" line becomes a flat 80%)
# plot(1:180, rep(80,180), lwd=2, col="black", main="Example: Comparing Different Levels", ylab="VE %", xlab="Day of Epidemic", type="l")
# lines(day, event.level1, col="red", lwd=2, type="b")
# lines(day, event.level2, col="blue", lwd=2, type="b")
# lines(day, event.level3.SR, col="green", lwd=2, type="b")
# legend("topright", legend=c("True", "Level 1", "Level 2", "Level 3 (SR)"), col=c("black", "red", "blue", "green"), lwd=2, lty=1,bty="n")
# plot(1:180, rep(80,180), lwd=2, col="black", main="Example Figure 3: Comparing Different Levels with TVI", ylab="VE %", xlab="Day of Epidemic", type="l")
# lines(day, event.level1, col="red", lwd=2, type="b")
# lines(day, event.level2, col="blue", lwd=2, type="b")
# lines(day, event.level3.TVI, col="dodgerblue", lwd=2, type="b")
# legend("topright", legend=c("True", "Level 1", "Level 2", "Level 3 (TVI)"), col=c("black", "red", "blue", "dodgerblue"), lwd=2, lty=1,bty="n")
```
##Figure 4: Comparing Level 3 Methods. Please Note:
#1)In this example mindays is set to 7 to reduce computation time, however in the paper figures it is set to 1 so while results will be similar they will not exactly match unless this parameter value is changed (will take >10 minutes depending on computer)
#2)Depending on bin size & number of events, you may get the R warning message "Loglik converged before variable; beta may be infinite." This is not a fatal error and is to be expected when running the Cox model with low/no individuals in a given group (vaccinated/unvaccinated)
```{r}
#Figure 4: rebuild the counting-process dataset and compare all Level 3
#hazard-ratio estimators (SR, SR2, TSA, SCM, TVI) against the true curve
an=Scenario3=read.csv("Batch1-30Day23.csv") #Ariel Nikas's simulation output
mind=7 #minimum day for bins
minn=100 #minimum events for bins
rt0=c(seq(0.0,1.0,length.out=61),rep(1.0,119)) #hazard ratio over time since vaccination
an=subset(an,original_infectors==0)
#Recode never-vaccinated (vac_time==0) as day 180; pv_time is created and
#then dropped on the next line -- NOTE(review): pv_time appears to be a leftover
an$vac_time[an$vac_time==0]=180; an$pv_time=an$vac_time
an$vac_status2=an$original_infectors=an$pv_time=NULL
x1=subset(an,vac_status==0)
x1$time1=1; x1$time2=x1$infected_time+0.5; x1$event=x1$ever_infected; x1$vac=0
if( length(which(an$vac_status==1&an$vac_time>1))>=1){#if spread=30
x2=subset(an,vac_status==1&vac_time>1)
x2$time1=1; x2$time2=x2$vac_time; x2$event=0; x2$vac=0
}
x3=subset(an,vac_status==1)
x3$time1=x3$vac_time; x3$time2=x3$infected_time+0.5; x3$event=x3$ever_infected; x3$vac=1
if( length(which(an$vac_status==1&an$vac_time>1))>=1){
x=rbind(x1,x2,x3)}else{x=rbind(x1,x3)}
x=x[order(x$id,x$time1),]
x$time3=1; x$time4=x$time2-x$time1+1
x$vac_status=x$ever_infected=x$infected_time=x$vac_time=NULL #x is dataset rearranged to work with coxph
rt=sapply(1:180,function(i){s=subset(x3,time1<=i&time2>i); mean(rt0[i-s$time1+1])}) #true hazard ratio
plot(1:180,rt,xlab='Day of epidemic',ylab='1-VE',type='l',ylim=c(0,1.6))
fit=coxph(Surv(time1,time2,event)~vac,x)
zph=cox.zph(fit,'identity')
#Greedy binning of event times (same algorithm as the Figure 2/3 chunk)
ut=c(unique(zph$x),Inf); LL=ut[1]; i=1; j=length(ut)
while(TRUE){
if(ut[j]-ut[i]<mind|sum(ut[i]<=zph$x)<minn){LL[length(LL)]=Inf; break}
for(newi in i:j) if(ut[newi]-ut[i]>=mind&sum(ut[i]<=zph$x&zph$x<ut[newi])>=minn) break
i=newi; LL=c(LL,ut[newi])
}
bins=cut(zph$x,LL,FALSE,right=FALSE); nbins=length(LL)-1
lines(day<-tapply(zph$x,bins,mean), SR1<-exp(tapply(zph$y,bins,mean)),type='b',col="green") #hazard ratio estimated using schoenfeld residuals from zph
ru=residuals(fit,'schoenfeld') #unscaled schoenfeld residuals
tk=as.numeric(names(ru)); Bm=coef(fit); rh=exp(Bm); V=(ru>0)+0 #tk is infection times, V indicates whether infected was vaccinated (0=unvaccinated, 1=vaccinated)
Z=sapply(unique(rank(tk,ties.method='min')),function(i) optimize(function(Z) (V[i]-exp(Bm)*Z/(exp(Bm)*Z+1-Z) - ru[i])^2 ,c(0,1),tol=1e-15)$min )
Z=Z[as.numeric(factor(tk))] #proportion of never infected that are vaccinated (Z can easily be calculated directly but that does not account for ties)
rs = (exp(Bm)*Z+1-Z)^2*ru/(exp(Bm)*Z*(1-Z)) + Bm #scaled schoenfeld residuals
lines(day,SR2<-exp(tapply(rs,bins,mean)),type='b',col="darkgreen",pch=2) #hazard ratio estimated using schoenfeld residuals without constant variance approximation
TSA=sapply(unique(bins),function(i){
Z=Z[bins==i]; V=V[bins==i]
optimize(function(ER) sum(rh*Z/(rh*Z+1-Z)+rh*Z*(1-Z)/(rh*Z+1-Z)^2*(log(ER)-Bm) - V)^2 ,c(0,2),tol=1e-15)$min
})
lines(day,TSA,type='b',col="purple",pch=3) #hazard ratio estimated using taylor series approximation
y=survSplit(Surv(time1,time2,event)~.,x,cut=tapply(zph$x,bins,max)[-nbins],zero=1,episode='tcat') #dataset with time category
vzm=sapply(1:nbins,function(i) coxph(Surv(time1,time2,event)~vac,subset(y,tcat==i))$coef[[1]] ) #fitting a separate cox model for each time category
lines(day,SCM<-exp(vzm),type='b',col=3,lwd=2) #hazard ratio estimates using the separate cox models
fit2=coxph(Surv(time1,time2,event)~vac:factor(tcat),y)
TVI<-exp(coef(fit2))#hazard ratio estimates using time vaccine interaction term
#`bins` becomes an integer index from here on (used to subset Z and V)
bins=as.numeric(factor(bins))
#Sanity check: each estimator should approximately zero its own estimating
#equation within each bin (printed values should be near 0)
sapply(unique(bins),function(i){
Z=Z[bins==i]; V=V[bins==i]
eq.TVI=sum(TVI[i]*Z/(TVI[i]*Z+1-Z))-sum(V) #equals 0 for TVI method, maximizes likelihood
eq.TSA=sum(rh*Z/(rh*Z+1-Z)+rh*Z*(1-Z)/(rh*Z+1-Z)^2*(log(TSA)[i]-Bm))-sum(V) #equals 0 for TSA method, taylor series approximation of eq.TVI
eq.SR2=sum((rh*Z+1-Z)/(rh*Z*(1-Z))*rh*Z+log(SR2)[i]-Bm)-sum((rh*Z+1-Z)^2/(rh*Z*(1-Z))*V) #equals 0 for SR2 method, weighting of eq.TSA
eq.SR1=sum(rh*Z/(rh*Z+1-Z)+(log(SR1)[i]-Bm)/fit$var[1,1]/fit$nevent)-sum(V) #equals 0 for SR1 method, replaces time varying 'variance' of V, rh*Z*(1-Z)/(rh*Z+1-Z)^2, in either eq.TSA or eq.SR2 with 1/fit$var[1,1]/fit$nevent
round(c(eq.TVI=eq.TVI,eq.TSA=eq.TSA,eq.SR2=eq.SR2,eq.SR1=eq.SR1),3)
})
#Final comparison plot: all Level 3 estimators as VE% against the true curve
plot(1:180,(1-rt)*100,xlab='Day of Epidemic',ylab='VE %',type='l',ylim=c(-50,100), main="Example: Level 3 Comparisons",cex.main=1.5, cex.axis=1.3, cex.lab=1.1, lwd=3)
lines(day,(1-SR1)*100,type='b',col="green", lwd=3)#SR
lines(day,(1-SR2)*100,type='b',col="forestgreen", lwd=3) #SRTV
lines(day,(1-TSA)*100,type='b',col="purple",pch=3, lty=2,lwd=3)
lines(day,(1-TVI)*100,type='b',col="dodgerblue",lty=2, lwd=3)
legend("topright", legend= c("True", "Level 3 (SR)", "Level 3 (SRTV)", "Level 3 (TS)", "Level 3 (TVI)"), lwd=2, col=c("black", "green", "forestgreen", "purple", "dodgerblue"), pch=c(NA,1,1,3,1), bty="n")
```
##Figure 5 Equivalent: Example Optimization to Create Heat Maps for R0=1.8. Because this considers 120 combinations of min days and events it takes a long time to run (in units of hours). This will return a heat map for R0=1.8. It will also give an example stepped function like SI figure 5. Please note:
#1)Depending on bin size & number of events, you may get the R warning message "Loglik converged before variable; beta may be infinite." This is not a fatal error and is to be expected when running the Cox model with low/no individuals in a given group (vaccinated/unvaccinated)
```{r}
D=180
an=Scenario=read.csv("Batch1-30Day23.csv")
an=subset(an,original_infectors==0)
an$vac_status2=an$original_infectors=NULL
#--Rearranging Information for Unvaccinated
x1=subset(an,vac_status==0)
x1$time1=1; x1$time2=x1$infected_time+0.5; x1$event=x1$ever_infected; x1$vac=0
#Rearranging Information for Those Vaccinated After Day 1
if( length(which(an$vac_status==1&an$vac_time>1))>=1){
x2=subset(an,vac_status==1&vac_time>1)
x2$time1=1; x2$time2=x2$vac_time; x2$event=0; x2$vac=0
}
#Rearranging Information For Those Vaccinated
x3=subset(an,vac_status==1)
x3$time1=x3$vac_time; x3$time2=x3$infected_time+0.5; x3$event=x3$ever_infected; x3$vac=1
if(length(which(an$vac_status==1&an$vac_time>1))>=1){
x=rbind(x1,x2,x3)}else{x=rbind(x1,x3)}
x=x[order(x$id,x$time1),]
x$time3=1; x$time4=x$time2-x$time1+1
x$vac_status=x$ever_infected=x$infected_time=x$vac_time=NULL #x is dataset rearranged to work with coxph
#!--Finding Expected Value
rt=sapply(1:D,function(i){s=subset(x3,time1<=i&time2>i); mean(pmin(1,(i-s$time1)/60))})
expected<-100*(1-rt)
fit=coxph(Surv(time1,time2,event)~vac,x)
zph=cox.zph(fit,'identity')
###
######## Finding Minimum AIC ###############
#Grid search over (min days, min events) partitions: for each combination,
#build the bins, fit a TVI Cox model, and record weighted/unweighted squared
#error against the true VE plus the model AIC
daystested <-seq(1,10,1)
minpointstested <-seq(100,1200,100)
data<-error.AIC<-error.weighted<-error.notweighted <- matrix(0,length(daystested),length(minpointstested))
for (k in 1:length(daystested)) {
for(m in 1:length(minpointstested)) {
ut=c(unique(zph$x),300); LL=ut[1]; i=1; j=length(ut); mind=daystested[k]; minn=minpointstested[m]
while(TRUE){
if(ut[j]-ut[i]<mind|sum(ut[i]<=zph$x)<minn){LL[length(LL)]=Inf; break}
for(newi in i:j) if(ut[newi]-ut[i]>=mind&sum(ut[i]<=zph$x&zph$x<ut[newi])>=minn) break
i=newi; LL=c(LL,ut[newi])
}
LL[which(LL==Inf)]<-180
bins=cut(zph$x,LL,FALSE,right=FALSE); nbins=length(LL)-1
if (nbins>1){ #If everything is fine, do normally
binsY = bins; nbinsY=nbins; time.vz<-tapply(zph$x,binsY,mean)
xsplitY=survSplit(Surv(time1,time2,event)~.,x,cut=LL[2:nbins]+0.1,zero=1,episode='tcat') #dataset with time category
fit=coxph(Surv(time1,time2,event)~vac:factor(tcat),xsplitY)
time.cuts<-append(1,LL[2:nbins]+0.1)
#NOTE(review): LL has length > 1 here, so `if(LL==180)` compares a vector in
#a scalar condition -- an error in R >= 4.2 (warning before). Intended test is
#presumably something like LL[length(LL)]==180 with a single cut; confirm
}else if(LL==180){ #If your only cut is at 180 (which if you are looking at a reasonable epidemic shouldnt happen), then actually it's 1 to 180
LL=c(1,180)
binsY = bins; nbinsY=1; time.vz<-tapply(zph$x,binsY,mean)
xsplitY=survSplit(Surv(time1,time2,event)~.,x,cut=LL[1:2]+0.1,zero=1,episode='tcat') #dataset with time category
fit=coxph(Surv(time1,time2,event)~vac:factor(tcat),xsplitY)
time.cuts<-LL+0.1
}
else{
binsY = bins; nbinsY=1; time.vz<-tapply(zph$x,binsY,mean)
xsplitY=survSplit(Surv(time1,time2,event)~.,x,cut=LL[1:2]+0.1,zero=1,episode='tcat') #dataset with time category
fit=coxph(Surv(time1,time2,event)~vac:factor(tcat),xsplitY)
time.cuts<-append(1,LL+0.1)}
cutsvalues<-round(time.cuts)
time.vz.opt<-time.vz
VE.opt<-(1-(exp(coef(fit))))*100
##---Now Calculate All Errors By First Making the Step Function Using the Data Out
if(is.na(VE.opt[1])==T){
VE.opt<-VE.opt[-1]
}
#Build the stepped VE function implied by this partition, one value per day
VE.stepper<-c(0)
for( g in 1:(length(cutsvalues)-1)){
VE.stepper[cutsvalues[g]:cutsvalues[g+1]]<-rep(VE.opt[g])
}
VE.stepper[length(VE.stepper):180]<-VE.opt[length(VE.opt)]
cost.in<-rep(0,D)
cost2.in<-rep(0,D)
for( i in 1:179){#FIND THE DIFFERENCE IN EXPECTED VALUE AND THE STEPPED VALUE THEN WEIGHT BY HOW MANY INFECTION ON THAT DAY
cost.in[i]<-(((1-rt[i])*100-VE.stepper[i])^2)*(length(which(an$infected_time==i)))
cost2.in[i]<-(((1-rt[i])*100-VE.stepper[i])^2)
}
#Save Errors in Matrix
error.weighted[k,m]<-sum(cost.in, na.rm=TRUE) #Weighted Error
error.notweighted[k,m]<-sum(cost2.in, na.rm=TRUE)
error.AIC[k,m]<-AIC(fit) #AIC
}
}
#Example stepped function (from the last grid combination) vs. the true VE
plot(1:180, (1-rt)*100, type="l", main="Example: Stepped Function To Find Optimal Partition", ylab="VE %", xlab="Day of Epidemic")
#lines(time.vz, (1-(exp(coef(fit))))*100, type="b", lwd=2, col="purple")
lines(seq(1:D), VE.stepper, col="blue")
#Convert the weighted error into a root-mean-square error per infection and
#draw the heat map over the (min days, min events) grid
WE.18<-error.weighted
inf.18<-length(which(Scenario$ever_infected==1))
MRE.18<-sqrt(WE.18/inf.18)
events= seq(100, 1200, by = 100) #This should be minimum cut off amount by 100s
days = seq(1, 10, by = 1) #This should be minimum day amount by 1's
colfunc<-colorRampPalette(c("royalblue","springgreen","yellow","red")) #Colors, can change
level1=seq(0, 14,2)
col=(colfunc(length(level1)))
filled.contour(days,events, MRE.18, col=col,levels=level1, main="Root Mean Square Error: R0=1.8", xlab="Minimum Days", ylab="", cex.lab=1, cex.main=1.25)
title(ylab="Cut Off Assigned", line=5, cex.lab=2.5)
```
##Figure 6 Equivalent: Comparing Level 3 Methods — Non-Optimized and Optimized. This will match Figure 6 given in the paper. However, due to the small bins for the non-optimized Panel A (mind and minn), this can take a long time to run, especially on laptops (>20 min). The optimized version (Panel B), using mind.opt and minn.opt beginning on line 501, will also match the paper exactly but will take significantly less time to run. Please note:
#1)Depending on bin size & number of events, you may get the R warning message "Loglik converged before variable; beta may be infinite." This is not a fatal error and is to be expected when running the Cox model with low/no individuals in a given group (vaccinated/unvaccinated)
```{r}
#Figure 6: SR and TVI Level 3 estimates under the paper's default partition
#(non-optimized) vs. the optimized partition found by the Figure 5 search
an=Scenario3=read.csv("Batch1-30Day23.csv")
mind=1 #minimum day for bins(For non-optimized days as used throughout paper)
minn=100 #minimum events for bins(For non-optimized events as used throughout paper)
mind.opt=6 #minimum day for bins (Optimal Day Partition for specific scenario as found in paper)
minn.opt=700 #minimum events for bins (Optimal Event Partition for specific scenario as found in paper)
#First Run Not Optimized (Note:This will take a while)
rt0=c(seq(0.0,1.0,length.out=61),rep(1.0,119)) #hazard ratio over time since vaccination
an=subset(an,original_infectors==0)
an$vac_time[an$vac_time==0]=180; an$pv_time=an$vac_time
an$vac_status2=an$original_infectors=an$pv_time=NULL
x1=subset(an,vac_status==0)
x1$time1=1; x1$time2=x1$infected_time+0.5; x1$event=x1$ever_infected; x1$vac=0
if( length(which(an$vac_status==1&an$vac_time>1))>=1){#if spread=30
x2=subset(an,vac_status==1&vac_time>1)
x2$time1=1; x2$time2=x2$vac_time; x2$event=0; x2$vac=0
}
x3=subset(an,vac_status==1)
x3$time1=x3$vac_time; x3$time2=x3$infected_time+0.5; x3$event=x3$ever_infected; x3$vac=1
if( length(which(an$vac_status==1&an$vac_time>1))>=1){
x=rbind(x1,x2,x3)}else{x=rbind(x1,x3)}
x=x[order(x$id,x$time1),]
x$time3=1; x$time4=x$time2-x$time1+1
x$vac_status=x$ever_infected=x$infected_time=x$vac_time=NULL #x is dataset rearranged to work with coxph
rt=sapply(1:180,function(i){s=subset(x3,time1<=i&time2>i); mean(rt0[i-s$time1+1])}) #true hazard ratio
fit=coxph(Surv(time1,time2,event)~vac,x)
zph=cox.zph(fit,'identity')
#Greedy binning with the non-optimized (mind, minn) thresholds
ut=c(unique(zph$x),Inf); LL=ut[1]; i=1; j=length(ut)
while(TRUE){
if(ut[j]-ut[i]<mind|sum(ut[i]<=zph$x)<minn){LL[length(LL)]=Inf; break}
for(newi in i:j) if(ut[newi]-ut[i]>=mind&sum(ut[i]<=zph$x&zph$x<ut[newi])>=minn) break
i=newi; LL=c(LL,ut[newi])
}
bins=cut(zph$x,LL,FALSE,right=FALSE); nbins=length(LL)-1
day<-tapply(zph$x,bins,mean)
SR1<-exp(tapply(zph$y,bins,mean)) #hazard ratio estimated using schoenfeld residuals from zph
ru=residuals(fit,'schoenfeld') #unscaled schoenfeld residuals
tk=as.numeric(names(ru)); Bm=coef(fit); rh=exp(Bm); V=(ru>0)+0 #tk is infection times, V indicates whether infected was vaccinated (0=unvaccinated, 1=vaccinated)
Z=sapply(unique(rank(tk,ties.method='min')),function(i) optimize(function(Z) (V[i]-exp(Bm)*Z/(exp(Bm)*Z+1-Z) - ru[i])^2 ,c(0,1),tol=1e-15)$min )
Z=Z[as.numeric(factor(tk))] #proportion of never infected that are vaccinated (Z can easily be calculated directly but that does not account for ties)
y=survSplit(Surv(time1,time2,event)~.,x,cut=tapply(zph$x,bins,max)[-nbins],zero=1,episode='tcat') #dataset with time category
fit2=coxph(Surv(time1,time2,event)~vac:factor(tcat),y)
TVI<-exp(coef(fit2))#hazard ratio estimates using time vaccine interaction term
bins=as.numeric(factor(bins))
#Estimating-equation sanity check; printed values should be near 0
sapply(unique(bins),function(i){
Z=Z[bins==i]; V=V[bins==i]
eq.TVI=sum(TVI[i]*Z/(TVI[i]*Z+1-Z))-sum(V) #equals 0 for TVI method, maximizes likelihood
eq.SR1=sum(rh*Z/(rh*Z+1-Z)+(log(SR1)[i]-Bm)/fit$var[1,1]/fit$nevent)-sum(V) #equals 0 for SR1 method, replaces time varying 'variance' of V, rh*Z*(1-Z)/(rh*Z+1-Z)^2, in either eq.TSA or eq.SR2 with 1/fit$var[1,1]/fit$nevent
round(c(eq.TVI=eq.TVI,eq.SR1=eq.SR1),3)
})
#Save Information for Non-Optimized Partitions
expected.nonopt<-(1-rt)*100
time.nonopt<-day
SR.nonopt<-(1-SR1)*100
TVI.nonopt<-(1-TVI)*100
##--Begin Running Optimized Partitions HERE
#Same pipeline re-run with the optimized (mind.opt, minn.opt) thresholds,
#reusing the overall Cox fit (`fit`) and its residuals from above
rt0=c(seq(0.0,1.0,length.out=61),rep(1.0,119)) #hazard ratio over time since vaccination
ut=c(unique(zph$x),Inf); LL=ut[1]; i=1; j=length(ut)
while(TRUE){
if(ut[j]-ut[i]<mind.opt|sum(ut[i]<=zph$x)<minn.opt){LL[length(LL)]=Inf; break}
for(newi in i:j) if(ut[newi]-ut[i]>=mind.opt&sum(ut[i]<=zph$x&zph$x<ut[newi])>=minn.opt) break
i=newi; LL=c(LL,ut[newi])
}
bins=cut(zph$x,LL,FALSE,right=FALSE); nbins=length(LL)-1
day<-tapply(zph$x,bins,mean)
SR1<-exp(tapply(zph$y,bins,mean)) #hazard ratio estimated using schoenfeld residuals from zph
ru=residuals(fit,'schoenfeld') #unscaled schoenfeld residuals
tk=as.numeric(names(ru)); Bm=coef(fit); rh=exp(Bm); V=(ru>0)+0 #tk is infection times, V indicates whether infected was vaccinated (0=unvaccinated, 1=vaccinated)
Z=sapply(unique(rank(tk,ties.method='min')),function(i) optimize(function(Z) (V[i]-exp(Bm)*Z/(exp(Bm)*Z+1-Z) - ru[i])^2 ,c(0,1),tol=1e-15)$min )
Z=Z[as.numeric(factor(tk))] #proportion of never infected that are vaccinated (Z can easily be calculated directly but that does not account for ties)
y=survSplit(Surv(time1,time2,event)~.,x,cut=tapply(zph$x,bins,max)[-nbins],zero=1,episode='tcat') #dataset with time category
fit2=coxph(Surv(time1,time2,event)~vac:factor(tcat),y)
TVI<-exp(coef(fit2))#hazard ratio estimates using time vaccine interaction term
bins=as.numeric(factor(bins))
sapply(unique(bins),function(i){
Z=Z[bins==i]; V=V[bins==i]
eq.TVI=sum(TVI[i]*Z/(TVI[i]*Z+1-Z))-sum(V) #equals 0 for TVI method, maximizes likelihood
eq.SR1=sum(rh*Z/(rh*Z+1-Z)+(log(SR1)[i]-Bm)/fit$var[1,1]/fit$nevent)-sum(V) #equals 0 for SR1 method, replaces time varying 'variance' of V, rh*Z*(1-Z)/(rh*Z+1-Z)^2, in either eq.TSA or eq.SR2 with 1/fit$var[1,1]/fit$nevent
round(c(eq.TVI=eq.TVI,eq.SR1=eq.SR1),3)
})
#Save Information for Optimized Partitions
expected.opt<-(1-rt)*100
time.opt<-day
SR.opt<-(1-SR1)*100
TVI.opt<-(1-TVI)*100
##Plotting
#Panel A (non-optimized) and Panel B (optimized) side by side
#NOTE(review): pch=c(NA,1,1,3,1) has 5 entries for 3 legend items; extra
#values are ignored but the vector looks copied from the Figure 4 legend
plot(1:180,expected.nonopt,xlab='Day of Epidemic',ylab='VE %',type='l',ylim=c(-50,100), main="Example: Level 3 Non-Optimized",cex.main=1.5, cex.axis=1.3, cex.lab=1.1, lwd=3)
lines(time.nonopt,SR.nonopt,type='b',col="green", lwd=3)#SR
lines(time.nonopt,TVI.nonopt,type='b',col="dodgerblue",lty=2, lwd=3)
legend("topright", legend= c("True", "Level 3 (SR)", "Level 3 (TVI)"), lwd=2, col=c("black", "green", "dodgerblue"), pch=c(NA,1,1,3,1), bty="n")
plot(1:180,expected.opt,xlab='Day of Epidemic',ylab='VE %',type='l',ylim=c(-50,100), main="Example: Level 3 Optimized",cex.main=1.5, cex.axis=1.3, cex.lab=1.1, lwd=3)
lines(time.opt,SR.opt,type='b',col="green", lwd=3)#SR
lines(time.opt,TVI.opt,type='b',col="dodgerblue",lty=2, lwd=3)
legend("topright", legend= c("True", "Level 3 (SR)", "Level 3 (TVI)"), lwd=2, col=c("black", "green", "dodgerblue"), pch=c(NA,1,1,3,1), bty="n")
```