-
Notifications
You must be signed in to change notification settings - Fork 0
/
chapter4.qmd
254 lines (186 loc) · 6.86 KB
/
chapter4.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
---
title: "Chapter 4 - Cox Proportional Hazards Regression"
---
## Slides
Lecture slides [here](chap4.html){target="_blank"}. (To convert html to pdf, press E $\to$ Print $\to$ Destination: Save to pdf)
## Base R Code
```{r}
#| code-fold: true
#| code-summary: "Show the code"
#| eval: false
##################################################################
# This code generates all numerical results in chapter 4. ##
##################################################################
###################################################
# Section 4.2.6 (Figure 4.2)
# Cox proportional hazards model on GBC study data
###################################################
library(survival)
##############################################
# GBC study
#############################################
# read in the complete data
gbc <- read.table("Data//German Breast Cancer Study//gbc.txt")
# subset to first event data
# Sort the data by time within each id
o <- order(gbc$id,gbc$time)
gbc <- gbc[o,]
# get the first row for each id
data.CE <- gbc[!duplicated(gbc$id),]
# set status=1 if status==2 or 1
data.CE$status <- (data.CE$status>0)+0
# fit the Cox proportional hazards model
## preparation
data.CE$hormone <- factor(data.CE$hormone)
data.CE$meno <- factor(data.CE$meno)
data.CE$grade <- factor(data.CE$grade)
obj <- coxph(Surv(time, status)~ hormone + meno + age + size + grade
+ prog + estrg, data = data.CE)
#summarize results
summary(obj)
# Wald test on tumor grade
# H_0: beta_5=beta_6=0
beta_q <- obj$coefficients[5:6]
Sigma_q <- obj$var[5:6,5:6]
#chisq statistic with 2 d.f.
chisq2 <- t(beta_q) %*% solve(Sigma_q) %*% beta_q
#p-value
pval <- 1 - pchisq(chisq2, 2)
### explore forest plot
library(survminer)
ggforest(obj, data = data.CE)
####
### Get the Breslow estimates of baseline
### cumulative hazard function
Lambda0 <- basehaz(obj,centered = FALSE)
#plot the baseline hazard function
par(mfrow=c(1,1))
plot(stepfun(Lambda0$time,c(0,Lambda0$hazard)),
do.points=F,xlim=c(0,100),ylim=c(0,0.8),lwd=2,frame.plot=F,
xlab="Time (months)",ylab="Baseline cumulative hazard function",
main="")
############################
# Residual analysis
# (Figures 4.3--4.6)
############################
#####################################
## First get the Cox-Snell residuals.;
## The default residuals of coxph in R are the martingale residuals.
## resid(obj,type=c("martingale", "deviance", "score", "schoenfeld",
## "dfbeta", "dfbetas", "scaledsch","partial"))
## Use relationship between cox-snell and martingal
## residuals
coxsnellres <- data.CE$status-resid(obj, type="martingale")
## Then use N-A method to estimate the cumulative
## hazard function for residuals;
fit <- survfit(Surv(coxsnellres,data.CE$status) ~ 1)
Htilde <- cumsum(fit$n.event/fit$n.risk)
plot(log(fit$time),log(Htilde), xlab="log(t)", frame.plot = FALSE,
ylab="log-cumulative hazard", xlim = c(-8, 2), ylim = c(-8, 2))
abline(0, 1, lty = 3, lwd = 1.5)
# rescaled Schoelfeld residuals
sch <- cox.zph(obj)
# chi-square test results for proportionality
# of each covariate and overall
sch$table
### calculated residuals ###
sch_time <- sch$time # the X_i's (m-vector)
sch_resids <- sch$y # rescaled residuals (m x p matrix)
# Plot rescaled Schoelfeld residuals for each covariate
# using the object sch directly
par(mar = c(4, 4, 2, 2), mfrow=c(4,2))
plot(sch, xlab="Time (months)", lwd=2, cex.lab=1.2, cex.axis=1.2,
ylab = c("Hormone", "Menopause", "Age", "Tumor size",
"Tumor grade", "Progesterone", "Estrogen"))
## an alternative graphic provided in survminer package
# ggcoxzph(sch)
# To address non-proportionality of tumor grade
# re-fit the Cox proportional hazards model
# stratified by tumor grade
obj.stra <- coxph(Surv(time, status) ~ hormone + meno
+ age + size + prog + estrg + strata(grade), data = data.CE)
#Schoelfeld
#produce proportionality test results
sch.stra <- cox.zph(obj.stra)
round(sch.stra$table, 4)
## residual plots for stratified model
par(mfrow=c(3,2))
plot(sch.stra, xlab="Time (months)", lwd=2, cex.lab=1.2, cex.axis=1.2,
ylab = c("Hormone", "Menopause", "Age", "Tumor size",
"Progesterone", "Estrogen"))
## Martingale residuals
mart_resid <- resid(obj.stra,type = 'martingale')
## Deviance residuals
dev_resid <- resid(obj.stra,type = 'deviance')
#plot the martingale residuals against
# the fours quantitative covariates:
# age, tumor size, progesterone and
# estrogen receptor levels
## Age
par(mfrow=c(2,2))
plot(data.CE$age, mart_resid,
xlab="Age (years)", ylab="Martingale residuals",
main='Age',cex.lab=1.2,cex.axis=1.2)
lines(lowess(data.CE$age, mart_resid), lwd = 2)
abline(0,0,lty=3,lwd=2)
## Tumor size
plot(data.CE$size, mart_resid,
xlab="Tumor size (mm)", ylab="Martingale residuals",
main='Tumor size',cex.lab=1.2,cex.axis=1.2)
lines(lowess(data.CE$size, mart_resid),lwd=2)
abline(0,0,lty=3,lwd=2)
## Progesterone
plot(data.CE$prog, mart_resid,
xlab="Progesterone receptor (fmol/mg)", ylab="Martingale Residuals",
main='Progesterone',cex.lab=1.2,cex.axis=1.2)
lines(lowess(data.CE$prog, mart_resid),lwd=2)
abline(0,0,lty=3,lwd=2)
## Estrogen
plot(data.CE$estrg, mart_resid,
xlab="Estrogen receptor (fmol/mg)", ylab="Martingale Residuals",
main='Estrogen',cex.lab=1.2,cex.axis=1.2)
lines(lowess(data.CE$estrg, mart_resid),lwd=2)
abline(0,0,lty=3,lwd=2)
# To address non-linear age
# categorize age in agec
# age<=40: agec=1
# 40<age<=60: agec=2
# age>60: agec=3
data.CE$agec <- (data.CE$age<=40)+2*(data.CE$age>40&data.CE$age<=60)+
3*(data.CE$age>60)
data.CE$agec <- factor(data.CE$agec)
#re-fit the model with agec
obj.stra.final <- coxph(Surv(time,status)~ hormone + meno
+ agec + size + prog + estrg + strata(grade), data = data.CE)
#plot the estimated HRs for agec (Figure 4.6)
# and confidence intervals
final.sum <- summary(obj.stra.final)
final.sum
# Plot the age-group-specific HR and confidence
# intervals from the re-fitted model
ci.table=final.sum$conf.int
hr=ci.table[3:4,1]
hr.low=ci.table[3:4,3]
hr.up=ci.table[3:4,4]
par(mfrow=c(1,1))
plot(1:3,c(1,hr),ylim=c(0,1.2),frame=F,xaxt='n',
xlab="Age (years)", ylab="Hazard ratio",pch=19,cex=1.5,cex.lab=1.2,
cex.axis=1.2)
axis(1, at=c(1,2,3),labels=c("(20, 40]","(40, 60]","(60, 80]"),cex.axis=1.2)
# horizontal error bars
arrows(2:3, hr.low, 2:3, hr.up, length=0.05, angle=90, code=3,lwd=2)
lines(1:3,c(1,hr),lty=3,lwd=2)
###########################################
# Illustration with time-varying covariates
# Stanford heart study
###########################################
head(heart)
## change variable name "year" -> "accpt"
colnames(heart)[5] <- "accpt"
#sample size
n <- length(unique(heart$id))
# fit a Cox model with time-dependent "transplant"
obj <- coxph(Surv(start, stop, event) ~ age + accpt + surgery + transplant, data=heart)
#summarize results
summary(obj)
```