-
Notifications
You must be signed in to change notification settings - Fork 0
/
lasso function.R
105 lines (95 loc) · 3.36 KB
/
lasso function.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#Define function to run LASSO regression and return eval results#
run.lasso <- function(
c.vars, #select variable x
c.response, #select variable y
set.train = c.train, #select train
set.test = c.test, #select test
family = "binomial", #define y type
var.name, #output x category
fold = fold, #output fold and iteration
cohort=cohort, #output train cohort
mcohort=mcohort, #output test cohort
c.res = c.res #store result
) {
#Get data
c.train.X <- set.train %>% dplyr::select(c.vars) %>% mutate_all(~replace_na(., min(., na.rm=T))) %>% as.matrix.data.frame()
c.test.X <- set.test %>% dplyr::select(c.vars) %>% mutate_all(~replace_na(., min(., na.rm=T))) %>% as.matrix.data.frame()
resp.train <- set.train %>% pull(c.response)
resp.test <- set.test %>% pull(c.response)
#Run cv'ed LASSO regression
c.lasso <- cv.glmnet(c.train.X, resp.train, alpha = 1, family = family)
#Get standardised coefficients at lambda.1se using the Agresti method
sds <- apply(c.train.X, 2, sd)
cs <- as.matrix(coef(c.lasso, s = "lambda.min")) #"lambda.min", "lambda.1se"#
std_coefs <- cs[-1, 1] * sds
#Prepare output
c.res[["coef"]] <- bind_rows(c.res[["coef"]], tibble(
cohort= cohort,
mcohort=mcohort,
resp.var = c.response,
cv.fold = fold,
family = family,
var.type = var.name,
term = c.vars,
coef = std_coefs
))
#Evaluate model fit and pass outputs
c.predict.test <- as.numeric(predict(c.lasso, c.test.X, "lambda.min"))
c.res[["predict"]] <- bind_rows(c.res[["predict"]], tibble(
cohort=cohort,
mcohort=mcohort,
resp.var = c.response,
cv.fold = fold,
family = family,
var.type = var.name,
response = resp.test,
predicted = c.predict.test
))
if (family == "binomial") {
c.res[["metric"]] <- bind_rows(c.res[["metric"]], tibble(
cohort=cohort,
mcohort=mcohort,
resp.var = c.response,
cv.fold = fold,
family = family,
var.type = var.name,
metric = "auc",
non.trivial = var(c.predict.test) != 0, #avoid to all variables were predict to one type #
value = metric_auc(resp.test, c.predict.test)
))
#Pass ROC curve
tmp.roc <- invisible(roc(response = resp.test, predictor = as.numeric(c.predict.test), ret = "coords"))
c.res[["roc"]] <- bind_rows(c.res[["roc"]], tibble(
cohort=cohort,
mcohort=mcohort,
resp.var = c.response,
cv.fold = fold,
family = family,
var.type = var.name,
sensitivity = tmp.roc$sensitivities,
specificity = tmp.roc$specificities
))
#Pass
return(c.res)
} else {
#Pass metric
c.res[["metric"]] <- bind_rows(c.res[["metric"]], tibble(
cohort=cohort,
mcohort=mcohort,
resp.var = c.response,
cv.fold = fold,
family = family,
var.type = var.name,
metric = c("rho", "rsq", "mse", "rmse"),
non.trivial = var(c.predict.test) != 0,
value = c(
cor(resp.test, c.predict.test),
metric_rsquared(resp.test, c.predict.test),
metric_mse(resp.test, c.predict.test),
metric_rmse(resp.test, c.predict.test)
)
))
#Pass
return(c.res)
}
}