# 5_linear_ensemble_booster.R
#
# Mixes the stacking datasets with an xgboost linear booster: cross-validates
# the booster, fits the final mixer, and writes the test predictions to disk.
library(xgboost)
#--------------------------------
# Setting the working directory.
#--------------------------------
# NOTE(review): machine-specific absolute path. Every relative path used in
# this script (the input CSVs and "predictions.csv") resolves against it, so
# it has to be adjusted when the script is run elsewhere.
setwd("/home/benedek/Documents/societe/")
#---------------------------------------------
# Reading the datasets used for the stacking.
#---------------------------------------------
# Both files are semicolon-separated. The first column is treated as the row
# ID below; all remaining columns are used as features.
train <- read.csv(
  "./clean_dataset/train_mixing.csv",
  stringsAsFactors = FALSE,
  sep = ";"
)
test <- read.csv(
  "./clean_dataset/test_mixing.csv",
  stringsAsFactors = FALSE,
  sep = ";"
)
#------------------
# Setting the IDs.
#------------------
test_ID <- test[, 1]
train_ID <- train[, 1]
#--------------------
# Reading the target.
#--------------------
target <- read.csv(
  "./raw_dataset/target.csv",
  sep = ";",
  stringsAsFactors = FALSE
)
target <- target$Target
if (is.null(target)) {
  stop("target.csv does not contain a 'Target' column.", call. = FALSE)
}
#-------------------------------------------
# Transforming the train and test dataset.
#-------------------------------------------
# Drop the ID column by negative index. `drop = FALSE` keeps a data frame (and
# its column name) even when a single feature column is left, and avoids the
# `2:ncol(x)` pitfall where a one-column input yields the sequence c(2, 1).
train <- as.matrix(train[, -1, drop = FALSE])
test <- as.matrix(test[, -1, drop = FALSE])
#-------------------------------------------
# Sanity checks before any model is fitted.
#-------------------------------------------
if (ncol(train) == 0L) {
  stop("train_mixing.csv has no feature columns after the ID.", call. = FALSE)
}
if (!is.numeric(train) || !is.numeric(test)) {
  stop("All feature columns must be numeric.", call. = FALSE)
}
if (ncol(train) != ncol(test)) {
  stop(
    "train has ", ncol(train), " feature columns but test has ", ncol(test),
    ".",
    call. = FALSE
  )
}
if (length(target) != nrow(train)) {
  stop(
    "Target has ", length(target), " values but train has ", nrow(train),
    " rows.",
    call. = FALSE
  )
}
# Features are matched by position at prediction time, so a differing column
# order would not fail on its own -- surface it instead of staying silent.
if (!identical(colnames(train), colnames(test))) {
  warning(
    "train and test feature columns differ in name or order.",
    call. = FALSE
  )
}
#----------------------------------------------------------------------------------
# Cross-validating the linear booster that is used as the mixing method.
#----------------------------------------------------------------------------------
# 5-fold cross-validation on the training matrix: up to 2000 rounds, early
# stopping set to 3 rounds, AUC as the evaluation metric (maximised).
# The object stored in `bst` here is replaced by the fitted model in the next
# section of the script.
# NOTE(review): `subsample` and `colsample_bytree` are tree-booster settings
# and presumably have no effect with booster = "gblinear" -- confirm against
# the xgboost parameter documentation.
bst <- xgb.cv(
  params = list(
    booster = "gblinear",
    objective = "binary:logistic",
    eval_metric = "auc",
    eta = 0.1,
    alpha = 0.5,
    lambda = 0.5,
    subsample = 1,
    colsample_bytree = 0.1
  ),
  data = train,
  label = target,
  nrounds = 2000,
  nfold = 5,
  early.stop.round = 3,
  maximize = TRUE
)
#---------------------
# Fitting the mixer.
#---------------------
# Fits the linear booster on the full training matrix for a fixed 200 rounds,
# with the same booster settings as the cross-validation call above.
# NOTE(review): no early stopping is requested here, so `maximize` presumably
# has no effect; likewise `subsample` / `colsample_bytree` look like
# tree-booster settings -- confirm against the xgboost documentation.
bst <- xgboost(
  params = list(
    booster = "gblinear",
    objective = "binary:logistic",
    eval_metric = "auc",
    eta = 0.1,
    alpha = 0.5,
    lambda = 0.5,
    subsample = 1,
    colsample_bytree = 0.1
  ),
  data = train,
  label = target,
  nrounds = 200,
  maximize = TRUE
)
#-----------------------------------------------------
# Doing the predictions and dumping results to disk.
#-----------------------------------------------------
yhat <- predict(bst, test)
# One prediction per test row is required; fail loudly rather than letting a
# length mismatch be recycled silently when the columns are combined.
if (length(yhat) != length(test_ID)) {
  stop(
    "Expected one prediction per test ID, got ", length(yhat),
    " predictions for ", length(test_ID), " IDs.",
    call. = FALSE
  )
}
# A data frame keeps each column's own type. Binding the columns into a matrix
# would coerce both to a common type: integer IDs become doubles (and may be
# written in scientific notation), character IDs turn the predictions into
# quoted strings.
out <- data.frame(ID = test_ID, TARGET = yhat, stringsAsFactors = FALSE)
# Semicolon-separated file with an "ID";"TARGET" header, written relative to
# the current working directory.
write.table(out, "predictions.csv", row.names = FALSE, sep = ";")