# main.r -- online logistic regression on hashed (one-hot) features,
# trained with stochastic gradient descent and an adaptive learning rate.
# NOTE(review): setwd() in a script is fragile -- it assumes the script is
# launched from a specific subdirectory so that '..' contains the data files
# (hash_train.csv / hash_test.csv). Prefer absolute paths or running from the
# data directory; confirm the intended working directory with the author.
setwd('..')
# function definitions ##########################
# Bounded logarithmic loss.
#
# Clamps predictions away from 0 and 1 so log() stays finite, then returns
# the mean negative log-likelihood of the labels under the predictions.
#
# actual:     observed class labels (0 or 1)
# prediction: predicted probabilities p(y = 1)
# returns:    scalar logloss of `prediction` given `actual`
llfun <- function(actual, prediction) {
  epsilon <- 10e-14
  # Bound predictions to [epsilon, 1 - epsilon] before taking logs.
  clipped <- pmax(pmin(prediction, 1. - epsilon), epsilon)
  -mean(actual * log(clipped) + (1. - actual) * log(1. - clipped))
}
# Prediction based on features and weights.
#
# NOTE: this masks the stats::predict generic for the rest of the script.
#
# input:
#   x: active (hashed) feature indices; each active feature has value 1
#   w: weight vector indexed by hashed feature
# output:
#   probability p(y = 1 | x ; w)
predict <- function(x, w) {
  # Inner product w'x: since every active feature has value 1, this is just
  # the sum of the weights at the active indices (vectorized, replacing the
  # original scalar accumulation loop; identical result, incl. empty x -> 0).
  idx <- unlist(x, use.names = FALSE)
  wTx <- sum(w[idx])
  # Clamp the logit to [-20, 20] to avoid overflow/underflow in exp().
  1. / (1. + exp(-max(min(wTx, 20.), -20.)))
}
# One stochastic-gradient-descent step for a single training example.
#
# input:
#   w:          weight vector (indexed by hashed feature)
#   n:          per-feature update counts, driving the adaptive rate
#   x:          active (hashed) feature indices for this example
#   prediction: model output p(y = 1 | x ; w)
#   actual:     true label (0 or 1)
#   alpha:      base learning rate. Previously this was read from a global
#               inside the loop; the default below reproduces that lookup
#               (falling back to 1 when no global `alpha` exists) so existing
#               five-argument calls behave identically.
# output:
#   list with the updated 'w' and 'n'
update <- function(w, n, x, prediction, actual,
                   alpha = get0("alpha", ifnotfound = 1.)) {
  for (i in x) {
    # alpha / (sqrt(n[i]) + 1) is the adaptive learning rate heuristic;
    # (prediction - actual) is the gradient, since x[i] = 1 for active i.
    adp_rate <- alpha / (sqrt(n[i]) + 1.)
    w[i] <- w[i] - (prediction - actual) * adp_rate
    n[i] <- n[i] + 1.
  }
  list('w' = w, 'n' = n)
}
# training #######################################################
library(ff)
# Read the hashed features generated by one-hot encoding (done off-line).
df1 <- read.csv.ffdf(file = 'hash_train.csv', VERBOSE = TRUE)

# initialize our model
D <- 2**20       # dimensionality of the hashed feature space
alpha <- 1       # initial learning rate for stochastic gradient descent
w <- rep(0, D)   # initial weights
n <- rep(0, D)   # per-feature update counts (for the adaptive rate)

# Train the SGD logistic-regression model in a single pass over the data.
loss <- 0.
for (i in seq_len(nrow(df1))) {
  row <- df1[i, ]
  # First column is the target; map it to a numeric 0/1 label.
  y <- if (row[1] == 1) 1. else 0.
  # Get the hashed features, excluding the target column.
  x <- row[-1]
  # Prediction based on the current features and weights.
  prediction <- predict(x, w)
  # Accumulate logloss; report the running average every 10k rows.
  loss <- loss + llfun(y, prediction)
  if (i %% 10000 == 0) print(loss / i)
  # Update the weights and the per-feature counters.
  wn_update <- update(w, n, x, prediction, y)
  w <- wn_update$w
  n <- wn_update$n
}
# testing #######################################################
# Read the hashed features for the test set (one-hot encoded off-line).
df2 <- read.csv.ffdf(file = 'hash_test.csv', VERBOSE = TRUE)
sub_file <- "submission.csv"
# Write the submission header row.
FF <- as.matrix(t(c("id", "click")))
write.table(FF, file = sub_file, row.names = FALSE, col.names = FALSE,
            sep = ",")
for (i in seq_len(nrow(df2))) {
  row <- df2[i, ]
  id <- row[1]
  # Get the hashed features, excluding the id column.
  x <- row[-1]
  # Prediction based on the features and the trained weights.
  prediction <- predict(x, w)
  # Append this test case's prediction to the submission file.
  FF <- as.matrix(t(list(id, prediction)))
  write.table(FF, file = sub_file, row.names = FALSE, col.names = FALSE,
              sep = ",", append = TRUE)
}