linear_regression.py
import numpy as np


def lr_predict(X, w, t):
    '''
    Given X, w, and t, predicts t_hat and calculates the corresponding
    loss (using mean squared error) and risk (using mean absolute
    difference).

    X: N x (d + 1)
    w: (d + 1) x 1
    t: N x 1
    '''
    t_hat = np.matmul(X, w)

    # Mean squared error
    loss = (1 / (2 * t.shape[0])) * np.linalg.norm(t_hat - t, 2) ** 2
    # Mean absolute difference
    risk = (1 / t.shape[0]) * np.linalg.norm(t_hat - t, 1)

    return t_hat, loss, risk
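
# A minimal sanity check for lr_predict, added for illustration and not part
# of the original file; it assumes the documented shapes (X: N x (d + 1),
# w: (d + 1) x 1, t: N x 1) and can be verified by hand:
#
#   X = np.array([[1.0, 1.0], [1.0, 2.0]])  # bias column plus one feature
#   w = np.array([[0.5], [1.0]])
#   t = np.array([[1.0], [3.0]])
#   t_hat, loss, risk = lr_predict(X, w, t)
#   # t_hat = [[1.5], [2.5]], so the errors are [0.5, -0.5]:
#   # loss = (0.5 ** 2 + 0.5 ** 2) / (2 * 2) = 0.125
#   # risk = (0.5 + 0.5) / 2 = 0.5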
def lr_train(X_train, t_train, X_val, t_val, hyperparams):
    '''
    Performs training and validation on the respective datasets passed in
    using mini-batch gradient descent with l2-regularization.

    X_train: N_train x (d + 1)
    t_train: N_train x 1
    X_val:   N_val x (d + 1)
    t_val:   N_val x 1
    '''
    # Initialize weights randomly, w: (d + 1) x 1
    # (the second dimension keeps w consistent with t's N x 1 shape)
    w = np.random.rand(X_train.shape[1], 1)

    losses_train = []
    risks_val = []

    w_best = None
    risk_best = np.inf
    epoch_best = 0

    num_batches = int(np.ceil(X_train.shape[0] / hyperparams.batch_size))
    for epoch in range(hyperparams.max_epochs):
        loss_this_epoch = 0
        for b in range(num_batches):
            # X_batch: batch_size x (d + 1)
            X_batch = X_train[b * hyperparams.batch_size:(b + 1) * hyperparams.batch_size]
            # t_batch: batch_size x 1
            t_batch = t_train[b * hyperparams.batch_size:(b + 1) * hyperparams.batch_size]

            _, loss_batch, _ = lr_predict(X_batch, w, t_batch)
            loss_this_epoch += loss_batch

            # Mini-batch gradient descent:
            # gradient = (1 / batch_size) * (X^T X w - X^T t),
            # dividing by the actual batch size so a smaller final batch
            # is weighted correctly
            gradient = (1 / X_batch.shape[0]) * \
                (np.matmul(np.matmul(X_batch.T, X_batch), w)
                 - np.matmul(X_batch.T, t_batch))

            # Gradient step with l2 regularization
            w = w - hyperparams.alpha * (gradient + hyperparams.decay * w)

        # Compute the training loss by averaging loss_this_epoch
        training_loss = loss_this_epoch / num_batches
        losses_train.append(training_loss)

        # Perform validation on the validation set by the risk
        _, _, risk_val = lr_predict(X_val, w, t_val)
        risks_val.append(risk_val)

        # Keep track of the best validation epoch, risk, and the weights
        if risk_val < risk_best:
            w_best, risk_best, epoch_best = w, risk_val, epoch

    return w_best, risk_best, epoch_best, losses_train, risks_val
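

# A hypothetical usage sketch, not part of the original file: lr_train only
# reads hyperparams.batch_size, .max_epochs, .alpha, and .decay, so a
# SimpleNamespace with those attributes is assumed here, and the synthetic
# data and hyperparameter values below are purely illustrative.
if __name__ == '__main__':
    from types import SimpleNamespace

    rng = np.random.default_rng(0)
    N, d = 100, 3
    # Design matrix with a leading bias column, X: N x (d + 1)
    X = np.hstack([np.ones((N, 1)), rng.standard_normal((N, d))])
    # Noisy linear targets, t: N x 1
    t = X @ rng.standard_normal((d + 1, 1)) + 0.1 * rng.standard_normal((N, 1))

    hyperparams = SimpleNamespace(batch_size=10, max_epochs=50,
                                  alpha=0.01, decay=0.001)
    w_best, risk_best, epoch_best, losses_train, risks_val = lr_train(
        X[:80], t[:80], X[80:], t[80:], hyperparams)
    print('best epoch:', epoch_best, 'best validation risk:', risk_best)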