optimizers.py
import numpy as np


class Adam():
    """Adam optimizer: keeps per-layer running averages of the gradient (v)
    and of the squared gradient (s), corrects their initialization bias, and
    scales each update by the inverse square root of the second moment."""

    def __init__(self, model, beta_1=0.9, beta_2=0.999, epsilon=1e-8):
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.t = 1  # time step, used for bias correction
        self.v = [0] * len(model.layers)  # first-moment estimates
        self.s = [0] * len(model.layers)  # second-moment estimates

    def step(self, layers, lr):
        for i, layer in enumerate(layers):
            if layer.isUpdatable:
                # Exponential moving averages of the gradient and its square.
                self.v[i] = self.beta_1 * self.v[i] + \
                    (1 - self.beta_1) * layer.W.grad
                self.s[i] = self.beta_2 * self.s[i] + \
                    (1 - self.beta_2) * np.square(layer.W.grad)
                # Correct the bias from the zero initialization of v and s.
                v_bias_corr = self.v[i] / (1 - self.beta_1 ** self.t)
                s_bias_corr = self.s[i] / (1 - self.beta_2 ** self.t)
                # Scaled parameter update.
                layer.W.value -= lr * v_bias_corr / \
                    (np.sqrt(s_bias_corr) + self.epsilon)
        # Advance the time step once per optimizer step.
        self.t += 1
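

# Illustrative sketch, not part of the original file: the same Adam update
# written for a single plain numpy parameter, to make the per-step math easy
# to follow. The function name and arguments (`param`, `grad`, `v`, `s`, `t`)
# are hypothetical; only the arithmetic mirrors Adam.step above.
def adam_reference_update(param, grad, v, s, t, lr,
                          beta_1=0.9, beta_2=0.999, epsilon=1e-8):
    # Exponential moving averages of the gradient and the squared gradient.
    v = beta_1 * v + (1 - beta_1) * grad
    s = beta_2 * s + (1 - beta_2) * np.square(grad)
    # Bias correction compensates for the zero initialization of v and s.
    v_hat = v / (1 - beta_1 ** t)
    s_hat = s / (1 - beta_2 ** t)
    # Scaled update of the parameter.
    param = param - lr * v_hat / (np.sqrt(s_hat) + epsilon)
    return param, v, s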


class L2():
    """L2 (weight decay) regularization applied to layer weights."""

    def __init__(self, reg_strength):
        self.reg_strength = reg_strength

    def regularize(self, layers, loss):
        for layer in layers:
            if layer.isUpdatable:
                # Penalty is reg_strength * sum(W^2); its gradient,
                # 2 * reg_strength * W, is added to the existing gradient.
                loss += self.reg_strength * np.sum(np.square(layer.W.value))
                layer.W.grad += 2 * self.reg_strength * layer.W.value
        return loss
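

# Minimal usage sketch, an assumption rather than part of the original file:
# a toy parameter/layer/model exposing the interface this module relies on
# (`model.layers`, `layer.isUpdatable`, `layer.W.value`, `layer.W.grad`),
# used only to show how L2.regularize and Adam.step fit together.
if __name__ == "__main__":
    class _Param:
        def __init__(self, value):
            self.value = value
            self.grad = np.zeros_like(value)

    class _ToyLayer:
        isUpdatable = True

        def __init__(self, n_in, n_out):
            self.W = _Param(np.random.randn(n_in, n_out) * 0.01)

    class _ToyModel:
        def __init__(self):
            self.layers = [_ToyLayer(4, 3), _ToyLayer(3, 2)]

    model = _ToyModel()
    optimizer = Adam(model)
    regularizer = L2(reg_strength=1e-3)

    for step in range(3):
        # Pretend a forward/backward pass produced a loss and gradients.
        loss = 1.0
        for layer in model.layers:
            layer.W.grad = np.random.randn(*layer.W.value.shape)
        # Add the L2 penalty to the loss and its gradient to W.grad,
        # then take one Adam step.
        loss = regularizer.regularize(model.layers, loss)
        optimizer.step(model.layers, lr=0.01)
        print(f"step {step}: loss with penalty = {loss:.4f}")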