-
Notifications
You must be signed in to change notification settings - Fork 29
/
UniLayer1O.h
142 lines (107 loc) · 2.8 KB
/
UniLayer1O.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/*
* UniLayer1O.h
*
* Created on: Mar 18, 2015
* Author: mszhang
*/
/*
* use it only for output layer
*/
#ifndef SRC_UniLayer1O_H_
#define SRC_UniLayer1O_H_
#include "tensor.h"
#include "MyLib.h"
#include "Utiltensor.h"
using namespace mshadow;
using namespace mshadow::expr;
using namespace mshadow::utils;
template<typename xpu>
class UniLayer1O {
public:
Tensor<xpu, 2, dtype> _W;
Tensor<xpu, 2, dtype> _gradW;
Tensor<xpu, 2, dtype> _eg2W;
public:
UniLayer1O() {
}
inline void initial(int nISize, int seed = 0) {
dtype bound = sqrt(6.0 / (1 + nISize + 1));
//dtype bound = 0.01;
_W = NewTensor<xpu>(Shape2(1, nISize), d_zero);
_gradW = NewTensor<xpu>(Shape2(1, nISize), d_zero);
_eg2W = NewTensor<xpu>(Shape2(1, nISize), d_zero);
random(_W, -1.0 * bound, 1.0 * bound, seed);
}
inline void initial(Tensor<xpu, 2, dtype> W) {
static int nISize;
nISize = W.size(1);
_W = NewTensor<xpu>(Shape2(1, nISize), d_zero);
_gradW = NewTensor<xpu>(Shape2(1, nISize), d_zero);
_eg2W = NewTensor<xpu>(Shape2(1, nISize), d_zero);
Copy(_W, W);
}
inline void release() {
FreeSpace(&_W);
FreeSpace(&_gradW);
FreeSpace(&_eg2W);
}
virtual ~UniLayer1O() {
// TODO Auto-generated destructor stub
}
inline dtype squarenormAll() {
dtype result = squarenorm(_gradW);
return result;
}
inline void scaleGrad(dtype scale) {
_gradW = _gradW * scale;
}
public:
inline void ComputeForwardScore(Tensor<xpu, 2, dtype> x, dtype& y) {
static int nISize;
nISize = _W.size(1);
y = 0.0;
for(int idx = 0; idx < nISize; idx++){
y += x[0][idx] * _W[0][idx];
}
}
//please allocate the memory outside here
inline void ComputeBackwardLoss(Tensor<xpu, 2, dtype> x, dtype ly, Tensor<xpu, 2, dtype> lx, bool bclear = false) {
//_gradW
_gradW += ly * x;
if (bclear)
lx = 0.0;
//lx
lx += ly * _W;
}
inline void randomprint(int num) {
static int nISize;
nISize = _W.size(1);
int count = 0;
while (count < num) {
int idy = rand() % nISize;
std::cout << "_W[" << 0 << "," << idy << "]=" << _W[0][idy] << " ";
count++;
}
std::cout << std::endl;
}
inline void updateAdaGrad(dtype regularizationWeight, dtype adaAlpha, dtype adaEps) {
_gradW = _gradW + _W * regularizationWeight;
_eg2W = _eg2W + _gradW * _gradW;
_W = _W - _gradW * adaAlpha / F<nl_sqrt>(_eg2W + adaEps);
clearGrad();
}
inline void clearGrad() {
_gradW = 0;
}
void writeModel(LStream &outf) {
SaveBinary(outf, _W);
SaveBinary(outf, _gradW);
SaveBinary(outf, _eg2W);
}
void loadModel(LStream &inf) {
LoadBinary(inf, &_W, false);
LoadBinary(inf, &_gradW, false);
LoadBinary(inf, &_eg2W, false);
}
};
#endif /* SRC_UniLayer1O_H_ */