# hook_test.py
'''
import torch
from torch.autograd import Variable
x = Variable(torch.randn(5, 5), requires_grad=True)
y = Variable(torch.randn(5, 5), requires_grad=True)
a = x * 2
b = y * 3
def hook_a(grad):
    grad.mul_(2)
a.register_hook(hook_a)
c = a + b
#print(c)
#a.backward();
c.sum().backward()
print(x.grad) # should be 2, is 2
print(y.grad) # should be 3, is 6
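
# A variant that avoids the in-place side effect (a hypothetical follow-up
# sketch, not part of the original experiment): returning a fresh tensor from
# the hook rescales only the gradient flowing into a2, so b2's branch is left
# alone.
x2 = Variable(torch.randn(5, 5), requires_grad=True)
y2 = Variable(torch.randn(5, 5), requires_grad=True)
a2, b2 = x2 * 2, y2 * 3
a2.register_hook(lambda grad: grad * 2)  # return a new tensor, no mutation
(a2 + b2).sum().backward()
print(x2.grad)  # scaled by the hook: 2 * 2 = 4 everywhere
print(y2.grad)  # 3 everywhere, unaffected by the hook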
'''
# -*- coding: utf-8 -*-
"""
nn package
==========
We’ve redesigned the nn package, so that it’s fully integrated with
autograd. Let's review the changes.
**Replace containers with autograd:**
You no longer have to use Containers like ``ConcatTable``, or modules like
``CAddTable``, or use and debug with nngraph. We will seamlessly use
autograd to define our neural networks. For example,
* ``output = nn.CAddTable():forward({input1, input2})`` simply becomes
``output = input1 + input2``
* ``output = nn.MulConstant(0.5):forward(input)`` simply becomes
``output = input * 0.5``
**State is no longer held in the module, but in the network graph:**
Using recurrent networks should be simpler for this reason. If
you want to create a recurrent network, simply use the same Linear layer
multiple times, without having to think about sharing weights.
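
For example, one ``Linear`` module can serve as the cell of a tiny recurrent
loop; reusing it at every time step is exactly what shares its weights. A
minimal sketch with made-up sizes (illustrative only, not code from this file):

.. code-block:: python

    cell = nn.Linear(4 + 8, 8)                  # one shared layer
    hidden = Variable(torch.zeros(1, 8))
    for x_t in [Variable(torch.randn(1, 4)) for _ in range(3)]:
        # the same module (and the same weight matrix) is applied each step
        hidden = F.tanh(cell(torch.cat((x_t, hidden), 1)))
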
.. figure:: /_static/img/torch-nn-vs-pytorch-nn.png
:alt: torch-nn-vs-pytorch-nn
torch-nn-vs-pytorch-nn
**Simplified debugging:**
Debugging is intuitive using Python’s pdb debugger, and **the debugger
and stack traces stop at exactly where an error occurred.** What you see
is what you get.
Example 1: ConvNet
------------------
Let’s see how to create a small ConvNet.
All of your networks are derived from the base class ``nn.Module``:
- In the constructor, you declare all the layers you want to use.
- In the forward function, you define how your model is going to be
  run, from input to output.
"""
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
class MNISTConvNet(nn.Module):

    def __init__(self):
        # this is the place where you instantiate all your modules
        # you can later access them using the same names you've given them in here
        super(MNISTConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(10, 20, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    # it's the forward function that defines the network structure
    # we're accepting only a single input in here, but if you want,
    # feel free to use more
    def forward(self, input):
        x = self.pool1(F.relu(self.conv1(input)))
        x = self.pool2(F.relu(self.conv2(x)))

        # in your model definition you can go full crazy and use arbitrary
        # python code to define your model structure
        # all these are perfectly legal, and will be handled correctly
        # by autograd:
        # if x.gt(0) > x.numel() / 2:
        #     ...
        #
        # you can even do a loop and reuse the same module inside it
        # modules no longer hold ephemeral state, so you can use them
        # multiple times during your forward pass
        # while x.norm(2) < 10:
        #     x = self.conv1(x)

        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
###############################################################
# Let's use the defined ConvNet now.
# You create an instance of the class first.
net = MNISTConvNet()
print(net)
########################################################################
# .. note::
#
#     ``torch.nn`` only supports mini-batches. The entire ``torch.nn``
#     package only supports inputs that are a mini-batch of samples, and not
#     a single sample.
#
#     For example, ``nn.Conv2d`` will take in a 4D Tensor of
#     ``nSamples x nChannels x Height x Width``.
#
#     If you have a single sample, just use ``input.unsqueeze(0)`` to add
#     a fake batch dimension (a short example of this follows the forward
#     pass below).
#
# Create a mini-batch containing a single sample of random data and send the
# sample through the ConvNet.
input = Variable(torch.randn(1, 1, 28, 28))
out = net(input)
print(out.size())
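########################################################################
# As an aside, if you start from a single 3D sample, ``unsqueeze(0)`` adds
# the fake batch dimension mentioned in the note above (a small illustration,
# not part of the original tutorial flow):
single = torch.randn(1, 28, 28)          # nChannels x Height x Width
batched = Variable(single.unsqueeze(0))  # 1 x nChannels x Height x Width
print(batched.size())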
########################################################################
# Define a dummy target label and compute error using a loss function.
target = Variable(torch.LongTensor([3]))
loss_fn = nn.CrossEntropyLoss() # LogSoftmax + ClassNLL Loss
err = loss_fn(out, target)
err.backward()
print(err)
########################################################################
# The output of the ConvNet ``out`` is a ``Variable``. We compute the loss
# using that, and that results in ``err`` which is also a ``Variable``.
# Calling ``.backward`` on ``err`` will therefore propagate gradients all the
# way through the ConvNet to its weights.
#
# Let's access individual layer weights and gradients:
print(net.conv1.weight.grad.size())
########################################################################
print(net.conv1.weight.data.norm()) # norm of the weight
print(net.conv1.weight.grad.data.norm()) # norm of the gradients
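########################################################################
# The same inspection works for every layer at once; for instance, iterating
# over ``state_dict()`` prints the norm of each parameter tensor (a small
# illustrative loop, not part of the original text):
for name, tensor in net.state_dict().items():
    print(name, tensor.norm())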
########################################################################
# Forward and Backward Function Hooks
# -----------------------------------
#
# We’ve inspected the weights and the gradients. But how about inspecting
# / modifying the output and grad\_output of a layer?
#
# We introduce **hooks** for this purpose.
#
# You can register a function on a ``Module`` or a ``Variable``.
# The hook can be a forward hook or a backward hook.
# The forward hook will be executed when a forward call is executed.
# The backward hook will be executed in the backward phase.
# Let’s look at an example.
#
# We define a forward hook for ``conv2`` that prints some information;
# its registration call is left commented out below.
def printnorm(self, input, output):
    # input is a tuple of packed inputs
    # output is a Variable. output.data is the Tensor we are interested in
    print('Inside ' + self.__class__.__name__ + ' forward')
    print('')
    print('input: ', type(input))
    print('input[0]: ', type(input[0]))
    print('output: ', type(output))
    print('')
    print('input size:', input[0].size())
    print('output size:', output.data.size())
    print('output norm:', output.data.norm())
#net.conv2.register_forward_hook(printnorm)
out = net(input)
########################################################################
#
# We register a backward hook on ``conv2`` (and on ``fc1``) and print some information
def printgradnorm(self, grad_input, grad_output):
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
    print('Inside ' + self.__class__.__name__ + ' backward')
    print('Inside class:' + self.__class__.__name__)
    print('')
    print('grad_input: ', type(grad_input))
    print('grad_input tuple size: ', len(grad_input))
    print('grad_input[0]: ', type(grad_input[0]))
    print('grad_output: ', type(grad_output))
    print('grad_output tuple size: ', len(grad_output))
    print('grad_output[0]: ', type(grad_output[0]))
    print('')
    print('grad_input[0] size:', grad_input[0].size())
    print('grad_input[1] size:', grad_input[1].size())
    print('grad_input[2] size:', grad_input[2].size())
    print('grad_output[0] size:', grad_output[0].size())
    print('grad_input[0] norm:', grad_input[0].data.norm())
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
net.conv2.register_backward_hook(printgradnorm)
net.fc1.register_backward_hook(printgradnorm)
out = net(input)
err = loss_fn(out, target)
err.backward()
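########################################################################
# A hook stays attached until it is removed. The ``register_*_hook`` calls
# return a handle whose ``remove()`` method detaches the hook again; a
# minimal sketch of that pattern (``h`` is just a local name used here):
h = net.conv2.register_forward_hook(printnorm)
out = net(input)   # printnorm fires during this forward pass
h.remove()
out = net(input)   # printnorm no longer fires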