% advanced.tex (forked from nouiz/ccw_tutorial_theano)
\documentclass[utf8x]{beamer}
\usepackage[utf8x]{inputenc}
\usepackage[OT1]{fontenc}
\usepackage{graphicx}
\usepackage{listings}
\usepackage{hyperref}
\usepackage{xcolor}
\usetheme{Malmoe}
\usecolortheme{beaver}
\lstloadlanguages{Python,C,sh}
\definecolor{darkgreen}{RGB}{0,93,21}
\definecolor{greenblue}{RGB}{40,110,126}
\definecolor{lightgray}{RGB}{246,246,246}
\definecolor{bordergray}{RGB}{193,193,193}
\definecolor{lightblue}{RGB}{0,114,168}
\definecolor{methblue}{RGB}{0,31,108}
\title{Extending Theano}
\author{Arnaud Bergeron}
\date{\today}
\lstset{
language=Python,
basicstyle=\fontfamily{pcr}\selectfont\footnotesize,
keywordstyle=\color{darkgreen}\bfseries,
commentstyle=\color{greenblue}\itshape,
stringstyle=\color{violet},
showstringspaces=false,
tabsize=4,
backgroundcolor=\color{lightgray},
frame=single,
emph={[2]__init__,make_node,perform,infer_shape,c_code,make_thunk,grad,R_op},emphstyle={[2]\color{methblue}},
emph={[3]self},emphstyle={[3]\color{darkgreen}},
moredelim=**[is][{\color{red}}]{`}{`}
}
\newcommand{\code}[1]{\lstinline[emph={[2]}]|#1|}
\begin{document}
\frame[plain]{\titlepage}
\section*{}
\begin{frame}{Outline}
\begin{enumerate}
\item How to Make an Op (Python) (45 min)
\item How to Make an Op (C) (30 min)
\item Op Params (10 min)
\item GPU Ops
\item Optimizations (20 min)
\end{enumerate}
\end{frame}
\section{How to Make an Op (Python)}
\begin{frame}[plain]{}
\begin{center}
\Huge How to Make an Op (Python)
\end{center}
\end{frame}
\begin{frame}[fragile]{Overview}
\lstinputlisting[lastline=14]{python.py}
\end{frame}
\begin{frame}{\code{__init__}}
\lstinputlisting[firstline=6,lastline=8]{python.py}
\begin{itemize}
\item Optional, a lot of Ops don't have one
\item Serves to set up Op-level parameters
\item Should also perform validation on those parameters
\end{itemize}
\end{frame}
\begin{frame}{\code{__props__}}
\lstinputlisting[firstline=4,lastline=5]{python.py}
\begin{itemize}
\item Optional (although very useful)
\item Generates \code{__hash__}, \code{__eq__} and \code{__str__} methods if present
\item Empty tuple signifies no properties that should take part in comparison
\item If you have only one property, make sure you add a final comma: \code{('property',)}
\end{itemize}
\end{frame}
\begin{frame}{\code{make_node}}
\lstinputlisting[firstline=9,lastline=11]{python.py}
\begin{itemize}
\item This creates the node object that represents our computation in the graph
\item The parameters are usually Theano variables, but can be python objects too
\item The return value must be an \code{Apply} instance
\end{itemize}
\end{frame}
\begin{frame}{What Is an Apply Node?}
\begin{center}
\includegraphics[width=\textwidth]{apply_node}
\end{center}
\end{frame}
\begin{frame}{\code{perform}}
\lstinputlisting[firstline=12,lastline=14]{python.py}
\begin{itemize}
\item This performs the computation on a set of values (hence the method name)
\item The parameters are all python objects (not symbolic values)
\item This method must not return its result, but rather store it in the 1-element lists (or cells) provided in \code{outputs_storage}
\item The output storage may contain a pre-existing value from a previous run that may be reused for storage.
\end{itemize}
\end{frame}
\begin{frame}{DoubleOp}
\lstinputlisting[lastline=15]{doubleop.py}
\end{frame}
\begin{frame}{Op Instances and Nodes}
When you call an op class you get an instance of that Op:
\vskip4mm
\hskip3em\code{double_op = DoubleOp()}
\vskip4mm
But when you want to use that op as a node in a graph you need to call the \textit{instance}:
\vskip4mm
\hskip3em\code{node = double_op(x)}
\vskip4mm
You can do both steps at once with a double call like this:
\vskip4mm
\hskip3em\code{node = DoubleOp()(x)}
\end{frame}
\begin{frame}{Basic Tests}
\lstinputlisting[linerange={1-5,8-18}]{test_doubleop.py}
\end{frame}
\begin{frame}[fragile]{Run Tests}
The simplest way to run your tests is to use \texttt{nosetests} directly on your test file like this:
\begin{lstlisting}[language={},backgroundcolor=\color{white},frame={}]
$ nosetests test_doubleop.py
.
------------------------------------------------------
Ran 1 test in 0.427s
OK
\end{lstlisting}
You can also use \texttt{theano-nose} which is a wrapper around \texttt{nosetests} with some extra options.
\end{frame}
\begin{frame}{\code{infer_shape}}
\lstinputlisting[firstline=15,lastline=17]{python.py}
\begin{itemize}
\item This function is optional, although highly recommended
\item It takes as input the symbolic shapes of the input variables
\item \code{input_shapes} is of the form \code{[[i0_shp0, i0_shp1, ...], ...]}
\item It must return a list with the symbolic shape of the output variables
\end{itemize}
\end{frame}
\begin{frame}{Example}
\lstinputlisting[firstline=16,lastline=18]{doubleop.py}
\begin{itemize}
\item Here the code is really simple since we don't change the shape in any way in our Op
\item \code{input_shapes} would be an expression equivalent to \code{[x.shape]}
\end{itemize}
\end{frame}
\begin{frame}{Tests}
\lstinputlisting[linerange={5-5,20-34}]{test_doubleop.py}
\end{frame}
\begin{frame}{Gradient}
\lstinputlisting[firstline=18,lastline=20]{python.py}
\begin{itemize}
\item This function is required for graphs including your op to work with \code{theano.grad()}
\item Each item you return is the gradient with respect to the corresponding input, computed from the gradients with respect to the outputs (which you get in \code{output_grads})
\item It must return a list of symbolic graphs, one for each of your inputs
\item Inputs that have no valid gradient should have a special \code{DisconnectedType} value
\end{itemize}
\end{frame}
\begin{frame}{Example}
\lstinputlisting[firstline=19,lastline=21]{doubleop.py}
\begin{itemize}
\item Here since the operation is simple the gradient is simple
\item Note that we return a list
\end{itemize}
\end{frame}
\begin{frame}{Tests}
To test the gradient we use \code{verify_grad}
\lstinputlisting[linerange={5-5,36-44}]{test_doubleop.py}
It will compute the gradient numerically and symbolically (using our \code{L_op()} method) and compare the two.
\end{frame}
\section{How to Make an Op (C)}
\begin{frame}[plain]{}
\begin{center}
\Huge How to Make an Op (C)
\end{center}
\end{frame}
\begin{frame}{Overview}
\lstinputlisting{c.py}
\end{frame}
\begin{frame}{\code{c_code}}
\lstinputlisting[linerange={9-11}]{c.py}
\begin{itemize}
\item This method returns a python string containing C code
\item \code{input_names} contains the names of the variables that hold the inputs
\item \code{output_names} contains the names of the variables in which to place the outputs
\item \code{sub} contains some code snippets to insert into our code (mostly to indicate failure)
\item The variables in \code{output_names} may contain a reference to a pre-existing value from a previous run that may be reused for storage.
\end{itemize}
\end{frame}
\begin{frame}{Support Code}
\lstinputlisting[linerange={13-14}]{c.py}
\begin{itemize}
\item This method returns a python string containing C code
\item The code may be shared with multiple instances of the op
\item It can contain things like helper functions
\end{itemize}
There are a number of similar methods to insert code at various points
\end{frame}
\begin{frame}{Headers, Libraries, Compilers}
Some of the methods available to customize the compilation environment:
\begin{description}
\item[\texttt{c\_libraries}] Return a list of shared libraries the op needs
\item[\texttt{c\_headers}] Return a list of included headers the op needs
\item[\texttt{c\_compiler}] C compiler to use (if not the default)
\end{description}
Again, others are available. Refer to the documentation for a complete list.
\end{frame}
\begin{frame}{Python C-API}
\begin{description}
\item[\texttt{void Py\_INCREF(PyObject *o)}] Increase the reference count of a python object.
\item[\texttt{void Py\_DECREF(PyObject *o)}] Decrease the reference count of a python object.
\item[\texttt{void Py\_XINCREF(PyObject *o)}] Increase the reference count of a (potentially NULL) python object.
\item[\texttt{void Py\_XDECREF(PyObject *o)}] Decrease the reference count of a (potentially NULL) python object.
\end{description}
\end{frame}
\begin{frame}{Numpy C-API}
\begin{description}
\item[\texttt{int PyArray\_NDIM(PyArrayObject *a)}] Get the number of dimensions of an array.
\item[\texttt{npy\_intp *PyArray\_DIMS(PyArrayObject *a)}] Get the shape of an array.
\item[\texttt{npy\_intp *PyArray\_STRIDES(PyArrayObject *a)}] Get the strides of an array.
\item[\texttt{void *PyArray\_DATA(PyArrayObject *a)}] Get the data pointer (pointer to element 0) of an array.
\end{description}
\end{frame}
\begin{frame}[allowframebreaks]{Example}
\vskip5mm
This is the C code equivalent to \code{perform}
\vskip4mm
\lstinputlisting[linerange={1-27}]{doublec.py}
\end{frame}
\begin{frame}{COp}
\lstinputlisting{cop.py}
\end{frame}
\begin{frame}{Constructor Arguments}
\begin{itemize}
\item Basically you just pass arguments to the constructor of COp
\begin{itemize}
\item Either by calling the constructor directly \code{COp.__init__(self, ...)}
\item Or via the superclass \code{super(MyOp, self).__init__(...)}
\end{itemize}
\item The arguments are:
\begin{itemize}
\item a list of file names with code sections (relative to the location of the op class)
\item the name of a function to call to make the computation (optional)
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{COp: Example}
\only<1>{\lstinputlisting[linerange={1-16}]{doublecop.py}}
\only<2>{\lstinputlisting[language=C]{doublecop.c}}
\end{frame}
\begin{frame}{Tests}
\begin{itemize}
\item Testing ops with C code is done the same way as testing for python ops
\item One thing to watch for is tests for ops which don't have python code
\begin{itemize}
\item You should skip the test in those cases
\item Test for \code{theano.config.gxx == ""}
\end{itemize}
\item Using DebugMode will compare the output of the Python version to the output of the C version and raise an error if they don't match
\end{itemize}
\end{frame}
\begin{frame}{Gradient and Other Concerns}
\begin{itemize}
\item The code for \code{grad()} and \code{infer_shape()} is done the same way as for a python Op
\item In fact you can have the same Op with a python and a C version sharing the \code{grad()} and \code{infer_shape()} code
\begin{itemize}
\item That's how most Ops are implemented
\end{itemize}
\end{itemize}
\end{frame}
\section{Op Params}
\begin{frame}[plain]{}
\begin{center}
\Huge Op Params
\end{center}
\end{frame}
\begin{frame}{Purpose}
\begin{itemize}
\item Used to pass information to the C code
\item Can reduce the amount of compiled C code
\item Required for things that can change from one script run to the other.
\end{itemize}
\end{frame}
\begin{frame}{Usage}
\lstinputlisting{params.py}
\end{frame}
\section{GPU Ops}
\begin{frame}[plain]{}
\begin{center}
\Huge GPU Ops
\end{center}
\end{frame}
\begin{frame}{Overview}
\only<1>{\lstinputlisting[linerange=1-12]{gpu.py}}
\only<2>{\lstinputlisting[linerange=14-20]{gpu.py}
\begin{itemize}
\item \texttt{params\_type} is new.
\item \texttt{get\_params} is new.
\end{itemize}}
\end{frame}
\begin{frame}{Context and Context Name}
\begin{itemize}
\item Context is what is used to refer to the chosen GPU.
It is a C object that can't be serialized.
\item Context Name is a name internal to Theano to refer to a given context object. It is a python string.
\item Context Names are used whenever you need a symbolic object.
\end{itemize}
\end{frame}
\begin{frame}{Double on GPU}
\only<1>{\lstinputlisting[linerange=5-21]{doublegpu.py}}
\only<2>{\lstinputlisting[linerange=22-37]{doublegpu.py}}
\only<3>{\lstinputlisting[linerange=39-55]{doublegpu.py}}
\end{frame}
\begin{frame}{GpuKernelBase}
\only<1>{\lstinputlisting[linerange=6-20]{doublecgpu.py}}
\only<2>{\lstinputlisting[linerange=1-10]{doublecgpu.c}}
\only<3>{\lstinputlisting[linerange=12-28]{doublecgpu.c}}
\end{frame}
\section{Optimizations}
\begin{frame}[plain]{}
\begin{center}
\Huge Optimizations
\end{center}
\end{frame}
\begin{frame}{Purpose}
\begin{itemize}
\item End goal is to make code run faster
\item Sometimes optimizations target stability or memory usage instead
\item Most of the time you will make one to insert a new Op you wrote
\end{itemize}
\end{frame}
\begin{frame}{Replace an Op}
Here is code to use \code{DoubleOp()} instead of \code{ScalMul(2)}.
\lstinputlisting[linerange={1-2,7-8,11-20}]{opt.py}
\end{frame}
\begin{frame}{Replace an Op for GPU}
Here is code to move the Double op to GPU.
\lstinputlisting[linerange={1-5,9-10,22-30}]{opt.py}
\end{frame}
\begin{frame}{Tests}
\lstinputlisting{test_opt.py}
\end{frame}
\begin{frame}{Exercise}
\begin{itemize}
\item Implement a ScalMulOp that multiplies its input by an arbitrary scalar value. Start with a python implementation
\item Add C code to your implementation
\item Create a GPU version of your op.
\item Create an optimization that replaces the CPU version with a GPU version when appropriate.
\end{itemize}
Clone the repo at \url{https://github.com/abergeron/ccw_tutorial_theano.git}.
\end{frame}
\end{document}