diff --git a/.gitignore b/.gitignore
index ded6067..9f074d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,13 @@ nosetests.xml
 .mr.developer.cfg
 .project
 .pydevproject
+
+# LaTeX build files
+*.aux
+*.log
+*.nav
+*.out
+*.snm
+*.synctex.gz
+*.toc
+*.vrb
diff --git a/06_scalmulop/01_scalmulop_soln.py b/06_scalmulop/01_scalmulop_soln.py
index 700073c..632fbbc 100644
--- a/06_scalmulop/01_scalmulop_soln.py
+++ b/06_scalmulop/01_scalmulop_soln.py
@@ -1,6 +1,6 @@
 from theano import Op, Apply
 from theano.tensor import as_tensor_variable
-from theano.scalar import as_scalar_variable
+from theano.scalar import as_scalar
 
 class ScalMulV1(Op):
     __props__ = ('scal',)
@@ -25,7 +25,7 @@ class ScalMulV2(Op):
 
     def make_node(self, x, scal):
         x = as_tensor_variable(x)
-        scal = as_scalar_variable(scal)
+        scal = as_scalar(scal)
         return Apply(self, [x, scal], [x.type()])
 
     def perform(self, node, inputs, output_storage):
diff --git a/07_scalmulgrad/01_scalmulop.py b/07_scalmulgrad/01_scalmulop.py
index a5ee1b8..1a31582 100644
--- a/07_scalmulgrad/01_scalmulop.py
+++ b/07_scalmulgrad/01_scalmulop.py
@@ -1,6 +1,5 @@
 from theano import Op, Apply
 from theano.tensor import as_tensor_variable
-from theano.scalar import as_scalar_variable
 
 class ScalMul(Op):
     __props__ = ('scal',)
diff --git a/07_scalmulgrad/01_scalmulop_soln.py b/07_scalmulgrad/01_scalmulop_soln.py
index c35dc0b..2c4a265 100644
--- a/07_scalmulgrad/01_scalmulop_soln.py
+++ b/07_scalmulgrad/01_scalmulop_soln.py
@@ -1,6 +1,5 @@
 from theano import Op, Apply
 from theano.tensor import as_tensor_variable
-from theano.scalar import as_scalar_variable
 
 class ScalMul(Op):
     __props__ = ('scal',)
diff --git a/README.md b/README.md
index b8c47f9..38931bf 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,37 @@
 ccw_tutorial_theano
 ===================
 
-Common Code Workflow tutorial on Theano
+This repo contains two Theano tutorials.
+The first one covers the basics of running and debugging Theano code.
+The second one covers extending Theano in Python and C.
+
+Basic tutorial
+--------------
+
+This tutorial covers:
+
+ * Overview of library (3 min)
+ * Building expressions (30 min)
+ * Compiling and running expressions (30 min)
+ * Modifying expressions (25 min)
+ * Debugging (30 min)
+ * Citing Theano (2 min)
+
+In order to follow this tutorial you will need the ipython-notebook
+Python package on your computer and a clone of this repo to get the
+notebook with exercises.
+
+The following commands should perform the correct installation on most
+Unix-like machines:
+
+    pip install ipython-notebook
+    git clone https://github.com/abergeron/ccw_tutorial_theano.git
+    cd ccw_tutorial_theano/ipnb
+    ipython notebook Theano-basic.ipynb
+
+This should open your browser to the notebook page.
+ +Advanced tutorial +----------------- + +COMING SOON diff --git a/advanced.pdf b/advanced.pdf index 1b38a4f..3a994a7 100644 Binary files a/advanced.pdf and b/advanced.pdf differ diff --git a/advanced.tex b/advanced.tex index d9bcdfa..e1a94bb 100644 --- a/advanced.tex +++ b/advanced.tex @@ -1,6 +1,5 @@ \documentclass[utf8x]{beamer} -% \usepackage{beamerthemesplit} // Activate for custom appearance \usepackage[utf8x]{inputenc} \usepackage[OT1]{fontenc} \usepackage{graphicx} @@ -35,8 +34,6 @@ tabsize=4, backgroundcolor=\color{lightgray}, frame=single, -%showlines=true, -%emph={theano,MyOp,DoubleOp}, emphstyle=\color{lightblue}\bfseries, emph={[2]__init__,make_node,perform,infer_shape,c_code,make_thunk,grad,R_op},emphstyle={[2]\color{methblue}}, emph={[3]self},emphstyle={[3]\color{darkgreen}}, moredelim=**[is][{\color{red}}]{`}{`} @@ -54,7 +51,7 @@ \section*{} \begin{enumerate} \item How to Make an Op (Python) (45 min) \item How to Make an Op (C) (30 min) -\item How to Make a Complex Op (10 min) +\item Op Params (10 min) \item Optimizations (20 min) \end{enumerate} \end{frame} @@ -150,28 +147,6 @@ \section{How to Make an Op (Python)} \end{lstlisting} You can also use \texttt{theano-nose} which is a wrapper around \texttt{nosetests} with some extra options. - -\end{frame} - -\begin{frame}{Exercise: TripleOp} -What would need to be changed in the code below (DoubleOp) to make this Op triple the input instead of double? -\lstinputlisting[lastline=15]{doubleop.py} -\end{frame} - -\begin{frame}{Solution: TripleOp} -You change the class name and the constant \code{2} for a constant \code{3}. \\ -\ -\lstinputlisting[lastline=15]{tripleop.py} -\end{frame} - -\begin{frame}{Exercise: ScalMulOp} -\begin{center} -Work though the "06\_scalmulop" directory available at \url{https://github.com/abergeron/ccw_tutorial_theano.git}. -\end{center} -\begin{itemize} -\item Take the \code{DoubleOp} code and make it work with an arbitrary scalar -\item There are more than one solution possible, both have advantages and disadvantages -\end{itemize} \end{frame} \begin{frame}{\code{infer_shape}} @@ -217,15 +192,7 @@ \section{How to Make an Op (Python)} \begin{frame}{Tests} To test the gradient we use \code{verify_grad} \lstinputlisting[linerange={5-5,36-44}]{test_doubleop.py} -It will compute the gradient numerically and symbolically (using our \code{grad()} method) and compare the two. -\end{frame} - -\begin{frame}{Exercice: Add Special Methods to ScalMulOp} -Work through the "07\_scalmulgrad" directory available at \url{https://github.com/abergeron/ccw_tutorial_theano.git} -\begin{itemize} -\item Take the ScalMulOp class you made and add the \code{infer_shape} and \code{grad} methods to it. -\item Don't forget to make tests for your new class to make sure everything works correctly. -\end{itemize} +It will compute the gradient numerically and symbolically (using our \code{L_op()} method) and compare the two. 
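+For reference, a minimal sketch of the \code{L_op} signature assumed
+above (it is \code{grad} plus the already-built symbolic outputs; the
+body shown is the one from \code{doubleop.py}):
+\begin{lstlisting}
+def L_op(self, inputs, outputs, output_grads):
+    # outputs can be reused here instead of
+    # being rebuilt from inputs
+    return [output_grads[0] * 2]
+\end{lstlisting}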
\end{frame} \section{How to Make an Op (C)} @@ -302,15 +269,15 @@ \section{How to Make an Op (C)} \begin{frame}{Constructor Arguments} \begin{itemize} -\item Basically you just pass two arguments to the constructor of COp +\item Basically you just pass arguments to the constructor of COp \begin{itemize} \item Either by calling the constructor directly \code{COp.__init__(self, ...)} \item Or via the superclass \code{super(MyOp, self).__init__(...)} \end{itemize} -\item The two arguments are: +\item The arguments are: \begin{itemize} -\item the name of the C code file -\item the name of the function to call to make the computation +\item a list of file names with code sections (relative to the location of the op class) +\item the name of a function to call to make the computation (optional) \end{itemize} \end{itemize} \end{frame} @@ -342,32 +309,65 @@ \section{How to Make an Op (C)} \end{itemize} \end{frame} -\begin{frame}{Exercice: Add C Code to ScalMulOp} -Work through the "08\_scalmulc" directory available at \url{https://github.com/abergeron/ccw_tutorial_theano.git}. +\section{Op Params} + +\begin{frame}[plain]{} +\begin{center} +\Huge Op Params +\end{center} +\end{frame} + +\begin{frame}{Purpose} \begin{itemize} -\item Take the ScalMulOp from before and write C code for it using either approach (only accept vectors). -\item You can base yourself on the C code for DoubleOp. -\item Don't forget to test your new implementation! Be sure to check for invalid inputs (matrices). +\item Used to pass information to the C code +\item Can reduce the amount of compiled C code +\item Required for things that can change from one script run to the other. \end{itemize} \end{frame} -\section{How to Make a Complex Op} +\begin{frame}{Usage} +\lstinputlisting{params.py} +\end{frame} + +\section{GPU Ops} \begin{frame}[plain]{} \begin{center} -\Huge How to Make a Complex Op +\Huge GPU Ops \end{center} \end{frame} -\begin{frame}{\code{make_thunk}} -\lstinputlisting[linerange={12-14}]{thunk.py} +\begin{frame}{Overview} +\only<1>{\lstinputlisting[linerange=1-12]{gpu.py}} +\only<2>{\lstinputlisting[linerange=14-20]{gpu.py} +\begin{itemize} +\item \texttt{params\_type} is new. +\item \texttt{get\_params} is new. +\end{itemize}} +\end{frame} + +\begin{frame}{Context and Context Name} \begin{itemize} -\item Define instead of \code{perform} or \code{c_code} -\item Gives total freedom on how the computation is performed -\item More complex to use and generally not needed +\item Context is what is used to refer to the chosen GPU. + +It is a C object that can't be serialized. +\item Context Name is a name internal to Theano to refer to a given context object. It is a python string. +\item Context Names are used whenever you need a symbolic object. \end{itemize} \end{frame} +\begin{frame}{Double on GPU} +\only<1>{\lstinputlisting[linerange=5-21]{doublegpu.py}} +\only<2>{\lstinputlisting[linerange=22-37]{doublegpu.py}} +\only<3>{\lstinputlisting[linerange=39-55]{doublegpu.py}} +\end{frame} + +\begin{frame}{GpuKernelBase} +\only<1>{\lstinputlisting[linerange=6-20]{doublecgpu.py}} +\only<2>{\lstinputlisting[linerange=1-10]{doublecgpu.c}} +\only<3>{\lstinputlisting[linerange=12-28]{doublecgpu.c}} +\end{frame} + \section{Optimizations} \begin{frame}[plain]{} @@ -384,32 +384,28 @@ \section{Optimizations} \end{itemize} \end{frame} -\begin{frame}{Replace an Op (V1)} +\begin{frame}{Replace an Op} Here is code to use \code{DoubleOp()} instead of \code{ScalMul(2)}. 
-\lstinputlisting[linerange={1-5,9-15}]{opt.py}
-\end{frame}
-
-\begin{frame}{Replace an Op (V2)}
-In this case since we are replacing one instance with another there is an easier way.
-\lstinputlisting[linerange={1-2,16-20}]{opt.py}
+\lstinputlisting[linerange={1-2,7-8,11-20}]{opt.py}
 \end{frame}
 
-\begin{frame}{Registering}
-In any case you need to register your optimization.
-\lstinputlisting[linerange={6-10}]{opt.py}
-\lstinputlisting[linerange={21-22}]{opt.py}
+\begin{frame}{Replace an Op for GPU}
+Here is code to move the Double op to GPU.
+\lstinputlisting[linerange={1-5,9-10,22-30}]{opt.py}
 \end{frame}
 
 \begin{frame}{Tests}
 \lstinputlisting{test_opt.py}
 \end{frame}
 
-\begin{frame}{Exercice 4}
-Work through the "09_opt" directory available at \url{https://github.com/abergeron/ccw_tutorial_theano.git}.
+\begin{frame}{Exercise}
 \begin{itemize}
-\item Make an optimization that replace DoubleOp with DoubleC (or DoubleCOp)
-\item Write tests to make sure your optimization is applied correctly
+\item Implement a ScalMulOp that multiplies its input by an arbitrary scalar value. Start with a Python implementation.
+\item Add C code to your implementation.
+\item Create a GPU version of your op.
+\item Create an optimization that replaces the CPU version with a GPU version when appropriate.
 \end{itemize}
+Clone the repo at \url{https://github.com/abergeron/ccw_tutorial_theano.git}.
 \end{frame}
 
 \end{document}
diff --git a/cop.py b/cop.py
index 81b65e1..40c8297 100644
--- a/cop.py
+++ b/cop.py
@@ -4,7 +4,7 @@ class MyOp(COp):
     __props__ = ()
 
     def __init__(self, ...):
-        COp.__init__(self, c_file, func_name)
+        COp.__init__(self, c_files, func_name)
         # Other init code if needed
 
     def make_node(self, ...):
diff --git a/doublecgpu.c b/doublecgpu.c
new file mode 100644
index 0000000..76c4a08
--- /dev/null
+++ b/doublecgpu.c
@@ -0,0 +1,31 @@
+#section kernels
+#kernel doublek : *, *, size :
+
+KERNEL void doublek(GLOBAL_MEM DTYPE_o0 *out,
+                    GLOBAL_MEM DTYPE_i0 *a,
+                    ga_size n) {
+  for (ga_size i = LID_0; i < n; i += LDIM_0) {
+    out[i] = 2 * a[i];
+  }
+}
+
+#section support_code_struct
+int double_fn(PyGpuArrayObject *inp,
+              PyGpuArrayObject **out,
+              PyGpuContextObject *ctx) {
+  size_t n = 1;
+  Py_XDECREF(*out);
+  *out = pygpu_empty(PyGpuArray_NDIM(inp),
+                     PyGpuArray_DIMS(inp),
+                     GA_C_ORDER, ctx, Py_None);
+  if (*out == NULL) return -1;
+  for (unsigned int i = 0; i < inp->ga.nd; i++)
+    n *= PyGpuArray_DIM(inp, i);
+  if (doublek_scall(1, &n, 0, *out, inp, n)) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    "Error calling kernel");
+    return -1;
+  }
+  /* success: the caller expects 0 when the kernel ran */
+  return 0;
+}
diff --git a/doublecgpu.py b/doublecgpu.py
new file mode 100644
index 0000000..073b3aa
--- /dev/null
+++ b/doublecgpu.py
@@ -0,0 +1,25 @@
+from theano import Apply
+from theano.gpuarray.basic_ops import (as_gpuarray_variable,
+                                       infer_context_name, CGpuKernelBase)
+
+
+class DoubleCGpu(CGpuKernelBase):
+    __props__ = ()
+
+    def __init__(self):
+        CGpuKernelBase.__init__(self, ["doublecgpu.c"],
+                                "double_fn")
+
+    def make_node(self, x):
+        ctx_name = infer_context_name(x)
+        x = as_gpuarray_variable(x, ctx_name)
+        return Apply(self, [x], [x.type()])
+
+    def get_params(self, node):
+        return node.outputs[0].type.context
+
+    def infer_shape(self, node, input_shapes):
+        return input_shapes
+
+    def grad(self, inputs, output_grads):
+        return [output_grads[0] * 2]
diff --git a/doublecop.py b/doublecop.py
index d62ca60..419ad0c 100644
--- a/doublecop.py
+++ b/doublecop.py
@@ -6,7 +6,7 @@ class DoubleCOp(COp):
     __props__ = ()
 
     def __init__(self):
-        COp.__init__(self, "./doublecop.c",
COp.__init__(self, ["doublecop.c"], "APPLY_SPECIFIC(doublecop)") def make_node(self, x): diff --git a/doublegpu.py b/doublegpu.py new file mode 100644 index 0000000..a18f1f2 --- /dev/null +++ b/doublegpu.py @@ -0,0 +1,60 @@ +from theano import Op, Apply +from theano.gpuarray.basic_ops import (as_gpuarray_variable, Kernel, + infer_context_name, GpuKernelBase) + +try: + from pygpu import gpuarray +except ImportError: + pass + + +class DoubleGpu(Op, GpuKernelBase): + __props__ = () + + def make_node(self, x): + ctx_name = infer_context_name(x) + x = as_gpuarray_variable(x, ctx_name) + return Apply(self, [x], [x.type()]) + + def get_params(self, node): + return node.outputs[0].type.context + + def gpu_kernels(self, node, name): + dt = node.inputs[0].type + code = """ +KERNEL void doublek(GLOBAL_MEM %(ctype) *out, + GLOBAL_MEM const %(ctype)s *a, + ga_size n) { + for (ga_size i = LID_0; i < n; i += LDIM_0) { + out[i] = 2 * a[i]; + } +} +""" % dict(ctype=gpuarray.dtype_to_ctype(dt)) + return [Kernel(code=code, name="doublek", + params=[gpuarray.GpuArray, + gpuarray.GpuArray, + gpuarray.SIZE], + flags=Kernel.get_flags(dt))] + + def c_code(self, node, name, inn, outn, sub): + return """ +size_t n = 1; +Py_XDECREF(%(out)s); +%(out)s = pygpu_empty(PyGpuArray_NDIM(%(inp)s), + PyGpuArray_DIMS(%(inp)s), + GA_C_ORDER, %(ctx)s, Py_None); +if (%(out)s == NULL) %(fail)s +for (unsigned int i = 0; i < %(inp)s->ga.nd; i++) + n *= PyGpuArray_DIM(%(inp)s, i); +if (doublek_scall(1, &n, 0, %(out)s, %(inp)s, n)) { + PyErr_SetString(PyExc_RuntimeError, + "Error calling kernel"); + %(fail)s; +} +""" % dict(inp=inn[0], out=outn[0], fail=sub["fail"]) + + def infer_shape(self, node, input_shapes): + return input_shapes + + def grad(self, inputs, output_grads): + return [output_grads[0] * 2] diff --git a/doubleop.py b/doubleop.py index a1ab4fa..77cddeb 100644 --- a/doubleop.py +++ b/doubleop.py @@ -16,7 +16,7 @@ def perform(self, node, inputs, output_storage): def infer_shape(self, node, input_shapes): return input_shapes - def grad(self, inputs, output_grads): + def L_op(self, inputs, outputs, output_grads): return [output_grads[0] * 2] def R_op(self, inputs, eval_points): diff --git a/gpu.py b/gpu.py new file mode 100644 index 0000000..fd934c7 --- /dev/null +++ b/gpu.py @@ -0,0 +1,20 @@ +from theano import Op +from theano.gpuarray.type import gpu_context_type + +class GpuOp(Op): + __props__ = () + params_type = gpu_context_type + + def make_node(self, ...): + # return apply node + + def get_params(self, node): + return node.outputs[0].type.context + + def perform(self, node, inputs, output_storage): + # python code + + def c_code(self, node, name, input_names, + output_names, sub): + # return C code string + diff --git a/ipnb/01_scalar_soln.py b/ipnb/01_scalar_soln.py new file mode 100644 index 0000000..0ca85a0 --- /dev/null +++ b/ipnb/01_scalar_soln.py @@ -0,0 +1,39 @@ +import numpy as np +from theano import function +import theano.tensor as T + + +def make_scalar(): + """ + Returns a new Theano scalar. + """ + + return T.scalar() + + +def log(x): + """ + Returns the logarithm of a Theano scalar x. + """ + + return T.log(x) + + +def add(x, y): + """ + Adds two theano scalars together and returns the result. + """ + + return x + y + +a = make_scalar() +b = make_scalar() +c = log(b) +d = add(a, c) +f = function([a, b], d) +a = np.cast[a.dtype](1.) +b = np.cast[b.dtype](2.) +actual = f(a, b) +expected = 1. + np.log(2.) +assert np.allclose(actual, expected) +print "SUCCESS!" 
diff --git a/ipnb/02_vector_mat_soln.py b/ipnb/02_vector_mat_soln.py new file mode 100644 index 0000000..46da45b --- /dev/null +++ b/ipnb/02_vector_mat_soln.py @@ -0,0 +1,59 @@ +import numpy as np +from theano import function +import theano.tensor as T + + +def make_vector(): + """ + Returns a new Theano vector. + """ + + return T.vector() + + +def make_matrix(): + """ + Returns a new Theano matrix. + """ + + return T.matrix() + + +def elemwise_mul(a, b): + """ + a: A theano matrix + b: A theano matrix + Returns the elementwise product of a and b + """ + + return a * b + + +def matrix_vector_mul(a, b): + """ + a: A theano matrix + b: A theano vector + Returns the matrix-vector product of a and b + """ + + return T.dot(a, b) + +a = make_vector() +b = make_vector() +c = elemwise_mul(a, b) +d = make_matrix() +e = matrix_vector_mul(d, c) + +f = function([a, b, d], e) + +rng = np.random.RandomState([1, 2, 3]) +a_value = rng.randn(5).astype(a.dtype) +b_value = rng.rand(5).astype(b.dtype) +c_value = a_value * b_value +d_value = rng.randn(5, 5).astype(d.dtype) +expected = np.dot(d_value, c_value) + +actual = f(a_value, b_value, d_value) + +assert np.allclose(actual, expected) +print "SUCCESS!" diff --git a/ipnb/03_tensor_soln.py b/ipnb/03_tensor_soln.py new file mode 100644 index 0000000..e5b3b7d --- /dev/null +++ b/ipnb/03_tensor_soln.py @@ -0,0 +1,58 @@ +import numpy as np +from theano import function +import theano.tensor as T + + +def make_tensor(dim): + """ + Returns a new Theano tensor with no broadcastable dimensions. + dim: the total number of dimensions of the tensor. + """ + + return T.TensorType(broadcastable=tuple([False] * dim), dtype='float32')() + + +def broadcasted_add(a, b): + """ + a: a 3D theano tensor + b: a 4D theano tensor + Returns c, a 4D theano tensor, where + + c[i, j, k, l] = a[l, k, i] + b[i, j, k, l] + + for all i, j, k, l + """ + + return a.dimshuffle(2, 'x', 1, 0) + b + + +def partial_max(a): + """ + a: a 4D theano tensor + + Returns b, a theano matrix, where + + b[i, j] = max_{k,l} a[i, k, l, j] + + for all i, j + """ + + return a.max(axis=(1, 2)) + +a = make_tensor(3) +b = make_tensor(4) +c = broadcasted_add(a, b) +d = partial_max(c) + +f = function([a, b], d) + +rng = np.random.RandomState([1, 2, 3]) +a_value = rng.randn(2, 2, 2).astype(a.dtype) +b_value = rng.rand(2, 2, 2, 2).astype(b.dtype) +c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value +expected = c_value.max(axis=1).max(axis=1) + +actual = f(a_value, b_value) + +assert np.allclose(actual, expected), (actual, expected) +print "SUCCESS!" diff --git a/ipnb/11_function_soln.py b/ipnb/11_function_soln.py new file mode 100644 index 0000000..d67e981 --- /dev/null +++ b/ipnb/11_function_soln.py @@ -0,0 +1,24 @@ +from theano import tensor as T +from theano import function + + +def evaluate(x, y, expr, x_value, y_value): + """ + x: A theano variable + y: A theano variable + expr: A theano expression involving x and y + x_value: A numpy value + y_value: A numpy value + + Returns the value of expr when x_value is substituted for x + and y_value is substituted for y + """ + + return function([x, y], expr)(x_value, y_value) + + +x = T.iscalar() +y = T.iscalar() +z = x + y +assert evaluate(x, y, z, 1, 2) == 3 +print "SUCCESS!" 
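A side note on 11_function_soln.py above: it compiles a new function on
every call. For quick one-off evaluation, Theano variables also have an
`eval` shortcut; a minimal sketch of the equivalent computation:

    from theano import tensor as T

    x = T.iscalar()
    y = T.iscalar()
    # eval compiles (and caches) a function behind the scenes
    assert (x + y).eval({x: 1, y: 2}) == 3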
diff --git a/ipnb/12_shared_soln.py b/ipnb/12_shared_soln.py new file mode 100644 index 0000000..f2337e8 --- /dev/null +++ b/ipnb/12_shared_soln.py @@ -0,0 +1,60 @@ +import numpy as np +from theano.compat.python2x import OrderedDict +from theano import function +from theano import shared + + +def make_shared(shape): + """ + Returns a theano shared variable containing a tensor of the specified + shape. + You can use any value you want. + """ + return shared(np.zeros(shape)) + + +def exchange_shared(a, b): + """ + a: a theano shared variable + b: a theano shared variable + Uses get_value and set_value to swap the values stored in a and b + """ + temp = a.get_value() + a.set_value(b.get_value()) + b.set_value(temp) + + +def make_exchange_func(a, b): + """ + a: a theano shared variable + b: a theano shared variable + Returns f + where f is a theano function, that, when called, swaps the + values in a and b + f should not return anything + """ + + updates = OrderedDict() + updates[a] = b + updates[b] = a + f = function([], updates=updates) + return f + + +a = make_shared((5, 4, 3)) +assert a.get_value().shape == (5, 4, 3) +b = make_shared((5, 4, 3)) +assert a.get_value().shape == (5, 4, 3) +a.set_value(np.zeros((5, 4, 3), dtype=a.dtype)) +b.set_value(np.ones((5, 4, 3), dtype=b.dtype)) +exchange_shared(a, b) +assert np.all(a.get_value() == 1.) +assert np.all(b.get_value() == 0.) +f = make_exchange_func(a, b) +rval = f() +assert isinstance(rval, list) +assert len(rval) == 0 +assert np.all(a.get_value() == 0.) +assert np.all(b.get_value() == 1.) + +print "SUCCESS!" diff --git a/ipnb/13_bug_soln.py b/ipnb/13_bug_soln.py new file mode 100644 index 0000000..32b9b48 --- /dev/null +++ b/ipnb/13_bug_soln.py @@ -0,0 +1,10 @@ +# The weird thing is that the function succeeds. +# +# This is weird because the two values passed in for x and y do not +# have the same shape, yet x is added with something that has the same +# shape as y (z). +# +# This happens because optimizations realize that z is always zero and +# therefore remove the addition, which removes the error. +# +# The problem is more evident if FAST_COMPILE or DEBUG_MODE is used. diff --git a/ipnb/21_grad_soln.py b/ipnb/21_grad_soln.py new file mode 100644 index 0000000..db606d1 --- /dev/null +++ b/ipnb/21_grad_soln.py @@ -0,0 +1,23 @@ +# Fill in the TODOs in this exercise, then run +# python 01_grad.py to see if your solution works! +# +from theano import tensor as T + + +def grad_sum(x, y, z): + """ + x: A theano variable + y: A theano variable + z: A theano expression involving x and y + + Returns dz / dx + dz / dy + """ + + return sum(T.grad(z, [x, y])) + +x = T.scalar() +y = T.scalar() +z = x + y +s = grad_sum(x, y, z) +assert s.eval({x: 0, y: 0}) == 2 +print "SUCCESS!" diff --git a/ipnb/22_traverse_soln.py b/ipnb/22_traverse_soln.py new file mode 100644 index 0000000..6751147 --- /dev/null +++ b/ipnb/22_traverse_soln.py @@ -0,0 +1,59 @@ +import numpy as np +from theano.gof import Variable +from theano import tensor as T + + +def arg_to_softmax(prob): + """ + Oh no! Someone has passed you the probability output, + "prob", of a softmax function, and you want the unnormalized + log probability--the argument to the softmax. + + Verify that prob really is the output of a softmax. Raise a + TypeError if it is not. + + If it is, return the argument to the softmax. 
+ """ + + if not isinstance(prob, Variable): + raise TypeError() + + if prob.owner is None: + raise TypeError() + + owner = prob.owner + + if not isinstance(owner.op, T.nnet.Softmax): + raise TypeError() + + rval, = owner.inputs + + return rval + +if __name__ == "__main__": + x = np.ones((5, 4)) + try: + arg_to_softmax(x) + raise Exception("You should have raised an error.") + except TypeError: + pass + + x = T.matrix() + try: + arg_to_softmax(x) + raise Exception("You should have raised an error.") + except TypeError: + pass + + y = T.nnet.sigmoid(x) + try: + arg_to_softmax(y) + raise Exception("You should have raised an error.") + except TypeError: + pass + + y = T.nnet.softmax(x) + rval = arg_to_softmax(y) + assert rval is x + + print "SUCCESS!" diff --git a/ipnb/31_debug_soln.py b/ipnb/31_debug_soln.py new file mode 100644 index 0000000..56bde73 --- /dev/null +++ b/ipnb/31_debug_soln.py @@ -0,0 +1,17 @@ +import numpy as np +from theano import function +from theano import tensor as T +from theano import config +config.compute_test_value = 'raise' +a = T.vector() +a.tag.test_value = np.ones((3,)).astype(a.dtype) +b = T.log(a) +c = T.nnet.sigmoid(b) +d = T.sqrt(c) +e = T.concatenate((d, c), axis=0) +f = b * c * d +# This is the first bad line +g = e + f +h = g / c +fn = function([a], h) +fn(np.ones((3,)).astype(a.dtype)) diff --git a/ipnb/Theano-basic.ipynb b/ipnb/Theano-basic.ipynb new file mode 100644 index 0000000..b826bb1 --- /dev/null +++ b/ipnb/Theano-basic.ipynb @@ -0,0 +1,623 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:33e931e54d686a2ab2c44bfcdc99a4383aecf754e0c80fef2e0ada6858e9b48c" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All the exercices on this sheet work this way:\n", + "\n", + " 1. You have a cell with TODOs that raise errors with a description of what is needed. Do that.\n", + " 2. Then run the cell(ctrl-enter) to execute it.\n", + " 3. It should print \"Success\" at the end (there is validation code in the cell). If not, try again.\n", + " 4. If you want to see the solution, execute the cell that start with \"%load\" after the exercice." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Builing expressions\n", + "\n", + "#### Excercice 1.1\n", + "\n", + "This exercice walks you through creating Theano variables and doing some computation with them." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import numpy as np\n", + "from theano import function\n", + "raise NotImplementedError(\"TODO: add any other imports you need\")\n", + "\n", + "\n", + "def make_scalar():\n", + " \"\"\"\n", + " Returns a new Theano scalar.\n", + " \"\"\"\n", + "\n", + " raise NotImplementedError(\"TODO: implement this function.\")\n", + "\n", + "\n", + "def log(x):\n", + " \"\"\"\n", + " Returns the logarithm of a Theano scalar x.\n", + " \"\"\"\n", + "\n", + " raise NotImplementedError(\"TODO: implement this function.\")\n", + "\n", + "\n", + "def add(x, y):\n", + " \"\"\"\n", + " Adds two theano scalars together and returns the result.\n", + " \"\"\"\n", + "\n", + " raise NotImplementedError(\"TODO: implement this function.\")\n", + " \n", + "# The following code uses your code and tests it.\n", + "a = make_scalar()\n", + "b = make_scalar()\n", + "c = log(b)\n", + "d = add(a, c)\n", + "f = function([a, b], d)\n", + "a = np.cast[a.dtype](1.)\n", + "b = np.cast[b.dtype](2.)\n", + "actual = f(a, b)\n", + "expected = 1. 
+      "assert np.allclose(actual, expected)\n",
+      "print \"SUCCESS!\"\n"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%load 01_scalar_soln.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "#### Exercise 1.2\n",
+      "\n",
+      "This exercise asks you to make Theano variables and use elementwise multiplication and the matrix/vector dot product.\n"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import numpy as np\n",
+      "from theano import function\n",
+      "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
+      "\n",
+      "\n",
+      "def make_vector():\n",
+      "    \"\"\"\n",
+      "    Returns a new Theano vector.\n",
+      "    \"\"\"\n",
+      "\n",
+      "    raise NotImplementedError(\"TODO: implement this function.\")\n",
+      "\n",
+      "\n",
+      "def make_matrix():\n",
+      "    \"\"\"\n",
+      "    Returns a new Theano matrix.\n",
+      "    \"\"\"\n",
+      "\n",
+      "    raise NotImplementedError(\"TODO: implement this function.\")\n",
+      "\n",
+      "def elemwise_mul(a, b):\n",
+      "    \"\"\"\n",
+      "    a: A theano matrix\n",
+      "    b: A theano matrix\n",
+      "    Returns the elementwise product of a and b\n",
+      "    \"\"\"\n",
+      "\n",
+      "    raise NotImplementedError(\"TODO: implement this function.\")\n",
+      "\n",
+      "\n",
+      "def matrix_vector_mul(a, b):\n",
+      "    \"\"\"\n",
+      "    a: A theano matrix\n",
+      "    b: A theano vector\n",
+      "    Returns the matrix-vector product of a and b\n",
+      "    \"\"\"\n",
+      "\n",
+      "    raise NotImplementedError(\"TODO: implement this function.\")\n",
+      "\n",
+      "# The following code uses your code and tests it.\n",
+      "a = make_vector()\n",
+      "b = make_vector()\n",
+      "c = elemwise_mul(a, b)\n",
+      "d = make_matrix()\n",
+      "e = matrix_vector_mul(d, c)\n",
+      "\n",
+      "f = function([a, b, d], e)\n",
+      "\n",
+      "rng = np.random.RandomState([1, 2, 3])\n",
+      "a_value = rng.randn(5).astype(a.dtype)\n",
+      "b_value = rng.rand(5).astype(b.dtype)\n",
+      "c_value = a_value * b_value\n",
+      "d_value = rng.randn(5, 5).astype(d.dtype)\n",
+      "expected = np.dot(d_value, c_value)\n",
+      "\n",
+      "actual = f(a_value, b_value, d_value)\n",
+      "assert np.allclose(actual, expected)\n",
+      "print \"SUCCESS!\""
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%load 02_vector_mat_soln.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "#### Exercise 1.3\n",
+      "\n",
+      "This exercise asks you to create a tensor variable, do broadcastable additions and compute the max over part of a tensor."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import numpy as np\n",
+      "from theano import function\n",
+      "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
+      "\n",
+      "\n",
+      "def make_tensor(dim):\n",
+      "    \"\"\"\n",
+      "    Returns a new Theano tensor with no broadcastable dimensions.\n",
+      "    dim: the total number of dimensions of the tensor.\n",
+      "    (You can use any dtype you like)\n",
+      "    \"\"\"\n",
+      "\n",
+      "    raise NotImplementedError(\"TODO: implement this function.\")\n",
+      "\n",
+      "\n",
+      "def broadcasted_add(a, b):\n",
+      "    \"\"\"\n",
+      "    a: a 3D theano tensor\n",
+      "    b: a 4D theano tensor\n",
+      "    Returns c, a 4D theano tensor, where\n",
+      "\n",
+      "    c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]\n",
+      "\n",
+      "    for all i, j, k, l\n",
+      "    \"\"\"\n",
+      "\n",
+      "    raise NotImplementedError(\"TODO: implement this function.\")\n",
+      "\n",
+      "def partial_max(a):\n",
+      "    \"\"\"\n",
+      "    a: a 4D theano tensor\n",
+      "\n",
+      "    Returns b, a theano matrix, where\n",
+      "\n",
+      "    b[i, j] = max_{k,l} a[i, k, l, j]\n",
+      "\n",
+      "    for all i, j\n",
+      "    \"\"\"\n",
+      "\n",
+      "    raise NotImplementedError(\"TODO: implement this function.\")\n",
+      "\n",
+      "# The following code uses your code and tests it.\n",
+      "a = make_tensor(3)\n",
+      "b = make_tensor(4)\n",
+      "c = broadcasted_add(a, b)\n",
+      "d = partial_max(c)\n",
+      "\n",
+      "f = function([a, b], d)\n",
+      "\n",
+      "rng = np.random.RandomState([1, 2, 3])\n",
+      "a_value = rng.randn(2, 2, 2).astype(a.dtype)\n",
+      "b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)\n",
+      "c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value\n",
+      "expected = c_value.max(axis=1).max(axis=1)\n",
+      "\n",
+      "actual = f(a_value, b_value)\n",
+      "\n",
+      "assert np.allclose(actual, expected), (actual, expected)\n",
+      "print \"SUCCESS!\""
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%load 03_tensor_soln.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## 2. Compiling and Running\n",
+      "\n",
+      "#### Exercise 2.1\n",
+      "\n",
+      "This exercise asks you to compile a Theano function and call it."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from theano import tensor as T\n",
+      "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
+      "\n",
+      "\n",
+      "def evaluate(x, y, expr, x_value, y_value):\n",
+      "    \"\"\"\n",
+      "    x: A theano variable\n",
+      "    y: A theano variable\n",
+      "    expr: A theano expression involving x and y\n",
+      "    x_value: A numpy value\n",
+      "    y_value: A numpy value\n",
+      "\n",
+      "    Returns the value of expr when x_value is substituted for x\n",
+      "    and y_value is substituted for y\n",
+      "    \"\"\"\n",
+      "\n",
+      "    raise NotImplementedError(\"TODO: implement this function.\")\n",
+      "\n",
+      "\n",
+      "# The following code uses your code and tests it.\n",
+      "x = T.iscalar()\n",
+      "y = T.iscalar()\n",
+      "z = x + y\n",
+      "assert evaluate(x, y, z, 1, 2) == 3\n",
+      "print \"SUCCESS!\""
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%load 11_function_soln.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "#### Exercise 2.2\n",
+      "\n",
+      "This exercise makes you use shared variables. You must create some and update them by swapping the values of two shared variables."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import numpy as np\n",
+      "raise NotImplementedError(\"TODO: add any other imports you need\")\n",
+      "\n",
+      "\n",
+      "def make_shared(shape):\n",
+      "    \"\"\"\n",
+      "    Returns a theano shared variable containing a tensor of the specified\n",
+      "    shape.\n",
+      "    You can use any value you want.\n",
+      "    \"\"\"\n",
+      "    raise NotImplementedError(\"TODO: implement the function\")\n",
+      "\n",
+      "\n",
+      "def exchange_shared(a, b):\n",
+      "    \"\"\"\n",
+      "    a: a theano shared variable\n",
+      "    b: a theano shared variable\n",
+      "    Uses get_value and set_value to swap the values stored in a and b\n",
+      "    \"\"\"\n",
+      "    raise NotImplementedError(\"TODO: implement the function\")\n",
+      "\n",
+      "\n",
+      "def make_exchange_func(a, b):\n",
+      "    \"\"\"\n",
+      "    a: a theano shared variable\n",
+      "    b: a theano shared variable\n",
+      "    Returns f\n",
+      "    where f is a theano function, that, when called, swaps the\n",
+      "    values in a and b\n",
+      "    f should not return anything\n",
+      "    \"\"\"\n",
+      "    raise NotImplementedError(\"TODO: implement the function\")\n",
+      "\n",
+      "\n",
+      "# The following code uses your code and tests it.\n",
+      "a = make_shared((5, 4, 3))\n",
+      "assert a.get_value().shape == (5, 4, 3)\n",
+      "b = make_shared((5, 4, 3))\n",
+      "assert a.get_value().shape == (5, 4, 3)\n",
+      "a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))\n",
+      "b.set_value(np.ones((5, 4, 3), dtype=b.dtype))\n",
+      "exchange_shared(a, b)\n",
+      "assert np.all(a.get_value() == 1.)\n",
+      "assert np.all(b.get_value() == 0.)\n",
+      "f = make_exchange_func(a, b)\n",
+      "rval = f()\n",
+      "assert isinstance(rval, list)\n",
+      "assert len(rval) == 0\n",
+      "assert np.all(a.get_value() == 0.)\n",
+      "assert np.all(b.get_value() == 1.)\n",
+      "\n",
+      "print \"SUCCESS!\""
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%load 12_shared_soln.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "#### Exercise 2.3\n",
+      "\n",
+      "Something weird happens when you run this code; find the problem and explain what is happening.\n",
+      "\n",
+      "Hint: some compilation modes make the problem more obvious than others."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import numpy as np\n",
+      "from theano import function\n",
+      "from theano import tensor as T\n",
+      "x = T.vector()\n",
+      "y = T.vector()\n",
+      "z = T.zeros_like(y)\n",
+      "a = x + z\n",
+      "f = function([x, y], a)\n",
+      "output = f(np.zeros((1,), dtype=x.dtype), np.zeros((2,), dtype=y.dtype))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%load 13_bug_soln.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## 3. Modifying Graphs\n",
+      "\n",
+      "#### Exercise 3.1\n",
+      "\n",
+      "This exercise makes you use Theano's symbolic grad."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from theano import tensor as T\n",
+      "\n",
+      "\n",
+      "def grad_sum(x, y, z):\n",
+      "    \"\"\"\n",
+      "    x: A theano variable\n",
+      "    y: A theano variable\n",
+      "    z: A theano expression involving x and y\n",
+      "\n",
+      "    Returns dz / dx + dz / dy\n",
+      "    \"\"\"\n",
+      "    raise NotImplementedError(\"TODO: implement this function.\")\n",
+      "\n",
+      "\n",
+      "# The following code uses your code and tests it.\n",
+      "x = T.scalar()\n",
+      "y = T.scalar()\n",
+      "z = x + y\n",
+      "s = grad_sum(x, y, z)\n",
+      "assert s.eval({x: 0, y: 0}) == 2\n",
+      "print \"SUCCESS!\""
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%load 21_grad_soln.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "#### Exercise 3.2\n",
+      "\n",
+      "This exercise shows you how to navigate in a Theano graph. You will need to find the inputs used to produce\n",
+      "some computation."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import numpy as np\n",
+      "from theano import tensor as T\n",
+      "raise NotImplementedError(\"Add any imports you need.\")\n",
+      "\n",
+      "\n",
+      "def arg_to_softmax(prob):\n",
+      "    \"\"\"\n",
+      "    Oh no! Someone has passed you the probability output,\n",
+      "    \"prob\", of a softmax function, and you want the unnormalized\n",
+      "    log probability--the argument to the softmax.\n",
+      "\n",
+      "    Verify that prob really is the output of a softmax. Raise a\n",
+      "    TypeError if it is not.\n",
+      "\n",
+      "    If it is, return the argument to the softmax.\n",
+      "    \"\"\"\n",
+      "\n",
+      "    raise NotImplementedError(\"Implement this function.\")\n",
+      "\n",
+      "\n",
+      "x = np.ones((5, 4))\n",
+      "try:\n",
+      "    arg_to_softmax(x)\n",
+      "    raise Exception(\"You should have raised an error.\")\n",
+      "except TypeError:\n",
+      "    pass\n",
+      "\n",
+      "x = T.matrix()\n",
+      "try:\n",
+      "    arg_to_softmax(x)\n",
+      "    raise Exception(\"You should have raised an error.\")\n",
+      "except TypeError:\n",
+      "    pass\n",
+      "\n",
+      "y = T.nnet.sigmoid(x)\n",
+      "try:\n",
+      "    arg_to_softmax(y)\n",
+      "    raise Exception(\"You should have raised an error.\")\n",
+      "except TypeError:\n",
+      "    pass\n",
+      "\n",
+      "y = T.nnet.softmax(x)\n",
+      "rval = arg_to_softmax(y)\n",
+      "assert rval is x\n",
+      "\n",
+      "print \"SUCCESS!\""
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%load 22_traverse_soln.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## 4. Debugging\n",
+      "\n",
+      "#### Exercise 4.1\n",
+      "\n",
+      "The code in the next cell has a bug. Run the cell to see it.\n",
+      "\n",
+      "Use Theano flags or extra parameters to function() to find the cause.\n",
+      "\n",
+      "Don't try to find the bug by inspection or prints; the point of the exercise is to get you to work with the Theano debugging tools that will be required for more complex code.\n",
+      "\n",
+      "To modify the environment for a cell use the `%env` magic command like this:\n",
+      "\n",
+      "    %env THEANO_FLAGS=floatX=float32\n",
+      "\n",
+      "You will have to restart the ipython kernel from the Kernel menu above to get the environment changes to work."
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import numpy as np\n", + "from theano import function\n", + "from theano import tensor as T\n", + "a = T.vector()\n", + "b = T.log(a)\n", + "c = T.nnet.sigmoid(b)\n", + "d = T.sqrt(c)\n", + "e = T.concatenate((d, c), axis=0)\n", + "f = b * c * d\n", + "g = e + f\n", + "h = g / c\n", + "fn = function([a], h)\n", + "fn(np.ones((3,)).astype(a.dtype))" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%load 31_debug_soln.py" + ], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/opt.py b/opt.py index a56906b..eccaa30 100644 --- a/opt.py +++ b/opt.py @@ -1,22 +1,29 @@ from scalmulop import ScalMulV1 from doubleop import DoubleOp +from doublecop import DoubleCOp +from doublec import DoubleC +from doublecgpu import DoubleCGpu from theano.gof import local_optimizer - from theano.tensor.opt import register_specialize +from theano.gpuarray.opt import (register_opt, op_lifter, + register_opt2) + @register_specialize @local_optimizer([ScalMulV1]) -def local_scalmul_double_v1(node): - if not (isinstance(node.op, ScalMulV1) - and node.op.scal == 2): +def local_scalmul_double(node): + if not (isinstance(node.op, ScalMulV1) and + node.op.scal == 2): return False return [DoubleOp()(node.inputs[0])] -from theano.gof.opt import OpSub - -local_scalmul_double_v2 = OpSub(ScalMulV1(2), DoubleOp()) -register_specialize(local_scalmul_double_v2, - name='local_scalmul_double_v2') +@register_opt('fast_compile') +@op_lifter([DoubleOp, DoubleC, DoubleCOp]) +@register_opt2([DoubleOp, DoubleC, DoubleCOp], + 'fast_compile') +def local_scalmul_double_gpu(op, context_name, inputs, + outputs): + return DoubleCGpu diff --git a/params.py b/params.py new file mode 100644 index 0000000..319fffa --- /dev/null +++ b/params.py @@ -0,0 +1,14 @@ +from theano import Op + +class MyOp(Op): + params_type = # a params type here + + def __init__(self, ...): + # Get some params + + # signature change + def perform(self, node, inputs, out_storage, params): + # do something + + def get_params(self, node): + # Return a params object diff --git a/presentation.pdf b/presentation.pdf index 70379d9..ec62df1 100644 Binary files a/presentation.pdf and b/presentation.pdf differ diff --git a/presentation.tex b/presentation.tex index 1a4690f..dc0c481 100644 --- a/presentation.tex +++ b/presentation.tex @@ -1,36 +1,82 @@ -\documentclass[utf8x,xcolor=pdftex,dvipsnames,table]{beamer} -\usetheme{Malmoe} % Now it's a beamer presentation with the lisa theme! -\setbeamertemplate{footline}[page number] -\usecolortheme{beaver} -\usepackage[T1]{fontenc} -\usepackage{amsmath} +\documentclass[utf8x,hyperref={pdfpagelabels=false}]{beamer} + \usepackage[utf8x]{inputenc} -%\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}} +\usepackage[OT1]{fontenc} +\usepackage{graphicx} +\usepackage{amsmath} \usepackage{listings} +\usepackage{hyperref} +\usepackage{xcolor} +\usepackage{tikz} +\usetikzlibrary{shapes.arrows} +%\logo{\includegraphics[width=.8in]{UdeM_NoirBleu_logo_Marie_crop}} + + +\usetheme{Malmoe} % Now it's a beamer presentation with the lisa theme! 
+\usecolortheme{beaver} +\setbeamertemplate{footline}[page number] +\setbeamertemplate{navigation symbols}{} + +\lstloadlanguages{Python} + +\definecolor{darkgreen}{RGB}{0,93,21} +\definecolor{greenblue}{RGB}{40,110,126} +\definecolor{lightgray}{RGB}{246,246,246} +\definecolor{bordergray}{RGB}{193,193,193} +\definecolor{lightblue}{RGB}{0,114,168} +\definecolor{methblue}{RGB}{0,31,108} \newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}} \mode \title{Introduction to Theano} - \author{% \footnotesize -Frédéric Bastien \newline -(slides highly copied from previous tutorial by Ian G.) \newline +Arnaud Bergeron \newline +(slides adapted by Frédéric Bastien from slides by Ian G.) \newline +(further adapted by Arnaud Bergeron) +} +\date{February 26, 2015} + +\lstdefinestyle{theano}{ +language=Python, +basicstyle=\fontfamily{pcr}\selectfont\footnotesize, +keywordstyle=\color{darkgreen}\bfseries, +commentstyle=\color{greenblue}\itshape, +%commentstyle=\color{blue}\itshape, +stringstyle=\color{violet}, +showstringspaces=false, +tabsize=4, +backgroundcolor=\color{lightgray}, +frame=single, +emph={[2]__init__,make_node,perform,infer_shape,c_code,make_thunk,grad,R_op},emphstyle={[2]\color{methblue}}, +emph={[3]self},emphstyle={[3]\color{darkgreen}}, +moredelim=**[is][{\color{red}}]{`}{`} } -\date{September 25, 2014} +% We don't have code till the end of the file. +\lstdefinestyle{output}{ +language={}, +basicstyle=\ttfamily\footnotesize, +backgroundcolor=\color{white}, +frame={}, +breaklines=true, +emph={[2]}, +emph={[3]}, +} -\setbeamertemplate{navigation symbols}{} +\lstset{style=theano} + +\newcommand{\code}[1]{\lstinline[emph={[2]}]|#1|} \begin{document} \begin{frame}[plain] \titlepage - \vspace{-5em} +% \vspace{-5em} % \includegraphics[width=1in]{../hpcs2011_tutorial/pics/lisabook_logo_text_3.png} - \hfill +% \hfill % \includegraphics[width=.8in]{../hpcs2011_tutorial/pics/UdeM_NoirBleu_logo_Marie_crop} \end{frame} @@ -67,27 +113,18 @@ \section{Outline} \end{itemize} \end{frame} -\begin{frame}[fragile] - \frametitle{Overview} +\begin{frame}[fragile]{Overview} Using Theano: \begin{itemize} \item define expression $f(x,y) = x + y$ +\begin{lstlisting} +>>> z = x + y +\end{lstlisting} \item compile expression -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } \begin{lstlisting} -int f(int x, int y){ - return x + y; -} +>>> f = theano.function([x, y], z) \end{lstlisting} - \item execute expression -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } \begin{lstlisting} >>> f(1, 2) 3 @@ -95,30 +132,21 @@ \section{Outline} \end{itemize} \end{frame} - \section{Building} + \begin{frame}{Building expressions} \begin{itemize} \item Scalars \item Vectors \item Matrices \item Tensors + \item Broadcasting \item Reduction \item Dimshuffle \end{itemize} \end{frame} -\begin{frame}[fragile] - \frametitle{Scalar math} - Using Theano: - \begin{itemize} - \item define expression $f(x,y) = x + y$ - \item compile expression - \end{itemize} -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } +\begin{frame}[fragile]{Scalar math} \begin{lstlisting} from theano import tensor as T x = T.scalar() @@ -132,13 +160,7 @@ \section{Building} \end{lstlisting} \end{frame} -\begin{frame}[fragile] - \frametitle{Vector math} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } +\begin{frame}[fragile]{Vector math} \begin{lstlisting} from theano import 
tensor as T x = T.vector() @@ -152,13 +174,7 @@ \section{Building} \end{lstlisting} \end{frame} -\begin{frame}[fragile] - \frametitle{Matrix math} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } +\begin{frame}[fragile]{Matrix math} \begin{lstlisting} from theano import tensor as T x = T.matrix() @@ -171,24 +187,14 @@ \section{Building} \end{lstlisting} \end{frame} -\begin{frame}[fragile] - \frametitle{Tensors} - Using Theano: - \begin{itemize} - \item define expression $f(x,y) = x + y$ - \item compile expression - \begin{itemize} +\begin{frame}[fragile]{Tensors} + \begin{itemize} \item Dimensionality defined by length of ``broadcastable'' argument \item Can add (or do other elemwise op) on two tensors with same dimensionality \item Duplicate tensors along broadcastable axes to make size match - \end{itemize} \end{itemize} -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } \begin{lstlisting} from theano import tensor as T tensor3 = T.TensorType( @@ -198,17 +204,46 @@ \section{Building} \end{lstlisting} \end{frame} -\begin{frame}[fragile] - \frametitle{Reductions} - Using Theano: - \begin{itemize} - \item define expression $f(x,y) = x + y$ - \item compile expression - \end{itemize} -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } +\begin{frame}{Broadcasting} +\begin{tabular}{lcccccccl} + & + \begin{tabular}{cc} + 1 & 2 \\ + 3 & 4 \\ + 5 & 6 \\ + \end{tabular} & + + & + \begin{tabular}{cc} + 1 & 2 \\ + \end{tabular} & + = & + \begin{tabular}{cc} + 1 & 2 \\ + 3 & 4 \\ + 5 & 6 \\ + \end{tabular} & + + & + \begin{tabular}{cc} + 1 & 2 \\ + \color{blue} 1 & \color{blue} 2 \\ + \color{blue} 1 & \color{blue} 2 \\ + \end{tabular} & + \hspace{-1.3em} + \tikz[baseline={([yshift=-.5ex]current bounding box.center)}]{ + \draw [->, very thick] (0,0) -- (0,-1.2); + } \\[1.5em] + shape: & (3, 2) & & (2,) & & (3, 2) & & ({\color{blue}3}, 2) & +\end{tabular} +\vfill +\begin{itemize} + \item Pad shape with 1s on the left : $(2,) \equiv (1,2)$ + \item Two dimensions are compatible when they have the same length or one of them is broadcastable + \item broadcastable dimensions must have a length of 1 + \item Adding tensors of shape (8, 1, 6, 1) and (7, 1, 5) gives a tensor of shape (8, 7, 6, 5) +\end{itemize} +\end{frame} + +\begin{frame}[fragile]{Reductions} \begin{lstlisting} from theano import tensor as T tensor3 = T.TensorType( @@ -221,16 +256,12 @@ \section{Building} \end{lstlisting} \end{frame} -\begin{frame}[fragile] - \frametitle{Dimshuffle} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } +\begin{frame}[fragile]{Dimshuffle} \begin{lstlisting} from theano import tensor as T -tensor3 = T.TensorType(broadcastable=(False, False, False), dtype=''float32'') +tensor3 = T.TensorType( + broadcastable=(False, False, False), + dtype='float32') x = tensor3() y = x.dimshuffle((2, 1, 0)) a = T.matrix() @@ -238,20 +269,19 @@ \section{Building} # Same as b c = a.dimshuffle((0, 1)) # Adding to larger tensor -d = a.dimshuffle((0, 1, ``x'')) +d = a.dimshuffle((0, 1, 'x')) e = a + d \end{lstlisting} \end{frame} \begin{frame}{Exercices} - Work through the ``01\_buildbing\_expressions'' directory now. - Available at ``git~clone~https://github.com/nouiz/ccw\_tutorial\_theano.git''. +Work through the "Building Expressions" section of the ipython notebook. 
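+As a quick self-check before the exercises, a minimal sketch of the
+broadcasting rule from the previous slides:
+\begin{lstlisting}
+from theano import tensor as T
+a = T.matrix()  # e.g. shape (3, 2) at runtime
+b = T.row()     # broadcastable pattern (True, False)
+c = a + b       # b is repeated along the first axis
+\end{lstlisting}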
\end{frame} \section{Compiling/Running} \begin{frame}{Compiling and running expression} \begin{itemize} - \item theano.function + \item \code{theano.function} \item shared variables and updates \item compilation modes \item compilation for GPU @@ -259,23 +289,18 @@ \section{Compiling/Running} \end{itemize} \end{frame} -\begin{frame}[fragile] - \frametitle{theano.function} +\begin{frame}[fragile]{\code{theano.function}} -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } \begin{lstlisting} >>> from theano import tensor as T >>> x = T.scalar() >>> y = T.scalar() >>> from theano import function ->>> # first arg is list of SYMBOLIC inputs ->>> # second arg is SYMBOLIC output +>>> # first arg is list of symbolic inputs +>>> # second arg is symbolic output >>> f = function([x, y], x + y) ->>> # Call it with NUMERICAL values ->>> # Get a NUMERICAL output +>>> # Call it with numerical values +>>> # Get a numerical output >>> f(1., 2.) array(3.0) \end{lstlisting} @@ -284,33 +309,26 @@ \section{Compiling/Running} \begin{frame}{Shared variables} \begin{itemize} \item It’s hard to do much with purely functional programming - \item ``shared variables'' add just a little bit of imperative programming - \item A “shared variable” is a buffer that stores a numerical value for a Theano variable + \item \emph{shared variables} add just a little bit of imperative programming + \item A \emph{shared variable} is a buffer that stores a numerical value for a Theano variable \item Can write to as many shared variables as you want, once each, at the end of the function - \item Modify outside Theano function with get\_value() and set\_value() methods. + \item Modify outside Theano function with \code{get_value()} and \code{set_value()} methods. \end{itemize} \end{frame} -\begin{frame}[fragile] - \frametitle{Shared variable example} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } +\begin{frame}[fragile]{Shared variable example} \begin{lstlisting} >>> from theano import shared >>> x = shared(0.) ->>> from theano.compat.python2x import OrderedDict ->>> updates = OrderedDict() ->>> updates[x] = x + 1 +# Can also use a dict for more complex code +>>> updates = [(x, x + 1)] >>> f = function([], updates=updates) >>> f() ->>> x.get\_value() +>>> x.get_value() 1.0 ->>> x.set\_value(100.) +>>> x.set_value(100.) >>> f() ->>> x.get\_value() +>>> x.get_value() 101.0 \end{lstlisting} \end{frame} @@ -320,15 +338,11 @@ \section{Compiling/Running} \item Use theano.compat.python2x.OrderedDict \item Not collections.OrderedDict \begin{itemize} - \item This isn’t available in older versions of python, -and will limit the portability of your code + \item This isn’t available in older versions of python, and will limit the portability of your code. \end{itemize} - \item Not \{\} aka dict + \item Not \code{\{\}} aka dict \begin{itemize} - \item The iteration order of this built-in class is not - deterministic (thanks, Python!) so if Theano - accepted this, the same script could compile - different C programs each time you run it + \item The iteration order of this built-in class is not deterministic so if Theano accepted this, the same script could compile different C programs each time you run it. 
\end{itemize} \end{itemize} \end{frame} @@ -336,33 +350,33 @@ \section{Compiling/Running} \begin{frame}{Compilation modes} \begin{itemize} \item Can compile in different modes to get different kinds of programs - \item Can specify these modes very precisely with arguments to theano.function + \item Can specify these modes very precisely with arguments to \code{theano.function()} \item Can use a few quick presets with environment variable flags \end{itemize} \end{frame} \begin{frame}{Example preset compilation modes} - \begin{itemize} - \item FAST\_RUN: default. Spends a lot of time on + \begin{description}[FAST\_RUN] + \item[FAST\_RUN] Default. Spends a lot of time on compilation to get an executable that runs fast. - \item FAST\_COMPILE: Doesn’t spend much time -compiling. Executable usually uses python + \item[FAST\_COMPILE] Doesn’t spend much time compiling. +Executable usually uses python instead of compiled C code. Runs slow. - \item DEBUG\_MODE: Adds lots of checks. -Raises error messages in situations other -modes regard as fine. - \end{itemize} + \item[DEBUG\_MODE] Adds lots of checks. +Raises error messages in situations other modes don't check for. + \end{description} \end{frame} \begin{frame}{Compilation for GPU} \begin{itemize} - \item Theano current back-end only supports 32 bit on GPU + \item Theano's current back-end only supports 32 bit on GPU \item CUDA supports 64 bit, but is slow in gamer card - \item T.fscalar, T.fvector, T.fmatrix are all 32 bit - \item T.scalar, T.vector, T.matrix resolve to 32 bit or 64 bit depending on theano’s floatX flag + \item \code{T.fscalar}, \code{T.fvector}, \code{T.fmatrix} are all 32 bit + \item \code{T.scalar}, \code{T.vector}, \code{T.matrix} resolve to 32 or 64 bit depending on theano’s floatX flag \item floatX is float64 by default, set it to float32 - \item Set device flag to gpu (or a specific gpu, like gpu0) + \item Set the device flag to gpu (or a specific gpu, like gpu0) + \item Optional: warn\_float64=\{'ignore', 'warn', 'raise', 'pdb'\} \end{itemize} \end{frame} @@ -372,44 +386,33 @@ \section{Compiling/Running} you write before converting them to C code \item It makes them faster \begin{itemize} - \item (x+y)+(x+y) -> 2 (x + y) + \item $(x+y)+(x+y) \to 2\times(x + y)$ \end{itemize} \item It makes them more stable \begin{itemize} - \item exp(a)/exp(a).sum()->softmax(a) + \item $\exp(a)/\sum{\exp(a)} \to \operatorname{softmax}(a)$ \end{itemize} \end{itemize} \end{frame} -\begin{frame}[fragile] - \frametitle{Optimizations} - - \begin{itemize} - \item Sometimes optimizations discard error - checking and produce incorrect output - rather than an exception - \end{itemize} -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } +\begin{frame}[fragile]{Optimizations (2)} +Sometimes optimizations discard error checking and produce incorrect output rather than an exception. \begin{lstlisting} >>> x = T.scalar() >>> f = function([x], x/x) >>> f(0.) array(1.0) \end{lstlisting} - \end{frame} \begin{frame}{Exercises} -Work through the ``02\_compiling\_and\_running'' directory now +Work through the "Compiling and Running" section of the ipython notebook. 
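+A quick way to try one of the preset modes from the previous slides:
+\begin{lstlisting}
+from theano import function, tensor as T
+x = T.scalar()
+# presets include 'FAST_RUN' (default) and 'FAST_COMPILE'
+f = function([x], 2 * x, mode='FAST_COMPILE')
+\end{lstlisting}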
\end{frame} \section{Modifying expressions} \begin{frame}{Modifying expressions} \begin{itemize} - \item The grad method + \item The \code{grad()} method \item Variable nodes \item Types \item Ops @@ -417,18 +420,13 @@ \section{Modifying expressions} \end{itemize} \end{frame} -\begin{frame}[fragile] - \frametitle{The grad method} - -\lstset{language=Python, - commentstyle=\itshape\color{blue}, - stringstyle=\color{violet}, - } +\begin{frame}[fragile]{The \code{grad()} method} \begin{lstlisting} >>> x = T.scalar('x') >>> y = 2. * x >>> g = T.grad(y, x) >>> from theano.printing import min_informative_str +# Print the unoptimized graph >>> print min_informative_str(g) A. Elemwise{mul} B. Elemwise{second,no_inplace} @@ -440,47 +438,58 @@ \section{Modifying expressions} \end{lstlisting} \end{frame} -\begin{frame}{Theano Variables} +\begin{frame}[fragile]{The \code{grad()} method} +\begin{lstlisting} +>>> x = T.scalar('x') +>>> y = 2. * x +>>> g = T.grad(y, x) +>>> from theano.printing import min_informative_str +# Print the optimized graph +>>> f = theano.function([x], g) +>>> theano.printing.debugprint(f) +DeepCopyOp [@A] '' 0 + |TensorConstant{2.0} [@B] +\end{lstlisting} +\end{frame} + +\begin{frame}{Theano variables} \begin{itemize} - \item A Variable is a theano expression - \item Can come from T.scalar, T.matrix, etc. - \item Can come from doing operations on other Variables - \item Every Variable has a type field, identifying its Type \newline - e.g. TensorType((True, False), ‘float32’) + \item A \emph{variable} is a theano expression. + \item Can come from \code{T.scalar()}, \code{T.matrix()}, etc. + \item Can come from doing operations on other variables. + \item Every variable has a type field, identifying its \emph{type}, such as \code{TensorType((True, False), 'float32')} \item Variables can be thought of as nodes in a graph \end{itemize} \end{frame} \begin{frame}{Ops} - \begin{itemize} - \item An Op is any class that describes a -mathematical function of some variables + \item An Op is any class that describes a function operating on some variables \item Can call the op on some variables to get a new variable or variables \item An Op class can supply other forms of information about the function, such as its -derivatives +derivative \end{itemize} \end{frame} \begin{frame}{Apply nodes} \begin{itemize} - \item The Apply class is a specific instance of an application of an Op + \item The Apply class is a specific instance of an application of an Op. \item Notable fields: - \begin{itemize} - \item op: The Op to be applied - \item inputs: The Variables to be used as input - \item outputs: The Variables produced - \end{itemize} - \item Variable.owner identifies the Apply that created the variable + \begin{description}[\texttt{outputs}] + \item[\texttt{op}] The Op to be applied + \item[\texttt{inputs}] The Variables to be used as input + \item[\texttt{outputs}] The Variables produced + \end{description} + \item The \code{owner} field on variables identifies the Apply that created it. \item Variable and Apply instances are nodes and owner/ - inputs/outputs identify edges in a Theano graph + inputs/outputs identify edges in a Theano graph. \end{itemize} \end{frame} \begin{frame}{Exercises} -Work through the ``03\_modifying'' directory now +Work through the "Modifying" section in the ipython notebook. 
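+A minimal sketch of navigating a graph with the fields above:
+\begin{lstlisting}
+>>> x = T.scalar('x')
+>>> y = 2. * x
+>>> y.owner.op
+Elemwise{mul,no_inplace}
+>>> y.owner.inputs
+[TensorConstant{2.0}, x]
+\end{lstlisting}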
 \section{Debugging}
@@ -488,19 +497,14 @@ \section{Debugging}
 \begin{itemize}
 \item DEBUG\_MODE
 \item Error message
- \item theano.printing.debugprint
- \item min\_informative\_str
+ \item \code{theano.printing.debugprint()}
+ \item \code{min_informative_str()}
 \item compute\_test\_value
 \item Accessing the FunctionGraph
 \end{itemize}
 \end{frame}

-\begin{frame}[fragile]
- \frametitle{Error message: code}
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
+\begin{frame}[fragile]{Error message: code}
 \begin{lstlisting}
 import numpy as np
 import theano
@@ -514,110 +518,51 @@ \section{Debugging}
 \end{lstlisting}
 \end{frame}

-\begin{frame}[fragile]
- \frametitle{Error message: 1st part}
-
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
-\begin{lstlisting}
+\begin{frame}[fragile,allowframebreaks]{Error message}
+\vspace{1em}
+\begin{lstlisting}[style=output]
 Traceback (most recent call last):
-[...]
-ValueError: Input dimension mis-match.
- (input[0].shape[0] = 3, input[1].shape[0] = 2)
-Apply node that caused the error:
- Elemwise{add,no_inplace}(<TensorType(float64, vector)>,
- <TensorType(float64, vector)>,
- <TensorType(float64, vector)>)
-Inputs types: [TensorType(float64, vector),
- TensorType(float64, vector),
- TensorType(float64, vector)]
+  File "test.py", line 9, in <module>
+    f(np.ones((2,)), np.ones((3,)))
+  File "/Users/anakha/Library/Python/2.7/site-packages/theano/compile/function_module.py", line 606, in __call__
+    storage_map=self.fn.storage_map)
+  File "/Users/anakha/Library/Python/2.7/site-packages/theano/compile/function_module.py", line 595, in __call__
+    outputs = self.fn()
+ValueError: Input dimension mis-match. (input[0].shape[0] = 3, input[1].shape[0] = 2)
+Apply node that caused the error: Elemwise{add,no_inplace}(<TensorType(float64, vector)>, <TensorType(float64, vector)>, <TensorType(float64, vector)>)
+Inputs types: [TensorType(float64, vector), TensorType(float64, vector), TensorType(float64, vector)]
 Inputs shapes: [(3,), (2,), (2,)]
 Inputs strides: [(8,), (8,), (8,)]
-Inputs scalar values: ['not scalar', 'not scalar', 'not scalar']
-\end{lstlisting}
-\end{frame}
-
-\begin{frame}[fragile]
- \frametitle{Error message: 2st part}
-
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
-\begin{lstlisting}
-HINT: Re-running with most Theano optimization
-disabled could give you a back-traces when this
-node was created. This can be done with by setting
-the Theano flags optimizer=fast_compile
-HINT: Use the Theano flag 'exception_verbosity=high'
-for a debugprint of this apply node.
+Inputs values: [array([ 1.,  1.,  1.]), array([ 1.,  1.]), array([ 1.,  1.])]
+
+HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
+HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
 \end{lstlisting}
 \end{frame}
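[Editor's note: both HINTs refer to Theano configuration flags. A sketch of acting on them, assuming they are settable from Python like compute_test_value on a later slide; they can also go in THEANO_FLAGS:]

    # Assumed usage: act on the two HINTs from the error message above.
    import theano
    theano.config.optimizer = 'fast_compile'    # keep a creation back-trace
    theano.config.exception_verbosity = 'high'  # debugprint + storage map on error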
-\begin{frame}[fragile]
- \frametitle{Error message: exception\_verbosity=high}
-
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
-\begin{lstlisting}
-Debugprint of the apply node:
-Elemwise{add,no_inplace} [@A] ''
+\begin{frame}[fragile]{Error message: exception\_verbosity=high}
+\begin{lstlisting}[style=output]
+Debugprint of the apply node:
+Elemwise{add,no_inplace} [@A] ''
 |<TensorType(float64, vector)> [@B]
 |<TensorType(float64, vector)> [@C]
 |<TensorType(float64, vector)> [@C]
+Storage map footprint:
+ - <TensorType(float64, vector)>, Shape: (3,), ElemSize: 8 Byte(s), TotalSize: 24 Byte(s)
+ - <TensorType(float64, vector)>, Shape: (2,), ElemSize: 8 Byte(s), TotalSize: 16 Byte(s)
 \end{lstlisting}
 \end{frame}

-\begin{frame}[fragile]
- \frametitle{Error message: optimizer=fast\_compile}
-
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
-\begin{lstlisting}
+\begin{frame}[fragile]{Error message: optimizer=fast\_compile}
+\begin{lstlisting}[style=output]
 Backtrace when the node is created:
   File "test.py", line 7, in <module>
     z = z + y
- File "/home/nouiz/src/Theano/theano/tensor/var.py", line 122, in __add__
- return theano.tensor.basic.add(self, other)
-
 \end{lstlisting}
 \end{frame}

-\begin{frame}[fragile]
- \frametitle{Error message: Traceback}
-
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
-\begin{lstlisting}
-Traceback (most recent call last):
- File "test.py", line 9, in <module>
- f(np.ones((2,)), np.ones((3,)))
- File "/u/bastienf/repos/theano/compile/function_module.py",
- line 589, in __call__
- self.fn.thunks[self.fn.position_of_error])
- File "/u/bastienf/repos/theano/compile/function_module.py",
- line 579, in __call__
- outputs = self.fn()
-
-\end{lstlisting}
-\end{frame}
-
-\begin{frame}[fragile]
- \frametitle{debugprint}
-
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
+\begin{frame}[fragile]{debugprint}
 \begin{lstlisting}
 >>> from theano.printing import debugprint
 >>> debugprint(a)
@@ -629,13 +574,7 @@ \section{Debugging}
 \end{lstlisting}
 \end{frame}

-\begin{frame}[fragile]
- \frametitle{min\_informative\_str}
-
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
+\begin{frame}[fragile]{min\_informative\_str}
 \begin{lstlisting}
 >>> x = T.scalar()
 >>> y = T.scalar()
@@ -650,13 +589,7 @@ \section{Debugging}
 \end{lstlisting}
 \end{frame}

-\begin{frame}[fragile]
- \frametitle{compute\_test\_value}
-
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
+\begin{frame}[fragile]{compute\_test\_value}
 \begin{lstlisting}
 >>> from theano import config
 >>> config.compute_test_value = 'raise'
@@ -672,13 +605,7 @@ \section{Debugging}
 \end{lstlisting}
 \end{frame}

-\begin{frame}[fragile]
- \frametitle{Accessing a function’s fgraph}
-
-\lstset{language=Python,
- commentstyle=\itshape\color{blue},
- stringstyle=\color{violet},
- }
+\begin{frame}[fragile]{Accessing a function’s fgraph}
 \begin{lstlisting}
 >>> x = T.scalar()
 >>> y = x / x
@@ -690,14 +617,13 @@ \section{Debugging}
 \end{frame}

 \begin{frame}{Exercises}
-Work through the ``04\_debugging'' directory now
+Work through the ``Debugging'' section of the ipython notebook.
 \end{frame}
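[Editor's note: tying the last two tools together, a sketch that prints the optimized graph a compiled function carries; the f.maker.fgraph attribute path is assumed here, and the snippet is illustrative rather than from the slides:]

    # Sketch: inspect the optimized graph of a compiled function.
    import theano
    import theano.tensor as T
    from theano.printing import debugprint

    x = T.scalar()
    f = theano.function([x], x / x)
    fgraph = f.maker.fgraph        # the FunctionGraph, after optimization
    debugprint(fgraph.outputs[0])  # prints the optimized x/x graph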
-\section{Citing}
+\section*{}

 \begin{frame}{Citing Theano}
+Please cite both of the following papers in all work that uses Theano:
 \begin{itemize}
- \item Please cite both of the following papers in
-all work that uses Theano:
 \item Bastien, Frédéric, Lamblin, Pascal, Pascanu, Razvan, Bergstra, James, Goodfellow, Ian, Bergeron, Arnaud, Bouchard, Nicolas, and Bengio, Yoshua. Theano: new features and speed improvements. Deep Learning and Unsupervised Feature Learning NIPS 2012 Workshop, 2012.
@@ -708,11 +634,8 @@ \section{Citing}
 \end{frame}

 \begin{frame}{Example acknowledgments}
-We would like to thank the developers of
-Theano \\citep\{bergstra+al:2010-scipy,Bastien-Theano-2012\},
-Pylearn2 \\citep\{pylearn2\_arxiv\_2013\}. We would also like
-to thank NSERC, Compute Canada, and Calcul Qu\'ebec
-for providing computational resources.
+We would like to thank the developers of Theano \textbackslash citep\{bergstra+al:2010-scipy,Bastien-Theano-2012\}.
+We would also like to thank NSERC, Compute Canada, and Calcul Québec for providing computational resources.
 \end{frame}
diff --git a/python.py b/python.py
index cbfdac0..49aab48 100644
--- a/python.py
+++ b/python.py
@@ -12,10 +12,10 @@ def make_node(self, ...):

 def perform(self, node, inputs, outputs_storage):
     # do the computation

-def infer_shape(self, input_shapes):
+def infer_shape(self, node, input_shapes):
     # return output shapes

-def grad(self, inputs, output_grads):
+def L_op(self, inputs, outputs, output_grads):
     # return gradient graph for each input

 def R_op(self, inputs, eval_points):
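[Editor's note: to connect the updated skeleton above with the slides, a hedged end-to-end sketch; TwiceOp and its doubling behaviour are illustrative only, not code from the repo:]

    # Illustrative Op using the new-style signatures from python.py.
    import theano
    import theano.tensor as T
    from theano import Op, Apply
    from theano.tensor import as_tensor_variable

    class TwiceOp(Op):
        __props__ = ()

        def make_node(self, x):
            x = as_tensor_variable(x)
            return Apply(self, [x], [x.type()])

        def perform(self, node, inputs, outputs_storage):
            x, = inputs
            outputs_storage[0][0] = 2 * x

        def infer_shape(self, node, input_shapes):
            # one output, with the same shape as the input
            return [input_shapes[0]]

        def L_op(self, inputs, outputs, output_grads):
            # d(2x)/dx = 2, applied to the incoming gradient
            return [2 * output_grads[0]]

    x = T.vector('x')
    f = theano.function([x], TwiceOp()(x))
    print f([1., 2.])  # [ 2.  4.]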