Merge pull request #96 from slimgroup/add_docs
small spelling, add examples
mloubout authored Nov 14, 2023
2 parents 0599cc7 + bb2320b commit af20c9e
Showing 3 changed files with 231 additions and 18 deletions.
2 changes: 1 addition & 1 deletion docs/src/api.md
@@ -7,7 +7,7 @@ Order = [:function]
Pages = ["neuralnet.jl", "parameter.jl"]
```

## Activations functions
## Activation functions

```@autodocs
Modules = [InvertibleNetworks]
221 changes: 218 additions & 3 deletions docs/src/examples.md
@@ -1,10 +1,225 @@
## Simple examples
## Further examples

We provide usage examples for all the layers and networks in our [examples](https://github.com/slimgroup/InvertibleNetworks.jl/tree/master/examples) subfolder. Each example shows how to set up and use the building blocks for simple random variables.

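For a quick sense of the pattern those examples follow, here is a minimal, self-contained sketch (not taken from the examples folder) that builds a single GLOW coupling layer and round-trips random data through it. The constructor arguments mirror those used in the examples below, and the layer is assumed to expose `forward`/`inverse` with `logdet=true`:

```julia
using InvertibleNetworks

# One affine coupling layer acting on 2 channels with 16 hidden units
AL = CouplingLayerGlow(2, 16; k1=1, k2=1, p1=0, p2=0, logdet=true)

# Random input of shape (nx, ny, n_channels, batchsize)
X = randn(Float32, 1, 1, 2, 100)

# Forward map returns the output and the log-determinant of the Jacobian
Y, logdet = AL.forward(X)

# The inverse recovers the input up to floating-point error
X_ = AL.inverse(Y)
```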
## Litterature applications
## 2D Rosenbrock/banana distribution sampling w/ GLOW

The following examples show the implementaton of applications from the linked papers with [InvertibleNetworks.jl]:
```@example banana
using LinearAlgebra, InvertibleNetworks, PyPlot, Flux, Random
# Random seed
Random.seed!(11)
# Define network
nx = 1; ny = 1; n_in = 2
n_hidden = 64
batchsize = 20
depth = 4
AN = Array{ActNorm}(undef, depth)
L = Array{CouplingLayerGlow}(undef, depth)
Params = Array{Parameter}(undef, 0)
# Create layers
for j=1:depth
    AN[j] = ActNorm(n_in; logdet=true)
    L[j] = CouplingLayerGlow(n_in, n_hidden; k1=1, k2=1, p1=0, p2=0, logdet=true)
    # Collect parameters
    global Params = cat(Params, get_params(AN[j]); dims=1)
    global Params = cat(Params, get_params(L[j]); dims=1)
end
# Forward pass
function forward(X)
    logdet = 0f0
    for j=1:depth
        X_, logdet1 = AN[j].forward(X)
        X, logdet2 = L[j].forward(X_)
        logdet += (logdet1 + logdet2)
    end
    return X, logdet
end
# Backward pass
function backward(ΔX, X)
    for j=depth:-1:1
        ΔX_, X_ = L[j].backward(ΔX, X)
        ΔX, X = AN[j].backward(ΔX_, X_)
    end
    return ΔX, X
end
####################################################################################################
# Loss
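# The objective below is the negative log-likelihood under the change-of-variables
# formula, assuming a standard-normal base distribution:
#   -log p_X(x) = -log p_Z(f(x)) - logdet |J_f(x)|
# Minimizing it performs maximum-likelihood training of the invertible map f.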
function loss(X)
    Y, logdet = forward(X)
    f = -log_likelihood(Y) - logdet
    ΔY = -∇log_likelihood(Y)
    ΔX = backward(ΔY, Y)[1]
    return f, ΔX
end
# Training
maxiter = 2000
opt = Flux.ADAM(1f-3)
fval = zeros(Float32, maxiter)
for j=1:maxiter
    # Evaluate objective and gradients
    X = sample_banana(batchsize)
    fval[j] = loss(X)[1]
    # Update params
    for p in Params
        Flux.update!(opt, p.data, p.grad)
    end
    clear_grad!(Params)
end
####################################################################################################
# Testing
test_size = 500
X = sample_banana(test_size)
Y_ = forward(X)[1]
Y = randn(Float32, 1, 1, 2, test_size)
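# The backward pass recomputes the input state from the output, so feeding
# latent samples through it acts as the inverse map: the second return value
# below is x = f⁻¹(z).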
X_ = backward(Y, Y)[2]
# Plot
fig = figure(figsize=[8,8])
ax1 = subplot(2,2,1); plot(X[1, 1, 1, :], X[1, 1, 2, :], "."); title(L"Data space: $x \sim \hat{p}_X$")
ax1.set_xlim([-3.5,3.5]); ax1.set_ylim([0,50])
ax2 = subplot(2,2,2); plot(Y_[1, 1, 1, :], Y_[1, 1, 2, :], "g."); title(L"Latent space: $z = f(x)$")
ax2.set_xlim([-3.5, 3.5]); ax2.set_ylim([-3.5, 3.5])
ax3 = subplot(2,2,3); plot(X_[1, 1, 1, :], X_[1, 1, 2, :], "g."); title(L"Data space: $x = f^{-1}(z)$")
ax3.set_xlim([-3.5,3.5]); ax3.set_ylim([0,50])
ax4 = subplot(2,2,4); plot(Y[1, 1, 1, :], Y[1, 1, 2, :], "."); title(L"Latent space: $z \sim \hat{p}_Z$")
ax4.set_xlim([-3.5, 3.5]); ax4.set_ylim([-3.5, 3.5])
savefig("plot_banana.svg")
nothing
```
![](plot_banana.svg)





## Conditional 2D Rosenbrock/banana distribution sampling w/ cHINT

```@example cbanana
using InvertibleNetworks
using Flux, LinearAlgebra, PyPlot
# Define network
nx = 1; ny = 1; n_in = 2
n_hidden = 64
batchsize = 64
depth = 8
# Construct HINT network
H = NetworkConditionalHINT(n_in, n_hidden, depth; k1=1, k2=1, p1=0, p2=0)
# Linear forward operator
A = randn(Float32,2,2)
A = A / (2*opnorm(A))
# Loss
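# Joint negative log-likelihood: both latent outputs (Zx, Zy) are pushed toward
# a standard normal via the change-of-variables formula, as in the GLOW example.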
function loss(H, X, Y)
    Zx, Zy, logdet = H.forward(X, Y)
    f = -log_likelihood(tensor_cat(Zx, Zy)) - logdet
    ΔZ = -∇log_likelihood(tensor_cat(Zx, Zy))
    ΔZx, ΔZy = tensor_split(ΔZ)
    ΔX, ΔY = H.backward(ΔZx, ΔZy, Zx, Zy)[1:2]
    return f, ΔX, ΔY
end
# Training
maxiter = 1000
opt = Flux.ADAM(1f-3)
fval = zeros(Float32, maxiter)
for j=1:maxiter
    # Evaluate objective and gradients
    X = sample_banana(batchsize)
    Y = reshape(A*reshape(X, :, batchsize), nx, ny, n_in, batchsize)
    Y += .2f0*randn(Float32, nx, ny, n_in, batchsize)
    fval[j] = loss(H, X, Y)[1]
    # Update params
    for p in get_params(H)
        Flux.update!(opt, p.data, p.grad)
    end
    clear_grad!(H)
end
# Testing
test_size = 1000
X = sample_banana(test_size)
Y = reshape(A*reshape(X, :, test_size), nx, ny, n_in, test_size)
Y += .2f0*randn(Float32, nx, ny, n_in, test_size)
Zx_, Zy_ = H.forward(X, Y)[1:2]
Zx = randn(Float32, nx, ny, n_in, test_size)
Zy = randn(Float32, nx, ny, n_in, test_size)
X_, Y_ = H.inverse(Zx, Zy)
# Now select single fixed sample from all Ys
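# Posterior sampling: map the fixed observation to its latent code Zy_fixed,
# pair it with many independent draws Zx ~ N(0, I), and apply the conditional
# inverse to obtain samples from the posterior p(x | y_fixed).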
X_fixed = sample_banana(1)
Y_fixed = reshape(A*vec(X_fixed), nx, ny, n_in, 1)
Y_fixed += .2f0*randn(Float32, size(X_fixed))
Zy_fixed = H.forward_Y(Y_fixed)
Zx = randn(Float32, nx, ny, n_in, test_size)
X_post = H.inverse(Zx, Zy_fixed.*ones(Float32, nx, ny, n_in, test_size))[1]
# Model/data spaces
fig = figure(figsize=[16,6])
ax1 = subplot(2,5,1); plot(X[1, 1, 1, :], X[1, 1, 2, :], "."); title(L"Model space: $x \sim \hat{p}_x$")
ax1.set_xlim([-3.5, 3.5]); ax1.set_ylim([0,50])
ax2 = subplot(2,5,2); plot(Y[1, 1, 1, :], Y[1, 1, 2, :], "."); title(L"Noisy data $y=Ax+n$ ")
ax3 = subplot(2,5,3); plot(X_[1, 1, 1, :], X_[1, 1, 2, :], "g."); title(L"Model space: $x = f(zx|zy)^{-1}$")
ax3.set_xlim([-3.5, 3.5]); ax3.set_ylim([0,50])
ax4 = subplot(2,5,4); plot(Y_[1, 1, 1, :], Y_[1, 1, 2, :], "g."); title(L"Data space: $y = f(zx|zy)^{-1}$")
ax5 = subplot(2,5,5); plot(X_post[1, 1, 1, :], X_post[1, 1, 2, :], "g.");
plot(X_fixed[1, 1, 1, :], X_fixed[1, 1, 2, :], "r."); title(L"Model space: $x = f(zx|zy_{fix})^{-1}$")
ax5.set_xlim([-3.5, 3.5]); ax5.set_ylim([0,50])
# Latent spaces
ax6 = subplot(2,5,6); plot(Zx_[1, 1, 1, :], Zx_[1, 1, 2, :], "g."); title(L"Latent space: $zx = f(x|y)$")
ax6.set_xlim([-3.5, 3.5]); ax6.set_ylim([-3.5, 3.5])
ax7 = subplot(2,5,7); plot(Zy_[1, 1, 1, :], Zy_[1, 1, 2, :], "g."); title(L"Latent space: $zy \sim \hat{p}_{zy}$")
ax7.set_xlim([-3.5, 3.5]); ax7.set_ylim([-3.5, 3.5])
ax8 = subplot(2,5,9); plot(Zx[1, 1, 1, :], Zx[1, 1, 2, :], "."); title(L"Latent space: $zx \sim \hat{p}_{zx}$")
ax8.set_xlim([-3.5, 3.5]); ax8.set_ylim([-3.5, 3.5])
ax9 = subplot(2,5,8); plot(Zy[1, 1, 1, :], Zy[1, 1, 2, :], "."); title(L"Latent space: $zy \sim \hat{p}_{zy}$")
ax9.set_xlim([-3.5, 3.5]); ax9.set_ylim([-3.5, 3.5])
ax10 = subplot(2,5,10); plot(Zx[1, 1, 1, :], Zx[1, 1, 2, :], ".");
plot(Zy_fixed[1, 1, 1, :], Zy_fixed[1, 1, 2, :], "r."); title(L"Latent space: $zx \sim \hat{p}_{zx}$")
ax10.set_xlim([-3.5, 3.5]); ax10.set_ylim([-3.5, 3.5])
savefig("plot_cbanana.svg")
nothing
```
![](plot_cbanana.svg)









## Literature applications

The following examples show the implementation of applications from the linked papers with [InvertibleNetworks.jl]:

- Invertible recurrent inference machines (Putzky and Welling, 2019) ([generic example](https://github.com/slimgroup/InvertibleNetworks.jl/tree/master/examples/networks/network_irim.jl))

26 changes: 12 additions & 14 deletions docs/src/index.md
@@ -1,29 +1,27 @@
Memory efficient inverible layers, networks and activation function for Machine learning.

# InvertibleNetworks.jl documentation

This documentation is a work in progress and is being actively populated.

## About

[InvertibleNetworks.jl](https://github.com/slimgroup/InvertibleNetworks.jl) is a package of invertible layers and networks for machine learning. The invertibility allow to backpropagate through the layers and networks without the need for storing the forward state that is recomputed on the fly, inverse propagating through it. This package is the first of its kind in Julia.

This package is developped and maintained by Felix J. Herrmann's [SlimGroup](https://slim.gatech.edu/) at Georgia Institute of Technology. In particular the main contributors of this package are:

- Philipp Witte, Microsoft Corporation (pwitte@microsoft.com)
- Gabrio Rizzuti, Utrecht University (g.rizzuti@umcutrecht.nl)
- Mathias Louboutin, Georgia Institute of Technology (mlouboutin3@gatech.edu)
- Ali Siahkoohi, Georgia Institute of Technology (alisk@gatech.edu)
[InvertibleNetworks.jl](https://github.com/slimgroup/InvertibleNetworks.jl) is a package of invertible layers and networks for machine learning. Invertibility makes it possible to backpropagate through the layers and networks without storing the forward state: that state is recomputed on the fly by propagating through the inverse. This package is the first of its kind in Julia, providing memory-efficient invertible layers, networks and activation functions for machine learning.
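As a minimal sketch of what this means in practice (assuming a single GLOW coupling layer, as used in the examples; `ΔY` is a stand-in for the gradient of some loss with respect to the layer output):

```julia
using InvertibleNetworks

AL = CouplingLayerGlow(2, 16; k1=1, k2=1, p1=0, p2=0, logdet=true)
X = randn(Float32, 1, 1, 2, 10)

# Forward pass: the input X does not need to be kept around for backpropagation
Y, logdet = AL.forward(X)

# Backward pass: the gradient is propagated and the input state is recomputed
# on the fly from the output Y
ΔY = randn(Float32, size(Y)...)
ΔX, X_recomputed = AL.backward(ΔY, Y)
```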

## Installation

THis package is registered in the Julia general registry and can be directly installed in the julia REPL package manager (`]`):

This package is registered in the Julia general registry and can be installed in the REPL package manager (`]`):

```julia
] add/dev InvertibleNetworks
] add InvertibleNetworks
```
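Equivalently, it can be installed programmatically with the standard Pkg API:

```julia
using Pkg
Pkg.add("InvertibleNetworks")
```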

## Authors

This package is developed and maintained by Felix J. Herrmann's [SlimGroup](https://slim.gatech.edu/) at Georgia Institute of Technology. The main contributors of this package are:
- Rafael Orozco, Georgia Institute of Technology (rorozco@gatech.edu)
- Philipp Witte, Microsoft Corporation (pwitte@microsoft.com)
- Gabrio Rizzuti, Utrecht University (g.rizzuti@umcutrecht.nl)
- Mathias Louboutin, Georgia Institute of Technology (mlouboutin3@gatech.edu)
- Ali Siahkoohi, Georgia Institute of Technology (alisk@gatech.edu)

## References

- Yann Dauphin, Angela Fan, Michael Auli and David Grangier, "Language modeling with gated convolutional networks", Proceedings of the 34th International Conference on Machine Learning, 2017. [ArXiv](https://arxiv.org/pdf/1612.08083.pdf)
