diff --git a/vision/conv_mnist/conv_mnist.jl b/vision/conv_mnist/conv_mnist.jl index 67319143..668dd1ad 100644 --- a/vision/conv_mnist/conv_mnist.jl +++ b/vision/conv_mnist/conv_mnist.jl @@ -1,6 +1,7 @@ -# Classification of MNIST dataset using a convnet, a variant of the original LeNet +# Classification of MNIST dataset using a convolutional network, +# which is a variant of the original LeNet from 1998. -using MLDatasets, Flux, CUDA, BSON # this will install everything if necc. +using MLDatasets, Flux, BSON, CUDA # this will install everything if necessary #===== DATA =====# @@ -26,7 +27,8 @@ loader() # returns a DataLoader, with first element a tuple like this: x1, y1 = first(loader()); # (28×28×1×64 Array{Float32, 3}, 10×64 OneHotMatrix(::Vector{UInt32})) -# If you are using a GPU, these should be CuArray{Float32, 3} etc. +# If you are using a GPU, these should be CuArray{Float32, 3} etc. +# If not, the `gpu` function does nothing (except complain the first time). #===== MODEL =====# @@ -44,6 +46,8 @@ lenet = Chain( Dense(84 => 10), ) |> gpu +# Notice that most of the parameters are in the final Dense layers. 
+ y1hat = lenet(x1) # try it out softmax(y1hat) @@ -63,7 +67,7 @@ hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9)) using Statistics: mean # standard library function loss_and_accuracy(model, data::MNIST=test_data) - (x,y) = only(loader(data; batchsize=0)) # batchsize=0 means one big batch + (x,y) = only(loader(data; batchsize=length(data))) # make one big batch ŷ = model(x) loss = Flux.logitcrossentropy(ŷ, y) # did not include softmax in the model acc = round(100 * mean(Flux.onecold(ŷ) .== Flux.onecold(y)); digits=2) @@ -91,6 +95,7 @@ opt_rule = OptimiserChain(WeightDecay(settings.lambda), Adam(settings.eta)) opt_state = Flux.setup(opt_rule, lenet); for epoch in 1:settings.epochs + # @time will show a much longer time for the first epoch, due to compilation @time for (x,y) in loader(batchsize=settings.batchsize) grads = Flux.gradient(m -> Flux.logitcrossentropy(m(x), y), lenet) Flux.update!(opt_state, lenet, grads[1]) @@ -101,7 +106,7 @@ for epoch in 1:settings.epochs loss, acc, _ = loss_and_accuracy(lenet) test_loss, test_acc, _ = loss_and_accuracy(lenet, test_data) @info "logging:" epoch acc test_acc - nt = (; epoch, loss, acc, test_loss, test_acc) + nt = (; epoch, loss, acc, test_loss, test_acc) # make a NamedTuple push!(train_log, nt) end if epoch % 5 == 0 @@ -118,16 +123,16 @@ hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9)) #===== INSPECTION =====# -using ImageInTerminal, ImageCore +using ImageCore, ImageInTerminal -xtest, ytest = only(loader(test_data, batchsize=0)) +xtest, ytest = only(loader(test_data, batchsize=length(test_data))); # There are many ways to look at images, you won't need ImageInTerminal if working in a notebook # ImageCore.Gray is a special type, whick interprets numbers between 0.0 and 1.0 as shades: -xtest[:,:,1,5] .|> Gray |> transpose # should display a 4 +xtest[:,:,1,5] .|> Gray |> transpose |> cpu -Flux.onecold(ytest, 0:9)[5] # it's coded as being a 4 +Flux.onecold(ytest, 0:9)[5] # true label, should match! 
# Let's look for the image whose classification is least certain. # First, in each column of probabilities, ask for the largest one. @@ -137,33 +142,18 @@ ptest = softmax(lenet(xtest)) max_p = maximum(ptest; dims=1) _, i = findmin(vec(max_p)) -xtest[:,:,1,i] .|> Gray |> transpose +xtest[:,:,1,i] .|> Gray |> transpose |> cpu Flux.onecold(ytest, 0:9)[i] # true classification +ptest[:,i] # probabilities of all outcomes Flux.onecold(ptest[:,i], 0:9) # uncertain prediction -# Next, let's look for the most confident, yet wrong, prediction. -# Often this will look quite ambiguous to you too. - -iwrong = findall(Flux.onecold(lenet(xtest)) .!= Flux.onecold(ytest)) - -max_p = maximum(ptest[:,iwrong]; dims=1) -_, k = findmax(vec(max_p)) # now max not min -i = iwrong[k] - -xtest[:,:,1,i] .|> Gray |> transpose - -Flux.onecold(ytest, 0:9)[i] # true classification -Flux.onecold(ptest[:,i], 0:9) # prediction - #===== SIZES =====# -# Maybe... at first I had this above, but it makes things long. - # A layer like Conv((5, 5), 1=>6) takes 5x5 patches of an image, and matches them to each # of 6 different 5x5 filters, placed at every possible position. 
These filters are here: -Conv((5, 5), 1=>6).weights |> summary # 5×5×1×6 Array{Float32, 4} +Conv((5, 5), 1=>6).weight |> summary # 5×5×1×6 Array{Float32, 4} # This layer can accept any size of image; let's trace the sizes with the actual input: @@ -172,19 +162,19 @@ Conv((5, 5), 1=>6).weights |> summary # 5×5×1×6 Array{Float32, 4} julia> x1 |> size (28, 28, 1, 64) -julia> conv_layers[1](x1) |> size +julia> lenet[1](x1) |> size # after Conv((5, 5), 1=>6, relu), (24, 24, 6, 64) -julia> conv_layers[1:2](x1) |> size +julia> lenet[1:2](x1) |> size # after MaxPool((2, 2)) (12, 12, 6, 64) -julia> conv_layers[1:3](x1) |> size +julia> lenet[1:3](x1) |> size # after Conv((5, 5), 6 => 16, relu) (8, 8, 16, 64) -julia> conv_layers(x1) |> size +julia> lenet[1:4](x1) |> size # after MaxPool((2, 2)) (4, 4, 16, 64) -julia> conv_layers(x1) |> Flux.flatten |> size +julia> lenet[1:5](x1) |> size # after Flux.flatten (256, 64) =# @@ -193,4 +183,3 @@ julia> conv_layers(x1) |> Flux.flatten |> size # This 256 must match the Dense(256 => 120). (See Flux.outputsize for ways to automate this.) #===== THE END =====# -