# Convolutional neural network

In [1]:
using MLDatasets
using Flux
using Flux: @epochs, onehotbatch, onecold, crossentropy, throttle
using Base.Iterators: repeated, partition
using Statistics: mean
# using CuArrays

## Load data

Classify MNIST digits with a convolutional network

In [2]:
train_x, train_y = CIFAR10.traindata()
test_x,  test_y  = CIFAR10.testdata()

(FixedPointNumbers.Normed{UInt8,8}[0.62 0.596 … 0.239 0.212; 0.624 0.592 … 0.192 0.22; … ; 0.494 0.49 … 0.114 0.133; 0.455 0.467 … 0.078 0.082]

FixedPointNumbers.Normed{UInt8,8}[0.439 0.439 … 0.455 0.42; 0.435 0.431 … 0.4 0.412; … ; 0.357 0.357 … 0.322 0.329; 0.333 0.345 … 0.251 0.263]

FixedPointNumbers.Normed{UInt8,8}[0.192 0.2 … 0.659 0.627; 0.184 0.157 … 0.58 0.584; … ; 0.141 0.125 … 0.494 0.506; 0.129 0.133 … 0.42 0.431]

FixedPointNumbers.Normed{UInt8,8}[0.922 0.933 … 0.322 0.333; 0.906 0.922 … 0.18 0.243; … ; 0.914 0.925 … 0.725 0.706; 0.91 0.922 … 0.733 0.729]

FixedPointNumbers.Normed{UInt8,8}[0.922 0.933 … 0.376 0.396; 0.906 0.922 … 0.224 0.294; … ; 0.914 0.925 … 0.784 0.765; 0.91 0.922 … 0.792 0.784]

FixedPointNumbers.Normed{UInt8,8}[0.922 0.933 … 0.322 0.325; 0.906 0.922 … 0.141 0.188; … ; 0.914 0.925 … 0.769 0.749; 0.91 0.922 … 0.784 0.78]

FixedPointNumbers.Normed{UInt8,8}[0.62 0.667 … 0.09 0.11; 0.62 0.675 … 0.106 0.118; … ; 0.929 0.965 … 0.016 0.016; 0.933 0.965 … 0.0

In [3]:
train_x = Float64.(train_x / 255)
train_y = onehotbatch(train_y, 0:9)
test_x = Float64.(test_x / 255)
test_y = onehotbatch(test_y, 0:9)

10×10000 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
 false  false  false   true  false  …  false  false  false  false  false
 false  false  false  false  false     false  false  false   true  false
 false  false  false  false  false     false  false  false  false  false
  true  false  false  false  false     false   true  false  false  false
 false  false  false  false  false     false  false  false  false  false
 false  false  false  false  false  …  false  false   true  false  false
 false  false  false  false   true     false  false  false  false  false
 false  false  false  false  false     false  false  false  false   true
 false   true   true  false  false      true  false  false  false  false
 false  false  false  false  false     false  false  false  false  false

## Preprocessing

In [4]:
# Partition into batches of size 1,000
train = [(train_x[:, :, :, i:(i+999)], train_y[:, i:(i+999)]) for i = 1:1000:50_000];
# train = gpu.(train)

In [5]:
# Prepare test set (first 1,000 images)
test_x = test_x[:, :, :, 1:1000]# |> gpu
test_y = test_y[:, 1:1000]# |> gpu

10×1000 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
 false  false  false   true  false  …  false  false  false  false  false
 false  false  false  false  false     false  false   true  false  false
 false  false  false  false  false     false  false  false  false  false
  true  false  false  false  false      true  false  false   true  false
 false  false  false  false  false     false  false  false  false  false
 false  false  false  false  false  …  false  false  false  false  false
 false  false  false  false   true     false  false  false  false  false
 false  false  false  false  false     false  false  false  false  false
 false   true   true  false  false     false   true  false  false   true
 false  false  false  false  false     false  false  false  false  false

## Model

In [6]:
m = Chain(
    Conv((3, 3), 3=>32, relu),  # (32, 32, 3) -> (30, 30, 32)
    Conv((3, 3), 32=>64, relu),  # (30, 30, 32) -> (28, 28, 64)
    MaxPool((2, 2); stride=2),  # (28, 28, 64) -> (14, 14, 64)
    BatchNorm(64),
#     Dropout(0.25),
    Conv((3, 3), 64=>128, relu),  # (14, 14, 64) -> (12, 12, 128)
    MaxPool((2, 2); stride=2),  # (12, 12, 128) -> (6, 6, 128)
    Conv((2, 2), 128=>128, relu),  # (6, 6, 128) -> (5, 5, 128)
    MaxPool((2, 2); stride=2),  # (5, 5, 128) -> (2, 2, 128)
    BatchNorm(128),
#     Dropout(0.25),
    x -> reshape(x, :, size(x, 4)),  # (2, 2, 128) -> 512
    Dense(512, 1500, relu),
    BatchNorm(1500),
#     Dropout(0.5),
    Dense(1500, 10), softmax)# |> gpu

Chain(Conv((3, 3), 3=>32, NNlib.relu), Conv((3, 3), 32=>64, NNlib.relu), MaxPool((2, 2), pad = (0, 0), stride = (2, 2)), BatchNorm(64), Conv((3, 3), 64=>128, NNlib.relu), MaxPool((2, 2), pad = (0, 0), stride = (2, 2)), Conv((2, 2), 128=>128, NNlib.relu), MaxPool((2, 2), pad = (0, 0), stride = (2, 2)), BatchNorm(128), getfield(Main, Symbol("##5#6"))(), Dense(512, 1500, NNlib.relu), BatchNorm(1500), Dense(1500, 10), NNlib.softmax)

In [7]:
m(train[1][1])  # test if model works

Tracked 10×1000 Array{Float64,2}:
 0.40891     0.317849    0.110271    …  0.049612    0.0234114  0.0105068
 0.0385509   0.00186091  0.0269249      0.200287    0.341255   0.130925 
 0.0302261   0.0332867   0.405741       0.122652    0.175817   0.0454539
 0.273067    0.0189853   0.162143       0.0737403   0.307202   0.0227735
 0.0383755   0.110287    0.00979033     0.110497    0.0165328  0.194926 
 0.0727444   0.0113008   0.0149627   …  0.0134456   0.0122279  0.195315 
 0.00817208  0.377087    0.0401661      0.0443034   0.0263315  0.0203326
 0.0398294   0.0100822   0.0255684      0.193098    0.042487   0.329965 
 0.0257418   0.0281576   0.0958805      0.185488    0.014231   0.0382532
 0.0643833   0.0911041   0.108553       0.00687646  0.0405051  0.0115492

## Loss function

In [8]:
loss(x, y) = crossentropy(m(x), y)

loss (generic function with 1 method)

In [9]:
accuracy(x, y) = mean(onecold(m(x)) .== onecold(y))

accuracy (generic function with 1 method)

## Optimizer

In [10]:
evalcb = throttle(() -> @show(accuracy(test_x, test_y)), 10)
opt = ADAM(params(m))

#43 (generic function with 1 method)

## Training

In [None]:
@epochs 5 Flux.train!(loss, train, opt, cb=evalcb)

┌ Info: Epoch 1
└ @ Main /home/pika/.julia/packages/Flux/rcN9D/src/optimise/train.jl:93


accuracy(test_x, test_y) = 0.215
