# Convolutional neural network

In [1]:
using MLDatasets
using Flux
using Flux: @epochs, onehotbatch, onecold, crossentropy, throttle
using Base.Iterators: repeated, partition
using Statistics: mean
# using CuArrays

## Load data

Classify MNIST digits with a convolutional network

In [2]:
train_x, train_y = CIFAR10.traindata()
test_x,  test_y  = CIFAR10.testdata()

(FixedPointNumbers.Normed{UInt8,8}[0.62 0.596 … 0.239 0.212; 0.624 0.592 … 0.192 0.22; … ; 0.494 0.49 … 0.114 0.133; 0.455 0.467 … 0.078 0.082]

FixedPointNumbers.Normed{UInt8,8}[0.439 0.439 … 0.455 0.42; 0.435 0.431 … 0.4 0.412; … ; 0.357 0.357 … 0.322 0.329; 0.333 0.345 … 0.251 0.263]

FixedPointNumbers.Normed{UInt8,8}[0.192 0.2 … 0.659 0.627; 0.184 0.157 … 0.58 0.584; … ; 0.141 0.125 … 0.494 0.506; 0.129 0.133 … 0.42 0.431]

FixedPointNumbers.Normed{UInt8,8}[0.922 0.933 … 0.322 0.333; 0.906 0.922 … 0.18 0.243; … ; 0.914 0.925 … 0.725 0.706; 0.91 0.922 … 0.733 0.729]

FixedPointNumbers.Normed{UInt8,8}[0.922 0.933 … 0.376 0.396; 0.906 0.922 … 0.224 0.294; … ; 0.914 0.925 … 0.784 0.765; 0.91 0.922 … 0.792 0.784]

FixedPointNumbers.Normed{UInt8,8}[0.922 0.933 … 0.322 0.325; 0.906 0.922 … 0.141 0.188; … ; 0.914 0.925 … 0.769 0.749; 0.91 0.922 … 0.784 0.78]

FixedPointNumbers.Normed{UInt8,8}[0.62 0.667 … 0.09 0.11; 0.62 0.675 … 0.106 0.118; … ; 0.929 0.965 … 0.016 0.016; 0.933 0.965 … 0.0

In [3]:
train_x = Float64.(train_x / 255)
train_y = onehotbatch(train_y, 0:9)
test_x = Float64.(test_x / 255)
test_y = onehotbatch(test_y, 0:9)

10×10000 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
 false  false  false   true  false  …  false  false  false  false  false
 false  false  false  false  false     false  false  false   true  false
 false  false  false  false  false     false  false  false  false  false
  true  false  false  false  false     false   true  false  false  false
 false  false  false  false  false     false  false  false  false  false
 false  false  false  false  false  …  false  false   true  false  false
 false  false  false  false   true     false  false  false  false  false
 false  false  false  false  false     false  false  false  false   true
 false   true   true  false  false      true  false  false  false  false
 false  false  false  false  false     false  false  false  false  false

## Preprocessing

In [4]:
# Partition into batches of size 1,000
train = [(train_x[:, :, :, i:(i+999)], train_y[:, i:(i+999)]) for i = 1:1000:50_000];
# train = gpu.(train)

In [5]:
# Prepare test set (first 1,000 images)
test_x = test_x[:, :, :, 1:1000]# |> gpu
test_y = test_y[:, 1:1000]# |> gpu

10×1000 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
 false  false  false   true  false  …  false  false  false  false  false
 false  false  false  false  false     false  false   true  false  false
 false  false  false  false  false     false  false  false  false  false
  true  false  false  false  false      true  false  false   true  false
 false  false  false  false  false     false  false  false  false  false
 false  false  false  false  false  …  false  false  false  false  false
 false  false  false  false   true     false  false  false  false  false
 false  false  false  false  false     false  false  false  false  false
 false   true   true  false  false     false   true  false  false   true
 false  false  false  false  false     false  false  false  false  false

## Model

In [6]:
m = Chain(
    Conv((3, 3), 3=>32, relu),  # (32, 32, 3) -> (30, 30, 32)
    Conv((3, 3), 32=>64, relu),  # (30, 30, 32) -> (28, 28, 64)
    x -> maxpool(x, (2, 2); stride=2),  # (28, 28, 64) -> (14, 14, 64)
    BatchNorm(64),
#     Dropout(0.25),
    Conv((3, 3), 64=>128, relu),  # (14, 14, 64) -> (12, 12, 128)
    x -> maxpool(x, (2, 2); stride=2),  # (12, 12, 128) -> (6, 6, 128)
    Conv((2, 2), 128=>128, relu),  # (6, 6, 128) -> (5, 5, 128)
    x -> maxpool(x, (2, 2); stride=2),  # (5, 5, 128) -> (2, 2, 128)
    BatchNorm(128),
#     Dropout(0.25),
    x -> reshape(x, :, size(x, 4)),  # (2, 2, 128) -> 512
    Dense(512, 1500, relu),
    BatchNorm(1500),
#     Dropout(0.5),
    Dense(1500, 10), softmax)# |> gpu

Chain(Conv((3, 3), 3=>32, NNlib.relu), Conv((3, 3), 32=>64, NNlib.relu), getfield(Main, Symbol("##5#9"))(), BatchNorm(64), Conv((3, 3), 64=>128, NNlib.relu), getfield(Main, Symbol("##6#10"))(), Conv((2, 2), 128=>128, NNlib.relu), getfield(Main, Symbol("##7#11"))(), BatchNorm(128), getfield(Main, Symbol("##8#12"))(), Dense(512, 1500, NNlib.relu), BatchNorm(1500), Dense(1500, 10), NNlib.softmax)

In [7]:
m(train[1][1])  # test if model works

Tracked 10×1000 Array{Float64,2}:
 0.0205276  0.10518     0.0423531    …  0.0541638   0.0106      0.139392  
 0.239923   0.160242    0.0361944       0.00292145  0.00869964  0.0201769 
 0.056292   0.17587     0.00675316      0.143196    0.00742403  0.00874986
 0.0310601  0.0601984   0.00134746      0.0368507   0.00255334  0.0595093 
 0.0178015  0.0419944   0.119793        0.0485718   0.0799776   0.149576  
 0.0368068  0.011981    0.772457     …  0.215175    0.0471264   0.449265  
 0.355944   0.00437336  0.00516802      0.105177    0.0924541   0.0455375 
 0.0665038  0.0223977   0.00337104      0.0168675   0.024626    0.00619529
 0.0603635  0.348615    0.000957903     0.338733    0.662072    0.107867  
 0.114778   0.0691486   0.0116045       0.0383439   0.0644668   0.0137314 

## Loss function

In [8]:
loss(x, y) = crossentropy(m(x), y)

loss (generic function with 1 method)

In [9]:
accuracy(x, y) = mean(onecold(m(x)) .== onecold(y))

accuracy (generic function with 1 method)

## Optimizer

In [10]:
evalcb = throttle(() -> @show(accuracy(test_x, test_y)), 10)
opt = ADAM(params(m))

#43 (generic function with 1 method)

## Training

In [11]:
@epochs 5 Flux.train!(loss, train, opt, cb=evalcb)

┌ Info: Epoch 1
└ @ Main /home/pika/.julia/packages/Flux/jbpWo/src/optimise/train.jl:93


accuracy(test_x, test_y) = 0.184
accuracy(test_x, test_y) = 0.25
accuracy(test_x, test_y) = 0.305
accuracy(test_x, test_y) = 0.306
accuracy(test_x, test_y) = 0.345
accuracy(test_x, test_y) = 0.381
accuracy(test_x, test_y) = 0.391
accuracy(test_x, test_y) = 0.406
accuracy(test_x, test_y) = 0.406
accuracy(test_x, test_y) = 0.437
accuracy(test_x, test_y) = 0.427
accuracy(test_x, test_y) = 0.436
accuracy(test_x, test_y) = 0.449
accuracy(test_x, test_y) = 0.465
accuracy(test_x, test_y) = 0.464
accuracy(test_x, test_y) = 0.468
accuracy(test_x, test_y) = 0.49


┌ Info: Epoch 2
└ @ Main /home/pika/.julia/packages/Flux/jbpWo/src/optimise/train.jl:93


accuracy(test_x, test_y) = 0.506
accuracy(test_x, test_y) = 0.498
accuracy(test_x, test_y) = 0.511
accuracy(test_x, test_y) = 0.506
accuracy(test_x, test_y) = 0.491
accuracy(test_x, test_y) = 0.499
accuracy(test_x, test_y) = 0.504
accuracy(test_x, test_y) = 0.525
accuracy(test_x, test_y) = 0.544
accuracy(test_x, test_y) = 0.498
accuracy(test_x, test_y) = 0.516
accuracy(test_x, test_y) = 0.529
accuracy(test_x, test_y) = 0.506
accuracy(test_x, test_y) = 0.546
accuracy(test_x, test_y) = 0.542
accuracy(test_x, test_y) = 0.541
accuracy(test_x, test_y) = 0.542


┌ Info: Epoch 3
└ @ Main /home/pika/.julia/packages/Flux/jbpWo/src/optimise/train.jl:93


accuracy(test_x, test_y) = 0.551
accuracy(test_x, test_y) = 0.569
accuracy(test_x, test_y) = 0.456
accuracy(test_x, test_y) = 0.441
accuracy(test_x, test_y) = 0.421
accuracy(test_x, test_y) = 0.443
accuracy(test_x, test_y) = 0.495
accuracy(test_x, test_y) = 0.505
accuracy(test_x, test_y) = 0.534
accuracy(test_x, test_y) = 0.527
accuracy(test_x, test_y) = 0.516
accuracy(test_x, test_y) = 0.525
accuracy(test_x, test_y) = 0.495
accuracy(test_x, test_y) = 0.512
accuracy(test_x, test_y) = 0.511
accuracy(test_x, test_y) = 0.543
accuracy(test_x, test_y) = 0.55


┌ Info: Epoch 4
└ @ Main /home/pika/.julia/packages/Flux/jbpWo/src/optimise/train.jl:93


accuracy(test_x, test_y) = 0.552
accuracy(test_x, test_y) = 0.547
accuracy(test_x, test_y) = 0.548
accuracy(test_x, test_y) = 0.532
accuracy(test_x, test_y) = 0.557
accuracy(test_x, test_y) = 0.56
accuracy(test_x, test_y) = 0.569
accuracy(test_x, test_y) = 0.551
accuracy(test_x, test_y) = 0.567
accuracy(test_x, test_y) = 0.568
accuracy(test_x, test_y) = 0.569
accuracy(test_x, test_y) = 0.583
accuracy(test_x, test_y) = 0.587
accuracy(test_x, test_y) = 0.572
accuracy(test_x, test_y) = 0.572
accuracy(test_x, test_y) = 0.576
accuracy(test_x, test_y) = 0.587


┌ Info: Epoch 5
└ @ Main /home/pika/.julia/packages/Flux/jbpWo/src/optimise/train.jl:93


accuracy(test_x, test_y) = 0.599
accuracy(test_x, test_y) = 0.578
accuracy(test_x, test_y) = 0.594
accuracy(test_x, test_y) = 0.583
accuracy(test_x, test_y) = 0.592
accuracy(test_x, test_y) = 0.58
accuracy(test_x, test_y) = 0.606
accuracy(test_x, test_y) = 0.6
accuracy(test_x, test_y) = 0.593
accuracy(test_x, test_y) = 0.595
accuracy(test_x, test_y) = 0.606
accuracy(test_x, test_y) = 0.619
accuracy(test_x, test_y) = 0.606
accuracy(test_x, test_y) = 0.609
accuracy(test_x, test_y) = 0.603
accuracy(test_x, test_y) = 0.589
accuracy(test_x, test_y) = 0.6
