# Pre-train model - VGG19

In [1]:
using MLDatasets
using Flux, CuArrays
using Flux: @epochs, onehotbatch, onecold, logitcrossentropy, throttle
using Base.Iterators: repeated, partition
using Statistics: mean

## Load data

Classify CIFAR10 with a convolutional network

In [2]:
train_x, train_y = CIFAR10.traindata()
test_x,  test_y  = CIFAR10.testdata()

(FixedPointNumbers.Normed{UInt8,8}[0.62N0f8 0.596N0f8 … 0.239N0f8 0.212N0f8; 0.624N0f8 0.592N0f8 … 0.192N0f8 0.22N0f8; … ; 0.494N0f8 0.49N0f8 … 0.114N0f8 0.133N0f8; 0.455N0f8 0.467N0f8 … 0.078N0f8 0.082N0f8]

FixedPointNumbers.Normed{UInt8,8}[0.439N0f8 0.439N0f8 … 0.455N0f8 0.42N0f8; 0.435N0f8 0.431N0f8 … 0.4N0f8 0.412N0f8; … ; 0.357N0f8 0.357N0f8 … 0.322N0f8 0.329N0f8; 0.333N0f8 0.345N0f8 … 0.251N0f8 0.263N0f8]

FixedPointNumbers.Normed{UInt8,8}[0.192N0f8 0.2N0f8 … 0.659N0f8 0.627N0f8; 0.184N0f8 0.157N0f8 … 0.58N0f8 0.584N0f8; … ; 0.141N0f8 0.125N0f8 … 0.494N0f8 0.506N0f8; 0.129N0f8 0.133N0f8 … 0.42N0f8 0.431N0f8]

FixedPointNumbers.Normed{UInt8,8}[0.922N0f8 0.933N0f8 … 0.322N0f8 0.333N0f8; 0.906N0f8 0.922N0f8 … 0.18N0f8 0.243N0f8; … ; 0.914N0f8 0.925N0f8 … 0.725N0f8 0.706N0f8; 0.91N0f8 0.922N0f8 … 0.733N0f8 0.729N0f8]

FixedPointNumbers.Normed{UInt8,8}[0.922N0f8 0.933N0f8 … 0.376N0f8 0.396N0f8; 0.906N0f8 0.922N0f8 … 0.224N0f8 0.294N0f8; … ; 0.914N0f8 0.925N0f8 … 0.784N0f8 0.765N0f8; 

In [3]:
train_x = Float64.(train_x / 255)
train_y = onehotbatch(train_y, 0:9)
test_x = Float64.(test_x / 255)
test_y = onehotbatch(test_y, 0:9)

10×10000 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
 0  0  0  1  0  0  0  0  0  0  1  0  0  …  0  0  0  1  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  1  0  0  1  0  0  0     0  0  0  0  0  0  0  0  0  0  1  0
 0  0  0  0  0  0  0  0  0  0  0  0  0     0  1  0  0  0  0  0  0  0  0  0  0
 1  0  0  0  0  0  0  0  1  0  0  0  0     0  0  0  0  1  0  1  0  1  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  0  0     0  0  0  0  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  0  1  …  0  0  0  0  0  1  0  0  0  1  0  0
 0  0  0  0  1  1  0  1  0  0  0  0  0     0  0  0  0  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  0  0     0  0  1  0  0  0  0  0  0  0  0  1
 0  1  1  0  0  0  0  0  0  0  0  0  0     1  0  0  0  0  0  0  1  0  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  1  0     0  0  0  0  0  0  0  0  0  0  0  0

## Preprocessing

In [4]:
# Partition into batches of size 1,000
train = [(train_x[:, :, :, i:(i+999)], train_y[:, i:(i+999)]) for i = 1:1000:50_000];
train = gpu.(train);

In [5]:
# Prepare test set (first 1,000 images)
test_x = test_x[:, :, :, 1:1000] |> gpu
test_y = test_y[:, 1:1000] |> gpu;

## Model

In [6]:
vgg19() = Chain(
  Conv((3, 3), 3 => 64, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(64),
  Conv((3, 3), 64 => 64, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(64),
  MaxPool((2, 2)),
  Conv((3, 3), 64 => 128, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(128),
  Conv((3, 3), 128 => 128, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(128),
  MaxPool((2, 2)),
  Conv((3, 3), 128 => 256, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(256),
  Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(256),
  Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(256),
  Conv((3, 3), 256 => 256, relu, pad=(1, 1), stride=(1, 1)),
  MaxPool((2, 2)),
  Conv((3, 3), 256 => 512, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(512),
  Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(512),
  Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(512),
  Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
  MaxPool((2, 2)),
  Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(512),
  Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(512),
  Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
  BatchNorm(512),
  Conv((3, 3), 512 => 512, relu, pad=(1, 1), stride=(1, 1)),
  MaxPool((2, 2)),
  flatten,
  Dense(512, 4096, relu),
  Dropout(0.5),
  Dense(4096, 4096, relu),
  Dropout(0.5),
  Dense(4096, 10),
  softmax) |> gpu

vgg19 (generic function with 1 method)

In [7]:
m = vgg19()
m(train[1][1])  # test if model works

10×1000 CuArray{Float32,2,Nothing}:
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1  …  0.1  0.1  0.1  0.1  0.1  0.1  0.1
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1     0.1  0.1  0.1  0.1  0.1  0.1  0.1
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1     0.1  0.1  0.1  0.1  0.1  0.1  0.1
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1     0.1  0.1  0.1  0.1  0.1  0.1  0.1
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1     0.1  0.1  0.1  0.1  0.1  0.1  0.1
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1  …  0.1  0.1  0.1  0.1  0.1  0.1  0.1
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1     0.1  0.1  0.1  0.1  0.1  0.1  0.1
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1     0.1  0.1  0.1  0.1  0.1  0.1  0.1
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1     0.1  0.1  0.1  0.1  0.1  0.1  0.1
 0.1  0.1  0.1  0.1  0.1  0.1  0.1  0.1     0.1  0.1  0.1  0.1  0.1  0.1  0.1

## Loss function

In [8]:
loss(x, y) = logitcrossentropy(m(x), y)

loss (generic function with 1 method)

In [9]:
accuracy(x, y) = mean(onecold(m(x)) .== onecold(y))

accuracy (generic function with 1 method)

## Optimizer

In [10]:
evalcb() = @show(accuracy(test_x, test_y))
opt = ADAM()

ADAM(0.001, (0.9, 0.999), IdDict{Any,Any}())

## Training

In [11]:
@epochs 10 Flux.train!(loss, params(m[36:end]), train, opt, cb=throttle(evalcb, 10))

┌ Info: Epoch 1
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121
└ @ GPUArrays /home/yuehhua/.julia/packages/GPUArrays/JqOUg/src/host/indexing.jl:43


accuracy(test_x, test_y) = 0.089
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103


┌ Info: Epoch 2
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121


accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103


┌ Info: Epoch 3
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121


accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103


┌ Info: Epoch 4
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121


accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103


┌ Info: Epoch 5
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121


accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103


┌ Info: Epoch 6
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121


accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103


┌ Info: Epoch 7
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121


accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103


┌ Info: Epoch 8
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121


accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103


┌ Info: Epoch 9
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121


accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103


┌ Info: Epoch 10
└ @ Main /home/yuehhua/.julia/packages/Flux/Fj3bt/src/optimise/train.jl:121


accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
accuracy(test_x, test_y) = 0.103
