# Image recognition in Julia

## Introduction to Flux

### Linear regression

In [1]:
using Flux.Tracker
using Flux
using DelimitedFiles: readdlm

In [2]:
# Fetch the housing dataset only when it is not already on disk; the `||`
# short-circuit skips the download when the file exists.
isfile("housing.data") ||
  download("https://raw.githubusercontent.com/MikeInnes/notebooks/master/housing.data", "housing.data")

true

#### Read data

In [3]:
# Parse the delimited file into a matrix and take its adjoint, so that rows of
# the file become columns of `rawdata`.
rawdata = readdlm("housing.data")'

14×505 LinearAlgebra.Adjoint{Float64,Array{Float64,2}}:
   0.02731    0.02729    0.03237  …    0.06076    0.10959    0.04741
   0.0        0.0        0.0           0.0        0.0        0.0    
   7.07       7.07       2.18         11.93      11.93      11.93   
   0.0        0.0        0.0           0.0        0.0        0.0    
   0.469      0.469      0.458         0.573      0.573      0.573  
   6.421      7.185      6.998    …    6.976      6.794      6.03   
  78.9       61.1       45.8          91.0       89.3       80.8    
   4.9671     4.9671     6.0622        2.1675     2.3889     2.505  
   2.0        2.0        3.0           1.0        1.0        1.0    
 242.0      242.0      222.0         273.0      273.0      273.0    
  17.8       17.8       18.7      …   21.0       21.0       21.0    
 396.9      392.83     394.63        396.9      393.45     396.9    
   9.14       4.03       2.94          5.64       6.48       7.88   
  21.6       34.7       33.4          23.9     

In [4]:
# Rows 1–13 are used as the input features and row 14 as the regression target.
x = rawdata[1:13,:]
# Indexing with the range 14:14 (not the scalar 14) keeps `y` a 1×N matrix.
y = rawdata[14:14,:]

1×505 Array{Float64,2}:
 21.6  34.7  33.4  36.2  28.7  22.9  …  16.8  22.4  20.6  23.9  22.0  11.9

In [5]:
using Statistics
# Standardise every feature row: subtract the row mean and divide by the row
# standard deviation. The statistics are bound locally so no extra globals
# are introduced.
x = let μ = mean(x; dims=2), σ = std(x; dims=2)
    (x .- μ) ./ σ
end

13×505 Array{Float64,2}:
 -0.417416  -0.417418  -0.416828  …  -0.41353   -0.407858  -0.415081
 -0.486234  -0.486234  -0.486234     -0.486234  -0.486234  -0.486234
 -0.595732  -0.595732  -1.30899       0.11315    0.11315    0.11315 
 -0.272618  -0.272618  -0.272618     -0.272618  -0.272618  -0.272618
 -0.739098  -0.739098  -0.833934      0.15753    0.15753    0.15753 
  0.194741   1.28121    1.01528   …   0.983996   0.725177  -0.361293
  0.366208  -0.265527  -0.808535      0.795646   0.735312   0.433641
  0.556346   0.556346   1.0759       -0.771891  -0.66685   -0.611768
 -0.868939  -0.868939  -0.754097     -0.983782  -0.983782  -0.983782
 -0.987128  -0.987128  -1.10573      -0.803294  -0.803294  -0.803294
 -0.306024  -0.306024   0.110158  …   1.17373    1.17373    1.17373 
  0.441136   0.396591   0.416291      0.441136   0.403377   0.441136
 -0.494157  -1.20985   -1.36251      -0.984357  -0.866709  -0.670629

#### Model

In [6]:
# The model maps `in_dim` feature rows to `out_dim` target rows.
in_dim = first(size(x))
out_dim = first(size(y))

1

In [7]:
# Linear model: one weight row per output, plus a bias broadcast across samples.
# `W` and `b` are globals defined just below and looked up when `predict` is called.
function predict(x)
    return W * x .+ b
end
# Small random initial weights and a zero bias, both wrapped with `param`.
W = param(randn(out_dim, in_dim) ./ 10)
b = param(zeros(out_dim))

Tracked 1-element Array{Float64,1}:
 0.0

In [8]:
# Sum of squared differences between prediction `ŷ` and target `y`, divided by
# the number of columns of `y`.
# NOTE(review): this name is the same as `Base.error`; it is kept because other
# cells call it.
function error(ŷ, y)
    residual = ŷ .- y
    return sum(residual .^ 2) / size(y, 2)
end

error (generic function with 1 method)

In [9]:
# Regression loss: run the model on `x` and score the prediction against `y`.
function loss(x, y)
    ŷ = predict(x)
    return error(ŷ, y)
end

loss (generic function with 1 method)

In [10]:
"""
    update!(ps, η = 0.1)

Apply one gradient-descent step to every parameter in `ps`.

Each element of `ps` must expose a `data` array and a `grad` array of matching
shape. `data` is decreased in place by `η .* grad`, after which `grad` is reset
to zero so the next backward pass starts from a clean accumulator.
Returns `nothing`.
"""
function update!(ps, η = 0.1)
    for p in ps
        @. p.data -= p.grad * η
        fill!(p.grad, 0)
    end
end

update! (generic function with 2 methods)

#### Training

In [11]:
# 50 steps of full-batch gradient descent on the linear model.
let ps = (W, b)
    for step in 1:50
        back!(loss(x, y))   # accumulate gradients of the loss into the parameters
        update!(ps)         # take a step, then zero the gradients
        @show loss(x, y)    # report the loss after the step
    end
end

loss(x, y) = 367.50881965303574 (tracked)
loss(x, y) = 242.24694489482238 (tracked)
loss(x, y) = 163.25007371236447 (tracked)
loss(x, y) = 112.9158437353103 (tracked)
loss(x, y) = 80.77670679944988 (tracked)
loss(x, y) = 60.22346987439324 (tracked)
loss(x, y) = 47.05711156195376 (tracked)
loss(x, y) = 38.60547826412295 (tracked)
loss(x, y) = 33.16614851671527 (tracked)
loss(x, y) = 29.6534559082126 (tracked)
loss(x, y) = 27.374443508206987 (tracked)
loss(x, y) = 25.886452736668986 (tracked)
loss(x, y) = 24.906493137565008 (tracked)
loss(x, y) = 24.253506105864677 (tracked)
loss(x, y) = 23.811551163722 (tracked)
loss(x, y) = 23.506314694134716 (tracked)
loss(x, y) = 23.2901096654243 (tracked)
loss(x, y) = 23.132291392362127 (tracked)
loss(x, y) = 23.013130374521957 (tracked)
loss(x, y) = 22.919893163142135 (tracked)
loss(x, y) = 22.844334269866426 (tracked)
loss(x, y) = 22.78109025538249 (tracked)
loss(x, y) = 22.72665091545428 (tracked)
loss(x, y) = 22.678699783349295 (tracked)
loss(x,

#### Predict

In [12]:
# Ratio of the model's prediction for the first sample to its true target;
# a value close to 1 means the prediction is close to the target.
predict(x[:, 1]) / y[1]

Tracked 1-element Array{Float64,1}:
 1.1564584925731751

## Training image recognition model

In [13]:
using Flux, Flux.Data.MNIST
using Flux: @epochs, onehotbatch, onecold, crossentropy, throttle
using Base.Iterators: repeated, partition
using Statistics: mean
# using CuArrays

#### Load data and preprocessing

In [14]:
# Number of samples per training mini-batch (used below to partition the training indices).
batch_size = 128

128

In [15]:
"""
    minibatch(X, Y, idxs)

Assemble the samples selected by `idxs` into one batch.

# Arguments
- `X`: indexable collection of 2-D samples; every sample must have the size of `X[1]`.
- `Y`: labels, indexable by `idxs`; values are one-hot encoded against `0:9`.
- `idxs`: collection of indices into `X` and `Y` (e.g. a range).

# Returns
A tuple `(X_batch, Y_batch)`: `X_batch` is a `Float32` array of size
`(size(X[1])..., 1, length(idxs))`, and `Y_batch` is `onehotbatch(Y[idxs], 0:9)`.
"""
function minibatch(X, Y, idxs)
    X_batch = Array{Float32}(undef, size(X[1])..., 1, length(idxs))
    # `enumerate` pairs each selected index with its 1-based slot in the batch,
    # so `idxs` does not itself need to be 1-based indexable.
    for (slot, idx) in enumerate(idxs)
        X_batch[:, :, :, slot] = Float32.(X[idx])
    end
    Y_batch = onehotbatch(Y[idxs], 0:9)
    return (X_batch, Y_batch)
end

minibatch (generic function with 1 method)

In [16]:
# Load the MNIST training set and pre-build every mini-batch up front.
train_imgs = MNIST.images()
train_labels = MNIST.labels()
# Consecutive index chunks of at most `batch_size` samples each.
mb_idxs = partition(1:length(train_imgs), batch_size)
train = map(idxs -> minibatch(train_imgs, train_labels, idxs), mb_idxs);

In [17]:
# Load the MNIST test split and pack all of it into a single batch.
test_labels = MNIST.labels(:test)
test_imgs = MNIST.images(:test)
test = minibatch(test_imgs, test_labels, eachindex(test_imgs));

#### Model

In [18]:
# Three Conv → MaxPool stages, a flatten step, and a dense softmax classifier
# over 10 classes.
model = Chain(
    Conv((3, 3), 1 => 16, relu; pad=(2, 2)),
    MaxPool((2, 2)),
    Conv((3, 3), 16 => 32, relu; pad=(2, 2)),
    MaxPool((2, 2)),
    Conv((3, 3), 32 => 32, relu; pad=(2, 2)),
    MaxPool((2, 2)),
    # Flatten everything except the batch dimension (dimension 4).
    feat -> reshape(feat, :, size(feat, 4)),
    # NOTE(review): 800 input features presumably corresponds to 5×5×32 for
    # 28×28 inputs — confirm if the input size changes.
    Dense(800, 10),
    softmax,
)

Chain(Conv((3, 3), 1=>16, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 16=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), getfield(Main, Symbol("##5#6"))(), Dense(800, 10), NNlib.softmax)

In [19]:
# Sanity check: run the untrained model on the images of the first training batch.
model(train[1][1])

Tracked 10×128 Array{Float32,2}:
 0.351562    0.325374    0.0516932   …  0.311723    0.0750973   0.157314  
 0.032302    0.153326    0.0601784      0.037398    0.125817    0.0372543 
 0.00364434  0.0218031   0.0103726      0.00543482  0.0104553   0.00560774
 0.0059915   0.0308268   0.0621679      0.0168124   0.034673    0.0284475 
 0.00081132  0.00244262  0.00275543     0.00151854  0.00898325  0.00862993
 0.00466386  0.0135385   0.00326359  …  0.00250788  0.00252443  0.00518499
 0.219428    0.272147    0.253162       0.368735    0.478974    0.256483  
 0.0640379   0.0719354   0.426452       0.0800504   0.0447688   0.325045  
 0.0157753   0.00562276  0.0211086      0.00795527  0.00723735  0.0217686 
 0.301783    0.102984    0.108846       0.167864    0.211469    0.154265  

#### If you have CUDA...

In [20]:
# train = gpu.(train)
# test = gpu.(test)
# model = gpu(model)

#### Loss function

In [21]:
# Classification loss: cross-entropy between the model output and the targets `y`.
# This replaces the two-argument `loss` method defined for the regression example.
function loss(x, y)
    ŷ = model(x)
    return crossentropy(ŷ, y)
end

loss (generic function with 1 method)

In [22]:
# Fraction of samples whose decoded prediction equals the decoded target.
function accuracy(x, y)
    predicted = onecold(model(x))
    actual = onecold(y)
    return mean(predicted .== actual)
end

accuracy (generic function with 1 method)

#### Optimizer

In [23]:
# Callback used during training: print (and return) the accuracy on the test batch.
function evalcb()
    return @show accuracy(test[1], test[2])
end
# Optimiser with its default settings.
opt = ADAM()

ADAM(0.001, (0.9, 0.999), IdDict{Any,Any}())

In [24]:
# Loss of the untrained model on the first training batch (an `(images, labels)` tuple).
loss(first(train)...)

3.1158879f0 (tracked)

#### Training

In [None]:
# Train the model's parameters on the pre-built `train` batches with `opt`,
# repeating the whole `train!` call 10 times via `@epochs`.
# `evalcb` is wrapped in `throttle(…, 10)` so the test accuracy is not reported
# on every batch (presumably at most once every 10 seconds — confirm against
# Flux's `throttle` documentation).
@epochs 10 Flux.train!(loss, params(model), train, opt, cb=throttle(evalcb, 10))

┌ Info: Epoch 1
└ @ Main /home/pika/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


accuracy(test[1], test[2]) = 0.1338
accuracy(test[1], test[2]) = 0.5392


## Use pre-trained models

In [3]:
using Metalhead
using Metalhead: classify

In [4]:
# Construct the VGG19 model provided by Metalhead.
vgg = VGG19()
# The image load below is disabled, so `sample` remains undefined.
# NOTE(review): `load` is not brought into scope by the imports shown — confirm
# which image-loading package is required before re-enabling it.
# sample = load("Elephant.jpg")

VGG19()

In [30]:
# NOTE: `sample` is never assigned in the cells shown (the only statement that
# would define it is commented out), so this call raises UndefVarError until an
# image has been loaded into `sample`.
classify(vgg, sample)

UndefVarError: UndefVarError: sample not defined

In [5]:
# Run the network on a random Float32 array of size 224×224×3×1
# (presumably width × height × channels × batch — confirm against the layout
# Metalhead expects). Note that this rebinds the global `x`.
x = rand(Float32, 224, 224, 3, 1)
vgg(x)

MethodError: MethodError: no method matching maxpool(::Array{Float32,4}, ::Tuple{Int64,Int64})
Closest candidates are:
  maxpool(::AbstractArray{xT,N}, !Matched::PoolDims; kwargs...) where {xT, N} at /home/pika/.julia/packages/TimerOutputs/7zSea/src/TimerOutput.jl:198

In [31]:
# Inspect the layers stored in the model's `layers` field.
vgg.layers

Chain(Conv((3, 3), 3=>64, NNlib.relu), Conv((3, 3), 64=>64, NNlib.relu), getfield(Metalhead, Symbol("##42#48"))(), Conv((3, 3), 64=>128, NNlib.relu), Conv((3, 3), 128=>128, NNlib.relu), getfield(Metalhead, Symbol("##43#49"))(), Conv((3, 3), 128=>256, NNlib.relu), Conv((3, 3), 256=>256, NNlib.relu), Conv((3, 3), 256=>256, NNlib.relu), Conv((3, 3), 256=>256, NNlib.relu), getfield(Metalhead, Symbol("##44#50"))(), Conv((3, 3), 256=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), getfield(Metalhead, Symbol("##45#51"))(), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), getfield(Metalhead, Symbol("##46#52"))(), getfield(Metalhead, Symbol("##47#53"))(), Dense(25088, 4096, NNlib.relu), Dropout{Float32}(0.5f0, false), Dense(4096, 4096, NNlib.relu), Dropout{Float32}(0.5f0, false), Dense(4096, 1000), NNlib.softmax)

### Models available:

* GoogLeNet
* ResNet
* VGG
* SqueezeNet
* DenseNet