# Image recognition in Julia

## Introduction to Flux

### Linear regression

In [1]:
using Flux.Tracker
using Flux
using DelimitedFiles: readdlm

In [2]:
# Fetch the housing dataset only when it is not already in the working directory;
# `||` short-circuits, so `download` is skipped if the file exists.
isfile("housing.data") ||
  download("https://raw.githubusercontent.com/MikeInnes/notebooks/master/housing.data", "housing.data")

true

#### Read data

In [3]:
# Read the delimited file into a matrix and take its adjoint (`'`), so every
# row of the file becomes one column of `rawdata`.
rawdata = readdlm("housing.data")'

14×505 LinearAlgebra.Adjoint{Float64,Array{Float64,2}}:
   0.02731    0.02729    0.03237  …    0.06076    0.10959    0.04741
   0.0        0.0        0.0           0.0        0.0        0.0    
   7.07       7.07       2.18         11.93      11.93      11.93   
   0.0        0.0        0.0           0.0        0.0        0.0    
   0.469      0.469      0.458         0.573      0.573      0.573  
   6.421      7.185      6.998    …    6.976      6.794      6.03   
  78.9       61.1       45.8          91.0       89.3       80.8    
   4.9671     4.9671     6.0622        2.1675     2.3889     2.505  
   2.0        2.0        3.0           1.0        1.0        1.0    
 242.0      242.0      222.0         273.0      273.0      273.0    
  17.8       17.8       18.7      …   21.0       21.0       21.0    
 396.9      392.83     394.63        396.9      393.45     396.9    
   9.14       4.03       2.94          5.64       6.48       7.88   
  21.6       34.7       33.4          23.9       22.0       11.9    

In [4]:
# Rows 1–13 are the model inputs; row 14 is the value the model is fitted to.
# Indexing with `14:14` (a range, not a scalar) keeps `y` as a 1×N matrix.
x = rawdata[1:13,:]
y = rawdata[14:14,:]

1×505 Array{Float64,2}:
 21.6  34.7  33.4  36.2  28.7  22.9  …  16.8  22.4  20.6  23.9  22.0  11.9

In [5]:
using Statistics
# Standardise every row of `x`: subtract the row mean and divide by the row
# standard deviation (`dims=2` reduces across the columns).
x = (x .- mean(x; dims=2)) ./ std(x; dims=2)

13×505 Array{Float64,2}:
 -0.417416  -0.417418  -0.416828  …  -0.41353   -0.407858  -0.415081
 -0.486234  -0.486234  -0.486234     -0.486234  -0.486234  -0.486234
 -0.595732  -0.595732  -1.30899       0.11315    0.11315    0.11315 
 -0.272618  -0.272618  -0.272618     -0.272618  -0.272618  -0.272618
 -0.739098  -0.739098  -0.833934      0.15753    0.15753    0.15753 
  0.194741   1.28121    1.01528   …   0.983996   0.725177  -0.361293
  0.366208  -0.265527  -0.808535      0.795646   0.735312   0.433641
  0.556346   0.556346   1.0759       -0.771891  -0.66685   -0.611768
 -0.868939  -0.868939  -0.754097     -0.983782  -0.983782  -0.983782
 -0.987128  -0.987128  -1.10573      -0.803294  -0.803294  -0.803294
 -0.306024  -0.306024   0.110158  …   1.17373    1.17373    1.17373 
  0.441136   0.396591   0.416291      0.441136   0.403377   0.441136
 -0.494157  -1.20985   -1.36251      -0.984357  -0.866709  -0.670629

#### Model

In [6]:
# Row counts of the inputs and of the targets; they set the shape of W and b below.
in_dim = size(x, 1)
out_dim = size(y, 1)

1

In [7]:
# Linear model. `W` and `b` are globals resolved when `predict` is called, so
# defining `predict` before them is fine.
predict(x) = W*x .+ b
# Weights start as small random values (standard normal scaled by 1/10), the
# bias as zeros; `param` wraps both as tracked arrays (see the "Tracked" output).
W = param(randn(out_dim, in_dim) / 10)
b = param(zeros(out_dim))

Tracked 1-element Array{Float64,1}:
 0.0

In [8]:
# Sum of squared differences between predictions `ŷ` and targets `y`,
# divided by the number of columns of `y`.
# NOTE: this creates a new function named `error` in the notebook's namespace,
# which hides `Base.error` for every later cell.
function error(ŷ, y)
    residual = ŷ .- y
    return sum(residual .^ 2) / size(y, 2)
end

error (generic function with 1 method)

In [9]:
# Regression loss: the `error` defined above, applied to the model's predictions for `x`.
loss(x, y) = error(predict(x), y)

loss (generic function with 1 method)

In [10]:
# Apply one plain gradient-descent step, in place, to every parameter in `ps`.
#
# Each element of `ps` must expose a `data` array (current values) and a `grad`
# array (accumulated gradient). `η` is the step size. After the step the
# gradient buffer is reset to zero so the next backward pass starts clean.
# Returns `nothing`.
function update!(ps, η = .1)
    foreach(ps) do p
        p.data .-= p.grad .* η
        p.grad .= 0
    end
end

update! (generic function with 2 methods)

#### Training

In [11]:
# 50 gradient-descent steps, each on the whole dataset.
for i = 1:50
    # Backward pass from the loss value; `update!` below reads the resulting
    # `.grad` of W and b.
    back!(loss(x, y))
    # Step both parameters and clear their gradients.
    update!((W, b))
    # Recompute and print the loss after the step.
    @show loss(x, y)
end

loss(x, y) = 368.73713763134253 (tracked)
loss(x, y) = 242.90470526263246 (tracked)
loss(x, y) = 163.6599378806958 (tracked)
loss(x, y) = 113.18538960376684 (tracked)
loss(x, y) = 80.96356350768635 (tracked)
loss(x, y) = 60.3604867678749 (tracked)
loss(x, y) = 47.16321665566758 (tracked)
loss(x, y) = 38.691666012178324 (tracked)
loss(x, y) = 33.23887337721578 (tracked)
loss(x, y) = 29.716564518059297 (tracked)
loss(x, y) = 27.430277827982 (tracked)
loss(x, y) = 25.93648163014644 (tracked)
loss(x, y) = 24.951675053259876 (tracked)
loss(x, y) = 24.29449802536281 (tracked)
loss(x, y) = 23.848829414920033 (tracked)
loss(x, y) = 23.540245090734963 (tracked)
loss(x, y) = 23.320988425156337 (tracked)
loss(x, y) = 23.160369154832527 (tracked)
loss(x, y) = 23.038626416489812 (tracked)
loss(x, y) = 22.943003912571843 (tracked)
loss(x, y) = 22.865238495620318 (tracked)
loss(x, y) = 22.799952313678855 (tracked)
loss(x, y) = 22.743622857356208 (tracked)
loss(x, y) = 22.69392279228906 (tracked)
⋮ (output truncated; the loop prints 50 lines in total)

#### Predict

In [12]:
# Ratio of the prediction for the first column of `x` to its true value `y[1]`;
# a value of 1 would mean an exact prediction.
predict(x[:, 1]) / y[1]

Tracked 1-element Array{Float64,1}:
 1.1574200155419858

## Training image recognition model

In [13]:
using Flux, Flux.Data.MNIST
using Flux: @epochs, onehotbatch, onecold, crossentropy, throttle
using Base.Iterators: repeated, partition
using Statistics: mean
# using CuArrays

#### Load data and preprocessing

In [14]:
# Chunk size used when partitioning the training indices into mini-batches.
batch_size = 128

128

In [15]:
# Assemble one mini-batch from the samples selected by `idxs`.
#
# `X` is an indexable collection of equally sized images and `Y` the matching
# labels. Returns `(images, labels)`:
#   * `images` is a Float32 array shaped `size(X[1])..., 1, length(idxs)`, i.e.
#     the image dimensions, a singleton dimension, and the position in the batch;
#   * `labels` is `onehotbatch(Y[idxs], 0:9)`, the labels encoded against 0–9.
function minibatch(X, Y, idxs)
    images = Array{Float32}(undef, size(X[1])..., 1, length(idxs))
    for (slot, idx) in enumerate(idxs)
        # Convert each image to Float32 and store it in its batch slot.
        images[:, :, :, slot] = Float32.(X[idx])
    end
    labels = onehotbatch(Y[idxs], 0:9)
    return (images, labels)
end

minibatch (generic function with 1 method)

In [16]:
# Load the MNIST training labels and images.
train_labels = MNIST.labels()
train_imgs = MNIST.images()
# Split the sample indices into consecutive chunks of `batch_size` ...
mb_idxs = partition(1:length(train_imgs), batch_size)
# ... and build one (images, labels) batch per chunk. The trailing `;` suppresses the cell output.
train = [minibatch(train_imgs, train_labels, i) for i in mb_idxs];

In [17]:
# Load the MNIST test split and pack all of it into a single (images, labels) batch.
test_imgs = MNIST.images(:test)
test_labels = MNIST.labels(:test)
test = minibatch(test_imgs, test_labels, 1:length(test_imgs));

#### Model

In [18]:
# Convolutional classifier: three (3×3 conv + relu -> 2×2 max-pool) stages,
# then flatten, a dense layer with 10 outputs, and softmax.
# Assuming 28×28 inputs, each padded conv grows the side by 2 and each pool
# halves it (28 -> 30 -> 15 -> 17 -> 8 -> 10 -> 5), so the flattened feature
# size is 5 * 5 * 32 = 800, matching `Dense(800, 10)`.
model = Chain(
    Conv((3, 3), 1 => 16, relu; pad = (2, 2)),
    MaxPool((2, 2)),
    Conv((3, 3), 16 => 32, relu; pad = (2, 2)),
    MaxPool((2, 2)),
    Conv((3, 3), 32 => 32, relu; pad = (2, 2)),
    MaxPool((2, 2)),
    # Collapse all leading dimensions; the 4th (batch) dimension becomes the columns.
    feat -> reshape(feat, :, size(feat, 4)),
    Dense(800, 10),
    softmax,
)

Chain(Conv((3, 3), 1=>16, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 16=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), Conv((3, 3), 32=>32, NNlib.relu), MaxPool((2, 2), pad = (0, 0, 0, 0), stride = (2, 2)), getfield(Main, Symbol("##5#6"))(), Dense(800, 10), NNlib.softmax)

In [19]:
# Sanity check: run the still-untrained model on the images of the first training batch.
model(train[1][1])

Tracked 10×128 Array{Float32,2}:
 0.0368724    0.130336     0.00507068  0.388271    …  0.0302351    0.110079  
 0.022705     0.0455846    0.0374525   0.19131        0.011452     0.0684349 
 0.000584813  0.0034824    0.0148576   0.0014479      0.000692421  0.0034748 
 0.00147608   0.0029834    0.0011707   0.0199318      0.000606448  0.00833279
 0.00228483   0.00555993   0.00304258  0.00227664     0.000566299  0.00408492
 0.000662726  0.000421997  0.00129102  0.00525391  …  0.000558556  0.00321902
 0.00696284   0.0261311    0.0148321   0.0200633      0.00306733   0.00486249
 0.000841722  0.0105874    0.00231068  0.00513339     0.000528892  0.00503813
 0.894514     0.705336     0.859029    0.300797       0.889939     0.587818  
 0.0330951    0.0695779    0.0609432   0.0655144      0.0623538    0.204655  

#### If you have CUDA...

In [20]:
# train = gpu.(train)
# test = gpu.(test)
# model = gpu(model)

#### Loss function

In [21]:
# Classification loss: cross-entropy between the model output and the one-hot labels.
# NOTE: this replaces the `loss(x, y)` method defined in the linear-regression section.
loss(x, y) = crossentropy(model(x), y)

loss (generic function with 1 method)

In [22]:
# Fraction of samples for which `onecold` of the model output equals `onecold`
# of the one-hot labels, i.e. the share of correctly classified samples.
accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))

accuracy (generic function with 1 method)

#### Optimizer

In [23]:
# Callback that prints the accuracy on the held-out test batch.
evalcb() = @show(accuracy(test[1], test[2]))
# ADAM optimiser with its default hyper-parameters (shown in the cell output).
opt = ADAM()

ADAM(0.001, (0.9, 0.999), IdDict{Any,Any}())

In [24]:
# Loss on the first training batch before any training, as a baseline.
loss(train[1][1], train[1][2])

4.6231046f0 (tracked)

#### Training

In [25]:
# Run `Flux.train!` over all training batches for 10 epochs. `throttle(evalcb, 10)`
# rate-limits the accuracy callback (presumably to at most once every 10 seconds — confirm in the Flux docs).
@epochs 10 Flux.train!(loss, params(model), train, opt, cb=throttle(evalcb, 10))

┌ Info: Epoch 1
└ @ Main /home/pika/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


accuracy(test[1], test[2]) = 0.1187
accuracy(test[1], test[2]) = 0.5855
accuracy(test[1], test[2]) = 0.784
accuracy(test[1], test[2]) = 0.8828
accuracy(test[1], test[2]) = 0.9052
accuracy(test[1], test[2]) = 0.9189
accuracy(test[1], test[2]) = 0.9336


InterruptException: InterruptException:

## Use pre-trained models

In [28]:
using Metalhead
using Metalhead: classify

┌ Info: Precompiling Metalhead [dbeba491-748d-5e0e-a39e-b530a07fa0cc]
└ @ Base loading.jl:1186


In [32]:
# Download and unpack the CIFAR10 dataset into Metalhead's datasets directory
# (see the tar command in the output).
Metalhead.download(CIFAR10)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
 99  162M   99  162M    0     0  1743k      0  0:01:35  0:01:35 --:--:-- 2430k

cifar-10-batches-bin/
cifar-10-batches-bin/data_batch_1.bin


100  162M  100  162M    0     0  1743k      0  0:01:35  0:01:35 --:--:-- 2508k


cifar-10-batches-bin/batches.meta.txt
cifar-10-batches-bin/data_batch_3.bin
cifar-10-batches-bin/data_batch_4.bin
cifar-10-batches-bin/test_batch.bin
cifar-10-batches-bin/readme.html
cifar-10-batches-bin/data_batch_5.bin
cifar-10-batches-bin/data_batch_2.bin


Process(`[4mtar[24m [4m-xzvf[24m [4m/home/pika/.julia/packages/Metalhead/fYeSU/src/datasets/../../datasets/cifar-10-binary.tar.gz[24m [4m-C[24m [4m/home/pika/.julia/packages/Metalhead/fYeSU/src/datasets/../../datasets[24m`, ProcessExited(0))

In [29]:
# Build the VGG19 model, then load the image to classify.
# NOTE: "Elephant.jpg" is resolved relative to the working directory. In the
# recorded run the file was missing, so `load` threw and `sample` was never
# defined — put the image in place before running this cell.
vgg = VGG19()
sample = load("Elephant.jpg")

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   601    0   601    0     0    662      0 --:--:-- --:--:-- --:--:--   661
100  548M  100  548M    0     0  3177k      0  0:02:56  0:02:56 --:--:-- 3645k
┌ Info: Recompiling stale cache file /home/pika/.julia/compiled/v1.1/ImageMagick/0LbNX.ji for ImageMagick [6218d12a-5da1-5696-b52f-db25d2ecc6d1]
└ @ Base loading.jl:1184
Error encountered while loading "Elephant.jpg".
Fatal error:


ErrorException: UnableToOpenBlob `Elephant.jpg': 沒有此一檔案或目錄 @ error/blob.c/OpenBlob/2873

In [30]:
# Classify the loaded image with VGG19; requires `sample` from the previous cell.
classify(vgg, sample)

UndefVarError: UndefVarError: sample not defined

In [31]:
# Inspect the chain of layers that makes up the VGG19 model.
vgg.layers

Chain(Conv((3, 3), 3=>64, NNlib.relu), Conv((3, 3), 64=>64, NNlib.relu), getfield(Metalhead, Symbol("##42#48"))(), Conv((3, 3), 64=>128, NNlib.relu), Conv((3, 3), 128=>128, NNlib.relu), getfield(Metalhead, Symbol("##43#49"))(), Conv((3, 3), 128=>256, NNlib.relu), Conv((3, 3), 256=>256, NNlib.relu), Conv((3, 3), 256=>256, NNlib.relu), Conv((3, 3), 256=>256, NNlib.relu), getfield(Metalhead, Symbol("##44#50"))(), Conv((3, 3), 256=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), getfield(Metalhead, Symbol("##45#51"))(), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), Conv((3, 3), 512=>512, NNlib.relu), getfield(Metalhead, Symbol("##46#52"))(), getfield(Metalhead, Symbol("##47#53"))(), Dense(25088, 4096, NNlib.relu), Dropout{Float32}(0.5f0, false), Dense(4096, 4096, NNlib.relu), Dropout{Float32}(0.5f0, false), Dense(4096, 1000), NNlib.softmax)

### Models available:

* GoogLeNet
* ResNet
* VGG
* SqueezeNet
* DenseNet