In [13]:
# Based on https://minpy.readthedocs.io/en/latest/tutorial/rnn_mnist.html
using MLDatasets, Flux

using BenchmarkTools
train_data = MLDatasets.MNIST(split=:train)
test_data  = MLDatasets.MNIST(split=:test)

"""
    loader(data; batchsize::Int=1, shuffle::Bool=true)

Wrap `data` in a `Flux.DataLoader` that yields `(x, y)` mini-batches.

`data.features` is reshaped so that every observation becomes one column of
`28 * 28` values, and `data.targets` is one-hot encoded against the classes `0:9`.

# Keywords
- `batchsize`: number of observations per mini-batch (default `1`).
- `shuffle`: forwarded to `Flux.DataLoader`; defaults to `true`, which was the
  previously hard-coded behaviour. Pass `shuffle=false` for a deterministic
  order, e.g. when evaluating.
"""
function loader(data; batchsize::Int=1, shuffle::Bool=true)
    pixels = reshape(data.features, 28 * 28, :)   # one flattened image per column
    labels = Flux.onehotbatch(data.targets, 0:9)  # 10×N one-hot matrix
    return Flux.DataLoader((pixels, labels); batchsize, shuffle)
end

# Recurrent classifier: a 64-unit tanh RNN reading 196 values per step
# (196 = 14 * 14), followed by a linear read-out that produces 10 scores.
net = Chain(
    RNN(196 => 64, tanh),
    Dense(64 => 10),  # activation defaults to identity
)

Chain(
  Recur(
    RNNCell(196 => 64, tanh),           # 16_768 parameters
  ),
  Dense(64 => 10),                      # 650 parameters
)         # Total: 6 trainable arrays, 17_418 parameters,
          # plus 1 non-trainable, 64 parameters, summarysize 68.406 KiB.

In [14]:
using Statistics: mean  # standard library
"""
    loss_and_accuracy(model, data)

Evaluate `model` on all of `data` in a single batch.

The input rows are fed to the model as four consecutive chunks (`1:196`,
`197:392`, `393:588` and `589:end`); only the output of the last call is
scored. `Flux.reset!` is called on the model before the first chunk.

Returns a `NamedTuple` with fields `loss` (the `Flux.logitcrossentropy` value),
`acc` (accuracy in percent, rounded to two digits) and `split` (`data.split`).
"""
function loss_and_accuracy(model, data)
    # a single batch that spans the whole dataset
    inputs, targets = only(loader(data; batchsize=length(data)))
    Flux.reset!(model)

    # the model is called once per row chunk, in order; each call overwrites
    # the previous output so only the final one survives the loop
    local logits
    for rows in (1:196, 197:392, 393:588, 589:size(inputs, 1))
        logits = model(inputs[rows, :])
    end

    # the model has no softmax layer, hence the logit form of the loss
    loss = Flux.logitcrossentropy(logits, targets)
    hits = Flux.onecold(logits) .== Flux.onecold(targets)
    acc = round(100 * mean(hits); digits=2)
    return (loss=loss, acc=acc, split=data.split)
end

# Baseline before any training: accuracy should sit near chance level (about 10%).
@show loss_and_accuracy(net, test_data);

# Per-epoch metrics are appended to this log by the training loop.
train_log = Any[]

# Training hyper-parameters.
settings = (
    eta=0.015,      # step size handed to Descent
    epochs=5,
    batchsize=100,
)

# Optimiser state for `net`, built from the Descent rule.
opt_state = Flux.setup(Descent(settings.eta), net);

loss_and_accuracy(net, test_data) = (loss = 2.539091f0, acc = 13.39, split = :test)


In [15]:
using ProgressMeter
# Train for `settings.epochs` passes over the training data. Each pass is timed,
# and train/test metrics are logged and recorded after it.
for epoch in 1:settings.epochs
    @time for (x, y) in loader(train_data; batchsize=settings.batchsize)
        Flux.reset!(net)  # reset the model before every mini-batch
        grads = Flux.gradient(net) do model
            # feed the four row chunks in order; only the last output is scored
            out = model(x[1:196, :])
            out = model(x[197:392, :])
            out = model(x[393:588, :])
            out = model(x[589:end, :])
            Flux.logitcrossentropy(out, y)
        end
        Flux.update!(opt_state, net, grads[1])
    end

    # metrics on the full train and test sets after this epoch
    train_stats = loss_and_accuracy(net, train_data)
    test_stats = loss_and_accuracy(net, test_data)
    loss, acc = train_stats.loss, train_stats.acc
    test_loss, test_acc = test_stats.loss, test_stats.acc
    @info epoch acc test_acc
    push!(train_log, (; epoch, loss, acc, test_loss, test_acc))
end

  3.404935 seconds (833.99 k allocations: 2.649 GiB, 6.95% gc time, 19.48% compilation time)


┌ Info: 1
│   acc = 89.68
│   test_acc = 90.17
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.755165 seconds (541.33 k allocations: 2.633 GiB, 13.96% gc time)


┌ Info: 2
│   acc = 91.95
│   test_acc = 92.2
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.472870 seconds (541.33 k allocations: 2.633 GiB, 6.58% gc time)


┌ Info: 3
│   acc = 93.04
│   test_acc = 93.35
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.380648 seconds (541.33 k allocations: 2.633 GiB, 7.06% gc time)


┌ Info: 4
│   acc = 93.8
│   test_acc = 93.94
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.517187 seconds (541.33 k allocations: 2.633 GiB, 7.03% gc time)


┌ Info: 5
│   acc = 94.57
│   test_acc = 94.41
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.396418 seconds (541.33 k allocations: 2.633 GiB, 6.78% gc time)


┌ Info: 1
│   acc = 94.89
│   test_acc = 94.88
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.759504 seconds (541.33 k allocations: 2.633 GiB, 18.42% gc time)


┌ Info: 2
│   acc = 95.27
│   test_acc = 95.09
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.452662 seconds (541.33 k allocations: 2.633 GiB, 7.03% gc time)


┌ Info: 3
│   acc = 95.64
│   test_acc = 95.36
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.683570 seconds (541.33 k allocations: 2.633 GiB, 6.68% gc time)


┌ Info: 4
│   acc = 95.9
│   test_acc = 95.49
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  3.003656 seconds (541.33 k allocations: 2.633 GiB, 18.28% gc time)


┌ Info: 5
│   acc = 96.1
│   test_acc = 95.61
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.600270 seconds (541.33 k allocations: 2.633 GiB, 6.27% gc time)


┌ Info: 1
│   acc = 96.28
│   test_acc = 95.84
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.881438 seconds (541.33 k allocations: 2.633 GiB, 18.08% gc time)


┌ Info: 2
│   acc = 96.51
│   test_acc = 95.98
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.766757 seconds (541.33 k allocations: 2.633 GiB, 6.22% gc time)


┌ Info: 3
│   acc = 96.65
│   test_acc = 96.07
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.573661 seconds (541.33 k allocations: 2.633 GiB, 6.58% gc time)


┌ Info: 4
│   acc = 96.84
│   test_acc = 96.3
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


  2.991549 seconds (541.33 k allocations: 2.633 GiB, 17.44% gc time)


┌ Info: 5
│   acc = 96.94
│   test_acc = 96.26
└ @ Main c:\Users\wolek\source\repos\AutomaticDiff\AWID-2024-RNN.ipynb:17


BenchmarkTools.Trial: 1 sample with 1 evaluation.
 Single result which took 16.022 s (10.69% GC) to evaluate,
 with a memory estimate of 16.29 GiB, over 2709494 allocations.

In [16]:
Flux.reset!(net)
# One training example: x1 is a 784×1 matrix (a flattened image) and y1 is a
# 10×1 one-hot matrix, because `loader` defaults to batchsize 1.
x1, y1 = first(loader(train_data));
# Run the four row chunks through the network in order; the last output is kept.
for rows in (1:196, 197:392, 393:588, 589:size(x1, 1))
    global y1hat = net(x1[rows, :])
end
# Predicted digit next to the true digit.
@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))

@show loss_and_accuracy(net, train_data);

hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9)) = [3 3]
loss_and_accuracy(net, train_data) = (loss = 0.10827223f0, acc = 96.94, split = :train)
