In [2]:
ENV["COLUMNS"]=72
using Knet: conv4, pool, mat, KnetArray, nll, zeroone, progress, progress!, sgd, adam, rmsprop, adagrad, param, param0, dropout, relu, minibatch, Data, sigm, tanh
using Statistics: mean
using Random
using Test
using MLDatasets: MNIST
using IterTools: ncycle, takenth
import .Iterators: cycle, Cycle, take
import CUDA


In [3]:
a_type = (CUDA.functional() ? KnetArray{Float32} : Array{Float32})

Array{Float32,N} where N

In [4]:
# Dense layer definition
struct Dense
    w
    b
    f
    p
end

(d::Dense)(x) = d.f.(d.w * mat(dropout(x, d.p)) .+ d.b) # Callable object that feed-forwards one minibatch through a layer
# Constructor definition for Dense layer
Dense(i::Int,o::Int,f=relu; a_type = Array, pdrop=0) = Dense(param(o, i; atype = a_type), param0(o; atype = a_type), f, pdrop)

struct GenericMLP
    layers
    optimizer_type
    lr
    function GenericMLP(i = 784, o = 10; hidden = [], f = relu, pdrop = 0.0, optimizer_type = sgd, lr = 0.1, a_type = Array)
    
        architecture = vcat(i, hidden, o)
        layers = []

        for k in 1:size(architecture, 1) - 1

            push!(layers, Dense(architecture[k], architecture[k + 1], f, a_type = a_type, pdrop = pdrop))
        end

        new(Tuple(layers), optimizer_type, lr)
        
    end
    
end



function (gmlp::GenericMLP)(x)
   
    for l in gmlp.layers
    
        x = l(x)
    
    end
    
    return x
    
end

function (gmlp::GenericMLP)(x, y)
   
    return nll(gmlp(x), y)
    
end

function (gmlp::GenericMLP)(data::Data)
    
    return mean(gmlp(x, y) for (x, y) in data)
    
end



In [38]:
# Convolutional layer definition
struct Conv
    w 
    b
    f 
    p 
end

(c::Conv)(x) = c.f.(pool(conv4(c.w, dropout(x, c.p)) .+ c.b)) # Callable object that feed-forwards one minibatch
# Constructor definition for Convolutional layer
Conv(w1::Int,w2::Int,cx::Int,cy::Int,f=relu; a_type = Array, pdrop=0) = Conv(param(w1, w2, cx, cy; atype = a_type), param0(1, 1, cy, 1; atype = a_type), f, pdrop)

struct GeneriCONV
    layers
    optimizer_type
    lr
    function GeneriCONV(i_dim, o_dim, kernels; hidden = [], f = relu, pdrop = 0.0, optimizer_type = sgd, lr = 0.1, a_type = Array)
        
        layers = []
        x, _, c, _ = i_dim 
        
        for kernel in kernels
        
            spatial = kernel[1]
            output_dim = kernel[2]
            push!(layers, Conv(spatial, spatial, c, output_dim, f; a_type = a_type, pdrop = pdrop))
            
            x = floor((x - spatial + 1) / 2)
            c = output_dim
        
        end
        
        i_dense = x * x * c
        o_dense = o_dim

        gmlp = GenericMLP(convert(Int64, i_dense), o_dense; hidden = hidden, f = f, a_type = a_type, pdrop = pdrop)

        push!(layers, gmlp.layers...)

        new(Tuple(layers), optimizer_type, lr)
        
    
    end
    
    
end


function (gconv::GeneriCONV)(x)
   
    for l in gconv.layers
    
        x = l(x)
    
    end
    
    return x
    
end

function (gconv::GeneriCONV)(x, y)
   
    return nll(gconv(x), y)
    
end

function (gconv::GeneriCONV)(data::Data)
    
    return mean(gconv(x, y) for (x, y) in data)
    
end

In [45]:
function train_summarize(model; dtrn, epoch = 100)
    
    result = (model(dtrn) for x in takenth(progress(model.optimizer_type(model,ncycle(dtrn,epoch), lr = model.lr)),length(dtrn)));
    return collect(result)
end

train_summarize (generic function with 3 methods)

In [31]:
xtrn,ytrn = MNIST.traindata(Float32); ytrn[ytrn.==0] .= 10
xtst,ytst = MNIST.testdata(Float32);  ytst[ytst.==0] .= 10
dtrn = minibatch(xtrn, ytrn, 100; xsize = (784,:), xtype = a_type)
dtst = minibatch(xtst, ytst, 100; xsize = (784,:), xtype = a_type);
@show dtrn

dtrn = 600-element Data{Tuple{Array{Float32,N} where N,Array{Int64,N} where N}}


600-element Data{Tuple{Array{Float32,N} where N,Array{Int64,N} where N}}

In [7]:
gmlp1 = GenericMLP(784, 10; hidden = [], f = identity, a_type = a_type, pdrop = 0, optimizer_type = sgd, lr = 0.1)
@show gmlp1(dtrn)
loss1 = train(gmlp1, dtrn,1);
@show gmlp1(dtrn)

gmlp1(dtrn) = 2.3984659f0


┣████████████████████┫ [100.00%, 600/600, 00:07/00:07, 91.52i/s] 


gmlp1(dtrn) = 0.3847315f0


0.3847315f0

In [8]:
gmlp2 = GenericMLP(784, 10 ; hidden = [30], f = tanh, a_type = a_type, pdrop = 0, optimizer_type = adam, lr = 0.1)
@show gmlp2(dtrn)
loss2 = train(gmlp2, dtrn, 1);
@show gmlp2(dtrn)

gmlp2(dtrn) = 2.4540172f0


┣████████████████████┫ [100.00%, 600/600, 00:08/00:08, 72.76i/s] 


gmlp2(dtrn) = 1.0894085f0


1.0894085f0

In [9]:
gmlp3 = GenericMLP(784, 10 ; hidden = [60], f = sigm, a_type = a_type, pdrop = 0.5, optimizer_type = rmsprop, lr = 0.01)
@show gmlp3(dtrn)
loss3 = train(gmlp3, dtrn, 1);
@show gmlp3(dtrn)

gmlp3(dtrn) = 2.3117151f0


┣████████████████████┫ [100.00%, 600/600, 00:06/00:06, 92.66i/s] 


gmlp3(dtrn) = 1.5668726f0


1.5668726f0

In [10]:
gmlp4 = GenericMLP(784, 10 ; hidden = [30, 30], f = relu, a_type = a_type, pdrop = 0, optimizer_type = sgd, lr = 0.1)
@show gmlp4(dtrn)
loss4 = train(gmlp4, dtrn, 1);
@show gmlp4(dtrn)

gmlp4(dtrn) = 2.279828f0

┣                    ┫ [0.17%, 1/600, 00:01/05:50, 1.71i/s] 




┣████████████████████┫ [100.00%, 600/600, 00:03/00:03, 214.35i/s] 


gmlp4(dtrn) = 0.2987718f0


0.2987718f0

In [7]:
xtrn,ytrn = MNIST.traindata(Float32); ytrn[ytrn.==0] .= 10
xtst,ytst = MNIST.testdata(Float32);  ytst[ytst.==0] .= 10
dtrn = minibatch(xtrn, ytrn, 100; xsize = (28,28,1,:), xtype = a_type)
dtst = minibatch(xtst, ytst, 100; xsize = (28,28,1,:), xtype = a_type);

In [12]:
gconv = GeneriCONV((28,28,1,100), 10, [(5,3)], [], f = relu, a_type = a_type, pdrop = 0.0, optimizer_type = sgd, lr = 0.1)
@show gconv(dtrn)
conv_loss = train(gconv, dtrn, 1);
@show gconv(dtrn)

gconv(dtrn) = 2.304462f0


┣████████████████████┫ [100.00%, 600/600, 00:36/00:36, 16.61i/s]                  ┫ [14.83%, 89/600, 00:17/01:52, 43.11i/s] 


gconv(dtrn) = 0.91396695f0


0.91396695f0

In [13]:
gconv = GeneriCONV((28,28,1,100), 10, [(5,3)], [], f = relu, a_type = a_type, pdrop = 0.0, optimizer_type = adam, lr = 0.1)
@show gconv(dtrn)
conv_loss = train(gconv, dtrn, 1);
@show gconv(dtrn)

gconv(dtrn) = 2.3110049f0

┣                    ┫ [0.17%, 1/600, 00:01/08:08, 1.23i/s] 




┣████████████████████┫ [100.00%, 600/600, 00:19/00:19, 31.61i/s] 15.83%, 95/600, 00:03/00:18, 45.34i/s] 


gconv(dtrn) = 1.096976f0


1.096976f0

In [14]:
gconv = GeneriCONV((28,28,1,100), 10, [(5,3)], [], f = relu, a_type = a_type, pdrop = 0.0, optimizer_type = sgd, lr = 0.2)
@show gconv(dtrn)
conv_loss = train(gconv, dtrn, 1);
@show gconv(dtrn)

gconv(dtrn) = 2.3966498f0


┣████████████████████┫ [100.00%, 600/600, 00:20/00:20, 29.70i/s] 


gconv(dtrn) = 1.3287678f0


1.3287678f0

In [15]:
gconv = GeneriCONV((28,28,1,100), 10, [(5,3)], [], f = relu, a_type = a_type, pdrop = 0.0, optimizer_type = rmsprop, lr = 0.05)
@show gconv(dtrn)
conv_loss = train(gconv, dtrn, 1);
@show gconv(dtrn)

gconv(dtrn) = 2.330751f0


┣████████████████████┫ [100.00%, 600/600, 00:21/00:21, 29.08i/s] ██████████████▏    ┫ [75.83%, 455/600, 00:11/00:14, 42.56i/s] [81.50%, 489/600, 00:12/00:14, 32.56i/s] 


gconv(dtrn) = 0.66135305f0


0.66135305f0

In [16]:
gconv = GeneriCONV((28,28,1,100), 10, [(5,3)], [], f = sigm, a_type = a_type, pdrop = 0.0, optimizer_type = sgd, lr = 0.1)
@show gconv(dtrn)
conv_loss = train(gconv, dtrn, 1);
@show gconv(dtrn)

gconv(dtrn) = 2.3138297f0

┣                    ┫ [0.17%, 1/600, 00:01/09:25, 1.06i/s] 




┣████████████████████┫ [100.00%, 600/600, 00:24/00:24, 25.16i/s] 


gconv(dtrn) = 1.8719897f0


1.8719897f0

In [32]:
gconv = GeneriCONV((28,28,1,100), 10, [(5,3), (3,20)], [30], f = tanh, a_type = a_type, pdrop = 0.0, optimizer_type = sgd, lr = 0.1)
@show gconv(dtrn)
conv_loss = train_summarize(gconv, dtrn, 1);
@show gconv(dtrn)

gconv(dtrn) = 2.3186362f0


┣████████████████████┫ [100.00%, 600/600, 00:35/00:35, 17.36i/s] ▏                  ┫ [6.17%, 37/600, 00:13/03:33, 35.79i/s] ▌                 ┫ [12.83%, 77/600, 00:14/01:50, 39.14i/s] 


gconv(dtrn) = 0.9711142f0


0.9711142f0

In [18]:
gconv = GeneriCONV((28,28,1,100), 10, [(5,6)], [20], f = relu, a_type = a_type, pdrop = 0.0, optimizer_type = sgd, lr = 0.1)
@show gconv(dtrn)
conv_loss = train(gconv, dtrn, 1);
@show gconv(dtrn)

gconv(dtrn) = 2.308941f0


┣████████████████████┫ [100.00%, 600/600, 00:27/00:27, 22.55i/s] 


gconv(dtrn) = 0.42979252f0


0.42979252f0

In [29]:
gconv = GeneriCONV((28,28,1,100), 10, [(5,3), (2, 10)], [10]; f = sigm, a_type = a_type, pdrop = 0.0, optimizer_type = sgd, lr = 0.1)
@show gconv(dtrn)
conv_loss = train(gconv, dtrn, 1);
@show gconv(dtrn)

gconv(dtrn) = 2.3086264f0


LoadError: UndefVarError: train not defined

In [17]:
ACTIVATION_FUNCTIONS = [relu, sigm, tanh]
OPTIMIZER_TYPE = [sgd, adam, rmsprop, adagrad]
LEARNING_RATE = [0.05, 0.1, 0.15]
HIDDEN_LAYER = [[], [30], [60], [60, 30], [90, 60, 30]]
CONV_LAYER = [[(5, 3)], [(5, 20), (5, 50)], [(5, 10), (4, 10), (3, 10)]]

3-element Array{Array{Tuple{Int64,Int64},1},1}:
 [(5, 3)]
 [(5, 20), (5, 50)]
 [(5, 10), (4, 10), (3, 10)]

In [18]:
xtrn,ytrn = MNIST.traindata(Float32); ytrn[ytrn.==0] .= 10
xtst,ytst = MNIST.testdata(Float32);  ytst[ytst.==0] .= 10
dtrn = minibatch(xtrn, ytrn, 100; xsize = (28,28,1,:), xtype = a_type)
dtst = minibatch(xtst, ytst, 100; xsize = (28,28,1,:), xtype = a_type);

In [39]:
models = []
for f in shuffle(ACTIVATION_FUNCTIONS)
    for optimizer_type in shuffle(OPTIMIZER_TYPE)
        for lr in shuffle(LEARNING_RATE)
            for hidden in shuffle(HIDDEN_LAYER)
                for kernels in shuffle(CONV_LAYER)
                    
                    model = GeneriCONV((28, 28, 1, 100), 10, kernels; hidden = hidden,
                       f = f, a_type = a_type, pdrop = 0.0, 
                        optimizer_type = optimizer_type, lr = lr)
                    push!(models, model)
                    
                end
            end
        end
    end
end
    

In [40]:
models

540-element Array{Any,1}:
 GeneriCONV((Conv(P(Array{Float32,4}(5,5,1,10)), P(Array{Float32,4}(1,1,10,1)), Knet.Ops20.relu, 0.0), Conv(P(Array{Float32,4}(4,4,10,10)), P(Array{Float32,4}(1,1,10,1)), Knet.Ops20.relu, 0.0), Conv(P(Array{Float32,4}(3,3,10,10)), P(Array{Float32,4}(1,1,10,1)), Knet.Ops20.relu, 0.0), Dense(P(Array{Float32,2}(60,10)), P(Array{Float32,1}(60)), Knet.Ops20.relu, 0.0), Dense(P(Array{Float32,2}(30,60)), P(Array{Float32,1}(30)), Knet.Ops20.relu, 0.0), Dense(P(Array{Float32,2}(10,30)), P(Array{Float32,1}(10)), Knet.Ops20.relu, 0.0)), Knet.Train20.adagrad, 0.15)
 GeneriCONV((Conv(P(Array{Float32,4}(5,5,1,3)), P(Array{Float32,4}(1,1,3,1)), Knet.Ops20.relu, 0.0), Dense(P(Array{Float32,2}(60,432)), P(Array{Float32,1}(60)), Knet.Ops20.relu, 0.0), Dense(P(Array{Float32,2}(30,60)), P(Array{Float32,1}(30)), Knet.Ops20.relu, 0.0), Dense(P(Array{Float32,2}(10,30)), P(Array{Float32,1}(10)), Knet.Ops20.relu, 0.0)), Knet.Train20.adagrad, 0.15)
 GeneriCONV((Conv(P(Array{Float32,4}(

In [None]:
train_summarize.(models; dtrn = dtrn, epoch = 1)

┣████████████████████┫ [100.00%, 600/600, 00:33/00:33, 18.32i/s] 
┣████████████████████┫ [100.00%, 600/600, 00:09/00:09, 64.53i/s] 
┣████████████████████┫ [100.00%, 600/600, 01:32/01:32, 6.51i/s]                  ┫ [3.50%, 21/600, 00:05/02:18, 1.82i/s] ▌                ┫ [17.83%, 107/600, 00:14/01:20, 9.50i/s] ▏               ┫ [21.17%, 127/600, 00:16/01:18, 9.18i/s] [27.83%, 167/600, 00:21/01:15, 9.28i/s] ▉            ┫ [39.50%, 237/600, 00:28/01:11, 9.39i/s] 
┣████████████████████┫ [100.00%, 600/600, 00:28/00:28, 21.39i/s] 
┣████████████████████┫ [100.00%, 600/600, 01:33/01:33, 6.47i/s] 3%, 11/600, 00:01/01:05, 9.27i/s] ▋                   ┫ [3.33%, 20/600, 00:02/01:06, 8.96i/s] [54.00%, 324/600, 00:36/01:08, 9.69i/s] ▍        ┫ [57.33%, 344/600, 00:39/01:07, 9.32i/s] ┫ [64.00%, 384/600, 00:43/01:07, 9.35i/s] ▏    ┫ [75.67%, 454/600, 00:50/01:06, 9.36i/s] ┫ [84.00%, 504/600, 00:56/01:06, 9.40i/s] ▍  ┫ [87.33%, 524/600, 00:58/01:06, 9.49i/s] ▊  ┫ [89.00%, 534/600, 00:59/01:06, 9.31i/s