# Testing allocations

In [28]:
using StaticArrays
using BenchmarkTools

In [6]:
abstract type AbstractLayer end

struct FC{M,V} <: AbstractLayer
    W::M
    b::V
    tmp::V
    FC(W::M, b::V) where {M<:AbstractMatrix, V<:AbstractVector} = 
        new{M,V}(W, b, zero(b))
end

In [7]:
relu(x) = max(x,0)

relu (generic function with 1 method)

In [9]:
function compute_simple(fc::FC, x)
    W,b = fc.W, fc.b
    relu.(W*x + b)
end

compute_simple (generic function with 1 method)

## Use StaticArrays

In [107]:
sx,sy = 10, 15

Wtest   = rand(sx,sy)
WtestSV = SMatrix{sx,sy}(Wtest)
WtestMV = MMatrix{sx,sy}(Wtest)
WtestSS = Size(sx,sy)(Wtest)

btest   = rand(sx)
btestSV = SVector{sx}(btest)
btestMV = MVector{sx}(btest)
btestSS = Size(sx)(btest)

fctest   = FC(Wtest, btest)
fctestSV = FC(WtestSV, btestSV)
fctestMV = FC(WtestMV, btestMV)
fctestSS = FC(WtestSS, btestSS)

inputtest = rand(sy)
inputtestSV = SVector{sy}(inputtest)
inputtestMV = MVector{sy}(inputtest)
inputtestSS = Size(sy)(inputtest);

In [108]:
@btime compute_simple($fctest, $inputtest)

  230.249 ns (3 allocations: 480 bytes)


10-element Array{Float64,1}:
 3.713271347463431
 3.2383310627896753
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462266
 3.304202366928738
 4.00475573524668
 2.9892531083648177

In [109]:
@btime compute_simple($fctestSV, $inputtestSV)

  0.025 ns (0 allocations: 0 bytes)


10-element SArray{Tuple{10},Float64,1,10} with indices SOneTo(10):
 3.713271347463431
 3.238331062789676
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462267
 3.3042023669287377
 4.00475573524668
 2.9892531083648177

In [110]:
@btime compute_simple($fctestMV, $inputtestMV)

  39.477 ns (1 allocation: 96 bytes)


10-element MArray{Tuple{10},Float64,1,10} with indices SOneTo(10):
 3.713271347463431
 3.238331062789676
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462267
 3.3042023669287377
 4.00475573524668
 2.9892531083648177

In [111]:
@btime compute_simple($fctestSS, $inputtestSS)

  133.983 ns (3 allocations: 480 bytes)


10-element SizedArray{Tuple{10},Float64,1,1} with indices SOneTo(10):
 3.713271347463431
 3.238331062789676
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462267
 3.3042023669287377
 4.00475573524668
 2.9892531083648177

# In place calculations

In [113]:
using LinearAlgebra

In [115]:
function compute_inplace!(y, fc::FC, x)
    W,b = fc.W, fc.b
    tmp = fc.tmp
    mul!(tmp, W, x)
    y = relu.(tmp + b)
end

compute_inplace! (generic function with 1 method)

In [117]:
outputtest = zeros(sx)
#outputtestSV = SVector{sx}(outputtest) # Can't mutatate SVectors
outputtestMV = MVector{sx}(outputtest)
outputtestSS = Size(sx)(outputtest);

In [119]:
@btime compute_inplace!($outputtest, $fctest, $inputtest)

  207.489 ns (2 allocations: 320 bytes)


10-element Array{Float64,1}:
 3.713271347463431
 3.2383310627896753
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462266
 3.304202366928738
 4.00475573524668
 2.9892531083648177

In [121]:
@btime compute_inplace!($outputtestMV, $fctestMV, $inputtestMV)

  69.674 ns (1 allocation: 96 bytes)


10-element MArray{Tuple{10},Float64,1,10} with indices SOneTo(10):
 3.713271347463431
 3.238331062789676
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462267
 3.3042023669287377
 4.00475573524668
 2.9892531083648177

In [122]:
@btime compute_inplace!($outputtestSS, $fctestSS, $inputtestSS)

  134.839 ns (2 allocations: 320 bytes)


10-element SizedArray{Tuple{10},Float64,1,1} with indices SOneTo(10):
 3.713271347463431
 3.238331062789676
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462267
 3.3042023669287377
 4.00475573524668
 2.9892531083648177

In [148]:
function compute_inplace_unwrapped!(y, fc::FC, x)
    W,b = fc.W, fc.b
    tmp = fc.tmp
    mul!(tmp, W, x)
    for i in 1:length(y)
        y[i] = relu(tmp[i] + b[i])
    end
    y
end

compute_inplace_unwrapped! (generic function with 1 method)

In [150]:
@btime compute_inplace_unwrapped!($outputtest, $fctest, $inputtest)

  105.329 ns (0 allocations: 0 bytes)


10-element Array{Float64,1}:
 3.713271347463431
 3.2383310627896753
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462266
 3.304202366928738
 4.00475573524668
 2.9892531083648177

In [151]:
@btime compute_inplace_unwrapped!($outputtestMV, $fctestMV, $inputtestMV)

  62.679 ns (0 allocations: 0 bytes)


10-element MArray{Tuple{10},Float64,1,10} with indices SOneTo(10):
 3.713271347463431
 3.238331062789676
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462267
 3.3042023669287377
 4.00475573524668
 2.9892531083648177

In [152]:
@btime compute_inplace_unwrapped!($outputtestSS, $fctestSS, $inputtestSS)

  69.454 ns (0 allocations: 0 bytes)


10-element SizedArray{Tuple{10},Float64,1,1} with indices SOneTo(10):
 3.713271347463431
 3.238331062789676
 3.987669369832495
 3.4561800822731468
 4.5100428786778135
 4.661435217170334
 4.244956701462267
 3.3042023669287377
 4.00475573524668
 2.9892531083648177

# Function barriers

In [153]:
struct NeuralNet{T}
    layers::Vector{AbstractLayer}
    tmp::Vector{Vector{T}}
    res::Vector{T}
end

In [181]:
function compute_naive(Ws, bs, x0)
    x = x0
    for i in 1:length(Ws)
        x = relu.(Ws[i]*x + bs[i])
        #x = y
    end
    return x
end
function compute!(nn::NeuralNet, x)
    nn.tmp[1] .= x
    for i in 1:length(nn.layers)
        compute_inplace_unwrapped!(
            nn.tmp[i+1],
            nn.layers[i],
            nn.tmp[i]
        )
    end
    return nn.tmp[end]
end

compute! (generic function with 1 method)

In [159]:
s1x,s1y = 10, 15
s2x,s2y = 5, 10

W1test   = rand(s1x,s1y)
W1testSV = SMatrix{s1x,s1y}(W1test)
W1testMV = MMatrix{s1x,s1y}(W1test)
W1testSS = Size(s1x,s1y)(W1test)
W2test   = rand(s2x,s2y)
W2testSV = SMatrix{s2x,s2y}(W2test)
W2testMV = MMatrix{s2x,s2y}(W2test)
W2testSS = Size(s2x,s2y)(W2test)

b1test   = rand(s1x)
b1testSV = SVector{s1x}(b1test)
b1testMV = MVector{s1x}(b1test)
b1testSS = Size(s1x)(b1test)
b2test   = rand(s2x)
b2testSV = SVector{s2x}(b2test)
b2testMV = MVector{s2x}(b2test)
b2testSS = Size(s2x)(b2test)

fc1test   = FC(W1test, b1test)
fc1testSV = FC(W1testSV, b1testSV)
fc1testMV = FC(W1testMV, b1testMV)
fc1testSS = FC(W1testSS, b1testSS)
fc2test   = FC(W2test, b2test)
fc2testSV = FC(W2testSV, b2testSV)
fc2testMV = FC(W2testMV, b2testMV)
fc2testSS = FC(W2testSS, b2testSS)

inputtest = rand(s1y)
inputtestSV = SVector{s1y}(inputtest)
inputtestMV = MVector{s1y}(inputtest)
inputtestSS = Size(s1y)(inputtest);

In [189]:
nntest   = NeuralNet{Float64}([fc1test, fc2test], [zeros(s1y),zeros(s1x),zeros(s2x)], zeros(s2x))
nntestSV = NeuralNet{Float64}([fc1testSV, fc2testSV], [zeros(s1y),zeros(s1x),zeros(s2x)], zeros(s2x))
nntestMV = NeuralNet{Float64}([fc1testMV, fc2testMV], [zeros(s1y),zeros(s1x),zeros(s2x)], zeros(s2x))
nntestSS = NeuralNet{Float64}([fc1testSS, fc2testSS], [zeros(s1y),zeros(s1x),zeros(s2x)], zeros(s2x));

In [190]:
@btime compute!(nntest, inputtest)

  294.028 ns (0 allocations: 0 bytes)


5-element Array{Float64,1}:
 23.31074108279547
 25.052350941408143
 25.522754182283403
 21.84304598257672
 20.674088841651265

In [191]:
@btime compute_naive([W1test, W2test], [b1test, b2test], inputtest)

  691.257 ns (8 allocations: 1.03 KiB)


5-element Array{Float64,1}:
 23.31074108279547
 25.052350941408143
 25.522754182283403
 21.84304598257672
 20.674088841651265

In [194]:
@btime compute!(nntestMV, inputtestMV)

  226.925 ns (0 allocations: 0 bytes)


5-element Array{Float64,1}:
 23.310741082795474
 25.052350941408147
 25.522754182283403
 21.843045982576715
 20.674088841651265