$$ \#\theta = \sum_{l=0}^{L-1} (n_l + 1)\, n_{l+1} $$

In [None]:
# AUTHOR: Axel Bjarkar Sigurjónsson
# This is a modified 
using Flux, LinearAlgebra, IterTools

In [None]:

"""
    check_dim(x)

Return the number of data points stored in `x`.

A matrix is read as one data point per column, so its number of columns is
returned. A vector is read as a single column and yields `1`. Any other input
raises an `ErrorException` that names the offending type.
"""
check_dim(x::AbstractMatrix) = size(x, 2)
check_dim(::AbstractVector) = 1
function check_dim(x)
    type = typeof(x)
    error("Input data type: $type is neither a matrix or column vector")
end


In [None]:

"""
    node_count(model)

Return the number of nodes in every layer of `model`, input layer first and
output layer last, as a `Vector{Int}`.

The sizes are read from `Flux.params(model)`. The parameters are assumed to
alternate weight matrix, bias vector, weight matrix, ... (i.e. every layer
carries a bias) -- TODO confirm for models with bias-free layers.
"""
function node_count(model)
    # Collect the parameters once instead of rebuilding them on every access.
    ps = Flux.params(model)
    nparams = length(ps)

    # Odd positions hold the weights; the second dimension of a weight matrix is
    # the number of nodes feeding into that layer.
    n = Int[size(ps[i])[2] for i in 1:2:nparams]

    # The output layer has no weight matrix of its own in this list, so its size
    # is taken from the length of the last parameter array.
    push!(n, length(ps[nparams]))
    return n
end


In [101]:
# Small test network: Dense layers 1 -> 10 -> 1 -> 6 -> 1.
m = Chain(Dense(1 => 10), Dense(10 => 1), Dense(1 => 6), Dense(6 => 1))
display(m)

# Show every parameter array of the chain, in the order Flux.params yields them.
foreach(display, Flux.params(m))

Chain(
  Dense(1 => 10),                       [90m# 20 parameters[39m
  Dense(10 => 1),                       [90m# 11 parameters[39m
  Dense(1 => 6),                        [90m# 12 parameters[39m
  Dense(6 => 1),                        [90m# 7 parameters[39m
) [90m                  # Total: 8 arrays, [39m50 parameters, 712 bytes.

10×1 Matrix{Float32}:
 -0.31009614
  0.29096594
  0.53697085
 -0.48650137
 -0.41816324
 -0.67582506
  0.5830284
 -0.69080883
 -0.22767554
  0.4261204

10-element Vector{Float32}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

1×10 Matrix{Float32}:
 -0.697257  0.186764  0.0431383  -0.017348  …  -0.1415  0.53403  -0.470015

1-element Vector{Float32}:
 0.0

6×1 Matrix{Float32}:
 -0.59647983
 -0.5012748
 -0.74013877
  0.51998526
  0.6316856
  0.5442218

6-element Vector{Float32}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

1×6 Matrix{Float32}:
 -0.610163  0.418035  0.301192  -0.138591  0.66567  0.378746

1-element Vector{Float32}:
 0.0

<h1>Now we normalize the weights</h1>

In [110]:

"""
    norm_weights(model; verbose=true)

Rescale the weight matrices of `model` in place and return `nothing`.

Each weight matrix is divided by `sqrt(n)`, where `n` is the node count of the
layer feeding into it as reported by `node_count(model)`. The parameter arrays
at even positions of `Flux.params(model)` (the biases) are not modified.

The scaling is applied to whatever values the weights currently hold, so
calling the function a second time scales the already-scaled weights again.

# Keywords
- `verbose`: when `true` (the default), `display` the node count and the weight
  matrix before and after scaling for every layer.
"""
function norm_weights(model; verbose=true)
    # Collect the parameters once; the arrays are the model's own, so writing
    # into them updates the model.
    ps = Flux.params(model)
    nNodes = node_count(model)

    for n in 1:length(nNodes)-1
        ni = nNodes[n]
        W = ps[2n - 1]  # weights sit at odd positions of the parameter list

        if verbose
            display(ni)
            display(W)
        end

        # Fused in-place division: no temporary matrix is allocated.
        W ./= sqrt(ni)

        verbose && display(W)
    end
    return nothing
end


norm_weights (generic function with 1 method)

In [111]:
# Rescale the weights of `m`. norm_weights writes into the model's parameter
# arrays, so running this cell again scales the already-scaled weights once more.
norm_weights(m)
# Optional check: show every parameter array after the rescaling.
# for i in Flux.params(m)
#     display(i)
# end

1

10×1 Matrix{Float32}:
 -0.31009614
  0.29096594
  0.53697085
 -0.48650137
 -0.41816324
 -0.67582506
  0.5830284
 -0.69080883
 -0.22767554
  0.4261204

10×1 Matrix{Float32}:
 -0.31009614
  0.29096594
  0.53697085
 -0.48650137
 -0.41816324
 -0.67582506
  0.5830284
 -0.69080883
 -0.22767554
  0.4261204

10

1×10 Matrix{Float32}:
 -0.0220492  0.00590598  0.00136415  …  -0.00447464  0.0168875  -0.0148632

1×10 Matrix{Float32}:
 -0.00697257  0.00186764  0.000431383  …  -0.001415  0.0053403  -0.00470015

1

6×1 Matrix{Float32}:
 -0.59647983
 -0.5012748
 -0.74013877
  0.51998526
  0.6316856
  0.5442218

6×1 Matrix{Float32}:
 -0.59647983
 -0.5012748
 -0.74013877
  0.51998526
  0.6316856
  0.5442218

6

1×6 Matrix{Float32}:
 -0.0415164  0.0284437  0.0204935  -0.00942989  0.0452931  0.0257704

1×6 Matrix{Float32}:
 -0.016949  0.0116121  0.00836644  -0.00384974  0.0184908  0.0105207

<h1>Now we compute the kernels, which are supposed to be normalized</h1>

In [130]:
"""
    jac(model, x, f, param)

Return the gradient of output component `f` of `model(x)` with respect to the
parameter array at position `param` of `Flux.params(model)`.
"""
function jac(model, x, f, param)
    ps = Flux.params(model)
    grads = Flux.gradient(ps) do
        model(x)[f]
    end
    return grads[ps[param]]
end

"""
    Df(model, x)

Return the Jacobian of `model` at the single data point `x` with respect to
its parameters, excluding the last parameter array (the final bias).

The result is a `Float64` matrix with one row per scalar parameter and one
column per output component. Within a column the parameter arrays appear in
the order of `Flux.params(model)`, each flattened column-major.
"""
function Df(model, x)
    # x: single data point
    ps = Flux.params(model)
    nparams = length(ps)
    nout = length(model(x))

    # Total number of scalar parameters, excluding the final bias.
    total_amount_of_θ = sum(length, ps) - length(ps[nparams])

    # Preallocated (zero-filled) Jacobian.
    Jacob = zeros(total_amount_of_θ, nout)

    for func_i in 1:nout
        # One backward pass per output component yields the gradient with
        # respect to every parameter array at once.
        grads = Flux.gradient(() -> model(x)[func_i], ps)

        offset = 0
        for param_i in 1:(nparams - 1)
            p = ps[param_i]
            g = grads[p]
            len = length(p)
            # A parameter without a gradient entry keeps its zero rows.
            if g !== nothing
                Jacob[offset+1:offset+len, func_i] .= vec(g)
            end
            offset += len
        end
    end

    return Jacob  # the Df matrix of the reference text ("the bible" in the original note)
end

"""
    kernel(model, x)

Assemble the kernel matrix of `model` over the data points in `x`.

`x` holds one data point per column (a vector counts as a single point). With
`N` data points and `m` model outputs the result is an `N*m × N*m` `Float64`
matrix whose `(i, j)` block of size `m × m` is `Df(model, x_i)' * Df(model, x_j)`.
"""
function kernel(model, x)
    N = check_dim(x)
    m = length(model(x[:, 1]))  # number of output components of the model

    # Evaluate the Jacobian once per data point and reuse it for every block,
    # instead of recomputing it for each (i, j) pair.
    jacobians = [Df(model, x[:, i]) for i in 1:N]

    K = zeros(N * m, N * m)
    for j in 1:N, i in 1:N
        K[(i-1)*m+1:i*m, (j-1)*m+1:j*m] .= jacobians[i]' * jacobians[j]
    end

    return K
end

# Gradient of the first output of `my_sigm` at x = 0.5 with respect to the
# second-to-last parameter array, flattened to a vector.
# NOTE(review): `my_sigm` is defined in a later cell, so that cell has to run first.
jac(my_sigm, hcat(0.5),1,length(Flux.params(my_sigm))-1)[:]

1×100 Matrix{Float32}:
 0.506202  0.507597  0.4888  0.502825  …  0.495301  0.497027  0.469968

The models from "activationFunctionTest.ipynb"

In [122]:
using Random

# Sample grid: Nx equally spaced points on [xa, xb].
Nx = 21
xa = -1.0
xb = 1.0
xVec = collect(range(xa, stop=xb, length=Nx));

# Some fake data: a sine wave plus Gaussian noise.
# NOTE(review): this noise is drawn before the seed below is set.
yVec = sin.(2 * pi * xVec) .+ 0.1 * randn(size(xVec));

Random.seed!(123)

n1 = 100
W = randn(n1, 1);   # Modelling N(0,1) distributed W^[1]
b = randn(n1, 1);   # Modelling N(0,1) distributed b^[0]
W0 = randn(1, n1);  # Modelling N(0,1) distributed W^[0]

# Two-layer network: 1 -> n1 with sigmoid activation, then n1 -> 1.
my_sigm = Chain(Dense(1 => n1, sigmoid), Dense(n1 => 1))

# Show every parameter array of the network.
foreach(display, Flux.params(my_sigm))

100×1 Matrix{Float32}:
  0.049617905
  0.060779575
 -0.08961165
  0.022597395
  0.028293798
 -0.045233548
  0.10096506
  0.0854967
  0.05472716
 -0.1872795
  ⋮
  0.13397047
  0.19957124
  0.18591695
 -0.19495997
 -0.037545957
 -0.18100029
 -0.03759733
 -0.023785785
 -0.24054728

100-element Vector{Float32}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

1×100 Matrix{Float32}:
 0.200566  -0.224587  0.164237  …  -0.0553575  0.222334  -0.232293

1-element Vector{Float32}:
 0.0

In [119]:
# Rescale the weights of `my_sigm` in place, then list all of its parameter arrays.
norm_weights(my_sigm)
foreach(display, Flux.params(my_sigm))

1

100×1 Matrix{Float32}:
  0.049617905
  0.060779575
 -0.08961165
  0.022597395
  0.028293798
 -0.045233548
  0.10096506
  0.0854967
  0.05472716
 -0.1872795
  ⋮
  0.13397047
  0.19957124
  0.18591695
 -0.19495997
 -0.037545957
 -0.18100029
 -0.03759733
 -0.023785785
 -0.24054728

100×1 Matrix{Float32}:
  0.049617905
  0.060779575
 -0.08961165
  0.022597395
  0.028293798
 -0.045233548
  0.10096506
  0.0854967
  0.05472716
 -0.1872795
  ⋮
  0.13397047
  0.19957124
  0.18591695
 -0.19495997
 -0.037545957
 -0.18100029
 -0.03759733
 -0.023785785
 -0.24054728

100

1×100 Matrix{Float32}:
 0.200566  -0.224587  0.164237  …  -0.0553575  0.222334  -0.232293

1×100 Matrix{Float32}:
 0.0200566  -0.0224587  0.0164237  …  -0.00553575  0.0222334  -0.0232293

100×1 Matrix{Float32}:
  0.049617905
  0.060779575
 -0.08961165
  0.022597395
  0.028293798
 -0.045233548
  0.10096506
  0.0854967
  0.05472716
 -0.1872795
  ⋮
  0.13397047
  0.19957124
  0.18591695
 -0.19495997
 -0.037545957
 -0.18100029
 -0.03759733
 -0.023785785
 -0.24054728

100-element Vector{Float32}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

1×100 Matrix{Float32}:
 0.0200566  -0.0224587  0.0164237  …  -0.00553575  0.0222334  -0.0232293

1-element Vector{Float32}:
 0.0

In [120]:
# Kernel matrix over the sample grid; the grid is passed as a 1×Nx matrix,
# i.e. one data point per column.
K = kernel(my_sigm, permutedims(xVec))

21×21 Matrix{Float64}:
 24.9049  24.9043  24.9036  24.9029  …  24.892   24.8913  24.8906  24.8899
 24.9043  24.9047  24.9051  24.9055     24.9102  24.9106  24.911   24.9114
 24.9036  24.9051  24.9066  24.9081     24.9284  24.9299  24.9314  24.933
 24.9029  24.9055  24.9081  24.9107     24.9466  24.9492  24.9518  24.9545
 24.9021  24.9059  24.9096  24.9133     24.9648  24.9685  24.9722  24.976
 24.9014  24.9062  24.9111  24.9159  …  24.983   24.9878  24.9927  24.9975
 24.9006  24.9066  24.9125  24.9185     25.0012  25.0071  25.0131  25.0191
 24.8998  24.9069  24.914   24.921      25.0194  25.0265  25.0335  25.0406
 24.899   24.9072  24.9154  24.9236     25.0376  25.0458  25.054   25.0621
 24.8982  24.9075  24.9168  24.9261     25.0558  25.0651  25.0744  25.0837
  ⋮                                  ⋱                              ⋮
 24.8958  24.9085  24.9211  24.9338     25.1104  25.123   25.1357  25.1483
 24.8951  24.9088  24.9226  24.9363     25.1286  25.1424  25.1561  25.1699
 24.8943 

In [97]:
# Eigenvalues of the kernel matrix K.
eigen(K).values

21-element Vector{Float64}:
 6.818029403723471e-17
 1.0081027923495614e-16
 1.0084268830678011e-16
 1.1243091310290042e-16
 1.302474807006361e-16
 1.3277782132797353e-16
 1.4570873489871668e-16
 1.5388201549486105e-16
 1.5882397298156224e-16
 1.6820686644037814e-16
 ⋮
 2.328579882968902e-16
 2.5994814082090454e-16
 3.048916354469903e-16
 3.4002426841753824e-16
 6.106226605463413e-16
 1.3987419559560172e-11
 1.13354315214589e-9
 0.008459260446600269
 5.250036075645336

In [102]:
# Jacobian of the small chain `m` at the single input 1, passed as a 1×1 matrix.
Df(m,hcat(1))

49×1 Matrix{Float64}:
 -0.3388855755329132
  0.09077209234237671
  0.02096637524664402
 -0.00843160878866911
  0.33954623341560364
  0.017555786296725273
 -0.3388819992542267
 -0.06877296417951584
  0.2595527172088623
 -0.22843971848487854
  ⋮
 -0.13859057426452637
  0.6656697392463684
  0.3787462115287781
  0.3847413659095764
  0.3233322203159332
  0.47740423679351807
 -0.3354008197784424
 -0.40744978189468384
 -0.35103389620780945