FFN output distribution for input gaussian-distributed coefficients

In [144]:
import math
import numpy as np
import torch
from torch import Tensor
import torch.nn as nn

In [145]:
class FeedForwardNet(nn.Module):

    def __init__(self, n0=3, nk=10, nl=3, l=3, bias_on=False):
        '''n0: # dimension of x
           nk: # hidden nodes
           nl: # dimension of y
           l: # number of layers
           bias_on: # whether bias is included into linear preactivations'''
        super().__init__()
        self.n0=n0
        self.nk=nk
        self.nl=nl
        self.bias_on = bias_on
        self.log_level = None
        self.linears = []
        print("FeedForwardNet created with n0={}, nk={}, nl={}, l={}, bias_on={}".format(n0, nk, nl, l, bias_on))

        self.linears.append(nn.Linear(n0, nk, bias=bias_on))
        if l > 2:
            for _ in range(2, l):
                self.linears.append(nn.Linear(nk, nk, bias=bias_on))
        if l > 1:
            self.linears.append(nn.Linear(nk, nl, bias=bias_on))

    def set_log_level(self, value):
        self.log_level = value

    def get_log_level(self):
        if self.log_level in ("debug", "info", "warning", "error"):
            return self.log_level
        else:
            return "info"

    def init_weights(self, cb=1.0, cw=1.0):
        if self.get_log_level() == "debug":
            print("FeedForwardNet weights initialised with cb={}, cw={}".format(cb, cw))

        #Weight initialisation as in 2.19, 2.20
        n_prev = self.n0
        for linear in self.linears:
            nn.init.normal_(linear.weight, mean = 0., std = math.sqrt(cw/n_prev))
            n_prev = linear.weight.size()[0]
            if self.bias_on:
                nn.init.normal_(linear.bias, mean = 0., std = math.sqrt(cb))



FFN with nn.PReLU(init=1); 

In [146]:
class ParametricReLUNet(FeedForwardNet):
    def __init__(self, n0=3, nk=10, nl=3, l=3, bias_on=False):
        super().__init__(n0, nk, nl, l, bias_on)
        self.slope_positive = None
        self.slope_negative = None

    def set_slopes(self, slope_positive = 1.0, slope_negative = 0.25):
        self.slope_positive = slope_positive
        self.slope_negative = slope_negative

    def PReLU(self, input: Tensor) -> Tensor:
        for pos in range(input.size(dim=0)):
            input[pos] = input[pos] * (self.slope_positive if input[pos] >= 0 else self.slope_negative)
        return input

    def forward(self, xx):
        if self.slope_positive == None:
            raise Exception("To use forward set slopes with call ParametricReLUNet.set_slopes(...)")

        zk = torch.tensor(xx.transpose(), dtype=torch.float32)
        for linear in self.linears:
            #print("!!:{}".format(linear.weight.size()))
            zk = linear(zk)
            zk = self.PReLU(zk)
        return zk.detach().numpy().transpose()

In [93]:
testPReLU = ParametricReLUNet()
testPReLU.set_slopes()
resultPReLU = testPReLU.PReLU(torch.tensor(np.array([1.1,-2.2]), dtype=torch.float32))
print(resultPReLU)


n0=3, nk=10, nl=3, l=3, bias_on=False
tensor([ 1.1000, -0.5500])


In [182]:
'''n0: # dimension of x
    nk: # hidden nodes
    nl: # dimension of y
    l: # number of layers
    nd: # number of points in train-set'''
n0,nk,nl,l=3,10000,2,10
nd = 2
slope_plus, slope_minus=1.0, 0.5
experiments_number = 100

testNet = ParametricReLUNet(n0=n0,nk=nk,nl=nl,l=l)
testNet.set_log_level("info")
testNet.set_slopes(slope_plus, slope_minus)
xx = np.random.normal(size=(n0, nd)).astype(np.float32)
yy = np.zeros((experiments_number, nl, nd))
cw= 2.0/(slope_plus**2.0 + slope_minus**2.0)

#for each experiment re-initialisation of the weights with recalculation
for experiment_number in range(experiments_number):
    #weights distribution is initialisied as in (5.67)
    testNet.init_weights(0, cw)
    for col in range(nd):
        res = testNet.forward(xx[:,col])
        for row in range(nl):
            yy[experiment_number,row,col] = res[row]

print("xx:", xx)
print("yy:", yy)

FeedForwardNet created with n0=3, nk=10000, nl=2, l=10, bias_on=False
xx: [[-1.2542083  -0.792888  ]
 [-0.28262737 -0.89341635]
 [ 1.3196152  -1.8420714 ]]
yy: [[[-0.3833212  -0.10483067]
  [ 1.07970071  2.19558668]]

 [[-0.34438813  1.08525407]
  [ 0.91630888 -0.16859892]]

 [[ 0.95373046  2.38643456]
  [-0.49385935 -0.69601178]]

 [[-0.09194447 -0.15572333]
  [-0.77740848  0.40690744]]

 [[-0.95291948  0.15772772]
  [-0.51302862 -0.65267718]]

 [[ 3.42854977  1.78155673]
  [-0.25860655  0.62528223]]

 [[-0.4606086  -0.43085718]
  [ 0.83513224 -0.26133293]]

 [[-0.37925041  1.10424519]
  [ 0.82586855 -1.1900475 ]]

 [[-0.58366716 -0.14379731]
  [-0.55097389  0.38153476]]

 [[-0.25031808 -0.40673247]
  [ 1.39455724 -0.26412091]]

 [[ 0.14102483 -0.58789814]
  [ 0.24600318 -1.53212595]]

 [[-0.10491222 -0.01507721]
  [-0.43623501  0.99764895]]

 [[-1.23462093 -2.21511292]
  [-0.4116894  -0.82427794]]

 [[-0.10732216  1.9897995 ]
  [-0.15465878  2.81567645]]

 [[-0.99871302 -0.2409167 ]


In [183]:
def sample_covariance(neuron_one, trainpoint_one, neuron_two, trainpoint_two, yy):
    one = yy[:, neuron_one-1, trainpoint_one-1]
    two = yy[:, neuron_two-1, trainpoint_two-1]
    nn, mean_one, mean_two, sum = len(one), np.mean(one), np.mean(two), 0.0
    #print(one, two, len(one), mean_one, mean_two)
    for pos in range(nn):
        sum += (one[pos]-mean_one)*(two[pos]-mean_two)

    return sum/(nn - 1)


In [188]:
for neuron1 in range(1, nl+1):
      for neuron2 in range(neuron1, nl+1):
            for trainpoint1 in range(1, nd+1):
                  for trainpoint2 in range(1 if neuron1 != neuron2 else trainpoint1, nd+1):
                        print("Sample covariance between neuron {}, trainpoint {} and neuron {}, trainpoint {}: {}"\
                              .format(neuron1, trainpoint1, neuron2, trainpoint2\
                                      , sample_covariance(neuron1,trainpoint1,neuron2,trainpoint2,yy)))


Sample covariance between neuron 1, trainpoint 1 and neuron 1, trainpoint 1: 1.3470337797143601
Sample covariance between neuron 1, trainpoint 1 and neuron 1, trainpoint 2: 0.6128995584820138
Sample covariance between neuron 1, trainpoint 2 and neuron 1, trainpoint 2: 1.5647381343248983
Sample covariance between neuron 1, trainpoint 1 and neuron 2, trainpoint 1: 0.14137190069098998
Sample covariance between neuron 1, trainpoint 1 and neuron 2, trainpoint 2: 0.18619428088752862
Sample covariance between neuron 1, trainpoint 2 and neuron 2, trainpoint 1: 0.12801494183882242
Sample covariance between neuron 1, trainpoint 2 and neuron 2, trainpoint 2: 0.3940895719566503
Sample covariance between neuron 2, trainpoint 1 and neuron 2, trainpoint 1: 0.9351642066810827
Sample covariance between neuron 2, trainpoint 1 and neuron 2, trainpoint 2: 0.14633049850961322
Sample covariance between neuron 2, trainpoint 2 and neuron 2, trainpoint 2: 1.1242276937578675


In [189]:
def K_xx(trainpoint_one, trainpoint_two, cw, xx):
    one, two = xx[:,trainpoint_one-1],xx[:,trainpoint_two-1]
    nn, sum = len(one), 0.0
    for pos in range(nn):
        sum += one[pos]*two[pos]

    return sum*cw/nn

In [190]:
for trainpoint1 in range(1, nd+1):
    for trainpoint2 in range(trainpoint1, nd+1):
        print("Metric (4.8) for trainpoint {} and trainpoint {}: {}"\
              .format(trainpoint1, trainpoint2, K_xx(trainpoint1, trainpoint2, cw, xx)))


Metric (4.8) for trainpoint 1 and trainpoint 1: 1.810293972492218
Metric (4.8) for trainpoint 1 and trainpoint 2: -0.6313999334971111
Metric (4.8) for trainpoint 2 and trainpoint 2: 2.570715300242106
