In [2]:
import math
import torch
from torch import FloatTensor

In [451]:
NUMBERS = '0123456789' 
def increment(s): # function taken from here:
    out = ''      # https://codegolf.stackexchange.com/questions/38033/increment-every-number-in-a-string

    number = ''
    for c in s:
        if c in NUMBERS:
            number += c
        else:
            if number != '':
                out += str(int(number) + 1)
                number = ''
            out += c

    if number != '':
        out += str(int(number) + 1)
        number = ''

    return out

In [452]:
def generate_disc_set(nb):
    input = FloatTensor(nb, 2).uniform_(0, 1)
    target = torch.norm(input,2,1) < math.sqrt(1/(2*math.pi))
    return input, target.float()

train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

mean, std = train_input.mean(), train_input.std()

train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std);

In [453]:
# class Parameter

class Parameter():
    def __init__(self, name, tensor, gradient):
        self.name = name
        self.data = tensor
        self.grad = gradient

In [454]:
# Simple structure for a Module

class Module ( Parameter ) :
    def __init__(self):
        super(Parameter, self).__init__()
        self.param = []
        
    def forward ( self , * input ) :
        raise NotImplementedError
        
    def backward ( self , * gradwrtoutput ) :
        raise NotImplementedError
        
    def init_parameters ( self ):
        raise NotImplementedError
        
    def add_parameter( self , parameter ):
        if parameter.__class__.__name__ == 'Parameter':
            self.param.append((parameter.name, parameter.data, parameter.grad))
        else:
            if parameter.__class__.__name__ == 'list':                        
                if parameter != []:
                    self.param.append(parameter)
                    
    def zero_grad( self ):
        for i in range(len(self.param)):
            for j in range(len(self.param[i])):
                self.param[i][j][2][:] = 0
                
    def update_one_parameter( self, name, new_param, new_grad):
        for i in range(len(self.param)):
            for j in range(len(self.param[i])):
                if self.param[i][j][0] == name:
                    self.param[i][j][1] = new_param
                    self.param[i][j][2] = new_grad
                    
    def update_all_parameters( self ):
        raise NotImplementedError
    def parameters ( self ):
        return self.param

In [455]:
# Module for MSEloss

class MSEloss( Module ):
    def __init__(self):
        super(MSEloss, self).__init__()
    def forward ( self , input, target ):
        return input.sub(target).pow(2).sum()    
    def backward ( self , input, target ):
        return 2*(input.sub(target))    

In [456]:
# Module ReLU

class ReLU( Module ):
    def __init__(self):
        super(ReLU, self).__init__()
    def forward( self, input ):
        return input.clamp(min = 0)
    def backward( self, input ):
        input[input <= 0] = 0
        input[input > 0] = 1

In [457]:
# Module Tanh

class Tanh( Module ):
    def __init__(self):
        super(Tanh, self).__init__()
    def forward( self , input ):
        return input.tanh()
    def backward( self, input ):
        return 4 * (input.exp() + input.mul(-1).exp()).pow(-2)

In [458]:
# test module linear

class Linear( Module ):
    
    Linear_counter = 0
    
    def __init__(self, input_features, output_features, eps=1e-6):
        super(Linear, self).__init__()
        self.input_features = input_features
        self.output_features = output_features
        self.init_parameters(input_features, output_features, eps)
        Linear.Linear_counter +=1
        
    def init_parameters ( self, input_features, output_features, eps ):
        weigths_name = 'weights0'
        bias_name = 'bias0'
        for i in range(self.Linear_counter):
            weigths_name = increment(weigths_name)
            bias_name = increment(bias_name)
        self.weights = Parameter(weigths_name, torch.Tensor(output_features, input_features),
                                 torch.Tensor(output_features, input_features))
        self.bias = Parameter(bias_name, torch.Tensor(output_features), torch.Tensor(output_features))
        self.weights.data.normal_(0, eps)
        self.weights.grad.zero_()
        self.bias.grad.zero_()
        self.bias.data.normal_(0, eps)
        self.add_parameter(self.weights)
        self.add_parameter(self.bias)
        
    def forward( self , input):
        output = input.matmul(self.weights.data.t())
        output += self.bias.data
        return output
    
    def backward( self, dloss ):
        print(self.weights.data.t())
        return self.weights.data.t().mm(dloss)
        

In [459]:
# test Net

Linear.Linear_counter = 0

class Net( Module ):
    def __init__(self, hidden_layer):
        super(Net, self).__init__()
        self.fc1 = Linear(2, hidden_layer)
        self.fc2 = Tanh()
        self.fc3 = Linear(hidden_layer, 1)
        self.add_parameter( self.fc1.parameters() )
        self.add_parameter( self.fc2.parameters() )
        self.add_parameter( self.fc3.parameters() )
        
        self.x0 = 0
        self.s1 = 0
        self.x1 = 0
        self.s2 = 0
        self.x2 = 0

    def forward(self, x):
        self.x0 = x
        self.s1 = self.fc1.forward( self.x0 )
        self.x1 = self.fc2.forward( self.s1 )
        self.s2 = self.fc3.forward( self.x1 )
        self.x2 = self.fc2.forward( self.s2 ) 
        return self.x2
    def backward(self, dloss):
        dl_dx2 = dloss
        dl_ds2 = self.fc2.backward(self.s2) * dl_dx2
        dl_dx1 = self.fc3.backward(dl_ds2)
        dl_ds1 = self.fc2.backward(self.s1) * dl_dx1
        
        dl_dwfc3.add_(dl_ds2.view(-1, 1).mm(self.x1.view(1, -1)))
        dl_dbfc3.add_(dl_ds2)
        dl_dwfc1.add_(dl_ds1.view(-1, 1).mm(self.x0.view(1, -1)))
        dl_dbfc1.add_(dl_ds1)
        
        return dl_dwfc1, dl_dbfc1, dl_dwfc3, dl_dbfc3
    
    def update_all_parameters( self ):
        return []
    
model4 = Net(100)
criterion = MSEloss()
output = model4.forward(train_input)
dloss = criterion.backward(output, train_target)
dl_dwfc1, dl_dbfc1, dl_dwfc3, dl_dbfc3 = model4.backward(dloss)


1.00000e-06 *
 -0.2179
  0.1015
 -0.8712
 -0.7506
  1.3573
 -1.3706
 -0.7605
  0.5757
 -0.9414
 -0.6347
 -0.8749
  0.6937
  0.9265
  1.1002
  0.8966
  2.9314
 -0.0430
  0.2860
  1.5812
 -0.0351
  0.5213
 -0.0412
 -0.1651
  1.5628
 -0.5410
 -0.1206
  1.6604
  0.7558
 -0.1113
  2.0788
  1.4717
 -0.0513
  1.1417
 -1.8669
  0.1443
 -1.9956
  1.5940
  0.7869
 -1.3360
 -0.4191
 -0.5242
 -0.0867
  0.4952
 -1.0829
  1.2301
 -0.0039
  0.9629
 -1.0122
  0.3068
 -0.1220
 -0.9768
 -0.2973
 -0.5266
 -1.3116
  0.0949
  0.2603
 -1.3233
  1.2372
  1.4665
 -0.5633
 -1.0312
  1.5584
  0.2656
  0.1050
  0.8880
 -1.9023
 -0.1157
 -1.5434
 -0.3487
  1.6651
 -0.1621
  1.3767
  2.5949
  1.2992
 -0.0164
  0.9533
  1.1453
 -1.2265
  0.9878
  0.1553
 -0.1385
  0.1965
 -0.5476
 -1.1106
  0.4544
  1.0028
  1.9383
  1.0289
  0.8282
  0.3816
 -0.3557
 -1.8828
 -0.1062
  1.3086
  1.7038
 -0.0842
  0.7337
  0.0570
 -1.2351
  0.0591
[torch.FloatTensor of size 100x1]



RuntimeError: size mismatch, m1: [100 x 1], m2: [1000 x 1000] at /opt/conda/conda-bld/pytorch_1518243271935/work/torch/lib/TH/generic/THTensorMath.c:1434

In [408]:
test = model4.parameters()

In [425]:
a = torch.Tensor(1000, 2).normal_(0, 1e-3)
b = torch.Tensor(1000, 2).normal_(0, 1e-3)
c = 2 * (a.sub(b))


 3.1233e-04  2.1430e-03
 4.1766e-04 -4.4921e-04
 1.1467e-03  2.1319e-03
           ⋮            
-1.8960e-04  9.3419e-05
-1.6098e-03 -1.8226e-04
 1.4650e-03 -7.8535e-04
[torch.FloatTensor of size 1000x2]



In [84]:
import torch
import math

from torch import optim
from torch import Tensor
from torch.autograd import Variable
from torch import nn

train_input, train_target = Variable(train_input), Variable(train_target)
test_input, test_target = Variable(test_input), Variable(test_target)

mini_batch_size = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 1e-1)

def train_model(model, train_input, train_target):
    nb_epochs = 250

    for e in range(0, nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss.backward()
            optimizer.step()


def create_shallow_model():
    return nn.Sequential(
        nn.Linear(2, 128),
        nn.ReLU(),
        nn.Linear(128, 2)
    )

In [85]:
model = create_shallow_model()
for param in model.parameters():
    print (param.data)


-0.6942 -0.3747
-0.5409  0.1554
 0.3436  0.2803
-0.1926 -0.0961
 0.6756 -0.1201
-0.3381  0.1005
 0.1184 -0.3067
 0.5240  0.3154
-0.2419  0.1664
 0.3632 -0.0056
-0.6728  0.1585
 0.6073  0.0428
 0.1415  0.0611
-0.2524 -0.0338
 0.6873  0.5168
-0.0284 -0.1292
-0.2894 -0.6843
 0.2632 -0.1540
 0.6541 -0.0701
 0.2755  0.6429
 0.0607 -0.5694
 0.0917  0.4707
-0.2657  0.2632
-0.6404 -0.4735
 0.3192  0.3457
 0.5976  0.4233
-0.4498  0.7069
-0.5443  0.3204
-0.6347  0.6735
-0.4395 -0.2654
 0.5578 -0.1573
 0.2167 -0.1489
 0.1859 -0.6394
 0.6642 -0.4088
 0.1396 -0.5506
 0.3834 -0.6786
 0.1033 -0.4065
-0.0743 -0.5520
-0.4050  0.3709
-0.0510  0.6013
-0.6080  0.4766
 0.3596 -0.2839
-0.3375  0.5258
-0.0922 -0.2590
 0.3924  0.5540
 0.6163 -0.6205
-0.6690 -0.6774
-0.2989  0.3123
 0.2629  0.3536
 0.3916  0.3715
 0.1547  0.6609
-0.6499  0.4555
-0.0116 -0.0233
-0.6125 -0.7019
 0.0058 -0.5175
 0.6603 -0.4861
-0.4113 -0.4441
-0.4775  0.0459
 0.0209 -0.4182
 0.6285 -0.3778
 0.6710  0.5616
 0.4646  0.2513
-0.0767

In [80]:
model = nn.Linear(2, 4)

In [81]:
for param in model.parameters():
    print (param.data)


-0.2673  0.3985
-0.4782  0.5297
 0.1251 -0.1143
 0.2737 -0.6261
[torch.FloatTensor of size 4x2]


-0.6262
-0.4391
 0.2651
 0.3366
[torch.FloatTensor of size 4]

