In [68]:
import math
import torch
from torch import FloatTensor

# Positions of the fields inside the (name, data, grad) tuples that
# Module.add_parameter stores for each Parameter.
NAME_INDEX = 0
DATA_INDEX = 1
GRAD_INDEX = 2

In [69]:
def generate_disc_set(nb):
    """Sample `nb` random 2-D points and label them by their distance to the origin.

    Each coordinate is filled with `uniform_(0, 1)`. A point gets label 1.0 when
    its Euclidean norm is strictly below sqrt(1 / (2 * pi)), and 0.0 otherwise.

    Returns:
        (points, labels): a float tensor of shape (nb, 2) and a float tensor of
        shape (nb,).
    """
    radius = math.sqrt(1 / (2 * math.pi))
    points = FloatTensor(nb, 2).uniform_(0, 1)
    inside_disc = torch.norm(points, 2, 1) < radius
    return points, inside_disc.float()

In [70]:
class Parameter():
    """Plain record bundling a parameter's name, its value and its gradient.

    Every trainable quantity in the notebook is wrapped in this class so that
    all parameters are created the same way.
    """

    def __init__(self, name, tensor, gradient):
        # `tensor` is exposed as `.data` and `gradient` as `.grad`.
        self.name, self.data, self.grad = name, tensor, gradient

In [71]:
class Module(Parameter):
    """Base class that every layer, loss and network in this file inherits from.

    `self.param` holds the registered parameters. A leaf entry is a
    (name, data, grad) tuple; containers register the parameter list of each
    child as a nested list, so `self.param` may be a tree of lists whose leaves
    are such tuples.
    """

    def __init__(self):
        # Start the MRO lookup *after* Parameter on purpose: Parameter.__init__
        # requires (name, tensor, gradient), which a Module does not have.
        super(Parameter, self).__init__()
        self.param = []

    def forward(self, *input):
        """Compute the forward pass. Must be overridden by subclasses that use it."""
        raise NotImplementedError

    def backward(self, *gradwrtoutput):
        """Compute the backward pass. Must be overridden by subclasses that use it."""
        raise NotImplementedError

    def init_parameters(self):
        """Create the module's parameters. Must be overridden by subclasses that use it."""
        raise NotImplementedError

    def add_parameter(self, parameter):
        """Register `parameter` in `self.param`.

        * A `Parameter` instance is stored as a (name, data, grad) tuple.
        * A non-empty list (another module's `parameters()`) is appended as one
          nested entry.
        * Anything else, including an empty list, is silently ignored.
        """
        # The check is done on the class *name* so that objects created before a
        # notebook cell was re-run (which rebinds the class) are still accepted.
        kind = parameter.__class__.__name__
        if kind == 'Parameter':
            self.param.append((parameter.name, parameter.data, parameter.grad))
        elif kind == 'list':
            if parameter != []:
                self.param.append(parameter)

    def _iter_parameters(self, entries=None):
        """Yield every (name, data, grad) tuple found in `entries`, at any nesting depth."""
        if entries is None:
            entries = self.param
        for entry in entries:
            if isinstance(entry, tuple):
                yield entry
            else:
                # A nested list registered by a container module.
                yield from self._iter_parameters(entry)

    def zero_grad(self):
        """Reset the gradient of every registered parameter to zero, in place."""
        for entry in self._iter_parameters():
            entry[GRAD_INDEX][:] = 0

    def optimizer(self, lr=1e-2):
        """Apply one plain gradient-descent step: data <- data - lr * grad (in place)."""
        for entry in self._iter_parameters():
            entry[DATA_INDEX].sub_(lr * entry[GRAD_INDEX])

    def parameters(self):
        """Return the (possibly nested) list of registered parameters."""
        return self.param

In [72]:
# Module for MSEloss

class MSEloss(Module):
    """Squared-error criterion.

    Note that the error is *summed* over all elements, not averaged.
    """

    def __init__(self):
        super(MSEloss, self).__init__()

    def forward(self, input, target):
        """Return the sum of squared differences between `input` and `target`.

        `target` is reshaped to a column with `view(-1, 1)` before subtracting.
        """
        residual = input - target.view(-1, 1)
        return (residual ** 2).sum()

    def backward(self, input, target):
        """Return the derivative of the summed squared error with respect to `input`."""
        residual = input - target.view(-1, 1)
        return 2 * residual

In [73]:
# Module ReLU

class ReLU(Module):
    """Rectified linear unit activation; holds no trainable parameters."""

    def __init__(self):
        super(ReLU, self).__init__()
        self.name = 'ReLU'

    def forward(self, input):
        """Return `input` with every negative entry replaced by 0 (new tensor)."""
        return input.clamp(min=0)

    def backward(self, input, dx):
        """Back-propagate `dx` through the activation.

        Args:
            input: the tensor that was fed to `forward`.
            dx: gradient of the loss with respect to the activation output.

        Returns:
            `dx` where `input > 0`, and 0 elsewhere.
        """
        # Build the 0/1 derivative as a separate tensor: writing it into `input`
        # would overwrite the activation the caller still holds.
        mask = (input > 0).to(dx.dtype)
        return mask * dx

In [74]:
# Module Tanh

class Tanh(Module):
    """Hyperbolic tangent activation; holds no trainable parameters."""

    def __init__(self):
        super(Tanh, self).__init__()
        self.name = 'Tanh'

    def forward(self, input):
        """Return the element-wise tanh of `input`."""
        return input.tanh()

    def backward(self, input, dx):
        """Back-propagate `dx` through tanh.

        The derivative is evaluated as 4 / (e^x + e^-x)^2, with x the tensor
        that was fed to `forward`.
        """
        exp_sum = input.exp() + input.neg().exp()
        return 4 * exp_sum.pow(-2) * dx

In [75]:
# module linear

class Linear(Module):
    """Fully connected layer computing `input.mm(weights) + bias`.

    Weights have shape (input_features, output_features) and the bias has shape
    (output_features,). Both are drawn from a normal distribution with mean 0
    and standard deviation `eps`; their gradients start at zero.
    """

    # Class-level counter used to give each layer's parameters a distinct name
    # ('weights1', 'bias1', 'weights2', ...).
    Linear_counter = 0

    def __init__(self, input_features, output_features, eps=1e-2):
        super(Linear, self).__init__()
        self.name = 'Linear'
        Linear.Linear_counter += 1
        self.input_features = input_features
        self.output_features = output_features

        self.init_parameters(input_features, output_features, eps)

    def init_parameters(self, input_features, output_features, eps):
        """Create, initialise and register the weight and bias parameters."""
        weights_name = f'weights{self.Linear_counter}'
        bias_name = f'bias{self.Linear_counter}'

        # Draw the weights first and the bias second so the random stream is
        # consumed in a fixed order.
        weights_data = torch.Tensor(input_features, output_features).normal_(0, eps)
        bias_data = torch.Tensor(output_features).normal_(0, eps)

        self.weights = Parameter(weights_name, weights_data,
                                 torch.zeros(input_features, output_features))
        self.bias = Parameter(bias_name, bias_data, torch.zeros(output_features))

        self.add_parameter(self.weights)
        self.add_parameter(self.bias)

    def forward(self, input):
        """Return `input.mm(weights) + bias` for a (batch, input_features) input."""
        return input.mm(self.weights.data) + self.bias.data

    def backward(self, input, ds):
        """Accumulate the parameter gradients and return the gradient w.r.t. `input`.

        Args:
            input: the tensor that was fed to `forward`, shape (batch, input_features).
            ds: gradient of the loss w.r.t. the layer output, shape (batch, output_features).

        Returns:
            Gradient of the loss w.r.t. `input`, shape (batch, input_features).
        """
        dx = ds.mm(self.weights.data.t())
        dw = input.t().mm(ds)
        # Sum over the batch dimension so `db` is 1-D like `bias.grad`; a
        # (output_features, 1) column cannot be added in place to a 1-D tensor.
        db = ds.sum(0)

        self.weights.grad.add_(dw)
        self.bias.grad.add_(db)
        return dx
        

In [76]:
# Module Sequential

class Sequential(Module):
    """Container that chains the given modules one after another.

    `self.fc` holds the modules in order. `self.x[i]` caches the input of module
    `i` from the latest forward pass, and `self.x[module_nb]` the final output.
    """

    def __init__(self, *args):
        super(Sequential, self).__init__()
        # Restart the naming counter of Linear layers.
        Linear.Linear_counter = 0
        self.module_nb = len(args)
        self.fc = list(args)
        self.x = [None for _ in range(self.module_nb + 1)]

        # Register each child's parameter list (empty lists are ignored by add_parameter).
        for position in range(self.module_nb):
            self.init_parameters(position)

    def init_parameters(self, id):
        """Register the parameters of the module stored at index `id`."""
        child = self.fc[id]
        self.add_parameter(child.parameters())

    def forward(self, input):
        """Run `input` through every module in order and return the last output."""
        self.x[0] = input
        for position, layer in enumerate(self.fc):
            self.x[position + 1] = layer.forward(self.x[position])
        return self.x[self.module_nb]

    def backward(self, dloss):
        """Back-propagate `dloss` through the modules in reverse order.

        Returns:
            A list of length module_nb + 1 whose entry `j` is the gradient w.r.t.
            the input of module `j`; the last entry is `dloss` itself.
        """
        grads = [None] * (self.module_nb + 1)
        grads[self.module_nb] = dloss
        for j in reversed(range(self.module_nb)):
            grads[j] = self.fc[j].backward(self.x[j], grads[j + 1])
        return grads

In [77]:
# Net 2 layers

# Restart the class-level counter that Linear uses to number its
# 'weights<N>' / 'bias<N>' parameter names. This runs once, when the cell is
# executed, not each time a network is instantiated.
Linear.Linear_counter = 0

class Net2(Module):
    """Hand-wired network: Linear(2, h) -> ReLU -> Linear(h, 1) -> ReLU.

    A single ReLU instance (`fc2`) is reused for both activations. The forward
    pass caches every intermediate tensor needed by `backward`.
    """

    def __init__(self, hidden_layer):
        super(Net2, self).__init__()
        self.fc1 = Linear(2, hidden_layer)
        self.fc2 = ReLU()
        self.fc3 = Linear(hidden_layer, 1)
        self.init_parameters()

    def init_parameters(self):
        """Register the parameter lists of the sub-modules (ReLU's empty list is ignored)."""
        self.add_parameter(self.fc1.parameters())
        self.add_parameter(self.fc2.parameters())
        self.add_parameter(self.fc3.parameters())

    def forward(self, input):
        """Run the forward pass and cache x0, s1, x1, s2, x2 for `backward`."""
        self.x0 = input
        self.s1 = self.fc1.forward(self.x0)
        self.x1 = self.fc2.forward(self.s1)
        self.s2 = self.fc3.forward(self.x1)
        self.x2 = self.fc2.forward(self.s2)
        return self.x2

    def backward(self, dloss):
        """Back-propagate `dloss` and return the gradient w.r.t. the network input.

        Layer signatures are `ReLU.backward(input, dx)` and
        `Linear.backward(input, ds)`. `Linear.backward` returns only the input
        gradient and accumulates its own weight/bias gradients, so they must not
        be added again here.
        """
        ds2 = self.fc2.backward(self.s2, dloss)
        dx1 = self.fc3.backward(self.x1, ds2)
        ds1 = self.fc2.backward(self.s1, dx1)
        dx0 = self.fc1.backward(self.x0, ds1)
        return dx0

In [78]:
# Net 4 layers

# Restart the class-level counter that Linear uses to number its
# 'weights<N>' / 'bias<N>' parameter names. This runs once, when the cell is
# executed, not each time a network is instantiated.
Linear.Linear_counter = 0

class Net4(Module):
    """Hand-wired network:
    Linear(2, 2h) -> ReLU -> Linear(2h, h) -> ReLU -> Linear(h, 1) -> ReLU.

    A single ReLU instance (`fc2`) is reused for all three activations. The
    forward pass caches every intermediate tensor needed by `backward`.
    """

    def __init__(self, hidden_layer):
        super(Net4, self).__init__()
        self.fc1 = Linear(2, 2*hidden_layer)
        self.fc2 = ReLU()
        self.fc3 = Linear(2*hidden_layer, hidden_layer)
        self.fc4 = Linear(hidden_layer, 1)
        self.init_parameters()

    def init_parameters(self):
        """Register the parameter lists of the sub-modules (ReLU's empty list is ignored)."""
        self.add_parameter(self.fc1.parameters())
        self.add_parameter(self.fc2.parameters())
        self.add_parameter(self.fc3.parameters())
        self.add_parameter(self.fc4.parameters())

    def forward(self, x):
        """Run the forward pass and cache x0, s1, x1, s2, x2, s3, x3 for `backward`."""
        self.x0 = x
        self.s1 = self.fc1.forward(self.x0)
        self.x1 = self.fc2.forward(self.s1)
        self.s2 = self.fc3.forward(self.x1)
        self.x2 = self.fc2.forward(self.s2)
        self.s3 = self.fc4.forward(self.x2)
        self.x3 = self.fc2.forward(self.s3)
        return self.x3

    def backward(self, dloss):
        """Back-propagate `dloss` and return the gradient w.r.t. the network input.

        Layer signatures are `ReLU.backward(input, dx)` and
        `Linear.backward(input, ds)`. `Linear.backward` returns only the input
        gradient and accumulates its own weight/bias gradients, so they must not
        be added again here.
        """
        ds3 = self.fc2.backward(self.s3, dloss)
        dx2 = self.fc4.backward(self.x2, ds3)
        ds2 = self.fc2.backward(self.s2, dx2)
        dx1 = self.fc3.backward(self.x1, ds2)
        ds1 = self.fc2.backward(self.s1, dx1)
        dx0 = self.fc1.backward(self.x0, ds1)
        return dx0

In [79]:
def train_model(model, train_input, train_target, mini_batch_size, lr = 1e-1, nb_epochs = 250):
    """Train `model` with mini-batch gradient descent on the MSEloss criterion.

    Args:
        model: object exposing forward, backward, zero_grad and optimizer.
        train_input: tensor whose first dimension indexes the samples.
        train_target: targets aligned with `train_input` along dimension 0.
        mini_batch_size: number of samples per update; the last batch may be
            smaller when the data set size is not a multiple of it.
        lr: learning rate handed to `model.optimizer`.
        nb_epochs: number of passes over the training set.

    Prints the epoch index and the loss accumulated over that epoch.
    """
    criterion = MSEloss()
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        sum_loss = 0.0
        for b in range(0, nb_samples, mini_batch_size):
            # Clip the final batch so narrow() never reads past the end.
            batch_size = min(mini_batch_size, nb_samples - b)
            batch_input = train_input.narrow(0, b, batch_size)
            batch_target = train_target.narrow(0, b, batch_size)

            output = model.forward(batch_input)
            loss = criterion.forward(output, batch_target)
            dloss = criterion.backward(output, batch_target)
            # Accumulate a plain Python number rather than a tensor.
            sum_loss += float(loss)

            # Gradients are accumulated by backward, so clear them first.
            model.zero_grad()
            model.backward(dloss)
            model.optimizer(lr)
        print(e, sum_loss)

In [80]:
def compute_nb_errors(model, data_input, data_target, mini_batch_size):
    """Count the samples whose predicted class differs from the target.

    The predicted class of a sample is the index of the largest entry along
    dimension 1 of `model.forward(...)`.

    Args:
        model: object exposing `forward(batch)`.
        data_input: tensor whose first dimension indexes the samples.
        data_target: one class label per sample, aligned with `data_input`.
        mini_batch_size: number of samples evaluated at once; the last batch may
            be smaller when the data set size is not a multiple of it.

    Returns:
        The number of misclassified samples, as an int.
    """
    nb_samples = data_input.size(0)
    total_nb_errors = 0

    for b in range(0, nb_samples, mini_batch_size):
        # Clip the final batch so narrow() never reads past the end.
        batch_size = min(mini_batch_size, nb_samples - b)
        output = model.forward(data_input.narrow(0, b, batch_size))
        _, predicted_classes = torch.max(output, 1)

        # Flatten so a (batch, 1) target lines up element-wise with the 1-D
        # predictions instead of broadcasting against them.
        batch_target = data_target.narrow(0, b, batch_size).reshape(-1)
        mismatches = batch_target != predicted_classes.to(batch_target.dtype)
        total_nb_errors += int(mismatches.sum())

    return total_nb_errors

In [84]:
# Fix the RNG so data generation and weight initialisation are repeatable
# when this cell is re-run.
torch.manual_seed(0)

train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)
# Standardise both sets with the statistics of the *training* set.
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std);

mini_batch_size = 100

# NOTE(review): the network ends in two output units, MSEloss compares both of
# them to the same scalar target (it reshapes the target to a column), and
# compute_nb_errors takes the arg-max over those two units. Confirm this is the
# intended training target; a one-hot target looks more likely.
model = Sequential(Linear(2, 25, 1), ReLU(), Linear(25, 25, 1), ReLU(), Linear(25, 2, 1), ReLU())
train_model(model, train_input, train_target, mini_batch_size, 1e-6)
nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)

print(nb_test_errors)
# str.format does not treat '%' specially, so a single '%' prints one percent sign.
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                 nb_test_errors, test_input.size(0)))





0 116645.25551211373
1 41331.8831437583
2 18021.34329176069
3 9138.0775416703
4 5290.15911744412
5 3456.256001225327
6 2468.6180838231357
7 1871.310609977581
8 1484.8518865022306
9 1219.83925419473
10 1030.311244781472
11 888.8687953189947
12 781.1371486384669
13 697.8410695310886
14 632.3934413792165
15 579.8927448530703
16 537.0246050404166
17 501.99412862592
18 472.6186490739483
19 447.31351964498754
20 425.6074293126985
21 407.09796482491856
22 391.35629266983597
23 377.87908494478324
24 366.278015550497
25 356.2991072088007
26 347.7019974943871
27 340.24796971798787
28 333.7291866251262
29 328.01460093572905
30 322.9825093895779
31 318.53136855485536
32 314.577081879444
33 311.0467675074815
34 307.8811345961585
35 305.02617909654737
36 302.44528227770934
37 300.10690873287695
38 297.9803182618971
39 296.03720269899713
40 294.25650983152383
41 292.62036053999327
42 291.1137366568728
43 289.7253091671664
44 288.44233204540797
45 287.2549022397434
46 286.1552996047394
47 285.13511639