In [11]:
import math
import torch
from torch import FloatTensor

In [28]:
NUMBERS = '0123456789'
def increment(s):
    """Return ``s`` with every maximal run of ASCII digits replaced by that number plus one.

    Non-digit characters are copied through unchanged. Each digit run is
    parsed with ``int`` before being incremented, so leading zeros are not
    preserved (``'007'`` becomes ``'8'``).

    Adapted from:
    https://codegolf.stackexchange.com/questions/38033/increment-every-number-in-a-string
    """
    pieces = []   # finished output fragments, joined once at the end
    digits = ''   # digit run currently being collected

    for ch in s:
        if ch in NUMBERS:
            digits += ch
            continue
        # A non-digit ends any pending run: emit the incremented number first.
        if digits:
            pieces.append(str(int(digits) + 1))
            digits = ''
        pieces.append(ch)

    # The string may end while a run is still open.
    if digits:
        pieces.append(str(int(digits) + 1))

    return ''.join(pieces)

In [29]:
def generate_disc_set(nb):
    """Sample ``nb`` 2-D points uniformly in [0, 1) and label them by distance to the origin.

    Returns:
        (points, labels): ``points`` is an ``nb x 2`` FloatTensor; ``labels`` is a
        float tensor with one entry per point, 1.0 where the point's Euclidean
        norm is strictly below ``sqrt(1 / (2 * pi))`` and 0.0 elsewhere.
    """
    radius = math.sqrt(1 / (2 * math.pi))
    points = FloatTensor(nb, 2).uniform_(0, 1)
    # L2 norm taken along dim 1, i.e. one distance per row.
    inside = points.norm(2, 1) < radius
    return points, inside.float()

In [30]:
# class Parameter

class Parameter():
    """Plain record bundling a name with a value and its gradient.

    Attributes are set directly from the constructor arguments:
    ``name`` <- name, ``data`` <- tensor, ``grad`` <- gradient.
    """
    def __init__(self, name, tensor, gradient):
        self.name, self.data, self.grad = name, tensor, gradient

In [31]:
# Simple structure for a Module

class Module ( Parameter ) :
    """Base class for layers, losses and containers.

    ``self.param`` holds the registered parameters. A leaf module stores
    ``(name, data, grad)`` tuples directly; a container stores the (non-empty)
    parameter lists of its children, so the structure may be nested.
    """
    def __init__(self):
        # super(Parameter, self) starts the lookup *after* Parameter, so
        # Parameter.__init__ (which requires name/tensor/gradient) is skipped.
        super(Parameter, self).__init__()
        self.param = []

    def forward ( self , * input ) :
        """Compute the module output; must be overridden."""
        raise NotImplementedError

    def backward ( self , * gradwrtoutput ) :
        """Propagate gradients; must be overridden."""
        raise NotImplementedError

    def init_parameters ( self ):
        """Create/register the module's parameters; must be overridden."""
        raise NotImplementedError

    def add_parameter( self , parameter ):
        """Register a ``Parameter`` (stored as a tuple) or a non-empty list of parameters.

        Anything else, including an empty list, is silently ignored.
        """
        # Exact type checks: Module subclasses Parameter, so isinstance would
        # also match modules, which do not carry name/data/grad.
        if type(parameter) is Parameter:
            self.param.append((parameter.name, parameter.data, parameter.grad))
        elif type(parameter) is list:
            if parameter:
                self.param.append(parameter)

    def _iter_parameters( self ):
        """Yield every ``(name, data, grad)`` tuple, flattening nested lists depth-first."""
        pending = list(reversed(self.param))
        while pending:
            entry = pending.pop()
            if isinstance(entry, tuple):
                yield entry
            else:
                # A child's parameter list: visit its entries in order.
                pending.extend(reversed(entry))

    def zero_grad( self ):
        """Reset every registered gradient to zero in place."""
        for _, _, grad in self._iter_parameters():
            grad[:] = 0

    def optimizer ( self , lr=1e-2 ):
        """Apply one gradient-descent step in place: ``data -= lr * grad``."""
        for _, data, grad in self._iter_parameters():
            data[:] -= lr * grad

    def parameters ( self ):
        """Return the registered parameter structure (the live list, not a copy)."""
        return self.param

In [32]:
# Module for MSEloss

class MSEloss( Module ):
    """Squared-error criterion (summed over all elements, not averaged).

    ``target`` is reshaped to a single column before being subtracted from
    ``input``.
    """
    def __init__(self):
        super(MSEloss, self).__init__()

    def forward ( self , input, target ):
        """Return the sum of squared differences between ``input`` and ``target``."""
        residual = input - target.view(-1, 1)
        return (residual ** 2).sum()

    def backward ( self , input, target ):
        """Return the derivative of ``forward`` w.r.t. ``input``: ``2 * (input - target)``."""
        residual = input - target.view(-1, 1)
        return residual * 2

In [33]:
# Module ReLU

class ReLU( Module ):
    """Rectified linear unit activation (no parameters)."""
    def __init__(self):
        super(ReLU, self).__init__()
        self.name = 'ReLU'

    def forward( self, input ):
        """Return ``max(input, 0)`` element-wise as a new tensor."""
        return input.clamp(min = 0)

    def backward( self, input ):
        """Return the ReLU derivative at ``input``: 1 where ``input > 0``, else 0.

        The result is a new tensor of the same type as ``input``; the argument
        itself is left untouched so callers can keep using their cached
        pre-activation values.
        """
        return (input > 0).type_as(input)

In [34]:
# Module Tanh

class Tanh( Module ):
    """Hyperbolic tangent activation (no parameters)."""
    def __init__(self):
        super(Tanh, self).__init__()
        self.name = 'Tanh'

    def forward( self , input ):
        """Return ``tanh(input)`` element-wise."""
        return input.tanh()

    def backward( self, input ):
        """Return the tanh derivative at ``input``, computed as ``4 / (e^x + e^-x)^2``."""
        exp_sum = input.exp() + input.mul(-1).exp()
        return 4 * exp_sum.pow(-2)

In [35]:
# module linear

class Linear( Module ):
    """Fully connected layer computing ``input.mm(weights) + bias``.

    ``weights`` has shape ``(input_features, output_features)`` and ``bias``
    has shape ``(output_features,)``. Both are ``Parameter`` objects whose
    names carry the value of ``Linear_counter`` at construction time
    (``weights0``/``bias0``, ``weights1``/``bias1``, ...).
    """

    # Number of Linear layers created since the counter was last reset;
    # used only to number the parameter names.
    Linear_counter = 0

    def __init__(self, input_features, output_features, eps=1e-2):
        super(Linear, self).__init__()
        self.name = 'Linear'
        self.input_features = input_features
        self.output_features = output_features
        self.init_parameters(input_features, output_features, eps)
        Linear.Linear_counter +=1

    def init_parameters ( self, input_features, output_features, eps ):
        """Create, initialise and register the weight and bias parameters.

        Values are drawn from N(0, eps) and then made non-negative; gradients
        start at zero.
        """
        weights_name = 'weights{}'.format(self.Linear_counter)
        bias_name = 'bias{}'.format(self.Linear_counter)
        self.weights = Parameter(weights_name, torch.Tensor(input_features, output_features),
                                 torch.zeros(input_features, output_features))
        self.bias = Parameter(bias_name, torch.Tensor(output_features), torch.zeros(output_features))
        # Weights are sampled before the bias so the random stream is consumed
        # in a fixed order.
        self.weights.data.normal_(0, eps).abs_()  # to get positive values
        self.bias.data.normal_(0, eps).abs_()
        self.add_parameter(self.weights)
        self.add_parameter(self.bias)

    def forward( self , input):
        """Return ``input.mm(weights) + bias`` for a 2-D ``input`` (one sample per row)."""
        output = input.mm(self.weights.data)
        output += self.bias.data
        return output

    def backward( self, ds , input ):
        """Back-propagate through the layer.

        Args:
            ds: gradient of the loss w.r.t. this layer's output.
            input: the tensor that was fed to ``forward``.

        Returns:
            (dx, dw, db): gradients w.r.t. the layer input, the weights and
            the bias. ``db`` is 1-D, matching ``bias.grad``, so it can be
            accumulated with ``bias.grad.add_(db)`` without broadcasting.
        """
        dx = ds.mm(self.weights.data.t())
        dw = input.t().mm(ds)
        # Sum over the batch dimension; keeps the bias shape (output_features,).
        db = ds.sum(0)
        return dx, dw, db
        

In [36]:
# Module Sequential

class Sequential( Module ):
    """Container that applies the given modules one after another.

    ``self.fc`` holds the modules in order and ``self.x`` caches the input of
    every module (``x[k]``) plus the final output (``x[module_nb]``) from the
    most recent ``forward`` call, for use by ``backward``.
    """
    def __init__(self, *args):
        super(Sequential, self).__init__()
        # Reset the naming counter for Linear layers created afterwards; the
        # modules passed in ``args`` have already been constructed.
        Linear.Linear_counter = 0
        self.module_nb = len(args)
        self.fc = list(args)
        self.x = [None] * (self.module_nb + 1)
        for index in range(self.module_nb):
            self.init_parameters(index)

    def init_parameters ( self, id ):
        """Register the parameters of the module at position ``id``."""
        child = self.fc[id]
        self.add_parameter( child.parameters() )

    def forward( self, input):
        """Run ``input`` through every module, caching each intermediate tensor."""
        self.x[0] = input
        for k in range(self.module_nb):
            self.x[k + 1] = self.fc[k].forward(self.x[k])
        return self.x[self.module_nb]

    def backward( self, dloss):
        """Back-propagate ``dloss`` from the last module to the first.

        Modules with no registered parameters are treated as element-wise
        activations: their ``backward(x)`` result is multiplied by the incoming
        gradient. All other modules are called as ``backward(grad, x)`` and
        their ``weights.grad`` / ``bias.grad`` are accumulated in place.

        Returns the list of gradients; entry ``k`` is the gradient w.r.t.
        ``self.x[k]``.
        """
        grads = [None] * (self.module_nb + 1)
        grads[self.module_nb] = dloss
        for j in reversed(range(self.module_nb)):
            layer = self.fc[j]
            if not layer.parameters():
                grads[j] = layer.backward(self.x[j]) * grads[j + 1]
                continue
            grads[j], dw, db = layer.backward( grads[j + 1], self.x[j])
            layer.weights.grad.add_(dw)
            layer.bias.grad.add_(db)
        return grads

In [37]:
# Net 2 layers

# Restart the numbering used in Linear parameter names.
Linear.Linear_counter = 0

class Net2( Module ):
    """Linear(2, hidden) -> ReLU -> Linear(hidden, 1) -> ReLU.

    One ReLU instance (``fc2``) is shared by both activation steps.
    ``forward`` caches the network input (``x0``), the pre-activations
    (``s1``, ``s2``) and the activations (``x1``, ``x2``) for ``backward``.
    """
    def __init__( self , hidden_layer):
        super(Net2, self).__init__()
        self.fc1 = Linear(2, hidden_layer)
        self.fc2 = ReLU()
        self.fc3 = Linear(hidden_layer, 1)
        self.init_parameters()

    def init_parameters ( self ):
        """Register the parameters of every sub-module (empty lists are ignored)."""
        for layer in (self.fc1, self.fc2, self.fc3):
            self.add_parameter( layer.parameters() )

    def forward(self, input):
        """Return the network output for ``input`` and cache the intermediates."""
        self.x0 = input
        self.s1 = self.fc1.forward( self.x0 )
        self.x1 = self.fc2.forward( self.s1 )
        self.s2 = self.fc3.forward( self.x1 )
        self.x2 = self.fc2.forward( self.s2 )
        return self.x2

    def backward(self, dloss):
        """Back-propagate ``dloss``, accumulating weight/bias gradients in place.

        Returns the gradient w.r.t. the network input.
        """
        grad = dloss
        # (pre-activation, linear layer, that layer's input), last layer first.
        stages = (
            (self.s2, self.fc3, self.x1),
            (self.s1, self.fc1, self.x0),
        )
        for pre_activation, layer, layer_input in stages:
            grad_pre = self.fc2.backward(pre_activation) * grad
            grad, dw, db = layer.backward( grad_pre , layer_input )
            layer.weights.grad.add_(dw)
            layer.bias.grad.add_(db)
        return grad

In [38]:
# Net 4 layers

# Restart the numbering used in Linear parameter names.
Linear.Linear_counter = 0

class Net4( Module ):
    """Linear(2, 2h) -> ReLU -> Linear(2h, h) -> ReLU -> Linear(h, 1) -> ReLU.

    One ReLU instance (``fc2``) is shared by all three activation steps.
    ``forward`` caches the network input (``x0``), the pre-activations
    (``s1``..``s3``) and the activations (``x1``..``x3``) for ``backward``.
    """
    def __init__(self, hidden_layer):
        super(Net4, self).__init__()
        self.fc1 = Linear(2, 2*hidden_layer)
        self.fc2 = ReLU()
        self.fc3 = Linear(2*hidden_layer, hidden_layer)
        self.fc4 = Linear(hidden_layer, 1)
        self.init_parameters()

    def init_parameters ( self ):
        """Register the parameters of every sub-module (empty lists are ignored)."""
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            self.add_parameter( layer.parameters() )

    def forward(self, x):
        """Return the network output for ``x`` and cache the intermediates."""
        self.x0 = x
        self.s1 = self.fc1.forward( self.x0 )
        self.x1 = self.fc2.forward( self.s1 )
        self.s2 = self.fc3.forward( self.x1 )
        self.x2 = self.fc2.forward( self.s2 )
        self.s3 = self.fc4.forward( self.x2 )
        self.x3 = self.fc2.forward( self.s3 )
        return self.x3

    def backward(self, dloss):
        """Back-propagate ``dloss``, accumulating weight/bias gradients in place.

        Returns the gradient w.r.t. the network input.
        """
        grad = dloss
        # (pre-activation, linear layer, that layer's input), last layer first.
        stages = (
            (self.s3, self.fc4, self.x2),
            (self.s2, self.fc3, self.x1),
            (self.s1, self.fc1, self.x0),
        )
        for pre_activation, layer, layer_input in stages:
            grad_pre = self.fc2.backward(pre_activation) * grad
            grad, dw, db = layer.backward( grad_pre , layer_input )
            layer.weights.grad.add_(dw)
            layer.bias.grad.add_(db)
        return grad

In [39]:
def train_model(model, train_input, train_target, mini_batch_size, lr = 1e-1,
                nb_epochs = 30, criterion = None):
    """Train ``model`` with mini-batch gradient descent and print the loss per epoch.

    Args:
        model: object exposing ``forward``, ``zero_grad``, ``backward`` and
            ``optimizer(lr)``.
        train_input: training samples, one per row.
        train_target: targets, indexed along dim 0 like ``train_input``.
        mini_batch_size: number of samples per step; the last batch of an
            epoch is smaller when the set size is not a multiple of it.
        lr: step size handed to ``model.optimizer``.
        nb_epochs: number of passes over the training set.
        criterion: loss object with ``forward(output, target)`` and
            ``backward(output, target)``; defaults to ``MSEloss()``.
    """
    if criterion is None:
        criterion = MSEloss()
    nb_samples = train_input.size(0)

    for e in range(0, nb_epochs):
        sum_loss = 0
        for b in range(0, nb_samples, mini_batch_size):
            # Clamp the length so ``narrow`` never runs past the last sample.
            batch_size = min(mini_batch_size, nb_samples - b)
            batch_input = train_input.narrow(0, b, batch_size)
            batch_target = train_target.narrow(0, b, batch_size)

            output = model.forward(batch_input)
            loss = criterion.forward(output, batch_target)
            dloss = criterion.backward(output, batch_target)
            # float() keeps the running total a plain number whether the
            # criterion returns a Python scalar or a 0-dim tensor.
            sum_loss += float(loss)

            # Gradients are accumulated by backward(), so clear them first.
            model.zero_grad()
            model.backward(dloss)
            model.optimizer(lr)
        print(e, sum_loss)

In [40]:
def compute_nb_errors(model, data_input, data_target, mini_batch_size):
    """Count the samples whose predicted class differs from ``data_target``.

    The prediction depends on the width of the model output:
      * several columns: the class is the index of the largest column;
      * a single column: the value is treated as a score for class 1 and
        thresholded at 0.5 (an arg-max over one column would always give 0).

    The data is processed in chunks of ``mini_batch_size``; the last chunk is
    smaller when the set size is not a multiple of it. Returns a Python int.
    """
    nb_samples = data_input.size(0)
    total_nb_errors = 0

    for b in range(0, nb_samples, mini_batch_size):
        # Clamp the length so ``narrow`` never runs past the last sample.
        batch_size = min(mini_batch_size, nb_samples - b)
        output = model.forward(data_input.narrow(0, b, batch_size))

        if output.dim() > 1 and output.size(1) > 1:
            _, predicted_classes = torch.max(output, 1)
        else:
            predicted_classes = output.view(-1) > 0.5

        expected_classes = data_target.narrow(0, b, batch_size)
        # Compare as integer class indices so float targets and integer
        # predictions line up.
        mismatches = predicted_classes.long() != expected_classes.long()
        total_nb_errors += int(mismatches.sum())

    return total_nb_errors

In [42]:
train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

# Standardise both sets with the statistics of the training inputs only.
mean, std = train_input.mean(), train_input.std()

train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std);

mini_batch_size = 100

model = Sequential(Linear(2, 5), ReLU(), Linear(5, 10), ReLU(), Linear(10, 20), ReLU(), Linear(20, 50), 
                   ReLU(), Linear(50, 20), ReLU(), Linear(20, 10), ReLU(), Linear(10, 5), ReLU(), Linear(5, 1), ReLU())
train_model(model, train_input, train_target, mini_batch_size, 1e-4)
nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
print(nb_test_errors)
# str.format only treats braces specially, so a literal percent sign is a single '%'.
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))





0 112.51492991075429
1 109.21502580522792
2 107.01145547628403
3 105.53978169872425
4 104.55675716139376
5 103.89999046875164
6 103.46108913887292
7 103.16769059281796
8 102.97147973999381
9 102.8401978770271
10 102.75231356546283
11 102.69343763124198
12 102.6539566423744
13 102.62745896726847
14 102.60964890755713
15 102.59765906073153
16 102.58957846835256
17 102.58411366678774
18 102.5804095454514
19 102.57789393700659
20 102.57617613393813
21 102.57499998714775
22 102.57419175002724
23 102.57363052573055
24 102.57323851343244
25 102.57296390086412
26 102.57276741974056
27 102.57262703590095
28 102.57252726703882
29 102.57245516311377
129
test error Net 12.90%% 129/1000
