In [36]:
import math
import torch
from torch import FloatTensor

NAME_INDEX = 0
DATA_INDEX = 1
GRAD_INDEX = 2

In [47]:
def generate_disc_set(nb):
    # Generates data and corresponding target
    disk_center = FloatTensor(nb, 2).fill_(0.5)
    input = FloatTensor(nb, 2).uniform_(0, 1)
    target = torch.norm(input.sub(disk_center), 2, 1) < math.sqrt(1/(2*math.pi))
    target = torch.eye(2).index_select(0, target.long())
    return input, target.float()

In [48]:
class Parameter():
    # class used to create all the parameters in the same way
    
    def __init__(self, name, tensor, gradient):
        self.name = name       # name of the parameter
        self.data = tensor     # parameter values
        self.grad = gradient   # gradient of the parameter

In [49]:
class Module ( Parameter ) :
    # base module that the following class inherits
    
    def __init__(self):
        super(Parameter, self).__init__()
        self.name = 'Base Module'  # name of the module
        self.param = []            # contains all the parameters of the Module
        
    def forward (self, * input) :
        # Computes the forward pass of the Module.
        # Need to be implemented in future class if one wants to use it.
        raise NotImplementedError
        
    def backward (self , * gradwrtoutput) :
        # Computes the backward pass of the Module for the backpropagation of the loss.
        # Need to be implemented in future class if one wants to use it.
        raise NotImplementedError
        
    def init_parameters ( self ):
        # Initialize the proper parameters for the Module.
        # Need to be implemented in future class if one wants to use it.
        raise NotImplementedError
        
    def add_parameter(self, parameter):
        # Adds the input parameter to the already existing parameters of the Module.
        # Different implementation if the input parameter is:
            # - an object of class 'Parameter'(when you initialize parameters in the current module)
            # - a list of objects(when you add all the parameters of a module to another one)
        if parameter.__class__.__name__ == 'Parameter':
            self.param.append((parameter.name, parameter.data, parameter.grad))
        elif parameter.__class__.__name__ == 'list':                        
            if parameter != []:
                self.param.append(parameter)
                    
    def zero_grad( self ):
        # Reset the gradient of the parameters to 0
        for i in range(len(self.param)):            # loop on the different Module initialized in the 'self' Module
            for j in range(len(self.param[i])):     # loop on the parameters of each Module
                self.param[i][j][GRAD_INDEX][:] = 0
                    
    def optimizer (self, lr = 1e-5):
        # Stochastic Gradient descent
        # updates the parameters in regard of their gradient and the input learning rate
        for i in range(len(self.param)):           # loop on the different Module initialized in the 'self' Module
            for j in range(len(self.param[i])):    # loop on the parameters of each Module
                self.param[i][j][DATA_INDEX][:] -= lr * self.param[i][j][GRAD_INDEX][:]
                
    def parameters ( self ):
        # returns the all parameters of the Module
        return self.param

In [56]:
class MSEloss( Module ):
    # Compute the Mean Square Error between the given input and target
    # no parameters needed
    
    def __init__(self):
        super(MSEloss, self).__init__()
        
    def forward (self, input, target):
        return input.sub(target).pow(2).sum() 
    
    def backward (self, input, target):
        return 2*(input.sub(target))    

In [57]:
class ReLU( Module ):
    # Activation functions: Rectified Linear Unit on each element of the input
    # no parameters needed
    
    def __init__(self):
        super(ReLU, self).__init__()
        self.name = 'ReLU'
        
    def forward(self, input):
        return input.clamp(min = 0)
    
    def backward(self, input, dx):
        input[input <= 0] = 0
        input[input > 0] = 1
        return input * dx

In [58]:
# Module Tanh

class Tanh( Module ):
    # Activation functions: Hyperbolic tangent of each element of the input
    # no parameters needed
    
    def __init__(self):
        super(Tanh, self).__init__()
        self.name = 'Tanh'
        
    def forward(self, input):
        return input.tanh()
    
    def backward(self, input, dx):
        return 4 * (input.exp() + input.mul(-1).exp()).pow(-2) * dx

In [59]:
class Linear( Module ):
    # Linear transformation with certain weights and bias
    
    Linear_counter = 0 # counter of the number of Linear module created in order to name properly the parameters
    
    def __init__(self, input_features, output_features, eps = 1e-2):
        super(Linear, self).__init__()
        self.name = 'Linear'
        Linear.Linear_counter += 1        
        self.init_parameters(input_features, output_features, eps)
        
    def init_parameters (self, input_features, output_features, eps):
        # initializes the weight parameters with a normal distribution and set their gradient to 0
        weigths_name = f'weights{self.Linear_counter}'
        self.weights = Parameter(weigths_name, torch.Tensor(input_features, output_features),
                                 torch.Tensor(input_features, output_features))
        self.weights.data.normal_(0, eps)
        self.weights.grad.zero_()
        
        # initializes the bias parameters with a normal distribution and set their gradient to 0
        bias_name = f'bias{self.Linear_counter}'
        self.bias = Parameter(bias_name, torch.Tensor(output_features), torch.Tensor(output_features))  
        self.bias.data.normal_(0, eps)
        self.bias.grad.zero_()
        
        # adds the weight and bias parameters to this Module
        self.add_parameter(self.weights)
        self.add_parameter(self.bias)
        
    def forward(self, input):
        output = input.mm(self.weights.data)
        output += self.bias.data
        return output
    
    def backward(self, input, ds):
        dx = ds.mm(self.weights.data.t())
        dw = input.t().mm(ds)
        db = ds.t().mm(torch.ones(ds.size(0), 1))
        
        self.weights.grad.add_(dw) # updates the gradient of the parameters with the computed value
        self.bias.grad.add_(db)
        
        return dx

In [60]:
class Sequential( Module ):
    # Sequential container of Modules object.
    # The order is given by the user when creating the object of class Sequential
    
    def __init__(self, *args):
        super(Sequential, self).__init__()
        Linear.Linear_counter = 0               # resets the number of Linear for each sequential object
        self.module_nb = len(args)
        self.fc = [None] * self.module_nb       # contains the modules object in order
        self.x = [None] * (self.module_nb + 1)  # contains the input of the model 
                                                # and the intermediate results after the forward pass of each module 
        
        for id, module in enumerate(args):      # fills the list containing the modules and adds their parameters to
            self.fc[id] = module                # the sequential 'self' module
            self.init_parameters(id)
        
    def init_parameters (self, id):
        self.add_parameter(self.fc[id].parameters())
        
    def forward(self, input):                   # execute the forward pass of each module in the order given by the user
        self.x[0] = input                       # fills the list with the intermediate output of the forward
                                                # pass of each module 
        for i in range(1, self.module_nb + 1):  
            self.x[i] = self.fc[i-1].forward(self.x[i-1])
        return self.x[self.module_nb]
    
    def backward(self, dloss):                  # execute the backward pass of each module in the inverse order
        dx = [None] * (self.module_nb + 1)      # compared to the forward pass
        dx[self.module_nb] = dloss
        
        for i in range(1, self.module_nb + 1):
            j = self.module_nb - i
            dx[j] = self.fc[j].backward(self.x[j], dx[j+1])
            
        return dx

In [61]:
# Net 4 layers

Linear.Linear_counter = 0

class Net4( Module ):
    def __init__(self, hidden_layer):
        super(Net4, self).__init__()
        self.fc1 = Linear(2, 2*hidden_layer)
        self.fc2 = ReLU()
        self.fc3 = Linear(2*hidden_layer, hidden_layer)
        self.fc4 = Linear(hidden_layer, 1)
        self.init_parameters()
        
    def init_parameters ( self ):
        self.add_parameter( self.fc1.parameters() )
        self.add_parameter( self.fc2.parameters() )
        self.add_parameter( self.fc3.parameters() )
        self.add_parameter( self.fc4.parameters() )

    def forward(self, x):
        self.x0 = x
        x = self.fc1.forward( x )
        self.s1 = x
        x = self.fc2.forward( x )
        self.x1 = x
        x = self.fc3.forward( x )
        self.s2 = x
        x = self.fc2.forward( x )
        self.x2 = x
        x = self.fc4.forward( x )
        self.s3 = x
        x = self.fc2.forward( x )
        self.x3 = x
        return x
    
    def backward(self, dloss):
        dx3 = dloss
        ds3 = self.fc2.backward(self.s3) * dx3
        dx2, dw4, db4 = self.fc4.backward( ds3, self.x2 )
        self.fc4.weights.grad.add_(dw4)
        self.fc4.bias.grad.add_(db4)
        ds2 = self.fc2.backward(self.s2) * dx2
        dx1, dw3, db3 = self.fc3.backward( ds2 , self.x1 )
        self.fc3.weights.grad.add_(dw3)
        self.fc3.bias.grad.add_(db3)
        
        ds1 = self.fc2.backward(self.s1) * dx1
        
        dx0, dw1, db1 = self.fc1.backward( ds1 , self.x0 )

        self.fc1.weights.grad.add_(dw1)
        self.fc1.bias.grad.add_(db1)
        
        return dx0

In [62]:
def train_model(model, criterion, train_input, train_target, mini_batch_size = 100, lr = 1e-1, nb_epochs = 250):
    # trains the given model 'nb_epochs' times and prints the loss for each iteration in the end
    
    sum_loss = FloatTensor(nb_epochs).zero_()                     # contains the loss after each iteration
    
    for e in range(0, nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):  # divides the input data into batches
            # input go through the model
            output = model.forward(train_input.narrow(0, b, mini_batch_size))
            
            # loss and dloss computation to train the model and visualize it
            loss = criterion.forward(output, train_target.narrow(0, b, mini_batch_size))
            sum_loss[e] += loss
            dloss = criterion.backward(output, train_target.narrow(0, b, mini_batch_size))
            
            # reset the gradient before the backpropagation of the loss by gradient descent
            # in order to update the parameters (learning phase)
            model.zero_grad()
            model.backward(dloss)
            model.optimizer(lr)
            
        nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
        nb_train_errors = compute_nb_errors(model, train_input, train_target, mini_batch_size)
        print('test error Net {:0.2f}%% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))
        print('train error Net {:0.2f}%% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
            
        print(e, sum_loss[e])

In [65]:
def compute_nb_errors(model, data_input, data_target, mini_batch_size = 100):
    # computes the number of datapoint wrongly classified by the model
    
    total_nb_errors = 0
    
    for b in range(0, data_input.size(0), mini_batch_size):
        
        output = model.forward(data_input.narrow(0, b, mini_batch_size))   # the predicted class is the column of the output with the greater value
        #print(output)
        _, predicted_classes = torch.max(output, 1)                        # the predicted class 
        #print(predicted_classes)
        _, target_class = torch.max(data_target, 1)
        
        for k in range(0, mini_batch_size): 
            if target_class[b + k] != predicted_classes[k]:
                total_nb_errors += 1

    return total_nb_errors

In [69]:
# Main Function

# Computing of the train and test set
train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std);

# Training parameters
mini_batch_size = 100
nb_epochs = 250

# initilization of the model and criterion
model = Sequential(Linear(2, 25, 1), ReLU(), Linear(25, 25, 1), ReLU(), Linear(25, 2, 1))
criterion = MSEloss()

# training of the model
train_model(model, criterion, train_input, train_target, mini_batch_size, 1e-5, nb_epochs)

# computing of the number of error
nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
nb_train_errors = compute_nb_errors(model, train_input, train_target, mini_batch_size)

# visualization
print(nb_test_errors)
print('test error Net {:0.2f}%% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))
print('train error Net {:0.2f}%% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))




test error Net 57.00%% 570/1000
train error Net 57.70%% 577/1000
0 80237.4296875
test error Net 58.40%% 584/1000
train error Net 60.60%% 606/1000
1 13055.748046875
test error Net 54.70%% 547/1000
train error Net 55.90%% 559/1000
2 6189.01904296875
test error Net 51.30%% 513/1000
train error Net 55.10%% 551/1000
3 4117.88427734375
test error Net 49.40%% 494/1000
train error Net 52.80%% 528/1000
4 3299.1142578125
test error Net 47.00%% 470/1000
train error Net 50.00%% 500/1000
5 2834.868408203125
test error Net 44.70%% 447/1000
train error Net 47.00%% 470/1000
6 2515.505126953125
test error Net 41.00%% 410/1000
train error Net 43.80%% 438/1000
7 2274.06494140625
test error Net 38.30%% 383/1000
train error Net 41.50%% 415/1000
8 2084.276611328125
test error Net 34.90%% 349/1000
train error Net 37.80%% 378/1000
9 1930.0452880859375
test error Net 32.10%% 321/1000
train error Net 35.00%% 350/1000
10 1802.9315185546875
test error Net 30.00%% 300/1000
train error Net 32.80%% 328/1000
11 1694.

test error Net 12.00%% 120/1000
train error Net 11.30%% 113/1000
105 402.9253234863281
test error Net 12.00%% 120/1000
train error Net 11.20%% 112/1000
106 400.4118347167969
test error Net 12.00%% 120/1000
train error Net 11.20%% 112/1000
107 397.7730712890625
test error Net 11.80%% 118/1000
train error Net 11.20%% 112/1000
108 395.216064453125
test error Net 11.80%% 118/1000
train error Net 11.20%% 112/1000
109 392.7259826660156
test error Net 11.70%% 117/1000
train error Net 11.20%% 112/1000
110 390.20025634765625
test error Net 11.60%% 116/1000
train error Net 11.10%% 111/1000
111 387.7444763183594
test error Net 11.40%% 114/1000
train error Net 11.10%% 111/1000
112 385.33978271484375
test error Net 11.40%% 114/1000
train error Net 10.90%% 109/1000
113 382.99163818359375
test error Net 11.30%% 113/1000
train error Net 10.80%% 108/1000
114 380.6382751464844
test error Net 11.30%% 113/1000
train error Net 10.70%% 107/1000
115 378.26318359375
test error Net 11.20%% 112/1000
train error

test error Net 9.00%% 90/1000
train error Net 6.70%% 67/1000
205 268.7044982910156
test error Net 9.00%% 90/1000
train error Net 6.60%% 66/1000
206 268.0622863769531
test error Net 9.00%% 90/1000
train error Net 6.60%% 66/1000
207 267.42523193359375
test error Net 9.00%% 90/1000
train error Net 6.60%% 66/1000
208 266.7952880859375
test error Net 9.00%% 90/1000
train error Net 6.50%% 65/1000
209 266.1618347167969
test error Net 9.00%% 90/1000
train error Net 6.50%% 65/1000
210 265.5461730957031
test error Net 9.00%% 90/1000
train error Net 6.50%% 65/1000
211 264.9281311035156
test error Net 9.00%% 90/1000
train error Net 6.50%% 65/1000
212 264.3302917480469
test error Net 8.90%% 89/1000
train error Net 6.50%% 65/1000
213 263.73614501953125
test error Net 8.90%% 89/1000
train error Net 6.50%% 65/1000
214 263.15435791015625
test error Net 8.90%% 89/1000
train error Net 6.50%% 65/1000
215 262.6260070800781
test error Net 8.80%% 88/1000
train error Net 6.50%% 65/1000
216 262.0260925292969
t

-0.5

In [83]:
######################################################################

import torch
import math

from torch import optim
from torch import Tensor
from torch.autograd import Variable
from torch import nn

######################################################################

def generate_disc_set(nb):
    input = Tensor(nb, 2).uniform_(0, 1)
    target = torch.norm(input, 2, 1) < math.sqrt(1/(2*math.pi))
    return input, target

train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

mean, std = train_input.mean(), train_input.std()

train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

train_input, train_target = Variable(train_input), Variable(train_target)
test_input, test_target = Variable(test_input), Variable(test_target)

mini_batch_size = 100

######################################################################

def train_model(model, train_input, train_target):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr = 1e-1)
    nb_epochs = 250

    for e in range(0, nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss.backward()
            optimizer.step()

######################################################################

def compute_nb_errors(model, data_input, data_target):

    nb_data_errors = 0

    for b in range(0, data_input.size(0), mini_batch_size):
        output = model(data_input.narrow(0, b, mini_batch_size))
        print(output)
        _, predicted_classes = torch.max(output.data, 1)
        for k in range(0, mini_batch_size):
            if data_target.data[b + k] != predicted_classes[k]:
                nb_data_errors = nb_data_errors + 1

    return nb_data_errors

######################################################################

def create_shallow_model():
    return nn.Sequential(
        nn.Linear(2, 128),
        nn.ReLU(),
        nn.Linear(128, 2)
    )

######################################################################

model = create_shallow_model()
for p in model.parameters(): p.data.normal_(0, 1e-1)

train_model(model, train_input, train_target)

print('std {:f} train_error {:.02f}% test_error {:.02f}%'.format(
        std,
        compute_nb_errors(model, train_input, train_target) / train_input.size(0) * 100,
        compute_nb_errors(model, test_input, test_target) / test_input.size(0) * 100
    )
    )

######################################################################

RuntimeError: Expected object of type Variable[torch.LongTensor] but found type Variable[torch.ByteTensor] for argument #1 'target'