# Exercise 2
Reinforcement Learning Course, 2018

Go through the code first to see what is going on. The first section reads the data and prepares the training, test and validation split

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import csv
%matplotlib inline

Load file and split input from target

Dataset from:
https://archive.ics.uci.edu/ml/datasets/Airfoil+Self-Noise

In [None]:
# Read the tab-separated data file into a 2-D float array.
# newline="" is what the csv module documentation asks for when opening files.
with open("airfoil_self_noise.dat", "r", newline="") as file:
    csvreader = csv.reader(file, delimiter='\t')
    # The builtin ``float`` replaces the ``np.float`` alias, which was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    table = np.asarray(list(csvreader), dtype=float)
np.set_printoptions(suppress=True) # do not show scientific notation
print(table.shape)
print(table[:3,:])

normalize data and keep normalizing factors

In [None]:
# Inputs: every column except the last, shifted and scaled by their mean and
# standard deviation. The statistics stay available in x_mean / x_std.
# NOTE(review): mean/std are computed over all input entries jointly (no
# ``axis`` argument), not per column -- confirm this is intended.
xs = table[:, :-1]
x_mean = xs.mean()
x_std = xs.std()
xs = (xs - x_mean) / x_std

# Targets: the last column, kept two-dimensional by indexing with a list,
# normalized the same way with its own statistics in y_mean / y_std.
ys = table[:, [-1]]
y_mean = ys.mean()
y_std = ys.std()
ys = (ys - y_mean) / y_std

In [None]:
# Sanity check: shapes of the normalized arrays, then the first three targets.
print(xs.shape, ys.shape)
print(ys[0:3])

This function shuffles the data and splits it into random training, validation and test sets (note: a fixed seed is used, so the split is reproducible).

In [None]:
def splitDataSetShuffle(inputs, outputs, percent_val_test=10, seed=1):
    """Shuffle paired samples and split them into train/validation/test sets.

    The global NumPy random generator is re-seeded with ``seed`` before the
    permutation is drawn, so the same arguments always give the same split.

    Args:
        inputs: Samples, indexed along the first axis (converted to an array).
        outputs: Targets, one per sample, same length as ``inputs``.
        percent_val_test: Percentage of the samples that goes to the
            validation set. The training set gets ``100 - 2 * percent_val_test``
            percent and the test set receives all remaining samples.
            Must lie in ``[0, 50]``.
        seed: Seed passed to ``np.random.seed``.

    Returns:
        A tuple ``(train_set, valid_set, test_set)``; each element is an
        ``(inputs, outputs)`` pair of arrays.

    Raises:
        AssertionError: If ``inputs`` and ``outputs`` differ in length.
        ValueError: If ``percent_val_test`` is outside ``[0, 50]``.
    """
    assert len(inputs) == len(outputs)
    # Above 50 percent the training size below turns negative and the slices
    # would silently yield overlapping train/test sets and an empty
    # validation set, so reject such values explicitly.
    if not 0 <= percent_val_test <= 50:
        raise ValueError(
            "percent_val_test must be between 0 and 50, got %r" % (percent_val_test,)
        )
    size = len(inputs)
    np.random.seed(seed)
    shuffle = np.random.permutation(size)
    # The same permutation is applied to both arrays to keep pairs aligned.
    inps = np.asarray(inputs)[shuffle]
    outs = np.asarray(outputs)[shuffle]
    ts = size * (100 - 2 * percent_val_test) // 100
    vs = size * percent_val_test // 100
    train_set = (inps[:ts], outs[:ts])
    valid_set = (inps[ts:ts + vs], outs[ts:ts + vs])
    # The test set takes everything that is left, including rounding leftovers.
    test_set = (inps[ts + vs:], outs[ts + vs:])
    return train_set, valid_set, test_set

In [None]:
# Default split: 80% train, 10% validation, remainder test, with the default seed.
train_set, valid_set, test_set = splitDataSetShuffle(inputs=xs, outputs=ys)

In [None]:
# Shapes of the three splits: first line the inputs (index 0), second line
# the targets (index 1).
for part in (0, 1):
    print(train_set[part].shape, valid_set[part].shape, test_set[part].shape)

# 2 layer neural network

This is our simple Neural network implementation. 

In [None]:
class simpleNN(object):
    """Fully connected network with one tanh hidden layer and a linear output.

    The forward pass is ``tanh(x W1 + b1) W2 + b2``.
    """

    def __init__(self, input_dim, output_dim, num_hidden_units, seed=2):
        """Create the parameters: Xavier-initialized weights and zero biases.

        Args:
            input_dim: Number of input features.
            output_dim: Number of network outputs.
            num_hidden_units: Width of the hidden layer.
            seed: Seed passed to ``np.random.seed`` (re-seeds the global
                NumPy generator) so the initial weights are reproducible.
        """
        # initialize weight matrices and bias vectors
        np.random.seed(seed)
        # Xavier initialization: the weight *variance* is 2 / (fan_in + fan_out),
        # so standard-normal samples are scaled by its square root (the
        # standard deviation), not by the variance itself.
        weight_variance = 2.0 / (num_hidden_units + input_dim)
        self.W1 = np.random.randn(input_dim, num_hidden_units) * np.sqrt(weight_variance)
        self.b1 = np.zeros(num_hidden_units)
        weight_variance = 2.0 / (output_dim + num_hidden_units)
        self.W2 = np.random.randn(num_hidden_units, output_dim) * np.sqrt(weight_variance)
        self.b2 = np.zeros(output_dim)
        # Hidden activations of the most recent ``evaluate`` call.
        self.layer1 = None

    def evaluate(self, x):
        """Return the network output for ``x`` and cache the hidden activations.

        Args:
            x: Inputs whose last dimension equals ``input_dim``.

        Returns:
            The linear output layer applied to the tanh hidden layer.
        """
        self.layer1 = np.tanh(np.dot(x, self.W1) + self.b1)
        return np.dot(self.layer1, self.W2) + self.b2

    def train_with_square_loss(self, X, y, epsilon=0.001, weight_decay=0.0):
        """Perform one gradient-descent step on the summed squared error.

        The minimized loss is ``L = Sum_i (f(x_i) - y_i)^2``, optionally plus
        ``weight_decay * (Sum(W1^2) + Sum(W2^2))``. The gradient is that of
        the sum over examples (not the mean), so the effective step grows
        with the number of examples passed in.

        Args:
            X: Matrix of inputs of shape (num_examples, input_dim).
            y: Target outputs of shape (num_examples, output_dim).
            epsilon: Learning rate.
            weight_decay: Coefficient of the L2 penalty on both weight
                matrices; ``0.0`` (the default) disables regularization.

        Returns:
            The mean squared prediction error on ``(X, y)`` measured before
            the parameters are updated (the penalty term is not included).
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Forward pass; ``evaluate`` stores the hidden activations in layer1.
        pred = self.evaluate(X)
        hidden = self.layer1
        diff = pred - y

        # Backward pass.
        grad_out = 2.0 * diff                       # dL / d(prediction)
        grad_W2 = np.dot(hidden.T, grad_out)
        grad_b2 = np.sum(grad_out, axis=0)
        # tanh'(z) = 1 - tanh(z)^2, and ``hidden`` already holds tanh(z).
        grad_pre = np.dot(grad_out, self.W2.T) * (1.0 - hidden ** 2)
        grad_W1 = np.dot(X.T, grad_pre)
        grad_b1 = np.sum(grad_pre, axis=0)

        if weight_decay:
            grad_W1 = grad_W1 + 2.0 * weight_decay * self.W1
            grad_W2 = grad_W2 + 2.0 * weight_decay * self.W2

        # Gradient-descent update.
        self.W1 -= epsilon * grad_W1
        self.b1 -= epsilon * grad_b1
        self.W2 -= epsilon * grad_W2
        self.b2 -= epsilon * grad_b2

        # return prediction error during training
        pred_error = np.mean(diff ** 2)
        return pred_error

    def error(self, X, y):
        """Return the mean squared error of the predictions for ``X`` against ``y``."""
        pred = self.evaluate(X)
        return np.mean((pred - y)**2)
        

Here we create an instance with 20 hidden units

In [None]:
# One network input per feature column, a single output, 20 hidden units.
net = simpleNN(input_dim=train_set[0].shape[1], output_dim=1, num_hidden_units=20)

initial error

In [None]:
# Mean squared error on the validation split before any training step.
net.error(*valid_set)

## Perform training using gradient descent (using all of the training data at once)

In [None]:
# Per-step error histories filled by the training loop.
train_errors, val_errors = [], []

In [None]:
# Full-batch gradient descent: each step trains on the whole training set.
for i in range(10000):
    train_errors.append(net.train_with_square_loss(train_set[0], train_set[1], 0.00005))
    # Track the validation error after every update so both curves can be
    # compared step by step.
    val_errors.append(net.error(valid_set[0], valid_set[1]))

In [None]:
# Training (solid green) and validation (dashed blue) error per update step.
plt.plot(train_errors, color='g', linewidth=2, label="train")
plt.plot(val_errors, color='b', linestyle='--', linewidth=2, label="validation")
plt.legend()
plt.xlabel("steps")
plt.ylabel("error")

Final performance on all the datasets

In [None]:
# Mean squared error on the train, validation and test splits, in that order.
print(*(net.error(*split) for split in (train_set, valid_set, test_set)))

## perform stochastic gradient descent (SGD) (with minibatches)

Create new instance of the network and perform training with mini-batches of size 16 with learning rate around 0.0002. You will need more update steps.

Evaluate the validation error every 100 batches and make the same calculation as above.

The training curve will be very noisy. Try to plot a smoothed version.

## use a larger network

Do the same thing with 150 hidden units

### Regularization

If you managed to incorporate the regularization, you can compare weights with and without it. You can use these matrix visualizations:

In [None]:
# Show the input-to-hidden weight matrix as an image (one row per input,
# one column per hidden unit); "nearest" draws each weight as a flat cell.
plt.imshow(net.W1, interpolation="nearest")

In [None]:
# Show the hidden-to-output weights, transposed so that each output is a row
# and each hidden unit a column.
plt.imshow(net.W2.T,interpolation="nearest")