In [1]:
# Will Hollingsworth, Colton Murray, Alexander Shiveley

In [2]:
import numpy as np
import matplotlib.pyplot as plt

# Getting the data into Python

In [3]:
# Load the csv as a numpy array of strings, 
# because it includes the column headers
raw_data = np.loadtxt('data_banknote_authentication.txt', delimiter=',', dtype=str)

# Update output class values of 0 to -1
def apply_mapping(row):
    row[4] = -1 if row[4] == 0 else 1
    return row
converted = np.apply_along_axis(apply_mapping, 1, raw_data)

# Convert everything into floats!
clean_data = np.array(converted, dtype=float)

In [4]:
def get_sets(data, split):
    """
    Convenience function that randomly selects a training and test set from the input data.
    
    :param data: (ndarray) the data you want to split
    :param split: (float array) the percentages of the data you want to be TRAINING, VALIDATION, and TESTING data
    
    :returns: (tuple) a tuple where the first element is the training set, and the second element is the test set
    """
    # Randomly shuffle the order from a copy of the data
    shuffled = data.copy()
    np.random.shuffle(shuffled)

    row_count = data.shape[0]

    # calc the number of samples, assumes the input samples are seperated by row
    training_count = round(row_count * split[0])
    
    training_set = shuffled[:training_count]
    remaining_set = shuffled[training_count:]
    
    # calc the number of samples, assumes the input samples are seperated by row
    training_count = round(row_count * split[1] / (split[1] + split[2]))
    
    validation_set = remaining_set[:training_count]
    test_set = remaining_set[training_count:]
    
    return training_set, validation_set, test_set

In [23]:
training, validation, test = get_sets(clean_data, [1/3, 1/3, 1/3])

# Perceptron (2 Input Linear Unit)

In [45]:
class linear_unit():
    def __init__(self, num_inputs):
        self.weights = np.random.rand(num_inputs + 1)
        
    def output(self, data_point):
        """
        Returns the linear combination of the input and this unit's weights
        """
        # total = w0*1 + w1x1 + w2x2 + ...
        data_with_bias = np.hstack((np.array([1]), data_point[:-1]))
        t = data_with_bias * self.weights
        t = np.sum(t)
        
        return t     

## Neural Network

In [51]:
class neural_net():
    def __init__(self, num_inputs, num_hidden, num_outputs, squash_f, squash_d_f):
        self.hidden_units = []
        for _ in range(num_hidden):
            self.hidden_units.append(linear_unit(num_inputs))
        self.output_units = []
        for _ in range(num_outputs):
            self.output_units.append(linear_unit(num_hidden))
        self.squash_fn = squash_f
        self.squash_d_fn = squash_d_f    
        
    def output(self, data_point):
        """
        Returns the array of outputs from the output units
        """            
        
        return output_at_layer(data_point, 1)   
    
    def output_at_layer(self, data_point, layer):
        
        """
        Returns the array of outputs from the units on the specified layer
        0 - hidden
        1 - output
        """
        ## Calculate squashed hidden outputs, with bias
        hidden_out = [1]
        for unit in self.hidden_units:
            hidden_out.append(self.squash_fn(unit.output(data_point)))
        
        if layer == 0:
            return hidden_out
            
        ## Calculate squashed outputs using hidden outputs
        out = []
        for unit in self.output_units:
            out.append(self.squash_fn(unit.output(hidden_out)))    
            
        return out
        
        
    def error(self, data):
        """
        Returns the sum squared error of the data using this network's output units
        """
        sum = 0
        for d in data:
            for out in self.output(d):
                o = 1 if out > 0 else -1
                sum = sum + (d[2] - o)**2
        return sum

## Squashing Functions

In [47]:
# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Sigmoid derivative function
def sigmoid_d(x):
    return sigmoid(x) * (1 - sigmoid(x))

# Hyperbolic tan function
def tanh(x):
    return np.tanh(x)

# Hyperbolic tan derivative function
def tanh_d(x):
    return 1 - tanh(x)**2

In [48]:
net = neural_net(4, 4, 1, sigmoid, sigmoid_d)
print(training[0])
print(net.output(training[0]))
print(net.error(training))

[ 3.4893   6.69    -1.2042  -0.38751  1.     ]
[0.8967325518619476]
8725.838658222196


## Backpropagation

In [49]:
def backpropagation(squash_f, squash_d_f, learning_rate, epochs):    
    """
    Find the network with the lowest error based on the number of hidden units after being trained with backpropagation
    """
    best_network = None
    for h in range(4, 0, -1):
        # Step 1 Initialize network
        network = neural_net(4, h, 1, squash_f, squash_d_f)
        
        # TODO Train network and do the backpropagation
        # From NN-MitchelChapter4-2 on canvas
        # Alex - Started on it but need to be careful with tanh. The notes on canvas works out sigmoid in detail but not tanh
        #           Using a more generalized form of the equations for the algorithm in 1.2
        
        for epoch in range(epochs):
            for d in training:
                d_with_bias = np.hstack((np.array([1]), d[:-1]))
                hidden_out = network.output_at_layer(d, 0)
                
                # Step 2 but replacing specific sigmoid derivative with the squash derivative
                delta_out = []
                for unit in network.output_units:
                    unit_out = unit.output(hidden_out)
                    unit_delta = squash_d_f(unit_out) * (d[4] - squash_f(unit_out))
                    delta_out.append(unit_delta)

                # Step 3 but replacing specific sigmoid derivative with the squash derivative
                delta_hidden = []
                for unit in network.hidden_units:
                    unit_out = unit.output(d_with_bias)
                    unit_delta = 0 # TODO squash derivative * sum(weights * delta_out)
                    delta_hidden.append(unit_delta)

                # Step 4 update weights
                # TODO
        
        # If this network has lower error from validation set, set as best
        if best_network is None or network.error(validation) < best_network.error(validation):
            best_network = network
            
    return best_network

In [50]:
# TODO Random learning rate and epochs. I don't know what to use
best_net = backpropagation(sigmoid, sigmoid_d, 0.01, 50)
print(training[0])
print(best_net.output(training[0]))
print(best_net.error(training))

ValueError: operands could not be broadcast together with shapes (5,) (4,) 