In [1]:
import numpy as np

# The Perceptron

## Architecture
- Input layer (+ bias node)
- A single layer of output neurons 
- Weights: $w_{i,j}$ connecting input $i$ to output $j$

## Activation Function
- Threshold/Step-function

## Error
- $E = (y_j - t_j)$, where $y_j$ is the output of the $j$-th output neuron and $t_j$ is its target value.

## Pseudocode
- Initialize the weights connecting input nodes to output nodes.

- Repeat until the convergence criterion is met (or for a specified number of iterations $T$):

    - For each input vector:
        1. Using the current value of the weights, compute the output $y_j$ for each output neuron $j$.
        2. Update each of the weights via:
    $$w_{i,j} \gets w_{i,j} - \eta (y_j-t_j)x_i,$$
where $x_i$ represents the value of the $i$-th input node and $\eta$ is a pre-specified learning rate. (Note that in classification problems, only the weights connecting to incorrect outputs are updated.)

### Example: The Logical OR 
- \# of input nodes = 2
- \# of output nodes = 1
- Weight matrix $\mathbf{W}$ is a $3\times 1$ matrix (i.e., 2 input nodes + bias, and one output node)

In [2]:
# Truth table of the logical OR: one row per pattern (x1, x2).
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# Append the bias node as a trailing column of ones, sized from the data.
inputs = np.hstack((inputs, np.ones((inputs.shape[0], 1))))
# Column vector of targets, one row per pattern.
targets = np.array([[0], [1], [1], [1]])

In [3]:
# initialize weights to small random values (both positive and negative), UNIF(a,b)
np.random.seed(0)  # fixed seed so Restart & Run All reproduces the same starting weights
a,b = -0.25,0.25
# one row per input node (bias included), one column per output node
weights = np.random.uniform(a, b, size=(inputs.shape[1], targets.shape[1]))

In [4]:
weights

array([[ 0.03216293],
       [ 0.18454368],
       [-0.21558359]])

In [5]:
T = 6  # iteration budget T from the pseudocode; no cell shown here reads this global (pcn.train takes its own T argument)

In [6]:
# Batch perceptron training on the OR data: repeat until every pattern is
# classified correctly, with a hard cap so the cell cannot hang the kernel
# if the targets are ever changed to something that is not linearly separable.
eta = 0.25        # learning rate
max_iter = 100    # safety cap on the number of passes over the data
converged = False
i = 1
while not converged and i <= max_iter:
    print('iteration {0}'.format(i))
    # calculate sums for the output nodes
    h = np.dot(inputs, weights)
    # calculate outputs for the output nodes (step activation)
    y = np.where(h > 0, 1, 0)
    errors = y - targets
    # update the weights in place; this is a no-op once every error is zero
    weights -= eta*np.dot(inputs.T, errors)
    # converged when the outputs computed before this update all matched the targets
    converged = not errors.any()
    i += 1
    print(y)
if not converged:
    print('stopped after {0} iterations without converging'.format(max_iter))

iteration 1
[[0]
 [0]
 [0]
 [1]]
iteration 2
[[1]
 [1]
 [1]
 [1]]
iteration 3
[[1]
 [1]
 [1]
 [1]]
iteration 4
[[0]
 [1]
 [1]
 [1]]


In [7]:
weights

array([[ 0.28216293],
       [ 0.43454368],
       [-0.21558359]])

## A Perceptron Class

In [8]:
class pcn:
    """
    A single-layer Perceptron with a threshold (step) activation.

    Parameters
    ----------
    inputs : array of shape (nData, nIn)
        Training patterns WITHOUT the bias column; the bias node is
        appended here as a trailing column of ones.
    targets : array of shape (nData, nOut)
        Target value of every output node for every pattern.
    weights : array of shape (nIn + 1, nOut)
        Initial weights; the last row belongs to the bias node. The array
        is copied, so training never modifies the caller's array.

    Attributes
    ----------
    nData, nOut : number of training patterns / output nodes.
    inputs : training patterns with the bias column appended.
    arch : (number of input nodes without bias, number of output nodes).
    """
    def __init__(self, inputs, targets, weights):
        inputs = np.asarray(inputs)
        targets = np.asarray(targets)
        self.nData = len(inputs)
        self.nOut = targets.shape[1]
        # add bias node: one entry per pattern rather than a hard-coded 4 rows
        self.inputs = np.concatenate((inputs, np.ones((self.nData, 1))), axis=1)
        # private float copy: train() updates in place and must not alias the caller's array
        self.weights = np.array(weights, dtype=float)
        self.targets = targets
        self.arch = (inputs.shape[1],self.nOut)
    
    def train(self, T=5, eta=0.25,printscn = False):
        """
        This method trains the Perceptron.

        Performs T batch updates with learning rate eta on the stored
        training data. When printscn is True, the iteration number, the
        outputs computed before the update, and the updated weights are
        printed after every update.
        """
        for i in range(T): 
            # calculate sums for the output nodes
            h = np.dot(self.inputs,self.weights)
            # calculate outputs for the output nodes
            y = np.where(h>0, 1, 0)
            # update the weights from the instance's own data (not a global `inputs`)
            self.weights -= eta*np.dot(self.inputs.T, (y-self.targets))
            if printscn:
                print('iteration {0}'.format(i))
                print(y)
                print(self.weights)
    
    def forward(self, newdata):
        """
        This method uses the current sets of weights, and outputs
        the firing of the output nodes when fed an input vector.

        newdata may be a single vector or a 2-D array with one pattern per
        row. If its last dimension equals the number of input nodes, the
        bias entry is appended automatically; data that already carries
        the bias entry is used unchanged.
        """
        newdata = np.asarray(newdata)
        if newdata.shape[-1] == self.arch[0]:
            # caller passed raw patterns: append the bias entry to each of them
            bias = np.ones(newdata.shape[:-1] + (1,))
            newdata = np.concatenate((newdata, bias), axis=-1)
        # calculate sums for the output nodes
        h = np.dot(newdata,self.weights)
        # calculate outputs for the output nodes
        y = np.where(h>0, 1, 0)
        return y

In [9]:
# draw a new UNIF(a,b) weight matrix: one row per input column of `inputs`, one column per output
weights = np.random.uniform(a, b, size=(inputs.shape[1], targets.shape[1]))

In [10]:
# Build the perceptron from the raw OR patterns; pcn appends the bias column itself.
p = pcn(
    inputs=np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
    targets=targets,
    weights=weights,
)

In [11]:
# Run the default number of updates (T=5, eta=0.25), printing the outputs and weights after each one.
p.train(printscn=True)

iteration 0
[[1]
 [0]
 [1]
 [0]]
[[ 0.26571436]
 [ 0.33999216]
 [ 0.33630541]]
iteration 1
[[1]
 [1]
 [1]
 [1]]
[[ 0.26571436]
 [ 0.33999216]
 [ 0.08630541]]
iteration 2
[[1]
 [1]
 [1]
 [1]]
[[ 0.26571436]
 [ 0.33999216]
 [-0.16369459]]
iteration 3
[[0]
 [1]
 [1]
 [1]]
[[ 0.26571436]
 [ 0.33999216]
 [-0.16369459]]
iteration 4
[[0]
 [1]
 [1]
 [1]]
[[ 0.26571436]
 [ 0.33999216]
 [-0.16369459]]


In [14]:
# Row 3 of the global `inputs` is [1, 1, 1]: the (1, 1) pattern with its bias entry already appended.
p.forward(inputs[3,:])

array([1])

In [15]:
targets

array([[0],
       [1],
       [1],
       [1]])

In [16]:
p.weights

array([[ 0.26571436],
       [ 0.33999216],
       [-0.16369459]])

In [17]:
p.nData

4

In [18]:
p.nOut

1

In [19]:
p.arch # (number of input nodes, number of output nodes)

(2, 1)