In [1]:
import tensorflow as tf
# Print the installed TensorFlow version so the environment is recorded next to the outputs.
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.7.0


In [2]:
import numpy as np
import pandas as pd
import random

# Generate N data points within the unit square and their labels

In [13]:
def generate_data(N, rng=None):
    """Sample N points uniformly from the unit square and label them by side of the diagonal.

    Parameters
    ----------
    N : int
        Number of points to draw.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Random source exposing ``uniform``. When omitted, the global ``np.random``
        state is used, which is the original behaviour. Pass a seeded generator
        to get reproducible data without touching global state.

    Returns
    -------
    x : ndarray of shape (N, 2)
        Points with both coordinates in [0, 1).
    y : ndarray of shape (N,)
        Integer labels: +1 where ``x[:, 0] > x[:, 1]``, otherwise -1.
    """
    source = np.random if rng is None else rng
    # Draw 2*N values in one call and fold them into (N, 2) so the default path
    # consumes the global random stream exactly as before.
    x = source.uniform(0, 1, N * 2).reshape(N, 2)
    # Map the boolean "below the diagonal" test from {False, True} to {-1, +1}.
    y = (x[:, 0] - x[:, 1] > 0).astype(int) * 2 - 1
    return x, y

In [42]:
# Draw a small training set (20 points) and a larger test set (1000 points).
# NOTE(review): no random seed is set in the visible cells, so these samples
# (and every number printed below) change on each run.
x_train, y_train = generate_data(20)
x_test, y_test = generate_data(1000)

In [43]:
# Show the raw training points followed by their labels.
print(x_train, y_train)

[[0.67239282 0.10022841]
 [0.54381034 0.02878654]
 [0.14008126 0.77086849]
 [0.72636794 0.45263393]
 [0.07281047 0.65587833]
 [0.76551505 0.84246293]
 [0.53678763 0.30877629]
 [0.54106736 0.65185553]
 [0.21498941 0.75477042]
 [0.01514386 0.50858212]
 [0.75685352 0.07318384]
 [0.45006962 0.18971094]
 [0.96416723 0.0198879 ]
 [0.9395163  0.43102253]
 [0.99861812 0.70790419]
 [0.94477674 0.9629321 ]
 [0.6472921  0.22963197]
 [0.90021079 0.93735798]
 [0.58161061 0.40604057]
 [0.87920545 0.04011996]] [ 1  1 -1  1 -1 -1  1 -1 -1 -1  1  1  1  1  1 -1  1 -1  1  1]


# Perceptron algorithm
-  Neural network without a hidden layer.
-  The activation function of the output layer is the sign function. ($y_i \in \{-1,1\}$, $\hat{y}_i = sign(W \cdot X_i)\in\{-1,1\}$)
-  Loss functions
    -  Perceptron criterion loss: $L_i = max\{-y_i(W \cdot X_i),0\}$, $\partial L_i/\partial w_j = \begin{cases}
                                                                                                    0 &\text{if $y_i = \hat{y}_i$} \\
                                                                                                    -y_iX_{ij} &\text{otherwise}
                                                                                                    \end{cases} = -\frac{1}{2}(y_i - \hat{y}_i)X_{ij}$                                                                        
       Downside: setting $W = 0$ always minimizes this loss (it gives $L_i = 0$ for every sample), so a minimizer is not necessarily a useful classifier.
    -  Hinge loss: $L_i = max\{1-y_i(W \cdot X_i),0\}$, $\partial L_i/\partial w_j = \begin{cases}
                                                                                                    0 &\text{if $y_i(W \cdot X_i)>1$} \\
                                                                                                    -y_iX_{ij} &\text{otherwise}
                                                                                                    \end{cases}$            

In [76]:
class Perceptron:
    """Linear classifier ``sign(x . W)`` trained by mini-batch (sub)gradient descent.

    Attributes
    ----------
    W : ndarray of shape (d,)
        Weight vector, initialised uniformly in [-1, 1). There is no bias term.
    alpha : float
        Learning rate applied to the summed batch gradient.
    """

    def __init__(self, d, alpha=0.1):
        """Create a model for ``d``-dimensional inputs with learning rate ``alpha``."""
        self.W = np.random.uniform(-1, 1, d)
        self.alpha = alpha

    def predict(self, x):
        """Return the sign of the linear score ``x . W``.

        ``x`` may be a single point of shape (d,) or a batch of shape (n, d).
        ``np.sign`` yields 0 for a score of exactly zero.
        """
        return np.sign(np.dot(x, self.W))

    def fit(self, X, y, batch_size=1, epoch=100, loss='perceptron'):
        """Update ``self.W`` in place by repeated mini-batch gradient steps.

        Parameters
        ----------
        X : array-like of shape (N, d)
            Training points.
        y : array-like of shape (N,)
            Labels; the update rule treats them as -1 / +1.
        batch_size : float
            Fraction of N sampled per step; each step draws ``round(N * batch_size)``
            indices with replacement.
        epoch : int
            Number of gradient steps (one sampled batch per step).
        loss : {'perceptron', 'hinge'}
            'perceptron' updates on samples with margin ``y * (x . W) < 0``;
            'hinge' updates on samples with margin ``< 1``.

        Raises
        ------
        ValueError
            If ``loss`` is not one of the supported names. Previously an unknown
            name silently performed no training.
        """
        # Both losses share the gradient -y * x on violating samples and differ
        # only in the margin below which a sample counts as violating.
        if loss == 'perceptron':
            margin_threshold = 0
        elif loss == 'hinge':
            margin_threshold = 1
        else:
            raise ValueError(
                "loss must be 'perceptron' or 'hinge', got {!r}".format(loss)
            )

        X = np.asarray(X)
        y = np.asarray(y)
        N = len(y)
        Nb = round(N * batch_size)
        if Nb == 0:
            # Nothing to sample, so the weights stay unchanged.
            return

        for _ in range(epoch):
            # Sampled values are already row indices into X / y. The earlier code
            # looked them up a second time through the index list itself, which
            # raised IndexError whenever the batch was smaller than N.
            idlist = np.random.randint(0, N, size=Nb)
            xb = X[idlist]
            yb = y[idlist]
            violating = yb * np.dot(xb, self.W) < margin_threshold
            # Sum of -y_i * x_i over violating samples; a zero vector if none violate.
            grad = -(yb[violating, None] * xb[violating]).sum(axis=0)
            self.W = self.W - self.alpha * grad

In [86]:
# Train with the default perceptron-criterion loss.
# The input dimension is read off the first training point.
model = Perceptron(len(x_train[0]))
model.fit(x_train,y_train,epoch=100)

In [87]:
# Accuracy = fraction of points whose predicted sign equals the label.
print("Train accuracy:", (model.predict(x_train) == y_train).mean())
print("Test accuracy:", (model.predict(x_test) == y_test).mean())

Train accuracy: 1.0
Test accuracy: 0.87


In [88]:
# Train a second, independently initialised model with the hinge loss for comparison.
model2 = Perceptron(len(x_train[0]))
model2.fit(x_train,y_train,epoch=100, loss='hinge')

In [89]:
# Same accuracy measure as above, for the hinge-loss model.
print("Train accuracy:", (model2.predict(x_train) == y_train).mean())
print("Test accuracy:", (model2.predict(x_test) == y_test).mean())

Train accuracy: 1.0
Test accuracy: 0.941
