In [1]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.7.0


In [2]:
import numpy as np
import pandas as pd
import random

# Generate N data points within unit square and labels

In [3]:
def generate_data(N):
    x = np.random.uniform(0,1,N*2).reshape(N,2)
    y = (x[:,0] - x[:,1] > 0).astype(int)*2-1
    return x, y

In [4]:
x_train, y_train = generate_data(20)
x_test, y_test = generate_data(1000)

In [5]:
print(x_train, y_train)

[[0.53418224 0.71668829]
 [0.9103246  0.89357462]
 [0.05721559 0.43756385]
 [0.88444044 0.12768854]
 [0.70402733 0.49461694]
 [0.85764027 0.41277816]
 [0.39609031 0.34649127]
 [0.31938538 0.7385899 ]
 [0.49334731 0.51231368]
 [0.82488302 0.95868036]
 [0.16444687 0.29322307]
 [0.22465656 0.70204661]
 [0.97821809 0.7197862 ]
 [0.28002307 0.72276618]
 [0.47190302 0.16425309]
 [0.16136981 0.44112853]
 [0.53413983 0.64966776]
 [0.16076358 0.28698629]
 [0.69718786 0.14790561]
 [0.13659892 0.16050024]] [-1  1 -1  1  1  1  1 -1 -1 -1 -1 -1  1 -1  1 -1 -1 -1  1 -1]


# Perceptron algorithm
-  Nerual Network without hidden layer.
-  The activation function of the output layer is the sign function. ($y_i \in \{-1,1\}$, $\hat{y}_i = sign(W \cdot X_i)\in\{-1,1\}$)
-  Loss functions
    -  Perceptron criterion loss: $L_i = max\{-y_i(W \cdot X_i),0\}$, $\partial L_i/\partial w_j = \begin{cases}
                                                                                                    0 &\text{if $y_i = \hat{y}_i$} \\
                                                                                                    -y_iX_{ij} &\text{otherwise}
                                                                                                    \end{cases} = -\frac{1}{2}(y_i - \hat{y}_i)X_{ij}$                                                                        
       Downside: You can always set $W$ to zero to minimize the loss.
    -  Hinge loss: $L_i = max\{1-y_i(W \cdot X_i),0\}$, $\partial L_i/\partial w_j = \begin{cases}
                                                                                                    0 &\text{if $y_i(W \cdot X_i)>1$} \\
                                                                                                    -y_iX_{ij} &\text{otherwise}
                                                                                                    \end{cases}$            

In [7]:
class Perceptron:
    def __init__(self, d, alpha=0.1):
        self.W = np.random.uniform(-1,1,d)
        self.alpha= alpha
    
    def predict(self, x):
        return np.sign(np.dot(x, self.W))
    
    def fit(self, X, y, batch_size = 1, epoch = 100, loss='perceptron'):
        N = len(y)
        Nb = round(N*batch_size)
        for _ in range(epoch):
            idlist = np.random.randint(0,N,size=Nb)
            grad = 0
            for i in idlist:
                xi = X[idlist[i]]
                yi = y[idlist[i]]
                if loss == 'perceptron':
                    if  yi*(np.dot(xi, self.W))<0:
                        grad += -yi*xi
                elif loss == 'hinge':
                    if  yi*(np.dot(xi, self.W))<1:
                        grad += -yi*xi
            self.W = self.W - self.alpha*grad

In [8]:
model = Perceptron(len(x_train[0]))
model.fit(x_train,y_train,epoch=100)

In [9]:
print("Train accuracy:", (model.predict(x_train) == y_train).mean())
print("Test accuracy:", (model.predict(x_test) == y_test).mean())

Train accuracy: 1.0
Test accuracy: 0.984


In [10]:
model2 = Perceptron(len(x_train[0]))
model2.fit(x_train,y_train,epoch=100, loss='hinge')

In [11]:
print("Train accuracy:", (model2.predict(x_train) == y_train).mean())
print("Test accuracy:", (model2.predict(x_test) == y_test).mean())

Train accuracy: 1.0
Test accuracy: 1.0
