In [1]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score


# Resolve the dataset location relative to the directory the notebook runs from.
CUR_DIR = Path.cwd()
CSV_PATH = CUR_DIR / "data" / "scaled.csv"

In [2]:
class CustomPerceptron():
    """Binary perceptron trained with the mistake-driven update rule.

    Predictions are always -1 or 1, and an update is triggered whenever
    ``label * prediction != 1``, so targets are expected to be encoded as
    -1 / 1.

    Attributes set by ``__init__``:
        w: weight vector, or ``None`` until ``fit`` sizes it from the data.
        b: bias term.
        l_rate: learning rate applied to every correction.
        epoch: maximum number of passes over the training data.

    Attributes set by ``fit``:
        best_acc: highest training accuracy measured after any epoch.
        best_w, best_b: copies of the parameters that produced ``best_acc``.
    """

    def __init__(self, lr=0.001, epoch=1000, feature_num=None):
        """Store hyper-parameters and optionally pre-allocate the weights.

        Args:
            lr: learning rate (step size of each correction).
            epoch: maximum number of passes over the training set.
            feature_num: if truthy, the weights start as a vector of ones of
                this length; otherwise they are created as zeros in ``fit``.
        """
        self.w = np.ones(feature_num) if feature_num else None
        self.b = 0
        self.l_rate = lr
        self.epoch = epoch

    def sign(self, y):
        """Return -1 for ``y <= 0`` and 1 otherwise.

        Note that zero maps to -1 here, whereas ``_predict``/``predict`` map a
        zero activation to 1.
        """
        return -1 if y <= 0 else 1

    def fit(self, x_train, y_train):
        """Train on ``x_train`` / ``y_train``, one sample at a time.

        Args:
            x_train: 2-D array-like, one row per sample.
            y_train: 1-D array-like of labels aligned with the rows.

        After every epoch the training accuracy is measured; the best value is
        kept in ``best_acc`` and the matching parameters in ``best_w`` /
        ``best_b``. ``w`` and ``b`` always hold the parameters of the last
        epoch that ran.
        """
        # Plain ndarrays keep positional row iteration well-defined even when
        # the caller hands in pandas objects.
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        # Must be an identity check: `not self.w` raises ValueError as soon as
        # the weights are an array with more than one element (pre-allocated
        # via feature_num, or left over from a previous fit).
        if self.w is None:
            self.w = np.zeros(x_train.shape[1])

        self.best_acc = -1
        self.best_w = self.w.copy()
        self.best_b = self.b

        for _ in tqdm(range(self.epoch), total=self.epoch):
            n_updates = 0
            for xi, yi in zip(x_train, y_train):
                if yi * self._predict(xi) != 1:
                    self.w += self.l_rate * yi * xi
                    self.b += self.l_rate * yi
                    n_updates += 1

            epoch_acc = accuracy_score(y_train, self.predict(x_train))
            if self.best_acc < epoch_acc:
                self.best_acc = epoch_acc
                # Snapshot, because self.w keeps being updated in place.
                self.best_w = self.w.copy()
                self.best_b = self.b

            # A full pass without corrections is a fixed point: samples are
            # visited in the same order every epoch, so nothing can change.
            if n_updates == 0:
                break

    def _calculate(self, x):
        """Return the raw activation ``x . w + b`` (scalar or per-row array)."""
        return np.dot(x, self.w) + self.b

    def _predict(self, x):
        """Classify a single sample: -1 if the activation is negative, else 1."""
        return -1 if self._calculate(x) < 0 else 1

    def predict(self, x):
        """Classify every row of ``x``; returns an array of -1 / 1 labels."""
        return np.where(self._calculate(x) < 0, -1, 1)

    def get_params(self):
        """Return the current weights and bias as ``{"w": ..., "b": ...}``."""
        return {"w": self.w, "b": self.b}

In [3]:
# Fix NumPy's global RNG seed before anything else runs.
np.random.seed(42)

# Every column except the last is a feature; the last column holds the label.
dataset = pd.read_csv(CSV_PATH)
feature_cols = dataset.columns[:-1]
target_col = dataset.columns[-1]
x = dataset[feature_cols].values
y = dataset[target_col].values

In [4]:
# Train the hand-written perceptron on the full dataset (default learning rate).
N_EPOCHS = 10_000
model = CustomPerceptron(epoch=N_EPOCHS)
model.fit(x, y)

  0%|          | 0/10000 [00:00<?, ?it/s]

In [5]:
# Parameters (weights and bias) left in the model after the final epoch.
learned_params = model.get_params()
print(learned_params)

# best_acc is the highest per-epoch training accuracy recorded during fit, while
# the score below is computed from the weights left after the last epoch, so the
# two numbers can differ.
y_pred = model.predict(x)
print(model.best_acc)
accuracy_score(y, y_pred)

{'w': array([-1.25000000e-03,  5.85806452e-03, -1.28571429e-03, -1.23913043e-03,
       -1.20432692e-03,  3.55828221e-03,  5.97779676e-06,  1.70000000e-03]), 'b': 0.002}
0.78125


0.5455729166666666

In [6]:
# Baseline for comparison: scikit-learn's Perceptron with the same learning rate
# (eta0) and the same cap on the number of epochs (max_iter).
from sklearn.linear_model import Perceptron

sk_perceptron = Perceptron(tol=1e-1, random_state=42, max_iter=10000, eta0=0.001)
sk_perceptron.fit(x, y)
print(sk_perceptron.coef_)

# Training accuracy of the baseline. The estimator's own score(x, y) reports the
# same mean accuracy, so it is computed only once here instead of being
# evaluated and thrown away.
y_pred = sk_perceptron.predict(x)
accuracy_score(y, y_pred)

[[ 0.0015      0.00446452 -0.00104082 -0.00069565 -0.00254087  0.00329652
   0.00289325  0.00026667]]


0.76953125