In [16]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate
from tqdm.notebook import tqdm


# Directory the kernel is currently running in; the dataset is looked up
# relative to it.
CUR_DIR = Path.cwd()
CSV_PATH = CUR_DIR / "data" / "scaled.csv"

In [17]:
class CustomPerceptron():
    """Binary perceptron trained with the mistake-driven update rule.

    Labels are expected to be encoded as -1 / +1: a sample is treated as
    misclassified whenever ``y * y_hat != 1`` and every prediction is
    either -1 or +1.

    Parameters
    ----------
    lr : float
        Step size applied to each weight / bias correction.
    epoch : int
        Maximum number of passes over the training data.
    feature_num : int or None
        When truthy, the weights start as a vector of ones of that length;
        otherwise they are created as zeros on the first ``fit`` call.

    Attributes
    ----------
    w, b
        Current weights and bias (``w`` is ``None`` until initialised).
    best_w, best_b, best_acc
        Snapshot of the parameters that reached the highest training
        accuracy during the last ``fit`` call. ``best_w`` / ``best_b`` are
        ``None`` and ``best_acc`` is -1 until an epoch has been evaluated.
    """

    def __init__(self, lr=0.001, epoch=1000, feature_num=None):
        self.w = np.ones(feature_num) if feature_num else None
        self.b = 0
        self.best_w = None
        self.best_b = None
        # Defined up front so the attribute can be read before fit() runs.
        self.best_acc = -1
        self.l_rate = lr
        self.epoch = epoch

    def sign(self, y):
        """Return -1 for negative ``y`` and +1 otherwise (zero maps to +1)."""
        return -1 if y < 0 else 1

    def fit(self, x_train, y_train):
        """Train on ``x_train`` / ``y_train`` for at most ``self.epoch`` passes.

        Each misclassified sample moves the weights by ``lr * y * x`` and
        the bias by ``lr * y``. After every pass the training accuracy is
        measured and the best-scoring parameters are stored in ``best_w`` /
        ``best_b`` / ``best_acc``. Training stops early once a full pass
        produces no mistakes.

        Calling ``fit`` again continues from the current ``w`` / ``b`` and
        resets the best-accuracy tracking.

        Parameters
        ----------
        x_train : array-like, indexed as (sample, feature)
        y_train : array-like of labels, one per row of ``x_train``
        """
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        # `not self.w` is ambiguous for arrays with more than one element,
        # so test for the "not initialised yet" sentinel explicitly.
        if self.w is None:
            self.w = np.zeros(x_train.shape[1])

        self.best_acc = -1
        for _ in tqdm(range(self.epoch), total=self.epoch):
            err = 0
            for xi, yi in zip(x_train, y_train):
                if yi * self._predict(xi) != 1:
                    err += 1
                    self.w += self.l_rate * yi * xi
                    self.b += self.l_rate * yi

            # Score before the convergence check so that a mistake-free
            # pass is still recorded as the best model.
            cur_acc = accuracy_score(y_train, self.predict(x_train))
            if self.best_acc < cur_acc:
                # Copy: self.w is updated in place by `+=`, so storing the
                # array itself would make best_w follow every later update.
                self.best_w = self.w.copy()
                self.best_b = self.b
                self.best_acc = cur_acc

            if err == 0:
                break

    def _calculate(self, x):
        """Raw decision value ``x . w + b`` for a single sample."""
        return np.dot(x, self.w) + self.b

    def _predict(self, x):
        """Predicted label (-1 or +1) for a single sample."""
        return self.sign(self._calculate(x))

    def _predict_with(self, x, w, b):
        """Vectorised -1 / +1 labels for the rows of ``x`` using ``w``, ``b``."""
        scores = x @ w.T + b
        return np.where(scores < 0, -1, 1)

    def predict(self, x):
        """Predict -1 / +1 labels for ``x`` with the current parameters."""
        return self._predict_with(x, self.w, self.b)

    def predict_use_best(self, x):
        """Predict -1 / +1 labels for ``x`` with the best recorded parameters.

        Fails if no best parameters have been recorded yet (``best_w`` is
        still ``None``).
        """
        return self._predict_with(x, self.best_w, self.best_b)

    def get_best_params(self):
        """Return the best recorded parameters as ``{"w": ..., "b": ...}``."""
        return {"w": self.best_w, "b": self.best_b}

    def get_params(self):
        """Return the current parameters as ``{"w": ..., "b": ...}``."""
        return {"w": self.w, "b": self.b}

In [18]:
# Seed NumPy's global random generator before anything else runs.
np.random.seed(42)
dataset = pd.read_csv(CSV_PATH)
# All columns except the last form the feature matrix; the last column is
# used as the target vector.
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [19]:
# Train the hand-written perceptron on the full dataset for up to 1000 passes.
model = CustomPerceptron(epoch=1000)
model.fit(x_train=x, y_train=y)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [20]:
# Show the final parameters, then compare the best training accuracy seen
# during fitting with the accuracy of the final parameters.
final_params = model.get_params()
print(final_params)
y_pred = model.predict(x)
final_acc = accuracy_score(y, y_pred)
print(f"best acc: {model.best_acc}")
print(f"final acc: {final_acc}")

{'w': array([ 0.001375  ,  0.00536774, -0.00195918, -0.00045652, -0.0010649 ,
        0.00357464,  0.00129889,  0.00106667]), 'b': 0.002}
best acc: 0.77734375
final acc: 0.765625


In [21]:
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.svm import LinearSVC,SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import *

scoring = ['accuracy','precision_macro', 'recall_macro','f1_macro']


def _cross_validated_scores(estimator, features, labels, metrics, folds=5):
    """Cross-validate ``estimator`` once and collect per-fold scores.

    All ``metrics`` are evaluated in a single ``cross_validate`` run, so the
    estimator is fitted once per fold instead of once per fold per metric.

    Returns a new dict keyed by the part of each metric name before the
    first underscore (e.g. ``"precision_macro"`` -> ``"precision"``); each
    value is the array of per-fold test scores.
    """
    cv_out = cross_validate(estimator, features, labels, cv=folds, scoring=list(metrics))
    return {metric.split("_")[0]: cv_out[f"test_{metric}"] for metric in metrics}


# Each baseline gets its own fresh dict of per-fold scores.
per = _cross_validated_scores(
    Perceptron(tol=1e-3, random_state=42, max_iter=5000, eta0=0.01), x, y, scoring
)
log = _cross_validated_scores(
    LogisticRegression(tol=1e-3, random_state=42, max_iter=5000), x, y, scoring
)
lsvc = _cross_validated_scores(
    LinearSVC(tol=1e-3, random_state=42, max_iter=5000), x, y, scoring
)
svc = _cross_validated_scores(
    SVC(tol=1e-3, random_state=42, max_iter=5000), x, y, scoring
)
mlp = _cross_validated_scores(
    MLPClassifier(hidden_layer_sizes=(32, 64, 32), tol=1e-3, random_state=42, max_iter=1000),
    x, y, scoring,
)



In [22]:
# Per-fold scores of every model, keyed by the name used in the final table.
fold_scores_by_model = {"perceptron":per,"logistic":log,"linear_svm":lsvc,"rbf_svm":svc,"mlp":mlp}
print(fold_scores_by_model)

# Average over folds into a NEW nested dict. The per-model dicts produced by
# the cross-validation cell are left untouched, so this cell can be re-run
# without changing upstream state.
result = {
    model_name: {metric: fold_scores.mean() for metric, fold_scores in by_metric.items()}
    for model_name, by_metric in fold_scores_by_model.items()
}
print(result)

{'perceptron': {'accuracy': array([0.78571429, 0.73376623, 0.74025974, 0.78431373, 0.69281046]), 'precision': array([0.76489819, 0.72038567, 0.7464527 , 0.7647111 , 0.75621521]), 'recall': array([0.76259259, 0.66296296, 0.77018519, 0.78179245, 0.76056604]), 'f1': array([0.76370484, 0.67160764, 0.73580374, 0.7701343 , 0.69275796])}, 'logistic': {'accuracy': array([0.76623377, 0.74675325, 0.76623377, 0.79084967, 0.77777778]), 'precision': array([0.74772727, 0.72158766, 0.75635209, 0.77983449, 0.76653171]), 'recall': array([0.72203704, 0.71555556, 0.70925926, 0.74245283, 0.72358491]), 'f1': array([0.73061224, 0.7182266 , 0.72101449, 0.75431554, 0.73577814])}, 'linear_svm': {'accuracy': array([0.77272727, 0.75324675, 0.75974026, 0.79738562, 0.77777778]), 'precision': array([0.75965897, 0.72903846, 0.75378151, 0.78635779, 0.76653171]), 'recall': array([0.72277778, 0.72055556, 0.69574074, 0.75188679, 0.72358491]), 'f1': array([0.73362985, 0.72417044, 0.70765995, 0.76338239, 0.73577814])}, 'r

In [23]:
# Outer keys (models) become columns first; the transpose turns them into
# rows with one column per metric.
result_df = pd.DataFrame(result).T


In [24]:
# Write the summary table to a CSV file (relative path, so it lands in the
# current working directory).
result_df.to_csv(path_or_buf="result.csv")

In [None]:
# Reload the saved summary. The first CSV column holds the row labels that
# to_csv wrote, so restore it as the index; otherwise it becomes an
# "Unnamed: 0" data column and the LaTeX table gains an extra integer index.
df = pd.read_csv("result.csv", index_col=0)
df.to_latex("result.tex")