In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)

In [3]:
def divTrainTest(X, y, size = 0.33):
 randomi = np.random.permutation(len(y))
 limiar = int(len(y) * size)
 X_test = X[randomi][0:limiar]
 X_train = X[randomi][limiar:]
 y_test = y[randomi][0:limiar]
 y_train = y[randomi][limiar:]
 return X_train, X_test, y_train, y_test

In [4]:
#Divisão de treino e teste
X_train, X_test, y_train, y_test = divTrainTest(X, y, size = 0.33)

In [5]:
class xscale():
 def __init__(self):
  self.mean = None
  self.dp = None
 def fit(self, data):
  self.mean = data.mean(axis=0)
  self.dp = data.std(axis=0)
 def transform(self, data):
  return (data - self.mean) / self.dp

 def inversa(self, data):
  return (data * self.dp) + self.mean

In [6]:
schedr_X = xscale()
schedr_X.fit(X_train)
X_train_sched = schedr_X.transform(X_train)
X_test_sched = schedr_X.transform(X_test)

In [7]:
def ACC(y_true, y_pred):
 acc = (y_true == y_pred).sum()/len(y_pred)
 return acc

In [8]:
def REV(tp, fn):
 rev = tp / (tp + fn)
 return rev

In [9]:
def PRE(tp, fp):
 pre = tp / (tp + fp)
 return pre

In [10]:
def FUSCORE(pre, rev):
 fuscore = (2 * pre * rev) / (pre + rev)
 return fuscore

### Regressão logística

In [11]:
class LogisticRegression():

    def __init__(self, t=1000, alpha=0.005):
        self.w = None
        self.t = t
        self.alpha = 0.01

    def fit(self, X, y):
        X = np.c_[np.ones(X.shape[0]), X]
        self.w = np.random.rand(X.shape[1]) * 0.9

        for epoch in range(self.t):
            pred = X @ self.w
            y_pred = 1/(1+np.exp(-pred))
            err = y - y_pred
            cost =  np.mean(-y * np.log(y_pred) - (1-y) * np.log(1 - y_pred))
            self.w = self.w +  self.alpha * (X.T @ err)/len(y)

    def predict(self, X):
        X = np.c_[np.ones(X.shape[0]), X]
        y_pred = 1.0/(1+np.exp(-X @ self.w))
        return np.where( y_pred > 0.5, 1, 0 )

In [12]:
from sklearn.metrics import confusion_matrix

In [13]:
def matrix_confusao(y_test, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    return np.array([[tp, fp],[fn, tn]])

In [14]:
RegL = LogisticRegression(t= 1000)
RegL.fit(X_train_sched, y_train)
y_pred = RegL.predict(X_test_sched)

  cost =  np.mean(-y * np.log(y_pred) - (1-y) * np.log(1 - y_pred))


In [15]:
print(matrix_confusao(y_test,y_pred))

[[119   3]
 [  3  62]]


In [16]:
acuracia = (ACC(y_test,y_pred))
revocacao = (REV(matrix_confusao(y_test,y_pred)[0,0],matrix_confusao(y_test,y_pred)[1,0]))
precisao = (PRE(matrix_confusao(y_test,y_pred)[0,0],matrix_confusao(y_test,y_pred)[0,1]))
f1score = (FUSCORE(precisao, revocacao))

print("A acuracia do modelo é: {} ".format(acuracia))
print("A revocação do modelo é: {} ".format(revocacao))
print("A precisao do modelo é: {} ".format(precisao))
print("A f1 score do modelo é: {} ".format(f1score))

A acuracia do modelo é: 0.9679144385026738 
A revocação do modelo é: 0.9754098360655737 
A precisao do modelo é: 0.9754098360655737 
A f1 score do modelo é: 0.9754098360655737 


In [17]:
class KNN():
    
    def __init__(self):
        self.K = None
        self.X_train = None

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X, K=1):
        self.K = K
        classes = np.unique(self.y_train)
        y_pred = []
        
        for xi in X:
            euclidian = -2 * xi @ self.X_train.T + (xi**2).sum() + (self.X_train**2).sum(axis=1)
            knn_index = np.argsort(euclidian)[0:K]
            score = []
            for classe in classes:
                score.append((self.y_train[knn_index]==classe).sum())

            score = np.array(score)
            pred_index = np.argmax(score)    
            y_pred.append(classes[pred_index])

        return np.array(y_pred)

### K-Nearest Neighbors (KNN)

In [18]:
knn = KNN()
knn.fit(X_train_sched, y_train)
y_pred = knn.predict(X_test_sched, K=3)

In [19]:
acuracia = (ACC(y_test,y_pred))
revocacao = (REV(matrix_confusao(y_test,y_pred)[0,0],matrix_confusao(y_test,y_pred)[1,0]))
precisao = (PRE(matrix_confusao(y_test,y_pred)[0,0],matrix_confusao(y_test,y_pred)[0,1]))
f1score = (FUSCORE(precisao, revocacao))

print("A acuracia do modelo é: {} ".format(acuracia))
print("A revocação do modelo é: {} ".format(revocacao))
print("A precisao do modelo é: {} ".format(precisao))
print("A f1 score do modelo é: {} ".format(f1score))

A acuracia do modelo é: 0.9572192513368984 
A revocação do modelo é: 0.9754098360655737 
A precisao do modelo é: 0.9596774193548387 
A f1 score do modelo é: 0.9674796747967479 
