In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset; return_X_y=True makes the loader return
# the (features, labels) pair directly.
X, y = load_breast_cancer(return_X_y=True)

In [3]:
def divTrainTest(X, y, size = 0.30, random_state=None):
    """Randomly split samples and labels into train and test partitions.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis.
    y : array-like
        Labels, one per sample.
    size : float, default 0.30
        Fraction of samples placed in the test partition; the test set
        holds ``int(len(y) * size)`` samples.
    random_state : int or None, default None
        Seed for a private ``np.random.RandomState`` so the split can be
        reproduced. With ``None`` the global NumPy random state is used.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of samples.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) != len(y):
        raise ValueError(
            "X and y must have the same number of samples "
            "(got {} and {})".format(len(X), len(y))
        )

    if random_state is None:
        order = np.random.permutation(len(y))
    else:
        order = np.random.RandomState(random_state).permutation(len(y))

    # The first `cut` shuffled positions form the test set, the rest the
    # training set; each array is indexed once with the relevant slice.
    cut = int(len(y) * size)
    test_idx, train_idx = order[:cut], order[cut:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

In [4]:
# Train/test split: size=0.30 is the fraction of samples held out for testing.
X_train, X_test, y_train, y_test = divTrainTest(X, y, size = 0.30)

In [5]:
class xscale():
    """Standardise features to zero mean and unit standard deviation.

    ``fit`` learns the per-column mean and standard deviation,
    ``transform`` applies ``(data - mean) / std`` and ``inversa`` undoes it.
    """

    def __init__(self):
        self.mean = None  # per-column mean learned by fit
        self.dp = None    # per-column standard deviation learned by fit

    def fit(self, data):
        """Learn the column-wise mean and standard deviation of ``data``.

        Columns whose standard deviation is zero (constant features) get a
        divisor of 1 so that ``transform`` maps them to 0 instead of
        producing NaN/inf from a division by zero.

        Returns ``self`` so calls can be chained.
        """
        self.mean = data.mean(axis=0)
        dp = data.std(axis=0)
        self.dp = np.where(dp == 0, 1.0, dp)
        return self

    def transform(self, data):
        """Return ``data`` standardised with the statistics learned by ``fit``."""
        return (data - self.mean) / self.dp

    def inversa(self, data):
        """Map standardised ``data`` back to the original scale."""
        return (data * self.dp) + self.mean

In [6]:
# Fit the scaler on the training split only, then apply the same learned
# statistics to both the training and the test split.
schedr_X = xscale()
schedr_X.fit(X_train)
X_train_sched = schedr_X.transform(X_train)
X_test_sched = schedr_X.transform(X_test)

### Métricas de avaliação de modelo

In [7]:
# Accuracy
def ACC(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels with identical shapes. Plain Python
        sequences are accepted as well as NumPy arrays.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape (otherwise NumPy
        broadcasting could silently compare the wrong elements).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape "
            "(got {} and {})".format(y_true.shape, y_pred.shape)
        )
    acc = (y_true == y_pred).sum()/len(y_pred)
    return acc

In [8]:
# Recall
def REV(tp, fn):
    """Return the recall ``tp / (tp + fn)``.

    Parameters
    ----------
    tp : number of true positives.
    fn : number of false negatives.

    Returns 0.0 when ``tp + fn`` is zero (no positive samples) instead of
    dividing by zero.
    """
    positives = tp + fn
    if positives == 0:
        return 0.0
    rev = tp / positives
    return rev

In [9]:
# Precision
def PRE(tp, fp):
    """Return the precision ``tp / (tp + fp)``.

    Parameters
    ----------
    tp : number of true positives.
    fp : number of false positives.

    Returns 0.0 when ``tp + fp`` is zero (nothing was predicted positive)
    instead of dividing by zero.
    """
    predicted_positives = tp + fp
    if predicted_positives == 0:
        return 0.0
    pre = tp / predicted_positives
    return pre

In [10]:
# F1 score
def FUSCORE(pre, rev):
    """Return the F1 score, the harmonic mean of precision and recall.

    Parameters
    ----------
    pre : precision.
    rev : recall.

    Returns 0.0 when ``pre + rev`` is zero instead of dividing by zero.
    """
    total = pre + rev
    if total == 0:
        return 0.0
    fuscore = (2 * pre * rev) / total
    return fuscore

### Regressão logística

In [11]:
# Logistic regression trained with batch gradient descent
class LogisticRegression():
    """Binary logistic regression fitted by batch gradient descent.

    Parameters
    ----------
    t : int, default 1000
        Number of gradient-descent iterations.
    alpha : float, default 0.005
        Learning rate. The constructor argument is honoured here; it is
        no longer overridden by a hard-coded 0.01.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector including the bias as its first entry; ``None``
        until ``fit`` is called.
    """

    def __init__(self, t=1000, alpha=0.005):
        self.w = None
        self.t = t
        self.alpha = alpha

    @staticmethod
    def _sigmoid(z):
        """Logistic function; the input is clipped so ``exp`` cannot overflow."""
        return 1.0 / (1.0 + np.exp(-np.clip(z, -500, 500)))

    def fit(self, X, y):
        """Fit the weights on samples ``X`` and binary labels ``y``.

        A column of ones is prepended to ``X`` for the bias term and the
        weights start from random values in [0, 0.9). Returns ``self``.
        """
        X = np.c_[np.ones(X.shape[0]), X]
        self.w = np.random.rand(X.shape[1]) * 0.9
        for _ in range(self.t):
            err = y - self._sigmoid(X @ self.w)
            # Step along the mean gradient of the log-likelihood.
            self.w = self.w + self.alpha * (X.T @ err) / len(y)
        return self

    def predict(self, X):
        """Return 1 where the predicted probability exceeds 0.5, else 0."""
        X = np.c_[np.ones(X.shape[0]), X]
        y_pred = self._sigmoid(X @ self.w)
        return np.where(y_pred > 0.5, 1, 0)

In [12]:
# Try the model: train on the scaled training split and predict the scaled test split.
RegL = LogisticRegression(t= 1000)
RegL.fit(X_train_sched, y_train)
y_pred = RegL.predict(X_test_sched)

In [13]:
from sklearn.metrics import confusion_matrix

In [14]:
def matrix_confusao(y_test, y_pred):
    """Return the binary confusion matrix laid out as ``[[tp, fp], [fn, tn]]``.

    Labels are assumed to be encoded as 0 (negative) and 1 (positive).
    ``labels=[0, 1]`` forces a 2x2 result even when only one class occurs
    in the inputs, so the four-way unpacking below cannot fail.

    NOTE: this notebook defines this same function in several cells; the
    last executed definition is the one in effect.
    """
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    return np.array([[tp, fp],[fn, tn]])

In [15]:
# Confusion matrix, laid out by matrix_confusao as [[tp, fp], [fn, tn]]
print(matrix_confusao(y_test,y_pred))

[[112   2]
 [  1  55]]


In [16]:
# Evaluate the current predictions. The confusion matrix is built once and
# reused; matrix_confusao lays it out as [[tp, fp], [fn, tn]].
cm = matrix_confusao(y_test, y_pred)
acuracia = ACC(y_test, y_pred)
revocacao = REV(cm[0, 0], cm[1, 0])   # tp, fn
precisao = PRE(cm[0, 0], cm[0, 1])    # tp, fp
f1score = FUSCORE(precisao, revocacao)

print("A acuracia do modelo é: {} ".format(acuracia))
print("A revocação do modelo é: {} ".format(revocacao))
print("A precisao do modelo é: {} ".format(precisao))
print("A f1 score do modelo é: {} ".format(f1score))

A acuracia do modelo é: 0.9823529411764705 
A revocação do modelo é: 0.9911504424778761 
A precisao do modelo é: 0.9824561403508771 
A f1 score do modelo é: 0.9867841409691629 


### Análise do discriminante Gaussiano

In [17]:
class GaussianDA():
    """Gaussian discriminant analysis for labels 0/1 with a shared covariance.

    Attributes set by ``fit``
    -------------------------
    fi : mean of ``y`` (the prior of class 1).
    u : array with the mean vector of class 0 and of class 1.
    E : covariance matrix pooled over both classes.
    invE : pseudo-inverse of ``E``.
    """

    def fit(self, X, y):
        """Estimate the class prior, class means and pooled covariance.

        Returns ``self``.
        """
        self.fi = y.mean()
        self.u = np.array([ X[y==k].mean(axis=0) for k in [0,1]])
        # Work on a float copy so the in-place centring below neither
        # modifies the caller's array nor fails on integer input.
        X_u = X.astype(float)
        for k in [0,1]: X_u[y==k] -= self.u[k]
        self.E = X_u.T.dot(X_u) / len(y)
        self.invE = np.linalg.pinv(self.E)
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the class with the highest score.

        Scores are compared in the log domain so that points far from both
        class means do not underflow to zero for every class.
        """
        return np.argmax([self._log_score(X, i) for i in range(len(self.u))], axis=0)

    def _log_score(self, X, i):
        """Log of the unnormalised posterior of class ``i`` for each row of ``X``."""
        diff = X - self.u[i]
        # Squared Mahalanobis distance of each row to the class mean.
        maha = np.sum(diff.dot(self.invE) * diff, axis=1)
        prior = (self.fi)**i * (1 - self.fi)**(1 - i)
        with np.errstate(divide="ignore"):
            # The Gaussian exponent is -1/2 times the squared distance.
            return -0.5 * maha + np.log(prior)

    def compute_prob(self, X, i):
        """Return the unnormalised posterior of class ``i`` for each row of ``X``.

        The normalising constant of the density is omitted: the covariance
        is shared, so it is the same for both classes.
        """
        return np.exp(self._log_score(X, i))

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against ``y``."""
        return (self.predict(X) == y).mean()

In [18]:
# Fit the Gaussian discriminant model on the scaled training split and
# predict the scaled test split (y_pred from the previous model is overwritten).
gaussianda = GaussianDA()
gaussianda.fit(X_train_sched, y_train)
y_pred = gaussianda.predict(X_test_sched)

In [19]:
def matrix_confusao(y_test, y_pred):
    """Return the binary confusion matrix laid out as ``[[tp, fp], [fn, tn]]``.

    Labels are assumed to be encoded as 0 (negative) and 1 (positive).
    ``labels=[0, 1]`` forces a 2x2 result even when only one class occurs
    in the inputs, so the four-way unpacking below cannot fail.

    NOTE: this notebook defines this same function in several cells; the
    last executed definition is the one in effect.
    """
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    return np.array([[tp, fp],[fn, tn]])

In [20]:
# Confusion matrix, laid out by matrix_confusao as [[tp, fp], [fn, tn]]
print(matrix_confusao(y_test,y_pred))

[[113   9]
 [  0  48]]


In [21]:
# Evaluate the current predictions. The confusion matrix is built once and
# reused; matrix_confusao lays it out as [[tp, fp], [fn, tn]].
cm = matrix_confusao(y_test, y_pred)
acuracia = ACC(y_test, y_pred)
revocacao = REV(cm[0, 0], cm[1, 0])   # tp, fn
precisao = PRE(cm[0, 0], cm[0, 1])    # tp, fp
f1score = FUSCORE(precisao, revocacao)

print("A acuracia do modelo é: {} ".format(acuracia))
print("A revocação do modelo é: {} ".format(revocacao))
print("A precisao do modelo é: {} ".format(precisao))
print("A f1 score do modelo é: {} ".format(f1score))

A acuracia do modelo é: 0.9470588235294117 
A revocação do modelo é: 1.0 
A precisao do modelo é: 0.9262295081967213 
A f1 score do modelo é: 0.9617021276595745 


### Naive Bayes Gaussiano

In [22]:
class GaussianNBayes():
    """Gaussian naive Bayes classifier.

    Parameters
    ----------
    priors : array-like or None, default None
        Class prior probabilities, ordered like the sorted class labels.
        When ``None`` they are estimated from the class frequencies on
        every call to ``fit``.

    Attributes set by ``fit``
    -------------------------
    classes_ : sorted unique labels seen in ``y``.
    priors : class priors in use (user-supplied or estimated).
    theta_ : per-class feature means, one row per class.
    sigma_ : per-class feature variances, one row per class.
    """

    def __init__(self, priors=None):
        self.priors = priors
        # True once `priors` holds values estimated by fit rather than
        # values supplied by the user; lets a refit re-estimate them.
        self._priors_estimated = False
        self.classes_ = None
        self.theta_ = 0.0
        self.sigma_ = 0.0

    def fit(self, x, y):
        """Estimate class priors and per-class feature means and variances.

        Variances are floored at a tiny fraction of the largest overall
        feature variance so a feature that is constant within a class does
        not cause a division by zero at prediction time.

        Returns ``self``.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y)
        classes, counts = np.unique(y, return_counts=True)
        self.classes_ = classes

        if self.priors is None or self._priors_estimated:
            self.priors = counts / counts.sum()
            self._priors_estimated = True

        self.theta_ = np.array([np.mean(x[y == c], axis=0) for c in classes])
        variances = np.array([np.var(x[y == c], axis=0) for c in classes])

        floor = 1e-9 * np.var(x, axis=0).max()
        if floor == 0:
            floor = 1e-9
        self.sigma_ = np.maximum(variances, floor)
        return self

    def predict(self, x):
        """Return the most probable class label for each row of ``x``."""
        best = np.argmax(self.predict_proba(x), axis=1)
        return self.classes_[best]

    def predict_proba(self, x):
        """Return posterior class probabilities, one row per sample.

        The computation is carried out with log-probabilities and the
        per-row maximum is subtracted before exponentiating, so the product
        of many small per-feature densities cannot underflow to 0/0.
        """
        log_joint = self._joint_log_likelihood(np.asarray(x, dtype=float))
        log_joint = log_joint - log_joint.max(axis=1, keepdims=True)
        joint = np.exp(log_joint)
        return joint / joint.sum(axis=1, keepdims=True)

    def _joint_log_likelihood(self, x):
        """Return log(prior * likelihood) with shape (n_samples, n_classes)."""
        with np.errstate(divide="ignore"):
            log_priors = np.log(self.priors)
        scores = []
        for log_p, mean_c, var_c in zip(log_priors, self.theta_, self.sigma_):
            # Sum of the per-feature Gaussian log-densities.
            log_norm = -0.5 * np.sum(np.log(2.0 * np.pi * var_c))
            quad = -0.5 * np.sum((x - mean_c) ** 2 / var_c, axis=1)
            scores.append(log_p + log_norm + quad)
        return np.array(scores).T

In [23]:
# Fit the Gaussian naive Bayes model on the scaled training split and
# predict the scaled test split (y_pred from the previous model is overwritten).
naivebg = GaussianNBayes()
naivebg.fit(X_train_sched, y_train)
y_pred = naivebg.predict(X_test_sched)

In [24]:
def matrix_confusao(y_test, y_pred):
    """Return the binary confusion matrix laid out as ``[[tp, fp], [fn, tn]]``.

    Labels are assumed to be encoded as 0 (negative) and 1 (positive).
    ``labels=[0, 1]`` forces a 2x2 result even when only one class occurs
    in the inputs, so the four-way unpacking below cannot fail.

    NOTE: this notebook defines this same function in several cells; the
    last executed definition is the one in effect.
    """
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    return np.array([[tp, fp],[fn, tn]])

In [25]:
# Confusion matrix, laid out by matrix_confusao as [[tp, fp], [fn, tn]]
print(matrix_confusao(y_test,y_pred))

[[109   6]
 [  4  51]]


In [26]:
# Evaluate the current predictions. The confusion matrix is built once and
# reused; matrix_confusao lays it out as [[tp, fp], [fn, tn]].
cm = matrix_confusao(y_test, y_pred)
acuracia = ACC(y_test, y_pred)
revocacao = REV(cm[0, 0], cm[1, 0])   # tp, fn
precisao = PRE(cm[0, 0], cm[0, 1])    # tp, fp
f1score = FUSCORE(precisao, revocacao)

print("A acuracia do modelo é: {} ".format(acuracia))
print("A revocação do modelo é: {} ".format(revocacao))
print("A precisao do modelo é: {} ".format(precisao))
print("A f1 score do modelo é: {} ".format(f1score))

A acuracia do modelo é: 0.9411764705882353 
A revocação do modelo é: 0.9646017699115044 
A precisao do modelo é: 0.9478260869565217 
A f1 score do modelo é: 0.956140350877193 


### K-Nearest Neighbors (KNN)

In [27]:
class KNearestN():
    """K-nearest-neighbours classifier using squared Euclidean distance.

    ``fit`` only stores the training data; all work happens in ``predict``.
    """

    def __init__(self):
        self.K = None        # K used by the most recent predict call
        self.X_train = None  # training samples stored by fit
        self.y_train = None  # training labels stored by fit

    def fit(self, X_train, y_train):
        """Store the training samples and labels. Returns ``self``."""
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)
        return self

    def predict(self, X, K=1):
        """Predict a label for each row of ``X`` by majority vote.

        Parameters
        ----------
        X : array-like, one sample per row.
        K : int, default 1
            Number of nearest training samples that vote.

        Returns
        -------
        numpy.ndarray of predicted labels. Vote ties go to the smallest
        label, because ``np.argmax`` returns the first maximum and the
        classes come sorted from ``np.unique``.

        Raises
        ------
        ValueError
            If ``K`` is smaller than 1 (an empty neighbourhood would
            otherwise silently predict the first class for every sample).
        """
        if K < 1:
            raise ValueError("K must be a positive integer, got {}".format(K))
        self.K = K
        classes = np.unique(self.y_train)
        # Squared norms of the training rows do not depend on the query,
        # so they are computed once outside the loop.
        train_sq = (self.X_train**2).sum(axis=1)
        y_pred = []

        for xi in np.asarray(X):
            # ||xi - x||^2 expanded as -2 xi.x + ||xi||^2 + ||x||^2
            euclidian = -2 * xi @ self.X_train.T + (xi**2).sum() + train_sq
            knn_index = np.argsort(euclidian)[0:K]
            neighbours = self.y_train[knn_index]
            score = np.array([(neighbours == classe).sum() for classe in classes])
            y_pred.append(classes[np.argmax(score)])

        return np.array(y_pred)

In [28]:
# Store the scaled training split and predict the scaled test split using
# the 3 nearest neighbours (y_pred from the previous model is overwritten).
knn = KNearestN()
knn.fit(X_train_sched, y_train)
y_pred = knn.predict(X_test_sched, K=3)

In [29]:
def matrix_confusao(y_test, y_pred):
    """Return the binary confusion matrix laid out as ``[[tp, fp], [fn, tn]]``.

    Labels are assumed to be encoded as 0 (negative) and 1 (positive).
    ``labels=[0, 1]`` forces a 2x2 result even when only one class occurs
    in the inputs, so the four-way unpacking below cannot fail.

    NOTE: this notebook defines this same function in several cells; the
    last executed definition is the one in effect.
    """
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    return np.array([[tp, fp],[fn, tn]])

In [30]:
# Confusion matrix, laid out by matrix_confusao as [[tp, fp], [fn, tn]]
print(matrix_confusao(y_test,y_pred))

[[113   5]
 [  0  52]]


In [31]:
# Evaluate the current predictions. The confusion matrix is built once and
# reused; matrix_confusao lays it out as [[tp, fp], [fn, tn]].
cm = matrix_confusao(y_test, y_pred)
acuracia = ACC(y_test, y_pred)
revocacao = REV(cm[0, 0], cm[1, 0])   # tp, fn
precisao = PRE(cm[0, 0], cm[0, 1])    # tp, fp
f1score = FUSCORE(precisao, revocacao)

print("A acuracia do modelo é: {} ".format(acuracia))
print("A revocação do modelo é: {} ".format(revocacao))
print("A precisao do modelo é: {} ".format(precisao))
print("A f1 score do modelo é: {} ".format(f1score))

A acuracia do modelo é: 0.9705882352941176 
A revocação do modelo é: 1.0 
A precisao do modelo é: 0.9576271186440678 
A f1 score do modelo é: 0.9783549783549783 


### Árvore de decisão

In [32]:
from sklearn.tree import DecisionTreeClassifier

In [33]:
# Decision tree with the Gini criterion. Unlike the models above, it is
# trained and evaluated on the unscaled splits (X_train / X_test).
# NOTE(review): no random_state is passed, so the fitted tree may differ
# between runs — confirm whether a fixed seed is wanted.
classifier = DecisionTreeClassifier(criterion='gini')
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [34]:
def matrix_confusao(y_test, y_pred):
    """Return the binary confusion matrix laid out as ``[[tp, fp], [fn, tn]]``.

    Labels are assumed to be encoded as 0 (negative) and 1 (positive).
    ``labels=[0, 1]`` forces a 2x2 result even when only one class occurs
    in the inputs, so the four-way unpacking below cannot fail.

    NOTE: this notebook defines this same function in several cells; the
    last executed definition is the one in effect.
    """
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    return np.array([[tp, fp],[fn, tn]])

In [35]:
# Confusion matrix, laid out by matrix_confusao as [[tp, fp], [fn, tn]]
print(matrix_confusao(y_test,y_pred))

[[107   8]
 [  6  49]]


In [36]:
# Evaluate the current predictions. The confusion matrix is built once and
# reused; matrix_confusao lays it out as [[tp, fp], [fn, tn]].
cm = matrix_confusao(y_test, y_pred)
acuracia = ACC(y_test, y_pred)
revocacao = REV(cm[0, 0], cm[1, 0])   # tp, fn
precisao = PRE(cm[0, 0], cm[0, 1])    # tp, fp
f1score = FUSCORE(precisao, revocacao)

print("A acuracia do modelo é: {} ".format(acuracia))
print("A revocação do modelo é: {} ".format(revocacao))
print("A precisao do modelo é: {} ".format(precisao))
print("A f1 score do modelo é: {} ".format(f1score))

A acuracia do modelo é: 0.9176470588235294 
A revocação do modelo é: 0.9469026548672567 
A precisao do modelo é: 0.9304347826086956 
A f1 score do modelo é: 0.9385964912280702 
