## Implementação de um Classificador Perceptron

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the case-study CSV (relative path) into a DataFrame; the later cells
# read its "latitude" and "longitude" columns.
df = pd.read_csv('gun-violence-data_01-2013_03-2018.csv')

class Perceptron(object):
    """Perceptron classifier.

    A single-layer linear classifier for labels in {1, -1}, trained with the
    perceptron rule: after every example the weights move by
    ``eta * (target - prediction)`` times the example.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    random_state : int
      Random number generator seed for random weight
      initialization.

    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting; ``w_[0]`` is the bias and ``w_[1:]`` holds one
      weight per feature.
    errors_ : list
      Number of misclassifications (updates) in each epoch.

    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_examples, n_features]
          Training vectors, where n_examples is the number of examples and
          n_features is the number of features. Nested lists are accepted.
        y : array-like, shape = [n_examples]
          Target values (1 or -1).

        Returns
        -------
        self : object

        """
        # Coerce up front so plain Python lists work too: the weight vector is
        # sized from X.shape, which only arrays provide.
        X = np.asarray(X)
        y = np.asarray(y)

        rgen = np.random.RandomState(self.random_state)
        # Small random starting weights; one extra slot for the bias term.
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                # The update is zero when the example is already classified
                # correctly, so only mistakes move the weights.
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        """Calculate net input: weighted sum of the features plus the bias."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return class label after unit step (1 if net input >= 0, else -1)."""
        return np.where(self.net_input(X) >= 0.0, 1, -1)

## Testando o classificador Perceptron

In [None]:
"""Dados de Treinamento """
X = np.array([[1, 1], [2, 2], [3, 3]])
y = np.array([1, 1, -1])

# Build the perceptron and train it in one step (fit returns the estimator).
ppn = Perceptron(eta=0.1, n_iter=100).fit(X, y)

# Classify a few samples with the trained model.
X_newdata = np.array([[4, 4], [2, 2], [3, 3]])
print("Resultado da Predição", ppn.predict(X_newdata))

## Questão 1 - Implemente uma função para calcular a acurácia do modelo

In [None]:
def model_accuracy(data, perc_result):
    """Return the fraction of predictions that match the expected labels.

    Parameters
    ----------
    data : sequence
        Expected (true) labels.
    perc_result : sequence
        Predicted labels, in the same order as ``data``.

    Returns
    -------
    float
        Number of positions where both sequences agree, divided by
        ``len(perc_result)``. ``0.0`` when ``perc_result`` is empty.
    """
    total = len(perc_result)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # zip stops at the shorter sequence, so predictions without a matching
    # expected label are counted as misses by the division below.
    hits = sum(
        1 for expected, predicted in zip(data, perc_result)
        if expected == predicted
    )
    return hits / total

## Questão 2 - Implemente um método de validação cruzada para testar o modelo

In [None]:
def cross_validation(data, label, k=None):
    """Estimate perceptron accuracy with k-fold cross-validation.

    The examples are split, in the order given, into ``k`` contiguous folds of
    near-equal size. Each fold is held out once while a fresh ``Perceptron``
    (``eta=0.1``, ``n_iter=100``) is trained on the remaining examples and
    scored on the held-out fold with ``model_accuracy``.

    Parameters
    ----------
    data : array-like, shape = [n_examples, n_features]
        Feature vectors.
    label : array-like, shape = [n_examples]
        Target value for each example.
    k : int, optional
        Number of folds. Defaults to 5.

    Returns
    -------
    float
        Mean of the per-fold accuracies.

    Raises
    ------
    ValueError
        If ``data`` and ``label`` differ in length, or if ``k`` is smaller
        than 2 or larger than the number of examples.
    """
    data = np.asarray(data)
    label = np.asarray(label)
    n_examples = len(data)

    if len(label) != n_examples:
        raise ValueError("data and label must have the same number of examples")

    k = 5 if k is None else k
    if not 2 <= k <= n_examples:
        raise ValueError(
            "k must be between 2 and the number of examples (%d), got %r"
            % (n_examples, k)
        )

    # Fold count and fold size are kept separate: array_split yields k index
    # groups that together cover every example exactly once.
    folds = np.array_split(np.arange(n_examples), k)

    performance = 0.0
    for held_out in folds:
        # Everything outside the held-out fold is used for training.
        in_training = np.ones(n_examples, dtype=bool)
        in_training[held_out] = False

        classifier = Perceptron(eta=0.1, n_iter=100)
        classifier.fit(data[in_training], label[in_training])

        # Score the model trained on this split, not an unrelated global one.
        performance += model_accuracy(
            label[held_out], classifier.predict(data[held_out])
        )

    return performance / k

## Teste o classificador usando um conjunto de dados linearmente separável e outro não linearmente separável
### Sugestão: crie datasets sintéticos com apenas dois atributos para você poder visualizar a separação das classes

In [None]:
# """Testando o modelo com conjunto de dados linearmente separável"""
# 25 two-feature samples, laid out five per row for readability.
X = np.array([
    [8, 1], [8, 5], [4, 5], [2, 3], [2, 5],
    [5, 1], [6, 2], [1, 6], [5, 7], [3, 6],
    [7, 7], [8, 3], [9, 4], [10, 3], [9, 1],
    [3, 7], [1, 4], [6, 3], [4, 2], [6, 6],
    [5, 6], [4, 6], [9, 2], [6, 1], [10, 5],
])
# One label per sample, in the same order as the rows of X.
y = np.array([
    -1, -1, 1, -1, 1,
    -1, -1, 1, 1, 1,
    1, -1, -1, -1, -1,
    1, 1, -1, -1, 1,
    1, 1, -1, -1, -1,
])
cross_validation(X, y)


In [None]:
"""Testando o modelo com conjunto de dados não linearmente separável"""
# Same 25 points as the previous experiment, laid out five per row.
X = np.array([
    [8, 1], [8, 5], [4, 5], [2, 3], [2, 5],
    [5, 1], [6, 2], [1, 6], [5, 7], [3, 6],
    [7, 7], [8, 3], [9, 4], [10, 3], [9, 1],
    [3, 7], [1, 4], [6, 3], [4, 2], [6, 6],
    [5, 6], [4, 6], [9, 2], [6, 1], [10, 5],
])
# A different labelling of those points, in the same order as the rows of X.
y = np.array([
    -1, 1, 1, -1, 1,
    -1, -1, -1, 1, 1,
    1, 1, 1, -1, -1,
    1, -1, 1, -1, 1,
    1, 1, -1, -1, 1,
])
cross_validation(X, y)

## Questão 3 - Treine um classificador perceptron para os dados de seu estudo de caso

In [None]:
# Keep only the coordinate columns, drop rows missing either value, and
# renumber the rows (reset_index keeps the old index as an "index" column).
recorte = pd.DataFrame(df, columns=["latitude", "longitude"]).dropna().reset_index()
latitude = recorte["latitude"]
longitude = recorte["longitude"]

# Label a row 1 when its latitude is at or above 37.1491475, otherwise -1.
y_ = [1 if lat >= 37.1491475 else -1 for lat in latitude]

In [None]:
# Pair each latitude with its longitude to form [latitude, longitude] rows.
X_ = [list(coords) for coords in zip(latitude, longitude)]

### resultado esperado para a predição -> [1,-1,-1,1,1]

In [None]:
# Train a dedicated perceptron on the case-study coordinates (X_, y_) rather
# than reusing the model and synthetic data from the earlier cells.
ppnQ3 = Perceptron(eta=0.1, n_iter=2)
ppnQ3.fit(np.array(X_), np.array(y_))

# Classify five new [latitude, longitude] points with the model just trained.
# Under the latitude rule used to build y_ (>= 37.1491475 -> 1, else -1)
# their labels are [1, -1, -1, 1, 1].
X_newdata = np.array([
    [43.702580, -115.340433],
    [32.037827, -90.262025],
    [35.453457, -116.431459],
    [48.364965, -119.287904],
    [38.7067, -90.2494],
])
print("Resultado da Predição", ppnQ3.predict(X_newdata))

In [None]:
# Number of entries in the latitude series.
len(latitude)