## Naive Bayes Classifier
### Reference: https://towardsdatascience.com/naive-bayes-classifier-bayes-inference-central-limit-theorem-python-c-implementation-bdffb3b35de

In [3]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import datasets

## Classifier Implementation

In [2]:
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled as an independent Gaussian per class. Prediction
    picks the class maximising ``log P(y) + sum_j log P(x_j | y)``.

    Parameters
    ----------
    X : array-like
        Training samples; rows are samples, columns are features.
    y : array-like
        Class label for each row of ``X``.
    var_smoothing : float, default 1e-9
        Fraction of the largest feature variance in ``X`` that is added to
        every per-class variance. Keeps the density finite when a feature is
        constant within a class (zero standard deviation).

    Attributes
    ----------
    classes : ndarray
        Sorted unique labels found in ``y``.
    priors, means, stds : list
        Per-class prior, column-wise mean and column-wise (population)
        standard deviation, in the same order as ``classes``. Filled by
        ``fit()``.
    epsilon : float
        Absolute variance offset derived from ``var_smoothing`` during ``fit()``.
    """

    def __init__(self, X, y, var_smoothing=1e-9):
        self.priors = []  # List of priors - P(y) for each class
        self.stds = []    # List of STDs of feature columns
        self.means = []   # List of Mean of feature columns
        self.classes = np.unique(y)  # unique class labels

        # np.asarray is a no-op for ndarrays and makes the boolean masking in
        # fit() work when plain Python lists are passed in.
        self.X = np.asarray(X)
        self.y = np.asarray(y)

        self.var_smoothing = var_smoothing
        self.epsilon = 0.0

    def fit(self):
        """Estimate the per-class prior, mean and std from the stored training data.

        Safe to call more than once: previously fitted statistics are discarded.
        """
        # Start from a clean slate so a repeated fit() does not append
        # duplicate entries to the statistics lists.
        self.priors, self.stds, self.means = [], [], []

        # Scale the smoothing term by the largest feature variance; fall back
        # to a scale of 1.0 when that variance is zero, NaN or X is empty.
        max_var = float(np.var(self.X, axis=0).max()) if self.X.size else 0.0
        self.epsilon = self.var_smoothing * (max_var if max_var > 0 else 1.0)

        for c in self.classes:
            x_c = self.X[self.y == c]                   # Samples that belong to class c
            self.means.append(x_c.mean(axis=0))         # axis = 0 => column-wise Mean
            self.stds.append(x_c.std(axis=0))           # axis = 0 => column-wise STD
            self.priors.append(len(x_c) / len(self.X))  # Relative frequency of the class

    def predict(self, X):
        """Return a list with the predicted class label for every sample in ``X``."""
        return [self.__predict__(x) for x in X]

    def _log_gauss_pdf(self, idx, x):
        """Per-feature log of the Gaussian density of ``x`` under class ``idx``.

        Evaluated directly in log-space: taking ``np.log`` of the density after
        ``np.exp`` has underflowed to 0 would give ``-inf`` for every class and
        make the argmax meaningless.
        """
        mu = self.means[idx]
        var = self.stds[idx] ** 2 + self.epsilon  # smoothed variance, never zero after fit()

        return -0.5 * np.log(2.0 * np.pi * var) - (x - mu) ** 2 / (2.0 * var)

    def __gauss_pdf__(self, idx, x):
        """Per-feature Gaussian density of ``x`` under class ``idx``."""
        return np.exp(self._log_gauss_pdf(idx, x))

    def __predict__(self, x):
        """Return the label with the highest log-posterior for one sample ``x``."""
        # y = argmax_i [ log P(y_i) + sum_j log P(x_j | y_i) ]
        posteriors = []
        for idx in range(len(self.classes)):
            log_prior = np.log(self.priors[idx])                 # log P(yi)
            log_likelihood = self._log_gauss_pdf(idx, x).sum()   # log P(X | yi)
            posteriors.append(log_likelihood + log_prior)

        return self.classes[np.argmax(posteriors)]

## Toy Example

## Load Data

In [22]:
# Synthetic two-class dataset: 10,000 samples with 15 features each,
# seeded so the generated data is the same on every run.
data_x, data_y = datasets.make_classification(
    n_samples=10000,
    n_features=15,
    n_classes=2,
    random_state=123,
)

## Split Data

In [17]:
# Hold out 20% of the samples for evaluation; the split is seeded.
x_train, x_test, y_train, y_test = train_test_split(
    data_x,
    data_y,
    test_size=0.2,
    random_state=123,
)

## Train

In [18]:
# Build the classifier on the training split, then estimate its per-class statistics.
nb = NaiveBayes(X=x_train, y=y_train)
nb.fit()

## Evaluation

In [19]:
# Predicted label for every held-out sample.
y_pred = nb.predict(X=x_test)

In [20]:
# Accuracy = fraction of held-out samples whose predicted label matches the true label.
print('Accuracy: {:.2f}'.format((y_pred == y_test).sum() / y_test.size))

Accuracy: 0.92
