In [157]:
import math
import numpy as np

'''
Refernce to 
1. https://machinelearningmastery.com/naive-bayes-classifier-scratch-python/
2. https://github.com/meetvora/mlp-classifier/blob/master/models/naiveScratch.py
'''

class MyNaiveBayesClassifier():
    def __init__(self, X, y):
        self.classes = set(y)
        self.class_count = len(self.classes)
        self.train_data = X
        self.train_data_label = y
        self.train_data_len = len(self.train_data)
        
    def calculateClassProbability(self, summaries, x, label):
        mu, sigma, prior = summaries[label]
        likelihood = 1
        for i in range(len(mu)):
            exponent = math.exp(-((x[i]-mu[i])*(x[i]-mu[i]))/(2*sigma[i]*sigma[i]))
            likelihood *=  exponent/(math.sqrt(2*math.pi)*sigma[i])
            
        return prior * likelihood
    
    def separateByClass(self):
        d = dict.fromkeys(self.classes)
        for c in self.classes:
            d[c] = []
        
        for i in range(len(self.train_data_label)):
            d[self.train_data_label[i]].append(self.train_data[i])
            
        return d    
    
    def summarizeClass(self, method):
        class_data = self.separateByClass()
        summary = {}
        if method == 'gaussian':
            for c, data in class_data.items():
                summary[c] = (self._calculateMean(data), self._calculateStd(data), len(data)/self.train_data_len)
        
        return summary
    
    def predict(self, summaries, input_data):
        max_prob = 0
        best_class = None
        
        for c in self.classes:
            prob = self.calculateClassProbability(summaries, input_data, c)
            if prob > max_prob:
                max_prob = prob
                best_class = c
        
        return best_class
    
    def getPredictions(self, summaries, test_data):
        predictions = []
        for data in test_data:
            result = self.predict(summaries, data)
            predictions.append(result)
        return predictions
    
        
    def _calculateMean(self,data):
        return np.mean(data, axis=0)
    
    def _calculateStd(self, data):
        return np.std(data, axis=0)
    

In [161]:
import pandas as pd
filename = "pima-indians-diabetes.data.csv"
dataset = pd.read_csv(filename, index_col=False)
y = dataset[" i"].values.tolist()
dataset.drop([" i"],axis=1, inplace=True)
X = dataset.values.tolist()

X_train = X[:700]
y_train = y[:700]

X_test = X[701:]
y_test = y[701:]

NB = MyNaiveBayesClassifier(X_train, y_train)
summ = NB.summarizeClass('gaussian')
res = NB.getPredictions(summ, X_test)
count = 0
for i in range(len(res)):
    #print("Predicted: {}, Y_label: {}".format(res[i], y_test[i]))
    if res[i] == y_test[i]:
        count += 1
print(count/len(y_test))
    

0.7313432835820896
