In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
class Naive_Bayes():
    """Gaussian Naive Bayes classifier.

    Every feature is modelled, per class, as an independent univariate
    Gaussian whose mean and sample variance are estimated by ``fit``.
    ``predict`` classifies a single sample at a time.
    """

    # Floor applied to variances so that a constant feature (variance 0)
    # does not cause a division by zero in the Gaussian density.
    _MIN_VAR = 1e-9

    def __init__(self):
        # Unique class labels found by fit (numpy array); None before fit.
        self.classes = None
        # {class label: {column name: (mean, variance)}}, filled by fit.
        self.parameters = {}
        # Training labels exactly as passed to fit; used to compute priors.
        self.y = None
        # Feature column names of the training frame, in order.
        self.cols = None

    def fit(self, x, y):
        """
        Estimate mean and var of each feature for each class
        Save them in self.parameters

        Parameters
        ----------
        x : pandas.DataFrame
            One row per sample, one column per feature. Any index is allowed.
        y : sequence of labels
            One label per row of ``x``, matched by position (not by index).

        Raises
        ------
        ValueError
            If ``x`` and ``y`` do not have the same length.
        """
        if len(x) != len(y):
            # Raise instead of calling exit(): exit() would terminate the
            # interpreter / notebook kernel and cannot be handled by callers.
            raise ValueError(
                "Mismatch between x and y: %d rows vs %d labels" % (len(x), len(y))
            )

        labels = np.asarray(y)
        self.cols = x.columns
        self.y = y
        self.classes = np.unique(labels)
        # Start from a clean slate so a refit never keeps stale classes.
        self.parameters = {}
        for c in self.classes:
            # A boolean mask selects rows by position, so this works for any
            # DataFrame index (not only the default 0..n-1 RangeIndex).
            x_c = x.loc[labels == c]
            stats = {}
            for col in x_c.columns:
                var = x_c[col].var()
                if np.isnan(var):
                    # Sample variance is undefined for a single-row class;
                    # store 0 and let the variance floor take over later.
                    var = 0.0
                stats[col] = (x_c[col].mean(), var)
            self.parameters[c] = stats

    def calculate_prior(self, y, c):
        """
        Estimate prior probability of class ``c``: the fraction of labels
        in ``y`` that are equal to ``c``.
        """
        # np.asarray makes the comparison element-wise even when y is a plain
        # list and c is a plain Python scalar (list == int is a single False).
        return np.mean(np.asarray(y) == c)

    def gaussian_prob(self, val, mean, var):
        """
        Given a value, return its Gaussian probability density for the
        supplied mean and variance. Variances below ``_MIN_VAR`` are
        raised to ``_MIN_VAR`` to keep the result finite.
        """
        var = np.maximum(var, self._MIN_VAR)
        prefix = 1.0 / np.sqrt(2.0 * math.pi * var)
        bracket = - (val - mean) ** 2 / 2.0 / var
        return prefix * np.exp(bracket)

    def calculate_joint(self, c, test):
        """
        Estimate joint probability of class ``c`` and the sample ``test``:
        prior(c) times the product of the per-feature Gaussian densities.

        ``test`` is an iterable of feature values in the order of the
        training columns.
        """
        joint = self.calculate_prior(self.y, c)
        for col, val in zip(self.cols, test):
            mean, var = self.parameters[c][col]
            joint *= self.gaussian_prob(val, mean, var)
        return joint

    def _log_joint(self, c, test):
        """Logarithm of the joint probability, accumulated as a sum of logs."""
        log_joint = np.log(self.calculate_prior(self.y, c))
        for col, val in zip(self.cols, test):
            mean, var = self.parameters[c][col]
            var = np.maximum(var, self._MIN_VAR)
            log_joint += (
                -0.5 * np.log(2.0 * math.pi * var)
                - (val - mean) ** 2 / 2.0 / var
            )
        return log_joint

    def predict(self, test):
        """
        Return the class label with the highest posterior for one sample.

        ``test`` is an iterable of feature values in the order of the
        training columns.
        """
        # Scores are compared in log space: a product of raw densities
        # underflows to 0.0 for every class on distant points or many
        # features, which would make argmax always return the first class.
        scores = [self._log_joint(c, test) for c in self.classes]
        return self.classes[np.argmax(scores)]

In [3]:
# Toy data set: one row per sample, columns are the features 'A' and 'B'.
# The first four rows belong to class 0, the last three to class 1.
x_train = pd.DataFrame(
    [[1, 1], [1, 2], [2, 1], [2, 2], [10, 9], [7, 10], [9, 8]],
    columns=['A', 'B'],
)
y_train = [0] * 4 + [1] * 3
# Single sample to classify, given as [A, B].
x_test = [8, 8]

In [4]:
# Fit the classifier on the toy training set, then classify the single test point.
nb = Naive_Bayes()
nb.fit(x_train, y_train)
# Last expression of the cell: its value is what the notebook shows as output.
nb.predict(x_test)

1