In [19]:
import numpy as np
from numpy import array as ar
import pandas as pd


class NaiveBayesSingleVar:

    def __init__(self):
         pass
        
    def fit(self, X_train, Y_train):

        self.Y_unique = np.unique(Y_train)
        
        self.y_count = len(self.Y_unique)
        self.X_unique = np.unique(X_train)
        
        self.x_count = len(self.X_unique)
        self.__X_given_y = [X_train[Y_train == y] for y in np.unique(Y_train)]

        prob_table = np.zeros((self.x_count,self.y_count))
        priors = np.zeros(self.y_count)
        x_normalization = np.zeros(self.x_count)
        n_points = len(Y_train)
        for i, x in enumerate(self.X_unique):
            x_normalization[i] = len(X_train[X_train == x])/n_points
            for j, y in enumerate(self.Y_unique):
                denominator = len(Y_train[Y_train == y])
                
                numerator = len(X_train[(X_train == x) & (Y_train == y)])
                prob = numerator/denominator
                prob_table[i,j] = prob

        for j, y in enumerate(self.Y_unique):
            denominator = len(Y_train[Y_train == y])
            priors[j] = denominator/n_points

        self.prob_table = pd.DataFrame(prob_table.T, columns = self.X_unique, index = self.Y_unique)
        self.priors = pd.DataFrame(priors, index = self.Y_unique)
        self.x_normalization = pd.DataFrame(x_normalization, index = self.X_unique)


    def predict_proba(self, X_test):

        x_test_len = len(X_test)

        results = np.ones((x_test_len, len(self.Y_unique)))
        for i, x_test in enumerate(X_test):
            for j,y in enumerate(self.Y_unique):
                likelihood = self.prob_table[x_test][y]
                prior = self.priors.loc[y]
                x_norm =self.x_normalization.loc[x_test]
                
                results[i,j] = (likelihood*prior)/x_norm

        return results

In [31]:
class NaiveBayesCategorical:

    def __init__(self):
        pass


    def fit(self, X_train:np.array, Y_train:np.array):
        self.X_train = X_train
        self.__prob_table_dd = []
        self.__X_normalization_dd = []

        for X_train_single_feature in self.X_train[:,]:
            naivebayes_single_var = NaiveBayesSingleVar()
            naivebayes_single_var.fit(X_train_single_feature, Y_train)

            self.__prob_table_dd.append(naivebayes_single_var.prob_table)
            self.__X_normalization_dd.append(naivebayes_single_var.x_normalization)

        self.__priors = naivebayes_single_var.priors

        self.Y_unique = naivebayes_single_var.Y_unique


    def predict_proba(self, X_test:np.array):
        results = np.zeros((X_test.shape[0], len(self.Y_unique)))
        for i, x_test in enumerate(X_test):
            for j, y in enumerate(self.Y_unique):

                likelihood = []
                prior = self.__priors.loc[y][0]
                x_norm = []
                
                # Naive Bayes Assumption of independence among dimensions
                for d in range(self.X_train[:,].shape[0]):
                    x_test_d = x_test[d]
                    
                    likelihood.append(self.__prob_table_dd[d][x_test_d][y])
                    x_norm.append(self.__X_normalization_dd[d].loc[x_test_d])
                
                likelihood_prod = np.prod(likelihood)
        #         print(likelihood_prod)
                posterior_numerator = likelihood_prod*prior
        #         print(posterior_numerator)
                posterior_denominator = np.prod(x_norm)
        #         print(posterior_denominator)

                posterior = posterior_numerator/posterior_denominator
                
                results[i,j] = posterior


        return results

# test data
## https://medium.com/@rangavamsi5/na√Øve-bayes-algorithm-implementation-from-scratch-in-python-7b2cc39268b9

In [32]:
df = pd.read_csv('naivebayes/toy_data.csv')

X_train = df.drop('Play', axis = 1).values.T

X_train

Y_train = df['Play'].values

In [33]:
 X_test = ar([['Rainy','Mild','Normal','t']])

In [34]:
nb = NaiveBayesCategorical()

In [35]:
nb.fit(X_train, Y_train)

In [36]:
nb.predict_proba(X_test)

array([[0.3136    , 0.43017833]])