In [7]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

class GaussianNaiveBayesClassifier(BaseEstimator, ClassifierMixin):
    def fit(self, X, y):
        X, y = check_X_y(X, y)
        self.priors_ = np.bincount(y) / len(y)
        self.n_classes_ = np.max(y) + 1
        
        self.means_ = np.array([X[np.where(y==i)].mean(axis=0) for i in range(self.n_classes_)])
        std = pd.DataFrame(X).groupby(by=y).std()
        self.stds_ = np.array(std)
        #self.stds_ = np.array([X.groupby(by=y).std(axis=0) for i in range(self.n_classes_)])
        print("----------Prior Probabilities:P(Yes) or P(No)---------")
        print("P(0):",self.priors_[0])
        print("P(1):",self.priors_[1])
        #when there are two features uncomment below code
        """
        print("-------Feature1=Age, Feature2=Salary-------")
        print("mean_feat1_0:",self.means_[0][0])
        print("mean_feat2_0:",self.means_[0][1])
        print("mean_feat1_1:",self.means_[1][0])
        print("mean_feat1_1:",self.means_[1][1])
        
        print("std_feat1_0:",self.stds_[0][0])
        print("std_feat2_0:",self.stds_[0][1])
        print("std_feat1_1:",self.stds_[1][0])
        print("std_feat1_1:",self.stds_[1][1])
        
        print("var_feat1_0:",self.stds_[0][0]**2)
        print("var_feat2_0:",self.stds_[0][1]**2)
        print("var_feat1_1:",self.stds_[1][0]**2)
        print("var_feat1_1:",self.stds_[1][1]**2)
        """
        #for only 1 feature uncomment below code
        print("-------Feature1=Age, Feature2=Salary-------")
        print("mean_feat1_0:",self.means_[0][0])
        print("mean_feat1_1:",self.means_[1][0])
        
        print("std_feat1_0:",self.stds_[0][0])
        print("std_feat1_1:",self.stds_[1][0])
        
        print("var_feat1_0:",self.stds_[0][0]**2)
        print("var_feat1_1:",self.stds_[1][0]**2)
        
        return self
    
    def predict_proba(self, X):
        check_is_fitted(self)
        X = check_array(X)
        
        res = []
        print("-------------Final Probabilities-----------")
        for i in range(len(X)):
            probas = []
            for j in range(self.n_classes_):
                #printing P(x|no) & P(x|yes)
                if j==0:
                    print("P(x1|no):",(1/np.sqrt(2*np.pi*self.stds_[j]**2)*np.exp(-0.5*((X[i]-self.means_[j])/self.stds_[j])**2)).prod())
                else:
                    print("P(x1|yes):",(1/np.sqrt(2*np.pi*self.stds_[j]**2)*np.exp(-0.5*((X[i]-self.means_[j])/self.stds_[j])**2)).prod())
                
                probas.append((1/np.sqrt(2*np.pi*self.stds_[j]**2)*np.exp(-0.5*((X[i]-self.means_[j])/self.stds_[j])**2)).prod()*self.priors_[j])
            probas = np.array(probas)
            print("P(no|x1):",probas[0])
            print("P(yes|x1):",probas[1])
            res.append(probas / probas.sum())
            
        print("----------------------------------------------------------")
        return np.array(res)
    def predict(self, X):
        check_is_fitted(self)
        X = check_array(X)
        
        res = self.predict_proba(X)
        
        return res.argmax(axis=1)

In [8]:
#--------------------------exam version-------------------------------------------------
#----------------input X,y,[38, 71000]---------------------------
X = pd.DataFrame({'Age': [59,35,37,52,48,37,37,48,41,37,39,49,55,37,35,36,42], 
        'Salary': [88000,61000,70000,21000,141000,93000,62000,138000,79000,78000,134000,89000,39000,77000,57000,63000,73000]})

y = pd.Series([1,0,1,1,0,1,0,1,0,1,1,1,1,0,0,0,1])

my_gauss = GaussianNaiveBayesClassifier()
my_gauss.fit(X, y)
my_gauss.predict_proba([[38, 71000]])

----------Prior Probabilities:P(Yes) or P(No)---------
P(0): 0.4117647058823529
P(1): 0.5882352941176471
-------Feature1=Age, Feature2=Salary-------
mean_feat1_0: 38.42857142857143
mean_feat1_1: 45.5
std_feat1_0: 4.68533680244878
std_feat1_1: 8.195527235429498
var_feat1_0: 21.952380952380956
var_feat1_1: 67.16666666666667
-------------Final Probabilities-----------
P(x1|no): 1.1266995920268005e-06
P(x1|yes): 3.3534468935573886e-07
P(no|x1): 4.6393512612868253e-07
P(yes|x1): 1.9726158197396404e-07
----------------------------------------------------------


array([[0.70165976, 0.29834024]])

In [10]:
#--------------------------exam version-------------------------------------------------
#----------------input X1,y1,[61]---------------------------
X1 = pd.DataFrame({'Percent': [70,36,95,63,43,84,54,15,21,91,34]})

y1 = pd.Series([1,0,1,1,0,1,1,0,0,1,0])

my_gauss = GaussianNaiveBayesClassifier()
my_gauss.fit(X1, y1)
my_gauss.predict_proba([[61]])

----------Prior Probabilities:P(Yes) or P(No)---------
P(0): 0.45454545454545453
P(1): 0.5454545454545454
-------Feature1=Age, Feature2=Salary-------
mean_feat1_0: 29.8
mean_feat1_1: 76.16666666666667
std_feat1_0: 11.476062042355819
std_feat1_1: 16.36357744097136
var_feat1_0: 131.70000000000002
var_feat1_1: 267.76666666666677
-------------Final Probabilities-----------
P(x1|no): 0.0008631916482906215
P(x1|yes): 0.01586679399696767
P(no|x1): 0.00039235984013210066
P(yes|x1): 0.00865461490743691
----------------------------------------------------------


array([[0.04336918, 0.95663082]])

# Normal calculation not part of above code

In [6]:
#Calculating PRobability with variance
import numpy as np
X = 61
mean_y = 29.8
var_y = 131.7

print("P(X|YorN):",(1/np.sqrt(2*np.pi*var_y)*np.exp(-0.5*(((X-mean_y)**2)/var_y))))

P(X|YorN): 0.0008631916482906208


In [16]:
#Calculating PRobability with stddev
import numpy as np
X = 71000
mean_y = 82300
std_y = 36295

print("P(X|YorN):",(1/np.sqrt(2*np.pi*(std_y**2))*np.exp(-0.5*(((X-mean_y)**2)/(std_y**2)))))

P(X|YorN): 1.047164563704697e-05
