#### NaiveBayes probabilitstic prediction
![Naive1.png](https://raw.githubusercontent.com/vaibhaVishwakarma/ML-from-scratch/master/NoteBooks/Naive1.png)
![Naive2.png](https://raw.githubusercontent.com/vaibhaVishwakarma/ML-from-scratch/master/NoteBooks/Naive2.png)
![Naive3.png](https://raw.githubusercontent.com/vaibhaVishwakarma/ML-from-scratch/master/NoteBooks/Naive3.png)

In [32]:
import numpy as np
class NaiveBayes:
    def __init__(self):
        pass

    def fit(self , X ,y):
        n_samples , n_features = X.shape
        self.classes = np.unique(y)
        self.n_classes = len(self.classes)

        self._mean = np.zeros((self.n_classes,n_features))
        self._var = np.zeros((self.n_classes,n_features))
        self._freq = np.zeros(self.n_classes)

        for idx , _class in enumerate(self.classes):
            _X = X[ y == _class]

            self._mean[idx, :] = _X.mean(axis = 0) #feature wise mean
            self._var[idx, :] = _X.var(axis = 0) 
            self._freq[idx] = _X.shape[0] / n_samples       
        
    def predict(self , X):
        return np.array([ self._predict(x) for x in X ])
    
    def _predict(self , x):  #passed a row
        probabilities = []

        for idx , _class in enumerate(self.classes):
            prior = np.log(self._freq[idx])
            postiriors = np.sum(np.log(self._pdf(idx,x)))
            probabilities.append(prior+postiriors)

        return self.classes[np.argmax(probabilities)]
    
    def _pdf(self , class_index , record):
        mean = self._mean[class_index, :]
        variance = self._var[class_index , :]

        numerator = np.exp(-((record-mean)**2)/(2*variance))
        denominator = np.sqrt(2 * np.pi * variance)

        return numerator/denominator


        

In [59]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
data = datasets.load_breast_cancer()
X , y = data.data , data.target

X_train , X_test , y_train , y_test = train_test_split(X,y,
                                                       train_size=0.2,
                                                       shuffle=True,
                                                       random_state=4444,
                                                       )


In [60]:
import warnings
warnings.filterwarnings("ignore" , category=RuntimeWarning)

clf = NaiveBayes()
clf.fit(X_train,y_train)
preds = clf.predict(X_test)

from sklearn.naive_bayes import GaussianNB
sk_clf = GaussianNB()
sk_clf.fit(X_train , y_train)
sk_preds = clf.predict(X_test)

In [61]:
from sklearn.metrics import accuracy_score
print(f"Custom NB accuracy {100*accuracy_score(y_test , preds)} ")
print(f"Sklearn NB accuracy {100*accuracy_score(y_test , sk_preds)} ")

Custom NB accuracy 94.2982456140351 
Sklearn NB accuracy 94.2982456140351 


In [62]:
from sklearn.metrics import confusion_matrix
print(f"Custom NB Confusion Matrix \n{100*confusion_matrix(y_test , preds)} \n")
print(f"Sklearn NB Confusion Matrix \n{100*confusion_matrix(y_test , sk_preds)} ")

Custom NB Confusion Matrix 
[[14900  2000]
 [  600 28100]] 

Sklearn NB Confusion Matrix 
[[14900  2000]
 [  600 28100]] 
