# Naive Bayes Classifier

In [58]:
import numpy as np
import pandas as pd
import itertools
from sklearn import datasets

In [14]:
data_set = datasets.load_iris()
data_df = pd.DataFrame(data_set.data, columns=['x1','x2','x3','x4'])
data_df['y']= data_set.target

In [30]:
data_df.head()

Unnamed: 0,x1,x2,x3,x4,y
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


$$ P(y \mid x_1, \dots, x_n) = \frac{P(y) P(x_1, \dots x_n \mid y)}{P(x_1, \dots, x_n)}$$

In [46]:
def probability_from_df(df, grouping_key,probablity_name):
    probablity = df.copy()
    probablity[probablity_name] = 1
    probablity = probablity.groupby(grouping_key, as_index=False).agg({probablity_name:'count'})
    probablity[probablity_name] = probablity[probablity_name]/probablity[probablity_name].sum() 
    return probablity

In [49]:
prior = probability_from_df(data_df,['y'],'prior_probability')
likelyhood = probability_from_df(data_df,['x1','x2','x3','x4','y'],'likelyhood_probability')
evidence = probability_from_df(data_df,['x1','x2','x3','x4'],'evidence_probability')

In [51]:
prior.head()

Unnamed: 0,y,prior_probability
0,0,0.333333
1,1,0.333333
2,2,0.333333


In [52]:
likelyhood.head()

Unnamed: 0,x1,x2,x3,x4,y,likelyhood_probability
0,4.3,3.0,1.1,0.1,0,0.006667
1,4.4,2.9,1.4,0.2,0,0.006667
2,4.4,3.0,1.3,0.2,0,0.006667
3,4.4,3.2,1.3,0.2,0,0.006667
4,4.5,2.3,1.3,0.3,0,0.006667


In [53]:
evidence.head()

Unnamed: 0,x1,x2,x3,x4,evidence_probability
0,4.3,3.0,1.1,0.1,0.006667
1,4.4,2.9,1.4,0.2,0.006667
2,4.4,3.0,1.3,0.2,0.006667
3,4.4,3.2,1.3,0.2,0.006667
4,4.5,2.3,1.3,0.3,0.006667


$$P(x_i | y, x_1, \dots, x_{i-1}, x_{i+1}, \dots, x_n) = P(x_i | y)$$

In [73]:
naive_likelyhood = dict()
for x in ['x1','x2','x3','x4']:
    naive_likelyhood[x] = probability_from_df(data_df,[x,'y'],x+'_likelyhood_probability')

In [74]:
naive_likelyhood['x1'].head()

Unnamed: 0,x1,y,x1_likelyhood_probability
0,4.3,0,0.006667
1,4.4,0,0.02
2,4.5,0,0.006667
3,4.6,0,0.026667
4,4.7,0,0.013333


$$P(y \mid x_1, \dots, x_n) = \frac{P(y) \prod_{i=1}^{n} P(x_i \mid y)}
                                 {P(x_1, \dots, x_n)}$$

In [78]:
postirior.head()

Unnamed: 0,x1,x2,x3,x4,y,prior_probability,x1_likelyhood_probability,x2_likelyhood_probability,x3_likelyhood_probability,x4_likelyhood_probability
0,5.1,3.5,1.4,0.2,0,0.333333,0.053333,0.04,0.08,0.186667
1,5.1,3.5,1.4,0.2,1,0.333333,0.006667,0.0,0.0,0.0
2,5.1,3.5,1.4,0.2,2,0.333333,0.0,0.0,0.0,0.0
3,5.1,3.5,1.4,0.4,0,0.333333,0.053333,0.04,0.08,0.046667
4,5.1,3.5,1.4,0.4,1,0.333333,0.006667,0.0,0.0,0.0


$$P(y \mid x_1, \dots, x_n) \propto P(y) \prod_{i=1}^{n} P(x_i \mid y)$$





In [79]:
postirior = pd.DataFrame(list(itertools.product(*[data_df[i].unique() for i in data_df.columns])), 
                         columns=data_df.columns)

postirior = pd.merge(postirior, prior, on=['y'],how='left').fillna(0)

for x in ['x1','x2','x3','x4']:
    postirior = pd.merge(postirior, naive_likelyhood[x], on=[x,'y'],how='left').fillna(0)
    
postirior['postirior_probability'] = postirior.apply(lambda row: 
                                                     row.prior_probability*
                                                     row.x1_likelyhood_probability*
                                                     row.x2_likelyhood_probability*
                                                     row.x3_likelyhood_probability*
                                                     row.x4_likelyhood_probability,axis=1)

In [80]:
postirior.head()

Unnamed: 0,x1,x2,x3,x4,y,prior_probability,x1_likelyhood_probability,x2_likelyhood_probability,x3_likelyhood_probability,x4_likelyhood_probability,postirior_probability
0,5.1,3.5,1.4,0.2,0,0.333333,0.053333,0.04,0.08,0.186667,1.1e-05
1,5.1,3.5,1.4,0.2,1,0.333333,0.006667,0.0,0.0,0.0,0.0
2,5.1,3.5,1.4,0.2,2,0.333333,0.0,0.0,0.0,0.0,0.0
3,5.1,3.5,1.4,0.4,0,0.333333,0.053333,0.04,0.08,0.046667,3e-06
4,5.1,3.5,1.4,0.4,1,0.333333,0.006667,0.0,0.0,0.0,0.0


$$\hat{y} = \arg\max_y P(y) \prod_{i=1}^{n} P(x_i \mid y)$$

In [91]:
map_df = postirior.groupby(['x1','x2','x3','x4'], as_index=False).agg({'postirior_probability':'max'})
map_df = pd.merge(map_df, postirior[['x1','x2','x3','x4', 'postirior_probability','y']], 
                  on=['x1','x2','x3','x4', 'postirior_probability']).rename(columns={'y': 'prediction'})