# Naive Bayes Classification with Laplace Smoothing/Correction

In [1]:
# Display the value of every top-level expression in a cell (not only the last one),
# so several results can be shown from the same cell without print() or display().

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Required Libraries
import pandas as pd
import numpy as np

## Data

In [3]:
# Dataset: employee records stored column-wise. Every list has 11 entries and
# position i across the four lists describes the same employee.

emp_data = {
    "Department": [
        "sales", "sales", "sales",
        "systems", "systems", "systems", "systems",
        "marketing", "marketing",
        "secretary", "secretary",
    ],
    "Age": [
        "31...35", "26...30", "31...35",
        "21...25", "31...35", "26...30", "41...45",
        "36...40", "31...35",
        "46...50", "26...30",
    ],
    "Salary": [
        "46K-50K", "26K-30K", "31K-35K",
        "46K-50K", "66K-70K", "46K-50K", "66K-70K",
        "46K-50K", "41K-45K",
        "36K-40K", "26K-30K",
    ],
    # Class label to be predicted
    "Status": [
        "senior", "junior", "junior",
        "junior", "senior", "junior", "senior",
        "senior", "junior",
        "senior", "junior",
    ],
}

In [4]:
# Build the training frame: one row per employee, "Status" is the class label.
df = pd.DataFrame(emp_data)

# Preview the first rows so the table shown under this cell is reproduced on re-run.
df.head()

Unnamed: 0,Department,Age,Salary,Status
0,sales,31...35,46K-50K,senior
1,sales,26...30,26K-30K,junior
2,sales,31...35,31K-35K,junior
3,systems,21...25,46K-50K,junior
4,systems,31...35,66K-70K,senior


## NB Classifier with Laplace Smoothing

In [15]:
# Naive Bayes Train Function with laplace Smoothing

def nblaplace_train(data, laplace = 0, target = "Status"):
    """Estimate Naive Bayes probabilities with Laplace (additive) smoothing.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows. Every column other than ``target`` is treated as a
        categorical feature.
    laplace : int or float, default 0
        Pseudo-count added to every count; 0 disables smoothing.
    target : str, default "Status"
        Name of the column holding the class label.

    Returns
    -------
    prior_prob : numpy.ndarray
        Smoothed prior of each class, ordered by first appearance of the
        class in ``data[target]``.
    cond_prob : dict of str -> pandas.DataFrame
        For each feature, a table of P(value | class). Rows are the class
        labels (same order as ``prior_prob``); columns are the feature values
        in order of first appearance, so the layout is stable across runs.
    """

    # Prior Probabilities Calculation

    labels = data[target].unique()
    labels_len = len(labels)

    # Number of training rows per class, aligned with `labels`
    class_counts = data[target].value_counts().reindex(labels, fill_value=0)

    # Smooth over the actual number of classes rather than assuming two
    prior_total = len(data) + labels_len * laplace
    prior_prob = ((class_counts + laplace) / prior_total).to_numpy(dtype=float)

    # Conditional Probabilities Calculation

    cond_prob = {}
    for column in data.columns:
        if column == target:
            continue

        # unique() keeps first-appearance order; a set would give an arbitrary one
        x_classes = data[column].unique()

        # Joint counts: rows = class label, columns = feature value
        counts = pd.crosstab(data[target], data[column]).reindex(
            index=labels, columns=x_classes, fill_value=0
        )

        # P(value | class) = (count + laplace) / (class size + n_values * laplace)
        denominators = class_counts + len(x_classes) * laplace
        x_cond_prob = (counts + laplace).div(denominators, axis=0)

        # Drop the axis names crosstab adds so the table is a plain labelled grid
        cond_prob[column] = x_cond_prob.rename_axis(index=None, columns=None)

    return prior_prob, cond_prob

## Training NB Classifier

In [13]:
# Training on Data
# Fit the classifier on the employee table with a pseudo-count of 1 (laplace = 1).

prior_prob, cond_prob = nblaplace_train(df, laplace = 1)

# The bare expressions below are top-level statements on purpose: with
# ast_node_interactivity = "all" each one is rendered as a table under its heading.
print("Conditional Laplace Probabilities for Age")
cond_prob['Age']

print("Conditional Laplace Probabilities for Department")
cond_prob['Department']

print("Conditional Laplace Probabilities for Salary")
cond_prob['Salary']

Conditional Laplace Probabilities for Age


Unnamed: 0,26...30,46...50,36...40,31...35,41...45,21...25
senior,0.090909,0.181818,0.181818,0.272727,0.181818,0.090909
junior,0.333333,0.083333,0.083333,0.25,0.083333,0.166667


Conditional Laplace Probabilities for Department


Unnamed: 0,sales,systems,secretary,marketing
senior,0.222222,0.333333,0.222222,0.222222
junior,0.3,0.3,0.2,0.2


Conditional Laplace Probabilities for Salary


Unnamed: 0,46K-50K,26K-30K,36K-40K,66K-70K,41K-45K,31K-35K
senior,0.272727,0.090909,0.181818,0.272727,0.090909,0.090909
junior,0.25,0.25,0.083333,0.083333,0.166667,0.166667


## Classification

In [7]:
# Prediction Function

def nblaplace_predict(testcase, priors = None, cond_probs = None):
    """Classify one observation with the trained Naive Bayes tables.

    Parameters
    ----------
    testcase : sequence of three values
        ``(department, age, salary)`` for the observation to classify.
    priors : sequence of float, optional
        Prior probability of each class, in the same order as the rows of the
        conditional tables. Defaults to the notebook-level ``prior_prob``.
    cond_probs : dict of str -> pandas.DataFrame, optional
        Tables of P(value | class) keyed by "Department", "Age" and "Salary",
        with class labels as the row index. Defaults to the notebook-level
        ``cond_prob``.

    Returns
    -------
    (label, score) : tuple
        The class with the highest score and that score. The score is the
        unnormalised product prior * P(department|class) * P(age|class) *
        P(salary|class), not a normalised posterior.

    Raises
    ------
    KeyError
        If a feature value is not a column of the corresponding table.
    """

    # Fall back to the tables produced by the training cell
    if priors is None:
        priors = prior_prob
    if cond_probs is None:
        cond_probs = cond_prob

    department, age, salary = testcase
    observed = (("Department", department), ("Age", age), ("Salary", salary))

    # Take the class labels from the tables themselves instead of assuming that
    # row 0 is "senior" and row 1 is "junior"
    class_labels = cond_probs["Department"].index

    P = {}
    for position, label in enumerate(class_labels):
        score = priors[position]
        for feature, value in observed:
            # Label-based lookup; positional [0]/[1] on a string-indexed Series
            # relies on a deprecated pandas fallback
            score = score * cond_probs[feature].loc[label, value]
        P[label] = score

    ans = max(P, key=P.get)

    return ans, P[ans]

In [16]:
# Testing Classifier
# Each test case is (department, age, salary); each result is a (label, score) pair.

a, b, c = (
    nblaplace_predict(case)
    for case in (
        ["marketing", "31...35", "46K-50K"],
        ["sales", "31...35", "66K-70K"],
        ["systems", "21...25", "31K-35K"],
    )
)

## Outcome

In [26]:
# Output
# Report the winning class and its score for each of the three test cases.
print("Results of Classification\n")

for outcome in (a, b, c):
    print("Classified as", outcome[0],"with greater probability of",outcome[1])

Results of Classification

Classified as senior with greater probability of 0.007628734901462173
Classified as senior with greater probability of 0.007628734901462173
Classified as junior with greater probability of 0.004487179487179486
