In [1]:
import numpy as np
import pandas as pd

In [2]:
# Loads the data set from "play_tennis.csv" into a DataFrame.
# The path is relative, so the file must be in the notebook's working directory.
data = pd.read_csv("play_tennis.csv")

In [3]:
# Displays the loaded data set for visual inspection
data

Unnamed: 0,day,outlook,temp,humidity,wind,play
0,D1,Sunny,Hot,High,Weak,No
1,D2,Sunny,Hot,High,Strong,No
2,D3,Overcast,Hot,High,Weak,Yes
3,D4,Rain,Mild,High,Weak,Yes
4,D5,Rain,Cool,Normal,Weak,Yes
5,D6,Rain,Cool,Normal,Strong,No
6,D7,Overcast,Cool,Normal,Strong,Yes
7,D8,Sunny,Mild,High,Weak,No
8,D9,Sunny,Cool,Normal,Weak,Yes
9,D10,Rain,Mild,Normal,Weak,Yes


In [4]:
# Removes the column "day" as it is only a row label and not relevant to the model.
# The frame is rebound instead of mutated with inplace=True, and errors="ignore"
# makes the cell safe to re-run after the column has already been removed
# (the in-place version raised KeyError on a second run).
data = data.drop(columns=["day"], errors="ignore")

In [5]:
# Displays the data set again to confirm the "day" column is gone
data

Unnamed: 0,outlook,temp,humidity,wind,play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [6]:
def naive_bayes_predict(data, target_name, test_instance, verbose=True):
    """
    Scores every target value for the test instance with Naive Bayes

    For each distinct value of the target column the function computes the
    unnormalised Naive Bayes score

        P(target) * product over attributes of P(attribute = value | target)

    where every probability is a relative frequency counted in ``data``.
    The scores are not normalised, so they do not sum to 1; the predicted
    class is the key with the largest score. No smoothing is applied, so an
    attribute value that never occurs together with a target value makes
    that target's score 0.

    Parameters:
    -----------
    data: DataFrame
        Data set for Naive Bayes classifier to learn concept from
    target_name: str
        Name of the target column in the data set (it may be at any
        position, not only the last one)
    test_instance: dict
        Test instance attribute values, keyed by attribute (column) name
    verbose: bool, default True
        When True, prints the target value counts and the prior
        probabilities before scoring

    Returns
    -------
    dict
        Unnormalised Naive Bayes score for each target value

    Raises
    ------
    KeyError
        If ``target_name`` or a key of ``test_instance`` is not a column
        of ``data`` (raised by pandas)
    """

    # Counts the unique values in the target
    target_value_counts = data[target_name].value_counts().to_dict()

    # Calculates the frequency with which each target value occurs in the data.
    # The column is selected by target_name so the priors always belong to the
    # same column as the counts above, wherever that column sits in the frame.
    proba_target_values = (data[target_name].value_counts() / data.shape[0]).to_dict()

    if verbose:
        print("Target value counts")
        print(target_value_counts)
        print("Proba_target_values")
        print(proba_target_values)

    # Initialize dictionary to store output scores against all (unique) target values
    predicted_target_values = {}

    # Iterate through each of the target values to calculate the score against it
    for target, value_count in target_value_counts.items():
        # A categorical target can list values that never occur; such a value
        # has prior 0, so its score is 0 and the division below is skipped.
        if value_count == 0:
            predicted_target_values[target] = 0.0
            continue

        # Filters the data set against target value
        data_subset = data[data[target_name] == target]
        subset_size = data_subset.shape[0]

        # Calculates product of conditional probabilities of each attribute
        # value in the test instance given the target value
        attrib_proba_given_target_proba = 1
        for attrib, value in test_instance.items():
            # int() keeps the arithmetic in plain Python numbers so the
            # returned dictionary holds built-in floats
            match_count = int((data_subset[attrib] == value).sum())
            attrib_proba_given_target_proba *= match_count / subset_size

        # Multiplies the prior of the target value by the likelihood of the
        # test instance and stores the score for later reference
        predicted_target_values[target] = proba_target_values[target] * attrib_proba_given_target_proba

    return predicted_target_values

In [7]:
# Builds the attribute values of the instance to classify
test_instance = dict(
    outlook="Sunny",
    temp="Cool",
    humidity="High",
    wind="Strong",
)

In [8]:
# Scores the test instance, treating the last column of the data set as the
# target; a copy of the frame is passed to the classifier
prediction = naive_bayes_predict(
    data.copy(),
    data.columns[-1],
    test_instance,
)
print("Prediction is:", prediction)

Target value counts
{'Yes': 9, 'No': 5}
Proba_target_values
{'Yes': 0.6428571428571429, 'No': 0.35714285714285715}
Prediction is: {'Yes': 0.005291005291005291, 'No': 0.02057142857142857}
