In [21]:
import numpy as np

def create_train_data():
    """Return the hard-coded toy training set as a 2-D NumPy string array.

    Each row is one observation with four categorical feature values followed
    by the class label ('yes' / 'no') in the last column. The feature columns
    presumably stand for outlook, temperature, humidity and wind -- the code
    itself does not name them.

    Returns:
        np.ndarray: array of shape (10, 5) holding strings.
    """
    records = (
        ('Sunny', 'Hot', 'High', 'Weak', 'no'),
        ('Sunny', 'Hot', 'High', 'Strong', 'no'),
        ('Overcast', 'Hot', 'High', 'Weak', 'yes'),
        ('Rain', 'Mild', 'High', 'Weak', 'yes'),
        ('Rain', 'Cool', 'Normal', 'Weak', 'yes'),
        ('Rain', 'Cool', 'Normal', 'Strong', 'no'),
        ('Overcast', 'Cool', 'Normal', 'Strong', 'yes'),
        ('Overcast', 'Mild', 'High', 'Weak', 'no'),
        ('Sunny', 'Cool', 'Normal', 'Weak', 'yes'),
        ('Rain', 'Mild', 'Normal', 'Weak', 'yes'),
    )
    return np.array([list(record) for record in records])

# Build the toy training set and print the whole array for inspection.
train_data = create_train_data()
print(train_data)


[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'no']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]


In [22]:
def compute_prior_probability(train_data):
    """Estimate the prior probability of every class label.

    Args:
        train_data: 2-D NumPy array whose last column holds the class label.

    Returns:
        np.ndarray: relative frequency of each distinct label, ordered like
        the sorted unique labels returned by ``np.unique``.
    """
    labels = train_data[:, -1]
    # Only the per-class counts are needed; the label values are discarded.
    _, class_counts = np.unique(labels, return_counts=True)
    return class_counts / labels.shape[0]

# The priors follow the sorted label order produced by np.unique, so with the
# labels 'no' / 'yes' index 0 is 'no' and index 1 is 'yes'.
prior_probability = compute_prior_probability(train_data)
print("P(Play Tennis = No)", prior_probability[0])
print("P(Play Tennis = Yes)", prior_probability[1])


P(Play Tennis = No) 0.4
P(Play Tennis = Yes) 0.6


In [24]:
def compute_conditional_probability(train_data, alpha=0.0):
    """Estimate P(feature value | class) for every feature column.

    Args:
        train_data: 2-D NumPy array; every column except the last is a
            categorical feature and the last column is the class label.
        alpha: additive (Laplace/Lidstone) smoothing constant. The estimate is
            ``(count + alpha) / (n_class + alpha * n_values)``. The default
            ``0.0`` gives the plain relative frequency, in which a value never
            seen together with a class gets probability 0 and therefore zeroes
            out any product it is multiplied into; pass e.g. ``1.0`` to avoid
            that.

    Returns:
        tuple: ``(conditional_probability, list_x_name)`` where
        ``conditional_probability[i]`` is an array of shape
        ``(n_classes, n_values_of_feature_i)`` (rows follow the sorted class
        labels, columns follow ``list_x_name[i]``) and ``list_x_name[i]`` is
        the sorted array of distinct values of feature ``i``.

    Raises:
        ValueError: if ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative, got {!r}".format(alpha))

    labels = train_data[:, -1]
    y_unique = np.unique(labels)
    # The rows belonging to each class do not depend on the feature being
    # processed, so select them once instead of once per feature.
    class_subsets = [train_data[labels == y] for y in y_unique]

    conditional_probability = []
    list_x_name = []

    for i in range(train_data.shape[1] - 1):
        x_unique = np.unique(train_data[:, i])
        list_x_name.append(x_unique)

        probs = []
        for subset in class_subsets:
            counts = np.array([np.sum(subset[:, i] == x) for x in x_unique])
            # With alpha == 0 this reduces to counts / len(subset).
            probs.append((counts + alpha) / (len(subset) + alpha * len(x_unique)))

        conditional_probability.append(np.array(probs))

    return conditional_probability, list_x_name

# Rebuild the data and show the distinct values found in each feature column;
# the probability tables themselves are discarded here.
train_data = create_train_data()
_, list_x_name = compute_conditional_probability(train_data)
print("x1 =", list_x_name[0])
print("x2 =", list_x_name[1])
print("x3 =", list_x_name[2])
print("x4 =", list_x_name[3])


x1 = ['Overcast' 'Rain' 'Sunny']
x2 = ['Cool' 'Hot' 'Mild']
x3 = ['High' 'Normal']
x4 = ['Strong' 'Weak']


In [25]:
def get_index_from_value(feature_name, list_features):
    """Return the position of the first entry equal to ``feature_name``.

    Args:
        feature_name: the value to look up.
        list_features: array-like of candidate values. It is converted with
            ``np.asarray`` so a plain Python list works too (comparing a list
            with a string directly yields a single ``False`` rather than an
            element-wise mask).

    Returns:
        NumPy integer index of the first match along the first axis.

    Raises:
        IndexError: if ``feature_name`` does not occur in ``list_features``.
    """
    # atleast_1d guards against the comparison collapsing to a 0-d result.
    mask = np.atleast_1d(np.asarray(list_features) == feature_name)
    matches = np.nonzero(mask)[0]
    if matches.size == 0:
        raise IndexError(
            "value {!r} not found in {!r}".format(feature_name, list(list_features))
        )
    return matches[0]


In [26]:
def train_naive_bayes(train_data):
    """Fit the categorical Naive Bayes tables from the training array.

    Args:
        train_data: 2-D NumPy array; feature columns first, class label in the
            last column.

    Returns:
        tuple: ``(prior_probability, conditional_probability, list_x_name)``,
        i.e. the result of ``compute_prior_probability`` followed by the two
        results of ``compute_conditional_probability``.
    """
    # The class labels are derived inside the two helpers; no separate
    # np.unique pass is needed here.
    prior_probability = compute_prior_probability(train_data)
    conditional_probability, list_x_name = compute_conditional_probability(train_data)
    return prior_probability, conditional_probability, list_x_name


In [27]:
def prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability):
    """Pick the more likely of the two classes for one observation.

    Args:
        X: sequence of feature values, one per feature column, in column order.
        list_x_name: per-feature arrays of known values; the position of a
            value in ``list_x_name[i]`` is its column in the i-th table.
        prior_probability: class priors; entries 0 and 1 are used.
        conditional_probability: per-feature tables indexed as
            ``[feature][class][value_index]``.

    Returns:
        int: 0 if the unnormalised score of class 0 is strictly larger,
        otherwise 1 (so a tie resolves to 1).
    """
    value_positions = []
    for column, value in enumerate(X):
        value_positions.append(get_index_from_value(value, list_x_name[column]))

    score_class0 = prior_probability[0]
    score_class1 = prior_probability[1]

    # Multiply the prior by each feature likelihood, one feature at a time.
    for column, position in enumerate(value_positions):
        table = conditional_probability[column]
        score_class0 *= table[0][position]
        score_class1 *= table[1][position]

    return 0 if score_class0 > score_class1 else 1

# Classify one unseen observation with a model trained on the toy data.
X = ['Sunny', 'Cool', 'High', 'Strong']
data = create_train_data()
prior_probability, conditional_probability, list_x_name = train_naive_bayes(data)
pred = prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability)

# pred is the class index: 0 is falsy, 1 is truthy.
if pred:
    print("Ad should go!")
else:
    print("Ad should not go!")


Ad should not go!
