In [1]:
import numpy as np
import pandas as pd


In [2]:
# Toy traffic dataset: 15 observations, laid out below in three groups of five
# rows (one group per weather condition). 'Traffic jam' is the class label.
df_2 = pd.DataFrame({
    'Weather': ['clear'] * 5 + ['rainy'] * 5 + ['snowy'] * 5,
    'Time of week': [
        'workday', 'workday', 'workday', 'weekend', 'weekend',
        'weekend', 'workday', 'workday', 'weekend', 'weekend',
        'workday', 'workday', 'weekend', 'weekend', 'weekend',
    ],
    'Time of day': [
        'morning', 'lunch', 'evening', 'morning', 'evening',
        'lunch', 'morning', 'lunch', 'morning', 'evening',
        'morning', 'lunch', 'evening', 'lunch', 'morning',
    ],
    'Traffic jam': [
        'yes', 'no', 'yes', 'no', 'no',
        'no', 'yes', 'yes', 'no', 'no',
        'yes', 'yes', 'yes', 'no', 'yes',
    ],
})

In [3]:
# Distinct class labels of the target column, in order of first appearance.
Classes = df_2['Traffic jam'].drop_duplicates().tolist()

In [4]:
# Calculate the prior probabilities of each class
def calculate_prior(df, class_column):
    """Estimate P(class) as the relative frequency of each label.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the label column.
    class_column : str
        Name of the column in ``df`` that holds the class labels.

    Returns
    -------
    dict
        Maps each label found by ``value_counts()`` to its count divided by
        the number of rows in ``df``. Empty when ``df`` has no rows.
    """
    n_rows = len(df)
    label_counts = df[class_column].value_counts().to_dict()
    return {label: n / n_rows for label, n in label_counts.items()}

In [5]:
# Calculate the conditional probabilities P(feature_value | class)
def calculate_conditional_probabilities(df, class_column, feature_columns, classes=None):
    """Estimate P(feature_value | class) by relative frequency within each class.

    Parameters
    ----------
    df : pandas.DataFrame
        Training data holding both the label column and the feature columns.
    class_column : str
        Name of the label column in ``df``.
    feature_columns : iterable of str
        Names of the feature columns to tabulate.
    classes : iterable, optional
        Labels to build tables for. When omitted, the distinct values of
        ``df[class_column]`` are used (in order of first appearance), so the
        function does not depend on any notebook-level variable.

    Returns
    -------
    dict
        ``{class: {feature: {value: probability}}}``. Only values that occur
        at least once within a class get an entry; a class with no rows in
        ``df`` maps every feature to an empty dict.
    """
    if classes is None:
        classes = df[class_column].unique().tolist()

    conditional_probs = {}
    for class_ in classes:
        class_data = df[df[class_column] == class_]
        # The denominator is the same for every feature of this class,
        # so compute it once per class rather than once per feature.
        total_class_count = len(class_data)
        conditional_probs[class_] = {
            feature: {
                value: count / total_class_count
                for value, count in class_data[feature].value_counts().to_dict().items()
            }
            for feature in feature_columns
        }
    return conditional_probs

In [6]:
# Naive Bayes prediction function
def predict(T, prior_probs, cond_probs, feature_columns, classes=None, unseen_prob=1e-6):
    """Return the most probable class for instance ``T`` under naive Bayes.

    Each candidate class is scored as
    ``log P(class) + sum(log P(feature_value | class))`` and the class with
    the highest score is returned. Working with log-probabilities turns the
    product of probabilities into a sum.

    Parameters
    ----------
    T : mapping
        The instance to classify, indexed by feature name.
    prior_probs : dict
        ``{class: P(class)}``.
    cond_probs : dict
        ``{class: {feature: {value: P(value | class)}}}``.
    feature_columns : iterable of str
        Features of ``T`` to include in the score.
    classes : iterable, optional
        Candidate classes, in tie-breaking order (the first maximum wins).
        Defaults to the keys of ``cond_probs`` so the function does not rely
        on a notebook-level variable.
    unseen_prob : float, optional
        Probability substituted when a feature value has no entry for a class.

    Returns
    -------
    The element of ``classes`` with the highest log-score.

    Raises
    ------
    ValueError
        If there are no candidate classes to score.
    KeyError
        If a class is missing from ``prior_probs``/``cond_probs`` or a
        feature is missing from ``T`` or from a class's table.
    """
    classes = list(cond_probs) if classes is None else list(classes)
    if not classes:
        raise ValueError("predict() needs at least one class to score")

    log_scores = []
    for class_ in classes:
        # Start from the log-prior, then add one log-likelihood per feature.
        score = np.log(prior_probs[class_])
        for feature in feature_columns:
            likelihood = cond_probs[class_][feature].get(T[feature], unseen_prob)
            score += np.log(likelihood)
        log_scores.append(score)

    # np.argmax returns the first index of the maximum, so ties resolve to
    # the earliest class in `classes`.
    return classes[int(np.argmax(log_scores))]

In [7]:
# Predictor columns fed to the classifier; the target 'Traffic jam' is left out.
feature_columns = [
    'Weather',
    'Time of week',
    'Time of day',
]

In [8]:
# Fit the model: class priors plus per-class conditional probability tables.
priors = calculate_prior(df=df_2, class_column='Traffic jam')
conditionals = calculate_conditional_probabilities(
    df=df_2,
    class_column='Traffic jam',
    feature_columns=feature_columns,
)

In [9]:
# The unlabeled observation to classify, keyed by feature name.
T = {
    'Weather': 'rainy',
    'Time of week': 'weekend',
    'Time of day': 'morning',
}

In [10]:
# Classify the new instance with the fitted priors and conditional tables.
predicted_class = predict(
    T,
    prior_probs=priors,
    cond_probs=conditionals,
    feature_columns=feature_columns,
)
print(f"Predicted class: {predicted_class}")

Predicted class: no
