In [11]:
import pandas as pd

# Sample weather dataset
data = {
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy', 'Sunny', 'Overcast', 'Overcast', 'Rainy'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
    'Windy': ['False', 'True', 'False', 'False', 'False', 'True', 'True', 'False', 'False', 'False', 'True', 'True', 'False', 'True'],
    'Play': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']
}

# Convert data into a DataFrame
df = pd.DataFrame(data)

# Function to calculate probabilities
def calculate_probability(df, feature_col, target_col):
    probabilities = df.groupby([feature_col, target_col]).size() / df.groupby(target_col).size()
    return probabilities

# Generate frequency tables for each feature and target variable
outlook_prob = calculate_probability(df, 'Outlook', 'Play')
temperature_prob = calculate_probability(df, 'Temperature', 'Play')
humidity_prob = calculate_probability(df, 'Humidity', 'Play')
windy_prob = calculate_probability(df, 'Windy', 'Play')

# Print the likelihood tables
print("Likelihood Table for Outlook:")
print(outlook_prob)
print("\nLikelihood Table for Temperature:")
print(temperature_prob)
print("\nLikelihood Table for Humidity:")
print(humidity_prob)
print("\nLikelihood Table for Windy:")
print(windy_prob)

# Bayes Theorem to calculate posterior probability (example for a new data point)
# For instance, if the Outlook is Sunny, Temperature is Cool, Humidity is Normal, and Windy is False
posterior_prob = {
    'Yes': outlook_prob['Sunny']['Yes'] * temperature_prob['Cool']['Yes'] * humidity_prob['Normal']['Yes'] * windy_prob['False']['Yes'] * (df['Play'].value_counts(normalize=True)['Yes']),
    'No': outlook_prob['Sunny']['No'] * temperature_prob['Cool']['No'] * humidity_prob['Normal']['No'] * windy_prob['False']['No'] * (df['Play'].value_counts(normalize=True)['No'])
}

# Normalize probabilities
total = sum(posterior_prob.values())
posterior_prob = {key: value/total for key, value in posterior_prob.items()}

# Print posterior probability
print("\nPosterior Probability:")
print(posterior_prob)


Likelihood Table for Outlook:
Outlook   Play
Overcast  Yes     0.444444
Rainy     No      0.400000
          Yes     0.333333
Sunny     No      0.600000
          Yes     0.222222
dtype: float64

Likelihood Table for Temperature:
Temperature  Play
Cool         No      0.200000
             Yes     0.333333
Hot          No      0.400000
             Yes     0.222222
Mild         No      0.400000
             Yes     0.444444
dtype: float64

Likelihood Table for Humidity:
Humidity  Play
High      No      0.800000
          Yes     0.333333
Normal    No      0.200000
          Yes     0.666667
dtype: float64

Likelihood Table for Windy:
Windy  Play
False  No      0.400000
       Yes     0.666667
True   No      0.600000
       Yes     0.333333
dtype: float64

Posterior Probability:
{'Yes': 0.8605851979345954, 'No': 0.1394148020654045}
