In [6]:
import pandas as pd

# Bayes table over the two hypotheses (spam / ham): a flat 0.5 prior and the
# per-hypothesis likelihoods, from which joint and posterior are derived.
table_L = pd.DataFrame(index=["spam", "ham"]).assign(
    prior=0.5,
    likelihood=(0.6, 0.2),
)
table_L = table_L.assign(joint=table_L["prior"] * table_L["likelihood"])

# The normalising constant is the total of the joint column.
norm_const = table_L["joint"].sum()

# Dividing the joint by it gives the posterior, P(S|L).
table_L = table_L.assign(posterior=table_L["joint"] / norm_const)

print(table_L)

      prior  likelihood  joint  posterior
spam    0.5         0.6    0.3       0.75
ham     0.5         0.2    0.1       0.25


In [8]:
import pandas as pd

# Same two-hypothesis table as before, but with a different pair of
# likelihoods (0.4 for spam, 0.05 for ham) on top of the flat 0.5 prior.
table_L = pd.DataFrame(
    {"prior": 0.5, "likelihood": [0.4, 0.05]},
    index=["spam", "ham"],
)

# Unnormalised posterior: prior times likelihood, row by row.
joint = table_L["prior"] * table_L["likelihood"]
table_L["joint"] = joint

# Normalise so that the posterior column sums to one.
norm_const = joint.sum()
table_L["posterior"] = joint / norm_const

print(table_L)

      prior  likelihood  joint  posterior
spam    0.5        0.40  0.200   0.888889
ham     0.5        0.05  0.025   0.111111


In [17]:
import pandas as pd

# Step 1: first update, starting from a flat 0.5 prior.
table_L = pd.DataFrame(index=["spam", "ham"]).assign(
    prior=0.5,
    likelihood=(0.6, 0.2),
)
table_L["joint"] = table_L["prior"] * table_L["likelihood"]
norm_const = table_L["joint"].sum()
table_L["posterior"] = table_L["joint"].div(norm_const)

print(table_L)

# Step 2: second update. The posterior of step 1 is reused as the prior and
# combined with the next pair of likelihoods.
table_LW = pd.DataFrame(index=["spam", "ham"]).assign(
    prior=table_L["posterior"],
    likelihood=(0.4, 0.05),
)
table_LW["joint"] = table_LW["prior"] * table_LW["likelihood"]
norm_const = table_LW["joint"].sum()
table_LW["posterior"] = table_LW["joint"].div(norm_const)

print(table_LW)
print(table_LW.index)


      prior  likelihood  joint  posterior
spam    0.5         0.6    0.3       0.75
ham     0.5         0.2    0.1       0.25
      prior  likelihood   joint  posterior
spam   0.75        0.40  0.3000       0.96
ham    0.25        0.05  0.0125       0.04
Index(['spam', 'ham'], dtype='object')


In [19]:
import pandas as pd

def update_bayesian_table(table: pd.DataFrame, likelihood) -> pd.DataFrame:
    """Perform one Bayesian update on top of an existing table.

    Args:
        table: Frame with a ``"posterior"`` column; that column becomes the
            prior of the new table. The input frame is not modified.
        likelihood: Values assigned to the ``"likelihood"`` column, one per
            row of ``table``.

    Returns:
        A new frame sharing ``table``'s index with the columns ``prior``,
        ``likelihood``, ``joint`` (prior * likelihood) and ``posterior``
        (joint divided by the sum of the joint column).
    """
    result = pd.DataFrame(index=table.index)
    result["prior"] = table["posterior"]
    result["likelihood"] = likelihood

    # Unnormalised posterior, then rescaled by its own total.
    joint = result["prior"] * result["likelihood"]
    result["joint"] = joint
    result["posterior"] = joint / joint.sum()
    return result

# Seed table that carries only the posterior of the previous step.
table_L = pd.DataFrame({"posterior": [0.75, 0.25]}, index=["spam", "ham"])
print(table_L)

# Apply the next pair of likelihoods on top of that posterior.
updated_table = update_bayesian_table(table_L, likelihood=(0.4, 0.05))
print(updated_table)

      posterior
spam       0.75
ham        0.25
      prior  likelihood   joint  posterior
spam   0.75        0.40  0.3000       0.96
ham    0.25        0.05  0.0125       0.04


In [42]:
from typing import Iterable
import pandas as pd

def bayesian_table(table: pd.DataFrame, prior: float | Iterable[float], likelihood: Iterable[float]) -> pd.DataFrame:
    calc_prior = (prior, 1-prior) if type(prior) is float else prior
    posterior = table.get("posterior")
    posterior = posterior if posterior is not None else calc_prior
    updated_table = pd.DataFrame(index=table.index)
    updated_table["prior"] = posterior
    updated_table["likelihood"] = likelihood
    updated_table["joint"] = posterior * updated_table["likelihood"]
    norm_const = updated_table["joint"].sum()
    updated_table["posterior"] = updated_table["joint"] / norm_const
    return updated_table

# Start from an empty table over the two hypotheses and apply the two
# likelihood pairs one after the other. After the first pass the table has a
# posterior column, which bayesian_table then uses in place of `prior`.
table = pd.DataFrame(index=['Spam', 'Ham'])
prior = 0.5

for likelihood in ([0.6, 0.2], [0.4, 0.05]):
    table = bayesian_table(table, prior, likelihood)
    print(table)

      prior  likelihood  joint  posterior
Spam    0.5         0.6    0.3       0.75
Ham     0.5         0.2    0.1       0.25
      prior  likelihood   joint  posterior
Spam   0.75        0.40  0.3000       0.96
Ham    0.25        0.05  0.0125       0.04


In [57]:
from bayes_util import bayesian_table

# Empty frame whose index labels the two rows (x, y) of every table below.
# NOTE(review): `pd` is not imported in this cell; it relies on an earlier cell.
table = pd.DataFrame(index=['x', 'y'])

# Scalar prior plus one likelihood pair per observation type (w and b), given
# in (x, y) order.
# NOTE(review): bayesian_table comes from bayes_util and is not visible here;
# presumably it matches the helper defined earlier in this notebook - confirm.
prior = 0.5
likelihood_w = (0.9, 0.2)
likelihood_b = (0.1, 0.8)

# One b observation, starting from the empty table.
table_B = bayesian_table(table=table, prior=prior, likelihood=likelihood_b)
print(f"table_B:\n{table_B}")

# A second b observation chained onto table_B. In the saved output the prior
# column of table_BB equals the posterior of table_B, so `prior` appears to be
# ignored once the input table carries a posterior.
table_BB = bayesian_table(table=table_B, prior=prior, likelihood=likelihood_b)
print(f"table_BB:\n{table_BB}")

# Same empty frame as at the top of the cell, rebuilt before the w branch.
table = pd.DataFrame(index=['x', 'y'])

# One w observation, starting from the empty table.
table_W = bayesian_table(table=table, prior=prior, likelihood=likelihood_w)
print(f"table_W:\n{table_W}")

# A w observation chained onto table_B (b first, then w).
table_BW = bayesian_table(table=table_B, prior=prior, likelihood=likelihood_w)
print(f"table_BW:\n{table_BW}")



table_B:
   prior  likelihood  joint  posterior
x    0.5         0.1   0.05   0.111111
y    0.5         0.8   0.40   0.888889
table_BB:
      prior  likelihood     joint  posterior
x  0.111111         0.1  0.011111   0.015385
y  0.888889         0.8  0.711111   0.984615
table_W:
   prior  likelihood  joint  posterior
x    0.5         0.9   0.45   0.818182
y    0.5         0.2   0.10   0.181818
table_BW:
      prior  likelihood     joint  posterior
x  0.111111         0.9  0.100000       0.36
y  0.888889         0.2  0.177778       0.64


In [3]:
import pandas as pd
import numpy as np
from bayes_util import bayesian_table, get_likelihoods

# Load the data set from the working directory.
data = pd.read_csv("PlayTennis.csv")

# NOTE(review): the commented-out function below looks like an earlier inline
# draft of get_likelihoods. The calls further down use the version imported
# from bayes_util, which takes different keywords (hypotheses=, feature_name=)
# than this draft (hypothesis, var). Confirm it is obsolete and remove it.
# def get_likelihoods(data: pd.DataFrame, hypothesis: str | float, var: str) -> list:
#     indices = { hypothesis: data.iloc[:, -1] == hypothesis for hypothesis in data.iloc[:, -1].unique() }
#     var_data = data[var][indices[hypothesis]]
#     len_var_data = len(var_data)
#     (states, counts) = np.unique(var_data, return_counts=True)
#     return [ (state, counts[idx_state] / len_var_data) for (idx_state, state) in enumerate(states) ]

# Hard-coded prior as a 2-tuple.
# NOTE(review): presumably ordered (Yes, No) and taken from the class counts
# in PlayTennis.csv - confirm against the data.
prior = 9/14, 5/14
print(f"prior = {prior}")

# Likelihoods of the "Wind" feature for hypothesis "Yes".
# NOTE(review): the saved output for this print lists two values per state
# even though only "Yes" is requested; it may be stale or reflect state kept
# inside get_likelihoods - re-run on a fresh kernel to check.
yes_likelihood_wind = get_likelihoods(data=data, hypotheses=["Yes"], feature_name="Wind")
print(f"likelihood_wind:\n{yes_likelihood_wind}")

# Likelihoods of the "Wind" feature for hypothesis "No". The printed label is
# the same as for the "Yes" case above, so the two outputs can only be told
# apart by their order.
no_likelihood_wind = get_likelihoods(data=data, hypotheses=["No"], feature_name="Wind")
print(f"likelihood_wind:\n{no_likelihood_wind}")

# Likelihoods of the "Outlook" feature for hypothesis "Yes".
yes_likelihood_outlook = get_likelihoods(data=data, hypotheses=["Yes"], feature_name="Outlook")
print(f"likelihood_outlook:\n{yes_likelihood_outlook}")






prior = (0.6428571428571429, 0.35714285714285715)
likelihood_wind:
{'Strong': [0.3333333333333333, 0.6], 'Weak': [0.6666666666666666, 0.4]}
likelihood_wind:
{'Strong': [0.6], 'Weak': [0.4]}
likelihood_outlook:
{'Overcast': [0.4444444444444444], 'Rain': [0.3333333333333333], 'Sunny': [0.2222222222222222]}


In [2]:

import pandas as pd
from bayes_util import *

# Load the data set. The last column supplies the hypotheses (its distinct
# values); every other column is treated as a feature.
data = pd.read_csv("PlayTennis.csv")

hypotheses = list(data.iloc[:, -1].unique())
feature_names = data.columns[:-1]

# One (feature name, likelihoods) pair per feature, in column order.
hypothesis_likelihoods = list(zip(
    feature_names,
    (
        get_likelihoods(data=data, hypotheses=hypotheses, feature_name=name)
        for name in feature_names
    ),
))

def calc(prior, likelihoods, idx_feature) -> pd.DataFrame:
    """Recursively tabulate the posterior for every combination of feature states.

    For each state of the feature at position ``idx_feature`` the prior is
    updated with that state's likelihoods. The result becomes the prior for
    the next feature, and at the last feature it is emitted as one row whose
    columns are the module-level ``hypotheses``.

    Args:
        prior: Prior handed to ``bayesian_table_custom`` for this feature.
        likelihoods: Sequence of ``(feature_name, {state: likelihoods})``
            entries, one per feature.
        idx_feature: Position in ``likelihoods`` of the feature to expand.

    Returns:
        A frame with a fresh 0..n-1 index holding one column per feature from
        ``idx_feature`` onwards, followed by the posterior columns.

    Note:
        ``bayesian_table_custom`` and ``hypotheses`` come from the enclosing
        module. The helper is not visible here; presumably it returns the
        posterior per hypothesis.
    """
    feature_name = likelihoods[idx_feature][0]
    states = likelihoods[idx_feature][1]
    at_last_feature = idx_feature + 1 == len(likelihoods)

    blocks = []
    for state, state_likelihoods in states.items():
        posterior = bayesian_table_custom(prior=prior, likelihood=state_likelihoods)
        if at_last_feature:
            # Leaf: lay the posterior out as a row labelled by hypothesis.
            tail = pd.DataFrame(posterior).transpose()
            tail.columns = hypotheses
        else:
            # Inner node: the posterior is the prior for the next feature.
            tail = calc(prior=posterior, likelihoods=likelihoods, idx_feature=idx_feature + 1)

        # Prefix every row of the sub-table with the current feature's state.
        head = pd.Series([state] * len(tail), name=feature_name).to_frame()
        blocks.append(pd.concat([head, tail], axis="columns"))

    return pd.concat(blocks, axis=0).reset_index(drop=True)

# Expand all features starting from the first one (idx_feature=0).
# NOTE(review): the hard-coded prior must be in the same order as
# `hypotheses`. The saved output shows the columns as (No, Yes), so this is
# presumably P(No)=5/14, P(Yes)=9/14 - confirm against the data.
result_df = calc(prior=[5/14, 9/14], likelihoods=hypothesis_likelihoods, idx_feature=0)

# Prints the full table, one row per combination of feature states.
print(result_df)

     Outlook Temperature Humidity    Wind        No       Yes
0   Overcast        Cool     High  Strong  0.000000  1.000000
1   Overcast        Cool     High    Weak  0.000000  1.000000
2   Overcast        Cool   Normal  Strong  0.000000  1.000000
3   Overcast        Cool   Normal    Weak  0.000000  1.000000
4   Overcast         Hot     High  Strong  0.000000  1.000000
5   Overcast         Hot     High    Weak  0.000000  1.000000
6   Overcast         Hot   Normal  Strong  0.000000  1.000000
7   Overcast         Hot   Normal    Weak  0.000000  1.000000
8   Overcast        Mild     High  Strong  0.000000  1.000000
9   Overcast        Mild     High    Weak  0.000000  1.000000
10  Overcast        Mild   Normal  Strong  0.000000  1.000000
11  Overcast        Mild   Normal    Weak  0.000000  1.000000
12      Rain        Cool     High  Strong  0.633431  0.366569
13      Rain        Cool     High    Weak  0.365482  0.634518
14      Rain        Cool   Normal  Strong  0.177632  0.822368
15      