In [None]:
import pandas as pd
from sklearn.preprocessing import KBinsDiscretizer
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.model_selection import train_test_split

In [None]:
# Load the merged match table. Later cells read the homeTeamWinner /
# awayTeamWinner flags, the eventId column and the home_*/away_* statistics
# columns from this frame.
data = pd.read_csv("data_preparation/new_data/merged_matches.csv")


def get_result(row):
    """Map one match row to its outcome label.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing the
            'homeTeamWinner' and 'awayTeamWinner' entries.

    Returns:
        'HomeWin' if the home flag equals True, otherwise 'AwayWin' if the
        away flag equals True, otherwise 'Draw' if both flags equal False,
        and 'Unknown' for anything else (for example missing values).
    """
    # Equality (not identity) comparisons are kept on purpose so that
    # bool-like values that merely compare equal to True/False still match.
    if row['homeTeamWinner'] == True:
        return 'HomeWin'
    if row['awayTeamWinner'] == True:
        return 'AwayWin'

    # Only an explicit False on both sides counts as a draw.
    neither_won = row['homeTeamWinner'] == False and row['awayTeamWinner'] == False
    return 'Draw' if neither_won else 'Unknown'

# Label every match with its outcome by running get_result once per row.
data['Result'] = data.apply(get_result, axis='columns')


In [None]:
# Per-team statistics used as the parents of 'Result' in the network.
features = [
    "home_Rank", "home_Wins", "home_Ties", "home_Losses", "home_Points",
    "home_Goal_for", "home_Goal_against", "home_Goal_difference",

    "away_Rank", "away_Wins", "away_Ties", "away_Losses", "away_Points",
    "away_Goal_for", "away_Goal_against", "away_Goal_difference",
]

# Discretize numerical features into 3 equal-width bins
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')

# Learn the bin edges from the training rows only. The model cell below trains
# on the first 80% of the rows and holds out the rest; fitting the discretizer
# on the full table would let the held-out rows influence the bin edges
# (preprocessing leakage). The fraction here must stay in sync with that split.
n_train_rows = int(0.8 * len(data))
discretizer.fit(data[features].iloc[:n_train_rows])

# Apply the training-derived bins to every row (train and held-out alike).
data[features] = discretizer.transform(data[features])
data[features] = data[features].astype(int).astype(str)  # Convert to string for pgmpy


In [None]:
# Positional hold-out split: no shuffling, the cut is at 80% of the row count
split_index = int(0.8 * len(data))

# Rows before the cut train the model; rows from the cut onward are held out
train_data, test_data = data.iloc[:split_index], data.iloc[split_index:]

# Network structure: every feature is a direct parent of 'Result'
edges = [(parent, 'Result') for parent in features]

# NOTE(review): with every feature as a parent, the CPT of 'Result' has one
# column per combination of parent states, which grows exponentially with the
# number of features -- confirm this structure is intended.
model = DiscreteBayesianNetwork(edges)
model.fit(train_data[[*features, 'Result']], estimator=MaximumLikelihoodEstimator)

# Inference engine used by the query cell further down
inference = VariableElimination(model)

In [None]:
# Select the match row by eventId
match_id = 711715

matching_rows = data[data['eventId'] == match_id]
if matching_rows.empty:
    # Same exception type that .iloc[0] raised on an empty selection, but with
    # a message that names the id instead of "indexer is out-of-bounds".
    raise IndexError(f"No row in data has eventId == {match_id}")

# If several rows share the id, the first one is used.
match_row = matching_rows.iloc[0]

# Build evidence dictionary with features (converted to strings, which is the
# form the feature columns were given for pgmpy)
evidence = {feature: str(match_row[feature]) for feature in features}
evidence


In [None]:
# Query the distribution over 'Result' given the selected match's feature bins
query_result = inference.query(
    variables=['Result'],
    evidence=evidence,
)
print(query_result)

In [None]:
# Pull the conditional probability table of 'Result' out of the fitted model
cpd = model.get_cpds('Result')

# cpd.variables is taken to list the child first, followed by its parents
child = cpd.variable
parents = cpd.variables[1:]

# State labels: one list per variable, looked up for the child and each parent
state_names = cpd.state_names
child_states = state_names[child]
parent_states = [state_names[parent] for parent in parents]

# Row labels: one entry per combination of parent states
index = pd.MultiIndex.from_product(parent_states, names=parents)

# get_values() is expected to be shaped (child states, parent configurations);
# transposing puts one parent configuration on each row to line up with `index`
values = cpd.get_values().T

# Assemble the table: rows = parent configurations, columns = child states
df_cpt = pd.DataFrame(
    data=values,
    index=index,
    columns=[f"{child}={s}" for s in child_states],
)

# Preview only; the full table has one row per parent configuration
print("First 10 rows of the CPT for 'Result':")
print(df_cpt.head(10))