In [None]:
import pandas as pd
from sklearn.preprocessing import KBinsDiscretizer
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

In [None]:
# Load the merged match table that the remaining cells work on.
matches_csv = "data_preparation/new_data/merged_matches.csv"
data = pd.read_csv(matches_csv)


def get_result(row):
    """Translate a match row's winner flags into a result label.

    Args:
        row: Mapping-like object (for example a DataFrame row) that exposes
            the keys 'homeTeamWinner' and 'awayTeamWinner'.

    Returns:
        'HomeWin' when the home flag equals True; otherwise 'AwayWin' when
        the away flag equals True; 'Draw' when both flags equal False; and
        'Unknown' for any other combination (for example missing flags).
    """
    # The comparisons are explicit (``== True`` / ``== False``) rather than
    # truthiness checks: a NaN flag is truthy in Python but equals neither
    # True nor False, so it correctly falls through to 'Unknown'.
    home_flag = row['homeTeamWinner']
    if home_flag == True:  # noqa: E712
        return 'HomeWin'

    # The away flag is only looked up once a home win has been ruled out.
    away_flag = row['awayTeamWinner']
    if away_flag == True:  # noqa: E712
        return 'AwayWin'

    neither_won = home_flag == False and away_flag == False  # noqa: E712
    return 'Draw' if neither_won else 'Unknown'

# Label every match by running get_result over the frame row by row.
# NOTE(review): rows labelled 'Unknown' stay in `data`; confirm whether they
# should be excluded before the model is fitted.
result_labels = data.apply(get_result, axis='columns')
data['Result'] = result_labels


In [None]:
# Standings statistics used as model inputs. The same statistics are taken
# for the home side first and then for the away side.
team_stats = ["Rank", "Wins", "Ties", "Losses", "Points",
              "Goal_for", "Goal_against", "Goal_difference"]
features = [f"{side}_{stat}" for side in ("home", "away") for stat in team_stats]

# Discretize each numerical feature into 3 ordinal bins; strategy='uniform'
# splits every feature's range into equal-width intervals.
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
bin_codes = pd.DataFrame(
    discretizer.fit_transform(data[features]),
    columns=features,
    index=data.index,
)

# Store the bin codes as strings for pgmpy (float code -> int -> str).
data[features] = bin_codes.astype(int).astype(str)


In [None]:
# Create the Bayesian Network structure.
#
# Naive-Bayes layout: 'Result' is the single parent of every feature.
# Pointing every feature at 'Result' instead would make all 16 features
# parents of one node, whose conditional probability table then needs one
# column per combination of parent states -- up to 3**16 (about 43 million)
# for 3-bin features -- nearly all of which never occur in the data. With
# 'Result' as the parent, each feature gets a small table conditioned on
# 'Result' alone, and P(Result | features) is still obtained via inference.
edges = [('Result', feature) for feature in features]

model = DiscreteBayesianNetwork(edges)
# Learn every CPD from the observed frequencies in the discretized columns.
model.fit(data[features + ['Result']], estimator=MaximumLikelihoodEstimator)
inference = VariableElimination(model)

In [None]:
# Select the match row by eventId
match_id = 705018
match_rows = data[data['eventId'] == match_id]
if match_rows.empty:
    # Fail with an explicit message instead of the opaque
    # "single positional indexer is out-of-bounds" that .iloc[0] raises on
    # an empty selection; the exception type is kept as IndexError.
    raise IndexError(f"No match with eventId={match_id} found in data")
match_row = match_rows.iloc[0]

# Build the evidence dictionary: one discretized (string) state per feature
evidence = {feature: str(match_row[feature]) for feature in features}

# Query the distribution over 'Result' given the evidence and show it
query_result = inference.query(variables=['Result'], evidence=evidence)
print(query_result)
