In [None]:
import pandas as pd
from sklearn.preprocessing import KBinsDiscretizer
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

In [None]:
# Load the merged match table; the path is relative to the notebook's working directory.
data = pd.read_csv(
    filepath_or_buffer="data_preparation/new_data/merged_matches.csv",
)

In [None]:
# Numeric columns used as parents of 'Result' in the network: three per side.
features = [
    "home_Rank",
    "home_Goal_difference",
    'home_avg_points_per_match',
    "away_Rank",
    "away_Goal_difference",
    'away_avg_points_per_match',
]

# Equal-width binning into 3 ordinal bins per feature.
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')

# Fit on the full table, then overwrite the feature columns with the bin codes
# rendered as strings ("0", "1", "2"), which become the pgmpy state labels.
# NOTE: this mutates `data` in place, so the cell must not be re-run without
# reloading the CSV first.
bin_codes = discretizer.fit_transform(data[features])
data[features] = (
    pd.DataFrame(bin_codes, index=data.index, columns=features)
    .astype(int)
    .astype(str)
)


In [None]:
# Parse the match dates so they can be compared against the split date.
data['date'] = pd.to_datetime(data['date'])

# Matches on or before 2025-05-15 are used for fitting; later ones are the
# fixtures to predict. Rows with an unparseable/missing date land in neither set.
played_mask = data['date'] <= '2025-05-15'
upcoming_mask = data['date'] > '2025-05-15'
train_data = data[played_mask]
future_matches = data[upcoming_mask]

# Network structure: every feature is a direct parent of 'Result'.
edges = list(zip(features, ['Result'] * len(features)))

# Fit the conditional probability tables by maximum likelihood on the training
# rows, restricted to the columns that appear in the network.
training_columns = [*features, 'Result']
model = DiscreteBayesianNetwork(edges)
model.fit(train_data[training_columns], estimator=MaximumLikelihoodEstimator)

# Exact inference engine over the fitted network.
inference = VariableElimination(model)

In [None]:
# Raw (un-discretized) statistics for one hand-picked example fixture.
raw_evidence = {
    "home_Rank": 2,
    "home_Goal_difference": 35,
    "home_avg_points_per_match": 2.15,

    "away_Rank": 9,
    "away_Goal_difference": -2,
    "away_avg_points_per_match": 1.38,
}

# The network was fitted on binned, string-valued states, so raw numbers are not
# valid evidence. Push the example through the same fitted discretizer and apply
# the same int -> str conversion that was applied to the training columns.
raw_evidence_frame = pd.DataFrame([raw_evidence])[features]
evidence = {
    feature: str(int(bin_code))
    for feature, bin_code in zip(features, discretizer.transform(raw_evidence_frame)[0])
}

# Predict every upcoming match from ITS OWN (already discretized) feature values.
# Previously the single hand-written `evidence` dict was reused for every row, so
# all predictions were identical and unrelated to the match they were stored under.
predictions = []
for _, row in future_matches.iterrows():
    row_evidence = {feature: str(row[feature]) for feature in features}
    result = inference.query(variables=["Result"], evidence=row_evidence)
    predictions.append({
        "eventId": row["eventId"],
        "HomeTeam": row["home_Team"],
        "AwayTeam": row["away_Team"],
        "Prediction": result
    })


In [None]:
# Posterior distribution over 'Result' given the `evidence` dict built in the
# previous cell. Evidence values must be state labels the model was fitted on.
result = inference.query(
    evidence=evidence,
    variables=["Result"],
)
print(result)


In [None]:
# Turn the fitted CPT of 'Result' into a DataFrame: one row per combination of
# parent states, one column per state of the child.

# CPT object for the child node.
cpd = model.get_cpds('Result')

# The child comes first in `cpd.variables`; everything after it is a parent.
child = cpd.variable
parents = cpd.variables[1:]

# State labels for every variable involved (dict: variable -> list of states).
state_names = cpd.state_names
child_states = state_names[child]
parent_states = [state_names[parent] for parent in parents]

# Row labels: the cartesian product of the parents' states, in parent order.
index = pd.MultiIndex.from_product(parent_states, names=parents)

# get_values() is laid out as (child states, parent configurations); transpose it
# so that each row corresponds to one parent configuration.
values = cpd.get_values().T

# Column labels of the form "<child>=<state>".
column_labels = [f"{child}={s}" for s in child_states]
df_cpt = pd.DataFrame(data=values, index=index, columns=column_labels)

# Preview only: the full table has one row per parent configuration.
print("First 10 rows of the CPT for 'Result':")
print(df_cpt.head(10))