**Imports**

In [None]:
import pandas as pd
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
import matplotlib.pyplot as plt
import networkx as nx

import logging
# Raise the 'pgmpy' logger threshold to WARNING so that only warnings and
# errors emitted through that logger show up in the notebook output.
logging.getLogger('pgmpy').setLevel(logging.WARNING)


**DAG Visualization of the relations in the dataframe**

In [None]:
# Conceptual structure of the Bayesian network: per-team indicators feed a
# team "strength" node, and both strength nodes feed the match result.
# NOTE(review): the fitted model further down uses the column names
# 'home_Rank_binned' / 'home_GD_binned' (and the away equivalents) for the
# rank and goal-difference nodes; the shorter names here are display labels.
edges = [
    ("home_Form", "home_Strength"),
    ("home_Rank", "home_Strength"),
    ("home_Goal_difference", "home_Strength"),

    ("away_Form", "away_Strength"),
    ("away_Rank", "away_Strength"),
    ("away_Goal_difference", "away_Strength"),

    ("home_Strength", "Result"),
    ("away_Strength", "Result")
]

# Create the graph
G = nx.DiGraph()
G.add_edges_from(edges)

# Draw on an explicit Axes. Without one, nx.draw adds an axes that fills the
# whole figure, which pushes the title outside the canvas and makes
# tight_layout() complain about an incompatible axes.
fig, ax = plt.subplots(figsize=(12, 8))

# The spring layout is randomised; fixing the seed keeps the figure identical
# across "Restart & Run All".
pos = nx.spring_layout(G, seed=42)

nx.draw(
    G, pos=pos, ax=ax,
    with_labels=True, node_size=3000, node_color='skyblue',
    arrowsize=20, font_size=11, font_weight='bold', edge_color='gray'
)
ax.set_title("Bayesian network DAG", fontsize=16)
ax.set_axis_off()
fig.tight_layout()
plt.show()


In [None]:
# Load the match table from CSV and convert its 'date' column to pandas
# datetimes in one chained expression; later cells compare against it.
matches = (
    pd.read_csv("data_preparation/new_data/merged_matches.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
matches

**Defining the teams of the match to be predicted and the training cutoff date**

In [None]:
# Fixture to predict: (home side, away side).
Home_Team, Away_Team = "FC Augsburg", "1. FC Union Berlin"

# Last match day (inclusive) whose results are used for training.
date = "2025-05-15"

**Splitting the data into training and test sets, then fitting the model using MLE**

In [None]:
# ---------------------------------------------------------------------------
# Train / test split
# ---------------------------------------------------------------------------
# Training uses every match played on or before the cutoff date (`date`).
cutoff_date = pd.to_datetime(date)

# The fixture to predict: Home_Team vs Away_Team played on the test date.
test_date = pd.to_datetime("2025-05-17")
test_match = matches[
    (matches['date'].dt.date == test_date.date()) &
    ((matches['home_Team'] == Home_Team) & (matches['away_Team'] == Away_Team))
]

if test_match.empty:
    # Raise instead of calling exit(): inside a notebook exit() shuts the
    # kernel down and throws away all state, whereas an exception simply
    # stops "Run All" at this cell with a readable message.
    raise ValueError(
        f"Test match not found: {Home_Team} vs {Away_Team} on {test_date.date()}."
    )

train_df = matches[matches['date'] <= cutoff_date]
# Leakage guard: if the cutoff is ever moved to/after the test date, make sure
# the fixture being predicted is not part of the training data.
train_df = train_df.drop(index=test_match.index, errors='ignore')

# Select key features (use binned columns)
features = ['home_Strength', 'home_Form', 'away_Form', 'away_Strength', 'Result',
            'home_GD_binned', 'away_GD_binned', 'home_Rank_binned', 'away_Rank_binned']
train_data = train_df[features].copy()

# ---------------------------------------------------------------------------
# Model definition and fitting
# ---------------------------------------------------------------------------
# Form, rank and goal difference feed each team's strength node; the two
# strength nodes are the only parents of Result.
model = DiscreteBayesianNetwork([
    ("home_Form", "home_Strength"),
    ("home_Rank_binned", "home_Strength"),
    ("home_GD_binned", "home_Strength"),

    ("away_Form", "away_Strength"),
    ("away_Rank_binned", "away_Strength"),
    ("away_GD_binned", "away_Strength"),

    ("home_Strength", "Result"),
    ("away_Strength", "Result")
])

# Fit the conditional probability tables using maximum likelihood estimation
model.fit(train_data, estimator=MaximumLikelihoodEstimator)

# Create inference object
inference = VariableElimination(model)

# ---------------------------------------------------------------------------
# Prediction
# ---------------------------------------------------------------------------
# Use the (first) matching row of the test fixture as evidence.
test_instance = test_match.iloc[0]
# NOTE(review): with both strength nodes observed, Result is conditionally
# independent of the form nodes in this structure, so the two *_Form entries
# do not influence the posterior. They are kept to preserve the original query.
# NOTE(review): an evidence value that never occurs in the training data is
# presumably rejected by the query as an unknown state - confirm.
evidence = {
    'home_Form': test_instance['home_Form'],
    'away_Form': test_instance['away_Form'],
    'home_Strength': test_instance['home_Strength'],
    'away_Strength': test_instance['away_Strength'],
}

# Posterior distribution over the match result given the evidence
result_prediction = inference.query(variables=['Result'], evidence=evidence)

# Probabilities and the state labels they belong to (same order)
values = result_prediction.values  # type: ignore
states = result_prediction.state_names['Result']  # type: ignore

# Human-readable labels for the outcome states.
# NOTE(review): assumes 'Result' is encoded as HomeWin / Draw / AwayWin;
# any other label is shown as-is instead of raising a KeyError.
label_map = {
    'HomeWin': Home_Team,
    'Draw': 'Draw',
    'AwayWin': Away_Team,
}

# Pair every state's display label with its probability
results = [(label_map.get(state, str(state)), prob) for state, prob in zip(states, values)]  # type: ignore

# Desired display order using the new labels; unknown labels go last
desired_order = [Home_Team, 'Draw', Away_Team]
order_rank = {label: position for position, label in enumerate(desired_order)}
ordered_results = sorted(results, key=lambda x: order_rank.get(x[0], len(desired_order)))

# The first column grows with the longest label so long team names stay aligned
col1_width = max([20, *(len(label) for label, _ in results)])
col2_width = 18

# Print header (the rule spans both columns plus the separating space)
print(f"{'Outcome':<{col1_width}} {'Probability in %':>{col2_width}}")
print("-" * (col1_width + col2_width + 1))

# Print rows
for outcome, prob in ordered_results:
    print(f"{outcome:<{col1_width}} {prob * 100:>{col2_width}.2f}")