# Example prediction

This notebook presents an example prediction for a single match played on April 7th, 2025: Arsenal vs. Real Madrid. Because the match was played in the Champions League, neither this match nor any past results from that competition are present in the training data. It is a good showcase of how underlying match statistics can help predict the full-time result even when the score itself gives no clear indication: Arsenal's win was predicted with roughly 80% certainty at half-time, with the score still at 0-0.

In [43]:
import pandas as pd
import numpy as np
import time
import os
import sys
import warnings
import gc
import traceback
import pickle
from datetime import datetime

# Neural Networks - TensorFlow/Keras (for loading base_nn models)
import tensorflow as tf


# Evaluation Metrics & Utils
from scipy.stats import entropy, poisson

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# --- Warning / display behaviour ---
# All warnings are silenced for the whole notebook; floats print with 4 decimals.
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 50)
pd.set_option('display.float_format', '{:.4f}'.format)

# --- Column groups and location of the pickled in-play pipeline ---
PREMATCH_ODDS_COLS = ['OddHome', 'OddDraw', 'OddAway']
INPLAY_CLUSTER_COLS = ['C_LTH', 'C_HTB', 'C_LTA', 'C_VAD', 'C_VHD', 'C_PHB']
INPLAY_LOAD_DIR = 'models/in_play'

# --- Reproducibility: seed NumPy and (when loaded) TensorFlow ---
SEED = 42
np.random.seed(SEED)
if 'tf' in sys.modules:
    tf.random.set_seed(SEED)

# --- Heatmap / figure output settings ---
HEATMAP_MAX_GOALS = 6  # Max goals to display on heatmap axes
HEATMAP_SCORE_RANGE = np.arange(0, HEATMAP_MAX_GOALS + 1)  # 0..HEATMAP_MAX_GOALS inclusive
VIZ_OUTPUT_DIR = "example_output"
VIZ_SAVE_FORMAT = 'png'
VIZ_HIGH_DPI = 300
os.makedirs(VIZ_OUTPUT_DIR, exist_ok=True)  # created up front so savefig cannot fail on a missing dir
plt.style.use('seaborn-v0_8-whitegrid')

# --- Input Data for the Single Match ---
# One record holding pre-match information (odds, Elo, form) together with the
# half-time score and the match statistics listed below.
matches_data = [
    {
        'MatchDesc': 'Arsenal vs Real Madrid',
        'MatchDate': '2025-04-07', 'MatchTime': '20:00:00',
        'HomeTeam': 'Arsenal', 'AwayTeam': 'Real Madrid', 'Division': 'TopLeague',
        'FTHome': 3, 'FTAway': 0, 'FTResult': 'H', # Actual outcome (NOT USED by model)
        'HTHome': 0, 'HTAway': 0,
        'HomeShots': 12, 'AwayShots': 9, 'HomeTarget': 11, 'AwayTarget': 3,
        'HomeCorners': 5, 'AwayCorners': 3, 'HomeYellow': 1, 'AwayYellow': 2,
        'HomeRed': 0, 'AwayRed': 1, 'HomeFouls': 8, 'AwayFouls': 9,
        'Form3Home': 7, 'Form3Away': 4, 'Form5Home': 9, 'Form5Away': 8,
        'HomeElo': 1996, 'AwayElo': 1948,
        'OddHome': 2.22, 'OddDraw': 3.55, 'OddAway': 3.35,
        'Over25': 1.84, 'Under25': 2.01,
        'HandiSize': -0.5, 'HandiHome': 1.85, 'HandiAway': 1.90,
        'MaxOver25': 1.84, 'MaxUnder25': 2.01,
        'MaxHome': 2.22, 'MaxDraw': 3.55, 'MaxAway': 3.35
    }
]

df_demo = pd.DataFrame(matches_data)

# Engineered columns that are pre-created as NaN placeholders below.
_placeholder_cols_from_inplay_fe = [
    '1XOdd', 'X2Odd', '12Odd', 'Max1XOdd', 'MaxX2Odd', 'Max12Odd',
    'EloDifference', 'EloTotal', 'EloAdvantage', 'Form3Difference', 'Form5Difference',
    'FormOlderHome', 'FormOlderAway', 'FormMomentumHome', 'FormMomentumAway',
    'FormMomentumDiff', 'OddsDifference', 'MaxOddsDifference', 'ImpliedProbHome',
    'ImpliedProbDraw', 'ImpliedProbAway', 'ImpliedProbTotal', 'BookmakerMargin',
    'HTTotalGoals', 'ShotsDifference', 'ShotsTotal', 'TargetDifference',
    'TargetTotal', 'ShotAccuracyHome', 'ShotAccuracyAway', 'ShotAccuracyDiff',
    'CornersDifference', 'CornersTotal', 'GameDominanceIndex', 'CardPointsHome',
    'CardPointsAway', 'CardPointsDiff', 'FoulsDifference', 'FoulsTotal',
    'ScoringEfficiencyHome', 'ScoringEfficiencyAway', 'DefensiveRatingHome',
    'DefensiveRatingAway', 'DrawLikelihood', 'FormEfficiencyHome', 'FormEfficiencyAway',
    'CleanSheetProbHome', 'CleanSheetProbAway', 'LowScoreIndicator', 'DrawTendency',
    'DefensiveOrganization', 'HomeDefensiveStyle', 'AwayDefensiveStyle',
    'ExpectedGoalsHome', 'ExpectedGoalsAway'
]
# dict.fromkeys de-duplicates while keeping first-seen order. A plain set()
# would make the resulting column order differ between kernel restarts
# (string hashing is randomised), which hurts reproducibility.
all_potentially_needed_cols = list(dict.fromkeys(
    list(df_demo.columns) + PREMATCH_ODDS_COLS + INPLAY_CLUSTER_COLS +
    _placeholder_cols_from_inplay_fe
))

# Add every column that is not part of the raw record as an all-NaN placeholder.
for col in all_potentially_needed_cols:
    if col not in df_demo.columns:
        df_demo[col] = np.nan

# Assign the result back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form is deprecated and is a silent
# no-op when pandas Copy-on-Write is enabled.
df_demo['Division'] = df_demo['Division'].fillna('TopLeague')
df_demo['MatchDate'] = pd.to_datetime(df_demo['MatchDate'])
if 'MatchTime' in df_demo.columns:
    # Combine date and kick-off time into a single timestamp.
    df_demo['MatchDateTime'] = pd.to_datetime(df_demo['MatchDate'].astype(str) + ' ' + df_demo['MatchTime'].astype(str))
else:
    df_demo['MatchDateTime'] = pd.to_datetime(df_demo['MatchDate'])

In [45]:
# --- In-Play Helpers ---
def inplay_load_pipeline_components(load_dir):
    """Load all pickled in-play pipeline components from ``load_dir``.

    Every file in the fixed list below is unpickled and stored under its file
    name without the ``.pkl`` suffix. ``cluster_medians.pkl`` and
    ``target_transformer.pkl`` are optional: when absent, their entry is set
    to ``None``. All other files are required.

    Args:
        load_dir: Directory containing the ``*.pkl`` artifacts.

    Returns:
        dict mapping component name to the loaded object, or ``None`` when the
        directory does not exist, a required file is missing, any file fails
        to unpickle, or a required component unpickles to ``None``. Problems
        are reported with ``print``; no exception is propagated for load
        failures.
    """
    optional_files = {'cluster_medians.pkl', 'target_transformer.pkl'}
    files_to_load = [
        'scaler.pkl', 'feature_names.pkl', 'target_transformer.pkl',
        'label_encoder.pkl', 'draw_calibrator.pkl', 'meta_feature_names.pkl',
        'cluster_medians.pkl',
        'meta_model.pkl',
        'base_nn_home.pkl', 'base_nn_away.pkl',
        'base_xgb_home.pkl', 'base_xgb_away.pkl',
        'base_lgbm_home.pkl', 'base_lgbm_away.pkl',
        'base_draw_specialist.pkl', 'base_hda_classifier.pkl'
    ]

    if not os.path.isdir(load_dir):
        print(f"   ERROR: Load directory not found: {load_dir}")
        return None

    components = {}
    successful = True
    # Keep going after a failure so that every problem is reported in one pass.
    for fname in files_to_load:
        fpath = os.path.join(load_dir, fname)
        key = fname.replace('.pkl', '')
        try:
            with open(fpath, 'rb') as f:
                # SECURITY: unpickling can execute arbitrary code. Only point
                # load_dir at artifacts produced by a trusted training run.
                components[key] = pickle.load(f)
        except FileNotFoundError:
            if fname in optional_files:
                components[key] = None
                print(f"   Optional file not found: {fname}, setting to None.")
            else:
                print(f"   ERROR: Required file not found: {fname}")
                successful = False
        except Exception as e:
            print(f"   ERROR loading {fname}: {e}")
            successful = False

    if not successful:
        print("Critical error: Failed to load In-Play components.")
        return None

    # Every non-optional file is essential (this includes all base_* models and
    # the draw calibrator). A file that loads fine but contains None is treated
    # as missing. This single check replaces the former
    # all(k for k in components ...) test, which inspected the key strings
    # rather than the loaded objects and therefore could never fail.
    essential_keys = [fname.replace('.pkl', '') for fname in files_to_load
                      if fname not in optional_files]
    missing_essentials = [k for k in essential_keys if components.get(k) is None]
    if missing_essentials:
        print(f"   ERROR: Missing essential In-Play components: {missing_essentials}")
        return None

    return components

def combine_odds(odd1, odd2):
    """Combine two decimal odds into the odds of "either outcome happens".

    The implied probabilities ``1/odd1`` and ``1/odd2`` are summed and the
    result is inverted; the returned odds are floored at 1.00.

    Returns ``np.nan`` when either input is missing, when either input is
    below 1.0, or when the summed implied probability is not positive.
    """
    if pd.isna(odd1) or pd.isna(odd2):
        return np.nan
    if not (odd1 >= 1.0 and odd2 >= 1.0):
        return np.nan

    implied_total = 1.0 / odd1 + 1.0 / odd2
    if implied_total <= 0:
        return np.nan

    return max(1.0 / implied_total, 1.00)

def inplay_feature_engineering(df_orig, model_expected_feature_names=None):
    """Apply the in-play feature engineering steps (evaluation version).

    Works on a copy of ``df_orig`` and returns it with the engineered columns
    added: combined (double-chance) odds, Elo and form differences, implied
    probabilities and bookmaker margin, shot / corner / discipline aggregates,
    and a set of "advanced" composite features.

    Args:
        df_orig: DataFrame with one row per match. Raw numeric input columns
            that are absent are created as NaN.
        model_expected_feature_names: Optional collection of feature names.
            It is only consulted to decide whether ``GameDominanceIndex``
            should be created as 0 when the corner columns are unavailable.

    Returns:
        A new DataFrame; ``df_orig`` itself is not modified.

    Notes:
        * Column-level ``fillna`` results are assigned back rather than applied
          with ``inplace=True`` on ``df[col]``; the chained in-place form is
          deprecated and does nothing under pandas Copy-on-Write.
        * The advanced features are only computed when every dependency column
          is completely free of NaNs; otherwise they are set to / filled
          with 0.0.
    """
    df = df_orig.copy()

    # Coerce raw numeric inputs; create absent ones as NaN so later steps can
    # rely on the columns existing.
    for col in ['OddHome', 'OddDraw', 'OddAway', 'MaxHome', 'MaxDraw', 'MaxAway',
                'HomeElo', 'AwayElo', 'Form3Home', 'Form5Home', 'Form3Away', 'Form5Away',
                'HTHome', 'HTAway', 'FTHome', 'FTAway', 'HomeShots', 'AwayShots',
                'HomeTarget', 'AwayTarget', 'HomeCorners', 'AwayCorners',
                'HomeYellow', 'AwayYellow', 'HomeRed', 'AwayRed', 'HomeFouls', 'AwayFouls',
                'Over25', 'Under25']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        else:
            df[col] = np.nan

    # --- Date / time normalisation ---
    if 'MatchDate' in df.columns:
        df['MatchDate'] = pd.to_datetime(df['MatchDate'], errors='coerce')
    else:
        df['MatchDate'] = pd.NaT

    midnight = pd.to_datetime('00:00:00').time()
    if 'MatchTime' in df.columns:
        # Accept "HH.MM.SS" as well as "HH:MM:SS".
        df['MatchTime'] = df['MatchTime'].astype(str).fillna('00:00:00').str.replace('.', ':', regex=False)
        try:
            df['MatchTime'] = pd.to_datetime(df['MatchTime'], format='%H:%M:%S', errors='coerce').dt.time
        except Exception:
            # Fallback to midnight. The previous fallback called `.dt.time` on
            # a scalar Timestamp, which has no `.dt` accessor and would have
            # raised AttributeError from inside this handler.
            df['MatchTime'] = midnight
        df['MatchTime'] = df['MatchTime'].fillna(midnight)
    else:
        df['MatchTime'] = midnight

    valid_dt_idx = df['MatchDate'].notna() & df['MatchTime'].notna()
    df['MatchDateTime'] = pd.NaT
    if valid_dt_idx.any():
        df.loc[valid_dt_idx, 'MatchDateTime'] = pd.to_datetime(df.loc[valid_dt_idx, 'MatchDate'].astype(str) + ' ' + df.loc[valid_dt_idx, 'MatchTime'].astype(str), errors='coerce')

    # --- Half-time goals ---
    if 'HTHome' in df.columns and 'HTAway' in df.columns:
        df['HTHome'] = pd.to_numeric(df['HTHome'], errors='coerce')
        df['HTAway'] = pd.to_numeric(df['HTAway'], errors='coerce')
        df['HTTotalGoals'] = df['HTHome'].fillna(0) + df['HTAway'].fillna(0)

    # --- Double-chance odds derived from the 1X2 odds ---
    df['1XOdd'] = df.apply(lambda x: combine_odds(x.get('OddHome'), x.get('OddDraw')), axis=1)
    df['X2Odd'] = df.apply(lambda x: combine_odds(x.get('OddDraw'), x.get('OddAway')), axis=1)
    df['12Odd'] = df.apply(lambda x: combine_odds(x.get('OddHome'), x.get('OddAway')), axis=1)
    if all(c in df.columns for c in ['MaxHome', 'MaxDraw', 'MaxAway']):
        df['Max1XOdd'] = df.apply(lambda x: combine_odds(x['MaxHome'], x['MaxDraw']), axis=1)
        df['MaxX2Odd'] = df.apply(lambda x: combine_odds(x['MaxDraw'], x['MaxAway']), axis=1)
        df['Max12Odd'] = df.apply(lambda x: combine_odds(x['MaxHome'], x['MaxAway']), axis=1)

    # --- Elo features (missing ratings default to 1500) ---
    if 'HomeElo' in df.columns and 'AwayElo' in df.columns:
        df['HomeElo'] = pd.to_numeric(df['HomeElo'], errors='coerce').fillna(1500.0)
        df['AwayElo'] = pd.to_numeric(df['AwayElo'], errors='coerce').fillna(1500.0)
        df['EloDifference'] = df['HomeElo'] - df['AwayElo']
        df['EloTotal'] = df['HomeElo'] + df['AwayElo']
        df['EloAdvantage'] = np.divide(df['EloDifference'], df['EloTotal'], out=np.zeros_like(df['EloDifference'].values, dtype=float), where=df['EloTotal']!=0)

    # --- Form features (missing form counts as 0) ---
    form_cols_src_fe = ['Form3Home', 'Form5Home', 'Form3Away', 'Form5Away']
    for col in form_cols_src_fe:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
        else:
            df[col] = 0
    if all(c in df.columns for c in form_cols_src_fe):
        df['Form3Difference'] = df['Form3Home'] - df['Form3Away']
        df['Form5Difference'] = df['Form5Home'] - df['Form5Away']
        # "Older" form = the Form5 value minus the Form3 value.
        df['FormOlderHome'] = df['Form5Home'] - df['Form3Home']
        df['FormOlderAway'] = df['Form5Away'] - df['Form3Away']
        df['FormMomentumHome'] = df['Form3Home'] - df['FormOlderHome']
        df['FormMomentumAway'] = df['Form3Away'] - df['FormOlderAway']
        df['FormMomentumDiff'] = df['FormMomentumHome'] - df['FormMomentumAway']

    # --- Odds-derived features ---
    df['OddsDifference'] = df.get('OddHome', np.nan) - df.get('OddAway', np.nan)
    if 'MaxHome' in df.columns and 'MaxAway' in df.columns:
        df['MaxOddsDifference'] = df.get('MaxHome', np.nan) - df.get('MaxAway', np.nan)

    df['ImpliedProbHome'] = 1.0 / df.get('OddHome', np.nan)
    df['ImpliedProbDraw'] = 1.0 / df.get('OddDraw', np.nan)
    df['ImpliedProbAway'] = 1.0 / df.get('OddAway', np.nan)
    df['ImpliedProbTotal'] = df['ImpliedProbHome'].fillna(0) + df['ImpliedProbDraw'].fillna(0) + df['ImpliedProbAway'].fillna(0)
    df['BookmakerMargin'] = df['ImpliedProbTotal'] - 1.0
    # A total of exactly 0 means no odds were available, so the margin is undefined.
    df['BookmakerMargin'] = np.where(df['ImpliedProbTotal'] == 0, np.nan, df['BookmakerMargin'])

    # --- Shot statistics ---
    stat_cols_fe = ['HomeShots', 'AwayShots', 'HomeTarget', 'AwayTarget']
    stat_features_created_fe = all(c in df.columns for c in stat_cols_fe)
    if stat_features_created_fe:
        for col in stat_cols_fe:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
        df['ShotsDifference'] = df['HomeShots'] - df['AwayShots']
        df['ShotsTotal'] = df['HomeShots'] + df['AwayShots']
        df['TargetDifference'] = df['HomeTarget'] - df['AwayTarget']
        df['TargetTotal'] = df['HomeTarget'] + df['AwayTarget']
        # Accuracy is defined as 0 when a side has taken no shots.
        df['ShotAccuracyHome'] = np.divide(df['HomeTarget'], df['HomeShots'], out=np.zeros_like(df['HomeTarget'].values, dtype=float), where=df['HomeShots']!=0)
        df['ShotAccuracyAway'] = np.divide(df['AwayTarget'], df['AwayShots'], out=np.zeros_like(df['AwayTarget'].values, dtype=float), where=df['AwayShots']!=0)
        df['ShotAccuracyDiff'] = df['ShotAccuracyHome'] - df['ShotAccuracyAway']

    # --- Corner statistics and dominance index ---
    corner_cols_fe = ['HomeCorners', 'AwayCorners']
    corner_features_created_fe = all(c in df.columns for c in corner_cols_fe)
    if corner_features_created_fe:
        for col in corner_cols_fe:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
        df['CornersDifference'] = df['HomeCorners'] - df['AwayCorners']
        df['CornersTotal'] = df['HomeCorners'] + df['AwayCorners']
        if stat_features_created_fe and 'ShotsDifference' in df.columns:
            df['GameDominanceIndex'] = (df.get('ShotsDifference',0).fillna(0) + df.get('CornersDifference',0).fillna(0)) / 2.0
        elif 'CornersDifference' in df.columns:
            df['GameDominanceIndex'] = df['CornersDifference'].fillna(0)
        else:
            df['GameDominanceIndex'] = 0
    elif 'GameDominanceIndex' in (model_expected_feature_names or []):
        df['GameDominanceIndex'] = 0

    # --- Discipline statistics (a red card counts as 2 card points) ---
    disc_cols_fe = ['HomeYellow', 'AwayYellow', 'HomeRed', 'AwayRed', 'HomeFouls', 'AwayFouls']
    discipline_features_created_fe = all(c in df.columns for c in disc_cols_fe)
    if discipline_features_created_fe:
        for col in disc_cols_fe:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
        df['CardPointsHome'] = df['HomeYellow'] + (2 * df['HomeRed'])
        df['CardPointsAway'] = df['AwayYellow'] + (2 * df['AwayRed'])
        df['CardPointsDiff'] = df['CardPointsHome'] - df['CardPointsAway']
        df['FoulsDifference'] = df['HomeFouls'] - df['AwayFouls']
        df['FoulsTotal'] = df['HomeFouls'] + df['AwayFouls']

    # --- Advanced composite features ---
    # Only computed when every dependency exists and contains no NaN at all.
    adv_deps_ots_fe = ['HomeTarget','HomeShots','Form5Home','ImpliedProbHome','AwayTarget','AwayShots','Form5Away','ImpliedProbAway','HomeFouls','HomeElo','AwayShots','AwayTarget','AwayFouls','AwayElo','EloDifference','ImpliedProbDraw','Form5Difference','FormMomentumHome','ShotAccuracyHome','FormMomentumAway','ShotAccuracyAway','Under25','HomeCorners','AwayCorners']
    existing_deps_fe = [dep for dep in adv_deps_ots_fe if dep in df.columns and df[dep].notna().all()]
    advanced_features_possible_fe = len(existing_deps_fe) == len(adv_deps_ots_fe)

    if advanced_features_possible_fe:
        # No fillna is needed here: every dependency was just verified to be NaN-free.
        df['ScoringEfficiencyHome']=(df['HomeTarget']/(df['HomeShots'].clip(lower=1)))*df['Form5Home']*df['ImpliedProbHome']
        df['ScoringEfficiencyAway']=(df['AwayTarget']/(df['AwayShots'].clip(lower=1)))*df['Form5Away']*df['ImpliedProbAway']
        df['DefensiveRatingHome']=np.divide(df['HomeFouls']*df['HomeElo'],(df['AwayShots']+df['AwayTarget']).clip(lower=1),out=np.zeros_like(df['HomeFouls'].values,dtype=float),where=(df['AwayShots']+df['AwayTarget']).clip(lower=1)!=0)
        df['DefensiveRatingAway']=np.divide(df['AwayFouls']*df['AwayElo'],(df['HomeShots']+df['HomeTarget']).clip(lower=1),out=np.zeros_like(df['AwayFouls'].values,dtype=float),where=(df['HomeShots']+df['HomeTarget']).clip(lower=1)!=0)
        df['DrawLikelihood']=(1-abs(df['EloDifference'])/1000).clip(lower=0)*df['ImpliedProbDraw']*(1-abs(df['Form5Difference'])/15).clip(lower=0)
        df['FormEfficiencyHome']=df['Form5Home']*df.get('FormMomentumHome',0)*df.get('ShotAccuracyHome',0)
        df['FormEfficiencyAway']=df['Form5Away']*df.get('FormMomentumAway',0)*df.get('ShotAccuracyAway',0)
        # Logistic squash of (own defensive rating - opponent scoring efficiency).
        df['CleanSheetProbHome']=1/(1+np.exp(-(df.get('DefensiveRatingHome',0)-df.get('ScoringEfficiencyAway',0))))
        df['CleanSheetProbAway']=1/(1+np.exp(-(df.get('DefensiveRatingAway',0)-df.get('ScoringEfficiencyHome',0))))
        under25_val = df.get('Under25', pd.Series([0.5]*len(df))).fillna(0.5)
        df['LowScoreIndicator']=under25_val*df.get('ImpliedProbDraw',0)*(1/(df.get('HomeTarget',0)+df.get('AwayTarget',0)+1))
        df['DrawTendency']=((1-abs(df.get('EloDifference',0))/1000).clip(lower=0)*df.get('ImpliedProbDraw',0)*(1-abs(df.get('Form5Difference',0))/15).clip(lower=0)*(1/(abs(df.get('HomeTarget',0)-df.get('AwayTarget',0))+1)))
        df['DefensiveOrganization']=np.divide((df.get('HomeFouls',0)+df.get('AwayFouls',0)), (df.get('HomeShots',0)+df.get('AwayShots',0)+1).clip(lower=1), out=np.zeros_like(df.get('HomeFouls',pd.Series([0.0]*len(df))).values,dtype=float), where=(df.get('HomeShots',0)+df.get('AwayShots',0)+1).clip(lower=1)!=0)*under25_val
        df['HomeDefensiveStyle']=np.divide(df.get('HomeFouls',0)*df.get('HomeCorners',0), (df.get('HomeShots',0)+1).clip(lower=1), out=np.zeros_like(df.get('HomeFouls',pd.Series([0.0]*len(df))).values,dtype=float), where=(df.get('HomeShots',0)+1).clip(lower=1)!=0)
        df['AwayDefensiveStyle']=np.divide(df.get('AwayFouls',0)*df.get('AwayCorners',0), (df.get('AwayShots',0)+1).clip(lower=1), out=np.zeros_like(df.get('AwayFouls',pd.Series([0.0]*len(df))).values,dtype=float), where=(df.get('AwayShots',0)+1).clip(lower=1)!=0)
        df['ExpectedGoalsHome']=df.get('HomeTarget',0)*df.get('ShotAccuracyHome',0)*(df.get('Form5Home',0)/15).clip(0,1)*df.get('ImpliedProbHome',0)
        df['ExpectedGoalsAway']=df.get('AwayTarget',0)*df.get('ShotAccuracyAway',0)*(df.get('Form5Away',0)/15).clip(0,1)*df.get('ImpliedProbAway',0)
    else:
        # Dependencies incomplete: neutralise the advanced features with zeros.
        adv_cols_to_zero = ['ScoringEfficiencyHome', 'ScoringEfficiencyAway', 'DefensiveRatingHome', 'DefensiveRatingAway', 'DrawLikelihood', 'FormEfficiencyHome', 'FormEfficiencyAway', 'CleanSheetProbHome', 'CleanSheetProbAway', 'LowScoreIndicator', 'DrawTendency', 'DefensiveOrganization', 'HomeDefensiveStyle', 'AwayDefensiveStyle', 'ExpectedGoalsHome', 'ExpectedGoalsAway']
        for col in adv_cols_to_zero:
            if col not in df.columns:
                df[col] = 0.0
            else:
                df[col] = df[col].fillna(0.0)

    return df

def generate_bivariate_poisson_matrix(lambda_home, lambda_away, max_goals_axis):
    """Return the joint score probability matrix of two independent Poissons.

    Entry ``[h, a]`` is ``P(home = h) * P(away = a)`` for goal counts
    ``0..max_goals_axis`` on each axis, so the result has shape
    ``(max_goals_axis + 1, max_goals_axis + 1)``. Negative rates are raised
    to 0 before evaluation. The matrix is not renormalised after truncating
    at ``max_goals_axis``.
    """
    goal_counts = np.arange(max_goals_axis + 1)
    rate_home, rate_away = (max(0, rate) for rate in (lambda_home, lambda_away))

    pmf_home = poisson.pmf(goal_counts, rate_home)
    pmf_away = poisson.pmf(goal_counts, rate_away)

    # Rows index home goals, columns index away goals.
    return pmf_home[:, np.newaxis] * pmf_away[np.newaxis, :]

In [48]:
# Block 3: Model Execution, Prediction, Visualization

# --- 1. Prepare Data and Calculate Pre-Match Odds Probabilities ---
# The demo frame holds a single match; its 1X2 odds are converted to
# probabilities by normalising the inverse odds so that they sum to 1.
match_row = df_demo.iloc[0]

odd_h, odd_d, odd_a = match_row['OddHome'], match_row['OddDraw'], match_row['OddAway']
prematch_odds = (odd_h, odd_d, odd_a)

# Missing values are checked first so that NaN is never compared against 0.
odds_usable = all(pd.notna(o) for o in prematch_odds) and all(o > 0 for o in prematch_odds)

if odds_usable:
    inv_h, inv_d, inv_a = (1 / o for o in prematch_odds)
    total_inv_odds = inv_h + inv_d + inv_a
    prematch_prob_h = inv_h / total_inv_odds
    prematch_prob_d = inv_d / total_inv_odds
    prematch_prob_a = inv_a / total_inv_odds
else:
    prematch_prob_h = prematch_prob_d = prematch_prob_a = np.nan

print("\n--- Pre-Match Probabilities (from Odds) ---")
print(f"{match_row['HomeTeam']} Win: {prematch_prob_h:.2%}")
print(f"Draw: {prematch_prob_d:.2%}")
print(f"{match_row['AwayTeam']} Win: {prematch_prob_a:.2%}")


# Defaults that the printout and the heatmap step fall back to when the
# in-play prediction below fails.
ht_prob_h, ht_prob_d, ht_prob_a = np.nan, np.nan, np.nan
avg_pred_h_final, avg_pred_a_final = np.nan, np.nan

try:
    inplay_components = inplay_load_pipeline_components(INPLAY_LOAD_DIR)
    if inplay_components is None:
        raise RuntimeError("Failed to load in-play components.")

    inplay_scaler = inplay_components['scaler']
    inplay_feature_names = inplay_components['feature_names']
    inplay_target_transformer = inplay_components.get('target_transformer')
    inplay_le = inplay_components['label_encoder']
    inplay_draw_calibrator = inplay_components['draw_calibrator']
    inplay_meta_feature_names = inplay_components['meta_feature_names']
    inplay_cluster_medians = inplay_components.get('cluster_medians')
    inplay_meta_model = inplay_components['meta_model']
    inplay_base_models = { k:v for k,v in inplay_components.items() if k.startswith('base_') }

    df_demo_eng_inplay = inplay_feature_engineering(df_demo.copy(), model_expected_feature_names=inplay_feature_names)

    # Align with the feature list loaded from disk: anything still missing is
    # added as a zero column (and reported).
    for feature_col_name in inplay_feature_names:
        if feature_col_name not in df_demo_eng_inplay.columns:
            print(f"Final Alignment Warning (In-Play): Feature '{feature_col_name}' missing after FE. Adding with 0s.")
            df_demo_eng_inplay[feature_col_name] = 0.0

    X_eval = df_demo_eng_inplay[inplay_feature_names].copy()

    n_missing_features = X_eval.isnull().sum().sum()
    if n_missing_features > 0:
        print(f"Warning: Imputing {n_missing_features} NaNs in In-Play features before scaling.")
        # Frame-level fillna with re-assignment. The former per-column
        # `X_eval[col].fillna(0, inplace=True)` is a chained in-place call that
        # does nothing under pandas Copy-on-Write, which would let NaNs reach
        # the scaler.
        X_eval = X_eval.fillna(0)

    X_eval_scaled_values = inplay_scaler.transform(X_eval)
    X_eval_scaled = pd.DataFrame(X_eval_scaled_values, columns=inplay_feature_names, index=X_eval.index)
    X_eval_np = X_eval_scaled.values

    # --- 2. Predict with base model, turn them into meta-features ---
    preds_nn_home = inplay_base_models['base_nn_home'].predict(X_eval_np)
    preds_nn_away = inplay_base_models['base_nn_away'].predict(X_eval_np)
    preds_xgb_home = inplay_base_models['base_xgb_home'].predict(X_eval_np)
    preds_xgb_away = inplay_base_models['base_xgb_away'].predict(X_eval_np)
    preds_lgbm_home = inplay_base_models['base_lgbm_home'].predict(X_eval_np)
    preds_lgbm_away = inplay_base_models['base_lgbm_away'].predict(X_eval_np)
    preds_draw_prob_raw = inplay_base_models['base_draw_specialist'].predict_proba(X_eval_np)[:, 1]
    preds_hda_probs_base = inplay_base_models['base_hda_classifier'].predict_proba(X_eval_np)

    if inplay_target_transformer:
        # The NN outputs are mapped back through the target transformer.
        # Non-finite outputs are replaced by the median of the finite ones
        # (0.0 if none) first.
        preds_nn_combined = np.column_stack([preds_nn_home, preds_nn_away])
        finite_median_nn = 0.0
        if np.any(np.isfinite(preds_nn_combined)):
            finite_median_nn = np.nanmedian(preds_nn_combined[np.isfinite(preds_nn_combined)])
        preds_nn_combined = np.nan_to_num(preds_nn_combined, nan=finite_median_nn, posinf=finite_median_nn, neginf=finite_median_nn)
        preds_nn_inv = inplay_target_transformer.inverse_transform(preds_nn_combined)
        preds_nn_home, preds_nn_away = preds_nn_inv[:, 0], preds_nn_inv[:, 1]

    # Sanitise the raw draw probability, then calibrate and clip to [0, 1].
    preds_draw_prob_filled = pd.Series(preds_draw_prob_raw).fillna(np.nanmedian(preds_draw_prob_raw if np.any(np.isfinite(preds_draw_prob_raw)) else 0.33)).values
    if np.any(~np.isfinite(preds_draw_prob_filled)):
        preds_draw_prob_filled = np.nan_to_num(preds_draw_prob_filled, nan=0.33, posinf=1.0, neginf=0.0)
    preds_draw_prob_calibrated = inplay_draw_calibrator.transform(preds_draw_prob_filled.reshape(-1,1)).ravel()
    preds_draw_prob_calibrated = pd.Series(preds_draw_prob_calibrated, index=X_eval_scaled.index).clip(0.0, 1.0)

    # Meta-feature frame for the stacked model; the `*_oof_*` names are the
    # column names looked up via `inplay_meta_feature_names`.
    meta_features_eval = pd.DataFrame(index=X_eval_scaled.index)
    meta_features_eval['draw_prob_calibrated'] = preds_draw_prob_calibrated
    meta_features_eval['nn_oof_home'] = preds_nn_home
    meta_features_eval['nn_oof_away'] = preds_nn_away
    meta_features_eval['xgb_oof_home'] = preds_xgb_home
    meta_features_eval['xgb_oof_away'] = preds_xgb_away
    meta_features_eval['lgbm_oof_home'] = preds_lgbm_home
    meta_features_eval['lgbm_oof_away'] = preds_lgbm_away
    meta_features_eval['nn_score_diff'] = meta_features_eval['nn_oof_home'] - meta_features_eval['nn_oof_away']
    meta_features_eval['xgb_score_diff'] = meta_features_eval['xgb_oof_home'] - meta_features_eval['xgb_oof_away']
    meta_features_eval['lgbm_score_diff'] = meta_features_eval['lgbm_oof_home'] - meta_features_eval['lgbm_oof_away']
    meta_features_eval['nn_abs_diff'] = meta_features_eval['nn_score_diff'].abs()
    meta_features_eval['xgb_abs_diff'] = meta_features_eval['xgb_score_diff'].abs()
    meta_features_eval['lgbm_abs_diff'] = meta_features_eval['lgbm_score_diff'].abs()
    meta_features_eval['hda_predicted_class'] = np.argmax(preds_hda_probs_base, axis=1)
    # Margin between the two most likely classes (0.0 when it cannot be computed).
    sorted_hda_probs = np.sort(preds_hda_probs_base, axis=1)
    margin_calc = lambda row: row[-1] - row[-2] if len(row) >= 2 and pd.notna(row[-1]) and pd.notna(row[-2]) else 0.0
    meta_features_eval['hda_prob_margin'] = np.apply_along_axis(margin_calc, 1, sorted_hda_probs)
    # Clip away exact 0/1 and renormalise before taking the entropy.
    epsilon = 1e-9
    safe_hda_probs = np.clip(preds_hda_probs_base, epsilon, 1.0 - epsilon)
    safe_hda_probs = safe_hda_probs / np.sum(safe_hda_probs, axis=1, keepdims=True)
    meta_features_eval['hda_prob_entropy'] = entropy(safe_hda_probs, axis=1)

    # Cluster columns are passed through only when the meta model expects them.
    actual_cluster_cols_for_meta = [col for col in INPLAY_CLUSTER_COLS if col in df_demo_eng_inplay.columns and col in inplay_meta_feature_names]
    if actual_cluster_cols_for_meta:
        cluster_features_subset = df_demo_eng_inplay[actual_cluster_cols_for_meta].loc[X_eval_scaled.index].copy()
        if cluster_features_subset.isnull().sum().sum() > 0:
            if inplay_cluster_medians is not None:
                cluster_features_subset = cluster_features_subset.fillna(inplay_cluster_medians)
            else:
                cluster_features_subset = cluster_features_subset.fillna(0)
        meta_features_eval = pd.concat([meta_features_eval, cluster_features_subset], axis=1)

    # reindex both orders the columns as the meta model expects and creates any
    # absent ones filled with 0.0; remaining NaNs are zero-filled as well.
    meta_features_aligned = meta_features_eval.reindex(columns=inplay_meta_feature_names, fill_value=0.0).fillna(0.0)

    final_preds_probs_meta = inplay_meta_model.predict_proba(meta_features_aligned)

    # NOTE(review): this pairs the label encoder's class order with the columns
    # of predict_proba positionally — presumably the meta model was trained on
    # the encoded labels; confirm against the training script.
    hda_classes_ordered = inplay_le.classes_
    prob_dict = dict(zip(hda_classes_ordered, final_preds_probs_meta[0]))

    ht_prob_h = prob_dict.get('H', np.nan)
    ht_prob_d = prob_dict.get('D', np.nan)
    ht_prob_a = prob_dict.get('A', np.nan)

    # Goal predictions averaged over the three base regressors (NN, XGB, LGBM).
    avg_pred_h_final = (preds_nn_home[0] + preds_xgb_home[0] + preds_lgbm_home[0]) / 3.0
    avg_pred_a_final = (preds_nn_away[0] + preds_xgb_away[0] + preds_lgbm_away[0]) / 3.0

except Exception as e:
    # Deliberate best-effort: report the failure and continue with NaN defaults
    # so the remaining cell output is still produced.
    print(f"An unexpected error occurred during In-Play Prediction: {e}")
    traceback.print_exc()
    print("Skipping In-Play Prediction outputs and heatmap.")

print("\n--- Half-Time Probabilities ---")
print(f"{match_row['HomeTeam']} Win: {ht_prob_h:.2%}")
print(f"Draw: {ht_prob_d:.2%}")
print(f"{match_row['AwayTeam']} Win: {ht_prob_a:.2%}")


# --- 3. Generate and Save Heatmap ---
# Renders the final-score probability grid from the averaged goal predictions
# and writes it to VIZ_OUTPUT_DIR. Skipped when no prediction is available.
if pd.isna(avg_pred_h_final) or pd.isna(avg_pred_a_final):
    print("\nSkipping heatmap generation as in-play predictions for scores were not available.")
else:
    print(f"\n--- Generating Goal Distribution Heatmap in example_output for: {match_row['MatchDesc']} ---")

    prob_matrix_final_scores = generate_bivariate_poisson_matrix(
        lambda_home=avg_pred_h_final,
        lambda_away=avg_pred_a_final,
        max_goals_axis=HEATMAP_MAX_GOALS
    )

    match_info = matches_data[0]
    # The home team name goes on the y axis, so spaces become line breaks.
    home_team_name = match_info.get('HomeTeam', 'Home').replace(" ", "\n")
    away_team_name = match_info.get('AwayTeam', 'Away')
    match_prefix = match_info['MatchDesc'].replace(" vs ", "_vs_").replace(" ", "_").replace(".", "")
    ht_home_goals = match_info['HTHome']
    ht_away_goals = match_info['HTAway']

    # A final score below the half-time score on either side is impossible.
    # Rows are home goals, columns are away goals.
    impossible_mask = (
        (HEATMAP_SCORE_RANGE[:, np.newaxis] < ht_home_goals)
        | (HEATMAP_SCORE_RANGE[np.newaxis, :] < ht_away_goals)
    )

    prob_matrix_viz = np.where(impossible_mask, 0.0, prob_matrix_final_scores)
    # Flip vertically so that 0 home goals ends up on the bottom row.
    prob_matrix_display = np.flipud(prob_matrix_viz)
    impossible_mask_display = np.flipud(impossible_mask)
    original_probs_flipped = np.flipud(prob_matrix_final_scores)

    # Cell labels: a dash for impossible scores, the percentage for anything
    # at or above 0.05 %, blank otherwise.
    annot_labels = np.full(prob_matrix_display.shape, "", dtype=object)
    for (r, c), is_impossible in np.ndenumerate(impossible_mask_display):
        if is_impossible:
            annot_labels[r, c] = "—"
            continue
        prob_percent = original_probs_flipped[r, c] * 100
        if prob_percent >= 0.05:
            annot_labels[r, c] = f"{prob_percent:.1f}"

    fig_h, ax_h = plt.subplots(figsize=(8, 7.5))
    sns.heatmap(
        prob_matrix_display * 100,
        annot=annot_labels,
        fmt="",
        cmap="Blues",
        linewidths=0.8,
        linecolor='lightgray',
        cbar=True,
        cbar_kws={'label': 'Probability of Final Score (%)', 'shrink': 0.75},
        ax=ax_h,
        annot_kws={"size": 9},
        vmin=0,
    )

    ax_h.set_title(f"{match_info['MatchDesc']} (HT: {int(ht_home_goals)}-{int(ht_away_goals)}) - Final Score Probability", fontsize=13, pad=15)
    ax_h.set_xlabel(f"{away_team_name} Goals", fontsize=12)
    ax_h.set_ylabel(f"{home_team_name} Goals", fontsize=12, labelpad=10)

    # Ticks sit at cell centres; y labels are reversed to match the flip.
    tick_positions = HEATMAP_SCORE_RANGE + 0.5
    ax_h.set_xticks(tick_positions)
    ax_h.set_xticklabels(HEATMAP_SCORE_RANGE, fontsize=10)
    ax_h.set_yticks(tick_positions)
    ax_h.set_yticklabels(HEATMAP_SCORE_RANGE[::-1], fontsize=10)

    # Axis arrows: one along the y axis, one along the x axis.
    arrow_style = dict(arrowstyle="<-", color='black', lw=1.5)
    for arrow_tip, arrow_tail in (((0.0, 1.02), (0.0, 0.95)), ((1.02, 0.0), (0.95, 0.0))):
        ax_h.annotate('', xy=arrow_tip, xycoords='axes fraction', xytext=arrow_tail,
                      arrowprops=arrow_style, annotation_clip=False)

    ax_h.tick_params(axis='both', direction='out', length=4, width=1)
    fig_h.tight_layout(rect=[0, 0, 1, 0.96])
    heatmap_filename = os.path.join(VIZ_OUTPUT_DIR, f"{match_prefix}_goal_distribution_heatmap.{VIZ_SAVE_FORMAT}")
    fig_h.savefig(heatmap_filename, dpi=VIZ_HIGH_DPI, bbox_inches='tight')
    plt.close(fig_h)  # the figure is only written to disk, not shown inline

print("\n--- Script Finished ---")


--- Pre-Match Probabilities (from Odds) ---
Arsenal Win: 43.71%
Draw: 27.33%
Real Madrid Win: 28.96%

--- Half-Time Probabilities ---
Arsenal Win: 80.96%
Draw: 15.99%
Real Madrid Win: 3.05%

--- Generating Goal Distribution Heatmap in example_output for: Arsenal vs Real Madrid ---

--- Script Finished ---
