# Evaluation

This notebook demonstrates how to evaluate the prediction framework proposed in the thesis. Results may differ from the reported benchmark results because some of the methods used are stochastic.

## A. Evaluating Pre-match model

### Data loading

This cell imports the required libraries, sets the data and model paths, defines the feature lists used by the full-feature and reduced-feature models, and fixes the NumPy and TensorFlow random seeds.

In [114]:
import pandas as pd
import numpy as np
import os
import gc
import warnings

from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support

# Silence all warnings and show every column / four decimals in printed frames.
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.options.display.float_format = '{:.4f}'.format

print(f"TensorFlow Version: {tf.__version__}")

# Input data and the two saved Keras models evaluated in this section.
DATA_FILE = 'training/football_data.csv'
MODEL_FULL_PATH = 'models/pre_match/full_features.keras'
MODEL_REDUCED_PATH = 'models/pre_match/reduced_features.keras'

# Result column and its integer class encoding
# (presumably H = home win, D = draw, A = away win — confirm against the data set).
TARGET_COL = 'FTResult'
TARGET_MAP = {'H': 0, 'D': 1, 'A': 2}
INV_TARGET_MAP = {v: k for k, v in TARGET_MAP.items()}

# --- Define feature sets ---
# Columns read from the CSV file.
RAW_INPUT_FEATURES_FROM_CSV = [
    'Division', 'MatchDate', 'HomeTeam', 'AwayTeam', 'FTResult', 
    'HomeElo', 'AwayElo', 
    'Form3Home', 'Form5Home', 'Form3Away', 'Form5Away', 
    'OddHome', 'OddDraw', 'OddAway',
    'Over25', 'Under25', 'MaxOver25', 'MaxUnder25', 
    'HandiSize', 'HandiHome', 'HandiAway'

]
# De-duplicated through a set, so the resulting list order is not guaranteed.
FEATURES_TO_LOAD_FROM_CSV = list(set(RAW_INPUT_FEATURES_FROM_CSV))


# Numerical inputs of the full model (raw columns plus engineered
# EloDiff / Form3Diff / Form5Diff / Year / Month).
NUMERICAL_FEATURES_FULL_MODEL = [ 
    'HomeElo', 'AwayElo', 'EloDiff', 'Form3Home', 'Form5Home', 'Form3Away', 'Form5Away',
    'Form3Diff', 'Form5Diff', 'OddHome', 'OddDraw', 'OddAway', 'Over25', 'Under25',
    'MaxOver25', 'MaxUnder25', 'HandiSize', 'HandiHome', 'HandiAway', 'Year', 'Month'
]
# Numerical inputs of the reduced model (a subset of the full list).
NUMERICAL_FEATURES_REDUCED_MODEL = [ 
    'Form3Home', 'Form5Home', 'Form3Away', 'Form5Away', 'Form3Diff', 'Form5Diff',
    'OddHome', 'OddDraw', 'OddAway', 'Year', 'Month'
]
# Low-cardinality categoricals are one-hot encoded; high-cardinality ones are
# label encoded and fed to the models as separate integer inputs.
CATEGORICAL_LOW_CARDINALITY_MODEL = ['DayOfWeek'] 
CATEGORICAL_HIGH_CARDINALITY_MODEL = ['HomeTeam', 'AwayTeam', 'Division'] 

# Raw columns that must be non-null for a match to enter the evaluation subset.
FULL_MODEL_RAW_DATA_REQUIREMENTS = [
    'HomeElo', 'AwayElo', 'Over25', 'Under25', 'MaxOver25', 'MaxUnder25',
    'HandiSize', 'HandiHome', 'HandiAway'
]
# Odds columns coerced to numeric and required to be present during cleaning.
ODDS_COLS = ['OddHome', 'OddDraw', 'OddAway']


# Number of most recent matches taken for evaluation, prediction batch size,
# and the seed applied to both NumPy and TensorFlow.
EVALUATION_SUBSET_SIZE = 500
NN_BATCH_SIZE = 256
SEED = 1
np.random.seed(SEED)
tf.random.set_seed(SEED)

TensorFlow Version: 2.19.0


### Data cleaning, pre-processing and feature engineering

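The functions below load and clean the match data, engineer the date and difference features, fit the imputers, scalers and encoders, and convert a data slice into the input arrays passed to the Keras models. The most recent matches are then selected as the evaluation subset.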

In [115]:
def load_and_clean_data(data_file, csv_cols_to_load, target_col, odds_cols):
    """Load the match CSV and keep rows with a valid date, target and odds.

    Args:
        data_file: Path to the CSV file.
        csv_cols_to_load: Columns passed to ``pd.read_csv(usecols=...)``;
            must include ``'MatchDate'`` and ``target_col``.
        target_col: Name of the result column. Rows whose value is not a key
            of the module-level ``TARGET_MAP`` are dropped.
        odds_cols: Odds columns to coerce to numeric. Columns that are absent
            from the loaded frame are skipped.

    Returns:
        DataFrame sorted ascending by the parsed ``MatchDateTime`` column.

    Raises:
        FileNotFoundError: If ``data_file`` does not exist.
        ValueError: If no rows remain after the target or odds filtering.
    """
    if not os.path.exists(data_file):
        raise FileNotFoundError(f"Data file not found: {data_file}")

    df = pd.read_csv(data_file, usecols=csv_cols_to_load, low_memory=False)

    # Unparseable dates become NaT and are dropped before chronological sorting.
    df['MatchDateTime'] = pd.to_datetime(df['MatchDate'], errors='coerce')
    df = df.dropna(subset=['MatchDateTime'])
    df = df.sort_values(by='MatchDateTime', ascending=True)
    df = df.reset_index(drop=True)

    # Copy so the column assignments below write to an independent frame
    # instead of a filtered selection of the previous one.
    df = df[df[target_col].isin(TARGET_MAP.keys())].copy()
    if df.empty:
        raise ValueError("DataFrame empty after filtering for valid target values.")

    # Only touch odds columns that were actually loaded; the same list is used
    # for the NaN filter so a missing column cannot raise a KeyError there.
    present_odds_cols = [col for col in odds_cols if col in df.columns]
    for col in present_odds_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
        # Non-positive odds are replaced by a very large value rather than dropped.
        df.loc[df[col] <= 0, col] = 1e6
    if present_odds_cols:
        df = df.dropna(subset=present_odds_cols)

    if df.empty:
        raise ValueError("DataFrame is empty after cleaning odds.")
    return df

def engineer_features(df):
    """Add calendar and home-minus-away difference features.

    Works on a copy, so the input frame is left unchanged.

    Args:
        df: Frame with a datetime ``MatchDateTime`` column. The form and Elo
            source columns are optional; a missing one is created with a
            constant default.

    Returns:
        Copy of ``df`` with ``Year``, ``Month``, ``DayOfWeek`` (as string),
        ``Form3Diff``, ``Form5Diff`` and ``EloDiff`` added, and the form/Elo
        source columns coerced to numeric with NaNs filled by the column median.
    """
    df_eng = df.copy()
    df_eng['Year'] = df_eng['MatchDateTime'].dt.year
    df_eng['Month'] = df_eng['MatchDateTime'].dt.month
    df_eng['DayOfWeek'] = df_eng['MatchDateTime'].dt.dayofweek.astype(str)

    # Default used only when the column is absent from the frame altogether.
    # NOTE(review): HomeElo and AwayElo fall back to different constants
    # (1600 vs 1500) — confirm this asymmetry is intended.
    cols_for_diff_calculation = {
        'Form3Home': 0, 'Form3Away': 0, 'Form5Home': 0, 'Form5Away': 0,
        'HomeElo': 1600, 'AwayElo': 1500
    }
    for col, default_val in cols_for_diff_calculation.items():
        if col not in df_eng.columns:
            df_eng[col] = default_val
            continue
        numeric_col = pd.to_numeric(df_eng[col], errors='coerce')
        if numeric_col.isnull().any():
            # Assign the filled result back explicitly: an in-place fillna on
            # a column selection does not update the frame under pandas
            # Copy-on-Write.
            numeric_col = numeric_col.fillna(numeric_col.median())
        df_eng[col] = numeric_col

    df_eng['Form3Diff'] = df_eng['Form3Home'] - df_eng['Form3Away']
    df_eng['Form5Diff'] = df_eng['Form5Home'] - df_eng['Form5Away']
    df_eng['EloDiff'] = df_eng['HomeElo'] - df_eng['AwayElo']
    return df_eng

def fit_preprocessors(df_engineered, num_feat_full_model, num_feat_reduced_model,
                      cat_low_feat_model, cat_high_feat_model):
    """Fit imputers, scalers and encoders on the engineered frame.

    Only requested columns that exist in ``df_engineered`` are used. The
    returned dict contains the fitted transformers, the label encoders with
    their vocabulary sizes, and the column lists each group was fitted on.
    """
    def _existing(requested):
        # Keep the caller's ordering, drop names the frame does not have.
        return [name for name in requested if name in df_engineered.columns]

    num_full_cols = _existing(num_feat_full_model)
    num_reduced_cols = _existing(num_feat_reduced_model)
    cat_low_cols = _existing(cat_low_feat_model)
    cat_high_cols = _existing(cat_high_feat_model)

    fitted = {}

    # (result key, imputation strategy, columns) — every imputer is fitted on
    # the raw engineered frame, before any imputation is applied.
    imputer_specs = (
        ('num_imputer_full', 'median', num_full_cols),
        ('num_imputer_reduced', 'median', num_reduced_cols),
        ('cat_low_imputer', 'most_frequent', cat_low_cols),
        ('cat_high_imputer', 'most_frequent', cat_high_cols),
    )
    for key, strategy, cols in imputer_specs:
        if cols:
            fitted[key] = SimpleImputer(strategy=strategy).fit(df_engineered[cols])

    # Apply the imputers, in the same order, to a working copy that the
    # scalers and encoders are then fitted on.
    filled = df_engineered.copy()
    for key, _strategy, cols in imputer_specs:
        if key in fitted:
            filled[cols] = fitted[key].transform(filled[cols])

    # One scaler per numerical feature set.
    for scaler_key, imputer_key, cols in (
        ('scaler_full', 'num_imputer_full', num_full_cols),
        ('scaler_reduced', 'num_imputer_reduced', num_reduced_cols),
    ):
        if imputer_key in fitted:
            fitted[scaler_key] = StandardScaler().fit(filled[cols])

    # One-hot encoder for the low-cardinality categoricals.
    if 'cat_low_imputer' in fitted:
        one_hot = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        fitted['ohe_low'] = one_hot.fit(filled[cat_low_cols])

    # Label encoders; each vocabulary gets an extra unknown token.
    unknown = '<UNK>'
    encoders = {}
    vocab = {}
    have_high_imputer = 'cat_high_imputer' in fitted

    # Home and away teams share a single encoder.
    team_cols = [name for name in ('HomeTeam', 'AwayTeam') if name in cat_high_cols]
    if team_cols and have_high_imputer:
        team_values = pd.concat([filled[name] for name in team_cols]).astype(str).unique()
        team_encoder = LabelEncoder().fit(np.append(team_values, unknown))
        encoders['Team'] = team_encoder
        vocab['Team'] = len(team_encoder.classes_)

    if have_high_imputer and 'Division' in cat_high_cols:
        division_values = filled['Division'].astype(str).unique()
        division_encoder = LabelEncoder().fit(np.append(division_values, unknown))
        encoders['Division'] = division_encoder
        vocab['Division'] = len(division_encoder.classes_)

    fitted.update({
        'label_encoders': encoders,
        'vocab_sizes': vocab,
        'fitted_cols_num_full': num_full_cols,
        'fitted_cols_num_reduced': num_reduced_cols,
        'fitted_cols_cat_low': cat_low_cols,
        'fitted_cols_cat_high': cat_high_cols,
    })
    return fitted

def transform_data_for_keras(df_slice, preprocessors,
                             num_feat_model_list, cat_low_feat_model_list, cat_high_feat_model_list,
                             model_type_flag, loaded_model_ref):
    """Convert a data slice into the list of input arrays for a Keras model.

    Args:
        df_slice: Engineered rows to transform; not modified.
        preprocessors: Dict produced by ``fit_preprocessors``.
        num_feat_model_list: Numerical feature names of the model.
        cat_low_feat_model_list: One-hot encoded categorical feature names.
        cat_high_feat_model_list: Label-encoded categorical feature names.
        model_type_flag: Suffix selecting the numerical imputer/scaler and
            fitted column list, e.g. ``'full'`` or ``'reduced'``.
        loaded_model_ref: Loaded model, or a falsy value. When given, the
            ``input_dim`` of its ``embedding_team`` / ``embedding_division``
            layer overrides the stored vocabulary size.

    Returns:
        ``[X_main, home_team_idx, away_team_idx, division_idx]`` where
        ``X_main`` stacks the scaled numerical and one-hot columns and each
        index array has shape ``(n_rows, 1)``.

    Raises:
        ValueError: If the label encoder for a high-cardinality input is missing.
    """
    df_proc = df_slice.copy()

    num_imputer = preprocessors.get(f'num_imputer_{model_type_flag}')
    scaler = preprocessors.get(f'scaler_{model_type_flag}')
    cat_low_imputer = preprocessors.get('cat_low_imputer')
    ohe_low = preprocessors.get('ohe_low')
    cat_high_imputer = preprocessors.get('cat_high_imputer')
    label_encoders = preprocessors.get('label_encoders', {})
    vocab_sizes = preprocessors.get('vocab_sizes', {})
    unk_token = '<UNK>'

    # --- Numerical features ---
    fitted_num_cols_for_modeltype = preprocessors.get(f'fitted_cols_num_{model_type_flag}', [])
    cols_to_process_num = [
        col for col in num_feat_model_list
        if col in df_proc.columns and col in fitted_num_cols_for_modeltype
    ]
    if cols_to_process_num and num_imputer and scaler:
        X_num_imputed = num_imputer.transform(df_proc[cols_to_process_num])
        X_num_scaled = scaler.transform(X_num_imputed)
    else:
        # Nothing to transform: emit zeros with the width the scaler was fitted on.
        expected_shape_num = len(fitted_num_cols_for_modeltype) if scaler else 0
        X_num_scaled = np.zeros((len(df_proc), expected_shape_num))

    # --- Low-cardinality categoricals (one-hot) ---
    fitted_cat_low_cols = preprocessors.get('fitted_cols_cat_low', [])
    cols_to_process_cat_low = [
        col for col in cat_low_feat_model_list
        if col in df_proc.columns and col in fitted_cat_low_cols
    ]
    if cols_to_process_cat_low and cat_low_imputer and ohe_low:
        X_cat_low_imputed = cat_low_imputer.transform(df_proc[cols_to_process_cat_low])
        X_cat_low_ohe = ohe_low.transform(X_cat_low_imputed)
    else:
        expected_shape_ohe = (
            len(ohe_low.get_feature_names_out())
            if ohe_low and hasattr(ohe_low, 'get_feature_names_out') else 0
        )
        X_cat_low_ohe = np.zeros((len(df_proc), expected_shape_ohe))

    X_main_input = np.hstack([X_num_scaled, X_cat_low_ohe])

    keras_input_list = [X_main_input]
    expected_high_card_keras_inputs = ['HomeTeam', 'AwayTeam', 'Division']

    # --- High-cardinality categoricals (label encoded) ---
    fitted_cat_high_cols = preprocessors.get('fitted_cols_cat_high', [])
    if cat_high_imputer and any(col in fitted_cat_high_cols for col in cat_high_feat_model_list):
        cols_to_impute_high_cat = [
            col for col in cat_high_feat_model_list
            if col in df_proc.columns and col in fitted_cat_high_cols
        ]
        if cols_to_impute_high_cat:
            df_proc[cols_to_impute_high_cat] = cat_high_imputer.transform(df_proc[cols_to_impute_high_cat])

    for col_name in expected_high_card_keras_inputs:
        # HomeTeam and AwayTeam share the 'Team' encoder.
        le_key = 'Team' if 'Team' in col_name else 'Division'
        le = label_encoders.get(le_key)
        if le is None:
            raise ValueError(f"LabelEncoder '{le_key}' for '{col_name}' not found.")

        effective_vocab_size = vocab_sizes.get(le_key, len(le.classes_))
        if loaded_model_ref:
            try:
                embedding_layer = loaded_model_ref.get_layer(name=f'embedding_{le_key.lower()}')
                effective_vocab_size = embedding_layer.input_dim
            except Exception:
                # Best effort: keep the stored vocabulary size when the model
                # has no such embedding layer. Unlike a bare ``except`` this
                # lets KeyboardInterrupt / SystemExit propagate.
                pass
        max_index = effective_vocab_size - 1

        if col_name in df_proc.columns:
            # The position in ``classes_`` is the encoded label, so a single
            # dictionary lookup replaces one ``le.transform`` call per row.
            classes = np.asarray(le.classes_).tolist()
            class_to_index = dict(zip(classes, range(len(classes))))
            codes = df_proc[col_name].astype(str).map(class_to_index)
            if codes.isna().any():
                # Values unseen during fitting fall back to the unknown token.
                codes = codes.fillna(le.transform([unk_token])[0])
            # Cap indices so they stay inside the embedding's input range.
            encoded = np.minimum(codes.to_numpy(dtype='int64'), max_index)
            keras_input_list.append(encoded.reshape(-1, 1))
        else:
            # Column absent from the slice: feed the unknown index for every row.
            unk_idx_capped = min(le.transform([unk_token])[0], max_index)
            keras_input_list.append(np.full((len(df_proc), 1), unk_idx_capped))

    return keras_input_list


# --- Main Data Processing Flow ---
# Load -> engineer -> fit preprocessors -> take the most recent matches -> build targets.
print("--- Starting Data Preparation ---")
df_full_dataset = load_and_clean_data(DATA_FILE, FEATURES_TO_LOAD_FROM_CSV, TARGET_COL, ODDS_COLS)
df_engineered_full = engineer_features(df_full_dataset)
# The cleaned frame is no longer needed once the engineered copy exists.
del df_full_dataset; gc.collect()

# The preprocessors are fitted on the whole engineered data set, i.e. before the
# evaluation rows are sliced off below.
# NOTE(review): their statistics therefore include the evaluation matches —
# presumably this mirrors the preprocessing used at training time; confirm.
preprocessors = fit_preprocessors(
    df_engineered_full,
    NUMERICAL_FEATURES_FULL_MODEL,
    NUMERICAL_FEATURES_REDUCED_MODEL,
    CATEGORICAL_LOW_CARDINALITY_MODEL,
    CATEGORICAL_HIGH_CARDINALITY_MODEL
)

# The frame is sorted by MatchDateTime, so the tail holds the latest matches.
df_latest_raw_engineered = df_engineered_full.iloc[-EVALUATION_SUBSET_SIZE:].copy()
del df_engineered_full; gc.collect()

# Keep only the required raw columns that are actually present in the frame.
actual_raw_requirements_for_full_model = [
    col for col in FULL_MODEL_RAW_DATA_REQUIREMENTS if col in df_latest_raw_engineered.columns
]

# Drop matches with a missing value in any required column, so both models are
# evaluated on the same rows.
if actual_raw_requirements_for_full_model:
    df_evaluation_subset = df_latest_raw_engineered.dropna(subset=actual_raw_requirements_for_full_model).copy()
else:
    print(f"Warning: Key raw features for filtering evaluation subset ({FULL_MODEL_RAW_DATA_REQUIREMENTS}) not found in the engineered latest data. Using all {len(df_latest_raw_engineered)} latest matches.")
    df_evaluation_subset = df_latest_raw_engineered.copy()

print(f"Selected {len(df_evaluation_subset)} matches for final evaluation (after filtering for Full NN raw feature requirements).")

if df_evaluation_subset.empty:
    raise SystemExit("Evaluation subset is empty after filtering. Cannot proceed.")

# Integer class labels (see TARGET_MAP) for the evaluation rows.
y_true_eval = df_evaluation_subset[TARGET_COL].map(TARGET_MAP).values
print(f"Target (y_true_eval) shape: {y_true_eval.shape}")
print("--- Data Preparation Complete ---")

--- Starting Data Preparation ---
Selected 499 matches for final evaluation (after filtering for Full NN raw feature requirements).
Target (y_true_eval) shape: (499,)
--- Data Preparation Complete ---


### Model prediction and evaluation

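Each saved model is loaded, run on the evaluation subset, and scored with accuracy and macro-averaged F1; the results are printed in a summary table.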


In [116]:
def evaluate_model(model_name, model_path, df_eval_data, y_true,
                   preproc, num_feat, cat_low_feat, cat_high_feat, model_type_flag):
    """Load a saved Keras model and score it on the evaluation subset.

    Args:
        model_name: Label used in log messages.
        model_path: Path to the saved model file.
        df_eval_data: Engineered evaluation rows.
        y_true: Integer class labels aligned with ``df_eval_data``.
        preproc: Dict of fitted preprocessors from ``fit_preprocessors``.
        num_feat: Numerical feature names of the model.
        cat_low_feat: One-hot encoded categorical feature names.
        cat_high_feat: Label-encoded categorical feature names.
        model_type_flag: ``'full'`` or ``'reduced'``; selects the matching
            numerical imputer and scaler.

    Returns:
        Dict with ``"Accuracy"``, ``"F1 (Macro)"`` and ``"Evaluated Samples"``.
        The metrics are NaN when the model is missing, cannot be loaded, or
        fails during evaluation; the sample count is 0 in the first two cases.
    """
    if not os.path.exists(model_path):
        print(f"Model file not found: {model_path}. Skipping evaluation for {model_name}.")
        return {"Accuracy": np.nan, "F1 (Macro)": np.nan, "Evaluated Samples": 0}

    try:
        model = load_model(model_path, compile=False)
    except Exception as e:
        print(f"Error loading model {model_path}: {e}")
        return {"Accuracy": np.nan, "F1 (Macro)": np.nan, "Evaluated Samples": 0}

    keras_eval_inputs = None
    try:
        keras_eval_inputs = transform_data_for_keras(
            df_eval_data, preproc, num_feat, cat_low_feat, cat_high_feat, model_type_flag, model
        )

        pred_proba = model.predict(keras_eval_inputs, batch_size=NN_BATCH_SIZE, verbose=0)
        preds = np.argmax(pred_proba, axis=1)

        acc = accuracy_score(y_true, preds)
        _, _, f1_macro, _ = precision_recall_fscore_support(y_true, preds, average='macro', zero_division=0)

        return {
            "Accuracy": acc,
            "F1 (Macro)": f1_macro,
            "Evaluated Samples": len(y_true)
        }

    except Exception as e:
        # Evaluation is best effort: report the failure and return NaN metrics
        # so the remaining models can still be evaluated.
        print(f"Error during evaluation of {model_name}: {e}")
        return {"Accuracy": np.nan, "F1 (Macro)": np.nan,
                "Evaluated Samples": len(y_true)}
    finally:
        # Release the model and its inputs before the next model is loaded.
        del model, keras_eval_inputs
        gc.collect()
        # Cleared unconditionally: the previous guard tested
        # ``is_keras_tensor`` on an eager tensor, which is never a Keras
        # tensor, so the session was never reset.
        tf.keras.backend.clear_session()


# --- Run Evaluations ---
# Maps a display name to the metrics dict returned by evaluate_model.
results_summary = {}
# Placeholder metrics recorded for a model that could not be evaluated.
default_metrics = {"Accuracy": np.nan, "F1 (Macro)": np.nan, "Evaluated Samples": 0}

# Guard against running this cell before the data-preparation cell.
if 'df_evaluation_subset' in locals() and not df_evaluation_subset.empty:
    # Full-feature model.
    if os.path.exists(MODEL_FULL_PATH):
        results_summary['Full Features NN'] = evaluate_model(
            "Full Features NN", MODEL_FULL_PATH, df_evaluation_subset, y_true_eval,
            preprocessors, NUMERICAL_FEATURES_FULL_MODEL, CATEGORICAL_LOW_CARDINALITY_MODEL, CATEGORICAL_HIGH_CARDINALITY_MODEL, 'full'
        )
    else:
        print(f"Full Features Model not found at {MODEL_FULL_PATH}. Skipping.")
        results_summary['Full Features NN'] = default_metrics.copy()


    # Reduced-feature model, evaluated on the same rows.
    if os.path.exists(MODEL_REDUCED_PATH):
        results_summary['Reduced Features NN'] = evaluate_model(
            "Reduced Features NN", MODEL_REDUCED_PATH, df_evaluation_subset, y_true_eval,
            preprocessors, NUMERICAL_FEATURES_REDUCED_MODEL, CATEGORICAL_LOW_CARDINALITY_MODEL, CATEGORICAL_HIGH_CARDINALITY_MODEL, 'reduced'
        )
    else:
        print(f"Reduced Features Model not found at {MODEL_REDUCED_PATH}. Skipping.")
        results_summary['Reduced Features NN'] = default_metrics.copy()
else:
    print("Evaluation subset is empty or not defined. Skipping model evaluations.")
    results_summary['Full Features NN'] = default_metrics.copy()
    results_summary['Reduced Features NN'] = default_metrics.copy()


# --- Final Summary Output ---
# Print one row per model with the sample count and both metrics.
print("\n\n--- Evaluation Summary ---")
if not results_summary:
    print("No models were evaluated.")
else:
    summary_df = pd.DataFrame.from_dict(results_summary, orient='index')

    # Fixed column order for the printed table; absent columns are skipped.
    desired_cols_summary = [
        "Evaluated Samples", "Accuracy", "F1 (Macro)"
    ]
    cols_to_display = [col for col in desired_cols_summary if col in summary_df.columns]

    if cols_to_display: 
        summary_df_simplified = summary_df[cols_to_display]
        if not summary_df_simplified.empty:
            try:
                print(summary_df_simplified.to_markdown(floatfmt=".4f"))
            except ImportError:
                # Fall back to plain text when to_markdown cannot import its
                # dependency (presumably the optional 'tabulate' package).
                print(summary_df_simplified.to_string(float_format="%.4f"))
        else:
            print("Simplified summary is empty (no relevant columns found or data).")
    else:
        print("No columns to display in the simplified summary.")

print("\n--- Evaluation Script Finished ---")



--- Evaluation Summary ---
                     Evaluated Samples  Accuracy  F1 (Macro)
Full Features NN                   499    0.5090      0.3823
Reduced Features NN                499    0.4830      0.3341

--- Evaluation Script Finished ---


## B. Evaluating In-play model

### Data loading

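This cell imports the required libraries and sets the data file, the in-play model directory, the cluster columns, and the size of the evaluation subset.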

In [129]:

import pandas as pd
import numpy as np
import pickle
import time
import warnings
import gc
import os
from scipy.stats import entropy
import xgboost as xgb
import lightgbm as lgb

from sklearn.metrics import (accuracy_score, mean_absolute_error, mean_squared_error,
                             brier_score_loss, r2_score)
from sklearn.preprocessing import label_binarize

import numpy as np
import pandas as pd
from sklearn.metrics import (accuracy_score, mean_absolute_error, mean_squared_error,
                             r2_score, roc_auc_score)
from sklearn.preprocessing import label_binarize

# Silence the warning categories listed below and pandas' chained-assignment
# warning; show every column / four decimals in printed frames.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=pd.errors.PerformanceWarning)
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings('ignore', category=ConvergenceWarning)
pd.options.mode.chained_assignment = None 
pd.set_option('display.max_columns', None)
pd.options.display.float_format = '{:.4f}'.format

# Input data and the directory holding the pickled in-play pipeline components.
DATA_FILE = 'training/football_data.csv' 
FINAL_MODEL_DIR = 'models/in_play/'     

# Cluster columns (presumably passed to load_and_engineer_data_like_training as
# cluster_cols_config — verify against the caller).
CLUSTER_COLS = ['C_LTH', 'C_HTB', 'C_LTA', 'C_VAD', 'C_VHD', 'C_PHB'] 

# Label and size of the evaluation subset.
EVALUATION_SUBSET_NAME = "Latest 500"
EVALUATION_SUBSET_SIZE = 500

### Data cleaning, pre-processing and feature engineering

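The functions below load the pickled components of the in-play pipeline and rebuild the engineered features from the raw match data.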

In [130]:
def load_pipeline_prerequisites_from_inplay_dir(model_dir_path):
    """Unpickle every component of the in-play pipeline from a directory.

    Args:
        model_dir_path: Directory containing the ``*.pkl`` artefacts.

    Returns:
        Dict mapping component key to the unpickled object, or ``None`` when
        any file fails to load. A missing ``cluster_medians.pkl`` is tolerated
        and stored as ``None``.

    Raises:
        TypeError: If the loaded ``feature_names`` object is not a list.
    """
    # NOTE: pickle.load executes arbitrary code; only point this at trusted files.
    artefacts = (
        ('scaler_inplay', 'scaler.pkl'),
        ('feature_names', 'feature_names.pkl'),
        ('target_transformer_nn', 'target_transformer.pkl'),
        ('iso_reg_calibrator', 'draw_calibrator.pkl'),
        ('label_encoder', 'label_encoder.pkl'),
        ('meta_model_final', 'meta_model.pkl'),
        ('cluster_medians', 'cluster_medians.pkl'),
        ('meta_feature_names', 'meta_feature_names.pkl'),
        ('base_nn_home', 'base_nn_home.pkl'),
        ('base_nn_away', 'base_nn_away.pkl'),
        ('base_xgb_home', 'base_xgb_home.pkl'),
        ('base_xgb_away', 'base_xgb_away.pkl'),
        ('base_lgbm_home', 'base_lgbm_home.pkl'),
        ('base_lgbm_away', 'base_lgbm_away.pkl'),
        ('base_draw_specialist', 'base_draw_specialist.pkl'),
        ('base_hda_classifier', 'base_hda_classifier.pkl'),
    )
    optional = ('cluster_medians',)

    loaded = {}
    failure_count = 0
    for name, filename in artefacts:
        location = os.path.join(model_dir_path, filename)
        try:
            with open(location, 'rb') as handle:
                loaded[name] = pickle.load(handle)
        except FileNotFoundError:
            if name in optional:
                loaded[name] = None
            else:
                failure_count += 1
                print(f"   ERROR: Required file not found: {location}")
        except Exception as exc:
            failure_count += 1
            print(f"   ERROR loading {location}: {exc}")

    # Every failure is reported above before giving up.
    if failure_count:
        return None

    names = loaded.get('feature_names')
    if names is not None and not isinstance(names, list):
        raise TypeError("Loaded 'feature_names' is not a list.")
    return loaded

# --- Data Loading and Extensive Feature Engineering ---
def load_and_engineer_data_like_training(data_file_path, cluster_cols_config):
    """Load the match CSV, clean it and add the engineered feature columns.

    Args:
        data_file_path: Path to the CSV file.
        cluster_cols_config: Cluster column names; rows with a NaN in any of
            those present in the file are dropped.

    Returns:
        DataFrame sorted ascending by ``MatchDateTime`` with a fresh index.
        Each group of derived columns is only added when all of its source
        columns exist in the file.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv``.
        ValueError: If a required column is missing or no rows survive cleaning.
    """
    # The except clauses only re-raise, so every read error propagates unchanged.
    try:
        df = pd.read_csv(data_file_path, low_memory=False)
    except FileNotFoundError: raise
    except Exception as e: raise

    # --- Row filtering ---
    initial_rows = len(df)
    required_cols = ['FTHome', 'FTAway', 'OddHome', 'OddDraw', 'OddAway', 'HomeElo', 'AwayElo']
    missing_required = [col for col in required_cols if col not in df.columns]
    if missing_required: raise ValueError(f"Missing essential columns for cleaning: {missing_required}")
    df = df.dropna(subset=required_cols)
    # Keep only rows where all three 1X2 odds are at least 1.0.
    df = df[(df['OddHome'] >= 1.0) & (df['OddDraw'] >= 1.0) & (df['OddAway'] >= 1.0)]
    if 'Form3Home' in df.columns: df = df.dropna(subset=['Form3Home'])

    # Drop rows lacking any of the match statistics that exist in the file.
    essential_stats_cols = ['HomeShots', 'AwayShots', 'HomeTarget', 'AwayTarget', 'HomeCorners', 'AwayCorners']
    existing_stats_cols = [col for col in essential_stats_cols if col in df.columns] 
    if existing_stats_cols: 
        cols_to_dropna_stats = [col for col in essential_stats_cols if col in df.columns]
        if cols_to_dropna_stats: df = df.dropna(subset=cols_to_dropna_stats)

    actual_cluster_cols_present = [col for col in cluster_cols_config if col in df.columns]
    if actual_cluster_cols_present:
        df = df.dropna(subset=actual_cluster_cols_present)

    df = df.reset_index(drop=True)
    if len(df) == 0: raise ValueError("No data remaining after initial cleaning.")

    # Combined odd of two outcomes: 1 / (1/odd1 + 1/odd2), floored at 1.00;
    # NaN when either input is missing or below 1.0.
    def combine_odds_fe(odd1, odd2):
        if pd.isna(odd1) or pd.isna(odd2) or odd1 < 1.0 or odd2 < 1.0: return np.nan
        prob1, prob2 = 1.0 / odd1, 1.0 / odd2; combined_prob = prob1 + prob2;
        return max(1.0 / combined_prob, 1.00) if combined_prob > 0 else np.nan

    # --- Kick-off timestamp ---
    if 'MatchDate' in df.columns: df['MatchDate'] = pd.to_datetime(df['MatchDate'], errors='coerce')
    else: df['MatchDate'] = pd.NaT
    if 'MatchTime' in df.columns:
        # NOTE(review): astype(str) runs before fillna, so missing values have
        # presumably already become the string 'nan' and fillna has nothing to
        # fill; they are coerced to NaT below and replaced by midnight instead.
        df['MatchTime'] = df['MatchTime'].astype(str).fillna('00:00:00').str.replace('.', ':', regex=False)
        try: df['MatchTime'] = pd.to_datetime(df['MatchTime'], format='%H:%M:%S', errors='coerce').dt.time
        # NOTE(review): pd.to_datetime on a scalar string returns a Timestamp,
        # which has no `.dt` accessor — this fallback looks like it would raise
        # AttributeError if it were ever reached; confirm.
        except Exception: df['MatchTime'] = pd.to_datetime('00:00:00', errors='coerce').dt.time
        df['MatchTime'] = df['MatchTime'].fillna(pd.to_datetime('00:00:00').time())
    else: df['MatchTime'] = pd.to_datetime('00:00:00').time()

    # Build MatchDateTime from the date and time strings where both are present.
    valid_dt_idx = df['MatchDate'].notna() & df['MatchTime'].notna()
    df['MatchDateTime'] = pd.NaT
    if valid_dt_idx.any():
        df.loc[valid_dt_idx, 'MatchDateTime'] = pd.to_datetime(df.loc[valid_dt_idx, 'MatchDate'].astype(str) + ' ' + df.loc[valid_dt_idx, 'MatchTime'].astype(str), errors='coerce')

    # --- Half-time goals ---
    if 'HTHome' in df.columns and 'HTAway' in df.columns:
        df['HTHome'] = pd.to_numeric(df['HTHome'], errors='coerce'); df['HTAway'] = pd.to_numeric(df['HTAway'], errors='coerce')
        df['HTTotalGoals'] = df['HTHome'].fillna(0) + df['HTAway'].fillna(0) 

    # --- Double-chance odds (1X, X2, 12), also for the Max* odds when available ---
    df['1XOdd'] = df.apply(lambda x: combine_odds_fe(x.get('OddHome'), x.get('OddDraw')), axis=1)
    df['X2Odd'] = df.apply(lambda x: combine_odds_fe(x.get('OddDraw'), x.get('OddAway')), axis=1)
    df['12Odd'] = df.apply(lambda x: combine_odds_fe(x.get('OddHome'), x.get('OddAway')), axis=1)
    if all(c in df.columns for c in ['MaxHome', 'MaxDraw', 'MaxAway']):
        df['Max1XOdd'] = df.apply(lambda x: combine_odds_fe(x['MaxHome'], x['MaxDraw']), axis=1)
        df['MaxX2Odd'] = df.apply(lambda x: combine_odds_fe(x['MaxDraw'], x['MaxAway']), axis=1)
        df['Max12Odd'] = df.apply(lambda x: combine_odds_fe(x['MaxHome'], x['MaxAway']), axis=1)

    # --- Elo features ---
    # NOTE(review): here and below, `df[col].fillna(..., inplace=True)` fills a
    # column selection in place; this presumably does not update `df` when
    # pandas Copy-on-Write is enabled — confirm for the pandas version in use.
    if 'HomeElo' in df.columns and 'AwayElo' in df.columns:
        df['HomeElo'] = pd.to_numeric(df['HomeElo'], errors='coerce'); df['AwayElo'] = pd.to_numeric(df['AwayElo'], errors='coerce')
        home_elo_median = df['HomeElo'].median(); away_elo_median = df['AwayElo'].median()
        df['HomeElo'].fillna(home_elo_median if pd.notna(home_elo_median) else 1500.0, inplace=True)
        df['AwayElo'].fillna(away_elo_median if pd.notna(away_elo_median) else 1500.0, inplace=True)
        df['EloDifference'] = df['HomeElo'] - df['AwayElo']
        df['EloTotal'] = df['HomeElo'] + df['AwayElo']
        # Division guarded against a zero total; those rows keep the 0.0 from `out`.
        df['EloAdvantage'] = np.divide(df['EloDifference'], df['EloTotal'], out=np.zeros_like(df['EloDifference'], dtype=float), where=df['EloTotal']!=0)

    # --- Form features ---
    # Missing form columns are created as 0; NaNs are filled with the column median.
    form_cols_src_fe = ['Form3Home', 'Form5Home', 'Form3Away', 'Form5Away']
    for col in form_cols_src_fe:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
            median_val = df[col].median() 
            df[col].fillna(median_val if pd.notna(median_val) else 0, inplace=True)
        else: df[col] = 0 
    if all(c in df.columns for c in form_cols_src_fe):
        df['Form3Difference'] = df['Form3Home'] - df['Form3Away']
        df['Form5Difference'] = df['Form5Home'] - df['Form5Away']
        # "Older" form is the 5-match value minus the 3-match value.
        df['FormOlderHome'] = df['Form5Home'] - df['Form3Home']
        df['FormOlderAway'] = df['Form5Away'] - df['Form3Away']
        # Momentum is the 3-match form minus that older part.
        df['FormMomentumHome'] = df['Form3Home'] - df['FormOlderHome']
        df['FormMomentumAway'] = df['Form3Away'] - df['FormOlderAway']
        df['FormMomentumDiff'] = df['FormMomentumHome'] - df['FormMomentumAway']

    # --- Odds-derived features ---
    df['OddsDifference'] = df['OddHome'] - df['OddAway']
    if 'MaxHome' in df.columns and 'MaxAway' in df.columns:
        df['MaxOddsDifference'] = df['MaxHome'] - df['MaxAway']

    # Implied probabilities are the reciprocal odds; the margin is their sum minus 1.
    df['ImpliedProbHome'] = 1.0 / df['OddHome']; df['ImpliedProbDraw'] = 1.0 / df['OddDraw']; df['ImpliedProbAway'] = 1.0 / df['OddAway']
    df['ImpliedProbTotal'] = df['ImpliedProbHome'] + df['ImpliedProbDraw'] + df['ImpliedProbAway']
    df['BookmakerMargin'] = df['ImpliedProbTotal'] - 1.0

    # --- Shot features (only when all four shot columns exist) ---
    stat_cols_fe = ['HomeShots', 'AwayShots', 'HomeTarget', 'AwayTarget'] 
    stat_features_created_fe = all(c in df.columns for c in stat_cols_fe)
    if stat_features_created_fe:
        for col in stat_cols_fe: 
            df[col] = pd.to_numeric(df[col], errors='coerce'); df[col] = df[col].fillna(df[col].median())
        df['ShotsDifference'] = df['HomeShots'] - df['AwayShots']
        df['ShotsTotal'] = df['HomeShots'] + df['AwayShots']
        df['TargetDifference'] = df['HomeTarget'] - df['AwayTarget']
        df['TargetTotal'] = df['HomeTarget'] + df['AwayTarget']
        # Shots on target / shots, left at 0.0 where no shots were taken.
        df['ShotAccuracyHome'] = np.divide(df['HomeTarget'], df['HomeShots'], out=np.zeros_like(df['HomeTarget'], dtype=float), where=df['HomeShots']!=0)
        df['ShotAccuracyAway'] = np.divide(df['AwayTarget'], df['AwayShots'], out=np.zeros_like(df['AwayTarget'], dtype=float), where=df['AwayShots']!=0)
        df['ShotAccuracyDiff'] = df['ShotAccuracyHome'] - df['ShotAccuracyAway']

    # --- Corner features and GameDominanceIndex ---
    corner_cols_fe = ['HomeCorners', 'AwayCorners']
    corner_features_created_fe = all(c in df.columns for c in corner_cols_fe)
    if corner_features_created_fe:
        for col in corner_cols_fe: df[col] = pd.to_numeric(df[col], errors='coerce'); df[col] = df[col].fillna(df[col].median())
        df['CornersDifference'] = df['HomeCorners'] - df['AwayCorners']
        df['CornersTotal'] = df['HomeCorners'] + df['AwayCorners']
        # Mean of shot and corner difference when shots exist, else corners only.
        if stat_features_created_fe and 'ShotsDifference' in df.columns: 
            df['GameDominanceIndex'] = (df.get('ShotsDifference',0).fillna(0) + df.get('CornersDifference',0).fillna(0)) / 2.0
        elif 'CornersDifference' in df.columns: df['GameDominanceIndex'] = df['CornersDifference'].fillna(0)
        else: df['GameDominanceIndex'] = 0
    # NOTE(review): `model_expected_feature_names` is neither a parameter nor
    # assigned in this function; unless it exists at module level, this branch
    # raises NameError whenever a corner column is missing — confirm.
    elif 'GameDominanceIndex' in (model_expected_feature_names or []): df['GameDominanceIndex'] = 0 

    # --- Discipline features (only when all card and foul columns exist) ---
    disc_cols_fe = ['HomeYellow', 'AwayYellow', 'HomeRed', 'AwayRed', 'HomeFouls', 'AwayFouls']
    discipline_features_created_fe = all(c in df.columns for c in disc_cols_fe)
    if discipline_features_created_fe:
        for col in disc_cols_fe: df[col] = pd.to_numeric(df[col], errors='coerce'); df[col] = df[col].fillna(df[col].median())
        # A red card counts twice as much as a yellow card.
        df['CardPointsHome'] = df['HomeYellow'] + (2 * df['HomeRed'])
        df['CardPointsAway'] = df['AwayYellow'] + (2 * df['AwayRed'])
        df['CardPointsDiff'] = df['CardPointsHome'] - df['CardPointsAway']
        df['FoulsDifference'] = df['HomeFouls'] - df['AwayFouls']
        df['FoulsTotal'] = df['HomeFouls'] + df['AwayFouls']

    # --- Advanced composite features ---
    # Computed only when every dependency listed here is present in the frame.
    adv_deps_ots_fe = ['HomeTarget','HomeShots','Form5Home','ImpliedProbHome','AwayTarget','AwayShots','Form5Away','ImpliedProbAway','HomeFouls','HomeElo','AwayShots','AwayTarget','AwayFouls','AwayElo','EloDifference','ImpliedProbDraw','Form5Difference','FormMomentumHome','ShotAccuracyHome','FormMomentumAway','ShotAccuracyAway','Under25','HomeCorners','AwayCorners']
    existing_deps_fe = [dep for dep in adv_deps_ots_fe if dep in df.columns]
    advanced_features_possible_fe = len(existing_deps_fe) == len(adv_deps_ots_fe)

    if advanced_features_possible_fe:
        # Fill remaining NaNs in the dependencies with the column median (0 if undefined).
        for col in existing_deps_fe:
            if df[col].isnull().any():
                median_val = df[col].median()
                df[col].fillna(median_val if pd.notna(median_val) else 0, inplace=True)

        # Denominators are clipped to at least 1 to avoid division by zero.
        df['ScoringEfficiencyHome']=(df['HomeTarget']/(df['HomeShots'].clip(lower=1)))*df['Form5Home']*df['ImpliedProbHome'] 
        df['ScoringEfficiencyAway']=(df['AwayTarget']/(df['AwayShots'].clip(lower=1)))*df['Form5Away']*df['ImpliedProbAway'] 
        df['DefensiveRatingHome']=np.divide(df['HomeFouls']*df['HomeElo'],(df['AwayShots']+df['AwayTarget']).clip(lower=1),out=np.zeros_like(df['HomeFouls'],dtype=float),where=(df['AwayShots']+df['AwayTarget']).clip(lower=1)!=0)
        df['DefensiveRatingAway']=np.divide(df['AwayFouls']*df['AwayElo'],(df['HomeShots']+df['HomeTarget']).clip(lower=1),out=np.zeros_like(df['AwayFouls'],dtype=float),where=(df['HomeShots']+df['HomeTarget']).clip(lower=1)!=0)
        # Elo closeness (scaled by 1000) times draw probability times form closeness (scaled by 15).
        df['DrawLikelihood']=(1-abs(df['EloDifference'])/1000).clip(lower=0)*df['ImpliedProbDraw']*(1-abs(df['Form5Difference'])/15).clip(lower=0)
        df['FormEfficiencyHome']=df['Form5Home']*df.get('FormMomentumHome',0)*df.get('ShotAccuracyHome',0) 
        df['FormEfficiencyAway']=df['Form5Away']*df.get('FormMomentumAway',0)*df.get('ShotAccuracyAway',0) 
        # Logistic function of own defensive rating minus the opponent's scoring efficiency.
        df['CleanSheetProbHome']=1/(1+np.exp(-(df.get('DefensiveRatingHome',0)-df.get('ScoringEfficiencyAway',0))))
        df['CleanSheetProbAway']=1/(1+np.exp(-(df.get('DefensiveRatingAway',0)-df.get('ScoringEfficiencyHome',0))))
        under25_median_ots = df.get('Under25', pd.Series(dtype='float')).median() if 'Under25' in df and not df['Under25'].isnull().all() else 0.5
        df['LowScoreIndicator']=df.get('Under25', under25_median_ots)*df.get('ImpliedProbDraw',0)*(1/(df.get('HomeTarget',0)+df.get('AwayTarget',0)+1))
        df['DrawTendency']=((1-abs(df.get('EloDifference',0))/1000).clip(lower=0)*df.get('ImpliedProbDraw',0)*(1-abs(df.get('Form5Difference',0))/15).clip(lower=0)*(1/(abs(df.get('HomeTarget',0)-df.get('AwayTarget',0))+1)))
        df['DefensiveOrganization']=np.divide((df.get('HomeFouls',0)+df.get('AwayFouls',0)), (df.get('HomeShots',0)+df.get('AwayShots',0)+1).clip(lower=1), out=np.zeros_like(df.get('HomeFouls',pd.Series(0,index=df.index)),dtype=float), where=(df.get('HomeShots',0)+df.get('AwayShots',0)+1).clip(lower=1)!=0)*df.get('Under25', under25_median_ots)
        df['HomeDefensiveStyle']=np.divide(df.get('HomeFouls',0)*df.get('HomeCorners',0), (df.get('HomeShots',0)+1).clip(lower=1), out=np.zeros_like(df.get('HomeFouls',pd.Series(0,index=df.index)),dtype=float), where=(df.get('HomeShots',0)+1).clip(lower=1)!=0)
        df['AwayDefensiveStyle']=np.divide(df.get('AwayFouls',0)*df.get('AwayCorners',0), (df.get('AwayShots',0)+1).clip(lower=1), out=np.zeros_like(df.get('AwayFouls',pd.Series(0,index=df.index)),dtype=float), where=(df.get('AwayShots',0)+1).clip(lower=1)!=0)
        df['ExpectedGoalsHome']=df.get('HomeTarget',0)*df.get('ShotAccuracyHome',0)*(df.get('Form5Home',0)/15).clip(0,1)*df.get('ImpliedProbHome',0)
        df['ExpectedGoalsAway']=df.get('AwayTarget',0)*df.get('ShotAccuracyAway',0)*(df.get('Form5Away',0)/15).clip(0,1)*df.get('ImpliedProbAway',0)

    # Return the rows in chronological order with a fresh index.
    df.sort_values(by='MatchDateTime', ascending=True, inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

# --- Main Data Preparation Flow ---
# Loads the persisted in-play pipeline components, rebuilds the engineered
# feature frame, scales it with the persisted scaler and selects the most
# recent EVALUATION_SUBSET_SIZE rows as the evaluation subset.
pipeline_components = load_pipeline_prerequisites_from_inplay_dir(FINAL_MODEL_DIR)
if not pipeline_components:
    raise SystemExit("Failed to load model pipeline components. Exiting.")

loaded_feature_names_from_pkl = pipeline_components.get('feature_names')
if not loaded_feature_names_from_pkl:
    raise SystemExit("Error: 'feature_names.pkl' not loaded or empty from pipeline components.")

df_full_engineered = load_and_engineer_data_like_training(DATA_FILE, CLUSTER_COLS)

# Features expected by the persisted pipeline but not produced by feature
# engineering are added as constant 0 columns so the column selection below works.
for feature_col_name in loaded_feature_names_from_pkl:
    if feature_col_name not in df_full_engineered.columns:
        print(f"Final Alignment Warning: Feature '{feature_col_name}' (from feature_names.pkl) missing after FE. Adding with 0s.")
        df_full_engineered[feature_col_name] = 0

X_to_scale = df_full_engineered[loaded_feature_names_from_pkl].copy()

# Median-impute every column that still contains NaNs; a column whose median
# is itself NaN (all values missing) is filled with 0 instead.
# The result is assigned back explicitly: `X_to_scale[col].fillna(..., inplace=True)`
# works on a temporary column object and does not update the frame when pandas
# Copy-on-Write is active.
for col in X_to_scale.columns[X_to_scale.isnull().any()]:
    median_val = X_to_scale[col].median()
    X_to_scale[col] = X_to_scale[col].fillna(median_val if pd.notna(median_val) else 0)

scaler = pipeline_components.get('scaler_inplay')
# Explicit None check: do not rely on the truthiness of an estimator object.
if scaler is None:
    raise SystemExit("Error: Scaler ('scaler.pkl') not loaded.")
try:
    X_full_scaled_values = scaler.transform(X_to_scale)
    X_full_scaled_df = pd.DataFrame(X_full_scaled_values, columns=loaded_feature_names_from_pkl, index=df_full_engineered.index)
except ValueError as e:
    print(f"Error during scaling: {e}")
    raise SystemExit("Halting due to scaling error.") from e

# Use every row when the data set is smaller than the requested subset,
# otherwise the last EVALUATION_SUBSET_SIZE rows of the engineered frame.
if len(df_full_engineered) < EVALUATION_SUBSET_SIZE:
    df_evaluation_subset_orig = df_full_engineered.loc[X_full_scaled_df.index].copy()
    X_evaluation_subset_scaled = X_full_scaled_df.copy()
else:
    latest_indices = df_full_engineered.index[-EVALUATION_SUBSET_SIZE:]
    df_evaluation_subset_orig = df_full_engineered.loc[latest_indices].copy()
    X_evaluation_subset_scaled = X_full_scaled_df.loc[latest_indices].copy()

print(f"Evaluation subset selected: {len(df_evaluation_subset_orig)} matches.")

# Ground truth: final scores and the H/D/A outcome derived from them.
y_true_scores_eval = df_evaluation_subset_orig[['FTHome', 'FTAway']].copy()
y_true_hda_eval_text = ['H' if h > a else ('A' if h < a else 'D') for h, a in y_true_scores_eval.values]

le_outcome = pipeline_components.get('label_encoder')
if le_outcome is None:
    raise SystemExit("Error: LabelEncoder not loaded.")
try:
    y_true_hda_eval_numeric = le_outcome.transform(y_true_hda_eval_text)
    pipeline_components['le_classes_for_metrics'] = le_outcome.classes_
except ValueError as e:
    # The persisted encoder does not know at least one label of the subset.
    # Fall back to an encoder fitted on the subset labels and say so loudly:
    # it replaces the persisted encoder for all later decoding steps.
    print(f"Warning: persisted LabelEncoder could not encode the evaluation labels ({e}). "
          "Falling back to an encoder fitted on the evaluation subset.")
    unique_labels_in_subset = sorted(set(y_true_hda_eval_text))
    temp_le = LabelEncoder().fit(unique_labels_in_subset)
    y_true_hda_eval_numeric = temp_le.transform(y_true_hda_eval_text)
    pipeline_components['label_encoder'] = temp_le
    pipeline_components['le_classes_for_metrics'] = temp_le.classes_

print("--- Data Preparation for In-Play Model Complete ---")

Evaluation subset selected: 500 matches.
--- Data Preparation for In-Play Model Complete ---


### Model prediction and evaluation


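The cell below runs the base models and the stacked meta-model on the evaluation subset and reports outcome, score, and probability metrics.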

In [139]:
def predict_with_full_pipeline(X_scaled_subset, df_original_subset_for_clusters, components):
    """Run the stacked in-play pipeline on an already scaled feature frame.

    The base models produce home/away score predictions, a draw probability
    and H/D/A class probabilities. These are turned into meta-features
    (optionally extended with the ``CLUSTER_COLS`` columns of the original
    frame) and fed to the meta-model, whose encoded predictions are decoded
    with the label encoder.

    Args:
        X_scaled_subset: DataFrame of scaled features; its index is reused
            for the meta-feature frame.
        df_original_subset_for_clusters: DataFrame that is looked up with
            ``X_scaled_subset.index`` to fetch the ``CLUSTER_COLS`` columns
            that are present in it.
        components: Mapping of pipeline parts. Required keys:
            ``base_nn_home``, ``base_nn_away``, ``base_xgb_home``,
            ``base_xgb_away``, ``base_lgbm_home``, ``base_lgbm_away``,
            ``base_draw_specialist``, ``base_hda_classifier``,
            ``meta_model_final``, ``label_encoder``. Optional keys:
            ``target_transformer_nn``, ``iso_reg_calibrator``,
            ``cluster_medians``, ``meta_feature_names``.

    Returns:
        dict with ``pred_hda`` (decoded meta-model predictions),
        ``pred_probs`` (meta-model ``predict_proba`` output) and the six
        ``base_pred_{nn,xgb,lgbm}_{home,away}`` arrays.

    Raises:
        ValueError: If the meta-model or the label encoder is not loaded.
        KeyError: If a required base model is missing from ``components``.

    Note:
        ``entropy`` and ``CLUSTER_COLS`` must be available at module level
        (``entropy`` is presumably ``scipy.stats.entropy`` - confirm against
        the cell's imports).
    """
    # Fail fast, before any (potentially slow) base-model inference. Explicit
    # None checks are used because estimator objects may define __len__, which
    # makes plain truthiness tests unreliable.
    meta_model = components.get('meta_model_final')
    if meta_model is None:
        raise ValueError("Meta-model not loaded.")
    label_encoder = components.get('label_encoder')
    if label_encoder is None:
        raise ValueError("Label encoder not loaded.")

    row_index = X_scaled_subset.index
    X_scaled_np = X_scaled_subset.values

    # --- Base model predictions ---
    preds_nn_home = components['base_nn_home'].predict(X_scaled_np)
    preds_nn_away = components['base_nn_away'].predict(X_scaled_np)
    preds_xgb_home = components['base_xgb_home'].predict(X_scaled_np)
    preds_xgb_away = components['base_xgb_away'].predict(X_scaled_np)
    preds_lgbm_home = components['base_lgbm_home'].predict(X_scaled_np)
    preds_lgbm_away = components['base_lgbm_away'].predict(X_scaled_np)
    preds_draw_prob = components['base_draw_specialist'].predict_proba(X_scaled_np)[:, 1]
    preds_hda_probs_base = components['base_hda_classifier'].predict_proba(X_scaled_np)

    # --- Undo the NN target transformation (if one was persisted) ---
    target_transformer = components.get('target_transformer_nn')
    if target_transformer is not None:
        nn_stacked = np.column_stack([preds_nn_home, preds_nn_away])
        finite_values = nn_stacked[np.isfinite(nn_stacked)]
        if finite_values.size:
            # NaN -> median, +inf -> max, -inf -> min of the finite predictions.
            nan_fill = np.median(finite_values)
            posinf_fill = finite_values.max()
            neginf_fill = finite_values.min()
        else:
            nan_fill = posinf_fill = neginf_fill = 0.0
        nn_stacked = np.nan_to_num(nn_stacked, nan=nan_fill, posinf=posinf_fill, neginf=neginf_fill)
        nn_inverse = target_transformer.inverse_transform(nn_stacked)
        preds_nn_home, preds_nn_away = nn_inverse[:, 0], nn_inverse[:, 1]

    # --- Draw probability: sanitise, calibrate, clip to [0, 1] ---
    finite_draw = preds_draw_prob[np.isfinite(preds_draw_prob)]
    draw_nan_fill = np.median(finite_draw) if finite_draw.size else 0.33
    preds_draw_prob_filled = np.nan_to_num(preds_draw_prob, nan=draw_nan_fill, posinf=1.0, neginf=0.0)

    calibrator = components.get('iso_reg_calibrator')
    if calibrator is not None:
        preds_draw_prob_calibrated = calibrator.transform(preds_draw_prob_filled.reshape(-1, 1)).ravel()
    else:
        preds_draw_prob_calibrated = preds_draw_prob_filled
    preds_draw_prob_calibrated = pd.Series(preds_draw_prob_calibrated, index=row_index).clip(0.0, 1.0)

    # --- Meta-features (column order matters when no expected order is persisted) ---
    base_score_preds = {
        'nn': (preds_nn_home, preds_nn_away),
        'xgb': (preds_xgb_home, preds_xgb_away),
        'lgbm': (preds_lgbm_home, preds_lgbm_away),
    }
    meta_features_df = pd.DataFrame(index=row_index)
    meta_features_df['draw_prob_calibrated'] = preds_draw_prob_calibrated
    for model_tag, (home_pred, away_pred) in base_score_preds.items():
        meta_features_df[f'{model_tag}_oof_home'] = home_pred
        meta_features_df[f'{model_tag}_oof_away'] = away_pred
    for model_tag in base_score_preds:
        meta_features_df[f'{model_tag}_score_diff'] = (
            meta_features_df[f'{model_tag}_oof_home'] - meta_features_df[f'{model_tag}_oof_away']
        )
    for model_tag in base_score_preds:
        meta_features_df[f'{model_tag}_abs_diff'] = meta_features_df[f'{model_tag}_score_diff'].abs()

    meta_features_df['hda_predicted_class'] = np.argmax(preds_hda_probs_base, axis=1)

    # Margin between the two largest class probabilities; 0.0 when it cannot
    # be computed (fewer than two classes, or NaN among the top two).
    sorted_hda_probs = np.sort(preds_hda_probs_base, axis=1)
    if sorted_hda_probs.shape[1] >= 2:
        top_two_margin = sorted_hda_probs[:, -1] - sorted_hda_probs[:, -2]
        top_two_margin = np.where(np.isnan(top_two_margin), 0.0, top_two_margin)
    else:
        top_two_margin = np.zeros(sorted_hda_probs.shape[0])
    meta_features_df['hda_prob_margin'] = top_two_margin

    # Clip away exact 0/1 and renormalise before taking the entropy.
    epsilon = 1e-9
    safe_hda_probs = np.clip(preds_hda_probs_base, epsilon, 1.0 - epsilon)
    safe_hda_probs = safe_hda_probs / np.sum(safe_hda_probs, axis=1, keepdims=True)
    meta_features_df['hda_prob_entropy'] = entropy(safe_hda_probs, axis=1)

    # --- Cluster features taken from the original (unscaled) frame ---
    actual_cluster_cols_in_original_df = [col for col in CLUSTER_COLS if col in df_original_subset_for_clusters.columns]
    if actual_cluster_cols_in_original_df:
        cluster_features_subset = df_original_subset_for_clusters[actual_cluster_cols_in_original_df].loc[row_index].copy()
        if cluster_features_subset.isnull().to_numpy().any():
            cluster_medians = components.get('cluster_medians')
            cluster_features_subset = cluster_features_subset.fillna(cluster_medians if cluster_medians is not None else 0)
        meta_features_df = pd.concat([meta_features_df, cluster_features_subset], axis=1)

    # --- Align with the column order the meta-model expects ---
    meta_feature_names_expected = components.get('meta_feature_names')
    # len() rather than truthiness: the persisted names may be an array/Index.
    if meta_feature_names_expected is None or len(meta_feature_names_expected) == 0:
        meta_feature_names_expected = list(meta_features_df.columns)
    # reindex() creates any missing expected column filled with 0; the trailing
    # fillna() removes NaNs left in existing columns.
    meta_features_aligned = meta_features_df.reindex(columns=list(meta_feature_names_expected), fill_value=0).fillna(0)

    # --- Final stacked prediction ---
    final_preds_encoded = meta_model.predict(meta_features_aligned)
    final_preds_hda = label_encoder.inverse_transform(final_preds_encoded)
    final_preds_probs = meta_model.predict_proba(meta_features_aligned)

    results = {"pred_hda": final_preds_hda, "pred_probs": final_preds_probs}
    for model_tag in base_score_preds:
        for side in ('home', 'away'):
            results[f"base_pred_{model_tag}_{side}"] = meta_features_df[f'{model_tag}_oof_{side}'].values
    return results

def calculate_and_format_metrics_ots_style(y_true_scores, y_true_hda_text_subset, predictions, le_for_eval):
    """Calculate outcome, score and probability metrics similar to the OTS script.

    Args:
        y_true_scores: DataFrame with the true ``FTHome`` and ``FTAway`` columns.
        y_true_hda_text_subset: True outcome labels, encodable by ``le_for_eval``.
        predictions: Mapping with ``pred_hda``, ``pred_probs`` and the six
            ``base_pred_{nn,xgb,lgbm}_{home,away}`` arrays.
        le_for_eval: Fitted label encoder providing ``transform`` and ``classes_``.

    Returns:
        dict of metric name -> value. ``Brier Score`` and ``ROC AUC (OvR W)``
        are NaN when they cannot be computed for the given inputs.
    """
    metrics_summary = {}
    y_pred_hda_text = predictions["pred_hda"]
    y_pred_probs = predictions["pred_probs"]

    y_true_hda_numeric_subset = le_for_eval.transform(y_true_hda_text_subset)
    y_pred_hda_numeric_subset = le_for_eval.transform(y_pred_hda_text)

    metrics_summary["Outcome Accuracy"] = accuracy_score(y_true_hda_numeric_subset, y_pred_hda_numeric_subset)

    # Score predictions are the plain average of the three base regressors.
    avg_base_pred_h = (predictions["base_pred_nn_home"] + predictions["base_pred_xgb_home"] + predictions["base_pred_lgbm_home"]) / 3.0
    avg_base_pred_a = (predictions["base_pred_nn_away"] + predictions["base_pred_xgb_away"] + predictions["base_pred_lgbm_away"]) / 3.0
    true_h_scores = y_true_scores['FTHome'].values
    true_a_scores = y_true_scores['FTAway'].values

    pred_h_round = np.round(avg_base_pred_h).astype(int)
    pred_a_round = np.round(avg_base_pred_a).astype(int)
    metrics_summary["Exact Score Acc"] = np.mean((pred_h_round == true_h_scores) & (pred_a_round == true_a_scores))
    metrics_summary["Exact Goal Diff"] = np.mean((pred_h_round - pred_a_round) == (true_h_scores - true_a_scores))

    metrics_summary["R2_Home"] = r2_score(true_h_scores, avg_base_pred_h)
    metrics_summary["R2_Away"] = r2_score(true_a_scores, avg_base_pred_a)
    metrics_summary["MSE_Home"] = mean_squared_error(true_h_scores, avg_base_pred_h)
    metrics_summary["MSE_Away"] = mean_squared_error(true_a_scores, avg_base_pred_a)
    metrics_summary["MAE_Home"] = mean_absolute_error(true_h_scores, avg_base_pred_h)
    metrics_summary["MAE_Away"] = mean_absolute_error(true_a_scores, avg_base_pred_a)

    # Both probability metrics default to NaN so that every skipped or failing
    # path below still yields a complete metrics dict (previously `brier_m`
    # could be referenced without ever having been assigned).
    brier_m = np.nan
    roc_auc_w = np.nan
    all_known_classes_numeric = le_for_eval.transform(le_for_eval.classes_)

    if len(y_true_hda_numeric_subset) > 0 and y_pred_probs.shape[0] == len(y_true_hda_numeric_subset):
        try:
            y_true_bin = label_binarize(y_true_hda_numeric_subset, classes=all_known_classes_numeric)

            # Only comparable when there is one probability column per known class.
            if y_true_bin.shape[1] == y_pred_probs.shape[1]:
                brier_m = np.mean(np.sum((y_pred_probs - y_true_bin) ** 2, axis=1))
                classes_present = np.sum(np.any(y_true_bin, axis=0))
                # ROC AUC needs at least two classes to occur in the ground truth.
                if y_true_bin.shape[1] >= 2 and classes_present >= 2:
                    roc_auc_w = roc_auc_score(y_true_bin, y_pred_probs,
                                              multi_class='ovr', average='weighted',
                                              labels=all_known_classes_numeric)
        except Exception as err:
            # Best effort: the probability metrics are optional, so a failure
            # is reported and the affected metric(s) keep their NaN default.
            print(f"Warning: probability metrics could not be fully computed: {err}")

    metrics_summary["Brier Score"] = brier_m
    metrics_summary["ROC AUC (OvR W)"] = roc_auc_w
    metrics_summary["Evaluated Samples"] = len(y_true_hda_numeric_subset)
    return metrics_summary

# --- Run Evaluation ---
# Scores the prepared evaluation subset with the full pipeline and prints a
# one-model summary. When the inputs are incomplete, NaN placeholder metrics
# are stored instead so the summary below still has a row to print.
evaluation_results_dict = {}
default_metrics_inplay = dict.fromkeys(
    (
        "Outcome Accuracy", "Exact Score Acc", "Exact Goal Diff",
        "R2_Home", "R2_Away", "MSE_Home", "MSE_Away",
        "MAE_Home", "MAE_Away", "ROC AUC (OvR W)", "Brier Score",
    ),
    np.nan,
)
default_metrics_inplay["Evaluated Samples"] = 0

# Results are keyed by the model directory without surrounding slashes.
model_name_key = FINAL_MODEL_DIR.strip('/')

# Every prerequisite created by the data preparation cell must exist and be non-empty.
can_evaluate = bool(
    pipeline_components
    and 'X_evaluation_subset_scaled' in locals() and not X_evaluation_subset_scaled.empty
    and 'y_true_hda_eval_numeric' in locals() and len(y_true_hda_eval_numeric) > 0
    and 'df_evaluation_subset_orig' in locals() and 'y_true_scores_eval' in locals()
)

if can_evaluate:
    predictions = predict_with_full_pipeline(X_evaluation_subset_scaled, df_evaluation_subset_orig, pipeline_components)

    le_for_metrics_final = pipeline_components.get('label_encoder')

    metrics = calculate_and_format_metrics_ots_style(y_true_scores_eval, y_true_hda_eval_text, predictions, le_for_metrics_final)
    evaluation_results_dict[model_name_key] = metrics
else:
    evaluation_results_dict[model_name_key] = default_metrics_inplay.copy()

print("\n\n--- In-Play Model Evaluation Summary ---")
if evaluation_results_dict:
    summary_df = pd.DataFrame.from_dict(evaluation_results_dict, orient='index')

    if model_name_key not in summary_df.index:
        print(f"No results found for model '{model_name_key}'.")
    else:
        metrics_to_print = summary_df.loc[model_name_key]
        print(f"--- Metrics for: {EVALUATION_SUBSET_NAME} ({metrics_to_print.get('Evaluated Samples', 0):.0f} matches) ---")

        # Single-valued outcome/score metrics.
        for line_prefix, metric_key in (
            ("   - Outcome Accuracy: ", 'Outcome Accuracy'),
            ("   - Exact Score Acc:  ", 'Exact Score Acc'),
            ("   - Exact Goal Diff:  ", 'Exact Goal Diff'),
        ):
            print(f"{line_prefix}{metrics_to_print.get(metric_key, np.nan):.4f}")

        # Regression metrics are reported as home / away pairs.
        r2_h_val, r2_a_val = (metrics_to_print.get(key, np.nan) for key in ('R2_Home', 'R2_Away'))
        print(f"   - R2 (Home/Away):   {r2_h_val:.4f} / {r2_a_val:.4f}")
        mse_h_val, mse_a_val = (metrics_to_print.get(key, np.nan) for key in ('MSE_Home', 'MSE_Away'))
        print(f"   - MSE (Home/Away):  {mse_h_val:.4f} / {mse_a_val:.4f}")
        mae_h_val, mae_a_val = (metrics_to_print.get(key, np.nan) for key in ('MAE_Home', 'MAE_Away'))
        print(f"   - MAE (Home/Away):  {mae_h_val:.4f} / {mae_a_val:.4f}")

        # Probability-based metrics.
        for line_prefix, metric_key in (
            ("   - ROC AUC (OvR W):  ", 'ROC AUC (OvR W)'),
            ("   - Brier Score Loss: ", 'Brier Score'),
        ):
            print(f"{line_prefix}{metrics_to_print.get(metric_key, np.nan):.4f}")
else:
    print("No model was evaluated.")

print("\n--- In-Play Evaluation Script Finished ---")



--- In-Play Model Evaluation Summary ---
--- Metrics for: Latest 500 (500 matches) ---
   - Outcome Accuracy: 0.7140
   - Exact Score Acc:  0.2980
   - Exact Goal Diff:  0.3900
   - R2 (Home/Away):   0.6468 / 0.6279
   - MSE (Home/Away):  0.6059 / 0.4254
   - MAE (Home/Away):  0.6213 / 0.5129
   - ROC AUC (OvR W):  0.8756
   - Brier Score Loss: 0.3882

--- In-Play Evaluation Script Finished ---
