# xT Model Training with FootballDataManager
This notebook demonstrates how to use the FootballDataManager to load event data, train an Expected Threat (xT) model, inspect summary statistics of the resulting xT grid, and save the trained model to disk for use in the API.

In [1]:
import sys
from pathlib import Path

# Make the parent of the current working directory importable so that the
# `app` package can be imported by the cells below.
# The membership check keeps the cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
parent_dir = str(Path.cwd().parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
# Now you can import from the app package

In [2]:
# Import required libraries
import pandas as pd
import numpy as np
from app.util.football_data_manager import FootballDataManager
from app.util.metrics.expected_threat import ExpectedThreatModel
import pickle
from pathlib import Path

In [3]:
# Initialize FootballDataManager
# `fdm` is the data-access object the following cells use to list
# competitions, matches, and per-match events.
fdm = FootballDataManager()

In [4]:
# Choose the competition/season to work with: the first row returned when
# asking for competitions with 360 data and women's competitions excluded.
comps = fdm.get_competitions(only_with_360=True, exclude_women=True)
comp = comps.iloc[0]
# Cast both identifiers to plain Python ints in one pass.
competition_id, season_id = (int(comp[field]) for field in ('competition_id', 'season_id'))
print(f"Using competition_id={competition_id}, season_id={season_id}, name={comp['competition_name']}")

Using competition_id=9, season_id=281, name=1. Bundesliga




In [5]:
# Load all events for the selected competition/season.
matches = fdm.get_matches(competition_id, season_id)
# Iterate the match_id column directly instead of DataFrame.iterrows():
# iterrows() materialises a Series per row and does not preserve column
# dtypes, while only the match_id value is needed here.
events_list = [fdm.get_events(match_id) for match_id in matches['match_id']]
# Stack the per-match event frames into one frame with a fresh 0..n-1 index.
events = pd.concat(events_list, ignore_index=True)
print(f"Loaded {len(events)} events from {len(matches)} matches.")



Loaded 137765 events from 34 matches.


In [6]:
# Restrict the events to the ones used for xT: passes and carries whose
# `location` is a list of exactly two elements.
move_mask = events['type'].isin(['Pass', 'Carry'])
location_mask = events['location'].apply(lambda loc: isinstance(loc, list) and len(loc) == 2)
xt_events = events[move_mask & location_mask]
print(f"Training on {len(xt_events)} pass/carry events.")

Training on 71583 pass/carry events.


In [7]:
# Train the xT model (using default grid size, or adjust as needed)
# The model is constructed with a (12, 8) grid and then set up via initialize().
# NOTE(review): `xt_events` (the pass/carry subset built in the previous cell)
# is never passed to the model here, so from this notebook alone the grid does
# not appear to be fitted to the loaded data — confirm what initialize() does
# and whether a data-driven fit step is intended.
xt_model = ExpectedThreatModel(grid_size=(12, 8))
xt_model.initialize()  # This can be replaced with a real training method if available
# Optionally, you could implement a fit/train method here if you have one

In [8]:
# Report summary statistics of the model's xT grid: its shape, value range,
# mean, and the total over all cells.
xt_grid = xt_model.xt_grid
print(
    f"xT grid shape: {xt_grid.shape}",
    f"xT grid min: {xt_grid.min():.4f}, max: {xt_grid.max():.4f}, mean: {xt_grid.mean():.4f}",
    f"Sum of xT grid: {xt_grid.sum():.4f}",
    sep="\n",
)

xT grid shape: (12, 8)
xT grid min: 0.0027, max: 0.3000, mean: 0.0778
Sum of xT grid: 7.4728


In [9]:
# (Optional) Sanity check: is higher xT associated with more goals?
# For demonstration, compare the average xT at the shot location for all shots
# against the average for shots whose outcome was a goal.
shot_events = events[events['type'] == 'Shot']
# Keep only shots whose `location` is a list of exactly two elements.
has_xy_location = shot_events['location'].apply(lambda loc: isinstance(loc, list) and len(loc) == 2)
# Build the 'xt' column with .assign(), which returns a new frame. Assigning a
# column onto the filtered frame directly writes into a slice of `events` and
# triggers pandas' SettingWithCopyWarning.
shots = shot_events[has_xy_location].assign(
    xt=lambda df: df['location'].apply(lambda loc: xt_model.get_xt_value(tuple(loc)))
)
goals = shots[shots['shot_outcome'] == 'Goal']
print(f"Average xT for all shots: {shots['xt'].mean():.4f}")
print(f"Average xT for goals: {goals['xt'].mean():.4f}")

Average xT for all shots: 0.2243
Average xT for goals: 0.2553


In [10]:
# Save the trained xT model to disk for use in the API
# The path is relative, so it resolves against the kernel's current working
# directory at the time this cell runs.
# NOTE(review): the first cell adds the *parent* of the working directory to
# sys.path for the `app` package — confirm the API looks for the model under
# this working-directory-relative location and not under the parent directory.
output_path = Path('data_cache/metrics/xt_model.pkl')
# Create the target directory (and any missing parents); no error if it exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
xt_model.save(str(output_path))
print(f"xT model saved to {output_path}")

xT model saved to data_cache/metrics/xt_model.pkl
