## Packages and configuration

In [None]:
from statsbombpy import sb
import pandas as pd
from mplsoccer import VerticalPitch,Pitch
from highlight_text import ax_text, fig_text
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
import seaborn as sns
import pprint

## Load Competition, Match, and Event Data from statsbombpy

In [None]:
# Display settings: no cap on the number of columns shown, width left unset (None)
pd.set_option('display.max_columns', None, 'display.width', None)

# Fetch all free competitions through the statsbombpy API, then check the women's comps
free_comps = sb.competitions()
women_comps = free_comps.loc[free_comps['competition_gender'].eq('female')]
women_comps

## EUROS

In [None]:
# Display settings: no cap on the number of columns shown, width left unset (None)
pd.set_option('display.max_columns', None, 'display.width', None)

# Match list for competition 53 / season 315 (the EUROS section of this notebook)
matches_euros = sb.matches(season_id=315, competition_id=53)
matches_euros.head(n=2)

### EUROS final - events

In [None]:
# Take the first match whose competition stage is 'Final' and remember its id
final_match = matches_euros.loc[matches_euros['competition_stage'].eq('Final')].iloc[0]
final_match_id = final_match.loc['match_id']

# Event data for that match
events_df = sb.events(match_id=final_match_id)

In [None]:
# Keep only the shot events; .copy() so the new column below is written to an
# independent frame rather than to a slice of events_df
shots_df = events_df.loc[events_df['type'] == 'Shot'].copy()

# Mark goals: 1 when shot_outcome is 'Goal', 0 for every other outcome.
# A vectorised comparison replaces the per-row Python lambda.
shots_df['goal'] = shots_df['shot_outcome'].eq('Goal').astype(int)

In [None]:
# LINEUP

# Select the first two Starting XI rows (each row is read below as one team's lineup)
xi_rows = events_df.loc[events_df['type'] == 'Starting XI'].iloc[:2]

# One DataFrame per Starting XI row, concatenated after the loop
lineups = []

for _, xi_row in xi_rows.iterrows():
    tactics = xi_row['tactics']
    team_lineup = pd.DataFrame(tactics['lineup'])

    # 'player' and 'position' hold dicts with 'id' and 'name' keys; split each
    # into an id column and a name column (the name replaces the dict column)
    players = team_lineup['player']
    positions = team_lineup['position']
    team_lineup['player_id'] = [p['id'] for p in players]
    team_lineup['player'] = [p['name'] for p in players]
    team_lineup['position_id'] = [p['id'] for p in positions]
    team_lineup['position'] = [p['name'] for p in positions]

    # Add team and formation. The formation was previously announced in this
    # comment but never stored; .get() leaves it as None if the key is absent.
    team_lineup['team'] = xi_row['team']
    team_lineup['formation'] = tactics.get('formation')

    lineups.append(team_lineup)

# Combine both teams into one DataFrame
lineup_df = pd.concat(lineups, ignore_index=True)

lineup_df_euros = lineup_df

In [None]:
# Exclude the penalty shoot-out. StatsBomb records the shoot-out as period 5
# (periods 1-4 are the two halves of normal time and of extra time), so the
# filter is on period: a `minute < 120` cut-off would also discard shots taken
# in stoppage time at the end of extra time.
# The name `shots_up_to_120` is kept because other cells read it.
shots_up_to_120 = shots_df[shots_df['period'] < 5]

# Per player (and team): number of shots, total xG, average xG per shot and goals,
# ordered by total xG, then joined with the starting-lineup position/jersey info.
# The left join keeps every shooter; players absent from the lineup get NaN here.
player_xg_summary = (
    shots_up_to_120
    .groupby(['player', 'team'])
    .agg(
        shots=('shot_statsbomb_xg', 'count'),
        total_xg=('shot_statsbomb_xg', 'sum'),
        xg_per_shot=('shot_statsbomb_xg', 'mean'),
        goals=('goal', 'sum'),
    )
    .sort_values('total_xg', ascending=False)
    .reset_index()
    .merge(
        lineup_df_euros[['player', 'team', 'position', 'jersey_number']],
        on=['player', 'team'],
        how='left',
    )
)

# Shooters not found in the starting lineup are labelled as substitutes;
# -1 is the placeholder jersey number so the column can be cast back to int
player_xg_summary['position'] = player_xg_summary['position'].fillna('Sub')
player_xg_summary['jersey_number'] = player_xg_summary['jersey_number'].fillna(-1).astype(int)

# Reorder columns
player_xg_summary = player_xg_summary[
    ['player', 'team', 'position', 'jersey_number', 'shots', 'total_xg', 'xg_per_shot', 'goals']
]

player_xg_summary_euros = player_xg_summary
player_xg_summary_euros

In [None]:
# Split the summed xG into rows labelled 'Sub' versus every other position
is_sub = player_xg_summary_euros['position'].eq('Sub')
xg_values = player_xg_summary_euros['total_xg']

subs_xg = xg_values[is_sub].sum()
starters_xg = xg_values[~is_sub].sum()

# Two-row table for a quick side-by-side view
xg_split = pd.DataFrame(
    [('Subs', subs_xg), ('Starters/Other', starters_xg)],
    columns=['Category', 'Total_xG'],
)

xg_split

In [None]:
# Per-team totals from shots_up_to_120: shot count, summed xG, goals and
# average xG per shot, ordered from highest to lowest total xG
team_xg_summary = (
    shots_up_to_120
    .groupby('team')
    .agg(
        shots=pd.NamedAgg(column='shot_statsbomb_xg', aggfunc='count'),
        total_xg=pd.NamedAgg(column='shot_statsbomb_xg', aggfunc='sum'),
        goals=pd.NamedAgg(column='goal', aggfunc='sum'),
        xg_per_shot=pd.NamedAgg(column='shot_statsbomb_xg', aggfunc='mean'),
    )
    .sort_values(by='total_xg', ascending=False)
)

team_xg_summary

## WORLDS

In [None]:
# Display settings: no cap on the number of columns shown, width left unset (None)
pd.set_option('display.max_columns', None, 'display.width', None)

# Match list for competition 72 / season 107 (the WORLDS section of this notebook)
matches_worlds = sb.matches(season_id=107, competition_id=72)
matches_worlds.head(n=2)

In [None]:
# Take the first match whose competition stage is 'Final' and remember its id
final_match = matches_worlds.loc[matches_worlds['competition_stage'].eq('Final')].iloc[0]
final_match_id = final_match.loc['match_id']

# Event data for that match
events_df = sb.events(match_id=final_match_id)

In [None]:
# Keep only the shot events; .copy() so the new column below is written to an
# independent frame rather than to a slice of events_df
shots_df = events_df.loc[events_df['type'] == 'Shot'].copy()

# Mark goals: 1 when shot_outcome is 'Goal', 0 for every other outcome.
# A vectorised comparison replaces the per-row Python lambda.
shots_df['goal'] = shots_df['shot_outcome'].eq('Goal').astype(int)

In [None]:
# LINEUP

# Select the first two Starting XI rows (each row is read below as one team's lineup)
xi_rows = events_df.loc[events_df['type'] == 'Starting XI'].iloc[:2]

# One DataFrame per Starting XI row, concatenated after the loop
lineups = []

for _, xi_row in xi_rows.iterrows():
    tactics = xi_row['tactics']
    team_lineup = pd.DataFrame(tactics['lineup'])

    # 'player' and 'position' hold dicts with 'id' and 'name' keys; split each
    # into an id column and a name column (the name replaces the dict column)
    players = team_lineup['player']
    positions = team_lineup['position']
    team_lineup['player_id'] = [p['id'] for p in players]
    team_lineup['player'] = [p['name'] for p in players]
    team_lineup['position_id'] = [p['id'] for p in positions]
    team_lineup['position'] = [p['name'] for p in positions]

    # Add team and formation. The formation was previously announced in this
    # comment but never stored; .get() leaves it as None if the key is absent.
    team_lineup['team'] = xi_row['team']
    team_lineup['formation'] = tactics.get('formation')

    lineups.append(team_lineup)

# Combine both teams into one DataFrame
lineup_df = pd.concat(lineups, ignore_index=True)

lineup_df_worlds = lineup_df

In [None]:
# Exclude the penalty shoot-out. StatsBomb records the shoot-out as period 5
# (periods 1-4 are the two halves of normal time and of extra time), so the
# filter is on period: a `minute < 120` cut-off would also discard shots taken
# in stoppage time at the end of extra time.
# The name `shots_up_to_120` is kept because other cells read it.
shots_up_to_120 = shots_df[shots_df['period'] < 5]

# Per player (and team): number of shots, total xG, average xG per shot and goals,
# ordered by total xG, then joined with the starting-lineup position/jersey info.
# The left join keeps every shooter; players absent from the lineup get NaN here.
player_xg_summary = (
    shots_up_to_120
    .groupby(['player', 'team'])
    .agg(
        shots=('shot_statsbomb_xg', 'count'),
        total_xg=('shot_statsbomb_xg', 'sum'),
        xg_per_shot=('shot_statsbomb_xg', 'mean'),
        goals=('goal', 'sum'),
    )
    .sort_values('total_xg', ascending=False)
    .reset_index()
    .merge(
        lineup_df_worlds[['player', 'team', 'position', 'jersey_number']],
        on=['player', 'team'],
        how='left',
    )
)

# Shooters not found in the starting lineup are labelled as substitutes;
# -1 is the placeholder jersey number so the column can be cast back to int
player_xg_summary['position'] = player_xg_summary['position'].fillna('Sub')
player_xg_summary['jersey_number'] = player_xg_summary['jersey_number'].fillna(-1).astype(int)

# Reorder columns
player_xg_summary = player_xg_summary[
    ['player', 'team', 'position', 'jersey_number', 'shots', 'total_xg', 'xg_per_shot', 'goals']
]

player_xg_summary_worlds = player_xg_summary
player_xg_summary_worlds

In [None]:
# Split the summed xG into rows labelled 'Sub' versus every other position
is_sub = player_xg_summary_worlds['position'].eq('Sub')
xg_values = player_xg_summary_worlds['total_xg']

subs_xg = xg_values[is_sub].sum()
starters_xg = xg_values[~is_sub].sum()

# Two-row table for a quick side-by-side view
xg_split = pd.DataFrame(
    [('Subs', subs_xg), ('Starters/Other', starters_xg)],
    columns=['Category', 'Total_xG'],
)

xg_split

In [None]:
# Per-team totals from shots_up_to_120: shot count, summed xG, goals and
# average xG per shot, ordered from highest to lowest total xG
team_xg_summary = (
    shots_up_to_120
    .groupby('team')
    .agg(
        shots=pd.NamedAgg(column='shot_statsbomb_xg', aggfunc='count'),
        total_xg=pd.NamedAgg(column='shot_statsbomb_xg', aggfunc='sum'),
        goals=pd.NamedAgg(column='goal', aggfunc='sum'),
        xg_per_shot=pd.NamedAgg(column='shot_statsbomb_xg', aggfunc='mean'),
    )
    .sort_values(by='total_xg', ascending=False)
)

team_xg_summary