In [None]:
from statsbombpy import sb
import pandas as pd

# Define a function to convert the events dict into a DataFrame with debugging information
def convert_events_to_df(events_data, verbose=True):
    """Normalise event data into a DataFrame with a fixed column set, indexed by ``id``.

    Parameters
    ----------
    events_data : dict, DataFrame, or anything ``pd.DataFrame`` accepts
        A dict is read with ``orient='index'`` (one row per key). Any other
        input is handed to the ``pd.DataFrame`` constructor. A DataFrame input
        is copied first, so the caller's object is never modified.
    verbose : bool, default True
        When True, print the ``DEBUG:`` progress messages. Pass False to
        silence them.

    Returns
    -------
    pandas.DataFrame
        A frame holding exactly the desired columns (in the order listed
        below, minus ``id``), with ``id`` as the index. Columns absent from
        the input are added and filled with ``None``.
    """
    # Desired columns
    columns = ['50_50', 'bad_behaviour_card', 'ball_receipt_outcome',
       'block_deflection', 'block_save_block', 'carry_end_location',
       'clearance_aerial_won', 'clearance_body_part', 'clearance_head',
       'clearance_left_foot', 'clearance_right_foot', 'counterpress',
       'dribble_outcome', 'duel_outcome', 'duel_type', 'duration',
       'foul_committed_advantage', 'foul_committed_card',
       'foul_committed_penalty', 'foul_won_advantage', 'foul_won_defensive',
       'foul_won_penalty', 'goalkeeper_body_part', 'goalkeeper_end_location',
       'goalkeeper_outcome', 'goalkeeper_position', 'goalkeeper_punched_out',
       'goalkeeper_technique', 'goalkeeper_type', 'id', 'index',
       'injury_stoppage_in_chain', 'interception_outcome', 'location',
       'match_id', 'minute', 'off_camera', 'out', 'pass_aerial_won',
       'pass_angle', 'pass_assisted_shot_id', 'pass_body_part', 'pass_cross',
       'pass_cut_back', 'pass_deflected', 'pass_end_location',
       'pass_goal_assist', 'pass_height', 'pass_length', 'pass_outcome',
       'pass_outswinging', 'pass_recipient', 'pass_recipient_id',
       'pass_shot_assist', 'pass_switch', 'pass_technique',
       'pass_through_ball', 'pass_type', 'period', 'play_pattern', 'player',
       'player_id', 'position', 'possession', 'possession_team',
       'possession_team_id', 'related_events', 'second', 'shot_aerial_won',
       'shot_body_part', 'shot_deflected', 'shot_end_location',
       'shot_first_time', 'shot_freeze_frame', 'shot_key_pass_id',
       'shot_outcome', 'shot_statsbomb_xg', 'shot_technique', 'shot_type',
       'substitution_outcome', 'substitution_outcome_id',
       'substitution_replacement', 'substitution_replacement_id', 'tactics',
       'team', 'team_id', 'timestamp', 'type', 'under_pressure']

    def _debug(message):
        # Single switch for all diagnostic output.
        if verbose:
            print(message)

    # Create a DataFrame based on the type of events_data
    from_mapping = isinstance(events_data, dict)
    if from_mapping:
        df = pd.DataFrame.from_dict(events_data, orient='index')
    elif isinstance(events_data, pd.DataFrame):
        # Work on a copy: the column insertions below must not leak into the
        # caller's frame.
        df = events_data.copy()
    else:
        df = pd.DataFrame(events_data)
    _debug(f"DEBUG: Initial DataFrame shape: {df.shape}")

    # For dict input the row labels are the dict keys. If the records carry no
    # 'id' field, reuse those keys instead of ending up with an all-None index.
    # NOTE(review): assumes the dict keys are the event ids - confirm for
    # non-statsbombpy callers.
    if from_mapping and 'id' not in df.columns:
        df['id'] = df.index

    # Ensure all desired columns exist, adding missing ones with default None
    for col in columns:
        if col not in df.columns:
            _debug(f"DEBUG: Missing column '{col}' detected. Adding it with default None")
            df[col] = None

    # Keep only the desired columns, in the canonical order.
    df = df[columns]
    _debug(f"DEBUG: DataFrame shape after adding missing columns: {df.shape}")

    # Set 'id' as the primary key by making it the index (returns a new frame
    # rather than mutating a column subset in place).
    df = df.set_index('id')
    _debug(f"DEBUG: Set 'id' as index. Final DataFrame shape: {df.shape}")
    return df

# Fetch data as before
competitions = sb.competitions()
# NOTE(review): the competitions table displayed later in this notebook lists
# competition_id=9 / season_id=281 as "1. Bundesliga" 2023/2024, not the
# Premier League 2015/16 that the variable name suggests. The name is kept
# unchanged because later cells reference it.
premier_league_15_16_matches = sb.matches(competition_id=9, season_id=281)
events = sb.events(match_id=premier_league_15_16_matches['match_id'][0])

# DataFrame.info() prints its summary and returns None, so keep the filtered
# frame in competitions_360 and call .info() as a separate statement.
competitions_360 = competitions[competitions['match_available_360'].notna()]
competitions_360.info()

# Convert events dict to a DataFrame and display debugging info
events_df = convert_events_to_df(events)
print("DEBUG: Converted events_df shape:", events_df.shape)
print("Events DataFrame Columns:", events_df.columns)

events_df.head()

In [6]:
# Define a function to pull out freeze_frame data using sb.frames
def pull_freeze_frame(match_id):
    """Fetch the freeze-frame data for one match via ``sb.frames`` and print a summary.

    Parameters
    ----------
    match_id
        Match identifier, passed straight through to ``sb.frames``.

    Returns
    -------
    The object returned by ``sb.frames`` (a pandas DataFrame in the output
    recorded in this notebook).
    """
    freeze_frame = sb.frames(match_id=match_id)
    print(f"Freeze frame data for match_id {match_id}:")
    # .info() writes its report to stdout itself and returns None; wrapping it
    # in print() would append a stray "None" line to the output.
    freeze_frame.info()
    return freeze_frame


In [7]:
# Fetch the freeze-frame rows for the first match in the list and preview them
match_id = premier_league_15_16_matches['match_id'][0]
freeze_frame_df = pull_freeze_frame(match_id=match_id)
freeze_frame_df.head()



Freeze frame data for match_id 3895302:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56319 entries, 0 to 56318
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            56319 non-null  object
 1   visible_area  56319 non-null  object
 2   match_id      56319 non-null  int64 
 3   teammate      56319 non-null  bool  
 4   actor         56319 non-null  bool  
 5   keeper        56319 non-null  bool  
 6   location      56319 non-null  object
dtypes: bool(3), int64(1), object(3)
memory usage: 1.9+ MB
None


Unnamed: 0,id,visible_area,match_id,teammate,actor,keeper,location
0,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,"[16.638549335883955, 80.0, 42.634221931834276,...",3895302,True,False,False,"[33.623681824113135, 40.159018633164074]"
1,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,"[16.638549335883955, 80.0, 42.634221931834276,...",3895302,True,False,False,"[36.74041423873898, 53.63999223538578]"
2,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,"[16.638549335883955, 80.0, 42.634221931834276,...",3895302,True,False,False,"[41.68140190196648, 24.747150774999632]"
3,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,"[16.638549335883955, 80.0, 42.634221931834276,...",3895302,True,False,False,"[45.75852507051164, 34.30730339844553]"
4,ff56e821-21e9-4cef-ba2a-7eb5eb3769c6,"[16.638549335883955, 80.0, 42.634221931834276,...",3895302,True,False,False,"[49.77081997053183, 49.75472239512554]"


In [8]:
# Boolean mask over events_df's index: True where the event id also occurs
# in freeze_frame_df['id'].
common_ids = events_df.index.isin(freeze_frame_df['id'])

# Matched count, total count, then the ids that have no freeze-frame rows.
print('Number of ids in events_df present in freeze_frame_df:', common_ids.sum())
print('Total ids in events_df:', events_df.shape[0])
print('Ids not found:', events_df.index[~common_ids])

Number of ids in events_df present in freeze_frame_df: 3525
Total ids in events_df: 4223
Ids not found: Index(['794ec549-5288-4d1a-93e8-0fc6d3968784',
       '3a9a2c57-b177-47ed-9447-11f081b0e388',
       '1f956a2b-ece0-475b-8802-e82ed922c307',
       '79b90fbb-2cfd-419b-90f4-f0e5980c47d3',
       'd5386215-ef71-4cb0-a46b-087754baba4e',
       'd1909cc4-05fd-4f56-aaba-6b55a729f6f4',
       '221b0c8d-6386-4ae8-bb4a-a1dc98742312',
       '1d43d9bb-9f3e-4ee1-85c8-1773543caa18',
       '75b4f810-0483-47f7-b87f-dfab064b2781',
       'cd460252-ecf1-4f5e-a65e-f2919175a0f2',
       ...
       'c9846a34-9c23-4220-bbb1-494aac45d185',
       '99cc0537-4d36-4026-a5fb-5a9d518968ae',
       '951df4ef-6fe9-4ed5-9d2d-fc81ba113620',
       '679bc3e6-5c1e-4a9b-b832-7b2f924ce052',
       '42a0bfd8-10e7-4ca7-8843-32f4e92c830a',
       '60b25f6e-244d-44f6-aa32-e8bd6ddbbbaa',
       'a3d7c89d-209a-4afa-a1d4-6b2e09ecd3e6',
       '032eff1d-4af7-4f99-b41c-969fb96fcc39',
       '32a65be5-91ac-4743-b4e8-75003bd

In [9]:
# Event ids absent from freeze_frame_df, followed by the 'type' of each one
missing_ids = events_df.index[~common_ids]
print('Event types for missing ids:', events_df['type'].loc[missing_ids], sep='\n')


Event types for missing ids:
id
794ec549-5288-4d1a-93e8-0fc6d3968784       Starting XI
3a9a2c57-b177-47ed-9447-11f081b0e388       Starting XI
1f956a2b-ece0-475b-8802-e82ed922c307        Half Start
79b90fbb-2cfd-419b-90f4-f0e5980c47d3        Half Start
d5386215-ef71-4cb0-a46b-087754baba4e        Half Start
                                             ...      
60b25f6e-244d-44f6-aa32-e8bd6ddbbbaa      Substitution
a3d7c89d-209a-4afa-a1d4-6b2e09ecd3e6     Bad Behaviour
032eff1d-4af7-4f99-b41c-969fb96fcc39     Bad Behaviour
32a65be5-91ac-4743-b4e8-75003bd305ed    Tactical Shift
ef6e9049-43e1-45ce-bddb-920b5946cb2c    Tactical Shift
Name: type, Length: 698, dtype: object


In [10]:
# Event 'type' values for the ids that have no freeze-frame rows
missing_types = events_df['type'].loc[missing_ids]

# Frequency of each event type among the missing ids
print('Count of event types for missing ids:', missing_types.value_counts(), sep='\n')

# Distinct event types among the missing ids, in order of first appearance
print('Unique event types for missing ids:', missing_types.unique(), sep='\n')


Count of event types for missing ids:
type
Pass                 220
Ball Receipt*        187
Carry                154
Pressure              48
Goal Keeper           17
Ball Recovery         12
Duel                   8
Substitution           8
Injury Stoppage        6
Miscontrol             4
Half Start             4
Half End               4
Dribble                3
Interception           2
Starting XI            2
Shot                   2
Referee Ball-Drop      2
Dispossessed           2
Foul Won               2
Tactical Shift         2
Bad Behaviour          2
Clearance              2
Foul Committed         1
Block                  1
Player On              1
Dribbled Past          1
Player Off             1
Name: count, dtype: int64
Unique event types for missing ids:
['Starting XI' 'Half Start' 'Pass' 'Ball Receipt*' 'Carry' 'Pressure'
 'Miscontrol' 'Dribble' 'Duel' 'Interception' 'Ball Recovery'
 'Dispossessed' 'Block' 'Foul Committed' 'Foul Won' 'Shot' 'Goal Keeper'
 'Injury Stoppa

In [11]:
# Event 'type' values for the ids that do appear in freeze_frame_df
common_event_types = events_df['type'].loc[events_df.index[common_ids]]

# Frequency of each event type among events with freeze-frame data
print('Count of event types for events with freeze_frame data:', common_event_types.value_counts(), sep='\n')

# Distinct event types among events with freeze-frame data
print('Unique event types for events with freeze_frame data:', common_event_types.unique(), sep='\n')


Count of event types for events with freeze_frame data:
type
Ball Receipt*     968
Pass              952
Carry             868
Pressure          384
Ball Recovery      82
Duel               44
Block              35
Shot               25
Dribble            23
Clearance          22
Dispossessed       21
Miscontrol         19
Goal Keeper        17
Foul Committed     16
Interception       15
Foul Won           15
Dribbled Past      11
50/50               4
Offside             2
Error               1
Shield              1
Name: count, dtype: int64
Unique event types for events with freeze_frame data:
['Pass' 'Ball Receipt*' 'Carry' 'Pressure' 'Miscontrol' 'Dribble' 'Duel'
 'Interception' 'Ball Recovery' 'Dispossessed' 'Block' 'Foul Committed'
 'Foul Won' 'Error' 'Shot' 'Goal Keeper' '50/50' 'Clearance' 'Offside'
 'Dribbled Past' 'Shield']


In [12]:
# Display the competitions table returned by sb.competitions() in the first cell
competitions

Unnamed: 0,competition_id,season_id,country_name,competition_name,competition_gender,competition_youth,competition_international,season_name,match_updated,match_updated_360,match_available_360,match_available
0,9,281,Germany,1. Bundesliga,male,False,False,2023/2024,2024-07-15T14:15:54.671676,2024-07-15T14:17:00.877356,2024-07-15T14:17:00.877356,2024-07-15T14:15:54.671676
1,9,27,Germany,1. Bundesliga,male,False,False,2015/2016,2024-05-19T11:11:14.192381,,,2024-05-19T11:11:14.192381
2,1267,107,Africa,African Cup of Nations,male,False,True,2023,2024-06-13T07:51:02.452825,,,2024-06-13T07:51:02.452825
3,16,4,Europe,Champions League,male,False,False,2018/2019,2024-06-12T07:44:38.559714,2021-06-13T16:17:31.694,,2024-06-12T07:44:38.559714
4,16,1,Europe,Champions League,male,False,False,2017/2018,2024-02-13T02:35:28.134882,2021-06-13T16:17:31.694,,2024-02-13T02:35:28.134882
...,...,...,...,...,...,...,...,...,...,...,...,...
69,55,43,Europe,UEFA Euro,male,False,True,2020,2024-04-16T12:44:40.558402,2024-04-16T12:47:18.505110,2024-04-16T12:47:18.505110,2024-04-16T12:44:40.558402
70,35,75,Europe,UEFA Europa League,male,False,False,1988/1989,2024-02-12T14:45:05.702250,2021-06-13T16:17:31.694,,2024-02-12T14:45:05.702250
71,53,106,Europe,UEFA Women's Euro,female,False,True,2022,2024-02-13T13:27:17.178263,2024-02-13T13:30:52.820588,2024-02-13T13:30:52.820588,2024-02-13T13:27:17.178263
72,72,107,International,Women's World Cup,female,False,True,2023,2024-07-14T16:59:48.469596,2024-07-14T17:01:38.887279,2024-07-14T17:01:38.887279,2024-07-14T16:59:48.469596
