In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from mplsoccer import Pitch
from highlight_text import fig_text
import os
import glob
import io
import matplotlib.font_manager as fm
from matplotlib import font_manager

In [4]:
# Register every bundled TrueType/OpenType font with Matplotlib so that the
# style sheet loaded below can refer to the fonts by family name.
# Expected layout: <font_path>/<family directory>/<font file>.
font_path = "../assets/fonts"
for family_dir_name in os.listdir(font_path):
    family_dir = os.path.join(font_path, family_dir_name)
    # Stray files in the fonts root (e.g. .DS_Store, README) are not family
    # directories; listing them would raise NotADirectoryError.
    if not os.path.isdir(family_dir):
        continue
    for font_file in os.listdir(family_dir):
        # Compare the extension case-insensitively so "Font.TTF" is also picked up.
        if os.path.splitext(font_file)[1].lower() in (".ttf", ".otf"):
            fm.fontManager.addfont(os.path.join(family_dir, font_file))

plt.style.use("soc_base.mplstyle")

In [5]:
# --- Colour palette (hex strings) -------------------------------------------
bg_color = "#fffcf7"
line_color = "#000000"

green = "#69f900"
red = "#cb2217"
gold = "#ffc758"

blue = "#00285e"
sky_blue = "#6caddf"
col2 = "#00a0de"

# --- Text box styling --------------------------------------------------------
# Borderless box with a fixed padding; presumably intended as a `bbox=` argument
# for text artists (not used in the cells visible here).
bbox_pad = 0.8
bboxprops = dict(linewidth=0, pad=bbox_pad)

In [6]:
# Folder holding one event CSV per file. This is a machine-specific absolute
# path: point it at your own copy of the data before running.
# path = r'C:\Users\Hp2\Music\joel\Análisis de fulbo\WSL24-25' # use the path with your data
path = r'C:\Users\MIRIRAI\Documents\Joel\Analisis-de-fulbo-main\WSL24-25'

# glob returns files in an OS-dependent order; sort so the row order of the
# concatenated frame (which later cells rely on when linking consecutive events)
# is reproducible across machines and runs.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    # pd.concat would otherwise fail with the opaque "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {path!r}")

df = pd.concat((pd.read_csv(f) for f in all_files), ignore_index=True)

# Attach the FotMob lookup (matched on club name); a left join keeps every
# event row even when its team has no entry in fotmob.csv.
fotmob = pd.read_csv('fotmob.csv')
df = pd.merge(left=df, right=fotmob, left_on='teamName', right_on='club', how='left')

In [7]:
# Inspect the column labels of the merged event frame to see which fields are available.
df.columns

Index(['level_0', 'index', 'eventId', 'minute', 'second', 'teamId', 'x', 'y',
       'expandedMinute', 'period', 'type', 'outcomeType', 'qualifiers',
       'satisfiedEventsTypes', 'isTouch', 'playerId', 'endX', 'endY',
       'blockedX', 'blockedY', 'goalMouthZ', 'goalMouthY', 'isShot',
       'relatedEventId', 'relatedPlayerId', 'isGoal_x', 'cardType_x',
       'isOwnGoal_x', 'cumulative_mins', 'isGoal_y', 'cardType_y',
       'isOwnGoal_y', 'x1_bin_xT', 'y1_bin_xT', 'x2_bin_xT', 'y2_bin_xT',
       'start_zone_value_xT', 'end_zone_value_xT', 'xT', 'teamName',
       'oppositionTeamName', 'shirtNo', 'name', 'position', 'isFirstEleven',
       'prog_pass', 'prog_carry', 'pass_or_carry_angle', 'shortName',
       'possession_id', 'possession_team', 'club', 'fotmob_id'],
      dtype='object')

In [None]:
def calculate_progressive_passes(df, goal_x=120, goal_y=40, threshold=0.75):
    """
    Flag progressive passes - passes that end markedly closer to the goal than they start.

    A pass is progressive when its end point's distance to the goal is less than
    ``threshold`` times its start point's distance (default 0.75, i.e. the pass
    moves the ball at least 25% closer to the goal).

    Parameters:
    df (DataFrame): Must contain the columns 'x_n', 'y_n' (pass origin) and
        'endX_n', 'endY_n' (pass destination).
    goal_x (float): x coordinate of the goal the passes are measured against.
    goal_y (float): y coordinate of the goal the passes are measured against.
    threshold (float): Maximum end/start distance ratio for a pass to count as progressive.

    Returns:
    DataFrame: The same ``df`` object, modified in place with three added
        columns: 'beginning' and 'end' (distances to the goal) and
        'progressive' (1 or 0).

    NOTE(review): the default goal point (120, 40) presumably corresponds to a
    120x80 coordinate system - confirm the event coordinates use that scale.
    """
    # Straight-line distance to the goal at the pass origin and at its destination
    df['beginning'] = np.hypot(goal_x - df['x_n'], goal_y - df['y_n'])
    df['end'] = np.hypot(goal_x - df['endX_n'], goal_y - df['endY_n'])

    # Missing coordinates (NaN) or an origin exactly on the goal point make the
    # ratio NaN/inf; the comparison is then False, so such rows get 0.
    # Stored as int so the flag can be summed directly in aggregations.
    df['progressive'] = (df['end'] / df['beginning'] < threshold).astype(int)

    return df

In [None]:
def _safe_percentage(numerator, denominator):
    """Return numerator / denominator * 100 element-wise, giving 0 where the denominator is 0."""
    zero_denominator = denominator == 0
    share = numerator / denominator.mask(zero_denominator) * 100
    return share.mask(zero_denominator, 0.0)


def analyze_post_recovery_actions(data, min_90s_played=0):
    """
    Analyze post-recovery actions including ball retention and progressive passes.

    Each event row is linked to the row immediately after it; a ball recovery
    whose next row is a Pass, TakeOn, Dispossessed, OffsidePass or Foul (with a
    Successful/Unsuccessful outcome) counts as a post-recovery action.

    Parameters:
    data (DataFrame): Event data. Must contain the columns 'x', 'y', 'endX',
        'endY', 'type', 'outcomeType', 'player' and 'team'. It is modified in
        place: the columns 'x_n', 'y_n', 'endX_n', 'endY_n', 'type_n',
        'outcomeType_n' and 'PR_pass' are added.
    min_90s_played (float): Accepted for API compatibility but not applied -
        no minutes-played data is available here, so no player is filtered out.
        A warning is emitted when a non-zero value is passed.

    Returns:
    tuple: (final_df, PR_df) where final_df holds one row per player/team with
        the post-recovery statistics and PR_df holds the individual
        post-recovery event rows (including the 'progressive' flag).

    Raises:
    KeyError: If any required column is missing (raised before ``data`` is modified).

    NOTE(review): the "next event" is simply the next row of ``data``; nothing
    here checks that it belongs to the same match, team or player as the
    recovery - confirm the input ordering makes that assumption acceptable.
    """
    required_columns = ['x', 'y', 'endX', 'endY', 'type', 'outcomeType', 'player', 'team']
    missing_columns = [col for col in required_columns if col not in data.columns]
    if missing_columns:
        # Fail before touching `data` so a bad call does not leave it half-modified.
        raise KeyError(
            f"analyze_post_recovery_actions: missing required column(s) {missing_columns}; "
            "rename the input columns before calling"
        )

    if min_90s_played:
        import warnings
        warnings.warn(
            "min_90s_played is not applied: this function has no minutes-played data, "
            "so no players are filtered out.",
            stacklevel=2,
        )

    # Create shifted columns to link each event with the next event
    for col in ('x', 'y', 'endX', 'endY', 'type', 'outcomeType'):
        data[f'{col}_n'] = data[col].shift(-1)

    # Identify ball recoveries followed by one of the tracked action types
    follow_up_types = ['Pass', 'TakeOn', 'Dispossessed', 'OffsidePass', 'Foul']
    data['PR_pass'] = (
        (data['type'] == 'BallRecovery')
        & data['type_n'].isin(follow_up_types)
        & data['outcomeType_n'].isin(['Successful', 'Unsuccessful'])
    )

    # Extract the post-recovery events (copy so the added columns below do not touch `data`)
    PR_df = data[data['PR_pass']].copy()

    # Flag progressive follow-up passes (adds 'beginning', 'end', 'progressive')
    PR_df = calculate_progressive_passes(PR_df)

    # Indicator columns for the different kinds of post-recovery events
    next_type = PR_df['type_n']
    next_succeeded = PR_df['outcomeType_n'] == 'Successful'
    next_failed = PR_df['outcomeType_n'] == 'Unsuccessful'
    PR_df['Successful_Pass_PR'] = (next_type == 'Pass') & next_succeeded
    PR_df['Unsuccessful_Pass_PR'] = (next_type == 'Pass') & next_failed
    PR_df['Dispossessed_PR'] = (next_type == 'Dispossessed') & next_succeeded
    PR_df['Successful_TakeOn_PR'] = (next_type == 'TakeOn') & next_succeeded
    PR_df['Unsuccessful_TakeOn_PR'] = (next_type == 'TakeOn') & next_failed
    PR_df['OffsidePass_PR'] = (next_type == 'OffsidePass')
    PR_df['Foul_PR'] = (next_type == 'Foul')

    # Only completed passes count as progressive passes
    PR_df['Successful_Progressive_Pass_PR'] = (PR_df['progressive'] == 1) & PR_df['Successful_Pass_PR']

    # Group by player and calculate totals
    PR_df_grouped = PR_df.groupby(['player', 'team']).agg(
        Successful_Pass_PR=('Successful_Pass_PR', 'sum'),
        Unsuccessful_Pass_PR=('Unsuccessful_Pass_PR', 'sum'),
        Successful_TakeOn_PR=('Successful_TakeOn_PR', 'sum'),
        Unsuccessful_TakeOn_PR=('Unsuccessful_TakeOn_PR', 'sum'),
        Dispossessed_PR=('Dispossessed_PR', 'sum'),
        OffsidePass_PR=('OffsidePass_PR', 'sum'),
        Foul_PR=('Foul_PR', 'sum'),
        Progressive_Pass_PR=('Successful_Progressive_Pass_PR', 'sum')
    ).reset_index()

    # Total ball recoveries per player. This is keyed on player only, so a player
    # listed under several teams gets the same total on each of their rows.
    total_recoveries = (
        data[data['type'] == 'BallRecovery']
        .groupby(['player'])
        .size()
        .reset_index(name='Total_Recoveries')
    )
    PR_df_grouped = PR_df_grouped.merge(total_recoveries, on='player', how='left')

    # Ball retention: share of tracked post-recovery actions that kept the ball
    # (completed pass, completed take-on, or a foul).
    retained = (
        PR_df_grouped['Successful_Pass_PR']
        + PR_df_grouped['Successful_TakeOn_PR']
        + PR_df_grouped['Foul_PR']
    )
    tracked_actions = (
        PR_df_grouped['Successful_Pass_PR'] + PR_df_grouped['Unsuccessful_Pass_PR']
        + PR_df_grouped['Dispossessed_PR'] + PR_df_grouped['OffsidePass_PR']
        + PR_df_grouped['Unsuccessful_TakeOn_PR'] + PR_df_grouped['Successful_TakeOn_PR']
        + PR_df_grouped['Foul_PR']
    )
    # Exact division with an explicit zero guard: adding a small epsilon to the
    # denominator would bias every percentage (1 of 1 would give 99.99%).
    PR_df_grouped['Ball_Retention_%'] = _safe_percentage(retained, tracked_actions)

    # Share of all recoveries that were followed by a completed progressive pass
    PR_df_grouped['%_Prog_Passes'] = _safe_percentage(
        PR_df_grouped['Progressive_Pass_PR'], PR_df_grouped['Total_Recoveries']
    )

    # Overall counts of each event type by player
    overall_counts = data.groupby(['player', 'type']).size().unstack().reset_index().fillna(0)

    # Counts of successful and unsuccessful events of each type by player
    successful_counts = (
        data[data['outcomeType'] == 'Successful']
        .groupby(['player', 'type']).size().unstack()
        .add_prefix('Successful_').reset_index().fillna(0)
    )
    unsuccessful_counts = (
        data[data['outcomeType'] == 'Unsuccessful']
        .groupby(['player', 'type']).size().unstack()
        .add_prefix('Unsuccessful_').reset_index().fillna(0)
    )

    # Merge all counts into a single DataFrame
    player_stats = (
        overall_counts
        .merge(successful_counts, on='player', how='left')
        .merge(unsuccessful_counts, on='player', how='left')
        .fillna(0)
    )

    # Keep only players that have at least one post-recovery action
    merged_df = player_stats.merge(PR_df_grouped, on='player', how='inner')

    columns_to_display = [
        'player', 'team', 'Ball_Retention_%', 'Total_Recoveries',
        'Progressive_Pass_PR', '%_Prog_Passes', 'Successful_Pass_PR', 'Unsuccessful_Pass_PR',
        'Successful_TakeOn_PR', 'Unsuccessful_TakeOn_PR', 'Dispossessed_PR',
        'OffsidePass_PR', 'Foul_PR'
    ]

    # Overall recovery count from the per-type table, when that column exists
    if 'BallRecovery' in merged_df.columns:
        columns_to_display.append('BallRecovery')

    final_df = merged_df[columns_to_display]

    return final_df, PR_df

In [None]:
def plot_post_recovery_passes(player_data, player_name, team_name, games_played, player_stats):
    """
    Plot a player's passes made directly after a ball recovery on a football pitch.

    Completed, completed-progressive and unsuccessful follow-up passes are drawn
    as comet lines with a marker at each pass end point.

    Parameters:
    player_data (DataFrame): Event data filtered for a specific player. Must
        contain 'type' plus the next-event columns 'type_n', 'outcomeType_n',
        'x_n', 'y_n', 'endX_n' and 'endY_n'.
    player_name (str): Name of the player
    team_name (str): Name of the player's team
    games_played (float): Number of 90-minute games played (used for the per-90 figures)
    player_stats (DataFrame): Player statistics with a 'player' column and a
        'Ball_Retention_%' column

    Returns:
    matplotlib.figure.Figure: The resulting figure

    Raises:
    IndexError: If ``player_name`` has no row in ``player_stats``.

    Side effects: switches the global Matplotlib style to "dark_background" and
    overrides several rcParams; these stay in force after the call.
    """
    # Get player stats from the stats dataframe
    player_rows = player_stats[player_stats['player'] == player_name]
    if player_rows.empty:
        raise IndexError(f"Player {player_name!r} not found in player_stats")
    ball_retention_pct = player_rows.iloc[0]['Ball_Retention_%']

    # Ball recoveries whose next event is a pass, split by the pass outcome
    recov_data = player_data[player_data['type'] == 'BallRecovery']
    next_is_pass = recov_data['type_n'] == 'Pass'
    succ_pass_afrecov = recov_data[next_is_pass & (recov_data['outcomeType_n'] == 'Successful')]
    unsucc_pass_afrecov = recov_data[next_is_pass & (recov_data['outcomeType_n'] == 'Unsuccessful')]

    # Flag progressive passes among the completed ones. reset_index(drop=True)
    # yields a fresh frame (so the helper's in-place writes do not touch
    # player_data) and, unlike a plain reset_index(), does not fail when the
    # data already carries 'index' and 'level_0' columns.
    succ_flagged = calculate_progressive_passes(succ_pass_afrecov.reset_index(drop=True))
    succ_pass_afrecov_prog = succ_flagged[succ_flagged['progressive'] == 1]

    # Count the passes
    succ_PR = len(succ_pass_afrecov)
    unsucc_PR = len(unsucc_pass_afrecov)
    prog_PR = len(succ_pass_afrecov_prog)  # Only successful progressive passes

    # Per-90 figures; 0 when no playing time is recorded
    def _per90(count):
        return count / games_played if games_played > 0 else 0

    succ_PR_per90 = _per90(succ_PR)
    unsucc_PR_per90 = _per90(unsucc_PR)
    prog_PR_per90 = _per90(prog_PR)

    # Create the plot
    fig, ax = plt.subplots(figsize=(15.5, 10))
    fig.set_facecolor('#242526')
    ax.patch.set_facecolor('#242526')

    # Draw the pitch
    pitch = Pitch(pitch_type='statsbomb', pitch_color='#1e1e1e', line_color='#FFFFFF')
    pitch.draw(ax=ax)

    # One layer per pass category, drawn in this order:
    # (passes, colour, label, marker zorder, extra line kwargs, marker alpha)
    pass_layers = [
        (succ_pass_afrecov, '#24a8ff', 'completed passes', 3, {}, 1),
        (succ_pass_afrecov_prog, '#03fc24', 'progressive passes', 4, {}, 1),
        (unsucc_pass_afrecov, '#FF5959', 'unsuccessful passes', 1, {'alpha': 0.5}, 0.8),
    ]
    for passes, colour, label, marker_zorder, line_kwargs, marker_alpha in pass_layers:
        if passes.empty:
            continue
        pitch.lines(
            passes.x_n, passes.y_n,
            passes.endX_n, passes.endY_n,
            lw=4, transparent=True, comet=True, label=label,
            color=colour, ax=ax, **line_kwargs
        )
        # Markers sit on the pass end points, so label them as such
        pitch.scatter(
            passes.endX_n, passes.endY_n,
            s=70, marker='o', edgecolors='none', c=colour,
            zorder=marker_zorder, label=f'{label} (end point)', ax=ax, alpha=marker_alpha
        )

    # Invert the y-axis
    ax.invert_yaxis()

    # Add text annotations
    first_name = player_name.split()[0] if player_name else 'Player'
    fig_text(
        0.516, 0.995, f"<{player_name}>", font='Arial Rounded MT Bold', size=30,
        ha="center", color="#FFFFFF", fontweight='bold', highlight_textprops=[{"color": '#FFFFFF'}]
    )
    fig_text(
        0.518, 0.941,
        f"Successful Passes after Ball Recovery | {team_name} | {games_played:.2f} 90s Played",
        font='Arial Rounded MT Bold', size=22,
        ha="center", color="#FFFFFF", fontweight='bold'
    )
    fig_text(
        0.518, 0.892,
        f"{ball_retention_pct:.1f}% of the Balls that {first_name} Recovers end up in a successful ACTION",
        font='Arial Rounded MT Bold', size=18,
        ha="center", color="#FFFFFF", fontweight='bold'
    )
    # A further subtitle showing the '%_Prog_Passes' share (at 0.518, 0.843) is
    # intentionally not drawn.

    fig_text(
        0.770, 0.105, "Made by: @pranav_m28\nData: Opta\n2024-25",
        font='Arial Rounded MT Bold', size=18,
        ha="center", color="#FFFFFF", fontweight='bold'
    )
    fig_text(
        0.265, 0.105,
        f"<Successful Passes = {succ_PR} ({succ_PR_per90:.2f})>\n<Progressive Passes = {prog_PR} ({prog_PR_per90:.2f})>\n<Unsuccessful Passes = {unsucc_PR} ({unsucc_PR_per90:.2f})>",
        font='Arial Rounded MT Bold', size=16,
        ha="center", color="#FFFFFF", fontweight='bold',
        highlight_textprops=[{"color": '#24a8ff'}, {"color": '#03fc24'}, {"color": "#FF5959"}]
    )

    # Set plot style. This changes global Matplotlib state and is deliberately
    # applied at this point: artists created below (and later saves of the
    # figure) pick up these settings, so moving it would change the output.
    plt.style.use("dark_background")
    mpl.rc('axes', edgecolor='#131313', linewidth=1.2)
    for param in ['figure.facecolor', 'axes.facecolor', 'savefig.facecolor']:
        plt.rcParams[param] = '#1e1e1e'
    for param in ['text.color', 'axes.labelcolor', 'xtick.color', 'ytick.color']:
        plt.rcParams[param] = '0.9'

    # Add direction of play indicator
    fig_text(
        0.510, 0.10, "Direction of Play", font='Arial Rounded MT Bold', size=18,
        ha="center", color="#FFFFFF", fontweight='bold'
    )
    ax.arrow(49.2, -3, 20, 0, fc='#FFFFFF', ls='-', lw=1.9, head_length=1, head_width=1)

    return fig
