In [2]:
import pandas as pd
import numpy as np

In [7]:
# Read the possession-level table and discard the 'Unnamed: 0' column that came
# along in the CSV (presumably a saved index -- confirm against the file).
poss = (
    pd.read_csv('data/possessions/full_poss.csv')
    .drop(columns=['Unnamed: 0'])
)

  poss = pd.read_csv('data/possessions/full_poss.csv')


In [12]:
# Load the box-score CSVs box_17.csv .. box_24.csv and stack them into one frame.
# The per-file frames are collected in a list and concatenated once; calling
# pd.concat inside the loop would re-copy every previously loaded row on each
# iteration. Row labels are left as read from each file (no ignore_index), so the
# resulting index is the same as with repeated concatenation.
season_frames = []
for i in range(17, 25):
    box_ = pd.read_csv('data/box/box_{}.csv'.format(i))
    season_frames.append(box_)
box = pd.concat(season_frames)

In [18]:
# Lift the column display cap so wide frames are rendered with every column.
pd.options.display.max_columns = None

In [8]:
def calculate_player_possessions(poss):
    """
    Count, per game, the number of distinct possessions each player took part in.

    A player takes part in a possession when their id appears in any of the ten
    lineup columns (offense or defense) of that possession's row. Missing lineup
    slots (NaN) are ignored. The input frame is not modified.

    Parameters:
    - poss: pandas DataFrame with columns 'game_id', 'possession_id',
            'off1', 'off2', 'off3', 'off4', 'off5',
            'def1', 'def2', 'def3', 'def4', 'def5'.
            Any other columns (e.g. 'points_scored') are not used.

    Returns:
    - player_possessions: pandas DataFrame with columns 'game_id', 'playerId',
                          'possessions_participated', ordered by game_id
                          ascending and possessions_participated descending,
                          with a fresh 0..n-1 index.

    Raises:
    - KeyError: if any of the required columns is absent from `poss`.
    """
    id_cols = ['game_id', 'possession_id']
    # off1..off5 followed by def1..def5
    lineup_cols = [f'{side}{slot}' for side in ('off', 'def') for slot in range(1, 6)]

    # One melt over all ten lineup columns gives one row per
    # (game, possession, player). Whether the player was on offense or defense
    # does not affect the count, so no side label is kept.
    all_players = (
        poss[id_cols + lineup_cols]
        .melt(id_vars=id_cols, value_vars=lineup_cols, value_name='playerId')
        .dropna(subset=['playerId'])
    )

    # nunique (rather than size) so a possession is counted once per player even
    # if the same id shows up in more than one lineup slot of a row.
    player_possessions = (
        all_players
        .groupby(['game_id', 'playerId'])['possession_id']
        .nunique()
        .reset_index(name='possessions_participated')
        .sort_values(by=['game_id', 'possessions_participated'], ascending=[True, False])
        .reset_index(drop=True)
    )

    return player_possessions



In [9]:
# Per-game, per-player possession counts derived from the possession table.
player_possessions_df = calculate_player_possessions(poss)

In [10]:
# Show the possession counts (one row per game_id / playerId pair).
player_possessions_df

Unnamed: 0,game_id,playerId,possessions_participated
0,400927392,651,165
1,400927392,882,162
2,400927392,918,157
3,400927392,1006,156
4,400927392,2529185,148
...,...,...,...
34012,401726992,3906753,77
34013,401726992,4398938,57
34014,401726992,4257500,24
34015,401726992,981,13


In [16]:

# Attach each player's per-game possession count to their box-score row.
# how='inner' is the pandas default, spelled out here because it matters:
# box rows with no matching (game_id, playerId) in the possession counts are dropped.
# validate='many_to_one' makes pandas raise a MergeError if a (game_id, playerId)
# key is duplicated on the right side, instead of silently multiplying box rows.
box = box.merge(
    player_possessions_df,
    how='inner',
    left_on=['game_id', 'athlete_id'],
    right_on=['game_id', 'playerId'],
    validate='many_to_one',
)

In [20]:
# List the columns available on box after the merge.
box.columns

Index(['game_id', 'season', 'season_type', 'game_date', 'game_date_time',
       'athlete_id', 'athlete_display_name', 'team_id', 'team_name',
       'team_location', 'team_short_display_name', 'minutes',
       'field_goals_made', 'field_goals_attempted',
       'three_point_field_goals_made', 'three_point_field_goals_attempted',
       'free_throws_made', 'free_throws_attempted', 'offensive_rebounds',
       'defensive_rebounds', 'rebounds', 'assists', 'steals', 'blocks',
       'turnovers', 'fouls', 'plus_minus', 'points', 'starter', 'ejected',
       'did_not_play', 'reason', 'active', 'athlete_jersey',
       'athlete_short_name', 'athlete_headshot_href', 'athlete_position_name',
       'athlete_position_abbreviation', 'team_display_name', 'team_uid',
       'team_slug', 'team_logo', 'team_abbreviation', 'team_color',
       'team_alternate_color', 'home_away', 'team_winner', 'team_score',
       'opponent_team_id', 'opponent_team_name', 'opponent_team_location',
       'opponent_

In [21]:
# Box-score counting stats that get a per-100-possessions companion column.
stats = [
    'field_goals_made',
    'field_goals_attempted',
    'three_point_field_goals_made',
    'three_point_field_goals_attempted',
    'free_throws_made',
    'free_throws_attempted',
    'offensive_rebounds',
    'defensive_rebounds',
    'rebounds',
    'assists',
    'steals',
    'blocks',
    'turnovers',
    'fouls',
]

# "<stat>_rate" = stat divided by the player's possessions in that game, times 100.
possessions = box['possessions_participated']
for stat_name in stats:
    box[stat_name + "_rate"] = box[stat_name].div(possessions).mul(100)

In [22]:
# Show box, now including the possession count and the *_rate columns.
box

Unnamed: 0,game_id,season,season_type,game_date,game_date_time,athlete_id,athlete_display_name,team_id,team_name,team_location,team_short_display_name,minutes,field_goals_made,field_goals_attempted,three_point_field_goals_made,three_point_field_goals_attempted,free_throws_made,free_throws_attempted,offensive_rebounds,defensive_rebounds,rebounds,assists,steals,blocks,turnovers,fouls,plus_minus,points,starter,ejected,did_not_play,reason,active,athlete_jersey,athlete_short_name,athlete_headshot_href,athlete_position_name,athlete_position_abbreviation,team_display_name,team_uid,team_slug,team_logo,team_abbreviation,team_color,team_alternate_color,home_away,team_winner,team_score,opponent_team_id,opponent_team_name,opponent_team_location,opponent_team_display_name,opponent_team_abbreviation,opponent_team_logo,opponent_team_color,opponent_team_alternate_color,opponent_team_score,playerId,possessions_participated,field_goals_made_rate,field_goals_attempted_rate,three_point_field_goals_made_rate,three_point_field_goals_attempted_rate,free_throws_made_rate,free_throws_attempted_rate,offensive_rebounds_rate,defensive_rebounds_rate,rebounds_rate,assists_rate,steals_rate,blocks_rate,turnovers_rate,fouls_rate
0,400981432,2017,3,2017-10-04,2017-10-04 20:00:00,812.0,Candace Parker,6,Sparks,Los Angeles,Sparks,36.0,8.0,17.0,1.0,6.0,2.0,3.0,2.0,13.0,15.0,5.0,2.0,4.0,5.0,3.0,-9,19.0,True,False,False,COACH'S DECISION,True,3.0,C. Parker,https://a.espncdn.com/i/headshots/wnba/players...,Forward,F,Los Angeles Sparks,s:40~l:59~t:6,los-angeles-sparks,https://a.espncdn.com/i/teamlogos/wnba/500/la.png,LA,532481,49ac9c,away,False,76,8,Lynx,Minnesota,Minnesota Lynx,MIN,https://a.espncdn.com/i/teamlogos/wnba/500/min...,005084,47a757,85,812,219,3.652968,7.762557,0.456621,2.739726,0.913242,1.369863,0.913242,5.936073,6.849315,2.283105,0.913242,1.826484,2.283105,1.369863
1,400981432,2017,3,2017-10-04,2017-10-04 20:00:00,1068.0,Nneka Ogwumike,6,Sparks,Los Angeles,Sparks,26.0,4.0,6.0,0.0,1.0,3.0,4.0,2.0,1.0,3.0,0.0,3.0,0.0,1.0,6.0,-14,11.0,True,False,False,COACH'S DECISION,False,30.0,N. Ogwumike,https://a.espncdn.com/i/headshots/wnba/players...,Forward,F,Los Angeles Sparks,s:40~l:59~t:6,los-angeles-sparks,https://a.espncdn.com/i/teamlogos/wnba/500/la.png,LA,532481,49ac9c,away,False,76,8,Lynx,Minnesota,Minnesota Lynx,MIN,https://a.espncdn.com/i/teamlogos/wnba/500/min...,005084,47a757,85,1068,138,2.898551,4.347826,0.000000,0.724638,2.173913,2.898551,1.449275,0.724638,2.173913,0.000000,2.173913,0.000000,0.724638,4.347826
2,400981432,2017,3,2017-10-04,2017-10-04 20:00:00,581.0,Alana Beard,6,Sparks,Los Angeles,Sparks,32.0,1.0,4.0,0.0,0.0,0.0,0.0,1.0,2.0,3.0,1.0,3.0,0.0,2.0,5.0,-11,2.0,True,False,False,COACH'S DECISION,False,0.0,A. Beard,,Guard,G,Los Angeles Sparks,s:40~l:59~t:6,los-angeles-sparks,https://a.espncdn.com/i/teamlogos/wnba/500/la.png,LA,532481,49ac9c,away,False,76,8,Lynx,Minnesota,Minnesota Lynx,MIN,https://a.espncdn.com/i/teamlogos/wnba/500/min...,005084,47a757,85,581,189,0.529101,2.116402,0.000000,0.000000,0.000000,0.000000,0.529101,1.058201,1.587302,0.529101,1.587302,0.000000,1.058201,2.645503
3,400981432,2017,3,2017-10-04,2017-10-04 20:00:00,2529047.0,Odyssey Sims,6,Sparks,Los Angeles,Sparks,33.0,3.0,9.0,0.0,3.0,8.0,9.0,1.0,2.0,3.0,3.0,2.0,0.0,3.0,4.0,-22,14.0,True,False,False,COACH'S DECISION,True,1.0,O. Sims,https://a.espncdn.com/i/headshots/wnba/players...,Guard,G,Los Angeles Sparks,s:40~l:59~t:6,los-angeles-sparks,https://a.espncdn.com/i/teamlogos/wnba/500/la.png,LA,532481,49ac9c,away,False,76,8,Lynx,Minnesota,Minnesota Lynx,MIN,https://a.espncdn.com/i/teamlogos/wnba/500/min...,005084,47a757,85,2529047,133,2.255639,6.766917,0.000000,2.255639,6.015038,6.766917,0.751880,1.503759,2.255639,2.255639,1.503759,0.000000,2.255639,3.007519
4,400981432,2017,3,2017-10-04,2017-10-04 20:00:00,2529122.0,Chelsea Gray,6,Sparks,Los Angeles,Sparks,36.0,7.0,15.0,0.0,3.0,1.0,1.0,0.0,1.0,1.0,8.0,3.0,0.0,3.0,1.0,-1,15.0,True,False,False,COACH'S DECISION,True,12.0,C. Gray,https://a.espncdn.com/i/headshots/wnba/players...,Guard,G,Los Angeles Sparks,s:40~l:59~t:6,los-angeles-sparks,https://a.espncdn.com/i/teamlogos/wnba/500/la.png,LA,532481,49ac9c,away,False,76,8,Lynx,Minnesota,Minnesota Lynx,MIN,https://a.espncdn.com/i/teamlogos/wnba/500/min...,005084,47a757,85,2529122,210,3.333333,7.142857,0.000000,1.428571,0.476190,0.476190,0.000000,0.476190,0.476190,3.809524,1.428571,0.000000,1.428571,0.476190
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34011,401620178,2024,2,2024-05-14,2024-05-14 19:00:00,3142055.0,Myisha Hines-Allen,16,Mystics,Washington,Mystics,19.0,2.0,6.0,1.0,3.0,0.0,0.0,2.0,3.0,5.0,2.0,1.0,0.0,2.0,4.0,-14.0,5.0,False,False,False,COACH'S DECISION,False,22.0,M. Hines-Allen,https://a.espncdn.com/i/headshots/wnba/players...,Forward,F,Washington Mystics,s:40~l:59~t:16,washington-mystics,https://a.espncdn.com/i/teamlogos/wnba/500/wsh...,WSH,e03a3e,002b5c,home,False,80,9,Liberty,New York,New York Liberty,NY,https://a.espncdn.com/i/teamlogos/wnba/500/ny.png,86cebc,000000,85,3142055,109,1.834862,5.504587,0.917431,2.752294,0.000000,0.000000,1.834862,2.752294,4.587156,1.834862,0.917431,0.000000,1.834862,3.669725
34012,401620178,2024,2,2024-05-14,2024-05-14 19:00:00,4433408.0,Aaliyah Edwards,16,Mystics,Washington,Mystics,14.0,2.0,4.0,0.0,0.0,2.0,2.0,1.0,1.0,2.0,0.0,0.0,1.0,0.0,2.0,-2.0,6.0,False,False,False,COACH'S DECISION,True,24.0,A. Edwards,https://a.espncdn.com/i/headshots/wnba/players...,Forward,F,Washington Mystics,s:40~l:59~t:16,washington-mystics,https://a.espncdn.com/i/teamlogos/wnba/500/wsh...,WSH,e03a3e,002b5c,home,False,80,9,Liberty,New York,New York Liberty,NY,https://a.espncdn.com/i/teamlogos/wnba/500/ny.png,86cebc,000000,85,4433408,83,2.409639,4.819277,0.000000,0.000000,2.409639,2.409639,1.204819,1.204819,2.409639,0.000000,0.000000,1.204819,0.000000,2.409639
34013,401620178,2024,2,2024-05-14,2024-05-14 19:00:00,2566110.0,Julie Vanloo,16,Mystics,Washington,Mystics,13.0,1.0,4.0,1.0,4.0,0.0,0.0,1.0,0.0,1.0,4.0,1.0,0.0,2.0,2.0,-9.0,3.0,False,False,False,COACH'S DECISION,False,35.0,J. Vanloo,https://a.espncdn.com/i/headshots/wnba/players...,Guard,G,Washington Mystics,s:40~l:59~t:16,washington-mystics,https://a.espncdn.com/i/teamlogos/wnba/500/wsh...,WSH,e03a3e,002b5c,home,False,80,9,Liberty,New York,New York Liberty,NY,https://a.espncdn.com/i/teamlogos/wnba/500/ny.png,86cebc,000000,85,2566110,73,1.369863,5.479452,1.369863,5.479452,0.000000,0.000000,1.369863,0.000000,1.369863,5.479452,1.369863,0.000000,2.739726,2.739726
34014,401620178,2024,2,2024-05-14,2024-05-14 19:00:00,3058893.0,Shatori Walker-Kimbrough,16,Mystics,Washington,Mystics,14.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,4.0,-11.0,0.0,False,False,False,COACH'S DECISION,False,32.0,S. Walker-Kimbrough,https://a.espncdn.com/i/headshots/wnba/players...,Guard,G,Washington Mystics,s:40~l:59~t:16,washington-mystics,https://a.espncdn.com/i/teamlogos/wnba/500/wsh...,WSH,e03a3e,002b5c,home,False,80,9,Liberty,New York,New York Liberty,NY,https://a.espncdn.com/i/teamlogos/wnba/500/ny.png,86cebc,000000,85,3058893,85,0.000000,3.529412,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.176471,0.000000,0.000000,1.176471,4.705882
