Euroleague/Eurocup player driven game simulator

In [1]:
import pandas as pd
import math
import numpy as np
import euroleague_api
from euroleague_api.shot_data import ShotData 
from euroleague_api.EuroLeagueData import EuroLeagueData
from euroleague_api.boxscore_data import BoxScoreData
from euroleague_api.play_by_play_data import PlayByPlay 
import random as rnd
from collections import Counter
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

import warnings
warnings.filterwarnings('ignore')

def get_euroleague_data(start_season: int, end_season: int):

   playbyplay = PlayByPlay(competition='E')
   playbyplay_data = playbyplay.get_game_play_by_play_data_multiple_seasons(start_season, end_season)
   
   shotdata = ShotData(competition='E')
   shot_data = shotdata.get_game_shot_data_multiple_seasons(start_season, end_season)
   
   boxdata = BoxScoreData(competition='E')
   boxscore_data = boxdata.get_player_boxscore_stats_multiple_seasons(start_season, end_season)
   
   return playbyplay_data, shot_data, boxscore_data

playbyplay, shotdata, boxscore = get_euroleague_data(2024,2024)

  from .autonotebook import tqdm as notebook_tqdm

Season 2024: 100%|███████████████████████████████████████████████████████████████████| 170/170 [00:58<00:00,  2.92it/s]


[A
Season 2024: 100%|███████████████████████████████████████████████████████████████████| 170/170 [01:27<00:00,  1.95it/s]


[A
[A
Season 2024: 100%|███████████████████████████████████████████████████████████████████| 170/170 [01:03<00:00,  2.67it/s]

eason loop: 100%|███████████████████████████████████████████████████████████████████████| 1/1 [01:04<00:00, 64.42s/it]

In [29]:
playbyplay.head()

Unnamed: 0,Season,Phase,Round,Gamecode,TYPE,NUMBEROFPLAY,CODETEAM,PLAYER_ID,PLAYTYPE,PLAYER,TEAM,DORSAL,MINUTE,MARKERTIME,POINTS_A,POINTS_B,COMMENT,PLAYINFO,PERIOD
0,2024,RS,1,1,0,49,,,BP,,,,1,,,,,Begin Period,1
1,2024,RS,1,1,0,50,PAN,,JB,,Panathinaikos AKTOR Athens,,1,09:59,,,,,1
2,2024,RS,1,1,0,51,PAN,P002328,2FGA,"GRIGONIS, MARIUS",Panathinaikos AKTOR Athens,40.0,1,09:49,,,,Missed Two Pointer (0/1 - 0 pt),1
3,2024,RS,1,1,0,52,BER,P006919,D,"OLINDE, LOUIS",ALBA Berlin,19.0,1,09:46,,,,Def Rebound (1),1
4,2024,RS,1,1,0,53,BER,P010791,2FGA,"KOUMADJE, KHALIFA",ALBA Berlin,21.0,1,09:30,,,,Missed Two Pointer (0/1 - 0 pt),1


In [2]:
def clean_playbyplay_data(playbyplay, boxdata, shotdata):
    
    """
    Cleans and amplifies euroleague_api play-by-play data by:
    - Merging shot data and relevant boxscore data with play-by-play
    - Identifying players on the court at each moment of each game
    - Finds and addresses inconsistencies
    - Identifies and fixes potential errors
    - Breaks down start and finish of each possession
    - Eliminates garbage time possessions
    
    Args:
        playbyplay_data: Raw play-by-play data
        boxscore_data: Raw boxscore data
        shot_data: Raw shot location data
        
    Returns:
        pd.DataFrame: Clean and consistent play-by-play dataset
        
    """

    # Get initial players on the court via boxscore and merge with Play by Play
    startershome = boxdata[(boxdata['IsStarter'] == 1) & (boxdata['Home'] == 1)]
    startersaway = boxdata[(boxdata['IsStarter'] == 1) & (boxdata['Home'] == 0)]
    
    df1 = startershome.groupby(["Season", "Gamecode"]).agg({"Player_ID": lambda x: [str(item).strip() for item in x.unique().tolist()]}).reset_index()
    df1['HomePlayersOnCourt'] = df1['Player_ID']
    df1 = df1[['Season','Gamecode','HomePlayersOnCourt']]
    
    df2 = startersaway.groupby(["Season", "Gamecode"]).agg({"Player_ID": lambda x: [str(item).strip() for item in x.unique().tolist()]}).reset_index()
    df2['AwayPlayersOnCourt'] = df2['Player_ID']
    df2 = df2[['Season','Gamecode','AwayPlayersOnCourt']]
    
    home = boxdata[boxdata['Home'] == 1]
    home.loc[:, 'HomeTeam'] = home['Team']
    home = home[['Season','Gamecode','HomeTeam']].drop_duplicates()
    away = boxdata[boxdata['Home'] == 0]
    away.loc[:, 'AwayTeam'] = away['Team']
    away = away[['Season','Gamecode','AwayTeam']].drop_duplicates()
    
    teams = pd.merge(home,away,left_on=['Season','Gamecode'],right_on=['Season','Gamecode'])
    starters = pd.merge(df1,df2, left_on=['Season','Gamecode'],right_on=['Season','Gamecode'])
    starters['PERIOD'] = 1
    starters['PLAYINFO'] = 'Begin Period'
    
    playbyplay.drop_duplicates(inplace=True)
    
    playbyplay2 = pd.merge(playbyplay,starters, left_on=['Season','Gamecode','PLAYINFO','PERIOD'],right_on=['Season','Gamecode','PLAYINFO','PERIOD'], how='left')
    
    
    # Merge shot data with new play by play (including starters)
    alldata = pd.merge(playbyplay2, shotdata, left_on=['Season','Gamecode','NUMBEROFPLAY'], right_on=['Season','Gamecode','NUM_ANOT'], how='left')
    
    alldata = pd.merge(alldata,teams,left_on=['Season','Gamecode'],right_on=['Season','Gamecode'])
    
    
    # Preproccessing and Cleaning of Play by Play
    alldata['PLAYTYPE'] = alldata['PLAYTYPE'].str.strip()
    
    alldata['PLAYTYPE'] = np.where(alldata['PLAYTYPE'] == '2FGAB','2FGA',alldata['PLAYTYPE'])
    alldata['PLAYTYPE'] = np.where(alldata['PLAYTYPE'] == '3FGAB','3FGA',alldata['PLAYTYPE'])
    
    
    
    # Add game minute and seconds elapsed columns
    def fill_marker_time(row):
        if pd.isna(row['MARKERTIME']) or row['MARKERTIME'] == '':
            if row['PLAYTYPE'] == 'BP':
                return '10:00'
            elif row['PLAYTYPE'] in ['EP', 'EG']:
                return '00:00'
        return row['MARKERTIME']
    
    def calculate_marker_time(row):
        if row['MARKERTIME'] == 'ND':
            minute_x = row['MINUTE_x']
            if row['PERIOD'] == 1:
                return f"{10 - minute_x}:00"
            elif row['PERIOD'] == 2:
                return f"{20 - minute_x}:00"
            elif row['PERIOD'] == 3:
                return f"{30 - minute_x}:00"
            elif row['PERIOD'] == 4:
                return f"{40 - minute_x}:00"
        return row['MARKERTIME']
    
    alldata['MARKERTIME'] = alldata.apply(calculate_marker_time, axis=1)
    
    alldata['MARKERTIME'] = alldata.apply(fill_marker_time, axis=1)
    
    def time_to_seconds_elapsed(time_str):
        minutes, seconds = map(int, time_str.split(':'))
        total_seconds = minutes * 60 + seconds
        elapsed_seconds = 600 - total_seconds
        return elapsed_seconds
    
    alldata['PeriodSecondsElapsed'] = alldata['MARKERTIME'].apply(time_to_seconds_elapsed)
    
    alldata['NumberOfPlay'] = alldata.groupby(['Season', 'Gamecode']).cumcount() + 1
    
    
    # Iterate through all games and use subsitution rows from play by play to find players on the court for each event
    home_starters = []
    current_gamecode = None
    for index, row in alldata.iterrows():
        if current_gamecode != row['Gamecode']:
            # Initialize home_starters with the current HomePlayersOnCourt
            home_starters = row['HomePlayersOnCourt'] if isinstance(row['HomePlayersOnCourt'], list) else []
            current_gamecode = row['Gamecode']
        elif row['PLAYINFO'] == 'In' and row['CODETEAM'] == row['HomeTeam']:
            home_starters.append(row['PLAYER_ID'])
        elif row['PLAYINFO'] == 'Out' and row['CODETEAM'] == row['HomeTeam']:
            if row['PLAYER_ID'] in home_starters:
                home_starters.remove(row['PLAYER_ID'])
        alldata.at[index, 'HomePlayersOnCourt'] = home_starters.copy() if home_starters else None
    away_starters = []
    current_gamecode = None
    for index, row in alldata.iterrows():
        if current_gamecode != row['Gamecode']:
            # Initialize away_starters with the current AwayPlayersOnCourt
            away_starters = row['AwayPlayersOnCourt'] if isinstance(row['AwayPlayersOnCourt'], list) else []
            current_gamecode = row['Gamecode']
        elif row['PLAYINFO'] == 'In' and row['CODETEAM'] == row['AwayTeam']:
            away_starters.append(row['PLAYER_ID'])
        elif row['PLAYINFO'] == 'Out' and row['CODETEAM'] == row['AwayTeam']:
            if row['PLAYER_ID'] in away_starters:
                away_starters.remove(row['PLAYER_ID'])
        alldata.at[index, 'AwayPlayersOnCourt'] = away_starters.copy() if away_starters else None
    
    
    
    # Remove non wanted events (technical fouls, substitutions, challanges, timeouts, tipoffs, etc), no OT
    alldata = alldata[~alldata['PLAYTYPE'].isin(['TPOFF','IN','OUT','TOUT','F','RV','BF',
                                                             'AG','TOUT_TV','CCH'])].sort_values(by=['Season','Gamecode',
                                                                                                   'NumberOfPlay'], ascending=[True,True,True])
    alldata = alldata[(alldata['PERIOD']).isin([1,2,3,4])]
    
    alldata = alldata[~(alldata['PLAYTYPE']).isin(['TPOFF','JB'])]
    print("2")
    
    # Fix errors in play by play
    alldata['CODETEAM'] = np.where(
        (alldata['PLAYTYPE'] == 'BP') & (~alldata['PLAYTYPE'].shift(-1).isin(['CM','FV','C','CMT','CMU'])),
        alldata['CODETEAM'].shift(-1),
        np.where(
            (alldata['PLAYTYPE'] == 'BP') & (~alldata['PLAYTYPE'].shift(-2).isin(['CM','FV','C','CMT','CMU'])),
            alldata['CODETEAM'].shift(-2),
            np.where(
                (alldata['PLAYTYPE'] == 'BP') & (~alldata['PLAYTYPE'].shift(-3).isin(['CM','FV','C','CMT','CMU'])),
                alldata['CODETEAM'].shift(-3),
                alldata['CODETEAM'] ) ))
    
    alldata['PLAYTYPE'] = np.where(
        (alldata['PLAYTYPE'] == 'O') & 
        (alldata['PLAYTYPE'].shift(1).isin(['2FGA', '3FGA', 'FTA'])) & 
        (alldata['CODETEAM'].shift(1) != alldata['CODETEAM']),
        'D',
        np.where(
            (alldata['PLAYTYPE'] == 'D') & 
            (alldata['PLAYTYPE'].shift(1).isin(['2FGA', '3FGA', 'FTA'])) & 
            (alldata['CODETEAM'].shift(1) == alldata['CODETEAM']),
            'O',
            np.where(
                (alldata['PLAYTYPE'] == 'O') & 
                (alldata['PLAYTYPE'].shift(2).isin(['2FGA', '3FGA', 'FTA'])) & 
                (alldata['PLAYTYPE'].shift(1) == 'FV') & 
                (alldata['CODETEAM'].shift(1) == alldata['CODETEAM']),
                'D',
                alldata['PLAYTYPE'] )   ))
    
    alldata['Next_PLAYTYPE_1'] = alldata['PLAYTYPE'].shift(-1)
    alldata['Next_PLAYTYPE_2'] = alldata['PLAYTYPE'].shift(-2)
    alldata['Next_PLAYTYPE_3'] = alldata['PLAYTYPE'].shift(-3)
    alldata['Next_PLAYTYPE_4'] = alldata['PLAYTYPE'].shift(-4)
    
    alldata['Next_Teamcode_1'] = alldata['CODETEAM'].shift(-1)
    alldata['Next_Teamcode_2'] = alldata['CODETEAM'].shift(-2)
    alldata['Next_Teamcode_3'] = alldata['CODETEAM'].shift(-3)
    alldata['Next_Teamcode_4'] = alldata['CODETEAM'].shift(-4)
    
    
    alldata['LastPossessionOfQuarter'] = 0
    alldata['LastPossessionOfQuarter'] = (alldata['Next_PLAYTYPE_1'].isin(['EP','EG'])).astype(int)
    
    alldata['MissedFTDreb'] = 0
    alldata['MissedFTDreb'] = ((alldata['PLAYTYPE'] == 'FTA') & 
                        (alldata['Next_PLAYTYPE_1'] == 'D')).astype(int)
    
    alldata['MissedFTOreb'] = 0
    alldata['MissedFTOreb'] = ((alldata['PLAYTYPE'] == 'FTA') & 
                        (alldata['Next_PLAYTYPE_1'] == 'O')).astype(int)
    
    alldata['techFT'] = np.where(
        ((alldata['PLAYTYPE'].isin(['FTA', 'FTM'])) & (alldata['PLAYTYPE'].shift(1).isin(['C', 'CMT','CMTI','B']))),1,0)
    
    alldata['UnsportsFT'] = np.where(((alldata['PLAYTYPE']).isin(['FTA','FTM'])) & 
                                    ((alldata['PLAYTYPE'].shift(1).isin(['CMU','CMD'])) | 
                                     (alldata['PLAYTYPE'].shift(2).isin(['CMU', 'CMD','CMT'])) |
                                     (alldata['PLAYTYPE'].shift(3).isin(['CMU', 'CMD',]))),1,0)
    
    alldata['And1'] = (
        ((alldata['PLAYTYPE'].isin(['2FGM', '3FGM'])) & 
                        (alldata['Next_PLAYTYPE_1'] == 'CM') & ((alldata['Next_PLAYTYPE_2']).isin(['FTA','FTM'])) &
                        (alldata['CODETEAM'] == alldata['Next_Teamcode_2']))|
        ((alldata['PLAYTYPE'].isin(['2FGM', '3FGM'])) & 
                        (alldata['Next_PLAYTYPE_1'] == 'AS') & ((alldata['Next_PLAYTYPE_2']).isin(['CM'])) &
                        (alldata['CODETEAM'] != alldata['Next_Teamcode_2']) &
                        (alldata['PLAYER_ID'] == alldata['PLAYER_ID'].shift(-3))) |
        ((alldata['PLAYTYPE'].isin(['2FGM', '3FGM'])) & 
                        (alldata['Next_PLAYTYPE_1'] == 'CM') & ((alldata['Next_PLAYTYPE_2']).isin(['AS'])) &
                        (alldata['CODETEAM'] == alldata['Next_Teamcode_3']) &
                        (alldata['Next_PLAYTYPE_3'].isin(['FTA','FTM'])))).astype(int)
    
    alldata['AssistedBasket'] = np.where(((alldata['PLAYTYPE']).isin(['2FGM','3FGM','FTM'])) & (alldata['Next_PLAYTYPE_1'] == 'AS'),1,0)
    
    alldata['UnsportsBasket'] = np.where(((alldata['PLAYTYPE']).isin(['2FGM','3FGM','FTM'])) & (alldata['Next_PLAYTYPE_1'] == 'CMU')
                                         & (alldata['PeriodSecondsElapsed'] == alldata['PeriodSecondsElapsed'].shift(-1))
                                         & (alldata['CODETEAM'] != alldata['Next_Teamcode_1']),1,0)
    
    alldata['OFFnotTO'] = np.where(((alldata['PLAYTYPE']).isin(['OF'])) & (alldata['Next_PLAYTYPE_1'] != 'TO'),1,0)
    
    alldata['TOSteal'] = np.where(((alldata['PLAYTYPE']).isin(['TO'])) & (alldata['Next_PLAYTYPE_1'] == 'ST'),1,0)
    
    alldata['StealTO'] = np.where(((alldata['PLAYTYPE']).isin(['ST'])) & (alldata['Next_PLAYTYPE_1'] == 'TO')&
                                  (alldata['CODETEAM'] != alldata['Next_Teamcode_1']),1,0)
    
    alldata['POTENTIALERROR'] = np.where(
        (  ( (alldata['PLAYTYPE'].isin(['2FGA', '3FGA']) & (~alldata['PLAYTYPE'].shift(-1).isin(['O', 'D', 'EP', 'EG', 'BP', 'FV','CM']))) |
                (alldata['PLAYTYPE'].isin(['AS']) & (alldata['PLAYTYPE'].shift(1).isin(['2FGA', '3FGA']))) |
                (alldata['PLAYTYPE'].isin(['2FGM', '3FGM']) & (alldata['PLAYTYPE'].shift(-1).isin(['O', 'D'])))  ) & ~(
                (alldata['PLAYTYPE'].isin(['2FGA', '3FGA']) & 
                 (alldata['PLAYTYPE'].shift(-1) == 'TO') & 
                 (alldata['CODETEAM'] == alldata['CODETEAM'].shift(-1)))  ) ),  1,  0)
    
    alldata['POTENTIALERROR2'] = np.where((alldata['PLAYTYPE'] == alldata['PLAYTYPE'].shift(-1)) & (alldata['PLAYER_ID'] == alldata['PLAYER_ID'].shift(-1))
                                          & (alldata['PeriodSecondsElapsed'] == alldata['PeriodSecondsElapsed'].shift(-1)) 
                                          &(~alldata['PLAYTYPE'].isin(['FTA','FTM'])), 1,0)
    
    alldata['POTENTIALERROR3'] = np.where(
        (alldata['PLAYTYPE'].str.contains('FGA')) &  
        (alldata['PLAYTYPE'].shift(-1).str.contains('FGA')) &  
        (alldata['CODETEAM'] == alldata['CODETEAM'].shift(-1)) &  
        (alldata['PLAYTYPE'].shift(-2) == 'O') &  
        (alldata['CODETEAM'] == alldata['CODETEAM'].shift(-2)),1,0)
    
    alldata['Temp_FoulOffBall'] = np.where(
        (alldata['PLAYTYPE'].isin(['AS', '2FGM', '3FGM'])) & 
        (alldata['Next_PLAYTYPE_1'].isin(['CM'])) & (alldata['Next_Teamcode_1'] != alldata['CODETEAM']) &
        (~alldata['Next_PLAYTYPE_2'].isin(['CM', 'FV', 'CMT', 'FTA', 'FTM'])) & 
        (alldata['Next_Teamcode_2'] == alldata['CODETEAM']),  1,  0)
    
    alldata['FoulOffBall'] = np.where(
        (alldata['PLAYTYPE'] == 'CM') & 
        (alldata['Temp_FoulOffBall'].shift(1) == 1) & ((alldata['PLAYTYPE']).shift(-1) == 'AS'),  1,  alldata['Temp_FoulOffBall'])
    
    alldata.drop(columns=['Temp_FoulOffBall'], inplace=True)
    
    
    
    # Account for FT Events
    ft_data = alldata[((alldata['PLAYTYPE'] == 'FTA') | (alldata['PLAYTYPE'] == 'FTM')) & ((alldata['techFT'] == 0) & (alldata['UnsportsFT'] == 0))]
    
    ft_data = ft_data.groupby(by=['Season','CODETEAM','Gamecode',
                                 'PERIOD','MARKERTIME',]).agg({'NumberOfPlay':'max',}).reset_index()
    ft_data['Last_FT1'] = 1
    
    alldata = pd.merge(alldata,ft_data, left_on=['Season','CODETEAM','Gamecode','PERIOD','MARKERTIME','NumberOfPlay'],
            right_on=['Season','CODETEAM','Gamecode','PERIOD','MARKERTIME','NumberOfPlay'],how='left')
    
    alldata['Last_FT'] = np.where((alldata['Last_FT1'] == 1) & (alldata['Last_FT1'].shift(-1) == 1),0,alldata['Last_FT1'])
    
    alldata['FirstFTAst'] = np.where(((alldata['PLAYTYPE']).isin(['AS'])) & ((alldata['PLAYTYPE']).shift(1) == 'FTM')
                                & ((alldata['Last_FT']).shift(1) != 1),1,0)
    
    alldata['LastFTMissedAst'] = np.where(((alldata['PLAYTYPE']).isin(['AS'])) & ((alldata['PLAYTYPE']).shift(1) == 'FTA')
                                          & ((alldata['PLAYTYPE']).shift(2) == 'FTM') & (((alldata['PLAYTYPE']).shift(-1)).isin(['O','D']))
                                & ((alldata['Last_FT']).shift(1) == 1),1,0)
    
    alldata['And1Assisted'] = np.where(((alldata['PLAYTYPE'] == 'AS') & ((alldata['And1']).shift(1) == 1)) |
                                       ((alldata['PLAYTYPE'] == 'AS') & ((alldata['And1']).shift(2) == 1)
                                       &(alldata['PLAYTYPE'].shift(1) == 'CM')),1,0)
    
    alldata['Last_FTError'] = np.where((alldata['Last_FT'] == 1) & (alldata['PLAYTYPE'] == 'FTA') & (~alldata['PLAYTYPE'].shift(-1).isin(['O', 'D'])),1,0)
    
    print("3")
    
    # Number possessions for home and away teams
    home_possessions = 0
    away_possessions = 0
    current_possession_team = None
    current_game_id = None
    
    for index, row in alldata.iterrows():
        game_id = (row['Season'], row['Gamecode'])  
        if game_id != current_game_id:
            current_game_id = game_id
            home_possessions = 0
            away_possessions = 0
            current_possession_team = None
        if row['PLAYTYPE'] == 'BP' and row['PERIOD'] == 1: 
            if row['CODETEAM'] == row['HomeTeam']:
                current_possession_team = 'HomeTeam'
                home_possessions = 1
            else:
                current_possession_team = 'AwayTeam'
                away_possessions = 1      
        if row['LastPossessionOfQuarter'] == 1:
            pass
        elif row['POTENTIALERROR2'] == 1:
            pass
        elif row['POTENTIALERROR3'] == 1:
            pass
        elif row['FoulOffBall'] == 1:
            pass
        elif row['StealTO'] == 1:
            pass
        elif row['UnsportsBasket'] == 1:
            pass
        elif row['LastFTMissedAst'] == 1:
            pass
        elif ((row['PLAYTYPE'] in ['2FGM','3FGM','FTM']) & (row['AssistedBasket'] == 1)):
            pass
        elif ((row['PLAYTYPE'] in ['2FGM', '3FGM']) & (row['And1'] == 0) & (row['AssistedBasket'] == 0)):
            if current_possession_team == 'HomeTeam':
                away_possessions += 1
                current_possession_team = 'AwayTeam'
            else:
                home_possessions += 1
                current_possession_team = 'HomeTeam'
        elif ((row['PLAYTYPE'] in ['FTA','FTM']) & (row['MissedFTOreb'] == 1)):
            pass
        elif ((row['PLAYTYPE'] == 'FTM')  & (row['AssistedBasket'] == 0) & (row['Last_FT'] == 1)):
            if current_possession_team == 'HomeTeam':
                away_possessions += 1
                current_possession_team = 'AwayTeam'
            else:
                home_possessions += 1
                current_possession_team = 'HomeTeam'
        elif row['POTENTIALERROR'] == 1:
            if current_possession_team == 'HomeTeam':
                away_possessions += 1
                current_possession_team = 'AwayTeam'
            else:
                home_possessions += 1
                current_possession_team = 'HomeTeam'
        elif row['Last_FTError'] == 1:
            if current_possession_team == 'HomeTeam':
                away_possessions += 1
                current_possession_team = 'AwayTeam'
            else:
                home_possessions += 1
                current_possession_team = 'HomeTeam'
        elif row['OFFnotTO'] == 1:
            if current_possession_team == 'HomeTeam':
                away_possessions += 1
                current_possession_team = 'AwayTeam'
            else:
                home_possessions += 1
                current_possession_team = 'HomeTeam'
        elif row['PLAYTYPE'] == 'D':
            if current_possession_team == 'HomeTeam':
                away_possessions += 1
                current_possession_team = 'AwayTeam'
            else:
                home_possessions += 1
                current_possession_team = 'HomeTeam'
        elif row['PLAYTYPE'] == 'OF':
            if current_possession_team == 'HomeTeam':
                current_possession_team = 'HomeTeam'
            else:
                current_possession_team = 'AwayTeam'
        elif (row['PLAYTYPE'] == 'TO') and (row['TOSteal'] == 0):
            if current_possession_team == 'HomeTeam':
                away_possessions += 1
                current_possession_team = 'AwayTeam'
            else:
                home_possessions += 1
                current_possession_team = 'HomeTeam'
        elif row['FirstFTAst'] == 1:
            pass
        elif row['And1Assisted'] == 1:
            pass
        elif row['PLAYTYPE'] in ['AS', 'ST'] and row['FoulOffBall'] == 0:
            if current_possession_team == 'HomeTeam':
                away_possessions += 1
                current_possession_team = 'AwayTeam'
            else:
                home_possessions += 1
                current_possession_team = 'HomeTeam'
        elif row['PLAYTYPE'] == 'EG':
            home_possessions == 0
        elif row['PLAYTYPE'] == 'EG':
            away_possessions == 0
        elif row['PLAYTYPE'] == 'BP' and row['CODETEAM'] == row['HomeTeam'] and row['PERIOD'] != 1:
            home_possessions += 1
            current_possession_team = 'HomeTeam'
        elif row['PLAYTYPE'] == 'BP' and row['CODETEAM'] == row['AwayTeam'] and row['PERIOD'] != 1:
            away_possessions += 1
            current_possession_team = 'AwayTeam'
        else:
            pass
        alldata.at[index, 'HomeTeamInPossession'] = home_possessions
        alldata.at[index, 'AwayTeamInPossession'] = away_possessions
    
    alldata['HomeTeamInPossession2'] = alldata['HomeTeamInPossession']
    alldata['AwayTeamInPossession2'] = alldata['AwayTeamInPossession']
    
    alldata['HomeTeamInPossession2'] = np.where(
        (alldata['PLAYTYPE'].isin(['2FGA', '3FGA', 'O','JB','OF','RV','BP'])) & (alldata['CODETEAM'] == alldata['AwayTeam'])
        & ((alldata['POTENTIALERROR'] == 0) | (alldata['POTENTIALERROR2'] == 1)) & (alldata['OFFnotTO'] == 0),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        (alldata['PLAYTYPE'].isin(['2FGA', '3FGA', 'O','JB','OF','RV','BP'])) & (alldata['CODETEAM'] == alldata['HomeTeam'])
        & ((alldata['POTENTIALERROR'] == 0) | (alldata['POTENTIALERROR2'] == 1)) & (alldata['OFFnotTO'] == 0),'', alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where(
        (alldata['FoulOffBall'] == 1) & (alldata['CODETEAM'] == alldata['HomeTeam'])& (alldata['And1Assisted'] == 0)
        & (alldata['POTENTIALERROR'] == 0),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        (alldata['FoulOffBall'] == 1)  & (alldata['CODETEAM'] == alldata['AwayTeam']) & (alldata['And1Assisted'] == 0)
        & (alldata['POTENTIALERROR'] == 0),'', alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where(
        (alldata['UnsportsBasket'] == 1) & (alldata['CODETEAM'] != alldata['HomeTeam'])
        & (alldata['POTENTIALERROR'] == 0),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        (alldata['UnsportsBasket'] == 1)  & (alldata['CODETEAM'] != alldata['AwayTeam'])
        & (alldata['POTENTIALERROR'] == 0),'', alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where(
        (alldata['AssistedBasket'] == 1) & (alldata['CODETEAM'] == alldata['AwayTeam'])
        & (alldata['POTENTIALERROR'] == 0),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        (alldata['AssistedBasket'] == 1)  & (alldata['CODETEAM'] == alldata['HomeTeam'])
        & (alldata['POTENTIALERROR'] == 0),'', alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where(
        (alldata['FirstFTAst'] == 1) & (alldata['CODETEAM'] == alldata['AwayTeam'])
        & (alldata['POTENTIALERROR'] == 0),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        (alldata['FirstFTAst'] == 1)  & (alldata['CODETEAM'] == alldata['HomeTeam'])
        & (alldata['POTENTIALERROR'] == 0),'', alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where(
        (alldata['And1Assisted'] == 1) & (alldata['CODETEAM'] == alldata['AwayTeam'])
        & (alldata['POTENTIALERROR'] == 0),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        (alldata['And1Assisted'] == 1)  & (alldata['CODETEAM'] == alldata['HomeTeam'])
        & (alldata['POTENTIALERROR'] == 0),'', alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where(
        (alldata['PLAYTYPE'].isin(['CM','FV','C','CMT','CMU','B'])) & (alldata['CODETEAM'] == alldata['HomeTeam']) &(alldata['FoulOffBall'] != 1)
        & (alldata['POTENTIALERROR'] == 0),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        (alldata['PLAYTYPE'].isin(['CM','FV','C','CMT','CMU','B'])) & (alldata['CODETEAM'] == alldata['AwayTeam']) &(alldata['FoulOffBall'] != 1)
        & (alldata['POTENTIALERROR'] == 0),'', alldata['AwayTeamInPossession2'])
    
    
    alldata['HomeTeamInPossession2'] = np.where(
        (alldata['PLAYTYPE'].isin(['TO'])) & (alldata['CODETEAM'] == alldata['AwayTeam']) & (alldata['TOSteal'] == 1)
        & (alldata['POTENTIALERROR'] == 0),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        (alldata['PLAYTYPE'].isin(['TO'])) & (alldata['CODETEAM'] == alldata['HomeTeam']) & (alldata['TOSteal'] == 1)
        & (alldata['POTENTIALERROR'] == 0),'', alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where(
        (alldata['PLAYTYPE'].isin(['2FGM', '3FGM'])) & (alldata['CODETEAM'] == alldata['AwayTeam']) & (alldata['And1'] == 1),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        (alldata['PLAYTYPE'].isin(['2FGM', '3FGM'])) & (alldata['CODETEAM'] == alldata['HomeTeam']) & (alldata['And1'] == 1),'', alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where(
        ((alldata['PLAYTYPE']).isin(['FTA','FTM'])) & (alldata['CODETEAM'] == alldata['AwayTeam']) & ( (alldata['MissedFTDreb'] == 1) | (alldata['MissedFTOreb'] == 1)),
        '', 
        alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        ((alldata['PLAYTYPE']).isin(['FTA','FTM'])) & (alldata['CODETEAM'] == alldata['HomeTeam']) & ( (alldata['MissedFTDreb'] == 1) | (alldata['MissedFTOreb'] == 1)),'', 
        alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where(
        ((alldata['PLAYTYPE']).isin(['FTA','FTM'])) & (alldata['CODETEAM'] == alldata['AwayTeam']) & ( (alldata['Last_FT'] != 1)
                                                                                                      ),'',  alldata['HomeTeamInPossession2'])
    alldata['AwayTeamInPossession2'] = np.where(
        ((alldata['PLAYTYPE']).isin(['FTA','FTM'])) & (alldata['CODETEAM'] == alldata['HomeTeam']) & ( (alldata['Last_FT'] != 1)
                                                                                                      ),'',  alldata['AwayTeamInPossession2'])
    alldata['HomeTeamInPossession2'] = np.where(
        ((alldata['PLAYTYPE']).isin(['D','ST'])) & (alldata['CODETEAM'] == alldata['HomeTeam']) & (alldata['LastPossessionOfQuarter'] == 1),'', alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where(
        ((alldata['PLAYTYPE']).isin(['D','ST'])) & (alldata['CODETEAM'] == alldata['AwayTeam']) & (alldata['LastPossessionOfQuarter'] == 1), '', alldata['AwayTeamInPossession2'])
    
    alldata['HomeTeamInPossession2'] = np.where( (alldata['LastPossessionOfQuarter'] == 1) &
        ~alldata['PLAYTYPE'].isin(['D','ST']) & (alldata['CODETEAM'] == alldata['AwayTeam']), '',  alldata['HomeTeamInPossession2'])
    
    alldata['AwayTeamInPossession2'] = np.where( (alldata['LastPossessionOfQuarter'] == 1) &
        ~alldata['PLAYTYPE'].isin(['D','ST']) & (alldata['CODETEAM'] == alldata['HomeTeam']),'', alldata['AwayTeamInPossession2'])
    
    alldata = alldata[~alldata['PLAYTYPE'].isin(['EG','EP'])]
    
    alldata['Phase'] = alldata['Phase_x']
    alldata['Round'] = alldata['Round_x']
    alldata['Team'] = alldata['CODETEAM']
    alldata['PlayerId'] = alldata['PLAYER_ID']
    alldata['PlayerName'] = alldata['PLAYER_x']
    alldata['Clock'] = alldata['MARKERTIME']
    alldata['Period'] = alldata['PERIOD']
    alldata['PlayType'] = alldata['PLAYTYPE']
    alldata['Action'] = alldata['ACTION']
    alldata['PointsScored'] = alldata['POINTS'].fillna(0)
    alldata['ShotCoord_X'] = alldata['COORD_X']
    alldata['ShotCoord_Y'] = alldata['COORD_Y']
    alldata['ShotZone'] = alldata['ZONE']
    alldata['FastBreak'] = alldata['FASTBREAK'].fillna(0)
    alldata['SecondChance'] = alldata['SECOND_CHANCE'].fillna(0)
    alldata['PointsOffTurnover'] = alldata['POINTS_OFF_TURNOVER'].fillna(0)
    alldata['HomePossession'] = alldata['HomeTeamInPossession2']
    alldata['AwayPossession'] = alldata['AwayTeamInPossession2']
    alldata['PlayInfo'] = alldata['PLAYINFO']
    alldata['POINTS_A'] = alldata.groupby((alldata['PLAYTYPE'] == 'BP').cumsum())['POINTS_A_x'].transform(lambda x: x.ffill().fillna(0))
    alldata['POINTS_B'] = alldata.groupby((alldata['PLAYTYPE'] == 'BP').cumsum())['POINTS_B_x'].transform(lambda x: x.ffill().fillna(0))
    alldata = alldata[(abs(alldata['POINTS_A'] - alldata['POINTS_B']) <= 20) | (alldata['Period'] < 4)]
    
    # Return Clean Play by Play Dataset
    return alldata[['Season','Phase','Round','Gamecode','HomeTeam','AwayTeam','Period','Clock','NumberOfPlay',
                    'Team','PlayerId','PlayerName','PlayType','Action','PlayInfo','PointsScored','ShotCoord_X',
                    'ShotCoord_Y','ShotZone','FastBreak','SecondChance','PointsOffTurnover','PeriodSecondsElapsed',
                    'HomePossession','AwayPossession','HomePlayersOnCourt','AwayPlayersOnCourt','FoulOffBall','And1','TOSteal',
                    'LastPossessionOfQuarter','And1Assisted','MissedFTOreb','POTENTIALERROR','POTENTIALERROR2','POTENTIALERROR3',
                    'UnsportsFT','techFT','UnsportsBasket','Last_FT']].sort_values(by=['Season','Gamecode','NumberOfPlay'],ascending=[True,True,True])

In [3]:
cleaned_data = clean_playbyplay_data(playbyplay, boxscore, shotdata)

2
3


In [30]:
cleaned_data.head()

Unnamed: 0,Season,Phase,Round,Gamecode,HomeTeam,AwayTeam,Period,Clock,NumberOfPlay,Team,PlayerId,PlayerName,PlayType,Action,PlayInfo,PointsScored,ShotCoord_X,ShotCoord_Y,ShotZone,FastBreak,SecondChance,PointsOffTurnover,PeriodSecondsElapsed,HomePossession,AwayPossession,HomePlayersOnCourt,AwayPlayersOnCourt,FoulOffBall,And1,TOSteal,LastPossessionOfQuarter,And1Assisted,MissedFTOreb,POTENTIALERROR,POTENTIALERROR2,POTENTIALERROR3,UnsportsFT,techFT,UnsportsBasket,Last_FT
0,2024,RS,1,1,BER,PAN,1,10:00,1,PAN,,,BP,,Begin Period,0.0,,,,0,0,0,0,,1.0,"[P007025, P008780, P006919, P010791, P011631]","[P009622, P012774, P003842, P002328, P007866]",0,0,0,0,0,0,0,0,0,0,0,0,
1,2024,RS,1,1,BER,PAN,1,09:49,3,PAN,P002328,"GRIGONIS, MARIUS",2FGA,Missed Two Pointer,Missed Two Pointer (0/1 - 0 pt),0.0,-370.0,156.0,D,0,0,0,11,,1.0,"[P007025, P008780, P006919, P010791, P011631]","[P009622, P012774, P003842, P002328, P007866]",0,0,0,0,0,0,0,0,0,0,0,0,
2,2024,RS,1,1,BER,PAN,1,09:46,4,BER,P006919,"OLINDE, LOUIS",D,,Def Rebound (1),0.0,,,,0,0,0,14,1.0,1.0,"[P007025, P008780, P006919, P010791, P011631]","[P009622, P012774, P003842, P002328, P007866]",0,0,0,0,0,0,0,0,0,0,0,0,
3,2024,RS,1,1,BER,PAN,1,09:30,5,BER,P010791,"KOUMADJE, KHALIFA",2FGA,Missed Two Pointer,Missed Two Pointer (0/1 - 0 pt),0.0,94.0,81.0,C,0,0,0,30,1.0,,"[P007025, P008780, P006919, P010791, P011631]","[P009622, P012774, P003842, P002328, P007866]",0,0,0,0,0,0,0,0,0,0,0,0,
4,2024,RS,1,1,BER,PAN,1,09:27,6,PAN,P012774,"NUNN, KENDRICK",D,,Def Rebound (1),0.0,,,,0,0,0,33,1.0,2.0,"[P007025, P008780, P006919, P010791, P011631]","[P009622, P012774, P003842, P002328, P007866]",0,0,0,0,0,0,0,0,0,0,0,0,


In [4]:
def individual_player_breakdown(cleaned_data, boxdata):

    '''
    
    Break play by play data into play specific datasets and aggregate events while each player is on the court
    Unique weighting system to determine players' impact on each possession 
        - (i.e player recieves more credit for offensive possession if they took the shot)
    Add in opponent offense/defense statistics for future calculations of elo ratings
    Add in how each possession began (used later to increase/decrease probabilities when determining transiation matrices)
    
    Returns exploded possession by possession dataset for each player, with outcomes and player impact for both offense and defense

    '''
    
    # Breakdown play by play for each player
    cleaned_data_longhome = cleaned_data.explode('HomePlayersOnCourt')
    cleaned_data_longaway = cleaned_data.explode('AwayPlayersOnCourt')
    
    cleaned_data_longhome2 = cleaned_data_longhome[(cleaned_data_longhome['Team'] == cleaned_data_longhome['HomeTeam'])]
    cleaned_data_longaway2 = cleaned_data_longaway[(cleaned_data_longaway['Team'] == cleaned_data_longaway['AwayTeam'])]
    
    cleaned_data_longhomeNew = cleaned_data.explode('HomePlayersOnCourt')
    cleaned_data_longawayNew = cleaned_data.explode('AwayPlayersOnCourt')
    
    playtypes_away = ["FTA", "FTM", "2FGM", "3FGM", "2FGA","3FGA","TO",'O','AS']
    playtypes_home = ["FV", "CM", "ST","D"]
    
    cleaned_data_longhome2New = cleaned_data_longhomeNew[
        (cleaned_data_longhomeNew['AwayPossession'] != "") &
        ( (cleaned_data_longhomeNew['PlayType'].isin(playtypes_away) & 
             (cleaned_data_longhomeNew['Team'] == cleaned_data_longhomeNew['AwayTeam'])) |
            (cleaned_data_longhomeNew['PlayType'].isin(playtypes_home) & 
             (cleaned_data_longhomeNew['Team'] == cleaned_data_longhomeNew['HomeTeam']))  )]
    
    cleaned_data_longaway2New = cleaned_data_longawayNew[
        (cleaned_data_longawayNew['HomePossession'] != "") &
        ( (cleaned_data_longawayNew['PlayType'].isin(playtypes_away) & 
             (cleaned_data_longawayNew['Team'] == cleaned_data_longawayNew['HomeTeam'])) |
            (cleaned_data_longawayNew['PlayType'].isin(playtypes_home) & 
             (cleaned_data_longawayNew['Team'] == cleaned_data_longawayNew['AwayTeam'])) )]
    
    euroReboundHome = cleaned_data_longhome[(cleaned_data_longhome['PlayType'].isin(['D','FV','CM'])) & (cleaned_data_longhome['Team'] == cleaned_data_longhome['AwayTeam'])]
    euroReboundAway = cleaned_data_longaway[(cleaned_data_longaway['PlayType'].isin(['D','FV','CM'])) & (cleaned_data_longaway['Team'] == cleaned_data_longaway['AwayTeam'])]
    
    # Aggregate events for each player both home/away and offense/defense
    homestats_for = cleaned_data_longhome2.groupby(['Season', 'Gamecode','Phase', 'HomePlayersOnCourt', 'HomePossession']).agg(
        fta_for_team= ('PlayType', lambda x: x.isin(['FTA', 'FTM']).sum()),
        ftm_for_team =('PlayType', lambda x: (x == 'FTM').sum()),
        to_for_team=('PlayType', lambda x: (x == 'TO').sum()),
        three_made_for_team=('PlayType', lambda x: (x == '3FGM').sum()),
        three_missed_for_team=('PlayType', lambda x: (x == '3FGA').sum()),
        two_made_for_team=('PlayType', lambda x: (x == '2FGM').sum()),
        two_missed_for_team=('PlayType', lambda x: (x == '2FGA').sum()),
        oreb_for_team=('PlayType', lambda x: (x == 'O').sum()),
        ast_for_team=('PlayType', lambda x: (x == 'AS').sum()),).reset_index()
    
    awaystats_for = cleaned_data_longaway2.groupby(['Season', 'Gamecode', 'Phase','AwayPlayersOnCourt', 'AwayPossession']).agg(
        fta_for_team=('PlayType', lambda x: x.isin(['FTA', 'FTM']).sum()),
        ftm_for_team=('PlayType', lambda x: (x == 'FTM').sum()),
        to_for_team=('PlayType', lambda x: (x == 'TO').sum()),
        three_made_for_team=('PlayType', lambda x: (x == '3FGM').sum()),
        three_missed_for_team=('PlayType', lambda x: (x == '3FGA').sum()),
        two_made_for_team=('PlayType', lambda x: (x == '2FGM').sum()),
        two_missed_for_team=('PlayType', lambda x: (x == '2FGA').sum()),
        oreb_for_team=('PlayType', lambda x: (x == 'O').sum()),
        ast_for_team=('PlayType', lambda x: (x == 'AS').sum()),).reset_index()
    
    homestats_against = cleaned_data_longhome2New.groupby(['Season', 'Gamecode', 'Phase', 'HomePlayersOnCourt', 'AwayPossession']).agg(
        fta_against_team=('PlayType', lambda x: x.isin(['FTA', 'FTM']).sum()),
        ftm_against_team=('PlayType', lambda x: (x == 'FTM').sum()),
        to_against_team=('PlayType', lambda x: (x == 'TO').sum()),
        three_made_against_team=('PlayType', lambda x: (x == '3FGM').sum()),
        three_missed_against_team=('PlayType', lambda x: (x == '3FGA').sum()),
        two_made_against_team=('PlayType', lambda x: (x == '2FGM').sum()),
        two_missed_against_team=('PlayType', lambda x: (x == '2FGA').sum()),
        oreb_against_team=('PlayType', lambda x: (x == 'O').sum()),
        ast_against_team=('PlayType', lambda x: (x == 'AS').sum()),
    ).reset_index()
    
    awaystats_against = cleaned_data_longaway2New.groupby(['Season', 'Gamecode', 'Phase','AwayPlayersOnCourt', 'HomePossession']).agg(
        fta_against_team=('PlayType', lambda x: x.isin(['FTA', 'FTM']).sum()),
        ftm_against_team=('PlayType', lambda x: (x == 'FTM').sum()),
        to_against_team=('PlayType', lambda x: (x == 'TO').sum()),
        three_made_against_team=('PlayType', lambda x: (x == '3FGM').sum()),
        three_missed_against_team=('PlayType', lambda x: (x == '3FGA').sum()),
        two_made_against_team=('PlayType', lambda x: (x == '2FGM').sum()),
        two_missed_against_team=('PlayType', lambda x: (x == '2FGA').sum()),
        oreb_against_team=('PlayType', lambda x: (x == 'O').sum()),
        ast_against_team=('PlayType', lambda x: (x == 'AS').sum()),
    ).reset_index()
    
    homedefstats = cleaned_data_longhome.groupby(['Season', 'Gamecode','Phase','HomeTeam' ,'HomePlayersOnCourt', 'AwayPossession']).agg(
        dreb_for_team=('PlayType', lambda x: (x == 'D').sum()),
        block_for_team=('PlayType', lambda x: (x == 'FV').sum()),
        foul_for_team=('PlayType', lambda x: (x == 'CM').sum()),
        steal_for_team=('PlayType', lambda x: (x == 'ST').sum()),).reset_index()
    
    awaydefstats = cleaned_data_longaway.groupby(['Season', 'Gamecode', 'Phase','AwayTeam','AwayPlayersOnCourt', 'HomePossession']).agg(
        dreb_for_team=('PlayType', lambda x: (x == 'D').sum()),
        block_for_team=('PlayType', lambda x: (x == 'FV').sum()),
        foul_for_team=('PlayType', lambda x: (x == 'CM').sum()),
        steal_for_team=('PlayType', lambda x: (x == 'ST').sum()),).reset_index()
    
    print("5")
    
    # Time of Possession
    home_timeofpossession = cleaned_data.groupby(['Season', 'Gamecode', 'HomePossession']).agg(
        min_seconds_elapsed_for=('PeriodSecondsElapsed', 'min'),
        max_seconds_elapsed_for=('PeriodSecondsElapsed', 'max')).reset_index()
    
    home_timeofpossession['Duration'] = home_timeofpossession['max_seconds_elapsed_for'] - home_timeofpossession['min_seconds_elapsed_for']
    
    away_timeofpossession = cleaned_data.groupby(['Season', 'Gamecode','AwayPossession']).agg(
        min_seconds_elapsed_for=('PeriodSecondsElapsed', 'min'),
        max_seconds_elapsed_for=('PeriodSecondsElapsed', 'max')).reset_index()
    
    away_timeofpossession['Duration'] = away_timeofpossession['max_seconds_elapsed_for'] - away_timeofpossession['min_seconds_elapsed_for']
    
    
    
    # Merge Home/Away and For/Against stats with time of possession numbers 
    homestatsAgainst = homestats_against.merge(home_timeofpossession,left_on=['Season','Gamecode','AwayPossession'],right_on=['Season','Gamecode','HomePossession'])
    awayStatsAgainst = awaystats_against.merge(away_timeofpossession,left_on=['Season','Gamecode','HomePossession'],right_on=['Season','Gamecode','AwayPossession'])
    
    homestatsAgainst.rename(columns={'HomePlayersOnCourt': 'PlayerID', 'AwayPossession': 'Possession'}, inplace=True)
    awayStatsAgainst.rename(columns={'AwayPlayersOnCourt': 'PlayerID', 'HomePossession': 'Possession'}, inplace=True)
    
    PlayerStatsPerPossession_Against = pd.concat([homestatsAgainst,awayStatsAgainst], ignore_index=True)
    
    PlayerStatsPerPossession_Against = PlayerStatsPerPossession_Against[PlayerStatsPerPossession_Against['Possession'] != ""]
    
    PlayerStatsPerPossession_Against['Possession'] = PlayerStatsPerPossession_Against['Possession'].astype(float)
    
    PlayerStatsPerPossession_Against.sort_values(by=['PlayerID','Season','Gamecode','Possession'], ascending=[False,True,True,True],inplace=True)
    
    homestatsFor = homestats_for.merge(home_timeofpossession,left_on=['Season','Gamecode','HomePossession'],right_on=['Season','Gamecode','HomePossession'])
    
    awayStatsFor = awaystats_for.merge(away_timeofpossession,left_on=['Season','Gamecode','AwayPossession'],right_on=['Season','Gamecode','AwayPossession'])
    
    homestatsFor.rename(columns={'HomePlayersOnCourt': 'PlayerID', 'HomePossession': 'Possession'}, inplace=True)
    awayStatsFor.rename(columns={'AwayPlayersOnCourt': 'PlayerID', 'AwayPossession': 'Possession'}, inplace=True)
    
    PlayerStatsPerPossession_For = pd.concat([homestatsFor,awayStatsFor], ignore_index=True)
    
    PlayerStatsPerPossession_For = PlayerStatsPerPossession_For[PlayerStatsPerPossession_For['Possession'] != ""]
    
    PlayerStatsPerPossession_For['Possession'] = PlayerStatsPerPossession_For['Possession'].astype(float)
    
    PlayerStatsPerPossession_For.sort_values(by=['PlayerID','Season','Gamecode','Possession'], ascending=[False,True,True,True],inplace=True)
    
    homedefstats.rename(columns={'HomePlayersOnCourt': 'PlayerID', 'AwayPossession': 'Possession'}, inplace=True)
    awaydefstats.rename(columns={'AwayPlayersOnCourt': 'PlayerID', 'HomePossession': 'Possession'}, inplace=True)
    
    defstats = pd.concat([homedefstats,awaydefstats])
    
    defstats = defstats[defstats['Possession'] != ""]
    
    defstats['Possession'] = defstats['Possession'].astype(float)
    
    PlayerStatsPerPossession_For.sort_values(by=['PlayerID','Season','Gamecode','Possession'],ascending = [True,False,False,False], inplace=True)
    
    PlayerStatsPerPossession_Against2 = PlayerStatsPerPossession_Against.merge(defstats,left_on=['Season','Gamecode','Possession','PlayerID'],
                                                                              right_on=['Season','Gamecode','Possession','PlayerID'],how='left')
    PlayerStatsPerPossession_Against2['dreb_for_team'] = PlayerStatsPerPossession_Against2['dreb_for_team'].fillna(0)
    PlayerStatsPerPossession_Against2['steal_for_team'] = PlayerStatsPerPossession_Against2['steal_for_team'].fillna(0)
    PlayerStatsPerPossession_Against2['block_for_team'] = PlayerStatsPerPossession_Against2['block_for_team'].fillna(0)
    PlayerStatsPerPossession_Against2['foul_for_team'] = PlayerStatsPerPossession_Against2['foul_for_team'].fillna(0)
    
    
    # Player team stats per possession
    PlayerStatsPerPossession_Against2['PlayerPossessionNumber'] = PlayerStatsPerPossession_Against2.groupby(['PlayerID']).cumcount() + 1
    PlayerStatsPerPossession_For['PlayerPossessionNumber'] = PlayerStatsPerPossession_For.groupby(['PlayerID']).cumcount() + 1
    
    
    # Back to orignal exploded dataset, find actual player stats on each possession to calculate usage amongst the team
    cleaned_data_longhome['FTAPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['FTA','FTM'])),1,0)
    cleaned_data_longhome['FTMPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['FTM'])),1,0)
    
    
    cleaned_data_longhome['OrebPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['O'])),1,0)
    
    cleaned_data_longhome['DrebPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['D'])),1,0)
    
    cleaned_data_longhome['TOPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['TO'])),1,0)
    
    cleaned_data_longhome['3FGPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['3FGM','3FGA'])),1,0)
    
    cleaned_data_longhome['3FGMPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['3FGM'])),1,0)
    
    cleaned_data_longhome['2FGPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['2FGM','2FGA'])),1,0)
    
    cleaned_data_longhome['2FGMPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['2FGM'])),1,0)
    
    cleaned_data_longhome['FoulPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['CM'])),1,0)
    
    cleaned_data_longhome['BlockPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['FV'])),1,0)
    
    cleaned_data_longhome['STPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['ST'])),1,0)
    
    cleaned_data_longhome['AstPlayer'] = np.where((cleaned_data_longhome['PlayerId'] == cleaned_data_longhome['HomePlayersOnCourt'])&
                                                    ((cleaned_data_longhome['PlayType']).isin(['AS'])),1,0)
    
    cleaned_data_longaway['FTAPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['FTA','FTM'])),1,0)
    cleaned_data_longaway['FTMPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['FTM'])),1,0)
    
    cleaned_data_longaway['OrebPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['O'])),1,0)
    
    cleaned_data_longaway['DrebPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['D'])),1,0)
    
    cleaned_data_longaway['TOPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['TO'])),1,0)
    
    cleaned_data_longaway['3FGPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['3FGM','3FGA'])),1,0)
    
    cleaned_data_longaway['3FGMPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['3FGM'])),1,0)
    
    cleaned_data_longaway['2FGPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['2FGM','2FGA'])),1,0)
    
    cleaned_data_longaway['2FGMPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['2FGM'])),1,0)
    
    cleaned_data_longaway['FoulPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['CM'])),1,0)
    
    cleaned_data_longaway['BlockPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['FV'])),1,0)
    
    cleaned_data_longaway['STPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['ST'])),1,0)
    
    cleaned_data_longaway['AstPlayer'] = np.where((cleaned_data_longaway['PlayerId'] == cleaned_data_longaway['AwayPlayersOnCourt'])&
                                                    ((cleaned_data_longaway['PlayType']).isin(['AS'])),1,0)
    
    # Aggregate individual player stats per possession both home/away and for/against
    homeusage_for = cleaned_data_longhome.groupby(['Season', 'Gamecode','Phase','HomeTeam', 'HomePlayersOnCourt', 'HomePossession']).agg(
        fta_player=('FTAPlayer', 'sum'),
        ftm_player=('FTMPlayer', 'sum'),
        to_player = ('TOPlayer', 'sum'),
        oreb_player=('OrebPlayer', 'sum'),
        threefga_player=('3FGPlayer', 'sum'),
        threefgm_player=('3FGMPlayer', 'sum'),
        twoefga_player=('2FGPlayer', 'sum'),
        twoefgm_player=('2FGMPlayer', 'sum'),
        assist_player=('AstPlayer', 'sum'),).reset_index()
    
    awayusage_for = cleaned_data_longaway.groupby(['Season', 'Gamecode', 'Phase','AwayTeam','AwayPlayersOnCourt', 'AwayPossession']).agg(
        fta_player=('FTAPlayer', 'sum'),
        ftm_player=('FTMPlayer', 'sum'),
        to_player = ('TOPlayer', 'sum'),
        oreb_player=('OrebPlayer', 'sum'),
        threefga_player=('3FGPlayer', 'sum'),
        threefgm_player=('3FGMPlayer', 'sum'),
        twoefga_player=('2FGPlayer', 'sum'),
        twoefgm_player=('2FGMPlayer', 'sum'),
        assist_player=('AstPlayer', 'sum'),).reset_index()
    
    homeusage_against = cleaned_data_longhome.groupby(['Season', 'Gamecode','Phase','HomeTeam' ,'HomePlayersOnCourt', 'AwayPossession']).agg(
        dreb_player = ('DrebPlayer', 'sum'),
        steal_player=('STPlayer', 'sum'),
        block_player=('BlockPlayer', 'sum'),
        foul_player=('FoulPlayer', 'sum'),).reset_index()
    
    awayusage_against = cleaned_data_longaway.groupby(['Season', 'Gamecode', 'Phase','AwayTeam','AwayPlayersOnCourt', 'HomePossession']).agg(
        dreb_player = ('DrebPlayer', 'sum'),
        steal_player=('STPlayer', 'sum'),
        block_player=('BlockPlayer', 'sum'),
        foul_player=('FoulPlayer', 'sum'),).reset_index()
    
    print("7")
    
    
    # Calculate player impact on each defensive possession
    homeusage_against = homeusage_against[homeusage_against['AwayPossession'] != ""]
    awayusage_against = awayusage_against[awayusage_against['HomePossession'] != ""]
    homeusage_against['AwayPossession'] = homeusage_against['AwayPossession'].astype(float)
    awayusage_against['HomePossession'] = awayusage_against['HomePossession'].astype(float)
    
    homeusage_against.rename(columns={'HomePlayersOnCourt': 'PlayerID', 'AwayPossession': 'OppPossession',
                                     'HomeTeam':'Team'}, inplace=True)
    awayusage_against.rename(columns={'AwayPlayersOnCourt': 'PlayerID', 'HomePossession': 'OppPossession',
                                     'AwayTeam':'Team'}, inplace=True)
    
    PlayerUsagePerPossession_Against = pd.concat([homeusage_against,awayusage_against], ignore_index=True)
    
    PlayerUsagePerPossession_Against['UsageFactor'] = (
        PlayerUsagePerPossession_Against['dreb_player'] * 0.5 +
        PlayerUsagePerPossession_Against['steal_player'] * 2 +
        PlayerUsagePerPossession_Against['block_player'] * 1.5 +
                 PlayerUsagePerPossession_Against['foul_player']+.4 + 0.2)
    
    
    # Calculate player impact on each offensive possession
    homeusage_for = homeusage_for[homeusage_for['HomePossession'] != ""]
    awayusage_for = awayusage_for[awayusage_for['AwayPossession'] != ""]
    homeusage_for['HomePossession'] = homeusage_for['HomePossession'].astype(float)
    awayusage_for['AwayPossession'] = awayusage_for['AwayPossession'].astype(float)
    
    homeusage_for.rename(columns={'HomePlayersOnCourt': 'PlayerID', 'HomePossession': 'Possession',
                                     'HomeTeam':'Team'}, inplace=True)
    awayusage_for.rename(columns={'AwayPlayersOnCourt': 'PlayerID', 'AwayPossession': 'Possession',
                                     'AwayTeam':'Team'}, inplace=True)
    
    PlayerUsagePerPossession_For = pd.concat([homeusage_for,awayusage_for], ignore_index=True)
    
    PlayerUsagePerPossession_For['UsageFactor'] = (
        PlayerUsagePerPossession_For['threefga_player'] * 1 +  
        PlayerUsagePerPossession_For['twoefga_player'] * 1 +   
        PlayerUsagePerPossession_For['fta_player'] * 0.44 +    
        PlayerUsagePerPossession_For['assist_player'] * 0.7 + 
        PlayerUsagePerPossession_For['to_player'] * 1 + 
        PlayerUsagePerPossession_For['oreb_player'] * .5 + .1 )
    
    
    # Calculate total usage of each team on each defensive possession so that we can find ratio of player to team
    teamsumagainst = PlayerUsagePerPossession_Against.groupby(by=['Season','Gamecode','Team','OppPossession'])['UsageFactor'].sum().reset_index()
    teamsumagainst = teamsumagainst.rename(columns={'UsageFactor': 'TeamUsage'})
    
    PlayerUsagePerPossession_Against2 = pd.merge(
        PlayerUsagePerPossession_Against,
        teamsumagainst,
        on=['Season', 'Gamecode', 'Team', 'OppPossession'],
        how='left')
    
    PlayerUsagePerPossession_Against2=PlayerUsagePerPossession_Against2.sort_values(by=['Season','Gamecode','OppPossession'])
    
    PlayerUsagePerPossession_Against2['UsagePercent'] = PlayerUsagePerPossession_Against2['UsageFactor']/PlayerUsagePerPossession_Against2['TeamUsage']
    
    # Calculate total usage of each team on each offensive possession so that we can find ratio of player to team
    teamsum = PlayerUsagePerPossession_For.groupby(by=['Season','Gamecode','Team','Possession'])['UsageFactor'].sum().reset_index()
    
    teamsum = teamsum.rename(columns={'UsageFactor': 'TeamUsage'})
    
    PlayerUsagePerPossession_For2 = pd.merge(
        PlayerUsagePerPossession_For,
        teamsum,
        on=['Season', 'Gamecode', 'Team', 'Possession'],
        how='left')
    
    PlayerUsagePerPossession_For2=PlayerUsagePerPossession_For2.sort_values(by=['Season','Gamecode','Possession'])
    
    PlayerUsagePerPossession_For2['UsagePercent'] = PlayerUsagePerPossession_For2['UsageFactor']/PlayerUsagePerPossession_For2['TeamUsage']
    
    
    # Join in playerIDs and then For/Against together, add in new aggregate columns
    playerids = boxdata[['Player_ID','Player','Team','Gamecode','Season']].drop_duplicates()
    playerids['Player_ID'] = playerids['Player_ID'].str.strip()
    
    PlayerStatsPerPossession_ForX = PlayerStatsPerPossession_For.merge(playerids,left_on=['PlayerID','Season','Gamecode'],right_on=['Player_ID','Season','Gamecode'],how='left')
    PlayerStatsPerPossession_AgainstX = PlayerStatsPerPossession_Against2.merge(playerids,left_on=['PlayerID','Season','Gamecode'],right_on=['Player_ID','Season','Gamecode'],how='left')
    
    OffensePlayerData = pd.merge(
        PlayerStatsPerPossession_ForX,
        PlayerUsagePerPossession_For2,
        on=['Season', 'Gamecode', 'Possession','PlayerID'],
        how='left')
    
    DefensePlayerData = pd.merge(
        PlayerStatsPerPossession_AgainstX,
        PlayerUsagePerPossession_Against2,
        left_on=['Season', 'Gamecode', 'Possession','PlayerID'],
        right_on=['Season','Gamecode','OppPossession','PlayerID'],
        how='left')
    
    OffensePlayerData['three_fga_for_team'] = OffensePlayerData['three_made_for_team'] + OffensePlayerData['three_missed_for_team']
    OffensePlayerData['two_fga_for_team'] = OffensePlayerData['two_made_for_team'] + OffensePlayerData['two_missed_for_team']
    DefensePlayerData['three_fga_against_team'] = DefensePlayerData['three_made_against_team'] + DefensePlayerData['three_missed_against_team']
    DefensePlayerData['two_fga_against_team'] = DefensePlayerData['two_made_against_team'] + DefensePlayerData['two_missed_against_team']
    
    OffensePlayerData = OffensePlayerData.sort_values(by=['Season','PlayerID','Gamecode','Possession'],ascending=[False,False,False,False])
    OffensePlayerData['PlayerPossessionNumber2'] = OffensePlayerData.groupby(['PlayerID']).cumcount() + 1
    
    OffensePlayerData['Phase'] = OffensePlayerData['Phase_x']
    OffensePlayerData['duration_for_team'] = OffensePlayerData['Duration']
    OffensePlayerData['Team'] = OffensePlayerData['Team_x']
    
    DefensePlayerData = DefensePlayerData.sort_values(by=['Season','PlayerID','Gamecode','Possession'],ascending=[False,False,False,False])
    DefensePlayerData['PlayerPossessionNumber2'] = DefensePlayerData.groupby(['PlayerID']).cumcount() + 1
    
    DefensePlayerData['Phase'] = DefensePlayerData['Phase_x']
    DefensePlayerData['duration_against_team'] = DefensePlayerData['Duration']
    DefensePlayerData['Team'] = DefensePlayerData['Team_x']
    
    
    # Find Team Offense Rolling averages for comparison to create Elo Ratings. If less than 5 games in the season, use the league averages
    OffenseTeamData = OffensePlayerData
    DefenseTeamData = DefensePlayerData
    
    OffenseTeamData['three_fga_for_team'] = OffenseTeamData['three_made_for_team'] + OffenseTeamData['three_missed_for_team']
    OffenseTeamData['two_fga_for_team'] = OffenseTeamData['two_made_for_team'] + OffenseTeamData['two_missed_for_team']
    
    OffenseTeamData2 = OffenseTeamData.groupby(by=['Season','Gamecode','Team_x'])[['fta_for_team','ftm_for_team',
                                                                                     'to_for_team','three_made_for_team','three_missed_for_team',
                                                                                     'three_fga_for_team', 'two_made_for_team','two_missed_for_team',
                                                                             'two_fga_for_team','oreb_for_team','ast_for_team','Duration']].mean().reset_index()
    
    OffenseTeamData2 = OffenseTeamData2.sort_values(by=['Season','Team_x','Gamecode'],ascending=[False,False,False])
    OffenseTeamData2['GameNumber'] = OffenseTeamData2.groupby(['Team_x','Season']).cumcount() + 1
    
    
    league_avg = OffenseTeamData2[OffenseTeamData2['Season'] == OffenseTeamData2['Season'].max()][['fta_for_team', 'ftm_for_team', 'to_for_team',
                                                                                                  'three_made_for_team',	'three_missed_for_team',
                                                                                                   'three_fga_for_team',	'two_made_for_team'	,
                                                                                                   'two_missed_for_team',	'two_fga_for_team'	,
                                                                                                   'oreb_for_team',	'ast_for_team'	,'Duration',]].mean()
    def rolling_with_league_avg(group, stat, league_avg):
        rolling_avg = group[stat].rolling(window=10, min_periods=1).mean()
        rolling_avg = rolling_avg.where(group['GameNumber'] > 7, other=league_avg[stat])
        return rolling_avg
    
    OffenseTeamData2['fta_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'fta_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['ftm_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'ftm_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['to_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'to_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['three_made_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'three_made_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['three_missed_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'three_missed_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['three_fga_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'three_fga_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['two_made_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'two_made_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['two_missed_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'two_missed_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['two_fga_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'two_fga_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['oreb_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'oreb_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['ast_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'ast_for_team', league_avg)).reset_index(level=0, drop=True)
    OffenseTeamData2['duration_RollingAvg'] = OffenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg(x, 'Duration', league_avg)).reset_index(level=0, drop=True)
    
    # Add in Opposing Team name to join in defense rolling averages
    firstteam = boxdata.groupby(by=['Season','Gamecode'])['Team'].max().reset_index()
    secondteam = boxdata.groupby(by=['Season','Gamecode'])['Team'].min().reset_index()
    
    bothteams = firstteam.merge(secondteam,left_on=['Season','Gamecode'],right_on=['Season','Gamecode'],how='left')
    bothteams['Team1'] = bothteams['Team_x']
    bothteams['Team2'] = bothteams['Team_y']
    bothteams['SeasonNEW'] = bothteams['Season']
    bothteams['GamecodeNEW'] = bothteams['Gamecode']
    bothteams.drop(columns=['Team_x','Team_y','Season','Gamecode'],inplace=True)
    
    OffenseTeamData3 = OffenseTeamData2.merge(bothteams, left_on=['Season', 'Gamecode'], right_on=['SeasonNEW', 'GamecodeNEW'], how='left')
    
    OffenseTeamData3['OpposingTeam'] = np.where(OffenseTeamData3['Team1'] == OffenseTeamData3['Team_x'], OffenseTeamData3['Team2'], OffenseTeamData3['Team1'])
    
    OffenseTeamData3 = OffenseTeamData3[['Season','Gamecode','Team_x','OpposingTeam',
                                         'fta_RollingAvg','ftm_RollingAvg','to_RollingAvg','three_made_RollingAvg',
                                        'three_missed_RollingAvg','three_fga_RollingAvg','two_made_RollingAvg','two_missed_RollingAvg',
                                        'two_fga_RollingAvg','oreb_RollingAvg','ast_RollingAvg','duration_RollingAvg']]
    
    
    
    # Find Team Defense Rolling averages for comparison to create Elo Ratings. If less than 5 games in the season, use the league averages
    DefenseTeamData['three_fga_against_team'] = DefenseTeamData['three_made_against_team'] + DefenseTeamData['three_missed_against_team']
    DefenseTeamData['two_fga_against_team'] = DefenseTeamData['two_made_against_team'] + DefenseTeamData['two_missed_against_team']
    
    DefenseTeamData2 = DefenseTeamData.groupby(by=['Season','Gamecode','Team_x'])[['fta_against_team','ftm_against_team',
                                                                                     'to_against_team','three_made_against_team','three_missed_against_team',
                                                                                     'three_fga_against_team', 'two_made_against_team','two_missed_against_team',
                                                                             'two_fga_against_team','oreb_against_team','ast_against_team','Duration',
                                                                                  'dreb_for_team','block_for_team','steal_for_team','foul_for_team']].mean().reset_index()
    
    DefenseTeamData2 = DefenseTeamData2.sort_values(by=['Season','Team_x','Gamecode'],ascending=[False,False,False])
    DefenseTeamData2['GameNumber'] = DefenseTeamData2.groupby(['Team_x', 'Season']).cumcount() + 1
    
    league_avg2 = DefenseTeamData2[DefenseTeamData2['Season'] == DefenseTeamData2['Season'].max()][['fta_against_team', 'ftm_against_team', 'to_against_team',
                                                                                                  'three_made_against_team',	'three_missed_against_team',
                                                                                                   'three_fga_against_team',	'two_made_against_team'	,
                                                                                                   'two_missed_against_team',	'two_fga_against_team'	,
                                                                                                   'oreb_against_team',	'ast_against_team'	,'Duration',
                                                                                                   'dreb_for_team','block_for_team','steal_for_team','foul_for_team']].mean()
    def rolling_with_league_avg2(group, stat, league_avg2):
        rolling_avg = group[stat].rolling(window=10, min_periods=1).mean()
        rolling_avg = rolling_avg.where(group['GameNumber'] > 7, other=league_avg2[stat])
        return rolling_avg
    
    DefenseTeamData2['fta_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'fta_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['ftm_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'ftm_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['to_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'to_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['three_made_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'three_made_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['three_missed_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'three_missed_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['three_fga_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'three_fga_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['two_made_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'two_made_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['two_missed_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'two_missed_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['two_fga_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'two_fga_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['oreb_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'oreb_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['ast_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'ast_against_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['duration_RollingAvg'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'Duration', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['dreb_RollingAvg_NONOPPTEAM'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'dreb_for_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['block_RollingAvg_NONOPPTEAM'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'block_for_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['steal_RollingAvg_NONOPPTEAM'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'steal_for_team', league_avg2)).reset_index(level=0, drop=True)
    DefenseTeamData2['foul_RollingAvg_NONOPPTEAM'] = DefenseTeamData2.groupby('Team_x').apply(lambda x: rolling_with_league_avg2(x, 'foul_for_team', league_avg2)).reset_index(level=0, drop=True)
    
    print("11")
    
    # Add in Opposing Team name to join in offense rolling averages
    firstteam = boxdata.groupby(by=['Season','Gamecode'])['Team'].max().reset_index()
    secondteam = boxdata.groupby(by=['Season','Gamecode'])['Team'].min().reset_index()
    
    bothteams = firstteam.merge(secondteam,left_on=['Season','Gamecode'],right_on=['Season','Gamecode'],how='left')
    bothteams['Team1'] = bothteams['Team_x']
    bothteams['Team2'] = bothteams['Team_y']
    bothteams['SeasonNEW'] = bothteams['Season']
    bothteams['GamecodeNEW'] = bothteams['Gamecode']
    bothteams.drop(columns=['Team_x','Team_y','Season','Gamecode'],inplace=True)
    
    DefenseTeamData3 = DefenseTeamData2.merge(bothteams, left_on=['Season', 'Gamecode'], right_on=['SeasonNEW', 'GamecodeNEW'], how='left')
    
    DefenseTeamData3['OpposingTeam'] = np.where(DefenseTeamData3['Team1'] == DefenseTeamData3['Team_x'], DefenseTeamData3['Team2'], DefenseTeamData3['Team1'])
    
    DefenseTeamData3 = DefenseTeamData3[['Season','Gamecode','Team_x','OpposingTeam','dreb_RollingAvg_NONOPPTEAM','block_RollingAvg_NONOPPTEAM'
                      ,'steal_RollingAvg_NONOPPTEAM','foul_RollingAvg_NONOPPTEAM',
                                         'fta_RollingAvg','ftm_RollingAvg','to_RollingAvg','three_made_RollingAvg',
                                        'three_missed_RollingAvg','three_fga_RollingAvg','two_made_RollingAvg','two_missed_RollingAvg',
                                        'two_fga_RollingAvg','oreb_RollingAvg','ast_RollingAvg','duration_RollingAvg']]
    
    DefenseTeamData3.rename(columns={'Team_x': 'Team'}, inplace=True)
    
    # Add in how each possession was started. 4 options: After a Score, After a deadlball, after turnover in play, or after defensive rebound
    homeaway = cleaned_data[['Season','Gamecode','HomeTeam','AwayTeam']].drop_duplicates()
    OffensePlayerData2 = OffensePlayerData.merge(homeaway, left_on=['Season','Gamecode'], right_on=['Season','Gamecode'],how='left')
    DefensePlayerData2 = DefensePlayerData.merge(homeaway, left_on=['Season','Gamecode'], right_on=['Season','Gamecode'],how='left')
    OffensePlayerData2['Home'] = np.where(OffensePlayerData2['HomeTeam'] == OffensePlayerData2['Team'],1,0)
    DefensePlayerData2['Home'] = np.where(DefensePlayerData2['HomeTeam_y'] == DefensePlayerData2['Team'],1,0)
    
    rp = cleaned_data.groupby(['Season', 'Gamecode', 'HomeTeam','HomePossession']).apply(lambda x: x.iloc[0][['PlayType']]).reset_index()
    rp.rename(columns={'HomePossession': 'Possession',
                      'HomeTeam':'Team'}, inplace=True)
    rp2 = cleaned_data.groupby(['Season', 'Gamecode','AwayTeam', 'AwayPossession']).apply(lambda x: x.iloc[0][['PlayType']]).reset_index()
    rp2.rename(columns={'AwayPossession': 'Possession',
                      'AwayTeam':'Team'}, inplace=True)
    resultpossessions = pd.concat([rp,rp2])
    resultpossessions['Possession'] = np.where(resultpossessions['Possession'] == "",10000,resultpossessions['Possession'])
    resultpossessions['Possession'] = resultpossessions['Possession'].astype(float)
    resultpossessions['SOP_AfterPoints'] = np.where(resultpossessions['PlayType'].isin(['2FGM','3FGM','AS','FTM']), 1, 0)
    resultpossessions['SOP_DeadBall'] = np.where(resultpossessions['PlayType'].isin(['TO','CM','BP','OF']), 1, 0)
    resultpossessions['SOP_AfterTurnover'] = np.where(resultpossessions['PlayType'].isin(['ST']), 1, 0)
    resultpossessions['SOP_DefensiveRebound'] = np.where(resultpossessions['PlayType'].isin(['D','2FGA','3FGA','FTA','FV']), 1, 0)
    OffensePlayerData2['OpposingTeam'] = np.where(OffensePlayerData2['Team'] == OffensePlayerData2['HomeTeam'],OffensePlayerData2['AwayTeam'],OffensePlayerData2['HomeTeam'])
    
    
    # Final Offense Player Dataset with Team Rolling Averages for Comparison and Start of Possession Data
    OffensePlayerDataNEW = OffensePlayerData2.merge(resultpossessions, left_on=['Season','Gamecode','Team','Possession'], 
                           right_on=['Season','Gamecode','Team','Possession'],how='left')
    
    
    DefensePlayerData1 = DefensePlayerData2.merge(bothteams,left_on=['Season','Gamecode'],right_on=['SeasonNEW','GamecodeNEW'],how='left')
    DefensePlayerData1['OpposingTeam'] = np.where(DefensePlayerData1['Team'] == DefensePlayerData1['HomeTeam_y'],DefensePlayerData1['AwayTeam_y'],DefensePlayerData1['HomeTeam_y'])
    resultpossessions['JoinTeamD'] = resultpossessions['Team']
    resultpossessions = resultpossessions.drop(columns='Team')
    
    
    # Final Defense Player Dataset with Team Rolling Averages for Comparison and Start of Possession Data
    DefensePlayerDataNEW = DefensePlayerData1.merge(resultpossessions, left_on=['Season','Gamecode','OpposingTeam','OppPossession'],
                            right_on=['Season','Gamecode','JoinTeamD','Possession'],how='left')
    
    
    # Add in Usage Rolling averages
    DefenseTeamData34 = DefenseTeamData3.rename(columns={'Team': 'Team1'})
    OffensePlayerDataNEW1 = OffensePlayerDataNEW.merge(DefenseTeamData34, left_on=['Season','Gamecode','Team_x'], right_on=['Season','Gamecode','OpposingTeam'],how='left')
    OffenseTeamData34 = OffenseTeamData3.rename(columns={'Team': 'Team1'})
    DefensePlayerDataNEW1 = DefensePlayerDataNEW.merge(OffenseTeamData34, left_on=['Season','Gamecode','Team_x'], right_on=['Season','Gamecode','OpposingTeam'],how='left')
    OffensePlayerDataNEW1['Usage_RollingAvg'] = .2
    DefensePlayerDataNEW1['Usage_RollingAvg'] = .2

    return OffensePlayerDataNEW1, DefensePlayerDataNEW1, homeusage_for, awayusage_for, homeusage_against, awayusage_against

In [5]:
OffensePlayerDataNEW1, DefensePlayerDataNEW1, homeusage_for, awayusage_for, homeusage_against, awayusage_against  = individual_player_breakdown(cleaned_data,boxscore)

5
7
11


In [31]:
OffensePlayerDataNEW1.head()

Unnamed: 0,Season,Gamecode,Phase_x,PlayerID,Possession,fta_for_team,ftm_for_team,to_for_team,three_made_for_team,three_missed_for_team,two_made_for_team,two_missed_for_team,oreb_for_team,ast_for_team,min_seconds_elapsed_for,max_seconds_elapsed_for,Duration,PlayerPossessionNumber,Player_ID,Player,Team_x,Phase_y,Team_y,fta_player,ftm_player,to_player,oreb_player,threefga_player,threefgm_player,twoefga_player,twoefgm_player,assist_player,UsageFactor,TeamUsage,UsagePercent,three_fga_for_team,two_fga_for_team,PlayerPossessionNumber2,Phase,duration_for_team,Team,HomeTeam,AwayTeam,Home,OpposingTeam_x,PlayType,SOP_AfterPoints,SOP_DeadBall,SOP_AfterTurnover,SOP_DefensiveRebound,Team1,OpposingTeam_y,dreb_RollingAvg_NONOPPTEAM,block_RollingAvg_NONOPPTEAM,steal_RollingAvg_NONOPPTEAM,foul_RollingAvg_NONOPPTEAM,fta_RollingAvg,ftm_RollingAvg,to_RollingAvg,three_made_RollingAvg,three_missed_RollingAvg,three_fga_RollingAvg,two_made_RollingAvg,two_missed_RollingAvg,two_fga_RollingAvg,oreb_RollingAvg,ast_RollingAvg,duration_RollingAvg,Usage_RollingAvg
0,2024,171,RS,PTGB,68.0,0,0,0,0,0,1,0,0,0,444,462,18,1,PTGB,"LLULL, SERGIO",MAD,RS,MAD,0,0,0,0,0,0,0,0,0,0.1,1.5,0.066667,0,1,1,RS,18,MAD,MAD,MUN,1,MUN,D,0,0,0,1,MUN,MAD,0.623377,0.031635,0.172577,0.244899,0.248889,0.192291,0.167288,0.124974,0.225821,0.350794,0.281563,0.223147,0.50471,0.145447,0.24924,17.348822,0.2
1,2024,171,RS,PTGB,67.0,0,0,0,0,0,0,1,0,0,409,436,27,2,PTGB,"LLULL, SERGIO",MAD,RS,MAD,0,0,0,0,0,0,0,0,0,0.1,1.5,0.066667,0,1,2,RS,27,MAD,MAD,MUN,1,MUN,3FGM,1,0,0,0,MUN,MAD,0.623377,0.031635,0.172577,0.244899,0.248889,0.192291,0.167288,0.124974,0.225821,0.350794,0.281563,0.223147,0.50471,0.145447,0.24924,17.348822,0.2
2,2024,171,RS,PTGB,66.0,0,0,0,0,0,1,0,0,0,374,399,25,3,PTGB,"LLULL, SERGIO",MAD,RS,MAD,0,0,0,0,0,0,0,0,0,0.1,1.5,0.066667,0,1,3,RS,25,MAD,MAD,MUN,1,MUN,2FGM,1,0,0,0,MUN,MAD,0.623377,0.031635,0.172577,0.244899,0.248889,0.192291,0.167288,0.124974,0.225821,0.350794,0.281563,0.223147,0.50471,0.145447,0.24924,17.348822,0.2
3,2024,171,RS,PTGB,65.0,2,0,0,0,0,0,0,0,0,359,360,1,4,PTGB,"LLULL, SERGIO",MAD,RS,MAD,0,0,0,0,0,0,0,0,0,0.1,1.48,0.067568,0,0,4,RS,1,MAD,MAD,MUN,1,MUN,D,0,0,0,1,MUN,MAD,0.623377,0.031635,0.172577,0.244899,0.248889,0.192291,0.167288,0.124974,0.225821,0.350794,0.281563,0.223147,0.50471,0.145447,0.24924,17.348822,0.2
4,2024,171,RS,PTGB,64.0,0,0,1,0,1,0,0,1,0,316,338,22,5,PTGB,"LLULL, SERGIO",MAD,RS,MAD,0,0,0,0,1,0,0,0,0,1.1,3.0,0.366667,1,0,5,RS,22,MAD,MAD,MUN,1,MUN,AS,1,0,0,0,MUN,MAD,0.623377,0.031635,0.172577,0.244899,0.248889,0.192291,0.167288,0.124974,0.225821,0.350794,0.281563,0.223147,0.50471,0.145447,0.24924,17.348822,0.2


In [6]:
def calculate_player_elo_ratings (OffensePlayerDataNEW1,DefensePlayerDataNEW1):

    """
    Calculates player level elo ratings using previously created player possession data.

    Returns player impact ratings for offense/defense eFG%, OReb/DReb, TO, and FTA Rate, as well as pace.
    
    """
    
    # default for k value
    n = 1.5
    
    # k values for each specific stat. the larger the k value, the mroe discrepency there is between very good and very bad at each stat
    k_values = {
        'two_made_for_team': .7,
        'two_missed_for_team': .7,
        'two_fga_for_team': .9,
        'three_made_for_team': .6,
        'three_missed_for_team': .6,
        'three_fga_for_team': .9,
        'fta_for_team': .6,
        'ftm_for_team': .4,
        'oreb_for_team': .6,        
        'to_for_team': .2,
        'Duration': .5,
        'UsagePercent': .5,
        'two_made_against_team': .5,
        'two_missed_against_team': .5,
        'two_fga_against_team': .7,
        'three_made_against_team': .4,
        'three_missed_against_team': .4,
        'three_fga_against_team': .7,
        'fta_against_team': .6,
        'ftm_against_team': .001,
        'oreb_against_team': .6,  
        'to_against_team': .2,
        'UsagePercent': .6}
    
    def elo_adjustment(current_elo, outcome, k):
        return current_elo + k * outcome
    
    initial_elo = 1500
    offense_stats_to_evaluate = [
        ('two_made_for_team', 'two_made_RollingAvg'),
        ('two_missed_for_team', 'two_missed_RollingAvg'),
        ('two_fga_for_team', 'two_fga_RollingAvg'),
        ('three_made_for_team', 'three_made_RollingAvg'),
        ('three_missed_for_team', 'three_missed_RollingAvg'),
        ('three_fga_for_team', 'three_fga_RollingAvg'),
        ('fta_for_team', 'fta_RollingAvg'),
        ('ftm_for_team', 'ftm_RollingAvg'),
        ('oreb_for_team', 'oreb_RollingAvg'),
        ('to_for_team', 'to_RollingAvg'),
        ('Duration', 'duration_RollingAvg'),
        ('UsagePercent', 'Usage_RollingAvg')]
    
    defense_stats_to_evaluate = [
        ('two_made_against_team', 'two_made_RollingAvg'),
        ('two_missed_against_team', 'two_missed_RollingAvg'),
        ('two_fga_against_team', 'two_fga_RollingAvg'),
        ('three_made_against_team', 'three_made_RollingAvg'),
        ('three_missed_against_team', 'three_missed_RollingAvg'),
        ('three_fga_against_team', 'three_fga_RollingAvg'),
        ('fta_against_team', 'fta_RollingAvg'),
        ('ftm_against_team', 'ftm_RollingAvg'),
        ('oreb_against_team', 'oreb_RollingAvg'),
        ('to_against_team', 'to_RollingAvg'),
        ('Duration', 'duration_RollingAvg'),
        ('UsagePercent', 'Usage_RollingAvg')]
    
    elo_offense_ratings_dict = {player_id: {stat[0]: initial_elo for stat in offense_stats_to_evaluate}
                                for player_id in OffensePlayerDataNEW1['PlayerID'].unique()}
    
    elo_defense_ratings_dict = {player_id: {stat[0]: initial_elo for stat in defense_stats_to_evaluate}
                                for player_id in DefensePlayerDataNEW1['PlayerID'].unique()}
    
    
    
    # Elo ratings for offense
    for _, row in OffensePlayerDataNEW1.iterrows():
        player_id = row['PlayerID']
        usage = row['UsagePercent']
        
        for stat, rolling_stat in offense_stats_to_evaluate:
            k = k_values.get(stat, .5)  
    
            if "Usage" not in stat:
                player_stat = row[stat] * usage
                rolling_stat_value = row[rolling_stat] * usage
            else:
                player_stat = row[stat]
                rolling_stat_value = row[rolling_stat]
    
            outcome = (player_stat - rolling_stat_value) / rolling_stat_value if rolling_stat_value else 0
    
            elo_offense_ratings_dict[player_id][stat] = elo_adjustment(elo_offense_ratings_dict[player_id][stat], outcome, k)
    
    # Elo ratings for defense 
    for _, row in DefensePlayerDataNEW1.iterrows():
        player_id = row['PlayerID']
        usage = row['UsagePercent']
        
        for stat, rolling_stat in defense_stats_to_evaluate:
            # Get the corresponding k value for the stat
            k = k_values.get(stat, 1)  # Default k value is 5 if not specified in the dictionary
    
            if "Usage" not in stat:
                player_stat = row[stat] * usage
                rolling_stat_value = row[rolling_stat] * usage
            else:
                player_stat = row[stat]
                rolling_stat_value = row[rolling_stat]
            
            # Calculate the outcome using the rolling stat value
            outcome = (player_stat - rolling_stat_value) / rolling_stat_value if rolling_stat_value else 0
            
            # Update the elo defense rating for the player
            elo_defense_ratings_dict[player_id][stat] = elo_adjustment(elo_defense_ratings_dict[player_id][stat], outcome, k)
    
    # Elo ratings into DataFrames and join for final ratings
    elo_offense_results = []
    for player_id, ratings in elo_offense_ratings_dict.items():
        result_row = {'PlayerID': player_id}
        result_row.update(ratings)
        elo_offense_results.append(result_row)
    
    elo_defense_results = []
    for player_id, ratings in elo_defense_ratings_dict.items():
        result_row = {'PlayerID': player_id}
        result_row.update(ratings)
        elo_defense_results.append(result_row)
    
    elo_offense_df = pd.DataFrame(elo_offense_results)
    elo_defense_df = pd.DataFrame(elo_defense_results)
    
    elo_offense_df.columns = [col + '_offense' if col != 'PlayerID' else col for col in elo_offense_df.columns]
    elo_defense_df.columns = [col + '_defense' if col != 'PlayerID' else col for col in elo_defense_df.columns]
    
    elo_combined_df = pd.merge(elo_offense_df, elo_defense_df, on='PlayerID')
    
    elo_combined_df = elo_combined_df.merge(OffensePlayerDataNEW1[['PlayerID', 'Player']].drop_duplicates(), 
                                            left_on='PlayerID', right_on='PlayerID', how='left')
    
    # Elo Ratings final dataset
    elo_combined_df.rename(columns={'two_made_for_team_offense': 'two_made_O',
                                    'two_missed_for_team_offense': 'two_miss_O',
                                    'two_fga_for_team_offense': 'two_attempt_O',
                                    'three_made_for_team_offense': 'three_made_O',
                                    'three_missed_for_team_offense': 'three_miss_O',
                                    'three_fga_for_team_offense': 'three_attempt_O',
                                    'fta_for_team_offense': 'fta_O',
                                    'ftm_for_team_offense': 'ftm_O',
                                    'oreb_for_team_offense': 'oreb_O',
                                    'to_for_team_offense': 'to_O',
                                    'Duration_offense': 'pace_O',
                                    'UsagePercent_offense': 'usage_O',
                                    'two_made_against_team_defense': 'two_made_D',
                                    'two_missed_against_team_defense': 'two_miss_D',
                                    'two_fga_against_team_defense': 'two_attempt_D',
                                    'three_made_against_team_defense': 'three_made_D',
                                    'three_missed_against_team_defense': 'three_miss_D',
                                    'three_fga_against_team_defense': 'three_attempt_D',
                                    'fta_against_team_defense': 'fta_D',
                                    'ftm_against_team_defense': 'ftm_D',
                                    'oreb_against_team_defense': 'oreb_D',
                                    'to_against_team_defense': 'to_D',
                                    'Duration_defense': 'pace_D',
                                    'UsagePercent_defense': 'usage_D'}, inplace=True)
    
    return elo_combined_df

In [7]:
elo_combined_df = calculate_player_elo_ratings (OffensePlayerDataNEW1,DefensePlayerDataNEW1)

In [32]:
elo_combined_df.head()

Unnamed: 0,PlayerID,two_made_O,two_miss_O,two_attempt_O,three_made_O,three_miss_O,three_attempt_O,fta_O,ftm_O,oreb_O,to_O,pace_O,usage_O,two_made_D,two_miss_D,two_attempt_D,three_made_D,three_miss_D,three_attempt_D,fta_D,ftm_D,oreb_D,to_D,pace_D,usage_D,Player
0,PTGB,1449.142791,1394.54772,1401.423197,1531.75656,1638.393345,1647.622663,1415.38536,1440.142896,1449.514957,1500.973356,1494.628434,1516.171159,1487.454319,1512.419351,1496.872403,1509.28835,1503.057764,1508.790618,1498.298658,1500.021266,1473.002569,1484.861296,1493.493008,1478.57118,"LLULL, SERGIO"
1,PLUO,1495.961872,1500.791792,1498.246321,1505.182142,1504.484984,1506.822417,1487.997576,1490.348586,1509.687901,1502.892141,1498.478017,1483.412046,1490.010444,1493.839461,1487.855458,1500.904724,1505.792248,1507.205116,1492.127987,1499.989145,1483.225281,1499.53763,1499.722144,1500.85104,"LAZIC, BRANKO"
2,PLMG,1494.265593,1519.432713,1505.31538,1480.925045,1488.922358,1478.933626,1496.402711,1502.535044,1487.541744,1501.942694,1500.000387,1504.892465,1507.820873,1507.441781,1510.708533,1501.927659,1494.536592,1495.420565,1453.681565,1499.919464,1475.809375,1494.835032,1500.222527,1502.690334,"MARJANOVIC, BOBAN"
3,PLCZ,1583.182987,1565.955738,1596.085243,1480.529866,1431.100946,1427.667474,1532.803563,1493.75409,1470.736732,1458.362989,1505.32001,1463.164906,1512.502294,1504.986694,1512.986812,1497.638757,1469.289103,1465.400955,1471.594412,1499.963974,1492.900472,1498.713609,1504.851091,1491.519525,"MOTIEJUNAS, DONATAS"
4,PKIR,1479.204222,1510.891973,1490.878775,1485.953837,1486.032145,1477.237897,1527.017753,1514.555698,1503.990123,1507.936713,1498.611976,1459.903963,1489.194078,1510.655592,1496.739914,1494.943608,1501.753234,1501.920747,1533.657765,1500.058348,1501.535971,1499.327696,1501.076579,1489.346196,"KAHUDI, CHARLES"


In [8]:
def home_court_advantage(OffensePlayerDataNEW1, DefensePlayerDataNEW1):

    """
    Calculates home court advantage:
        - Compilies cumulative team outcomes when teams are playing at home vs playing away on both offense and defense.
        - Uses the outcomes to create a transition matrix for the home teams, away teams, and combined.
            - Assess the impact of all major stat categories (i.e shooting percentages, offensive rebounding, fouls, etc)

    Returns 4 transition matrices which are the percentage differences at each element of the transition matrix
        - Impact of home offense/defense , away offense/defense.
        - To be used alongside of team transition matrices used later.
    
    """
    
    # Home Court Advantage. group datasets by Home, Away and Nuetral, then create a transtion matrix for each to find the differences
    HomeOTeam = OffensePlayerDataNEW1[(OffensePlayerDataNEW1['Home'] == 1) & (OffensePlayerDataNEW1['Phase_x'] == 'RS')
    ].groupby(by=['Season', 'Gamecode', 'Team', 'Possession'])[
        ['fta_for_team', 'ftm_for_team', 'to_for_team', 'three_made_for_team', 'three_missed_for_team', 
         'two_made_for_team', 'two_missed_for_team', 'two_fga_for_team', 'three_fga_for_team', 
         'oreb_for_team', 'ast_for_team']].mean().reset_index()
    
    OTeam = OffensePlayerDataNEW1[OffensePlayerDataNEW1['Home'].isin([0, 1]) & (OffensePlayerDataNEW1['Phase_x'] == 'RS')
    ].groupby(by=['Team', 'Season', 'Gamecode', 'Possession'])[
        ['fta_for_team', 'ftm_for_team', 'to_for_team', 'three_made_for_team', 'three_missed_for_team', 
         'two_made_for_team', 'two_missed_for_team', 'two_fga_for_team', 'three_fga_for_team', 
         'oreb_for_team', 'ast_for_team']].mean().reset_index()
    
    AwayOTeam = OffensePlayerDataNEW1[(OffensePlayerDataNEW1['Home'] == 0) & (OffensePlayerDataNEW1['Phase_x'] == 'RS')
    ].groupby(by=['Season', 'Gamecode', 'Team', 'Possession'])[
        ['fta_for_team', 'ftm_for_team', 'to_for_team', 'three_made_for_team', 'three_missed_for_team', 
         'two_made_for_team', 'two_missed_for_team', 'two_fga_for_team', 'three_fga_for_team', 
         'oreb_for_team', 'ast_for_team']].mean().reset_index()
    
    DTeam = DefensePlayerDataNEW1[DefensePlayerDataNEW1['Home'].isin([0, 1]) & (DefensePlayerDataNEW1['Phase_x'] == 'RS')
    ].groupby(by=['Team', 'Season', 'Gamecode', 'OppPossession'])[
        ['fta_against_team', 'ftm_against_team', 'to_against_team', 'three_made_against_team', 
         'three_missed_against_team', 'two_made_against_team', 'two_missed_against_team', 
         'two_fga_against_team', 'three_fga_against_team', 'oreb_against_team', 'ast_against_team']].mean().reset_index()
    
    AwayDTeam = DefensePlayerDataNEW1[(DefensePlayerDataNEW1['Home'] == 0) & (DefensePlayerDataNEW1['Phase_x'] == 'RS')
    ].groupby(by=['Team', 'Season', 'Gamecode', 'OppPossession'])[
        ['fta_against_team', 'ftm_against_team', 'to_against_team', 'three_made_against_team', 
         'three_missed_against_team', 'two_made_against_team', 'two_missed_against_team', 
         'two_fga_against_team', 'three_fga_against_team', 'oreb_against_team', 'ast_against_team']].mean().reset_index()
    
    HomeDTeam = DefensePlayerDataNEW1[(DefensePlayerDataNEW1['Home'] == 1) & (DefensePlayerDataNEW1['Phase_x'] == 'RS')
    ].groupby(by=['Team', 'Season', 'Gamecode', 'OppPossession'])[
        ['fta_against_team', 'ftm_against_team', 'to_against_team', 'three_made_against_team', 
         'three_missed_against_team', 'two_made_against_team', 'two_missed_against_team', 
         'two_fga_against_team', 'three_fga_against_team', 'oreb_against_team', 'ast_against_team']].mean().reset_index()

    # Calcuates Transition Matrix for team offense
    def calculate_transition_matrix_team_offense(team_data, possessions_to_sim):
    
        team_data['TripToFTline_outcome'] = team_data['fta_for_team']
        team_data['ThreePointAttempt_outcome'] = team_data['three_fga_for_team']
        team_data['TwoPointAttempt_outcome'] = team_data['two_fga_for_team']
        team_data['Turnover_outcome'] = team_data['to_for_team']
        team_data['ThreePointMiss'] = team_data['three_missed_for_team']
        team_data['ThreePointMake'] = team_data['three_made_for_team']
        team_data['TwoPointMiss'] = team_data['two_missed_for_team']
        team_data['TwoPointMake'] = team_data['two_made_for_team']
        team_data['FTMiss'] = team_data['fta_for_team'] - team_data['ftm_for_team']
        team_data['FTMake'] = team_data['ftm_for_team']
    
        team_data['2ptOreb'] = np.where((team_data['oreb_for_team'] > 0) & (team_data['two_missed_for_team'] > 0), 
                                        np.minimum(team_data['oreb_for_team'], team_data['two_missed_for_team']), 0)
        team_data['2ptNotOreb'] = np.where((team_data['oreb_for_team'] == 0) & (team_data['two_missed_for_team'] > 0), 
                                           team_data['two_missed_for_team'], 0)
        team_data['3ptOreb'] = np.where((team_data['oreb_for_team'] > 0) & (team_data['three_missed_for_team'] > 0), 
                                        np.minimum(team_data['oreb_for_team'], team_data['three_missed_for_team']), 0)
        team_data['3ptNotOreb'] = np.where((team_data['oreb_for_team'] == 0) & (team_data['three_missed_for_team'] > 0), 
                                           team_data['three_missed_for_team'], 0)
    
        ft_missed = team_data['fta_for_team'] - team_data['ftm_for_team']
        team_data['FTOreb'] = np.where((team_data['oreb_for_team'] > 0) & (ft_missed > 0), 1, 0)
        team_data['FTNotOreb'] = 1 - team_data['FTOreb']
    
        states = ['Initial Possession', '3pt Attempt', '3pt Make', '3pt Miss', '2pt Attempt', '2pt Make', '2pt Miss', 
                  'Trip to FT Line', 'FT Attempt 1', 'FT Attempt 2', 'FT Make 1', 'FT Miss 1', 'FT Make 2', 'FT Miss 2', 'Turnover',
                  '2pt Oreb', '3pt Oreb', 'FT Oreb', '2pt NonOreb', '3pt NonOreb', 'FT NonOreb']
    
        transition_matrix = pd.DataFrame(0, index=states, columns=states)
    
        transition_matrix.loc['Initial Possession', 'Trip to FT Line'] = team_data['TripToFTline_outcome'].sum()
        transition_matrix.loc['Initial Possession', '3pt Attempt'] = team_data['ThreePointAttempt_outcome'].sum()
        transition_matrix.loc['Initial Possession', '2pt Attempt'] = team_data['TwoPointAttempt_outcome'].sum()
        transition_matrix.loc['Initial Possession', 'Turnover'] = team_data['Turnover_outcome'].sum()
    
        transition_matrix.loc['2pt Attempt', '2pt Make'] = team_data['TwoPointMake'].sum()
        transition_matrix.loc['2pt Attempt', '2pt Miss'] = team_data['TwoPointMiss'].sum()
    
        transition_matrix.loc['3pt Attempt', '3pt Make'] = team_data['ThreePointMake'].sum()
        transition_matrix.loc['3pt Attempt', '3pt Miss'] = team_data['ThreePointMiss'].sum()
    
        transition_matrix.loc['2pt Miss', '2pt Oreb'] = team_data['2ptOreb'].sum()
        transition_matrix.loc['2pt Miss', '2pt NonOreb'] = team_data['2ptNotOreb'].sum()
    
        transition_matrix.loc['3pt Miss', '3pt Oreb'] = team_data['3ptOreb'].sum()
        transition_matrix.loc['3pt Miss', '3pt NonOreb'] = team_data['3ptNotOreb'].sum()
    
        transition_matrix.loc['Trip to FT Line', 'FT Attempt 1'] = team_data['TripToFTline_outcome'].sum()
        transition_matrix.loc['FT Attempt 1', 'FT Make 1'] = team_data['FTMake'].sum()
        transition_matrix.loc['FT Attempt 1', 'FT Miss 1'] = team_data['FTMiss'].sum()
        transition_matrix.loc['FT Make 1', 'FT Attempt 2'] = team_data['FTMake'].sum()
        transition_matrix.loc['FT Miss 1', 'FT Attempt 2'] = team_data['FTMiss'].sum()
        transition_matrix.loc['FT Attempt 2', 'FT Make 2'] = team_data['FTMake'].sum()
        transition_matrix.loc['FT Attempt 2', 'FT Miss 2'] = team_data['FTMiss'].sum()
    
        transition_matrix.loc['FT Miss 2', 'FT Oreb'] = team_data['FTOreb'].sum()
        transition_matrix.loc['FT Miss 2', 'FT NonOreb'] = team_data['FTNotOreb'].sum()
    
        transition_matrix.loc['2pt Oreb', 'Initial Possession'] = team_data['2ptOreb'].sum()
        transition_matrix.loc['3pt Oreb', 'Initial Possession'] = team_data['3ptOreb'].sum()
        transition_matrix.loc['FT Oreb', 'Initial Possession'] = team_data['FTOreb'].sum()
    
        transition_matrix = transition_matrix.fillna(0)
    
        return transition_matrix
    
    def calculate_transition_matrix_team_defense(team_data, possessions_to_sim):
        team_data['TripToFTline_outcome'] = team_data['fta_against_team']
        team_data['ThreePointAttempt_outcome'] = team_data['three_fga_against_team']
        team_data['TwoPointAttempt_outcome'] = team_data['two_fga_against_team']
        team_data['Turnover_outcome'] = team_data['to_against_team']
        team_data['ThreePointMiss'] = team_data['three_missed_against_team']
        team_data['ThreePointMake'] = team_data['three_made_against_team']
        team_data['TwoPointMiss'] = team_data['two_missed_against_team']
        team_data['TwoPointMake'] = team_data['two_made_against_team']
        team_data['FTMiss'] = team_data['fta_against_team'] - team_data['ftm_against_team']
        team_data['FTMake'] = team_data['ftm_against_team']
    
        team_data['2ptOreb'] = np.where((team_data['oreb_against_team'] > 0) & (team_data['two_missed_against_team'] > 0), 
                                        np.minimum(team_data['oreb_against_team'], team_data['two_missed_against_team']), 0)
        team_data['2ptNotOreb'] = np.where((team_data['oreb_against_team'] == 0) & (team_data['two_missed_against_team'] > 0), 
                                           team_data['two_missed_against_team'], 0)
        team_data['3ptOreb'] = np.where((team_data['oreb_against_team'] > 0) & (team_data['three_missed_against_team'] > 0), 
                                        np.minimum(team_data['oreb_against_team'], team_data['three_missed_against_team']), 0)
        team_data['3ptNotOreb'] = np.where((team_data['oreb_against_team'] == 0) & (team_data['three_missed_against_team'] > 0), 
                                           team_data['three_missed_against_team'], 0)
    
        ft_missed = team_data['fta_against_team'] - team_data['ftm_against_team']
        team_data['FTOreb'] = np.where((team_data['oreb_against_team'] > 0) & (ft_missed > 0), 1, 0)
        team_data['FTNotOreb'] = 1 - team_data['FTOreb']
    
        states = ['Initial Possession', '3pt Attempt', '3pt Make', '3pt Miss', '2pt Attempt', '2pt Make', '2pt Miss', 
                  'Trip to FT Line', 'FT Attempt 1', 'FT Attempt 2', 'FT Make 1', 'FT Miss 1', 'FT Make 2', 'FT Miss 2', 'Turnover',
                  '2pt Oreb', '3pt Oreb', 'FT Oreb', '2pt NonOreb', '3pt NonOreb', 'FT NonOreb']
    
        transition_matrix = pd.DataFrame(0, index=states, columns=states)
    
        transition_matrix.loc['Initial Possession', 'Trip to FT Line'] = team_data['TripToFTline_outcome'].sum()
        transition_matrix.loc['Initial Possession', '3pt Attempt'] = team_data['ThreePointAttempt_outcome'].sum()
        transition_matrix.loc['Initial Possession', '2pt Attempt'] = team_data['TwoPointAttempt_outcome'].sum()
        transition_matrix.loc['Initial Possession', 'Turnover'] = team_data['Turnover_outcome'].sum()
    
        transition_matrix.loc['2pt Attempt', '2pt Make'] = team_data['TwoPointMake'].sum()
        transition_matrix.loc['2pt Attempt', '2pt Miss'] = team_data['TwoPointMiss'].sum()
    
        transition_matrix.loc['3pt Attempt', '3pt Make'] = team_data['ThreePointMake'].sum()
        transition_matrix.loc['3pt Attempt', '3pt Miss'] = team_data['ThreePointMiss'].sum()
    
        transition_matrix.loc['2pt Miss', '2pt Oreb'] = team_data['2ptOreb'].sum()
        transition_matrix.loc['2pt Miss', '2pt NonOreb'] = team_data['2ptNotOreb'].sum()
    
        transition_matrix.loc['3pt Miss', '3pt Oreb'] = team_data['3ptOreb'].sum()
        transition_matrix.loc['3pt Miss', '3pt NonOreb'] = team_data['3ptNotOreb'].sum()
    
        transition_matrix.loc['Trip to FT Line', 'FT Attempt 1'] = team_data['TripToFTline_outcome'].sum()
        transition_matrix.loc['FT Attempt 1', 'FT Make 1'] = team_data['FTMake'].sum()
        transition_matrix.loc['FT Attempt 1', 'FT Miss 1'] = team_data['FTMiss'].sum()
        transition_matrix.loc['FT Make 1', 'FT Attempt 2'] = team_data['FTMake'].sum()
        transition_matrix.loc['FT Miss 1', 'FT Attempt 2'] = team_data['FTMiss'].sum()
        transition_matrix.loc['FT Attempt 2', 'FT Make 2'] = team_data['FTMake'].sum()
        transition_matrix.loc['FT Attempt 2', 'FT Miss 2'] = team_data['FTMiss'].sum()
    
        transition_matrix.loc['FT Miss 2', 'FT Oreb'] = team_data['FTOreb'].sum()
        transition_matrix.loc['FT Miss 2', 'FT NonOreb'] = team_data['FTNotOreb'].sum()
    
        transition_matrix.loc['2pt Oreb', 'Initial Possession'] = team_data['2ptOreb'].sum()
        transition_matrix.loc['3pt Oreb', 'Initial Possession'] = team_data['3ptOreb'].sum()
        transition_matrix.loc['FT Oreb', 'Initial Possession'] = team_data['FTOreb'].sum()
    
        transition_matrix = transition_matrix.fillna(0)
    
        return transition_matrix
    
    # Calcuate percentage differences amongst home, nuetral and away, nuetral
    def calculate_percentage_difference(home_matrix, away_matrix):
        epsilon = 1e-9
        row_sums = home_matrix.sum(axis=1)
        
        home_matrix = home_matrix.div(row_sums, axis=0)
    
        row_sums2 = away_matrix.sum(axis=1)
        
        away_matrix = away_matrix.div(row_sums2, axis=0)
        
        percentage_difference_matrix = (home_matrix - away_matrix) / (away_matrix + epsilon)
        return percentage_difference_matrix.fillna(0) 
    
    # helper function to add in a column i have in the proper version of the transtion matrix
    def add_end_possession(transition_matrix):
        transition_matrix.loc['End Possession'] = 0
    
        transition_matrix['End Possession'] = 0
    
        
        return transition_matrix
    
    homeO_matrix = calculate_transition_matrix_team_offense(HomeOTeam, possessions_to_sim=20000)
    neutralOmatrix = calculate_transition_matrix_team_offense(OTeam, possessions_to_sim=20000)
    awayO_matrix = calculate_transition_matrix_team_offense(AwayOTeam, possessions_to_sim=20000)
    
    homeD_matrix = calculate_transition_matrix_team_defense(HomeDTeam, possessions_to_sim=20000)
    nuetralDmatrix = calculate_transition_matrix_team_defense(DTeam, possessions_to_sim=20000)
    awayD_matrix = calculate_transition_matrix_team_defense(AwayDTeam, possessions_to_sim=20000)
    
    # Differences betweeen Home and Nuetral and Away and Nuetral
    homeODiff = add_end_possession(calculate_percentage_difference(homeO_matrix, neutralOmatrix))
    homeDDiff = add_end_possession(calculate_percentage_difference(homeD_matrix, nuetralDmatrix))
    awayDDiff = add_end_possession(calculate_percentage_difference(awayD_matrix, nuetralDmatrix))
    awayODiff = add_end_possession(calculate_percentage_difference(awayO_matrix, neutralOmatrix))

    return homeODiff, homeDDiff, awayODiff, awayDDiff

In [9]:
homeODiff, homeDDiff, awayODiff, awayDDiff = home_court_advantage(OffensePlayerDataNEW1, DefensePlayerDataNEW1)

In [34]:
homeODiff.head()

Unnamed: 0,Initial Possession,3pt Attempt,3pt Make,3pt Miss,2pt Attempt,2pt Make,2pt Miss,Trip to FT Line,FT Attempt 1,FT Attempt 2,FT Make 1,FT Miss 1,FT Make 2,FT Miss 2,Turnover,2pt Oreb,3pt Oreb,FT Oreb,2pt NonOreb,3pt NonOreb,FT NonOreb,End Possession
Initial Possession,0.0,-0.002186,0.0,0.0,0.00411,0.0,0.0,0.005891,0.0,0.0,0.0,0.0,0.0,0.0,-0.016454,0.0,0.0,0.0,0.0,0.0,0.0,0
3pt Attempt,0.0,0.0,0.035468,-0.019656,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3pt Make,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3pt Miss,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.000946,0.0,0.0,0.000475,0.0,0
2pt Attempt,0.0,0.0,0.0,0.0,0.0,0.003007,-0.003793,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [10]:
# Function which uses Elo ratings to create scaled probabilities
def calculate_elo_probability(rating, target_prob, target_rating=1500, scale_factor=1000):
    base_rating = target_rating + scale_factor * math.log10((1 / target_prob) - 1)
    exponent = (rating - base_rating) / scale_factor
    return 1 / (1 + math.pow(10, -exponent))

    
# Function which uses Elo ratings to create pace target number
def calculate_scaled_pace(pace_O, pace_D, target_value=70.1, reference_rating=1500):
    combined_pace = (pace_O + pace_D) / 2
    scaled_number = target_value * (reference_rating / combined_pace)
    return scaled_number


# Create offense Transtion matrix for 2 teams 
def calculate_transition_matrix_offense(elo_ratings_df, team1, team2, calcs):
    states = [
        'Initial Possession', '3pt Attempt', '3pt Make', '3pt Miss',
        '2pt Attempt', '2pt Make', '2pt Miss',
        'Trip to FT Line', 'FT Attempt 1', 'FT Attempt 2',
        'FT Make 1', 'FT Miss 1', 'FT Make 2', 'FT Miss 2',
        'Turnover', '2pt Oreb', '3pt Oreb', 'FT Oreb',
        '2pt NonOreb', '3pt NonOreb', 'FT NonOreb', 'End Possession']
    
    def handle_transition(current_state, next_state, transition_matrix):
        transition_matrix.loc[current_state, next_state] += 1
        return next_state

    def generate_team_matrix(team_df, calcs, total_possessions):
        transition_matrix = pd.DataFrame(0, index=states, columns=states)
    
        team_df['normalized_usage'] = team_df['usage_O'] / team_df['usage_O'].sum()

        team_ratings = {
            'to_O': (team_df['to_O'] * team_df['normalized_usage']).sum(),
            'fta_O': (team_df['fta_O'] * team_df['normalized_usage']).sum(),
            'two_attempt_O': (team_df['two_attempt_O'] * team_df['normalized_usage']).sum(),
            'three_attempt_O': (team_df['three_attempt_O'] * team_df['normalized_usage']).sum(),
            'three_made_O': (team_df['three_made_O'] * team_df['normalized_usage']).sum(),
            'two_made_O': (team_df['two_made_O'] * team_df['normalized_usage']).sum(),
            'ftm_O': (team_df['ftm_O'] * team_df['normalized_usage']).sum(),
            'oreb_O': (team_df['oreb_O'] * team_df['normalized_usage']).sum()}
    
        turnover_prob = calculate_elo_probability(team_ratings['to_O'], target_prob=0.12)
        ft_attempt_prob = calculate_elo_probability(team_ratings['fta_O'], target_prob=0.09)
        two_point_attempt_prob = calculate_elo_probability(team_ratings['two_attempt_O'], target_prob=0.41)
        three_point_attempt_prob = calculate_elo_probability(team_ratings['three_attempt_O'], target_prob=0.28)
    
        three_made_prob = calculate_elo_probability(team_ratings['three_made_O'], target_prob=0.37)
        two_made_prob = calculate_elo_probability(team_ratings['two_made_O'], target_prob=0.55)
        ft_made_prob = calculate_elo_probability(team_ratings['ftm_O'], target_prob=0.78)

        ft_oreb_prob = calculate_elo_probability(team_ratings['oreb_O'], target_prob=0.175)
        two_pt_oreb_prob = calculate_elo_probability(team_ratings['oreb_O'], target_prob=0.327)
        three_pt_oreb_prob = calculate_elo_probability(team_ratings['oreb_O'], target_prob=0.300)

        state_probs = {
            'Turnover': turnover_prob,
            'Trip to FT Line': ft_attempt_prob,
            '3pt Attempt': three_point_attempt_prob,
            '2pt Attempt': two_point_attempt_prob}

        total_prob = sum(state_probs.values())
        state_probs = {k: v / total_prob for k, v in state_probs.items()}

        for _ in range(calcs):
            current_state = 'Initial Possession'
            while current_state != 'End Possession':
                if current_state == 'Initial Possession':
                    next_state = np.random.choice(list(state_probs.keys()), p=list(state_probs.values()))
                    current_state = handle_transition(current_state, next_state, transition_matrix)
    
                elif current_state == '2pt Attempt':
                    outcome = np.random.choice(['2pt Make', '2pt Miss'], p=[two_made_prob, 1 - two_made_prob])
                    current_state = handle_transition(current_state, outcome, transition_matrix)
    
                    if outcome == '2pt Make':
                        current_state = handle_transition(current_state, 'End Possession', transition_matrix)
                    else:
                        if np.random.random() < two_pt_oreb_prob:
                            current_state = handle_transition(current_state, '2pt Oreb', transition_matrix)
                            current_state = handle_transition(current_state, 'Initial Possession', transition_matrix)
                        else:
                            current_state = handle_transition(current_state, '2pt NonOreb', transition_matrix)
                            current_state = handle_transition(current_state, 'End Possession', transition_matrix)
    
                elif current_state == '3pt Attempt':
                    outcome = np.random.choice(['3pt Make', '3pt Miss'], p=[three_made_prob, 1 - three_made_prob])
                    current_state = handle_transition(current_state, outcome, transition_matrix)
    
                    if outcome == '3pt Make':
                        current_state = handle_transition(current_state, 'End Possession', transition_matrix)
                    else:
                        if np.random.random() < three_pt_oreb_prob:
                            current_state = handle_transition(current_state, '3pt Oreb', transition_matrix)
                            current_state = handle_transition(current_state, 'Initial Possession', transition_matrix)
                        else:
                            current_state = handle_transition(current_state, '3pt NonOreb', transition_matrix)
                            current_state = handle_transition(current_state, 'End Possession', transition_matrix)
    
                elif current_state == 'Trip to FT Line':
                    current_state = handle_transition(current_state, 'FT Attempt 1', transition_matrix)
    
                    for attempt in ['FT Attempt 1', 'FT Attempt 2']:
                        outcome = np.random.choice([f'FT Make {attempt[-1]}', f'FT Miss {attempt[-1]}'],
                                                   p=[ft_made_prob, 1 - ft_made_prob])
                        current_state = handle_transition(current_state, outcome, transition_matrix)
    
                        if attempt == 'FT Attempt 2':
                            if outcome == 'FT Miss 2':
                                if np.random.random() < ft_oreb_prob:
                                    current_state = handle_transition(current_state, 'FT Oreb', transition_matrix)
                                    current_state = handle_transition(current_state, 'Initial Possession', transition_matrix)
                                else:
                                    current_state = handle_transition(current_state, 'FT NonOreb', transition_matrix)
                                    current_state = handle_transition(current_state, 'End Possession', transition_matrix)
                            else:
                                current_state = handle_transition(current_state, 'End Possession', transition_matrix)
                        else:
                            current_state = handle_transition(current_state, 'FT Attempt 2', transition_matrix)
    
                elif current_state == 'Turnover':
                    current_state = handle_transition(current_state, 'End Possession', transition_matrix)
    
        return transition_matrix

    team1_df = elo_ratings_df[elo_ratings_df['Team'] == team1]
    team2_df = elo_ratings_df[elo_ratings_df['Team'] == team2]

    pace_O_team1 = team1_df['pace_O'].mean()
    pace_D_team1 = team1_df['pace_D'].mean()
    pace_O_team2 = team2_df['pace_O'].mean()
    pace_D_team2 = team2_df['pace_D'].mean()

    scaled_pace_team = (calculate_scaled_pace(pace_O_team1, pace_D_team1) + calculate_scaled_pace(pace_O_team2, pace_D_team2)) / 2

    total_possessions_team = 5 * scaled_pace_team

    team1_matrix = generate_team_matrix(team1_df, calcs, total_possessions_team)
    team2_matrix = generate_team_matrix(team2_df, calcs, total_possessions_team)

    return team1_matrix, team2_matrix


# Create transition matrix for 2 teams defense
def calculate_transition_matrix_defense(elo_ratings_df, team1, team2, calcs):
    states = [
        'Initial Possession', '3pt Attempt', '3pt Make', '3pt Miss',
        '2pt Attempt', '2pt Make', '2pt Miss',
        'Trip to FT Line', 'FT Attempt 1', 'FT Attempt 2',
        'FT Make 1', 'FT Miss 1', 'FT Make 2', 'FT Miss 2',
        'Turnover', '2pt Oreb', '3pt Oreb', 'FT Oreb',
        '2pt NonOreb', '3pt NonOreb', 'FT NonOreb', 'End Possession']
    
    def handle_transition(current_state, next_state, transition_matrix):
        transition_matrix.loc[current_state, next_state] += 1
        return next_state

    def generate_team_matrix(team_df, calcs, total_possessions):
        transition_matrix = pd.DataFrame(0, index=states, columns=states)

        team_df['normalized_usage'] = team_df['usage_D'] / team_df['usage_D'].sum()

        team_ratings = {
            'to_D': (team_df['to_D'] * team_df['normalized_usage']).sum(),
            'fta_D': (team_df['fta_D'] * team_df['normalized_usage']).sum(),
            'two_attempt_D': (team_df['two_attempt_D'] * team_df['normalized_usage']).sum(),
            'three_attempt_D': (team_df['three_attempt_D'] * team_df['normalized_usage']).sum(),
            'three_made_D': (team_df['three_made_D'] * team_df['normalized_usage']).sum(),
            'two_made_D': (team_df['two_made_D'] * team_df['normalized_usage']).sum(),
            'ftm_D': (team_df['ftm_D'] * team_df['normalized_usage']).sum(),
            'oreb_D': (team_df['oreb_D'] * team_df['normalized_usage']).sum()}

        turnover_prob = calculate_elo_probability(team_ratings['to_D'], target_prob=0.12)
        ft_attempt_prob = calculate_elo_probability(team_ratings['fta_D'], target_prob=0.09)
        two_point_attempt_prob = calculate_elo_probability(team_ratings['two_attempt_D'], target_prob=0.41)
        three_point_attempt_prob = calculate_elo_probability(team_ratings['three_attempt_D'], target_prob=0.28)

        three_made_prob = calculate_elo_probability(team_ratings['three_made_D'], target_prob=0.37)
        two_made_prob = calculate_elo_probability(team_ratings['two_made_D'], target_prob=0.55)
        ft_made_prob = calculate_elo_probability(team_ratings['ftm_D'], target_prob=0.78)

        ft_oreb_prob = calculate_elo_probability(team_ratings['oreb_D'], target_prob=0.175)
        two_pt_oreb_prob = calculate_elo_probability(team_ratings['oreb_D'], target_prob=0.327)
        three_pt_oreb_prob = calculate_elo_probability(team_ratings['oreb_D'], target_prob=0.300)

        state_probs = {
            'Turnover': turnover_prob,
            'Trip to FT Line': ft_attempt_prob,
            '3pt Attempt': three_point_attempt_prob,
            '2pt Attempt': two_point_attempt_prob
        }

        total_prob = sum(state_probs.values())
        state_probs = {k: v / total_prob for k, v in state_probs.items()}

        for _ in range(calcs):
            current_state = 'Initial Possession'
            while current_state != 'End Possession':
                if current_state == 'Initial Possession':
                    next_state = np.random.choice(list(state_probs.keys()), p=list(state_probs.values()))
                    current_state = handle_transition(current_state, next_state, transition_matrix)
    
                elif current_state == '2pt Attempt':
                    outcome = np.random.choice(['2pt Make', '2pt Miss'], p=[two_made_prob, 1 - two_made_prob])
                    current_state = handle_transition(current_state, outcome, transition_matrix)
    
                    if outcome == '2pt Make':
                        current_state = handle_transition(current_state, 'End Possession', transition_matrix)
                    else:
                        if np.random.random() < two_pt_oreb_prob:
                            current_state = handle_transition(current_state, '2pt Oreb', transition_matrix)
                            current_state = handle_transition(current_state, 'Initial Possession', transition_matrix)
                        else:
                            current_state = handle_transition(current_state, '2pt NonOreb', transition_matrix)
                            current_state = handle_transition(current_state, 'End Possession', transition_matrix)
    
                elif current_state == '3pt Attempt':
                    outcome = np.random.choice(['3pt Make', '3pt Miss'], p=[three_made_prob, 1 - three_made_prob])
                    current_state = handle_transition(current_state, outcome, transition_matrix)
    
                    if outcome == '3pt Make':
                        current_state = handle_transition(current_state, 'End Possession', transition_matrix)
                    else:
                        if np.random.random() < three_pt_oreb_prob:
                            current_state = handle_transition(current_state, '3pt Oreb', transition_matrix)
                            current_state = handle_transition(current_state, 'Initial Possession', transition_matrix)
                        else:
                            current_state = handle_transition(current_state, '3pt NonOreb', transition_matrix)
                            current_state = handle_transition(current_state, 'End Possession', transition_matrix)
    
                elif current_state == 'Trip to FT Line':
                    current_state = handle_transition(current_state, 'FT Attempt 1', transition_matrix)
    
                    for attempt in ['FT Attempt 1', 'FT Attempt 2']:
                        outcome = np.random.choice([f'FT Make {attempt[-1]}', f'FT Miss {attempt[-1]}'],
                                                   p=[ft_made_prob, 1 - ft_made_prob])
                        current_state = handle_transition(current_state, outcome, transition_matrix)
    
                        if attempt == 'FT Attempt 2':
                            if outcome == 'FT Miss 2':
                                if np.random.random() < ft_oreb_prob:
                                    current_state = handle_transition(current_state, 'FT Oreb', transition_matrix)
                                    current_state = handle_transition(current_state, 'Initial Possession', transition_matrix)
                                else:
                                    current_state = handle_transition(current_state, 'FT NonOreb', transition_matrix)
                                    current_state = handle_transition(current_state, 'End Possession', transition_matrix)
                            else:
                                current_state = handle_transition(current_state, 'End Possession', transition_matrix)
                        else:
                            current_state = handle_transition(current_state, 'FT Attempt 2', transition_matrix)
    
                elif current_state == 'Turnover':
                    current_state = handle_transition(current_state, 'End Possession', transition_matrix)
    
        return transition_matrix

    team1_df = elo_ratings_df[elo_ratings_df['Team'] == team1]
    team2_df = elo_ratings_df[elo_ratings_df['Team'] == team2]

    pace_O_team1 = team1_df['pace_O'].mean()
    pace_D_team1 = team1_df['pace_D'].mean()
    pace_O_team2 = team2_df['pace_O'].mean()
    pace_D_team2 = team2_df['pace_D'].mean()

    scaled_pace_team = (calculate_scaled_pace(pace_O_team1, pace_D_team1) + calculate_scaled_pace(pace_O_team2, pace_D_team2)) / 2

    total_possessions_team = 5 * scaled_pace_team

    team1_matrix = generate_team_matrix(team1_df, calcs, total_possessions_team)
    team2_matrix = generate_team_matrix(team2_df, calcs, total_possessions_team)

    return team1_matrix, team2_matrix, scaled_pace_team

In [19]:
# Functions to simulate possession, games, and multiple games and append to final dataset for results

# function to simulate one possession 
def simulate_possession(transition_matrix, initial_state='Initial Possession', max_steps=25):
    states = transition_matrix.index
    terminal_states = ['3pt Make', '2pt Make', 'FT Make 2', 'Turnover', 
                        '2pt NonOreb', '3pt NonOreb', 'FT NonOreb']
    state = initial_state
    possession_steps = [state]
    
    for _ in range(max_steps):
        transition_probs = transition_matrix.loc[state]
        next_state = np.random.choice(states, p=transition_probs)
        possession_steps.append(next_state)
        state = next_state
        
        if state in terminal_states:
            break
    
    return possession_steps


# count points scored on possession
def calculate_possession_stats(possession):
    stats = Counter(possession)
    points = 0
    if '3pt Make' in possession:
        points += 3
    if '2pt Make' in possession:
        points += 2
    if 'FT Make 1' in possession:
        points += 1
    if 'FT Make 2' in possession:
        points += 1
    stats['Points'] = points
    return stats


# simulate a game based on number of possessions in the game
def simulate_game(team_a_matrix, team_b_matrix, simmedpossessions2):
    team_a_stats = Counter()
    team_b_stats = Counter()

    full_possessions = int(simmedpossessions2)
    fractional_possession = simmedpossessions2 - full_possessions

    for _ in range(full_possessions):
        team_a_possession = simulate_possession(team_a_matrix)
        team_a_stats += calculate_possession_stats(team_a_possession)
        
        team_b_possession = simulate_possession(team_b_matrix)
        team_b_stats += calculate_possession_stats(team_b_possession)

    if fractional_possession > 0:
        team_a_fractional = simulate_possession(team_a_matrix)
        team_a_fractional_stats = calculate_possession_stats(team_a_fractional)
        for key in team_a_fractional_stats:
            team_a_stats[key] += team_a_fractional_stats[key] * fractional_possession
        
        team_b_fractional = simulate_possession(team_b_matrix)
        team_b_fractional_stats = calculate_possession_stats(team_b_fractional)
        for key in team_b_fractional_stats:
            team_b_stats[key] += team_b_fractional_stats[key] * fractional_possession
    
    return team_a_stats, team_b_stats


# team metrics from game simulation
def calculate_team_metrics(stats):
    metrics = {}
    metrics['3pt Attempts'] = stats['3pt Attempt']
    metrics['3pt Makes'] = stats['3pt Make']
    metrics['2pt Attempts'] = stats['2pt Attempt']
    metrics['2pt Makes'] = stats['2pt Make']
    metrics['FT Attempts'] = stats['FT Attempt 1'] + stats['FT Attempt 2']
    metrics['FT Makes'] = stats['FT Make 1'] + stats['FT Make 2']
    metrics['OREB'] = stats['2pt Oreb'] + stats['3pt Oreb'] + stats['FT Oreb'] 
    metrics['Non-OREB'] = stats['2pt NonOreb'] + stats['3pt NonOreb'] + stats['FT NonOreb']
    metrics['Points'] = stats['Points']
    metrics['Turnovers'] = stats['Turnover']
    metrics['Misses'] = stats['2pt Miss'] + stats['3pt Miss'] + stats['FT Attempt 2 Miss']
    metrics['Opp DREB'] = (stats['2pt Miss'] + stats['3pt Miss'] + stats['FT Attempt 2 Miss']) - (stats['2pt Oreb'] + stats['3pt Oreb'] + stats['FT Oreb'])



    metrics['3pt%'] = metrics['3pt Makes'] / metrics['3pt Attempts'] if metrics['3pt Attempts'] > 0 else 0
    metrics['2pt%'] = metrics['2pt Makes'] / metrics['2pt Attempts'] if metrics['2pt Attempts'] > 0 else 0
    metrics['FT%'] = metrics['FT Makes'] / metrics['FT Attempts'] if metrics['FT Attempts'] > 0 else 0
    metrics['FTA%'] = metrics['FT Attempts'] / (metrics['2pt Attempts'] + metrics['3pt Attempts']) if (metrics['2pt Attempts'] + metrics['3pt Attempts']) > 0 else 0
    metrics['OR%'] = metrics['OREB'] / (metrics['OREB'] + metrics['Opp DREB']) if (metrics['OREB'] + metrics['Opp DREB']) > 0 else 0

    return metrics


# simulate multiple games 
def run_multiple_games(team_a_matrix, team_b_matrix, num_games, simmedpossessions):
    results = []
    
    for _ in range(num_games):
        # Use simmedpossessions in the game simulation
        team_a_stats, team_b_stats = simulate_game(team_a_matrix, team_b_matrix, simmedpossessions)
        
        # Calculate team metrics
        team_a_metrics = calculate_team_metrics(team_a_stats)
        team_b_metrics = calculate_team_metrics(team_b_stats)
        
        # Assign each team's DREB metric to the opposing team's metrics
        team_a_metrics['DREB'] = team_b_metrics.get('Opp DREB', 0)  # Team A gets Team B's DREB as Opp DREB
        team_b_metrics['DREB'] = team_a_metrics.get('Opp DREB', 0)  # Team B gets Team A's DREB as Opp DREB
        
        results.append((team_a_metrics, team_b_metrics))
    
    return results, simmedpossessions
    

# take results of all games simulated to find derived means for each stat
def analyze_results(results, simmedpossessions, team_a_name, team_b_name):
    filtered_results = [game for game in results if game[0]['Points'] != game[1]['Points']]

    team_a_totals = {key: sum(game[0][key] for game in filtered_results) for key in results[0][0]}
    team_b_totals = {key: sum(game[1][key] for game in filtered_results) for key in results[0][1]}
    
    num_games = len(filtered_results)  # Update number of games to only include non-tied games

    metrics_data = []

    if num_games == 0:  
        metrics_data.append({"Metric": "Average", team_a_name: None, team_b_name: None})
        metrics_data.append({"Metric": "Win Percentage", team_a_name: 0, team_b_name: 0})
        metrics_data.append({"Metric": "Tie Percentage", team_a_name: 1, team_b_name: None})
        metrics_data.append({"Metric": "Supremacy", team_a_name: None, team_b_name: None})
        metrics_data.append({"Metric": "Total", team_a_name: None, team_b_name: None})
    else:
        for key in team_a_totals.keys():
            team_a_average = team_a_totals[key] / num_games
            team_b_average = team_b_totals[key] / num_games if key in team_b_totals else None
            metrics_data.append({"Metric": key, team_a_name: team_a_average, team_b_name: team_b_average})

        team_a_wins = sum(game[0]['Points'] > game[1]['Points'] for game in filtered_results)
        team_b_wins = sum(game[1]['Points'] > game[0]['Points'] for game in filtered_results)
        
        metrics_data.append({"Metric": "Win Percentage", team_a_name: team_a_wins / num_games, team_b_name: team_b_wins / num_games})

        total_team_a_points = sum(game[0]['Points'] for game in filtered_results)
        total_team_b_points = sum(game[1]['Points'] for game in filtered_results)
        
        sup_team_a = (total_team_a_points - total_team_b_points)/num_games
        sup_team_b = (total_team_b_points - total_team_a_points)/num_games
        
        metrics_data.append({"Metric": "Supremacy", team_a_name: sup_team_a, team_b_name: sup_team_b})

        total_points = (total_team_a_points + total_team_b_points)/num_games
        metrics_data.append({"Metric": "Total", team_a_name: total_points, team_b_name: total_points})
        metrics_data.append({"Metric": "Possessions", team_a_name: simmedpossessions, team_b_name: simmedpossessions})

    results_df = pd.DataFrame(metrics_data)

    return results_df

def simulate_matchup(home_team,away_team,HFA, number_of_simulations, possessionAdjust, teamsDF1):
    team1O, team2O = calculate_transition_matrix_offense(teamsDF1, home_team,away_team, calcs=25000)
    team1D, team2D, simmedpossessions = calculate_transition_matrix_defense(teamsDF1, home_team,away_team, calcs=25000)
    team1 = team1O + team2D
    team2 = team2O + team1D
    team1 = team1 * (1 + (HFA * (homeODiff + awayDDiff)))
    team2 = team2 * (1 + (HFA * (awayODiff + homeDDiff)))
    row_sums = team1.sum(axis=1)
    row_sums2 = team2.sum(axis=1)
    HomeTeamMatrix = team1.div(row_sums, axis=0).fillna(0)
    AwayTeamMatrix = team2.div(row_sums2, axis=0).fillna(0)
    
    results, simmedpossessionsA = run_multiple_games(HomeTeamMatrix, AwayTeamMatrix, number_of_simulations, simmedpossessions + possessionAdjust)
    box_score = analyze_results(results,simmedpossessionsA, home_team, away_team)

    return box_score.round(3)

In [20]:
# Determine who will be playing upcoming game for each team
def assess_teams(OffensePlayerDataNEW1,elo_combined_df):
    
    # Assess the players who are active and project the number of possessions each player will have based on rolling averages.
    PlayerTeams = OffensePlayerDataNEW1[['PlayerID', 'Team', 'Season']]
    
    # Sort by Season in descending order and take the first occurrence of each PlayerID
    latest_teams = (PlayerTeams
        .sort_values('Season', ascending=False)
        .groupby('PlayerID')
        .first()
        .reset_index()[['PlayerID', 'Team']])
    
    elo_combined_df2 = elo_combined_df.merge(PlayerTeams,
                          left_on='PlayerID', right_on=['PlayerID'],how='left').drop_duplicates().dropna()
    
    MostRecentSeason = OffensePlayerDataNEW1[OffensePlayerDataNEW1['Season'] == OffensePlayerDataNEW1['Season'].max()]
    TeamMostRecentGame = MostRecentSeason.groupby(by='Team_x')['Gamecode'].max().reset_index()
    TeamMostRecentGame['MostRecentGame'] = TeamMostRecentGame['Gamecode']
    
    avgPossessionsPerPlayer = MostRecentSeason.groupby(by=['PlayerID','Player','Team_x','Gamecode']).size().reset_index().sort_values(by=['PlayerID','Team_x','Gamecode'],
                                                                                                                ascending=[True,True,False])
    avgPossessionsPerPlayer['Order'] = avgPossessionsPerPlayer.groupby(['PlayerID']).cumcount() + 1
    
    avgPossessionsPerPlayer = avgPossessionsPerPlayer[avgPossessionsPerPlayer['Order'] <= 3]
    avgPossessionsPerPlayer['PossessionCount'] = avgPossessionsPerPlayer[0]
    
    avgPossessionsPerPlayer = avgPossessionsPerPlayer.merge(TeamMostRecentGame,left_on=['Team_x','Gamecode'],right_on=['Team_x','Gamecode'],how='left')
    avgPossessionsPerPlayer['PlayedInMostRecentGame'] = np.where(avgPossessionsPerPlayer['MostRecentGame'] >= 0,1,0)
    
    PlayerPossessionsToSim = avgPossessionsPerPlayer.groupby(by=['PlayerID','Player','Team_x']).agg({'PossessionCount':'mean',
                                                                                     'PlayedInMostRecentGame':'max'}).reset_index()
    
    
    teamsDF = elo_combined_df2.merge(PlayerPossessionsToSim[['PlayerID','PossessionCount','PlayedInMostRecentGame']], on='PlayerID',how='left')

    return teamsDF


# function to add or remove a player from a team
def update_or_remove_player_data(players_data, df):
    df_copy = df.copy()
    
    for player_info in players_data:
        player_name = player_info['Player']
        
        # Check if the action is to remove the player
        if player_info.get('Action') == 'remove':
            # Just remove exact player name match
            rows_before = len(df_copy)
            df_copy = df_copy[df_copy['Player'] != player_name]
            rows_removed = rows_before - len(df_copy)
            if rows_removed > 0:
                print(f"Removed {player_name} from the DataFrame.")
            else:
                print(f"Player {player_name} not found in the DataFrame.")
        else:
            # Get the player's original data from the source DataFrame
            player_data = df[df['Player'] == player_name]
            
            if not player_data.empty:
                new_player_data = player_data.copy()
                
                # Update only the specified fields
                if player_info.get('PossessionCount') is not None:
                    new_player_data['PossessionCount'] = player_info['PossessionCount']
                if player_info.get('Team') is not None:
                    new_player_data['Team'] = player_info['Team']
                if player_info.get('PlayedInMostRecentGame') is not None:
                    new_player_data['PlayedInMostRecentGame'] == 1
                
                # Add the modified player data to the DataFrame
                df_copy = pd.concat([df_copy, new_player_data], ignore_index=True)
                print(f"Added {player_name} to {player_info['Team']}.")
            else:
                print(f"Player {player_name} not found in the DataFrame.")
    
    # Remove duplicates but keep the last occurrence (the new team entry)
    df_copy = df_copy.drop_duplicates(subset=['Player'], keep='last')
    
    return df_copy

    

In [21]:
teamsDF = assess_teams(OffensePlayerDataNEW1,elo_combined_df)

In [35]:
teamsDF.head()

Unnamed: 0,PlayerID,two_made_O,two_miss_O,two_attempt_O,three_made_O,three_miss_O,three_attempt_O,fta_O,ftm_O,oreb_O,to_O,pace_O,usage_O,two_made_D,two_miss_D,two_attempt_D,three_made_D,three_miss_D,three_attempt_D,fta_D,ftm_D,oreb_D,to_D,pace_D,usage_D,Player,Team,Season,PossessionCount,PlayedInMostRecentGame
0,PTGB,1449.142791,1394.54772,1401.423197,1531.75656,1638.393345,1647.622663,1415.38536,1440.142896,1449.514957,1500.973356,1494.628434,1516.171159,1487.454319,1512.419351,1496.872403,1509.28835,1503.057764,1508.790618,1498.298658,1500.021266,1473.002569,1484.861296,1493.493008,1478.57118,"LLULL, SERGIO",MAD,2024,27.666667,1
1,PLUO,1495.961872,1500.791792,1498.246321,1505.182142,1504.484984,1506.822417,1487.997576,1490.348586,1509.687901,1502.892141,1498.478017,1483.412046,1490.010444,1493.839461,1487.855458,1500.904724,1505.792248,1507.205116,1492.127987,1499.989145,1483.225281,1499.53763,1499.722144,1500.85104,"LAZIC, BRANKO",RED,2024,8.333333,0
2,PLMG,1494.265593,1519.432713,1505.31538,1480.925045,1488.922358,1478.933626,1496.402711,1502.535044,1487.541744,1501.942694,1500.000387,1504.892465,1507.820873,1507.441781,1510.708533,1501.927659,1494.536592,1495.420565,1453.681565,1499.919464,1475.809375,1494.835032,1500.222527,1502.690334,"MARJANOVIC, BOBAN",ULK,2024,14.333333,0
3,PLCZ,1583.182987,1565.955738,1596.085243,1480.529866,1431.100946,1427.667474,1532.803563,1493.75409,1470.736732,1458.362989,1505.32001,1463.164906,1512.502294,1504.986694,1512.986812,1497.638757,1469.289103,1465.400955,1471.594412,1499.963974,1492.900472,1498.713609,1504.851091,1491.519525,"MOTIEJUNAS, DONATAS",MCO,2024,34.0,1
4,PKIR,1479.204222,1510.891973,1490.878775,1485.953837,1486.032145,1477.237897,1527.017753,1514.555698,1503.990123,1507.936713,1498.611976,1459.903963,1489.194078,1510.655592,1496.739914,1494.943608,1501.753234,1501.920747,1533.657765,1500.058348,1501.535971,1499.327696,1501.076579,1489.346196,"KAHUDI, CHARLES",ASV,2024,20.333333,0


In [22]:
def run_full_simuluation (home_team, away_team, HFA, players_to_update, number_of_simulations, possession_adjust,
                         teamsDF, homeusage_for, awayusage_for, homeusage_against, awayusage_against):

    '''
    Returns simulated team level stats and player level box score.
    
    Parameters:
        home_team (str): Team code (e.g., 'BAR', 'IST', 'ASV')
        away_team (str): Team code (e.g., 'MAD', 'TEL', 'RED')
        players_to_update (list): List of dictionaries to add or remove players
        number_of_simulations (int): Number of simulations (default 15000)
        HFA (float): Home court advantage (default 0.8)
        possession_adjust (float): Possession adjustment (e.g., to slow the game down by 1 possession, -1.)
    '''
    
    teamsDF1 = update_or_remove_player_data(players_to_update, teamsDF)
    teamsDF1 = teamsDF1[teamsDF1['PlayedInMostRecentGame'] == 1]
    
    SimmedGameStats = simulate_matchup(home_team, away_team, HFA, number_of_simulations, possession_adjust, teamsDF1)

    playerstats1A = pd.concat([homeusage_for,awayusage_for])
    playerstats13 = (playerstats1A.sort_values(['PlayerID', 'Season', 'Gamecode', 'Possession'], 
                                            ascending=[True, False, False, False]))
    
    # Create decay weights for each possession
    def calculate_decay_weights(group, decay_factor=0.98):
        indices = np.arange(len(group))
        weights = np.power(decay_factor, indices)
        # Normalize weights to sum to 1
        weights = weights / weights.sum()
        return weights
    
    # Apply weighted average
    def weighted_mean(group, col_name, weights):
        return np.average(group[col_name], weights=weights)
    
    # Group by player and calculate weighted stats
    playerstats52 = (playerstats13.groupby(['Team', 'PlayerID'])
                     .apply(lambda x: pd.Series({
                         'fta': weighted_mean(x, 'fta_player', calculate_decay_weights(x)),
                         'threefga': weighted_mean(x, 'threefga_player', calculate_decay_weights(x)),
                         'twofga': weighted_mean(x, 'twoefga_player', calculate_decay_weights(x)),
                         'twofgm': weighted_mean(x, 'twoefgm_player', calculate_decay_weights(x)),
                         'threefgm': weighted_mean(x, 'threefgm_player', calculate_decay_weights(x)),
                         'ftmade': weighted_mean(x, 'ftm_player', calculate_decay_weights(x)),
                         'assist': weighted_mean(x, 'assist_player', calculate_decay_weights(x)),
                         'to': weighted_mean(x, 'to_player', calculate_decay_weights(x)),
                         'oreb': weighted_mean(x, 'oreb_player', calculate_decay_weights(x))
                     }))
                     .reset_index())
    
    playerstats2A = pd.concat([homeusage_against,awayusage_against])
    playerstats23 = (playerstats2A.sort_values(['PlayerID', 'Season', 'Gamecode', 'OppPossession'], 
                                            ascending=[True, False, False, False]))
    playerstats22 = playerstats23.groupby(by=['Team','PlayerID']).agg(dreb=('dreb_player','mean'),
                                                                     ).reset_index()
    playerstats12 = pd.merge(playerstats52,playerstats22)
    
    averagesShooting = playerstats12[['fta', 'threefga', 'twofga', 'twofgm', 'threefgm', 'ftmade', 'assist', 'to', 'oreb','dreb']].mean()
    
    weight_ft = .3  # Weight for free throw percentage
    weight_two = .25  # Weight for two-point percentage
    weight_three = .4 # Weight for three-point percentage
    
    # Regressed shooting percentages
    playerstats12['ft%'] = ((playerstats12['ftmade'] / playerstats12['fta']) * playerstats12['fta'] +
                                      (averagesShooting['ftmade'] / averagesShooting['fta']) * weight_ft) / \
                                      (playerstats12['fta'] + weight_ft)
    
    playerstats12['two%'] = ((playerstats12['twofgm'] / playerstats12['twofga']) * playerstats12['twofga'] +
                                       (averagesShooting['twofgm'] / averagesShooting['twofga']) * weight_two) / \
                                       (playerstats12['twofga'] + weight_two)
    
    playerstats12['three%'] = ((playerstats12['threefgm'] / playerstats12['threefga']) * playerstats12['threefga'] +
                                         (averagesShooting['threefgm'] / averagesShooting['threefga']) * weight_three) / \
                                         (playerstats12['threefga'] + weight_three)
    
    playerstats12['points'] = playerstats12['threefgm']*3 + playerstats12['twofgm']*2 + playerstats12['ftmade']
    
    PlayerPossessionsToSimWithAttempts = teamsDF1.merge(playerstats12, left_on=['Team','PlayerID'], right_on=['Team','PlayerID'], how='left')
    
    PlayerPossessionsToSimWithAttempts = PlayerPossessionsToSimWithAttempts[PlayerPossessionsToSimWithAttempts['Team'].isin([SimmedGameStats.columns[1],SimmedGameStats.columns[2]])]
    
    SimmedGameStats2 = SimmedGameStats.set_index('Metric')
    
    df_inverted = SimmedGameStats2.transpose()
    
    InitialEstimates = PlayerPossessionsToSimWithAttempts.merge(df_inverted, left_on='Team',right_on=df_inverted.index,how='left')
    
    InitialEstimates['PlayerFTA'] = InitialEstimates['fta']*InitialEstimates['PossessionCount']
    InitialEstimates['Player2FGA'] = InitialEstimates['twofga']*InitialEstimates['PossessionCount']
    InitialEstimates['Player3FGA'] = InitialEstimates['threefga']*InitialEstimates['PossessionCount']
    InitialEstimates['PlayerAssist'] = InitialEstimates['assist']*InitialEstimates['PossessionCount']
    InitialEstimates['PlayerTO'] = InitialEstimates['to']*InitialEstimates['PossessionCount']
    InitialEstimates['PlayerOreb'] = InitialEstimates['oreb']*InitialEstimates['PossessionCount']
    InitialEstimates['PlayerPts'] = InitialEstimates['points']*InitialEstimates['PossessionCount']
    InitialEstimates['PlayerDreb'] = InitialEstimates['dreb']*InitialEstimates['PossessionCount']
    
    InitialEstimatesTeamTotals  = InitialEstimates.groupby(by='Team').agg(fta2=('PlayerFTA', 'sum'),
                                            twofga2 = ('Player2FGA','sum'),
                                            threefga2 = ('Player3FGA','sum'),
                                                                          points2 = ('PlayerPts','sum'),
                                                                         assist2 = ('PlayerAssist','sum'),
                                                                         to2 = ('PlayerTO','sum'),
                                                                         oreb2 = ('PlayerOreb','sum'),                                                                 
                                                                         dreb2 = ('PlayerDreb','sum'),
                                                                         ).reset_index()
    
    NormalizedEstimates = InitialEstimates.merge(InitialEstimatesTeamTotals,left_on='Team',right_on='Team',how='left')
    NormalizedEstimates['FTA'] = (NormalizedEstimates['FT Attempts']/NormalizedEstimates['fta2'])*NormalizedEstimates['PlayerFTA']
    NormalizedEstimates['2FGA'] = (NormalizedEstimates['2pt Attempts']/NormalizedEstimates['twofga2'])*NormalizedEstimates['Player2FGA']
    NormalizedEstimates['3FGA'] = (NormalizedEstimates['3pt Attempts']/NormalizedEstimates['threefga2'])*NormalizedEstimates['Player3FGA']
    NormalizedEstimates['TO'] = (NormalizedEstimates['Turnovers']/NormalizedEstimates['to2'])*NormalizedEstimates['PlayerTO']
    NormalizedEstimates['ORB'] = (NormalizedEstimates['OREB']/NormalizedEstimates['oreb2'])*NormalizedEstimates['PlayerOreb']
    NormalizedEstimates['DRB'] = ((NormalizedEstimates['DREB']*.93)/NormalizedEstimates['dreb2'])*NormalizedEstimates['PlayerDreb']
    NormalizedEstimates['TRB'] = NormalizedEstimates['ORB'] + NormalizedEstimates['DRB']
    
    
    NormalizedEstimates['PlayerFTM_initial'] = NormalizedEstimates['ft%']*NormalizedEstimates['FTA']
    NormalizedEstimates['Player2FGM_initial'] = NormalizedEstimates['two%']*NormalizedEstimates['2FGA']
    NormalizedEstimates['Player3FGM_initial'] = NormalizedEstimates['three%']*NormalizedEstimates['3FGA']
    
    NormalizedEstimates['PlayerFTM_initial'] = NormalizedEstimates['PlayerFTM_initial'].fillna(0)
    NormalizedEstimates['Player2FGM_initial'] = NormalizedEstimates['Player2FGM_initial'].fillna(0)
    NormalizedEstimates['Player3FGM_initial'] = NormalizedEstimates['Player3FGM_initial'].fillna(0)
    
    normtotals = NormalizedEstimates.groupby(by='Team').agg(ftm2=('PlayerFTM_initial', 'sum'),
                                            twofgm2 = ('Player2FGM_initial','sum'),
                                            threefgm2 = ('Player3FGM_initial','sum'),).reset_index()
    
    NormalizedEstimates2 = NormalizedEstimates.merge(normtotals,left_on='Team',right_on='Team',how='left')
    NormalizedEstimates2['FTM'] = (NormalizedEstimates2['FT Makes']/NormalizedEstimates2['ftm2'])*NormalizedEstimates2['PlayerFTM_initial']
    NormalizedEstimates2['2FGM'] = (NormalizedEstimates2['2pt Makes']/NormalizedEstimates2['twofgm2'])*NormalizedEstimates2['Player2FGM_initial']
    NormalizedEstimates2['3FGM'] = (NormalizedEstimates2['3pt Makes']/NormalizedEstimates2['threefgm2'])*NormalizedEstimates2['Player3FGM_initial']
    
    NormalizedEstimates2['Pts'] = (NormalizedEstimates2['3FGM']*3) + (NormalizedEstimates2['2FGM']*2) + (NormalizedEstimates2['FTM'])
    
    NormalizedEstimates2['2PT%'] = NormalizedEstimates2['2FGM'] / NormalizedEstimates2['2FGA']
    NormalizedEstimates2['FT%'] = NormalizedEstimates2['FTM'] / NormalizedEstimates2['FTA']
    NormalizedEstimates2['3PT%'] = NormalizedEstimates2['3FGM'] / NormalizedEstimates2['3FGA']
    
    possessionTime = 2400/SimmedGameStats.iloc[21][1]
    NormalizedEstimates2['Minutes'] = round((NormalizedEstimates2['PossessionCount'] * possessionTime)/60,1)
    
    SimmedBoxScore = NormalizedEstimates2[['Team','PlayerID','Player','Minutes','FTM','FTA','FT%','2FGM','2FGA','2PT%','3FGM','3FGA','3PT%','Pts','TO','ORB','DRB','TRB']].drop_duplicates()
    
    SimmedBoxScore = SimmedBoxScore.fillna(0).round(2).sort_values(by=['Team','Pts'], ascending=[False,False])
    
    return SimmedGameStats, SimmedBoxScore


In [26]:
updated_players = [
        {'Player': 'CAMPAZZO, FACUNDO', 'Action': 'remove'}, 
        {'Player': 'TAVARES, WALTER', 'Team': 'MAD', 'PossessionCount': 35}
    ]

SimmedTeamStats, SimmedBoxScore = run_full_simuluation(
    home_team='MAD',
    away_team='BAR', 
    HFA=0.8, 
    players_to_update=updated_players, 
    number_of_simulations=10000, 
    possession_adjust=0,
    teamsDF=teamsDF,
    homeusage_for=homeusage_for,
    awayusage_for=awayusage_for,
    homeusage_against=homeusage_against,
    awayusage_against=awayusage_against,
)


Removed CAMPAZZO, FACUNDO from the DataFrame.
Added TAVARES, WALTER to MAD.


In [36]:
SimmedTeamStats

Unnamed: 0,Metric,MAD,BAR
0,3pt Attempts,26.492,24.745
1,3pt Makes,10.334,8.837
2,2pt Attempts,35.785,37.713
3,2pt Makes,19.659,20.781
4,FT Attempts,16.082,14.774
5,FT Makes,12.478,11.351
6,OREB,10.469,10.183
7,Non-OREB,23.611,24.393
8,Points,82.778,79.402
9,Turnovers,10.709,10.896


In [37]:
SimmedBoxScore

Unnamed: 0,Team,PlayerID,Player,Minutes,FTM,FTA,FT%,2FGM,2FGA,2PT%,3FGM,3FGA,3PT%,Pts,TO,ORB,DRB,TRB
18,MAD,P003108,"HEZONJA, MARIO",21.7,3.42,4.29,0.8,3.37,6.0,0.56,1.77,4.48,0.39,15.46,1.83,1.7,4.09,5.8
13,MAD,P006540,"MUSA, DZANAN",22.9,3.97,5.11,0.78,3.04,5.62,0.54,1.77,4.13,0.43,15.37,2.31,0.94,2.63,3.57
0,MAD,PTGB,"LLULL, SERGIO",15.7,0.45,0.57,0.79,0.73,1.4,0.52,2.34,6.09,0.38,8.9,0.89,0.28,0.93,1.21
20,MAD,P005791,"TAVARES, WALTER",19.8,1.53,2.16,0.71,3.68,6.7,0.55,0.0,0.0,0.0,8.88,1.65,3.92,4.19,8.11
2,MAD,P013378,"RATHAN-MAYES, XAVIER",11.3,0.46,0.6,0.77,1.67,3.14,0.53,1.18,3.06,0.39,7.34,0.63,0.68,0.83,1.51
14,MAD,P003733,"ABALDE, ALBERTO",19.3,0.0,0.0,0.78,1.46,2.49,0.59,0.81,1.99,0.41,5.36,1.06,0.78,1.66,2.44
10,MAD,P009213,"NDIAYE, ELI",13.8,0.16,0.2,0.79,1.25,2.41,0.52,0.74,1.88,0.4,4.88,0.07,0.04,1.66,1.7
6,MAD,P011247,"FELIZ, ANDRES",14.6,0.22,0.28,0.79,1.23,2.25,0.55,0.61,1.76,0.35,4.51,0.73,0.55,0.87,1.42
11,MAD,P007630,"GARUBA, USMAN",12.7,1.27,1.59,0.8,1.57,2.72,0.58,0.03,0.08,0.39,4.49,0.71,0.9,1.98,2.87
15,MAD,P003583,"IBAKA, SERGE",7.9,0.47,0.61,0.78,1.01,1.77,0.57,0.53,1.41,0.37,4.07,0.7,0.47,1.37,1.85
