In [1]:
import os
import sys

# A notebook has no real __file__; the original expression resolved the
# *string* '__file__' against the working directory, which is simply the
# working directory itself. Spell that out directly.
# NOTE(review): assumes the kernel's working directory is the notebook's
# folder -- confirm for your launch setup.
notebook_dir = os.getcwd()
parent_dir = os.path.dirname(notebook_dir)

# Make the parent folder importable (it holds `utils`). Guarded so that
# re-running this cell does not keep appending duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
from utils import get_categorical_columns, get_numeric_columns, get_matchups

In [2]:
import pandas as pd
# Let rich DataFrame displays show up to 50 columns before eliding.
pd.set_option(
    'display.max_columns',
    50,
)

In [3]:
# Identifier columns; get_avg_stats_last_n_games removes these from the
# set of columns it averages.
keys = ['GAME_DATE', 'SEASON_ID', 'MATCHUP', 'GAME_ID', 'TEAM_ID_HOME', 'TEAM_ID_AWAY']

# Load the games table, parse GAME_DATE into datetimes and order the rows
# chronologically.
# NOTE(review): the loaded frame carries an 'Unnamed: 0' column (it looks like
# a saved row index). If get_numeric_columns reports it, it will be averaged
# like a real statistic downstream -- confirm, and consider index_col=0.
df_games = (
    pd.read_csv('../nba_data/data/games.csv')
    .assign(GAME_DATE=lambda frame: pd.to_datetime(frame['GAME_DATE']))
    .sort_values(by='GAME_DATE')
)

# Column groups as reported by the project helpers in `utils`.
num_cols = get_numeric_columns(df_games)
cat_cols = get_categorical_columns(df_games)

df_games.head()

Unnamed: 0,TEAM_ID_HOME,TEAM_NAME_HOME,WL_HOME,MIN_HOME,PTS_HOME,FGM_HOME,FGA_HOME,FG_PCT_HOME,FG3M_HOME,FG3A_HOME,FG3_PCT_HOME,FTM_HOME,FTA_HOME,FT_PCT_HOME,OREB_HOME,DREB_HOME,REB_HOME,AST_HOME,STL_HOME,BLK_HOME,TOV_HOME,PF_HOME,PLUS_MINUS_HOME,SEASON_ID,GAME_DATE,MATCHUP,GAME_ID,TEAM_ID_AWAY,TEAM_NAME_AWAY,WL_AWAY,MIN_AWAY,PTS_AWAY,FGM_AWAY,FGA_AWAY,FG_PCT_AWAY,FG3M_AWAY,FG3A_AWAY,FG3_PCT_AWAY,FTM_AWAY,FTA_AWAY,FT_PCT_AWAY,OREB_AWAY,DREB_AWAY,REB_AWAY,AST_AWAY,STL_AWAY,BLK_AWAY,TOV_AWAY,PF_AWAY,PLUS_MINUS_AWAY
6329,1610612747,Los Angeles Lakers,W,240,116,42,93,0.452,14,29,0.483,18,28,0.643,18,34,52,23,8,6,19,23,13.0,22013,2013-10-29,LAC @ LAL,21300003,1610612746,Los Angeles Clippers,L,239,103,41,83,0.494,8,21,0.381,13,23,0.565,10,30,40,27,11,4,16,21,-13.0
6498,1610612754,Indiana Pacers,W,241,97,34,71,0.479,7,17,0.412,22,32,0.688,10,34,44,17,4,18,20,13,10.0,22013,2013-10-29,ORL @ IND,21300001,1610612753,Orlando Magic,L,241,87,36,93,0.387,9,19,0.474,6,10,0.6,13,26,39,17,10,6,17,26,-10.0
6171,1610612748,Miami Heat,W,239,107,37,72,0.514,11,20,0.55,22,29,0.759,5,35,40,26,10,7,18,21,12.0,22013,2013-10-29,CHI @ MIA,21300002,1610612741,Chicago Bulls,L,238,95,35,83,0.422,7,26,0.269,18,23,0.783,11,30,41,23,11,4,18,27,-12.0
540,1610612756,Phoenix Suns,W,240,104,43,83,0.518,4,19,0.211,14,18,0.778,11,36,47,18,8,5,15,17,13.0,22013,2013-10-30,PHX vs. POR,21300015,1610612757,Portland Trail Blazers,L,240,91,33,81,0.407,8,26,0.308,17,24,0.708,11,28,39,19,5,5,13,14,-13.0
6392,1610612755,Philadelphia 76ers,W,238,114,43,80,0.538,8,21,0.381,20,24,0.833,8,32,40,24,16,1,18,21,4.0,22013,2013-10-30,MIA @ PHI,21300005,1610612748,Miami Heat,L,242,110,42,85,0.494,16,40,0.4,10,13,0.769,7,24,31,30,7,0,19,25,-4.0


In [4]:
def get_avg_stats_last_n_games(team, game_date, season_team_stats, n, stat_cols=None):
    """Average the stat columns over a team's most recent games.

    Only games dated strictly before ``game_date`` in which ``team`` appears
    as either the home or the away side are considered; the ``n`` latest of
    those (by ``GAME_DATE``) are averaged.

    The mean is taken over every selected column as-is, so both the
    ``*_HOME`` and the ``*_AWAY`` columns are averaged regardless of which
    side ``team`` played on in each game.
    NOTE(review): as a result a column such as ``PTS_HOME`` mixes the team's
    own values (its home games) with its opponents' values (its away games)
    -- confirm this is the intended feature definition.

    Parameters
    ----------
    team : str
        Team name, matched against ``TEAM_NAME_HOME`` / ``TEAM_NAME_AWAY``.
    game_date : datetime-like
        Cut-off date; only earlier games are used.
    season_team_stats : pandas.DataFrame
        Games to search. Must contain ``GAME_DATE``, ``TEAM_NAME_HOME``,
        ``TEAM_NAME_AWAY`` and every column that is averaged.
    n : int
        Maximum number of most recent games to average.
    stat_cols : iterable of str, optional
        Columns to average. When omitted, the notebook-level ``num_cols``
        minus ``keys`` is used.

    Returns
    -------
    pandas.Series
        Mean of each averaged column, indexed by column name in the order of
        ``stat_cols``. All values are NaN when the team has no earlier game.
    """
    if stat_cols is None:
        # Filter in num_cols order rather than via a set difference: set
        # iteration order for strings can change between interpreter runs,
        # which would shuffle the output columns from one session to the next.
        excluded = set(keys)
        stat_cols = [col for col in num_cols if col not in excluded]
    # Drop repeated names while keeping first-seen order.
    stat_cols = list(dict.fromkeys(stat_cols))

    is_earlier = season_team_stats['GAME_DATE'] < game_date
    involves_team = (
        (season_team_stats['TEAM_NAME_HOME'] == team)
        | (season_team_stats['TEAM_NAME_AWAY'] == team)
    )
    recent_games = (
        season_team_stats.loc[is_earlier & involves_team]
        .sort_values(by='GAME_DATE')
        .tail(n)
    )

    return recent_games[stat_cols].mean()

In [5]:
n_games = 10

# Derive the column prefixes from n_games so the names stay truthful if the
# window size is changed; with n_games = 10 they are exactly
# 'HOME_LAST_10_AVG_' and 'AWAY_LAST_10_AVG_'.
home_prefix = f'HOME_LAST_{n_games}_AVG_'
away_prefix = f'AWAY_LAST_{n_games}_AVG_'

# Collect one Series per game and build the frame once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies every earlier row on each
# iteration.
feature_rows = []

for season in df_games['SEASON_ID'].unique():
    # Averages never cross a season boundary: each game only sees earlier
    # games of its own season. Games are visited newest-first, which fixes
    # the row order of the result.
    season_team_stats = (
        df_games[df_games['SEASON_ID'] == season]
        .sort_values(by='GAME_DATE', ascending=False)
    )

    game_fields = zip(
        season_team_stats['GAME_ID'],
        season_team_stats['GAME_DATE'],
        season_team_stats['TEAM_NAME_HOME'],
        season_team_stats['TEAM_NAME_AWAY'],
    )
    for game_id, game_date, h_team, a_team in game_fields:
        h_team_recent_performance = get_avg_stats_last_n_games(
            h_team, game_date, season_team_stats, n_games
        ).add_prefix(home_prefix)
        a_team_recent_performance = get_avg_stats_last_n_games(
            a_team, game_date, season_team_stats, n_games
        ).add_prefix(away_prefix)

        # Home features first, then away features, then the game key.
        new_row = pd.concat([h_team_recent_performance, a_team_recent_performance], sort=False)
        # NOTE(review): new_row holds float averages, so GAME_ID is most likely
        # stored as a float here -- confirm consumers of the output expect that.
        new_row['GAME_ID'] = game_id
        feature_rows.append(new_row)

# One row per game; a team with no earlier game in its season contributes NaN
# averages for its side.
recent_performance_df = pd.DataFrame(feature_rows)

In [6]:
# Keep only rows in which every value is present.
recent_performance_df = recent_performance_df.dropna()

In [7]:
# Persist the feature table; the DataFrame index is not written.
recent_performance_path = '../nba_data/data/recent_team_performance.csv'
recent_performance_df.to_csv(recent_performance_path, index=False)