In [1]:
import math
import time

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import random

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 50)

In [2]:
keys = ['SEASON_ID', 'GAME_DATE', 'MATCHUP', 'GAME_ID']

df_games = pd.read_csv('../nba_data/data/games.csv')
df_games['GAME_DATE'] = pd.to_datetime(df_games['GAME_DATE'])
df_games.sort_values(by='GAME_DATE', inplace=True)

df_games.head()

Unnamed: 0,TEAM_ID_HOME,TEAM_NAME_HOME,WL_HOME,MIN_HOME,PTS_HOME,FGM_HOME,FGA_HOME,FG_PCT_HOME,FG3M_HOME,FG3A_HOME,FG3_PCT_HOME,FTM_HOME,FTA_HOME,FT_PCT_HOME,OREB_HOME,DREB_HOME,REB_HOME,AST_HOME,STL_HOME,BLK_HOME,TOV_HOME,PF_HOME,PLUS_MINUS_HOME,SEASON_ID,GAME_DATE,MATCHUP,GAME_ID,TEAM_ID_AWAY,TEAM_NAME_AWAY,WL_AWAY,MIN_AWAY,PTS_AWAY,FGM_AWAY,FGA_AWAY,FG_PCT_AWAY,FG3M_AWAY,FG3A_AWAY,FG3_PCT_AWAY,FTM_AWAY,FTA_AWAY,FT_PCT_AWAY,OREB_AWAY,DREB_AWAY,REB_AWAY,AST_AWAY,STL_AWAY,BLK_AWAY,TOV_AWAY,PF_AWAY,PLUS_MINUS_AWAY
6329,1610612747,Los Angeles Lakers,W,240,116,42,93,0.452,14,29,0.483,18,28,0.643,18,34,52,23,8,6,19,23,13.0,22013,2013-10-29,LAC @ LAL,21300003,1610612746,Los Angeles Clippers,L,239,103,41,83,0.494,8,21,0.381,13,23,0.565,10,30,40,27,11,4,16,21,-13.0
6498,1610612754,Indiana Pacers,W,241,97,34,71,0.479,7,17,0.412,22,32,0.688,10,34,44,17,4,18,20,13,10.0,22013,2013-10-29,ORL @ IND,21300001,1610612753,Orlando Magic,L,241,87,36,93,0.387,9,19,0.474,6,10,0.6,13,26,39,17,10,6,17,26,-10.0
6171,1610612748,Miami Heat,W,239,107,37,72,0.514,11,20,0.55,22,29,0.759,5,35,40,26,10,7,18,21,12.0,22013,2013-10-29,CHI @ MIA,21300002,1610612741,Chicago Bulls,L,238,95,35,83,0.422,7,26,0.269,18,23,0.783,11,30,41,23,11,4,18,27,-12.0
540,1610612756,Phoenix Suns,W,240,104,43,83,0.518,4,19,0.211,14,18,0.778,11,36,47,18,8,5,15,17,13.0,22013,2013-10-30,PHX vs. POR,21300015,1610612757,Portland Trail Blazers,L,240,91,33,81,0.407,8,26,0.308,17,24,0.708,11,28,39,19,5,5,13,14,-13.0
6392,1610612755,Philadelphia 76ers,W,238,114,43,80,0.538,8,21,0.381,20,24,0.833,8,32,40,24,16,1,18,21,4.0,22013,2013-10-30,MIA @ PHI,21300005,1610612748,Miami Heat,L,242,110,42,85,0.494,16,40,0.4,10,13,0.769,7,24,31,30,7,0,19,25,-4.0


### Compute Elo Rating

##### Reference: [How We Calculate NBA Elo Ratings](https://fivethirtyeight.com/features/how-we-calculate-nba-elo-ratings/#:~:text=Take%20a%20team's%20margin%20of,accounting%20for%20home%2Dcourt%20advantage.)

In [3]:
def win_probs(home_elo, away_elo, home_court_advantage) :
  '''
  Home and road team win probabilities implied by Elo ratings and home court adjustment.
  Odds the home team will win based on elo ratings and home court advantage.
  '''
  h = math.pow(10, home_elo / 400)
  r = math.pow(10, away_elo / 400)
  a = math.pow(10, home_court_advantage / 400)

  denom = r + a*h
  home_prob = a*h / denom
  away_prob = r / denom 

  return home_prob, away_prob


def home_odds_on(home_elo, away_elo, home_court_advantage) :
  h = math.pow(10, home_elo / 400)
  r = math.pow(10, away_elo / 400)
  a = math.pow(10, home_court_advantage / 400)

  return a*h / r


def elo_k(MOV, elo_diff):
  '''
  This function determines the constant used in the elo rating, based on margin of victory and difference in elo ratings.
  '''
  k = 20
  multiplier = abs(MOV + 3) ** 0.8 / (7.5 + 0.006 * abs(elo_diff))
  return k * multiplier


def get_prev_elo(team, game_date, season, df_games, df_elo) :
  '''Takes into account prev season elo.'''
  
  prev_game = df_games.loc[(df_games['GAME_DATE'] < game_date) & \
                            ((df_games['TEAM_NAME_HOME'] == team) | (df_games['TEAM_NAME_AWAY'] == team))]\
                              .sort_values(by='GAME_DATE').tail(1).iloc[0]


  if team == prev_game['TEAM_NAME_HOME']:
    elo_rating = df_elo[df_elo['GAME_ID'] == prev_game['GAME_ID']]['TEAM_ELO_AFTER_HOME'].values[0]
  elif team == prev_game['TEAM_NAME_AWAY']:
    elo_rating = df_elo[df_elo['GAME_ID'] == prev_game['GAME_ID']]['TEAM_ELO_AFTER_AWAY'].values[0]
  
  if prev_game['SEASON_ID'] != season :
    # Computing new elo rating when a new season begin
    return (0.75 * elo_rating) + (0.25 * 1505)
  else :
    return elo_rating
     

def update_elo(home_score, away_score, home_elo, away_elo, home_court_advantage):
  '''
  Updates the home and away teams' Elo ratings after a game.
  '''
  home_prob, away_prob = win_probs(home_elo, away_elo, home_court_advantage)

  home_win = int(home_score > away_score)
  away_win = int(home_score < away_score)

  k = elo_k(home_score - away_score, home_elo - away_elo)

  updated_home_elo = home_elo + k * (home_win - home_prob)
  updated_away_elo = away_elo + k * (away_win - away_prob)

  return updated_home_elo, updated_away_elo

In [4]:
df_elo = pd.DataFrame(columns=['GAME_ID', 'TEAM_NAME_HOME', 'TEAM_NAME_AWAY', 'TEAM_ELO_BEFORE_HOME', 'TEAM_ELO_BEFORE_AWAY', 'TEAM_ELO_AFTER_HOME', 'TEAM_ELO_AFTER_AWAY'])
df_teams_elos = pd.DataFrame(columns=['GAME_ID', 'TEAM_NAME', 'ELO', 'GAME_DATE', 'PLAYED_WHERE', 'SEASON_ID'])


for index, row in df_games.iterrows():

    game_id = row['GAME_ID']
    game_date = row['GAME_DATE']
    season = row['SEASON_ID']
    home_team, away_team = row['TEAM_NAME_HOME'], row['TEAM_NAME_AWAY']
    home_score, away_score = row['PTS_HOME'], row['PTS_AWAY']

    if (home_team not in df_elo['TEAM_NAME_HOME'].values and home_team not in df_elo['TEAM_NAME_AWAY'].values) :
        home_team_elo_before = 1500
    else :
        home_team_elo_before = get_prev_elo(home_team, game_date, season, df_games, df_elo)

    if (away_team not in df_elo['TEAM_NAME_HOME'].values and away_team not in df_elo['TEAM_NAME_AWAY'].values) :
        away_team_elo_before = 1500
    else :
        away_team_elo_before = get_prev_elo(away_team, game_date, season, df_games, df_elo)

    home_team_elo_after, away_team_elo_after = update_elo(home_score, away_score, home_team_elo_before, away_team_elo_before, 69)

    new_row = {
        'GAME_ID': game_id,
        'TEAM_NAME_HOME': home_team,
        'TEAM_NAME_AWAY': away_team,
        'TEAM_ELO_BEFORE_HOME': home_team_elo_before,
        'TEAM_ELO_BEFORE_AWAY': away_team_elo_before,
        'TEAM_ELO_AFTER_HOME' : home_team_elo_after,
        'TEAM_ELO_AFTER_AWAY': away_team_elo_after
    }

    teams_row_one = {'GAME_ID': game_id,'TEAM_NAME': home_team, 'ELO': home_team_elo_before, 'GAME_DATE': game_date, 'PLAYED_WHERE': 'HOME', 'SEASON_ID': season}
    teams_row_two = {'GAME_ID': game_id,'TEAM_NAME': away_team, 'ELO': away_team_elo_before, 'GAME_DATE': game_date, 'PLAYED_WHERE': 'AWAY', 'SEASON_ID': season}

    df_teams_elos = pd.concat([df_teams_elos, pd.DataFrame([teams_row_one])], ignore_index=True)
    df_teams_elos = pd.concat([df_teams_elos, pd.DataFrame([teams_row_two])], ignore_index=True)
    df_elo = pd.concat([df_elo, pd.DataFrame([new_row])], ignore_index=True)

In [5]:
df_teams_elos

Unnamed: 0,GAME_ID,TEAM_NAME,ELO,GAME_DATE,PLAYED_WHERE,SEASON_ID
0,21300003,Los Angeles Lakers,1500,2013-10-29,HOME,22013
1,21300003,Los Angeles Clippers,1500,2013-10-29,AWAY,22013
2,21300001,Indiana Pacers,1500,2013-10-29,HOME,22013
3,21300001,Orlando Magic,1500,2013-10-29,AWAY,22013
4,21300002,Miami Heat,1500,2013-10-29,HOME,22013
...,...,...,...,...,...,...
23953,22201230,Golden State Warriors,1579.404383,2023-04-09,AWAY,22022
23954,22201227,Denver Nuggets,1572.710058,2023-04-09,HOME,22022
23955,22201227,Sacramento Kings,1547.557108,2023-04-09,AWAY,22022
23956,22201219,Miami Heat,1528.465022,2023-04-09,HOME,22022


In [6]:
df_elo.to_csv('../nba_data/data/elo.csv', index=False)
df_teams_elos.to_csv('../nba_data/data/teams_elo.csv', index=False)

### Visualize Elo Rating

##### Reference: [The Complete History Of The NBA](https://projects.fivethirtyeight.com/complete-history-of-the-nba/#celtics)

In [7]:
dates = list(set([d.strftime('%m-%d-%Y') for d in df_teams_elos['GAME_DATE']]))
dates = sorted(dates, key=lambda x: time.strptime(x, '%m-%d-%Y'))

teams = df_games['TEAM_NAME_AWAY']
dataset = pd.DataFrame(columns=dates)
dataset['TEAM_NAME'] = teams.drop_duplicates()
dataset = dataset.set_index('TEAM_NAME')

for index, row in df_teams_elos.iterrows():
  date = row['GAME_DATE'].strftime('%m-%d-%Y')
  team = row['TEAM_NAME']
  elo = row['ELO']
  dataset[date][team] = elo

df_teams_elos['ELO'] = df_teams_elos['ELO'].astype(float)

df_elo.head()

Unnamed: 0,GAME_ID,TEAM_NAME_HOME,TEAM_NAME_AWAY,TEAM_ELO_BEFORE_HOME,TEAM_ELO_BEFORE_AWAY,TEAM_ELO_AFTER_HOME,TEAM_ELO_AFTER_AWAY
0,21300003,Los Angeles Lakers,Los Angeles Clippers,1500,1500.0,1509.8509,1490.1491
1,21300001,Indiana Pacers,Orlando Magic,1500,1500.0,1508.343237,1491.656763
2,21300002,Miami Heat,Chicago Bulls,1500,1500.0,1509.355197,1490.644803
3,21300015,Phoenix Suns,Portland Trail Blazers,1500,1500.0,1509.8509,1490.1491
4,21300005,Philadelphia 76ers,Miami Heat,1500,1509.355197,1505.210202,1504.144995


In [8]:
import plotly.express as px

def runningMeanFast(x, N):
    return np.convolve(x, np.ones((N,)) / N, mode='valid')

elos = df_teams_elos['ELO'].values

sample_teams = ['Los Angeles Lakers', 'Denver Nuggets', 'Golden State Warriors']

sampled_data = df_teams_elos[df_teams_elos['TEAM_NAME'].isin(sample_teams)]

# Calculate the average ELO rating
average_elo = np.mean(sampled_data['ELO'])

fig = px.line(sampled_data, x='GAME_DATE', y='ELO', color='TEAM_NAME',
              title='ELO over Time', labels={'GAME_DATE': 'Game Date', 'ELO': 'ELO'})

# Add a horizontal line at the average ELO rating
fig.add_hline(y=average_elo, line_dash="dash", line_color="black", annotation_text=f'Avg ELO: {average_elo:.2f}')

fig.show()