In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import os

# Predicting Outcomes of Basketball Games

$\texttt{Spencer Ashton}\\$
$\texttt{Trevor Wai}\\$
$\texttt{Carson Watkin}\\$
$\texttt{Zach Joachim}\\$

### December 5, 2023

# **1 Introduction**

### 1.1 Previous work.
Research has been done that analyzes the effect of different statistics of a given NBA game on its outcome (SOURCE). This work is helpful, as it allows players and coaches to focus on drills and metrics that will more likely lead to winning during practice and for general managers to make informed trade decisions to fill positions on rosters that correspond to the . We seek to extend this work by focusing more on outcomes of games that have not been played yet.

### 1.2 Projecting future games.
While it is one thing to analyze basketball games that have already occurred to find patterns that lead to success, it is another task to assign a probability that a team wins an upcoming game. Such a result would have great implications for sports traders to set fair odds for betting, as well as for coaches to make game-specific lineup adjustments.

# **2 Data**

### 2.1 Box scores. 
We got our data by scraping basketballreference.com.  This website contains the box scores from every game sorted by team and season. WHAT IS A BOX SCORE? We had to combine the box scores from every team in a given season into one big dataframe representing each game played in a season. Another thing we considered: do we compare across seasons or just within a given season? We talk about this more LATER.



### 2.2 Data cleaning.
Talking about the cleaning from above. One challenge we ran into was that this approach included each game twice; once from each participating teams' dataset. We employed SOME TACTIC TO DEAL WITH THAT... Below, we show the process of cleaning the data.

In [10]:
team_data = pd.read_csv('./NBA/Utah Jazz/UTA_2018_team_game_log.csv', index_col=1)
team_data.drop('Unnamed: 0', axis=1, inplace=True)
team_data.fillna(value={'H/A': 'vs'}, inplace=True)
team_data.dropna(inplace=True)
team_data.rename(columns={'Tm': 'Team Score', 'Opp.1': 'Opp Score'}, inplace=True)
team_data["FG_cum"] = team_data["FG"].cumsum()
team_data["3P_cum"] = team_data["3P"].cumsum()
team_data["FGA_cum"] = team_data["FGA"].cumsum()
team_data['TOV_cum'] = team_data['TOV'].cumsum()
team_data['FTA_cum'] = team_data['FTA'].cumsum()
team_data['ORB_cum'] = team_data['ORB'].cumsum()
team_data['TRB_cum'] = team_data['TRB'].cumsum()
team_data['FT_cum'] = team_data['FT'].cumsum()
team_data.head()

Unnamed: 0_level_0,Date,H/A,Opp,W/L,Team Score,Opp Score,FG,FGA,FG%,3P,...,TOV,PF,FG_cum,3P_cum,FGA_cum,TOV_cum,FTA_cum,ORB_cum,TRB_cum,FT_cum
G,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.0,2017-10-18,vs,DEN,W,106.0,96.0,41.0,81.0,0.506,9.0,...,15.0,18.0,41.0,9.0,81.0,15.0,16.0,6.0,36.0,15.0
2.0,2017-10-20,@,MIN,L,97.0,100.0,37.0,77.0,0.481,9.0,...,19.0,21.0,78.0,18.0,158.0,34.0,33.0,13.0,74.0,29.0
3.0,2017-10-21,vs,OKC,W,96.0,87.0,38.0,77.0,0.494,9.0,...,18.0,21.0,116.0,27.0,235.0,52.0,47.0,20.0,115.0,40.0
4.0,2017-10-24,@,LAC,L,84.0,102.0,34.0,79.0,0.43,7.0,...,18.0,23.0,150.0,34.0,314.0,70.0,58.0,28.0,156.0,49.0
5.0,2017-10-25,@,PHO,L,88.0,97.0,32.0,77.0,0.416,6.0,...,23.0,19.0,182.0,40.0,391.0,93.0,83.0,35.0,200.0,67.0


In [11]:
opp_data = pd.read_csv('./NBA/Utah Jazz/UTA_2018_opp_game_log.csv', index_col=1)
opp_data.drop('Unnamed: 0', axis=1, inplace=True)
opp_data.fillna(value={'H/A': 'vs'}, inplace=True)
opp_data.dropna(inplace=True)
opp_data.rename(columns={'Tm': 'Team Score', 'Opp.1': 'Opp Score'}, inplace=True)
opp_data["FG_cum"] = opp_data["FG"].cumsum()
opp_data["3P_cum"] = opp_data["3P"].cumsum()
opp_data["FGA_cum"] = opp_data["FGA"].cumsum()
opp_data['TOV_cum'] = opp_data['TOV'].cumsum()
opp_data['FTA_cum'] = opp_data['FTA'].cumsum()
opp_data['ORB_cum'] = opp_data['ORB'].cumsum()
opp_data['TRB_cum'] = opp_data['TRB'].cumsum()
opp_data['FT_cum'] = opp_data['FT'].cumsum()
opp_data.head()

Unnamed: 0_level_0,Date,H/A,Opp,W/L,Team Score,Opp Score,FG,FGA,FG%,3P,...,TOV,PF,FG_cum,3P_cum,FGA_cum,TOV_cum,FTA_cum,ORB_cum,TRB_cum,FT_cum
G,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.0,2017-10-18,vs,DEN,W,106.0,96.0,36.0,77.0,0.468,13.0,...,20.0,18.0,36.0,13.0,77.0,20.0,12.0,7.0,38.0,11.0
2.0,2017-10-20,@,MIN,L,97.0,100.0,36.0,81.0,0.444,7.0,...,15.0,17.0,72.0,20.0,158.0,35.0,38.0,17.0,82.0,32.0
3.0,2017-10-21,vs,OKC,W,96.0,87.0,32.0,78.0,0.41,11.0,...,17.0,16.0,104.0,31.0,236.0,52.0,57.0,25.0,120.0,44.0
4.0,2017-10-24,@,LAC,L,84.0,102.0,39.0,83.0,0.47,10.0,...,13.0,16.0,143.0,41.0,319.0,65.0,78.0,36.0,164.0,58.0
5.0,2017-10-25,@,PHO,L,88.0,97.0,42.0,96.0,0.438,6.0,...,16.0,25.0,185.0,47.0,415.0,81.0,87.0,52.0,219.0,65.0


### 2.3 Feature engineering
Obviously a good model would just look at total points scored. We had to engineer it.  Blah blah blah. Taking out the points from the box scores. Also talking about four factors from SOURCE.

In [12]:
team_four_factors = pd.DataFrame({'Date':[],
                                  'W/L': [],
                                  'H/A':[],
                                  'eFG%':[],
                                  'TOV%':[],
                                  'ORB%':[],
                                  'FTR':[],
                                  'Score':[]})

team_four_factors['Date'] = team_data['Date']
team_four_factors['W/L'] = team_data['W/L'].replace({'W': 1, 'L': 0})
team_four_factors['H/A'] = team_data['H/A']
team_four_factors["eFG%"] = (team_data['FG_cum'] + 0.5 * team_data['3P_cum']) / team_data['FGA_cum']
team_four_factors['TOV%'] = team_data['TOV_cum'] / (team_data['FGA_cum'] + 0.44 * team_data['FTA_cum'] + team_data['TOV_cum'])
team_four_factors['ORB%'] = team_data['ORB'] / (team_data['ORB_cum'] + (opp_data['TRB_cum'] - opp_data['ORB_cum']))
team_four_factors['FTR'] = team_data['FT_cum'] / team_data['FGA_cum']
team_four_factors['Score'] = team_data['Team Score']
team_four_factors.set_index('Date', inplace=True)
team_four_factors.loc[team_four_factors['H/A'] == '@', 'H/A'] = 0
team_four_factors.loc[team_four_factors['H/A'] == 'vs', 'H/A'] = 1
# team_four_factors.drop(team_four_factors['H/A'] == 0, 'H/A')
team_four_factors.head()

Unnamed: 0_level_0,W/L,H/A,eFG%,TOV%,ORB%,FTR,Score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-10-18,1,1,0.561728,0.145575,0.162162,0.185185,106.0
2017-10-20,0,0,0.550633,0.164633,0.089744,0.183544,97.0
2017-10-21,1,1,0.551064,0.169007,0.06087,0.170213,96.0
2017-10-24,0,0,0.531847,0.170932,0.051282,0.156051,84.0
2017-10-25,0,0,0.516624,0.178667,0.034653,0.171355,88.0


In [13]:
opp_four_factors = pd.DataFrame({'Team':[],
                                 'Date':[],
                                 'eFG%':[],
                                 'TOV%':[],
                                 'ORB%':[],
                                 'FTR':[],})

opp_four_factors['Team'] = opp_data['Opp']
opp_four_factors['Date'] = opp_data['Date']
opp_four_factors['eFG%'] = (opp_data['FG_cum'] + 0.5 * opp_data['3P_cum']) / opp_data['FGA_cum']
opp_four_factors['TOV%'] = opp_data['TOV_cum'] / (opp_data['FGA_cum'] + 0.44 * opp_data['FTA_cum'] + opp_data['TOV_cum'])
opp_four_factors['ORB%'] = opp_data['ORB_cum'] / (opp_data['ORB_cum'] + (team_data['TRB_cum'] - team_data['ORB_cum']))
opp_four_factors['FTR'] = opp_data['FT_cum'] / opp_data['FGA_cum']
opp_four_factors['Score'] = opp_data['Opp Score']
opp_four_factors.set_index('Date', inplace=True)
opp_four_factors.head()

Unnamed: 0_level_0,Team,eFG%,TOV%,ORB%,FTR,Score
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-10-18,DEN,0.551948,0.195542,0.189189,0.142857,96.0
2017-10-20,MIN,0.518987,0.166889,0.217949,0.202532,100.0
2017-10-21,OKC,0.506356,0.166092,0.208333,0.186441,87.0
2017-10-24,LAC,0.512539,0.155383,0.219512,0.181818,102.0
2017-10-25,PHO,0.50241,0.151606,0.239631,0.156627,97.0


In [14]:
result = team_four_factors.join(opp_four_factors, rsuffix='_opp', on='Date')
result.head()

Unnamed: 0_level_0,W/L,H/A,eFG%,TOV%,ORB%,FTR,Score,Team,eFG%_opp,TOV%_opp,ORB%_opp,FTR_opp,Score_opp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-10-18,1,1,0.561728,0.145575,0.162162,0.185185,106.0,DEN,0.551948,0.195542,0.189189,0.142857,96.0
2017-10-20,0,0,0.550633,0.164633,0.089744,0.183544,97.0,MIN,0.518987,0.166889,0.217949,0.202532,100.0
2017-10-21,1,1,0.551064,0.169007,0.06087,0.170213,96.0,OKC,0.506356,0.166092,0.208333,0.186441,87.0
2017-10-24,0,0,0.531847,0.170932,0.051282,0.156051,84.0,LAC,0.512539,0.155383,0.219512,0.181818,102.0
2017-10-25,0,0,0.516624,0.178667,0.034653,0.171355,88.0,PHO,0.50241,0.151606,0.239631,0.156627,97.0


In [15]:
result = result[result['H/A'] != 0] # Only include home games.
result.tail()

Unnamed: 0_level_0,W/L,H/A,eFG%,TOV%,ORB%,FTR,Score,Team,eFG%_opp,TOV%_opp,ORB%_opp,FTR_opp,Score_opp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2018-03-28,0,1,0.522453,0.132222,0.003797,0.203122,94.0,BOS,0.509256,0.136976,0.202682,0.190438,97.0
2018-03-30,1,1,0.52362,0.132435,0.002192,0.203118,107.0,MEM,0.509614,0.136879,0.202465,0.189576,97.0
2018-04-03,1,1,0.525961,0.132573,0.004281,0.203348,117.0,LAL,0.509834,0.136723,0.201441,0.190491,110.0
2018-04-05,1,1,0.526779,0.131998,0.003023,0.203673,117.0,LAC,0.509256,0.136881,0.202374,0.191554,95.0
2018-04-10,1,1,0.528548,0.132436,0.003541,0.202147,119.0,GSW,0.506404,0.136477,0.202535,0.191214,79.0


In [21]:

id_to_team = {'ATL': 'Atlanta Hawks', 'BOS': 'Boston Celtics', 'BRK': 'Brooklyn Nets',
        'CHO': 'Charlotte Hornets', 'CHI': 'Chicago Bulls', 'CLE': 'Cleveland Cavaliers',
        'DAL': 'Dallas Mavericks', 'DEN': 'Denver Nuggets', 'DET': 'Detroit Pistons',
        'GSW': 'Golden State Warriors', 'HOU': 'Houston Rockets', 'IND': 'Indiana Pacers',
        'LAC': 'Los Angeles Clippers', 'LAL': 'Los Angeles Lakers', 'MEM': 'Memphis Grizzlies',
        'MIA': 'Miami Heat', 'MIL': 'Milwaukee Bucks', 'MIN': 'Minnesota Timberwolves',
        'NOP': 'New Orleans Pelicans', 'NYK': 'New York Knicks', 'OKC': 'Oklahoma City Thunder',
        'ORL': 'Orlando Magic', 'PHI': 'Philadelphia 76ers', 'PHO': 'Phoenix Suns',
        'POR': 'Portland Trail Blazers', 'SAC': 'Sacramento Kings', 'SAS': 'San Antonio Spurs',
        'TOR': 'Toronto Raptors', 'UTA': 'Utah Jazz', 'WAS': 'Washington Wizards'}
team_to_id = res = dict((v,k) for k,v in id_to_team.items())

nba_path = './NBA'
team_dataframes = {}


for team in os.listdir(nba_path):
    team_folder = os.path.join(nba_path, team)
     
    # Check if it's a directory
    if os.path.isdir(team_folder):
        
        # Construct the file paths
        opp_log_path = os.path.join(team_folder, f'{team_to_id[team]}_2018_opp_game_log.csv')
        team_log_path = os.path.join(team_folder, f'{team_to_id[team]}_2018_team_game_log.csv')
        
        # Read the CSV files into dataframes
        opp_log_df = pd.read_csv(opp_log_path, index_col=1)
        team_log_df = pd.read_csv(team_log_path, index_col=1)

        # Clean and engineer the opponent data:
        opp_log_df.drop('Unnamed: 0', axis=1, inplace=True)
        opp_log_df.fillna(value={'H/A': 'vs'}, inplace=True)
        opp_log_df.dropna(inplace=True)
        opp_log_df.rename(columns={'Tm': 'Team Score', 'Opp.1': 'Opp Score'}, inplace=True)
        opp_log_df["FG_cum"] = opp_log_df["FG"].cumsum()
        opp_log_df["3P_cum"] = opp_log_df["3P"].cumsum()
        opp_log_df["FGA_cum"] = opp_log_df["FGA"].cumsum()
        opp_log_df['TOV_cum'] = opp_log_df['TOV'].cumsum()
        opp_log_df['FTA_cum'] = opp_log_df['FTA'].cumsum()
        opp_log_df['ORB_cum'] = opp_log_df['ORB'].cumsum()
        opp_log_df['TRB_cum'] = opp_log_df['TRB'].cumsum()
        opp_log_df['FT_cum'] = opp_log_df['FT'].cumsum()

        # Clean and engineer the team data:
        team_log_df.drop('Unnamed: 0', axis=1, inplace=True)
        team_log_df.fillna(value={'H/A': 'vs'}, inplace=True)
        team_log_df.dropna(inplace=True)
        team_log_df.rename(columns={'Tm': 'Team Score', 'Opp.1': 'Opp Score'}, inplace=True)
        team_log_df["FG_cum"] = team_log_df["FG"].cumsum()
        team_log_df["3P_cum"] = team_log_df["3P"].cumsum()
        team_log_df["FGA_cum"] = team_log_df["FGA"].cumsum()
        team_log_df['TOV_cum'] = team_log_df['TOV'].cumsum()
        team_log_df['FTA_cum'] = team_log_df['FTA'].cumsum()
        team_log_df['ORB_cum'] = team_log_df['ORB'].cumsum()
        team_log_df['TRB_cum'] = team_log_df['TRB'].cumsum()
        team_log_df['FT_cum'] = team_log_df['FT'].cumsum()

        
        # Create four_factors on opponent data.
        opp_four_factors = pd.DataFrame({'Team':[],
                                 'Date':[],
                                 'eFG%':[],
                                 'TOV%':[],
                                 'ORB%':[],
                                 'FTR':[],})

        opp_four_factors['Team'] = opp_log_df['Opp']
        opp_four_factors['Date'] = opp_log_df['Date']
        opp_four_factors['eFG%'] = (opp_log_df['FG_cum'] + 0.5 * opp_log_df['3P_cum']) / opp_log_df['FGA_cum']
        opp_four_factors['TOV%'] = opp_log_df['TOV_cum'] / (opp_log_df['FGA_cum'] + 0.44 * opp_log_df['FTA_cum'] + opp_log_df['TOV_cum'])
        #opp_four_factors['ORB%'] = opp_log_df['ORB_cum'] / (opp_log_df['ORB_cum'] + (team_log_df['TRB_cum'] - team_log_df['ORB_cum']))
        opp_four_factors['FTR'] = opp_log_df['FT_cum'] / opp_log_df['FGA_cum']
        opp_four_factors.set_index('Date', inplace=True)
        

        # Create four factors on team data.
        team_four_factors = pd.DataFrame({'Date':[],
                                  'W/L': [],
                                  'H/A':[],
                                  'eFG%':[],
                                  'TOV%':[],
                                  'ORB%':[],
                                  'FTR':[]})

        team_four_factors['Date'] = team_log_df['Date']
        team_four_factors['W/L'] = team_log_df['W/L'].replace({'W': 1, 'L': 0})
        team_four_factors['H/A'] = team_log_df['H/A']
        team_four_factors["eFG%"] = (team_log_df['FG_cum'] + 0.5 * team_log_df['3P_cum']) / team_log_df['FGA_cum']
        team_four_factors['TOV%'] = team_log_df['TOV_cum'] / (team_log_df['FGA_cum'] + 0.44 * team_log_df['FTA_cum'] + team_log_df['TOV_cum'])
        #team_four_factors['ORB%'] = team_log_df['ORB_cum'] / (team_log_df['ORB_cum'] + (opp_log_df['TRB_cum'] - opp_log_df['ORB_cum']))
        team_four_factors['FTR'] = team_log_df['FT_cum'] / team_log_df['FGA_cum']
        team_four_factors.set_index('Date', inplace=True)
        team_four_factors.loc[team_four_factors['H/A'] == '@', 'H/A'] = 0
        team_four_factors.loc[team_four_factors['H/A'] == 'vs', 'H/A'] = 1

        result = team_four_factors.join(opp_four_factors, rsuffix='_opp', on='Date')
        result = result[result['H/A'] != 0] # Only include home games.
        # Store the dataframes in the dictionary
        team_dataframes[team_to_id[team]] = {'opp_log': opp_log_df, 'team_log': team_log_df, 'result': result}

In [53]:
for team in team_dataframes.keys():
    for opponent in team_dataframes[team]["result"]["Team"]:
        team_dataframes[team]["result"]["ORB%"] = team_dataframes[opponent]["team_log"]["W/L"]

In [102]:
for team in team_dataframes.keys():
    for i in range(1, 41):
        team_dataframes[team]["result"]["ORB%"][i] = team_dataframes[team]["team_log"]["ORB_cum"][i] / ( team_dataframes[team]["team_log"]["TRB_cum"][i]
                                                                                                   + team_dataframes[team_dataframes[team]["result"]["Team"][i]]["team_log"]["TRB_cum"][i]
                                                                                                   - team_dataframes[team_dataframes[team]["result"]["Team"][i]]["team_log"]["ORB_cum"][i])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_dataframes[team]["result"]["ORB%"][i] = team_dataframes[team]["team_log"]["ORB_cum"][i] / ( team_dataframes[team]["team_log"]["TRB_cum"][i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_dataframes[team]["result"]["ORB%"][i] = team_dataframes[team]["team_log"]["ORB_cum"][i] / ( team_dataframes[team]["team_log"]["TRB_cum"][i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_dataframes[team]["result"]["ORB%"][i] = team_dataframes[team]["team_log"]["ORB_cu

In [87]:
team= "UTA"
team_dataframes[team]["result"]["ORB%"][3]
team_dataframes[team]["team_log"]["ORB_cum"][3]
team_dataframes[team_dataframes[team]["result"]["Team"][i]]["team_log"]["TRB_cum"][83]
#team_dataframes[team_dataframes[team]["result"]["Team"][i]]["team_log"]["ORB_cum"][3]

KeyError: 83

In [101]:
team_dataframes["MIN"]["result"]

Unnamed: 0_level_0,W/L,H/A,eFG%,TOV%,ORB%,FTR,Team,eFG%_opp,TOV%_opp,ORB%_opp,FTR_opp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-10-20,1,1,0.48494,0.129894,,0.228916,UTA,0.521084,0.141616,,0.186747
2017-10-24,0,1,0.517751,0.134593,0.146667,0.210059,IND,0.575145,0.134346,,0.141618
2017-10-27,1,1,0.515717,0.13273,0.145833,0.227898,OKC,0.575382,0.131058,,0.156489
2017-11-04,1,1,0.526247,0.142181,0.137778,0.23622,DAL,0.557107,0.143466,,0.159898
2017-11-05,1,1,0.525444,0.142223,0.123077,0.243787,CHO,0.547891,0.139268,,0.156214
2017-11-15,1,1,0.513014,0.131102,0.125,0.243493,SAS,0.551197,0.144348,,0.156895
2017-11-19,0,1,0.51663,0.132497,0.148492,0.23799,DET,0.540942,0.143737,,0.158696
2017-11-22,1,1,0.511765,0.126047,0.13936,0.248366,ORL,0.543702,0.142742,,0.164524
2017-11-24,0,1,0.510223,0.124478,0.140728,0.245353,MIA,0.546149,0.14213,,0.165037
2017-11-26,1,1,0.513767,0.122778,0.136103,0.239016,PHO,0.547937,0.142146,,0.162115


In [None]:
# Create ORB%_opp metric.
for team in team_dataframes.keys():
    orb_opp = []
    for i in range(82):
        orb_opp.append(team_dataframes[team_dataframes[team]["result"]["Team"].values[i]]["result"]["ORB%"].values[i])
    team_dataframes[team]["result"]["ORB%_opp"] = orb_opp