## Imports

Import our data and packages

In [361]:
# Import libaries
import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)


In [362]:
df = pd.read_csv('../data/teams_csv')
df.shape

(114254, 43)

## Data Cleaning

First, let's look at how many games were played in the regular season from 2017 - 2019.

In [363]:
df["Season"][df.shape[0]-1]

2019

In [364]:
# message = (
#     f'From the {df["Season"][0]} NBA season, through the {df["Season"][df.shape[0]-1]} season, there were a total of '
#     f'{}
# )

### Data Types

In [365]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114254 entries, 0 to 114253
Data columns (total 43 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Season             114254 non-null  int64  
 1   Time Period        114254 non-null  object 
 2   Date               114254 non-null  object 
 3   Team               114254 non-null  object 
 4   Team Abbreviation  114254 non-null  object 
 5   Location           114254 non-null  object 
 6   Overtime           5602 non-null    object 
 7   Result             114254 non-null  object 
 8   Player             114254 non-null  object 
 9   MP                 114254 non-null  object 
 10  FG                 114254 non-null  int64  
 11  FGA                114254 non-null  int64  
 12  FG%                109934 non-null  float64
 13  3P                 114254 non-null  int64  
 14  3PA                114254 non-null  int64  
 15  3P%                87990 non-null   float64
 16  FT

### Renaming Columns

In [366]:
df.rename(columns = {
    "TS%"  : "True Shooting Percentage",
    "eFG%" : "Effective Field Goal Percentage",
    "3PAr" : "3 Point Attempt Rate",
    "FTr"  : "Free Throw Attempt Rate",
    "ORB%" : "Offensive Rebound Percentage",
    "DRB%" : "Defensive Rebound Percentage",
    "TRB%" : "Total Rebound Percentage",
    "AST%" : "Assist Percentage",
    "STL%" : "Steal Percentage",
    "BLK%" : "Block Percentage",
    "TOV%" : "Turnover Percentage",
    "USG%" : "Usage Percentage",
    "ORtg" : "Offensive Rating",
    "DRtg" : "Defensive Rating",
    "BPM"  : "Box Plus/Minus",
    "MP"   : "Minutes Played",
    "FG"   : "Field Goals",
    "FGA"  : "Field Goal Attempts",
    "FG%"  : "Field Goal Percentage",
    "3P"   : "3-Point Field Goals",
    "3PA"  : "3-Point Attempts",
    "3P%"  : "3-Point Percentage",
    "FT"   : "Free Throws",
    "FTA"  : "Free Throw Attempts",
    "FT%"  : "Free Throw Percentage",
    "ORB"  : "Offensive Rebounds",
    "DRB"  : "Defensive Rebounds",
    "TRB"  : "Total Rebounds",
    "AST"  : "Assists",
    "STL"  : "Steals",
    "BLK"  : "Blocks",
    "TOV"  : "Turnovers",
    "PF"   : "Personal Fouls",
    "PTS"  : "Points"
    }, inplace = True)
df.head()

Unnamed: 0,Season,Time Period,Date,Team,Team Abbreviation,Location,Overtime,Result,Player,Minutes Played,Field Goals,Field Goal Attempts,Field Goal Percentage,3-Point Field Goals,3-Point Attempts,3-Point Percentage,Free Throws,Free Throw Attempts,Free Throw Percentage,Offensive Rebounds,Defensive Rebounds,Total Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,True Shooting Percentage,Effective Field Goal Percentage,3 Point Attempt Rate,Free Throw Attempt Rate,Offensive Rebound Percentage,Defensive Rebound Percentage,Total Rebound Percentage,Assist Percentage,Steal Percentage,Block Percentage,Turnover Percentage,Usage Percentage,Offensive Rating,Defensive Rating,Box Plus/Minus
0,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,DeMar DeRozan,32:03,2,9,0.222,0,0,,7,8,0.875,1,5,6,5,2,1,5,1,11,0.439,0.222,0.0,0.889,3.6,16.3,10.2,20.8,3.0,2.6,28.5,23.4,86.0,97.0,-5.3
1,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Kyle Lowry,31:20,4,7,0.571,1,3,0.333,3,3,1.0,0,4,4,9,1,0,4,1,12,0.721,0.643,0.429,0.429,0.0,13.3,7.0,41.9,1.5,0.0,32.5,16.8,121.0,103.0,2.3
2,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Jonas Valančiūnas,30:30,9,17,0.529,0,1,0.0,5,6,0.833,8,7,15,2,2,0,1,4,23,0.586,0.529,0.059,0.353,30.0,23.9,26.8,12.7,3.2,0.0,4.8,28.9,136.0,96.0,4.3
3,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Norman Powell,24:35,5,11,0.455,3,6,0.5,2,2,1.0,0,4,4,1,1,0,2,2,15,0.631,0.591,0.545,0.182,0.0,17.0,8.9,6.7,2.0,0.0,14.4,24.1,111.0,101.0,0.0
4,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Serge Ibaka,24:14,3,8,0.375,2,4,0.5,0,0,,1,3,4,0,0,2,0,2,8,0.5,0.5,0.5,0.0,4.7,12.9,9.0,0.0,0.0,7.0,0.0,14.1,113.0,101.0,-2.0


### Converting Data Types  
#### Minutes Played Column

While almost all of our columns are in the correct format, I want to change the "Minutes Played" Column into a float type, and convert seconds into a decimal out of 100. So, if a player played for 3 minutes and 45 seconds, instead of recording it as "3:45", it will now be recorded as _$3.75$_

In [367]:
def convert_to_float(string):
    if ":" in string:
        mins, secs = string.split(":")
        secs = round(int(secs)/60, 2)
        mins = int(mins)
        time = float(mins + secs)
        return time
    return float(string)

In [368]:
df["Minutes Played"] = df["Minutes Played"].map(convert_to_float)
df["Minutes Played"].dtypes

dtype('float64')

#### Date Column

In [369]:
df["Date"] = pd.to_datetime(df["Date"])
df["Date"].dtypes

dtype('<M8[ns]')

### Duplicate Columns

In [370]:
df.loc[(df["Date"] == '2017-10-19') & (df["Team"] == "Toronto Raptors"), :]

Unnamed: 0,Season,Time Period,Date,Team,Team Abbreviation,Location,Overtime,Result,Player,Minutes Played,Field Goals,Field Goal Attempts,Field Goal Percentage,3-Point Field Goals,3-Point Attempts,3-Point Percentage,Free Throws,Free Throw Attempts,Free Throw Percentage,Offensive Rebounds,Defensive Rebounds,Total Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,True Shooting Percentage,Effective Field Goal Percentage,3 Point Attempt Rate,Free Throw Attempt Rate,Offensive Rebound Percentage,Defensive Rebound Percentage,Total Rebound Percentage,Assist Percentage,Steal Percentage,Block Percentage,Turnover Percentage,Usage Percentage,Offensive Rating,Defensive Rating,Box Plus/Minus
0,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,DeMar DeRozan,32.05,2,9,0.222,0,0,,7,8,0.875,1,5,6,5,2,1,5,1,11,0.439,0.222,0.0,0.889,3.6,16.3,10.2,20.8,3.0,2.6,28.5,23.4,86.0,97.0,-5.3
1,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Kyle Lowry,31.33,4,7,0.571,1,3,0.333,3,3,1.0,0,4,4,9,1,0,4,1,12,0.721,0.643,0.429,0.429,0.0,13.3,7.0,41.9,1.5,0.0,32.5,16.8,121.0,103.0,2.3
2,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Jonas Valančiūnas,30.5,9,17,0.529,0,1,0.0,5,6,0.833,8,7,15,2,2,0,1,4,23,0.586,0.529,0.059,0.353,30.0,23.9,26.8,12.7,3.2,0.0,4.8,28.9,136.0,96.0,4.3
3,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Norman Powell,24.58,5,11,0.455,3,6,0.5,2,2,1.0,0,4,4,1,1,0,2,2,15,0.631,0.591,0.545,0.182,0.0,17.0,8.9,6.7,2.0,0.0,14.4,24.1,111.0,101.0,0.0
4,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Serge Ibaka,24.23,3,8,0.375,2,4,0.5,0,0,,1,3,4,0,0,2,0,2,8,0.5,0.5,0.5,0.0,4.7,12.9,9.0,0.0,0.0,7.0,0.0,14.1,113.0,101.0,-2.0
5,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Delon Wright,23.37,4,6,0.667,0,1,0.0,5,5,1.0,1,1,2,5,1,0,0,0,13,0.793,0.667,0.167,0.833,4.9,4.5,4.7,33.4,2.1,0.0,0.0,15.0,185.0,104.0,11.9
6,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,C.J. Miles,20.02,7,12,0.583,6,9,0.667,2,2,1.0,0,5,5,0,0,0,0,4,22,0.854,0.833,0.75,0.167,0.0,26.1,13.6,0.0,0.0,0.0,0.0,27.5,165.0,102.0,15.1
7,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Jakob Poeltl,17.5,1,2,0.5,0,0,,0,0,,0,5,5,0,1,0,3,3,2,0.5,0.5,0.0,0.0,0.0,29.8,15.6,0.0,2.7,0.0,60.0,12.2,34.0,95.0,-12.0
8,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,OG Anunoby,17.23,3,6,0.5,1,3,0.333,2,2,1.0,2,1,3,2,0,0,0,1,9,0.654,0.583,0.5,0.333,13.3,6.1,9.5,18.2,0.0,0.0,0.0,17.1,158.0,108.0,5.5
9,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,,W,Fred VanVleet,12.98,1,5,0.2,0,2,0.0,0,0,,0,1,1,2,1,1,2,1,2,0.2,0.2,0.4,0.0,0.0,8.0,4.2,20.9,3.7,6.5,28.6,23.0,49.0,95.0,-11.2


In [371]:
df.drop_duplicates(inplace = True)
df.shape

(57127, 43)

### Handling Nulls

In [378]:
df.isna().sum()

Season                                0
Time Period                           0
Date                                  0
Team                                  0
Team Abbreviation                     0
Location                              0
Overtime                              0
Result                                0
Player                                0
Minutes Played                        0
Field Goals                           0
Field Goal Attempts                   0
3-Point Field Goals                   0
3-Point Attempts                      0
Free Throws                           0
Free Throw Attempts                   0
Offensive Rebounds                    0
Defensive Rebounds                    0
Total Rebounds                        0
Assists                               0
Steals                                0
Blocks                                0
Turnovers                             0
Personal Fouls                        0
Points                                0


### Overtime

In [374]:
df["Overtime"].value_counts()

OT     2434
2OT     279
3OT      65
4OT      23
Name: Overtime, dtype: int64

In [375]:
df["Overtime"] = df["Overtime"].map({"OT"  : 1,
                                     "2OT" : 2,
                                     "3OT" : 3,
                                     "4OT" : 4})

df.loc[df["Overtime"].isna(), "Overtime"] = 0
df.head()

Unnamed: 0,Season,Time Period,Date,Team,Team Abbreviation,Location,Overtime,Result,Player,Minutes Played,Field Goals,Field Goal Attempts,Field Goal Percentage,3-Point Field Goals,3-Point Attempts,3-Point Percentage,Free Throws,Free Throw Attempts,Free Throw Percentage,Offensive Rebounds,Defensive Rebounds,Total Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,True Shooting Percentage,Effective Field Goal Percentage,3 Point Attempt Rate,Free Throw Attempt Rate,Offensive Rebound Percentage,Defensive Rebound Percentage,Total Rebound Percentage,Assist Percentage,Steal Percentage,Block Percentage,Turnover Percentage,Usage Percentage,Offensive Rating,Defensive Rating,Box Plus/Minus
0,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,0.0,W,DeMar DeRozan,32.05,2,9,0.222,0,0,,7,8,0.875,1,5,6,5,2,1,5,1,11,0.439,0.222,0.0,0.889,3.6,16.3,10.2,20.8,3.0,2.6,28.5,23.4,86.0,97.0,-5.3
1,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,0.0,W,Kyle Lowry,31.33,4,7,0.571,1,3,0.333,3,3,1.0,0,4,4,9,1,0,4,1,12,0.721,0.643,0.429,0.429,0.0,13.3,7.0,41.9,1.5,0.0,32.5,16.8,121.0,103.0,2.3
2,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,0.0,W,Jonas Valančiūnas,30.5,9,17,0.529,0,1,0.0,5,6,0.833,8,7,15,2,2,0,1,4,23,0.586,0.529,0.059,0.353,30.0,23.9,26.8,12.7,3.2,0.0,4.8,28.9,136.0,96.0,4.3
3,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,0.0,W,Norman Powell,24.58,5,11,0.455,3,6,0.5,2,2,1.0,0,4,4,1,1,0,2,2,15,0.631,0.591,0.545,0.182,0.0,17.0,8.9,6.7,2.0,0.0,14.4,24.1,111.0,101.0,0.0
4,2018,Regular Season,2017-10-19,Toronto Raptors,TOR,Home,0.0,W,Serge Ibaka,24.23,3,8,0.375,2,4,0.5,0,0,,1,3,4,0,0,2,0,2,8,0.5,0.5,0.5,0.0,4.7,12.9,9.0,0.0,0.0,7.0,0.0,14.1,113.0,101.0,-2.0


In [376]:
df["Overtime"].value_counts()

0.0    54326
1.0     2434
2.0      279
3.0       65
4.0       23
Name: Overtime, dtype: int64

#### Field Goal, 3 - Point and Free Throw Percentages

In [377]:
df.drop(columns = ["Field Goal Percentage","3-Point Percentage", "Free Throw Percentage"],
           inplace = True)

#### Offensive and Defensive Ratings

In [379]:
df.loc[df["Offensive Rating"].isna(), :]

Unnamed: 0,Season,Time Period,Date,Team,Team Abbreviation,Location,Overtime,Result,Player,Minutes Played,Field Goals,Field Goal Attempts,3-Point Field Goals,3-Point Attempts,Free Throws,Free Throw Attempts,Offensive Rebounds,Defensive Rebounds,Total Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,True Shooting Percentage,Effective Field Goal Percentage,3 Point Attempt Rate,Free Throw Attempt Rate,Offensive Rebound Percentage,Defensive Rebound Percentage,Total Rebound Percentage,Assist Percentage,Steal Percentage,Block Percentage,Turnover Percentage,Usage Percentage,Offensive Rating,Defensive Rating,Box Plus/Minus
2393,2018,Regular Season,2017-11-20,Dallas Mavericks,DAL,Home,1.0,L,Nerlens Noel,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,-5.7
4017,2018,Regular Season,2017-10-28,Dallas Mavericks,DAL,Home,0.0,L,Salah Mejri,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,-6.2
11055,2018,Regular Season,2018-03-12,Portland Trail Blazers,POR,Home,0.0,W,Jake Layman,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,-4.7
15855,2018,Regular Season,2017-12-17,Detroit Pistons,DET,Home,0.0,W,Boban Marjanović,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,-11.0
19652,2018,Regular Season,2017-12-12,Los Angeles Lakers,LAL,Away,1.0,L,Andrew Bogut,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,-8.8
46937,2018,Regular Season,2018-02-05,Los Angeles Clippers,LAC,Home,0.0,W,Boban Marjanović,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,-9.8
46948,2018,Regular Season,2018-02-05,Dallas Mavericks,DAL,Away,0.0,L,Jalen Jones,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,-4.3
57429,2019,Regular Season,2018-11-10,Los Angeles Clippers,LAC,Home,1.0,W,Boban Marjanović,0.0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,,,,,,,,,,,,,,,-6.5
63363,2019,Regular Season,2018-12-01,Boston Celtics,BOS,Away,0.0,W,Robert Williams,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,-5.5
64478,2019,Regular Season,2019-03-18,Denver Nuggets,DEN,Away,0.0,W,Jarred Vanderbilt,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,-5.2


In [100]:
def bpm_relevance(number):
    
    opponent_initialism = list(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                    (bulls["Team Initialism"] != "CHI"), :]["Team Initialism"])[0]
    
    bulls_result = list(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                    (bulls["Team Initialism"] == "CHI"), :]["Result"])[0]
    
    opponent_result = list(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                    (bulls["Team Initialism"] != "CHI"), :]["Result"])[0]
    
    bulls_bpm = bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                          (bulls["Team Initialism"] == "CHI") & 
                          (bulls["Player"] != "Team Totals"), :]["Box Plus/Minus"].mean()

    opponent_bpm = bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                             (bulls["Team Initialism"] == opponent_initialism) & 
                             (bulls["Player"] != "Team Totals"), :]["Box Plus/Minus"].mean()
    
    print (f"The Bulls average BPM was: {bulls_bpm}, and the Bulls got the {bulls_result}")
    print (f"They played {opponent_initialism}, who's average BPM was: {opponent_bpm}, and they got the {opponent_result}")

In [106]:
bpm_relevance(1336)

The Bulls average BPM was: -1.4099999999999997, and the Bulls got the W
They played UTA, who's average BPM was: -1.4, and they got the L


In [130]:
 def bpm_result(numbers):
    games_list = []
    
    for number in range(0, numbers):
        # Give the team initialism by utilizing unique dates, and when the initialism is not CHI
        opponent_initialism = list(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                        (bulls["Team Initialism"] != "CHI"), :]["Team Initialism"])[0]
        
        # Get the result of win or loss
        bulls_result = list(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                        (bulls["Team Initialism"] == "CHI"), :]["Result"])[0]
        
        # Get the total team points in the game for the bulls
        bulls_points = list(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                           (bulls["Team Initialism"] == "CHI") &
                           (bulls["Player"] == "Team Totals"), "Points"])[0]
        
        # Get the result of win or loss
        opponent_result = list(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                        (bulls["Team Initialism"] != "CHI"), :]["Result"])[0]
        
        # Get the total team points in the game for the opponent
        opponent_points = list(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                   (bulls["Team Initialism"] != "CHI") &
                   (bulls["Player"] == "Team Totals"), "Points"])[0]
        
        # Collect the average BPM for the Bulls in that game
        bulls_avg_bpm = round(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                              (bulls["Team Initialism"] == "CHI") & 
                              (bulls["Player"] != "Team Totals"), :]["Box Plus/Minus"].mean(), 4)
        
        # Collect the average BPM for the opponent in that game
        opponent_avg_bpm = round(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                                 (bulls["Team Initialism"] == opponent_initialism) & 
                                 (bulls["Player"] != "Team Totals"), :]["Box Plus/Minus"].mean(), 4)
        
        # Collect the sum BPM for the Bulls in that game
        bulls_sum_bpm =  round(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                              (bulls["Team Initialism"] == "CHI") & 
                              (bulls["Player"] != "Team Totals"), :]["Box Plus/Minus"].sum(), 4)
        
        # Collect the sum BPM for the opponent        
        opponent_sum_bpm = round(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[number]) & 
                                 (bulls["Team Initialism"] == opponent_initialism) & 
                                 (bulls["Player"] != "Team Totals"), :]["Box Plus/Minus"].sum(), 4)
        
        
#         if bulls_result == "W":
#             if bulls_avg_bpm < opponent_avg_bpm:
#                 games_list.append(number)
#             elif bulls_sum_bpm < opponent_sum_bpm:
#                 games_list.append(number)
#         elif bulls_result == "L":
#             if bulls_avg_bpm > opponent_avg_bpm:
#                 games_list.append(number)
#             elif bulls_sum_bpm > opponent_sum_bpm:
#                 games_list.append(number)                
        
        season = (list(bulls.loc[bulls["Date"] == bulls["Date"].unique()[number], "Season"])[0])
        date = (list(bulls.loc[bulls["Date"] == bulls["Date"].unique()[number], "Date"])[0])
        avg_bpm_difference = bulls_avg_bpm - opponent_avg_bpm
        sum_bpm_difference = bulls_sum_bpm - opponent_sum_bpm
        point_differential = bulls_points - opponent_points
        
        cols = ["Season", "Date", "Bulls Average BPM", "Bulls Summed BPM", "Bulls Total Points", "Bulls Result", 
                "Opponent Initialism", "Opponent Average BPM", "Opponent Summed BPM", "Opponent Total Points", 
                "Opponent Result", "Average BPM Difference", "Summed BPM Difference", "Bulls Point Differential"]
        
        game = dict(zip(cols,[season, date, bulls_avg_bpm, bulls_sum_bpm, bulls_points, bulls_result, 
                              opponent_initialism, opponent_avg_bpm, opponent_sum_bpm, opponent_points, opponent_result,
                              avg_bpm_difference, sum_bpm_difference, point_differential]))
                        
        games_list.append(game)
    
    games_df = pd.DataFrame(games_list)                    
                        
    return games_df

In [131]:
bpm_df = bpm_result(len(bulls["Date"].unique()))

In [132]:
bpm_df.head()

Unnamed: 0,Season,Date,Bulls Average BPM,Bulls Summed BPM,Bulls Total Points,Bulls Result,Opponent Initialism,Opponent Average BPM,Opponent Summed BPM,Opponent Total Points,Opponent Result,Average BPM Difference,Summed BPM Difference,Bulls Point Differential
0,1985,1984-10-26,6.0273,66.3,109,W,WSB,-4.8364,-53.2,93,L,10.8637,119.5,16
1,1985,1984-10-27,-0.6333,-5.7,106,L,MIL,-3.3583,-40.3,108,W,2.725,34.6,-2
2,1985,1984-10-29,-0.41,-4.1,116,W,MIL,-0.4182,-4.6,110,L,0.0082,0.5,6
3,1985,1984-10-30,0.21,2.1,109,W,KCK,-0.56,-5.6,104,L,0.77,7.7,5
4,1985,1984-11-01,-3.3083,-39.7,113,L,DEN,1.3,15.6,129,W,-4.6083,-55.3,-16


In [135]:
bpm_df.loc[(bpm_df["Bulls Result"] == "W") &
           (bpm_df["Average BPM Difference"] < -1), :]

Unnamed: 0,Season,Date,Bulls Average BPM,Bulls Summed BPM,Bulls Total Points,Bulls Result,Opponent Initialism,Opponent Average BPM,Opponent Summed BPM,Opponent Total Points,Opponent Result,Average BPM Difference,Summed BPM Difference,Bulls Point Differential
27,1985,1984-12-20,-2.4455,-26.9,132,W,ATL,-0.0818,-0.9,129,L,-2.3637,-26.0,3
43,1985,1985-01-26,-6.7,-67.0,117,W,ATL,-4.45,-44.5,104,L,-2.25,-22.5,13
54,1985,1985-02-23,-5.1091,-56.2,140,W,GSW,-2.175,-26.1,125,L,-2.9341,-30.1,15
59,1985,1985-03-05,-1.6,-16.0,104,W,WSB,-0.4556,-4.1,99,L,-1.1444,-11.9,5
70,1985,1985-03-23,-3.8111,-34.3,107,W,DAL,-0.7556,-6.8,97,L,-3.0555,-27.5,10
78,1985,1985-04-06,-6.02,-60.2,117,W,ATL,-0.5889,-5.3,114,L,-5.4311,-54.9,3
86,1986,1985-10-25,-1.5778,-14.2,116,W,CLE,1.49,14.9,115,L,-3.0678,-29.1,1
93,1986,1985-11-09,-6.29,-62.9,97,W,NYK,-2.5333,-22.8,94,L,-3.7567,-40.1,3
102,1986,1985-11-26,-3.84,-38.4,128,W,DEN,-2.2556,-20.3,123,L,-1.5844,-18.1,5
108,1986,1985-12-06,-3.13,-31.3,131,W,SAS,-1.8,-18.0,123,L,-1.33,-13.3,8


In [137]:
len(bulls.loc[bulls["Box Plus/Minus"] > 15, :])

1211

In [104]:
len(bad_games)/len(bulls["Date"].unique())

0.275243081525804

In [109]:
bulls.loc[(bulls["Date"] == bulls["Date"].unique()[1]) & 
         (bulls["Player"] == "Team Totals"),"Box Plus/Minus"]

33   NaN
46   NaN
Name: Box Plus/Minus, dtype: float64

In [117]:
print (list(bulls.loc[bulls["Date"] == bulls["Date"].unique()[1200], "Season"])[0])
print (list(bulls.loc[bulls["Date"] == bulls["Date"].unique()[1200], "Date"])[0])

1997
1997-03-22


In [119]:
list(bulls.loc[(bulls["Date"] == bulls["Date"].unique()[0]) & 
          (bulls["Team Initialism"] == "CHI") &
          (bulls["Player"] == "Team Totals"), "Points"])[0]

109

In [156]:
time = "48:45"
mins, secs = time.split(":")
secs = int(secs) * 100/60
secs = str(int(round(secs, 0)))

In [158]:
float(mins + "." + secs)

48.75

In [175]:
for i in range(0, len(bulls["Minutes Played"])):
    mins, secs = bulls["Minutes Played"][i].split(":")
    secs = int(secs) * 100/60
    secs = str(int(round(secs, 0)))
    time = float(mins + "." + secs)
    print (time)

AttributeError: 'float' object has no attribute 'split'

In [184]:
a, b = bulls["Minutes Played"][11].split(":")
a

ValueError: not enough values to unpack (expected 2, got 1)

In [186]:
type(bulls["Minutes Played"][11])

str