In [60]:
# Import libraries
import pandas as pd
# Display configuration: passing None removes pandas' limit, so DataFrames are
# rendered with every column and every row instead of being truncated.
# NOTE(review): with max_rows unlimited, displaying a large frame directly will
# print all of it -- prefer .head()/.sample() when inspecting scraped data.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
import requests
from bs4 import BeautifulSoup
import time

In [100]:
# Lookup from a franchise's full name (as it appears in the schedule table's
# "Opponent" column) to the abbreviation used when building box-score URLs and
# table ids. Insertion order is kept as originally written.
team_names = {
    "Philadelphia 76ers": "PHI",
    "Boston Celtics": "BOS",
    "New York Knicks": "NYK",
    "Washington Bullets": "WSB",
    "Miami Heat": "MIA",
    "New Jersey Nets": "NJN",
    "Detroit Pistons": "DET",
    "Chicago Bulls": "CHI",
    "Milwaukee Bucks": "MIL",
    "Cleveland Cavaliers": "CLE",
    "Indiana Pacers": "IND",
    "Atlanta Hawks": "ATL",
    "Orlando Magic": "ORL",
    "San Antonio Spurs": "SAS",
    "Utah Jazz": "UTA",
    "Dallas Mavericks": "DAL",
    "Denver Nuggets": "DEN",
    "Houston Rockets": "HOU",
    "Minnesota Timberwolves": "MIN",
    "Charlotte Hornets": "CHH",
    "Los Angeles Lakers": "LAL",
    "Portland Trail Blazers": "POR",
    "Phoenix Suns": "PHO",
    "Seattle SuperSonics": "SEA",
    "Golden State Warriors": "GSW",
    "Los Angeles Clippers": "LAC",
    "Sacramento Kings": "SAC",
    "Toronto Raptors": "TOR",
    "Vancouver Grizzlies": "VAN",
    "Kansas City Kings": "KCK",
    "Washington Wizards": "WAS",
}

### Season Schedule Dataframe

In [62]:
def season_schedule(team_acronym, year, point_in_season):
    """Scrape one team's game-by-game schedule for a season from basketball-reference.com.

    Parameters
    ----------
    team_acronym : str
        Team abbreviation as used in the site's URLs (e.g. ``"CHI"``).
    year : int or str
        Season identifier inserted into the URL (``.../teams/<team>/<year>_games.html``)
        and copied into the ``Season`` column.
    point_in_season : str
        ``"regular season"`` or ``"playoffs"`` (case-insensitive). Selects the
        table with id ``games`` or ``games_playoffs`` respectively.

    Returns
    -------
    pandas.DataFrame
        One row per game with the columns ``Season``, ``Time Period``, ``Game``,
        ``Date``, ``Team Acronym`` (the opponent's abbreviation), ``Opponent``,
        ``Location`` (``"Home"``/``"Away"``), ``Result``, ``Wins``, ``Losses``.
        If the page has no table for the requested stage, an empty frame with
        those same columns is returned.

    Raises
    ------
    ValueError
        If ``point_in_season`` is not one of the two supported stages.
    requests.HTTPError
        If the schedule page responds with an error status.
    KeyError
        If an opponent's full name is missing from ``team_names``.
    """
    # Validate before touching the network so a typo fails fast with a clear
    # message instead of an unbound-variable error after the download.
    table_ids = {"regular season": "games", "playoffs": "games_playoffs"}
    stage_key = point_in_season.lower()
    if stage_key not in table_ids:
        raise ValueError(
            f"point_in_season must be 'regular season' or 'playoffs', got {point_in_season!r}"
        )

    season_url = f"https://www.basketball-reference.com/teams/{team_acronym}/{year}_games.html"
    res = requests.get(season_url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.content, "lxml")

    output_cols = ["Season", "Time Period", "Game", "Date", "Team Acronym",
                   "Opponent", "Location", "Result", "Wins", "Losses"]

    table = soup.find("table", {"id": table_ids[stage_key]})
    if table is None:
        # No table with that id on the page: report "no games" rather than
        # crashing on None.
        return pd.DataFrame(columns=output_cols)

    stage = point_in_season.title()
    games = []
    for body in table.find_all("tbody"):
        # Only rows without a class attribute are read as games.
        for row in body.find_all("tr", {"class": None}):
            games.append({
                # The machine-readable date is taken from the cell's "csk" attribute.
                "Date": row.find("td", {"data-stat": "date_game"}).attrs["csk"],
                "Opponent": row.find("td", {"data-stat": "opp_name"}).text,
                "Game": row.find("th", {"data-stat": "g"}).text,
                "Location": row.find("td", {"data-stat": "game_location"}).text,
                "Result": row.find("td", {"data-stat": "game_result"}).text,
                "Wins": row.find("td", {"data-stat": "wins"}).text,
                "Losses": row.find("td", {"data-stat": "losses"}).text,
                "Time Period": stage,
            })

    # Passing columns= keeps the frame well-formed even when no rows were found.
    schedule_df = pd.DataFrame(
        games,
        columns=["Date", "Opponent", "Game", "Location", "Result", "Wins", "Losses", "Time Period"],
    )
    schedule_df["Team Acronym"] = [team_names[name] for name in schedule_df["Opponent"]]
    schedule_df["Season"] = year
    # An empty location cell means a home game; "@" marks an away game.
    schedule_df["Location"] = schedule_df["Location"].map({"": "Home", "@": "Away"})
    return schedule_df.loc[:, output_cols]

### Game Data Function

In [81]:
def _box_score_rows(table, columns):
    """Return one dict per ``<tr>`` in the table's ``<tbody>`` sections, then its ``<tfoot>`` sections.

    Each dict pairs ``columns`` with the text of that row's ``<td>`` cells in
    order. ``zip`` stops at the shorter sequence, so a row with fewer cells
    than columns yields a partial (possibly empty) dict.
    """
    rows = []
    for section in ("tbody", "tfoot"):
        for part in table.find_all(section):
            for tr in part.find_all("tr"):
                rows.append(dict(zip(columns, [td.text for td in tr.find_all("td")])))
    return rows


def get_game_stats(url, team_acronym, season, date, game, location, point_in_season,
                   schedule_team="CHI"):
    """Scrape one team's basic and advanced box score for a single game.

    Parameters
    ----------
    url : str
        Address of the box-score page.
    team_acronym : str
        Abbreviation of the team whose tables are read; the tables are looked
        up by the ids ``box-<team_acronym>-game-basic`` and
        ``box-<team_acronym>-game-advanced``.
    season, date, game
        Values copied unchanged into the ``Season``, ``Date`` and ``Game`` columns.
    location : str
        ``"Home"`` or ``"Away"`` from the point of view of ``schedule_team``.
    point_in_season : str
        Stage label; stored title-cased in the ``Time Period`` column.
    schedule_team : str, optional
        Abbreviation of the team whose schedule ``location`` refers to
        (default ``"CHI"``). When ``team_acronym`` is a different team the
        location is flipped: ``"Home"`` becomes ``"Away"`` and anything else
        becomes ``"Home"``.

    Returns
    -------
    pandas.DataFrame
        The identifying columns (``Season``, ``Time Period``, ``Game``, ``Date``,
        ``Team_Abbr``, ``Team``, ``Location``, ``Player``) followed by the
        advanced-stat columns (without ``MP``) and then the basic-stat columns.

    Raises
    ------
    KeyError
        If ``team_acronym`` is not an abbreviation present in ``team_names``.
    requests.HTTPError
        If the page responds with an error status.
    ValueError
        If either box-score table is missing from the page.
    """
    # Resolve the full team name first so an unknown abbreviation fails before
    # any network traffic. The last matching entry wins.
    full_names = [name for name, abbreviation in team_names.items()
                  if abbreviation == team_acronym]
    if not full_names:
        raise KeyError(f"{team_acronym!r} is not an abbreviation listed in team_names")

    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.content, "lxml")

    # Identify our two tables
    table = soup.find("table", {"id": "box-" + team_acronym + "-game-basic"})
    table2 = soup.find("table", {"id": "box-" + team_acronym + "-game-advanced"})
    if table is None or table2 is None:
        raise ValueError(f"Box score tables for {team_acronym!r} were not found at {url}")

    # `location` describes schedule_team; any other team gets the opposite.
    if team_acronym == schedule_team:
        team_location = location
    else:
        team_location = "Away" if location == "Home" else "Home"

    # Create a dataframe of our players plus the per-game identifying columns
    players_df = pd.DataFrame(
        [th.text for th in table.find_all("th", {"class": "left"})],
        columns=["Player"],
    )
    players_df["Team_Abbr"] = team_acronym
    players_df["Team"] = full_names[-1]
    players_df["Season"] = season
    players_df["Date"] = date
    players_df["Game"] = game
    players_df["Location"] = team_location
    players_df["Time Period"] = point_in_season.title()
    players_df = players_df.loc[:, ["Season", "Time Period", "Game", "Date",
                                    "Team_Abbr", "Team", "Location", "Player"]]

    # Basic table: column names, then body rows followed by the footer row(s)
    # that hold the team's overall data.
    stats_cols = [th.text for th in table.find_all("th", {"class": "poptip center"})]
    # dropna() discards rows that did not supply a value for every column.
    stats_df = pd.DataFrame(_box_score_rows(table, stats_cols)).dropna().reset_index(drop=True)

    # Advanced table: header cells are gathered in three passes, one per class
    # string, so they arrive grouped by class rather than in display order.
    adv_stats_cols = [
        th.text
        for css_class in ("poptip center", "poptip sort_default_asc center", "poptip right")
        for th in table2.find_all("th", {"class": css_class})
    ]
    # NOTE(review): this permutation presumably restores the on-page column
    # order and the final entry (index 13) is then discarded -- confirm against
    # a live page. It needs at least 17 collected headers.
    order = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 11, 12, 15, 16, 13]
    adv_stats_cols = [adv_stats_cols[i] for i in order]
    adv_stats_cols = adv_stats_cols[:-1]

    # Collect the advanced stats, dropping the minutes-played column that the
    # basic table already provides.
    advanced_stats_df = (
        pd.DataFrame(_box_score_rows(table2, adv_stats_cols))
        .dropna()
        .reset_index(drop=True)
        .drop(columns="MP")
    )

    # Concatenate all 3 data sets side by side.
    # NOTE(review): the three frames are matched by row position only; this
    # assumes every collected player name has exactly one surviving stats row
    # in each table -- verify for games where rows are dropped by dropna().
    box_stats_df = pd.concat([players_df, advanced_stats_df, stats_df], axis=1)

    return box_stats_df

### All Seasons' Stats Function

In [82]:
def season_stats(team_acronym, list_of_years, pause_seconds=5):
    """Collect box-score stats for every game of the given seasons, for both teams.

    For each year and each stage ("regular season", then "playoffs") the team's
    schedule is fetched with ``season_schedule``; for every game on it
    ``get_game_stats`` is called once for ``team_acronym`` and once for the
    opponent, and all resulting frames are stacked.

    Parameters
    ----------
    team_acronym : str
        Abbreviation of the team whose schedule drives the scrape (e.g. ``"CHI"``).
    list_of_years : iterable
        Season identifiers passed one by one to ``season_schedule``.
    pause_seconds : float, optional
        Seconds to sleep after each game (default 5).

    Returns
    -------
    pandas.DataFrame
        All per-game frames concatenated with a fresh 0..n-1 index and ``Date``
        converted to datetime. An empty DataFrame is returned when no games
        were collected.
    """
    base_url = "https://www.basketball-reference.com/boxscores/"
    stages = ["regular season", "playoffs"]

    # Accumulate frames and concatenate once at the end: DataFrame.append no
    # longer exists in pandas 2.x, and growing a frame inside a loop recopies
    # all previous rows on every iteration.
    game_frames = []
    for year in list_of_years:
        for stage in stages:
            schedule_df = season_schedule(team_acronym, year, stage)

            for i, game in enumerate(schedule_df.to_dict("records")):
                location = game["Location"]
                opponent_acronym = game["Team Acronym"]

                # The box-score URL ends with the home team's abbreviation.
                host = team_acronym if location == "Home" else opponent_acronym
                game_url = base_url + game["Date"].replace("-", "") + "0" + host + ".html"

                shared_args = (game["Season"], game["Date"], game["Game"],
                               location, game["Time Period"])
                # get_game_stats downloads the page itself, so each game page
                # is requested twice (once per team).
                team_df = get_game_stats(game_url, team_acronym, *shared_args)
                opponent_df = get_game_stats(game_url, opponent_acronym, *shared_args)

                # Set Location explicitly so it is right for any team_acronym,
                # regardless of which team get_game_stats treats as the
                # schedule's point of view.
                team_df["Location"] = location
                opponent_df["Location"] = "Away" if location == "Home" else "Home"

                game_frames.extend([team_df, opponent_df])

                if i % 10 == 0:
                    print(f"Collecting statistics from game {game['Game']} of the {year} {stage.title()} against the {game['Opponent']}.")
                time.sleep(pause_seconds)

    if not game_frames:
        return pd.DataFrame()

    final_df = pd.concat(game_frames, ignore_index=True)
    final_df["Date"] = pd.to_datetime(final_df["Date"])

    return final_df

In [89]:
# Scrape the Bulls' seasons 1985 through 1998 (14 seasons), then report the
# size of the combined table.
mj_seasons = season_stats(team_names["Chicago Bulls"], list(range(1985, 1999)))
mj_seasons.shape

Collecting statistics from game 1 of the 1985 Regular Season against the Washington Bullets.
Collecting statistics from game 11 of the 1985 Regular Season against the Philadelphia 76ers.
Collecting statistics from game 21 of the 1985 Regular Season against the New York Knicks.
Collecting statistics from game 31 of the 1985 Regular Season against the Atlanta Hawks.
Collecting statistics from game 41 of the 1985 Regular Season against the Indiana Pacers.
Collecting statistics from game 51 of the 1985 Regular Season against the Indiana Pacers.
Collecting statistics from game 61 of the 1985 Regular Season against the Boston Celtics.
Collecting statistics from game 71 of the 1985 Regular Season against the Dallas Mavericks.
Collecting statistics from game 81 of the 1985 Regular Season against the Atlanta Hawks.
Collecting statistics from game 1 of the 1985 Playoffs against the Milwaukee Bucks.
Collecting statistics from game 1 of the 1986 Regular Season against the Cleveland Cavaliers.
Coll

Collecting statistics from game 61 of the 1993 Regular Season against the Charlotte Hornets.
Collecting statistics from game 71 of the 1993 Regular Season against the Boston Celtics.
Collecting statistics from game 81 of the 1993 Regular Season against the Charlotte Hornets.
Collecting statistics from game 1 of the 1993 Playoffs against the Atlanta Hawks.
Collecting statistics from game 11 of the 1993 Playoffs against the New York Knicks.
Collecting statistics from game 1 of the 1994 Regular Season against the Charlotte Hornets.
Collecting statistics from game 11 of the 1994 Regular Season against the San Antonio Spurs.
Collecting statistics from game 21 of the 1994 Regular Season against the San Antonio Spurs.
Collecting statistics from game 31 of the 1994 Regular Season against the Dallas Mavericks.
Collecting statistics from game 41 of the 1994 Regular Season against the Milwaukee Bucks.
Collecting statistics from game 51 of the 1994 Regular Season against the Charlotte Hornets.
Col

(30694, 42)

In [98]:
# Preview the first 20 rows of the scraped box-score data.
# NOTE(review): the saved output shows the long descriptive column labels, so
# this cell was last executed after the rename cell that appears further down;
# on a fresh top-to-bottom run it would show the short abbreviations instead.
mj_seasons.head(20)

Unnamed: 0,Season,Time Period,Game,Date,Team_Abbr,Team,Location,Player,True Shooting Percentage,Effective Field Goal Percentage,3 Point Attempt Rate,Free Throw Attempt Rate,Offensive Rebound Percentage,Defensive Rebound Percentage,Total Rebound Percentage,Assist Percentage,Steal Percentage,Block Percentage,Turnover Percentage,Usage Percentage,Offensive Rating,Defensive Rating,Box Plus/Minus,Minutes Played,Field Goals,Field Goal Attempts,Field Goal Percentage,3-Point Field Goals,3-Point Attempts,3-Point Percentage,Free Throws,Free Throw Attempts,Free Throw Percentage,Offensive Rebounds,Defensive Rebounds,Total Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points
0,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Michael Jordan,0.419,0.313,0.0,0.438,3.1,11.3,7.8,20.0,2.3,5.5,20.8,24.4,86.0,85.0,0.8,40:00,5,16,0.313,0,0,,6,7,0.857,1,5,6,7,2,4,5,2,16
1,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Orlando Woolridge,0.66,0.684,0.0,0.263,6.3,16.3,12.0,11.5,0.0,1.4,0.0,22.0,138.0,90.0,8.5,39:00,13,19,0.684,0,0,,2,5,0.4,2,7,9,3,0,1,0,1,28
2,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Ennis Whatley,0.652,0.625,0.0,0.125,3.3,7.3,5.6,31.3,2.4,0.0,32.2,13.6,110.0,89.0,1.9,37:00,5,8,0.625,0,0,,1,1,1.0,1,3,4,10,2,0,4,2,11
3,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Steve Johnson,0.623,0.643,0.0,0.071,17.6,10.4,13.4,11.5,2.6,1.6,12.2,19.0,120.0,87.0,6.2,35:00,9,14,0.643,0,0,,0,1,0.0,5,4,9,3,2,1,2,2,18
4,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Caldwell Jones,0.0,0.0,0.2,0.0,0.0,15.6,9.0,17.2,0.0,0.0,0.0,7.0,57.0,90.0,-3.2,29:00,0,5,0.0,0,1,0.0,0,0,,0,5,5,5,0,0,0,4,0
5,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Quintin Dailey,0.678,0.667,0.0,0.056,4.4,3.2,3.7,6.3,1.6,0.0,27.5,36.8,90.0,93.0,-3.4,28:00,12,18,0.667,0,0,,1,1,1.0,1,1,2,1,1,0,7,3,25
6,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Jawann Oldham,0.0,0.0,1.0,0.0,0.0,20.9,12.0,0.0,0.0,12.7,50.0,6.2,0.0,81.0,-1.4,13:00,0,1,0.0,0,1,0.0,0,0,,0,3,3,0,0,3,1,1,0
7,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Sidney Green,0.5,0.5,0.0,0.0,13.7,60.4,40.6,0.0,0.0,0.0,42.9,31.5,58.0,71.0,-13.9,9:00,2,4,0.5,0,0,,0,0,,1,6,7,0,0,0,3,1,4
8,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Dave Corzine,0.5,0.5,0.0,0.0,0.0,15.1,8.7,0.0,0.0,0.0,33.3,20.3,62.0,90.0,-15.7,6:00,1,2,0.5,0,0,,0,0,,0,1,1,0,0,0,1,2,2
9,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Rod Higgins,1.5,1.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.3,300.0,98.0,43.8,2:00,1,1,1.0,1,1,1.0,0,0,,0,0,0,0,0,0,0,0,3


In [96]:
# Map each box-score abbreviation to a descriptive column label, then apply the
# mapping to mj_seasons in place and preview the result.
readable_columns = {
    # advanced stats
    "TS%": "True Shooting Percentage",
    "eFG%": "Effective Field Goal Percentage",
    "3PAr": "3 Point Attempt Rate",
    "FTr": "Free Throw Attempt Rate",
    "ORB%": "Offensive Rebound Percentage",
    "DRB%": "Defensive Rebound Percentage",
    "TRB%": "Total Rebound Percentage",
    "AST%": "Assist Percentage",
    "STL%": "Steal Percentage",
    "BLK%": "Block Percentage",
    "TOV%": "Turnover Percentage",
    "USG%": "Usage Percentage",
    "ORtg": "Offensive Rating",
    "DRtg": "Defensive Rating",
    "BPM": "Box Plus/Minus",
    # basic stats
    "MP": "Minutes Played",
    "FG": "Field Goals",
    "FGA": "Field Goal Attempts",
    "FG%": "Field Goal Percentage",
    "3P": "3-Point Field Goals",
    "3PA": "3-Point Attempts",
    "3P%": "3-Point Percentage",
    "FT": "Free Throws",
    "FTA": "Free Throw Attempts",
    "FT%": "Free Throw Percentage",
    "ORB": "Offensive Rebounds",
    "DRB": "Defensive Rebounds",
    "TRB": "Total Rebounds",
    "AST": "Assists",
    "STL": "Steals",
    "BLK": "Blocks",
    "TOV": "Turnovers",
    "PF": "Personal Fouls",
    "PTS": "Points",
}
mj_seasons.rename(columns=readable_columns, inplace=True)
mj_seasons.head()

Unnamed: 0,Season,Time Period,Game,Date,Team_Abbr,Team,Location,Player,True Shooting Percentage,Effective Field Goal Percentage,3 Point Attempt Rate,Free Throw Attempt Rate,Offensive Rebound Percentage,Defensive Rebound Percentage,Total Rebound Percentage,Assist Percentage,Steal Percentage,Block Percentage,Turnover Percentage,Usage Percentage,Offensive Rating,Defensive Rating,Box Plus/Minus,Minutes Played,Field Goals,Field Goal Attempts,Field Goal Percentage,3-Point Field Goals,3-Point Attempts,3-Point Percentage,Free Throws,Free Throw Attempts,Free Throw Percentage,Offensive Rebounds,Defensive Rebounds,Total Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points
0,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Michael Jordan,0.419,0.313,0.0,0.438,3.1,11.3,7.8,20.0,2.3,5.5,20.8,24.4,86,85,0.8,40:00,5,16,0.313,0,0,,6,7,0.857,1,5,6,7,2,4,5,2,16
1,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Orlando Woolridge,0.66,0.684,0.0,0.263,6.3,16.3,12.0,11.5,0.0,1.4,0.0,22.0,138,90,8.5,39:00,13,19,0.684,0,0,,2,5,0.4,2,7,9,3,0,1,0,1,28
2,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Ennis Whatley,0.652,0.625,0.0,0.125,3.3,7.3,5.6,31.3,2.4,0.0,32.2,13.6,110,89,1.9,37:00,5,8,0.625,0,0,,1,1,1.0,1,3,4,10,2,0,4,2,11
3,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Steve Johnson,0.623,0.643,0.0,0.071,17.6,10.4,13.4,11.5,2.6,1.6,12.2,19.0,120,87,6.2,35:00,9,14,0.643,0,0,,0,1,0.0,5,4,9,3,2,1,2,2,18
4,1985,Regular Season,1,1984-10-26,CHI,Chicago Bulls,Home,Caldwell Jones,0.0,0.0,0.2,0.0,0.0,15.6,9.0,17.2,0.0,0.0,0.0,7.0,57,90,-3.2,29:00,0,5,0.0,0,1,0.0,0,0,,0,5,5,5,0,0,0,4,0


In [101]:
# Persist the combined table as CSV; index=False leaves the row index out of
# the file.
# NOTE(review): the relative path presumably requires a ./data directory to
# exist next to the notebook before this cell runs -- confirm or create it.
mj_seasons.to_csv("./data/mj_bulls_season.csv", index = False)