In [1]:
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin
import re
import numpy as np
import pandas as pd
import json

In [66]:
# Full franchise name -> three-letter abbreviation.
# NOTE(review): only twelve franchises are listed and nothing in the visible
# cells reads this mapping -- confirm it is still needed and complete.
name_to_abbrev = {
    'Toronto Raptors': 'TOR',
    'Golden State Warriors': 'GSW',
    'Cleveland Cavaliers': 'CLE',
    'San Antonio Spurs': 'SAS',
    'Miami Heat': 'MIA',
    'Dallas Mavericks': 'DAL',
    'Los Angeles Lakers': 'LAL',
    'Boston Celtics': 'BOS',
    'Detroit Pistons': 'DET',
    'Chicago Bulls': 'CHI',
    'Houston Rockets': 'HOU',
    'Philadelphia 76ers': 'PHI',
}

# Playoffs index page: one table row per season, including its champion.
nba_champion_url = 'https://www.basketball-reference.com/playoffs/'

# Team-season finder query (advanced stats, seasons from 1980 on, ordered by
# wins, starting at offset 0); further pages are reached via "Next page" links.
nba_team_stats_url = 'https://www.basketball-reference.com/play-index/tsl_finder.cgi?request=1&match=single&type=advanced&year_min=1980&year_max=&lg_id=NBA&franch_id=&c1stat=&c1comp=&c1val=&c2stat=&c2comp=&c2val=&c3stat=&c3comp=&c3val=&c4stat=&c4comp=&c4val=&order_by=wins&order_by_asc=&offset=0'
# Final URL of the stats query as reported by the response (i.e. after any
# redirects); relative "Next page" hrefs are joined against it when paginating.
# The timeout stops this cell from hanging forever if the site never answers.
base_url = requests.get(nba_team_stats_url, timeout=30).url
def get_soup_from_url(url, timeout=30):
    """Download ``url`` and return its HTML parsed into a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot block the notebook indefinitely.

    Returns
    -------
    BeautifulSoup
        The response body parsed with the built-in ``'html.parser'``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page as if
    # it were the expected table (which would silently yield no rows).
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')
def create_champion_dict(soup, min_year=1980):
    """Map season labels (e.g. ``'1999-00'``) to that season's champion name.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed playoffs index page. Only ``<tr>`` rows whose first ``<th>``
        has ``data-stat="year_id"`` and that contain a ``<td>`` and an ``<a>``
        are read; the text of the row's first ``<a>`` is taken as the year.
    min_year : int, optional
        Earliest season-ending year to include (default 1980).

    Returns
    -------
    dict[str, str]
        ``{'<start year>-<two-digit end year>': champion}``.

    Notes
    -----
    Scanning stops at the first row whose year is below ``min_year``, so rows
    are assumed to be listed newest first -- TODO confirm against the page.
    """
    champions = {}
    for row in soup.find_all('tr'):
        headers = row.find_all('th')
        is_season_row = (
            headers
            and headers[0].get('data-stat') == 'year_id'
            and row.find('td')
            and row.find('a')
        )
        if not is_season_row:
            continue
        year = int(row.find('a').text)
        if year < min_year:
            break
        # Zero-pad the two-digit end year: without it the 2000-2009 seasons
        # became '1999-0' ... '2008-9' and never matched labels such as
        # '1999-00' used for the team rows.
        year_str = f'{year - 1}-{year % 100:02d}'
        champ = [stat.text for stat in row.find_all('td') if stat.get('data-stat') == 'champion'][0]
        champions[year_str] = champ
    return champions
def create_team_dataset(soup, champions):
    """Scrape every page of the team-season table into one DataFrame.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed first page of the team-season search results.
    champions : dict[str, str]
        Season label -> champion name, as built by ``create_champion_dict``.

    Returns
    -------
    pandas.DataFrame
        One row per team-season with a default integer index. Columns appear
        in the order the cells are encountered: ``Team`` (the first link's
        ``title`` plus the season label), ``Champion`` (whether that title
        equals ``champions[season]``) and the float columns ``win_loss_pct``,
        ``efg_pct``, ``off_rtg`` and ``def_rtg``. The frame is empty when no
        data rows are found.

    Notes
    -----
    Further pages are fetched through the module-level ``get_soup_from_url``,
    resolving each "Next page" href against the module-level ``base_url``.
    """
    float_stats = ('win_loss_pct', 'efg_pct', 'off_rtg', 'def_rtg')
    rows_list = []
    while soup is not None:
        for row in soup.find_all('tr'):
            headers = row.find_all('th')
            # Skip anything that is not a data row; rows with no <th> at all
            # previously raised IndexError here.
            is_data_row = (
                headers
                and headers[0].get('data-stat') == 'ranker'
                and row.find('td')
                and row.find('a')
            )
            if not is_data_row:
                continue
            current_row = {'Team': row.find('a').get('title')}
            for stat in row.find_all('td'):
                stat_name = stat.get('data-stat')
                if stat_name == 'season':
                    season = stat.text
                    # Compare before the season suffix is appended to Team.
                    current_row['Champion'] = current_row['Team'] == champions.get(season)
                    current_row['Team'] += ' ' + season
                elif stat_name in float_stats:
                    current_row[stat_name] = float(stat.text)
            rows_list.append(current_row)

        # Follow the first "Next page" link only: a page that repeats the
        # link would otherwise trigger a redundant download of the same page.
        next_soup = None
        for link in soup.find_all('a'):
            if link.text == 'Next page':
                next_soup = get_soup_from_url(urljoin(base_url, link.get('href')))
                break
        soup = next_soup

    # The former `dataset.set_index('Team')` discarded its result (set_index
    # is not in-place), so 'Team' has always been returned as a regular
    # column; that shape is kept for compatibility with later cells.
    return pd.DataFrame(rows_list)

In [67]:
# Fetch the playoffs index page and build the season -> champion lookup.
champions = create_champion_dict(get_soup_from_url(nba_champion_url))
# Fetch the first page of the team-season search and build the dataset from it,
# labelling each row with whether that team was the season's champion.
dataset = create_team_dataset(get_soup_from_url(nba_team_stats_url), champions)

Chicago Bulls 1995-96
Chicago Bulls 1996-97
Boston Celtics 1985-86
Chicago Bulls 1991-92
Golden State Warriors 2014-15
Golden State Warriors 2016-17
Miami Heat 2012-13
Philadelphia 76ers 1982-83
Los Angeles Lakers 1986-87
Detroit Pistons 1988-89
Boston Celtics 1980-81
Boston Celtics 1983-84
Los Angeles Lakers 1984-85
Los Angeles Lakers 1987-88
Chicago Bulls 1997-98
San Antonio Spurs 2013-14
Chicago Bulls 1990-91
Los Angeles Lakers 1979-80
Detroit Pistons 1989-90
Houston Rockets 1993-94
Golden State Warriors 2017-18
Toronto Raptors 2018-19
Los Angeles Lakers 1981-82
Chicago Bulls 1992-93
Los Angeles Lakers 2009-10
Dallas Mavericks 2010-11
Cleveland Cavaliers 2015-16
Houston Rockets 1994-95
Miami Heat 2011-12
San Antonio Spurs 1998-99


In [68]:
# Preview the first rows of the scraped dataset as a sanity check.
dataset.head()

Unnamed: 0,Team,Champion,win_loss_pct,off_rtg,def_rtg,efg_pct
0,Golden State Warriors 2015-16,False,0.89,114.5,103.8,0.563
1,Chicago Bulls 1995-96,True,0.878,115.2,101.8,0.517
2,Chicago Bulls 1996-97,True,0.841,114.4,102.4,0.511
3,Boston Celtics 1985-86,True,0.817,111.8,102.6,0.518
4,Chicago Bulls 1991-92,True,0.817,115.5,104.5,0.518
