In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json

In [None]:
def get_standings(soup, year, n_teams=32, n_cols=12):
    '''
    Parse the first <table> of a standings page into a tidy DataFrame.

    The table text is flattened into one token per line and is expected to
    be laid out as:

      * for every team, an abbreviation token followed by a name token
        (2 * n_teams tokens), optionally preceded by a marker token that
        contains '--';
      * one header row of n_cols column labels;
      * n_teams data rows of n_cols values each.

    A team whose abbreviation directly follows a '--' marker token is
    flagged with Playoffs = 1; every other team gets 0.

    Parameters
    ----------
    soup : object exposing ``find_all('table')`` whose first result has
        ``getText(separator=...)`` (e.g. a BeautifulSoup document).
    year : value stored unchanged in the 'Year' column of every row.
    n_teams : int, default 32
        Number of teams (data rows) in the table.
    n_cols : int, default 12
        Number of columns in the statistics part of the table.

    Returns
    -------
    pandas.DataFrame
        Columns Team, Year, W, L, T, PF, PA, DIFF, Playoffs; one row per
        team, indexed 1..n_teams. Statistic values are left as the text
        scraped from the page.

    Raises
    ------
    IndexError
        If the page contains no <table>.
    ValueError
        If the number of tokens does not match n_teams / n_cols.
    KeyError
        If the header row lacks one of W, L, T, PF, PA, DIFF.
    '''
    # get the table
    table = soup.find_all('table')[0]
    # flatten the table into one token per line of text
    tokens = table.getText(separator='\n').split('\n')

    # Separate marker tokens from real content. The token right after a
    # marker is recorded as a playoff team; the marker itself is discarded.
    standings = []
    playoffs = set()
    for pos, token in enumerate(tokens):
        if '--' in token:
            # a marker in the very last position has no team after it
            if pos + 1 < len(tokens):
                playoffs.add(tokens[pos + 1])
        else:
            standings.append(token)

    # Validate the layout up front so a changed page fails with a clear
    # message instead of an opaque reshape error.
    n_name_tokens = 2 * n_teams
    expected = n_name_tokens + (n_teams + 1) * n_cols
    if len(standings) != expected:
        raise ValueError(
            'unexpected standings table layout: got {} tokens, expected {} '
            '({} teams x {} columns)'.format(
                len(standings), expected, n_teams, n_cols))

    # the names section alternates abbreviation / full name - keep abbreviation
    teams = standings[:n_name_tokens:2]
    # everything after the names is the header row followed by the data rows
    cells = standings[n_name_tokens:]
    header = cells[:n_cols]
    rows = [cells[start:start + n_cols]
            for start in range(n_cols, len(cells), n_cols)]

    # index starts at 1, matching the position of each data row below the header
    standings_df = pd.DataFrame(rows, columns=header,
                                index=range(1, n_teams + 1))
    standings_df.insert(0, 'Team', teams)
    standings_df.insert(1, 'Year', year)
    standings_df['Playoffs'] = standings_df['Team'].isin(playoffs).astype('int64')

    use_cols = ['Team', 'Year', 'W', 'L', 'T', 'PF', 'PA', 'DIFF', 'Playoffs']

    return standings_df[use_cols]

In [268]:
def fix_team_names(standings_df):
    '''
    Collapse alternate team abbreviations onto a single abbreviation each.

    Only values that equal an alias exactly are rewritten; a value that
    merely contains one of the aliases as a substring is left untouched.

    Parameters
    ----------
    standings_df : pandas.DataFrame
        Must have a 'Team' column of abbreviations.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; its 'Team' column is updated
        in place.
    '''
    # alias -> abbreviation to keep (STL and LAR both collapse to LA)
    team_aliases = {
        'STL': 'LA',
        'LAR': 'LA',
        'SD': 'LAC',
        'JAX': 'JAC',
        'WSH': 'WAS',
    }
    # Series.replace with a dict matches whole values, unlike .str.replace,
    # which substitutes substrings (and treated patterns as regex on older pandas)
    standings_df['Team'] = standings_df['Team'].replace(team_aliases)

    return standings_df

In [269]:
# URL template for the ESPN league-wide NFL standings page; the '{}'
# placeholder is filled with the season year via url.format(year).
url = 'http://www.espn.com/nfl/standings/_/season/{}/group/league'

In [270]:
years = ['2013', '2014', '2015', '2016', '2017']

# Scrape each season into its own frame and combine them with one concat;
# growing a DataFrame inside the loop re-copies all earlier rows each pass.
season_frames = []
for year in years:
    page = requests.get(url.format(year), timeout=30)
    # fail loudly on an HTTP error instead of trying to parse an error page
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'html.parser')
    season_frames.append(get_standings(soup, year))

# the per-season row index is kept as-is, so it repeats for every season
standings_df = pd.concat(season_frames)

# convert every column after the first two (Team, Year) from text to float
standings_df = standings_df.astype(
    {col: float for col in standings_df.columns[2:]}
)

standings_df = fix_team_names(standings_df)

In [271]:
# Save the combined, cleaned standings table as CSV.
# NOTE(review): assumes the data/ directory already exists - confirm.
standings_df.to_csv('data/standings.csv')