In [6]:
import requests
import pandas as pd
import numpy as np
import time

def load_nfl_teams():
    nfl_teams = pd.read_csv('../datasets/nfl_teams.csv')
    nfl_teams.loc[(nfl_teams.team_name == 'St. Louis Cardinals'), 'team_id_pfr'] = 'CRD'
    labels = list(nfl_teams.columns)[4:] + ['team_name_short']
    nfl_teams.drop(labels, axis=1, inplace=True)
    return nfl_teams

def load_scores():
    return pd.read_csv('../datasets/spreadspoke_scores.csv')

def transform_date(date):
    mapping = {
        '1'  : 'January',
        '2'  : 'February',
        '3'  : 'March',
        '4'  : 'April',
        '5'  : 'May',
        '6'  : 'June',
        '7'  : 'July',
        '8'  : 'August',
        '9'  : 'September',
        '10' : 'October',
        '11' : 'November',
        '12' : 'December',
    }
    
    arr = date.split('/')
    month = arr[0]
    day = arr[1]
    
    return mapping[month] + ' ' + day

def get_team_id(name):
    teams = load_nfl_teams()
    return teams.loc[teams['team_name'] == name, 'team_id_pfr'].iloc[0]

def get_game_stats(date, year, home_team_id):
    scores = load_scores()
    teams = load_nfl_teams()
    
    # Get the URL for this team and d,p the data into a dataframe
    url = 'https://www.pro-football-reference.com/teams/' + home_team_id.lower() + '/' + str(year) +'.htm'
    df = pd.read_html(url)[1]

    # Sort out some weird formatting leftover from the HTML
    df.columns = [col[1] for col in df.columns.values]

    # Drop the unnecessary labels (already in score dataset)
    drop_labels = [
        'Week',
        'Day',
        'Tm',
        'Rec',
        'OT',
        'Unnamed: 3_level_1',
        'Unnamed: 4_level_1',
        'Unnamed: 5_level_1',
        'Unnamed: 8_level_1'
    ]
    df.drop(columns=drop_labels, inplace=True)
    
    # In later seasons, PFR added an "expected points" metric
    # If we see this happening, we drop those expected points metrics
    if len(df.columns) > 13:
        df.drop(columns=['Offense', 'Defense', 'Sp. Tms'], inplace=True)

    # Rename some of the labels for clarity
    new_labels = [list(df.columns)[0].lower()] + ['away_team', 'opp_score', 'home_first_downs', 'home_total_yds', 'home_pass_yds', 'home_rush_yds', 'home_TO', 'away_first_downs', 'away_total_yds', 'away_pass_yds', 'away_rush_yds', 'away_TO']
    df.columns = [val for val in new_labels]
    df.drop(columns=['opp_score'], inplace=True)
    
    # Grab the stats from the specific date in question
    new_date = transform_date(date)
    stats = df.loc[df['date'] == new_date]
    away_team_name = stats['away_team'].to_string(index=False).strip()
    away_team_id = teams.loc[teams['team_name'] == away_team_name, 'team_id_pfr'].iloc[0]
    stats.loc[:, 'away_team'] = away_team_id
    
    arr = date.split('/')
    game_id = ''.join(arr) + home_team_id
    stats.insert(0, 'game_id', game_id)
    return stats.iloc[0]


def get_game_id(date, home_team):
    arr = date.split('/')
    return ''.join(arr) + home_team

In [7]:
scores = load_scores()

stats = ['game_id',
            'date',
            'away_team',
            'home_first_downs',
            'home_total_yds',
            'home_pass_yds',
            'home_rush_yds',
            'home_TO',
            'away_first_downs',
            'away_total_yds',
            'away_pass_yds',
            'away_rush_yds',
            'away_TO']
df = pd.DataFrame(columns=stats)

teams = load_nfl_teams()

# 0 thru 4031 are fine
# index 4032 is bad
# 4033 thru 8500 are good

scores.drop(4032, inplace=True)

# This loop takes forever; it gets the average stats 
i = 0
for index, row in scores.iterrows():
    home_team_name = row['team_home']
    home_team_id = teams.loc[teams['team_name'] == home_team_name, 'team_id_pfr'].iloc[0]
    year = row['schedule_season']
    date = row['schedule_date']
    game_stats = get_game_stats(date, year, home_team_id)
    args = {'game_id' : game_stats['game_id'],
            'date' : game_stats['date'],
            'away_team' : game_stats['away_team'],
            'home_first_downs' : game_stats['home_first_downs'],
            'home_total_yds' : game_stats['home_total_yds'],
            'home_pass_yds' : game_stats['home_pass_yds'],
            'home_rush_yds' : game_stats['home_rush_yds'],
            'home_TO' : game_stats['home_TO'],
            'away_first_downs' : game_stats['away_first_downs'],
            'away_total_yds' : game_stats['away_total_yds'],
            'away_pass_yds' : game_stats['away_pass_yds'],
            'away_rush_yds' : game_stats['away_rush_yds'],
            'away_TO' : game_stats['away_TO']}

    df = df.append(args, ignore_index=True)
    i += 1
    
df.to_csv('../datasets/all_game_stats.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


KeyboardInterrupt: 

In [2]:
# Cleaning up some things from the data that I missed in the cell above
# Basically I didn't want to run all of the requests again
# So I took the data and stored it in a CSV. This is the cleanup of that data
new_df = pd.read_csv('../datasets/all_game_stats.csv')
new_df.rename(columns={'Unnamed: 0': 'home_team'}, inplace=True)

for index, row in new_df.iterrows():
    new_df.iloc[index, 0] = row.loc['game_id'][-3:]
new_df
    
new_df.to_csv('../datasets/all_game_stats.csv', index=False)

In [8]:
df = pd.read_csv('../datasets/all_game_stats.csv')
teams = load_nfl_teams()
scores = load_scores()

team_ids = teams['team_id_pfr']
for team_id in team_ids:
    team_df = df.loc[(df['home_team'] == team_id) | (df['away_team'] == team_id)]
    filename = '../datasets/team_data/unclean_data/' + team_id + '_game_stats.csv'
    team_df.to_csv(filename)


In [6]:
cols = ['team_id', 'game_id', 'date', 'opp_team_id', 'first_downs',
       'total_yds', 'pass_yds', 'rush_yds', 'TO',
       'opp_first_downs', 'opp_total_yds', 'opp_pass_yds', 'opp_rush_yds',
       'opp_TO']

# This method creates a new CSV file for each team
# This was done because in the previous CSV file we knew which team was home
# and which was away, but that made it much more difficult to get a rolling average
# of each team's statistics because if a team was home one week and away the other,
# we would have to go back to make sure if they're home or away each week,
# and then find their previous 16 games, add them up, and take an average. It would've been a pain in the ass

# This way, we have intermediary datasets that both allow us to take a rolling average very easily
# with the pandas.rolling() builtin, but also allows us to more easily pick and choose
# which stats we want to include in our model and have them stay a uniform, consistent name throughout.

for team_id in team_ids:
    file = '../datasets/team_data/unclean_data/' + team_id + '_game_stats.csv'
    dirty_df = pd.read_csv(file)
    clean_df = pd.DataFrame(columns=cols)
    app = {'team_id': team_id}
    for index, row in dirty_df.iterrows():
        # If home team...
        app['game_id'] = row['game_id']
        app['date'] = row['date']
        if row['home_team'] == team_id:
            # Set the opposing team's id
            app['opp_team_id']     = row['away_team']
            
            # Set the team's offensive stats 
            app['first_downs']     = row['home_first_downs']
            app['total_yds']       = row['home_total_yds']
            app['pass_yds']        = row['home_pass_yds']
            app['rush_yds']        = row['home_rush_yds']
            app['TO']              = row['home_TO']
            
            # Set the opposition team's stats
            app['opp_first_downs'] = row['away_first_downs']
            app['opp_total_yds']   = row['away_total_yds']
            app['opp_pass_yds']    = row['away_pass_yds']
            app['opp_rush_yds']    = row['away_rush_yds']
            app['opp_TO']          = row['away_TO']
        else:
            # Set opposing team's id
            app['opp_team_id']     = row['home_team']
            
            # Set team's offensive stats
            app['first_downs']     = row['away_first_downs']
            app['total_yds']       = row['away_total_yds']
            app['pass_yds']        = row['away_pass_yds']
            app['rush_yds']        = row['away_rush_yds']
            app['TO']              = row['away_TO']
            
            # Set the opposition team's stats
            app['opp_first_downs'] = row['home_first_downs']
            app['opp_total_yds']   = row['home_total_yds']
            app['opp_pass_yds']    = row['home_pass_yds']
            app['opp_rush_yds']    = row['home_rush_yds']
            app['opp_TO']          = row['home_TO']

        clean_df = clean_df.append(app, ignore_index=True)
    clean_df = clean_df.iloc[:, [1, 2, 0, 4, 5, 6, 7, 8, 3, 9, 10, 11, 12, 13]]
    clean_df.to_csv('../datasets/team_data/clean_data/' + team_id + '_game_stats.csv', index=False)


In [11]:
# Example of what one of the CSV files we created above looks like
df = pd.read_csv(f'../datasets/team_data/clean_data/HTX_game_stats.csv')
df.head()

Unnamed: 0,game_id,date,team_id,first_downs,total_yds,pass_yds,rush_yds,TO,opp_team_id,opp_first_downs,opp_total_yds,opp_pass_yds,opp_rush_yds,opp_TO
0,982002HTX,September 8,HTX,13.0,210.0,123.0,87.0,1.0,DAL,11.0,267.0,112.0,155.0,2.0
1,9152002SDG,September 15,HTX,7.0,118.0,29.0,89.0,3.0,SDG,16.0,267.0,143.0,124.0,1.0
2,9222002HTX,September 22,HTX,10.0,204.0,78.0,126.0,2.0,CLT,16.0,339.0,251.0,88.0,2.0
3,9292002PHI,September 29,HTX,12.0,242.0,151.0,91.0,3.0,PHI,21.0,391.0,289.0,102.0,3.0
4,10132002HTX,October 13,HTX,18.0,338.0,197.0,141.0,0.0,BUF,25.0,403.0,230.0,173.0,1.0


In [5]:
# The following loop calculates each team's rolling average stats over the last 16 games and
# renames the statistic columns to have avg_ in front to better indicate their form

for team_id in team_ids:
    df = pd.read_csv(f'../datasets/team_data/clean_data/{team_id}_game_stats.csv')
    cols = df.columns
    new_cols = {}
    
    # Create mapping to rename the columns
    for col in enumerate(cols):
        if col[0] > 2 and col[0] != 8:
            s = 'avg_' + col[1]
            new_cols[col[1]] = s
        else:
            new_cols[col[1]] = col[1]
    
    # Combine the rolling average of every statistic with the qualitative data
    avg_df = pd.concat([df.loc[:, 'game_id'], df.loc[:, 'date'], df.loc[:, 'team_id'], df.loc[:, 'opp_team_id'], df.rolling(17).mean()], axis=1)
    avg_df = avg_df.rename(columns=new_cols)
    
    # Save to a CSV file. We can now access these values more easily for each game
    avg_df.to_csv(f'../datasets/team_data/avg_data/{team_id}_avgs.csv', index=False)

In [12]:
# This loop is here more as a utility
# We wanted to keep features low for this first model,
# so initially we'll only grab the total yards and turnovers for each team

# If you change which columns we use, we'll also have to change some highlighted ares in cell 1 (which for some reason 
# is lower than this one)
from IPython.display import display
for team_id in team_ids:
    avg_df = pd.read_csv(f'../datasets/team_data/avg_data/{team_id}_avgs.csv')
    # Change the following line and run this if we want to change the stats we grab
    avg_df = avg_df.loc[:, ['game_id', 'avg_total_yds', 'avg_TO']]
    avg_df.to_csv(f'../datasets/team_data/avg_data/{team_id}_avgs.csv', index=False)

In [17]:
# The following cell inserts each team's ID into the scores data to make it a bit easier to stay consistent
# It also drops columns that we don't want for the initial model, but they can easily be brought back by changing
# the second to last line of this cell.
scores = load_scores()
teams = load_nfl_teams()
def get_home_team_id(row):
    team_name = row['team_home']
    team = teams[teams['team_name'] == team_name]
    return team['team_id_pfr'].item()

def get_away_team_id(row):
    team_name = row['team_away']
    team = teams[teams['team_name'] == team_name]
    return team['team_id_pfr'].item()
    
def insert_ids(row):
    print(row['schedule_date'])
    home_id = get_home_team_id(row)
    away_id = get_away_team_id(row)
    return pd.Series([home_id, away_id])
    
scores[['home_id', 'away_id']] = scores.apply(insert_ids, axis=1)
scores.drop(columns=['team_favorite_id', 'spread_favorite', 'stadium_neutral', 'weather_temperature', 'weather_wind_mph', 'weather_humidity', 'weather_detail', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19'], inplace=True)
scores.to_csv('../datasets/unclean_scores.csv', index=False)

9/2/1966
9/2/1966
9/3/1966
9/4/1966
9/9/1966
9/10/1966
9/10/1966
9/10/1966
9/11/1966
9/11/1966
9/11/1966
9/11/1966
9/11/1966
9/11/1966
9/11/1966
9/16/1966
9/18/1966
9/18/1966
9/18/1966
9/18/1966
9/18/1966
9/18/1966
9/18/1966
9/18/1966
9/18/1966
9/18/1966
9/25/1966
9/25/1966
9/25/1966
9/25/1966
9/25/1966
9/25/1966
9/25/1966
9/25/1966
9/25/1966
9/25/1966
9/25/1966
9/30/1966
10/2/1966
10/2/1966
10/2/1966
10/2/1966
10/2/1966
10/2/1966
10/2/1966
10/2/1966
10/2/1966
10/2/1966
10/8/1966
10/8/1966
10/8/1966
10/8/1966
10/9/1966
10/9/1966
10/9/1966
10/9/1966
10/9/1966
10/9/1966
10/9/1966
10/16/1966
10/16/1966
10/16/1966
10/16/1966
10/16/1966
10/16/1966
10/16/1966
10/16/1966
10/16/1966
10/16/1966
10/16/1966
10/23/1966
10/23/1966
10/23/1966
10/23/1966
10/23/1966
10/23/1966
10/23/1966
10/23/1966
10/23/1966
10/23/1966
10/23/1966
10/30/1966
10/30/1966
10/30/1966
10/30/1966
10/30/1966
10/30/1966
10/30/1966
10/30/1966
10/30/1966
10/30/1966
10/31/1966
11/6/1966
11/6/1966
11/6/1966
11/6/1966
11/6/1966
11

11/29/1970
11/29/1970
11/29/1970
11/30/1970
12/5/1970
12/6/1970
12/6/1970
12/6/1970
12/6/1970
12/6/1970
12/6/1970
12/6/1970
12/6/1970
12/6/1970
12/6/1970
12/6/1970
12/7/1970
12/12/1970
12/12/1970
12/13/1970
12/13/1970
12/13/1970
12/13/1970
12/13/1970
12/13/1970
12/13/1970
12/13/1970
12/13/1970
12/13/1970
12/14/1970
12/19/1970
12/20/1970
12/20/1970
12/20/1970
12/20/1970
12/20/1970
12/20/1970
12/20/1970
12/20/1970
12/20/1970
12/20/1970
12/20/1970
12/20/1970
12/26/1970
12/26/1970
12/27/1970
12/27/1970
1/3/1971
1/3/1971
1/17/1971
9/19/1971
9/19/1971
9/19/1971
9/19/1971
9/19/1971
9/19/1971
9/19/1971
9/19/1971
9/19/1971
9/19/1971
9/19/1971
9/19/1971
9/20/1971
9/26/1971
9/26/1971
9/26/1971
9/26/1971
9/26/1971
9/26/1971
9/26/1971
9/26/1971
9/26/1971
9/26/1971
9/26/1971
9/26/1971
9/27/1971
10/3/1971
10/3/1971
10/3/1971
10/3/1971
10/3/1971
10/3/1971
10/3/1971
10/3/1971
10/3/1971
10/3/1971
10/3/1971
10/3/1971
10/4/1971
10/10/1971
10/10/1971
10/10/1971
10/10/1971
10/10/1971
10/10/1971
10/10/1971
1

12/14/1974
12/15/1974
12/15/1974
12/15/1974
12/15/1974
12/15/1974
12/15/1974
12/15/1974
12/15/1974
12/15/1974
12/15/1974
12/21/1974
12/21/1974
12/22/1974
12/22/1974
12/29/1974
12/29/1974
1/12/1975
9/21/1975
9/21/1975
9/21/1975
9/21/1975
9/21/1975
9/21/1975
9/21/1975
9/21/1975
9/21/1975
9/21/1975
9/21/1975
9/21/1975
9/22/1975
9/28/1975
9/28/1975
9/28/1975
9/28/1975
9/28/1975
9/28/1975
9/28/1975
9/28/1975
9/28/1975
9/28/1975
9/28/1975
9/28/1975
9/29/1975
10/5/1975
10/5/1975
10/5/1975
10/5/1975
10/5/1975
10/5/1975
10/5/1975
10/5/1975
10/5/1975
10/5/1975
10/5/1975
10/5/1975
10/6/1975
10/12/1975
10/12/1975
10/12/1975
10/12/1975
10/12/1975
10/12/1975
10/12/1975
10/12/1975
10/12/1975
10/12/1975
10/12/1975
10/12/1975
10/13/1975
10/19/1975
10/19/1975
10/19/1975
10/19/1975
10/19/1975
10/19/1975
10/19/1975
10/19/1975
10/19/1975
10/19/1975
10/19/1975
10/19/1975
10/20/1975
10/25/1975
10/26/1975
10/26/1975
10/26/1975
10/26/1975
10/26/1975
10/26/1975
10/26/1975
10/26/1975
10/26/1975
10/26/1975
10/26/

11/26/1978
11/26/1978
11/26/1978
11/26/1978
11/26/1978
11/27/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/3/1978
12/4/1978
12/9/1978
12/9/1978
12/10/1978
12/10/1978
12/10/1978
12/10/1978
12/10/1978
12/10/1978
12/10/1978
12/10/1978
12/10/1978
12/10/1978
12/10/1978
12/11/1978
12/16/1978
12/16/1978
12/17/1978
12/17/1978
12/17/1978
12/17/1978
12/17/1978
12/17/1978
12/17/1978
12/17/1978
12/17/1978
12/17/1978
12/17/1978
12/18/1978
12/24/1978
12/24/1978
12/30/1978
12/30/1978
12/31/1978
12/31/1978
1/7/1979
1/7/1979
1/21/1979
9/1/1979
9/2/1979
9/2/1979
9/2/1979
9/2/1979
9/2/1979
9/2/1979
9/2/1979
9/2/1979
9/2/1979
9/2/1979
9/2/1979
9/2/1979
9/3/1979
9/6/1979
9/9/1979
9/9/1979
9/9/1979
9/9/1979
9/9/1979
9/9/1979
9/9/1979
9/9/1979
9/9/1979
9/9/1979
9/9/1979
9/9/1979
9/10/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/16/1979
9/17/1979
9

11/21/1982
11/21/1982
11/21/1982
11/21/1982
11/22/1982
11/25/1982
11/25/1982
11/28/1982
11/28/1982
11/28/1982
11/28/1982
11/28/1982
11/28/1982
11/28/1982
11/28/1982
11/28/1982
11/28/1982
11/28/1982
11/29/1982
12/2/1982
12/5/1982
12/5/1982
12/5/1982
12/5/1982
12/5/1982
12/5/1982
12/5/1982
12/5/1982
12/5/1982
12/5/1982
12/5/1982
12/5/1982
12/6/1982
12/11/1982
12/11/1982
12/12/1982
12/12/1982
12/12/1982
12/12/1982
12/12/1982
12/12/1982
12/12/1982
12/12/1982
12/12/1982
12/12/1982
12/12/1982
12/13/1982
12/18/1982
12/18/1982
12/19/1982
12/19/1982
12/19/1982
12/19/1982
12/19/1982
12/19/1982
12/19/1982
12/19/1982
12/19/1982
12/19/1982
12/19/1982
12/20/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/26/1982
12/27/1982
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/2/1983
1/3/1983
1/8/1983
1/8/1983
1/8/1983
1/8/1983
1/9/1983
1/9/1983
1/9/1983


9/14/1986
9/14/1986
9/14/1986
9/14/1986
9/14/1986
9/14/1986
9/14/1986
9/14/1986
9/14/1986
9/15/1986
9/18/1986
9/21/1986
9/21/1986
9/21/1986
9/21/1986
9/21/1986
9/21/1986
9/21/1986
9/21/1986
9/21/1986
9/21/1986
9/21/1986
9/21/1986
9/22/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/28/1986
9/29/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/5/1986
10/6/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/12/1986
10/13/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/19/1986
10/20/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/26/1986
10/27/1986
11/2/1986
11/2/198

12/3/1989
12/3/1989
12/4/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/10/1989
12/11/1989
12/16/1989
12/16/1989
12/17/1989
12/17/1989
12/17/1989
12/17/1989
12/17/1989
12/17/1989
12/17/1989
12/17/1989
12/17/1989
12/17/1989
12/17/1989
12/18/1989
12/23/1989
12/23/1989
12/23/1989
12/24/1989
12/24/1989
12/24/1989
12/24/1989
12/24/1989
12/24/1989
12/24/1989
12/24/1989
12/24/1989
12/24/1989
12/25/1989
12/31/1989
12/31/1989
1/6/1990
1/6/1990
1/7/1990
1/7/1990
1/14/1990
1/14/1990
1/28/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/9/1990
9/10/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/16/1990
9/17/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/23/1990
9/24/1990
9/30/1990
9/3

10/3/1993
10/3/1993
10/3/1993
10/3/1993
10/3/1993
10/4/1993
10/10/1993
10/10/1993
10/10/1993
10/10/1993
10/10/1993
10/10/1993
10/10/1993
10/10/1993
10/10/1993
10/10/1993
10/11/1993
10/14/1993
10/17/1993
10/17/1993
10/17/1993
10/17/1993
10/17/1993
10/17/1993
10/17/1993
10/17/1993
10/18/1993
10/24/1993
10/24/1993
10/24/1993
10/24/1993
10/24/1993
10/24/1993
10/24/1993
10/24/1993
10/24/1993
10/25/1993
10/31/1993
10/31/1993
10/31/1993
10/31/1993
10/31/1993
10/31/1993
10/31/1993
10/31/1993
10/31/1993
10/31/1993
10/31/1993
11/1/1993
11/7/1993
11/7/1993
11/7/1993
11/7/1993
11/7/1993
11/7/1993
11/7/1993
11/7/1993
11/7/1993
11/7/1993
11/7/1993
11/8/1993
11/14/1993
11/14/1993
11/14/1993
11/14/1993
11/14/1993
11/14/1993
11/14/1993
11/14/1993
11/14/1993
11/14/1993
11/14/1993
11/14/1993
11/15/1993
11/21/1993
11/21/1993
11/21/1993
11/21/1993
11/21/1993
11/21/1993
11/21/1993
11/21/1993
11/21/1993
11/21/1993
11/21/1993
11/21/1993
11/22/1993
11/25/1993
11/25/1993
11/28/1993
11/28/1993
11/28/1993
11/28/1

11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/10/1996
11/11/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/17/1996
11/18/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/24/1996
11/25/1996
11/28/1996
11/28/1996
12/1/1996
12/1/1996
12/1/1996
12/1/1996
12/1/1996
12/1/1996
12/1/1996
12/1/1996
12/1/1996
12/1/1996
12/1/1996
12/1/1996
12/2/1996
12/5/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/8/1996
12/9/1996
12/14/1996
12/14/1996
12/15/1996
12/15/1996
12/15/1996
12/15/1996
12/15/1996
12/15/1996
12/15/1996
12/15/1996
12/15/1996
12/15/1996
12/15/1996
12/15/1996
12/16/1996
12/21/1996
12/21/1996
12/22/1996
12/22/1996
12/22

12/18/1999
12/19/1999
12/19/1999
12/19/1999
12/19/1999
12/19/1999
12/19/1999
12/19/1999
12/19/1999
12/19/1999
12/19/1999
12/19/1999
12/19/1999
12/20/1999
12/24/1999
12/25/1999
12/26/1999
12/26/1999
12/26/1999
12/26/1999
12/26/1999
12/26/1999
12/26/1999
12/26/1999
12/26/1999
12/26/1999
12/26/1999
12/26/1999
12/27/1999
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/2/2000
1/3/2000
1/8/2000
1/8/2000
1/9/2000
1/9/2000
1/15/2000
1/15/2000
1/16/2000
1/16/2000
1/23/2000
1/23/2000
1/30/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/3/2000
9/4/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/10/2000
9/11/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/17/2000
9/18/2000
9/24/2000
9/24/

9/7/2003
9/7/2003
9/7/2003
9/7/2003
9/7/2003
9/7/2003
9/8/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/14/2003
9/15/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/21/2003
9/22/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/28/2003
9/29/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/5/2003
10/6/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/12/2003
10/13/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/19/2003
10/20/2003
10/26/2003
10/26/2003
10/26/2003
10/26/2003
10/26

9/17/2006
9/17/2006
9/17/2006
9/17/2006
9/17/2006
9/18/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/24/2006
9/25/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/1/2006
10/2/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/8/2006
10/9/2006
10/15/2006
10/15/2006
10/15/2006
10/15/2006
10/15/2006
10/15/2006
10/15/2006
10/15/2006
10/15/2006
10/15/2006
10/15/2006
10/15/2006
10/16/2006
10/22/2006
10/22/2006
10/22/2006
10/22/2006
10/22/2006
10/22/2006
10/22/2006
10/22/2006
10/22/2006
10/22/2006
10/22/2006
10/22/2006
10/23/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/29/2006
10/30/2006
11/5/2006
11/5/2006
11/5/2006
11/5/2006
11/5/2006
11/5/2006
11/5/2006
11/5/2006


10/18/2009
10/18/2009
10/18/2009
10/18/2009
10/18/2009
10/18/2009
10/18/2009
10/18/2009
10/18/2009
10/18/2009
10/18/2009
10/19/2009
10/25/2009
10/25/2009
10/25/2009
10/25/2009
10/25/2009
10/25/2009
10/25/2009
10/25/2009
10/25/2009
10/25/2009
10/25/2009
10/25/2009
10/26/2009
11/1/2009
11/1/2009
11/1/2009
11/1/2009
11/1/2009
11/1/2009
11/1/2009
11/1/2009
11/1/2009
11/1/2009
11/1/2009
11/1/2009
11/2/2009
11/8/2009
11/8/2009
11/8/2009
11/8/2009
11/8/2009
11/8/2009
11/8/2009
11/8/2009
11/8/2009
11/8/2009
11/8/2009
11/8/2009
11/9/2009
11/12/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/15/2009
11/16/2009
11/19/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/22/2009
11/23/2009
11/26/2009
11/26/2009
11/26/2009
11/29/2009
11/29/2009
11/29/2009
11/29/2009
11/29/2009
11/29/2009
11/29/2009
11/29/2009
11/

10/21/2012
10/21/2012
10/22/2012
10/25/2012
10/28/2012
10/28/2012
10/28/2012
10/28/2012
10/28/2012
10/28/2012
10/28/2012
10/28/2012
10/28/2012
10/28/2012
10/28/2012
10/28/2012
10/29/2012
11/1/2012
11/4/2012
11/4/2012
11/4/2012
11/4/2012
11/4/2012
11/4/2012
11/4/2012
11/4/2012
11/4/2012
11/4/2012
11/4/2012
11/4/2012
11/5/2012
11/8/2012
11/11/2012
11/11/2012
11/11/2012
11/11/2012
11/11/2012
11/11/2012
11/11/2012
11/11/2012
11/11/2012
11/11/2012
11/11/2012
11/11/2012
11/12/2012
11/15/2012
11/18/2012
11/18/2012
11/18/2012
11/18/2012
11/18/2012
11/18/2012
11/18/2012
11/18/2012
11/18/2012
11/18/2012
11/18/2012
11/18/2012
11/19/2012
11/22/2012
11/22/2012
11/22/2012
11/25/2012
11/25/2012
11/25/2012
11/25/2012
11/25/2012
11/25/2012
11/25/2012
11/25/2012
11/25/2012
11/25/2012
11/25/2012
11/25/2012
11/26/2012
11/29/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/2/2012
12/3/2012
12/6/2012
12/9/2012
12/9/2012

11/15/2015
11/15/2015
11/15/2015
11/15/2015
11/15/2015
11/15/2015
11/15/2015
11/15/2015
11/15/2015
11/15/2015
11/15/2015
11/16/2015
11/19/2015
11/22/2015
11/22/2015
11/22/2015
11/22/2015
11/22/2015
11/22/2015
11/22/2015
11/22/2015
11/22/2015
11/22/2015
11/22/2015
11/22/2015
11/23/2015
11/26/2015
11/26/2015
11/26/2015
11/29/2015
11/29/2015
11/29/2015
11/29/2015
11/29/2015
11/29/2015
11/29/2015
11/29/2015
11/29/2015
11/29/2015
11/29/2015
11/29/2015
11/30/2015
12/3/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/6/2015
12/7/2015
12/10/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/13/2015
12/14/2015
12/17/2015
12/19/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/20/2015
12/21/2015
12/24/2015
12/26/2015
12/2

12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/2/2018
12/3/2018
12/6/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/9/2018
12/10/2018
12/13/2018
12/15/2018
12/15/2018
12/16/2018
12/16/2018
12/16/2018
12/16/2018
12/16/2018
12/16/2018
12/16/2018
12/16/2018
12/16/2018
12/16/2018
12/16/2018
12/16/2018
12/17/2018
12/22/2018
12/22/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/23/2018
12/24/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
12/30/2018
1/5/2019
1/5/2019
1/6/2019
1/6/2019
1/12/2019
1/12/2019
1/13/2019
1/13/2019
1/20/2019
1/20/2019
2/3/2019
9/5/2019
9/8/2019
9/8/2019
9/8/2019
9/8/2019
9/8/2019
9/8/2019
9/8

In [18]:
# Just some more cleaning on the data. Added a total score column and gave each a label: over or under

scores = pd.read_csv('../datasets/unclean_scores.csv')
scores = scores.drop(columns=['stadium'])
scores = scores[scores['over_under_line'].notna()]
scores['total_score'] = scores['score_home'] + scores['score_away']

def label_game(row):
    total = row['total_score']
    ou = row['over_under_line']
    
    # This part is a little weird but essentially it checks to see if the
    # input string is empty. NaN removal didn't catch some of these, so we make the empty
    # ones nan and then remove them later.
    if len(ou) < 2:
        return np.nan
    
    if float(total) > float(ou):
        return 'over'
    else:
        return 'under'
scores['label'] = scores.apply(label_game, axis=1)

scores = scores[(scores['label'] == 'over') | (scores['label'] == 'under')]
drop = ['team_home', 'team_away', 'schedule_week', 'schedule_playoff']
scores = scores.drop(columns=drop, axis=1)
scores = scores.reindex(columns=['schedule_date', 'schedule_season', 'home_id', 'away_id', 'score_home', 'score_away', 'total_score', 'over_under_line', 'label'])

scores.to_csv('../datasets/clean_scores.csv', index=False)

In [19]:
# The following code finally gives us our clean dataset
# We will need to change some stuff to add in more stats but it's nothing more than adding
# a few lines of code
# This adds the rolling average stats of each team for each individual game to the dataset
scores = pd.read_csv('../datasets/clean_scores.csv')
def get_game_avg(row):
    home_id = row['home_id']
    away_id = row['away_id']
    game_id = get_game_id(row['schedule_date'], home_id)
    print(game_id)
    
    # TODO: Change this if we end up adding more statistics to the dataset (columns of dataset will be different)
    home_df = pd.read_csv(f'../datasets/team_data/avg_data/{home_id}_avgs.csv')
    home_avgs = home_df[home_df['game_id'] == game_id]
    home_avg_yards = home_avgs['avg_total_yds'].iloc[0]
    home_avg_TO = home_avgs['avg_TO'].iloc[0]
    
    away_df = pd.read_csv(f'../datasets/team_data/avg_data/{away_id}_avgs.csv')
    away_avgs = away_df[away_df['game_id'] == game_id]
    away_avg_yards = away_avgs['avg_total_yds'].iloc[0]
    away_avg_TO = away_avgs['avg_TO'].iloc[0]
    return pd.Series([home_avg_yards, home_avg_TO, away_avg_yards, away_avg_TO])

#get_game_avg(scores.iloc[10000])
scores[['home_avg_yards', 'home_avg_TO', 'away_avg_yards', 'away_avg_TO']] = scores.apply(get_game_avg, axis=1)

scores.to_csv('../datasets/scores_w_avgs.csv', index=False)

1141968GNB
1141968GNB
1121969CLT
1111970KAN
1171971CLT
1161972DAL
1141973MIA
1131974MIA
1121975MIN
1181976DAL
191977MIN
1151978DAL
1211979DAL
911979TAM
921979BUF
921979CHI
921979DEN
921979KAN
921979RAM
921979MIN
921979NOR
921979NYJ
921979PHI
921979SEA
921979CRD
921979WAS
931979NWE
961979DEN
991979CLT
991979BUF
991979CHI
991979DET
991979GNB
991979KAN
991979MIA
991979NWE
991979NYG
991979PIT
991979SDG
991979SFO
9101979PHI
9161979ATL
9161979CIN
9161979CLE
9161979DAL
9161979GNB
9161979OTI
9161979RAM
9161979MIN
9161979NOR
9161979NYJ
9161979SDG
9161979SEA
9161979CRD
9171979WAS
9231979BUF
9231979CIN
9231979DEN
9231979DET
9231979KAN
9231979MIA
9231979MIN
9231979NWE
9231979NYG
9231979PIT
9231979SFO
9231979CRD
9231979TAM
9241979CLE
9301979ATL
9301979CLT
9301979CHI
9301979DAL
9301979DET
9301979OTI
9301979RAM
9301979NOR
9301979NYJ
9301979RAI
9301979PHI
9301979SDG
9301979SEA
1011979GNB
1071979ATL
1071979CLT
1071979BUF
1071979CIN
1071979CLE
1071979DEN
1071979OTI
1071979MIN
1071979NWE
1071979NOR
10719

11211982GNB
11211982OTI
11211982NOR
11211982NYG
11211982NYJ
11211982PHI
11211982CRD
11221982RAI
11251982DAL
11251982DET
11281982ATL
11281982BUF
11281982CIN
11281982RAM
11281982MIN
11281982NWE
11281982NYJ
11281982SDG
11281982SFO
11281982SEA
11281982WAS
11291982TAM
1221982RAM
1251982CLT
1251982CHI
1251982CLE
1251982DEN
1251982GNB
1251982RAI
1251982MIA
1251982NOR
1251982NYG
1251982PHI
1251982PIT
1251982WAS
1261982DET
12111982NYG
12111982SFO
12121982ATL
12121982BUF
12121982CIN
12121982GNB
12121982KAN
12121982RAM
12121982MIN
12121982NWE
12121982NYJ
12121982SEA
12121982CRD
12131982OTI
12181982RAI
12181982MIA
12191982CLT
12191982CHI
12191982CLE
12191982DAL
12191982DEN
12191982DET
12191982PHI
12191982SFO
12191982SEA
12191982TAM
12191982WAS
12201982SDG
12261982ATL
12261982CIN
12261982DAL
12261982OTI
12261982KAN
12261982RAI
12261982RAM
12261982MIN
12261982NOR
12261982PIT
12261982SDG
12261982CRD
12261982TAM
12271982MIA
121983CLT
121983DET
121983OTI
121983KAN
121983NWE
121983NOR
121983PHI
121983PI

1281985CIN
1281985DEN
1281985GNB
1281985OTI
1281985KAN
1281985MIN
1281985NWE
1281985PHI
1281985SDG
1281985SEA
1281985CRD
1291985SFO
12141985DEN
12141985NYJ
12151985ATL
12151985CLE
12151985DAL
12151985DET
12151985RAI
12151985RAM
12151985NOR
12151985PIT
12151985SDG
12151985TAM
12151985WAS
12161985MIA
12201985SEA
12211985NYG
12211985CRD
12221985DET
12221985CLT
12221985KAN
12221985MIA
12221985MIN
12221985NWE
12221985NOR
12221985NYJ
12221985SFO
12221985TAM
12231985RAM
1261986CHI
971986BUF
971986CHI
971986DEN
971986GNB
971986KAN
971986MIN
971986NWE
971986NOR
971986SDG
971986SEA
971986CRD
971986TAM
971986WAS
981986DAL
9111986NYJ
9141986ATL
9141986CHI
9141986CIN
9141986DET
9141986OTI
9141986RAM
9141986MIA
9141986NOR
9141986NYG
9141986SEA
9141986TAM
9141986WAS
9151986PIT
9181986CLE
9211986BUF
9211986DAL
9211986DET
9211986CLT
9211986KAN
9211986RAI
9211986MIN
9211986NWE
9211986NYJ
9211986PHI
9211986SDG
9211986SFO
9221986GNB
9281986BUF
9281986CIN
9281986CLE
9281986DEN
9281986OTI
9281986CLT
9281986

1011989RAI
1011989MIN
1011989NOR
1011989NYJ
1011989CRD
1011989SFO
1021989CHI
1081989DEN
1081989GNB
1081989CLT
1081989RAM
1081989MIA
1081989MIN
1081989NWE
1081989NOR
1081989PHI
1081989PIT
1081989SEA
1081989TAM
1081989WAS
1091989NYJ
10151989ATL
10151989CHI
10151989CIN
10151989CLE
10151989DAL
10151989DEN
10151989RAI
10151989MIN
10151989NOR
10151989NYG
10151989CRD
10151989SDG
10151989TAM
10161989BUF
10221989BUF
10221989CIN
10221989DET
10221989OTI
10221989KAN
10221989RAM
10221989MIA
10221989PHI
10221989CRD
10221989SDG
10221989SFO
10221989SEA
10221989WAS
10231989CLE
10291989BUF
10291989CHI
10291989CIN
10291989CLE
10291989DAL
10291989DEN
10291989GNB
10291989CLT
10291989RAI
10291989NOR
10291989NYJ
10291989PIT
10291989SEA
10301989NYG
1151989ATL
1151989DEN
1151989GNB
1151989OTI
1151989KAN
1151989RAI
1151989MIA
1151989MIN
1151989NWE
1151989CRD
1151989SDG
1151989TAM
1151989WAS
1161989SFO
11121989BUF
11121989DET
11121989KAN
11121989RAM
11121989NWE
11121989NYJ
11121989PHI
11121989CRD
11121989PIT
111

10181992CHI
10181992CLE
10181992DAL
10181992DEN
10181992CLT
10181992RAM
10181992MIA
10181992CRD
10181992SFO
10181992SEA
10181992WAS
10191992PIT
10251992GNB
10251992OTI
10251992KAN
10251992RAI
10251992MIA
10251992MIN
10251992NWE
10251992NYG
10251992PHI
10251992SDG
10251992TAM
10261992NYJ
1111992ATL
1111992BUF
1111992CIN
1111992DAL
1111992DET
1111992NOR
1111992NYJ
1111992CRD
1111992PIT
1111992SDG
1111992WAS
1121992CHI
1181992BUF
1181992CHI
1181992DEN
1181992DET
1181992OTI
1181992CLT
1181992KAN
1181992RAM
1181992NWE
1181992NYG
1181992PHI
1181992SEA
1181992TAM
1191992ATL
11151992ATL
11151992CLE
11151992DAL
11151992DEN
11151992GNB
11151992CLT
11151992KAN
11151992RAI
11151992MIN
11151992NYJ
11151992PIT
11151992SFO
11151992TAM
11161992MIA
11221992BUF
11221992CHI
11221992CIN
11221992RAI
11221992RAM
11221992MIA
11221992MIN
11221992NWE
11221992NYG
11221992CRD
11221992PIT
11221992SDG
11221992SEA
11231992NOR
11261992DAL
11261992DET
11291992ATL
11291992CIN
11291992CLE
11291992GNB
11291992CLT
112919

10221995RAM
10221995TAM
10221995WAS
10231995NWE
10291995CRD
10291995ATL
10291995CIN
10291995DET
10291995OTI
10291995CLT
10291995MIA
10291995NWE
10291995PHI
10291995PIT
10291995SFO
10291995WAS
10301995MIN
1151995ATL
1151995CHI
1151995CIN
1151995CLE
1151995DEN
1151995CLT
1151995KAN
1151995MIN
1151995NOR
1151995NYJ
1151995SDG
1151995SFO
1151995SEA
1161995DAL
11121995CRD
11121995BUF
11121995DAL
11121995DET
11121995GNB
11121995OTI
11121995JAX
11121995MIA
11121995NOR
11121995NYG
11121995PHI
11121995SDG
11121995RAM
11131995PIT
11191995ATL
11191995CAR
11191995CHI
11191995CIN
11191995CLE
11191995DEN
11191995KAN
11191995MIN
11191995NWE
11191995NYJ
11191995RAI
11191995PHI
11191995TAM
11191995WAS
11201995MIA
11231995DAL
11231995DET
11261995CRD
11261995BUF
11261995CLE
11261995GNB
11261995OTI
11261995CLT
11261995JAX
11261995NOR
11261995NYG
11261995SFO
11261995SEA
11261995WAS
11271995SDG
11301995CRD
1231995CAR
1231995DAL
1231995DEN
1231995GNB
1231995MIA
1231995MIN
1231995NWE
1231995NYJ
1231995RAI
123

10181998ATL
10181998BUF
10181998CHI
10181998MIA
10181998MIN
10181998NYG
10181998PIT
10181998SDG
10181998SFO
10181998TAM
10181998OTI
10191998NWE
10251998CAR
10251998DEN
10251998DET
10251998GNB
10251998MIA
10251998NOR
10251998NYJ
10251998RAI
10251998SDG
10251998RAM
10251998OTI
10261998KAN
1111998ATL
1111998RAV
1111998BUF
1111998CAR
1111998CIN
1111998DET
1111998GNB
1111998CLT
1111998KAN
1111998PIT
1111998SEA
1111998TAM
1111998WAS
1121998PHI
1181998CRD
1181998RAV
1181998CHI
1181998DAL
1181998DEN
1181998JAX
1181998MIA
1181998MIN
1181998NWE
1181998NYJ
1181998PHI
1181998SFO
1181998SEA
1181998TAM
1191998PIT
11151998CRD
11151998ATL
11151998BUF
11151998CAR
11151998DET
11151998CLT
11151998JAX
11151998MIN
11151998NOR
11151998NYG
11151998RAI
11151998SDG
11151998OTI
11151998WAS
11161998KAN
11221998ATL
11221998BUF
11221998CIN
11221998DAL
11221998DEN
11221998MIN
11221998NYG
11221998PIT
11221998SDG
11221998SFO
11221998RAM
11221998TAM
11221998OTI
11221998WAS
11231998NWE
11261998DAL
11261998DET
11291998R

1012001NYJ
1072001ATL
1072001RAV
1072001BUF
1072001CLE
1072001DEN
1072001MIA
1072001NOR
1072001NYG
1072001RAI
1072001PHI
1072001PIT
1072001SFO
1072001SEA
1072001TAM
1082001DET
10142001ATL
10142001CAR
10142001CHI
10142001CIN
10142001GNB
10142001CLT
10142001KAN
10142001MIN
10142001NWE
10142001NYJ
10142001SEA
10142001RAM
10142001OTI
10152001DAL
10182001JAX
10212001CRD
10212001CIN
10212001CLE
10212001DET
10212001CLT
10212001MIN
10212001NOR
10212001NYJ
10212001SDG
10212001TAM
10212001WAS
10222001NYG
10252001KAN
10282001RAV
10282001CAR
10282001CHI
10282001DAL
10282001DEN
10282001DET
10282001PHI
10282001SDG
10282001SEA
10282001RAM
10282001TAM
10282001WAS
10292001PIT
1142001CRD
1142001ATL
1142001BUF
1142001CHI
1142001GNB
1142001MIA
1142001NOR
1142001NYG
1142001PIT
1142001SDG
1142001SFO
1142001OTI
1142001WAS
1152001RAI
11112001CRD
11112001ATL
11112001CHI
11112001CLE
11112001DEN
11112001DET
11112001CLT
11112001JAX
11112001NWE
11112001NYJ
11112001PHI
11112001SFO
11112001SEA
11112001RAM
11122001OT

12282003NOR
12282003NYG
12282003SDG
12282003OTI
132004RAV
132004CAR
142004GNB
142004CLT
1102004NWE
1102004RAM
1112004KAN
1112004PHI
1182004NWE
1182004PHI
212004NWE
992004NWE
9112004MIA
9122004BUF
9122004CHI
9122004CLE
9122004DEN
9122004HTX
9122004MIN
9122004NOR
9122004NYJ
9122004PHI
9122004PIT
9122004SFO
9122004RAM
9122004WAS
9132004CAR
9192004CRD
9192004ATL
9192004RAV
9192004CIN
9192004DAL
9192004DET
9192004GNB
9192004JAX
9192004KAN
9192004NOR
9192004NYG
9192004RAI
9192004SDG
9192004TAM
9192004OTI
9202004PHI
9262004ATL
9262004CIN
9262004DEN
9262004DET
9262004CLT
9262004KAN
9262004MIA
9262004MIN
9262004NYG
9262004RAI
9262004SEA
9262004RAM
9262004OTI
9272004WAS
1032004CRD
1032004BUF
1032004CAR
1032004CHI
1032004CLE
1032004GNB
1032004HTX
1032004JAX
1032004MIA
1032004PIT
1032004SDG
1032004SFO
1032004TAM
1042004RAV
10102004ATL
10102004DAL
10102004DEN
10102004HTX
10102004CLT
10102004NWE
10102004NOR
10102004NYJ
10102004PIT
10102004SDG
10102004SFO
10102004SEA
10102004WAS
10112004GNB
10172004A

11262006MIN
11262006NWE
11262006NYJ
11262006SDG
11262006RAM
11262006OTI
11262006WAS
11272006SEA
11302006CIN
1232006BUF
1232006CHI
1232006CLE
1232006DEN
1232006GNB
1232006MIA
1232006NWE
1232006NOR
1232006NYG
1232006RAI
1232006PIT
1232006RAM
1232006OTI
1232006WAS
1242006PHI
1272006PIT
12102006CRD
12102006CAR
12102006CIN
12102006DAL
12102006DET
12102006HTX
12102006JAX
12102006KAN
12102006MIA
12102006NYJ
12102006SDG
12102006SFO
12102006TAM
12102006WAS
12112006RAM
12142006SEA
12162006ATL
12172006CRD
12172006RAV
12172006BUF
12172006CAR
12172006CHI
12172006GNB
12172006MIN
12172006NWE
12172006NOR
12172006NYG
12172006RAI
12172006SDG
12172006OTI
12182006CLT
12212006GNB
12232006RAI
12242006ATL
12242006BUF
12242006CLE
12242006DEN
12242006DET
12242006HTX
12242006JAX
12242006NYG
12242006PIT
12242006SFO
12242006SEA
12242006RAM
12252006DAL
12252006MIA
12302006WAS
12312006RAV
12312006CHI
12312006CIN
12312006DAL
12312006DEN
12312006HTX
12312006CLT
12312006KAN
12312006MIN
12312006NOR
12312006NYJ
12312006

10252009CLE
10252009DAL
10252009HTX
10252009KAN
10252009MIA
10252009NYG
10252009RAI
10252009PIT
10252009RAM
10252009TAM
10262009WAS
1112009CRD
1112009RAV
1112009BUF
1112009CHI
1112009DAL
1112009DET
1112009GNB
1112009CLT
1112009NYJ
1112009PHI
1112009SDG
1112009OTI
1122009NOR
1182009ATL
1182009CHI
1182009CIN
1182009CLT
1182009JAX
1182009NWE
1182009NOR
1182009NYG
1182009PHI
1182009SFO
1182009SEA
1182009TAM
1192009DEN
11122009SFO
11152009CRD
11152009CAR
11152009GNB
11152009CLT
11152009MIA
11152009MIN
11152009NYJ
11152009RAI
11152009PIT
11152009SDG
11152009RAM
11152009OTI
11152009WAS
11162009CLE
11192009CAR
11222009RAV
11222009CHI
11222009DAL
11222009DEN
11222009DET
11222009GNB
11222009JAX
11222009KAN
11222009MIN
11222009NWE
11222009NYG
11222009RAI
11222009RAM
11222009TAM
11232009HTX
11262009DAL
11262009DEN
11262009DET
11292009ATL
11292009RAV
11292009BUF
11292009CIN
11292009HTX
11292009MIN
11292009NYJ
11292009PHI
11292009SDG
11292009SFO
11292009RAM
11292009OTI
11302009NOR
1232009BUF
1262009

9162012NYG
9162012PHI
9162012PIT
9162012SDG
9162012SFO
9162012SEA
9162012RAM
9172012ATL
9202012CAR
9232012CRD
9232012RAV
9232012CHI
9232012CLE
9232012DAL
9232012DEN
9232012CLT
9232012MIA
9232012MIN
9232012NOR
9232012RAI
9232012SDG
9232012OTI
9232012WAS
9242012SEA
9272012RAV
9302012CRD
9302012ATL
9302012BUF
9302012DEN
9302012DET
9302012GNB
9302012HTX
9302012JAX
9302012KAN
9302012NYJ
9302012PHI
9302012RAM
9302012TAM
1012012DAL
1042012RAM
1072012CAR
1072012CIN
1072012CLT
1072012JAX
1072012KAN
1072012MIN
1072012NWE
1072012NOR
1072012NYG
1072012PIT
1072012SFO
1072012WAS
1082012NYJ
10112012OTI
10142012CRD
10142012ATL
10142012RAV
10142012CLE
10142012HTX
10142012MIA
10142012NYJ
10142012PHI
10142012SFO
10142012SEA
10142012TAM
10142012WAS
10152012SDG
10182012SFO
10212012BUF
10212012CAR
10212012CIN
10212012HTX
10212012CLT
10212012MIN
10212012NWE
10212012NYG
10212012RAI
10212012RAM
10212012TAM
10222012CHI
10252012MIN
10282012CHI
10282012CLE
10282012DAL
10282012DEN
10282012DET
10282012GNB
10282012K

1282014GNB
12112014RAM
12142014ATL
12142014RAV
12142014BUF
12142014CAR
12142014CLE
12142014DET
12142014CLT
12142014KAN
12142014NWE
12142014NYG
12142014PHI
12142014SDG
12142014SEA
12142014OTI
12152014CHI
12182014JAX
12202014SFO
12202014WAS
12212014CRD
12212014CAR
12212014CHI
12212014DAL
12212014HTX
12212014MIA
12212014NOR
12212014NYJ
12212014RAI
12212014PIT
12212014RAM
12212014TAM
12222014CIN
12282014ATL
12282014RAV
12282014DEN
12282014GNB
12282014HTX
12282014KAN
12282014MIA
12282014MIN
12282014NWE
12282014NYG
12282014PIT
12282014SFO
12282014SEA
12282014TAM
12282014OTI
12282014WAS
132015CAR
132015PIT
142015DAL
142015CLT
1102015NWE
1102015SEA
1112015DEN
1112015GNB
1182015NWE
1182015SEA
212015NWE
9102015NWE
9132015CRD
9132015BUF
9132015CHI
9132015DAL
9132015DEN
9132015HTX
9132015JAX
9132015NYJ
9132015RAI
9132015SDG
9132015RAM
9132015TAM
9132015WAS
9142015ATL
9142015SFO
9172015KAN
9202015BUF
9202015CAR
9202015CHI
9202015CIN
9202015CLE
9202015GNB
9202015JAX
9202015MIN
9202015NOR
9202015NYG


11122017WAS
11132017CAR
11162017PIT
11192017CHI
11192017CLE
11192017DAL
11192017DEN
11192017GNB
11192017HTX
11192017SDG
11192017MIA
11192017MIN
11192017NOR
11192017NYG
11192017RAI
11202017SEA
11232017DAL
11232017DET
11232017WAS
11262017CRD
11262017ATL
11262017CIN
11262017CLT
11262017KAN
11262017RAM
11262017NWE
11262017NYJ
11262017RAI
11262017PHI
11262017PIT
11262017SFO
11272017RAV
11302017DAL
1232017CRD
1232017ATL
1232017RAV
1232017BUF
1232017CHI
1232017GNB
1232017JAX
1232017SDG
1232017MIA
1232017NOR
1232017NYJ
1232017RAI
1232017SEA
1232017OTI
1242017CIN
1272017ATL
12102017CRD
12102017BUF
12102017CAR
12102017CIN
12102017CLE
12102017DEN
12102017HTX
12102017JAX
12102017KAN
12102017SDG
12102017RAM
12102017NYG
12102017PIT
12102017TAM
12112017MIA
12142017CLT
12162017DET
12162017KAN
12172017BUF
12172017CAR
12172017CLE
12172017JAX
12172017MIN
12172017NOR
12172017NYG
12172017RAI
12172017PIT
12172017SFO
12172017SEA
12172017WAS
12182017TAM
12232017RAV
12232017GNB
12242017CRD
12242017CAR
12242017

1042020WAS
1052020GNB
1052020KAN
1082020CHI
10112020ATL
10112020RAV
10112020CLE
10112020DAL
10112020HTX
10112020KAN
10112020NYJ
10112020PIT
10112020SFO
10112020SEA
10112020WAS
10122020NOR
10132020OTI
10182020CAR
10182020CLT
10182020JAX
10182020MIA
10182020MIN
10182020NWE
10182020NYG
10182020PHI
10182020PIT
10182020SFO
10182020TAM
10182020OTI
10192020BUF
10192020DAL
10222020PHI
10252020CRD
10252020ATL
10252020CIN
10252020DEN
10252020HTX
10252020RAI
10252020SDG
10252020NWE
10252020NOR
10252020NYJ
10252020OTI
10252020WAS
10262020RAM
10292020CAR
1112020RAV
1112020BUF
1112020CHI
1112020CIN
1112020CLE
1112020DEN
1112020DET
1112020GNB
1112020KAN
1112020MIA
1112020PHI
1112020SEA
1122020NYG
1152020SFO
1182020CRD
1182020ATL
1182020BUF
1182020DAL
1182020CLT
1182020JAX
1182020KAN
1182020SDG
1182020MIN
1182020TAM
1182020OTI
1182020WAS
1192020NYJ
11122020OTI
11152020CRD
11152020CAR
11152020CLE
11152020DET
11152020GNB
11152020RAI
11152020RAM
11152020MIA
11152020NWE
11152020NOR
11152020NYG
11152020PIT

In [20]:
# Reordered the columns to make it a bit more readable from a coding standpoint
scores_df = pd.read_csv('../datasets/scores_w_avgs.csv')
scores_df = scores_df.reindex(columns=['schedule_date', 'schedule_season', 'home_id', 'away_id', 'score_home', 'home_avg_yards', 'home_avg_TO', 'score_away', 'away_avg_yards', 'away_avg_TO', 'total_score', 'over_under_line', 'label'])
scores_df.to_csv('../datasets/scores_w_avgs.csv', index=False)
scores_df

Unnamed: 0,schedule_date,schedule_season,home_id,away_id,score_home,home_avg_yards,home_avg_TO,score_away,away_avg_yards,away_avg_TO,total_score,over_under_line,label
0,1/14/1968,1967,GNB,RAI,33,304.117647,2.529412,14,361.764706,2.352941,47,43.0,over
1,1/12/1969,1968,CLT,NYJ,7,332.529412,2.529412,16,365.470588,1.823529,23,40.0,under
2,1/11/1970,1969,KAN,MIN,23,315.470588,2.588235,7,293.705882,2.235294,30,39.0,under
3,1/17/1971,1970,CLT,DAL,16,301.470588,2.529412,13,306.823529,2.000000,29,36.0,under
4,1/16/1972,1971,DAL,MIA,24,342.000000,2.176471,3,311.176471,1.705882,27,34.0,under
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10391,1/17/2021,2020,KAN,CLE,22,417.117647,1.000000,17,370.882353,0.882353,39,56.0,under
10392,1/17/2021,2020,NOR,TAM,20,378.294118,1.294118,30,391.647059,0.882353,50,53.0,under
10393,1/24/2021,2020,GNB,TAM,26,386.294118,0.764706,31,392.352941,0.941176,57,53.0,over
10394,1/24/2021,2020,KAN,BUF,38,421.235294,1.058824,24,376.235294,1.176471,62,55.0,over


In [None]:
# We now have a dataset that contains the result of >10000 NFL games,
# along with each team's rolling average over the past 16 games of their
# yards per game and turnovers per game
# All rows should be filled with proper information

347.2282352941178

35.66305937489083