In [1]:
import math
import os
import pandas as pd
import numpy as np

In [2]:
# Raw odds sheet for the season; later cells read its VH, Date, Team,
# Final and ML columns.
odds_csv_path = 'nflodds2020.csv'
odds = pd.read_csv(odds_csv_path)

In [3]:
# The VH column flags which side of the game a row describes: rows marked
# 'H' are kept as the home side, rows marked 'V' as the away side.
venue_flag = odds['VH']
home = odds.loc[venue_flag == 'H']
away = odds.loc[venue_flag == 'V']

In [4]:
# Keep the same four fields for each side and give them side-specific labels
# so both halves can later sit in a single row without column-name clashes.
side_fields = ['Date', 'Team', 'Final', 'ML']
home_labels = {'Date':'Home Date', 'Team':'Home Team', 'Final':'Home Score', 'ML':'Home ML'}
away_labels = {'Date':'Away Date', 'Team':'Away Team', 'Final':'Away Score', 'ML':'Away ML'}

home_data = home[side_fields].rename(columns=home_labels)
away_data = away[side_fields].rename(columns=away_labels)

In [5]:
# Renumber both halves from 0 (discarding the old labels) so that row i of
# the home table and row i of the away table share the same index label.
home_data = home_data.reset_index(drop=True)
away_data = away_data.reset_index(drop=True)

In [6]:
# Glue the two halves side by side; concat aligns on the (freshly reset)
# index, so pairing relies on home and away rows being in the same order.
season = pd.concat((home_data, away_data), axis='columns')

In [7]:
# Preview the first rows of the side-by-side home/away table.
season.head()

Unnamed: 0,Home Date,Home Team,Home Score,Home ML,Away Date,Away Team,Away Score,Away ML
0,910,KansasCity,34,-450,910,Houston,20,375
1,913,NewEngland,21,-350,913,Miami,11,290
2,913,Baltimore,38,-330,913,Cleveland,6,270
3,913,Buffalo,27,-300,913,NYJets,17,250
4,913,Carolina,30,130,913,LasVegas,34,-150


In [8]:
# Spot check further down the table: positional rows 150 up to (not including) 200.
season.iloc[150:200]

Unnamed: 0,Home Date,Home Team,Home Score,Home ML,Away Date,Away Team,Away Score,Away ML
150,1122,Carolina,20,150,1122,Detroit,0,-170
151,1122,Houston,27,125,1122,NewEngland,20,-145
152,1122,Jacksonville,3,425,1122,Pittsburgh,27,-550
153,1122,Indianapolis,34,-120,1122,GreenBay,31,100
154,1122,Washington,20,-125,1122,Cincinnati,9,105
155,1122,Baltimore,24,-250,1122,Tennessee,30,210
156,1122,Minnesota,28,-310,1122,Dallas,31,260
157,1122,LasVegas,31,320,1122,KansasCity,35,-380
158,1122,Denver,20,180,1122,Miami,13,-210
159,1122,LAChargers,34,-500,1122,NYJets,28,400


In [9]:
# Sanity check on the pairing: positions where the two sides disagree on the
# game date. An empty array means every home row was paired with an away row
# from the same date.
date_mismatch = season['Home Date'] != season['Away Date']
np.where(date_mismatch)

(array([], dtype=int64),)

In [10]:
# Reorder so matching home/away fields are adjacent, and drop 'Away Date'
# (the single remaining date column is 'Home Date').
column_order = [
    'Home Date',
    'Home Team', 'Away Team',
    'Home Score', 'Away Score',
    'Home ML', 'Away ML',
]
season = season[column_order]

In [11]:
# Preview the table after reordering columns and dropping 'Away Date'.
season.head()

Unnamed: 0,Home Date,Home Team,Away Team,Home Score,Away Score,Home ML,Away ML
0,910,KansasCity,Houston,34,20,-450,375
1,913,NewEngland,Miami,21,11,-350,290
2,913,Baltimore,Cleveland,38,6,-330,270
3,913,Buffalo,NYJets,27,17,-300,250
4,913,Carolina,LasVegas,30,34,130,-150


In [12]:
# Load the season's game table (first CSV column is the saved index) and
# parse its 'Date' column into timestamps.
rest = (
    pd.read_csv('data/season_data/2020_all_data.csv', index_col=0)
    .assign(Date=lambda games: pd.to_datetime(games['Date']))
)
rest.head()

Unnamed: 0,Date,Time,Datetime,Home,Away,Home Win
0,2020-09-10,8:20PM,2020-09-10 20:20:00,Kansas City Chiefs,Houston Texans,1
1,2020-09-13,1:00PM,2020-09-13 13:00:00,Atlanta Falcons,Seattle Seahawks,0
2,2020-09-13,1:00PM,2020-09-13 13:00:00,Buffalo Bills,New York Jets,1
3,2020-09-13,1:00PM,2020-09-13 13:00:00,Washington Football Team,Philadelphia Eagles,1
4,2020-09-13,1:00PM,2020-09-13 13:00:00,Minnesota Vikings,Green Bay Packers,0


In [13]:
# Full team name -> every spelling to translate to it. Several teams have
# more than one spelling listed (including the misspelt 'Washingtom'); all
# of them are kept so each variant resolves to the same full name.
_team_aliases = {
    'Arizona Cardinals': ['Arizona'],
    'Atlanta Falcons': ['Atlanta'],
    'Baltimore Ravens': ['Baltimore'],
    'Buffalo Bills': ['Buffalo'],
    'Carolina Panthers': ['Carolina'],
    'Chicago Bears': ['Chicago'],
    'Cincinnati Bengals': ['Cincinnati'],
    'Cleveland Browns': ['Cleveland'],
    'Dallas Cowboys': ['Dallas'],
    'Denver Broncos': ['Denver'],
    'Detroit Lions': ['Detroit'],
    'Green Bay Packers': ['GreenBay'],
    'Houston Texans': ['Houston'],
    'Indianapolis Colts': ['Indianapolis'],
    'Jacksonville Jaguars': ['Jacksonville'],
    'Kansas City Chiefs': ['KCChiefs', 'Kansas', 'KansasCity'],
    'Los Angeles Chargers': ['LAChargers'],
    'Los Angeles Rams': ['LARams'],
    'Las Vegas Raiders': ['LasVegas', 'LVRaiders'],
    'Miami Dolphins': ['Miami'],
    'Minnesota Vikings': ['Minnesota'],
    'New York Giants': ['NYGiants'],
    'New York Jets': ['NYJets'],
    'New England Patriots': ['NewEngland'],
    'New Orleans Saints': ['NewOrleans'],
    'Philadelphia Eagles': ['Philadelphia'],
    'Pittsburgh Steelers': ['Pittsburgh'],
    'San Francisco 49ers': ['SanFrancisco'],
    'Seattle Seahawks': ['Seattle'],
    'Tampa Bay Buccaneers': ['Tampa', 'TampaBay'],
    'Tennessee Titans': ['Tennessee'],
    'Washington Football Team': ['Washington', 'Washingtom'],
}

# Flatten to the alias -> full-name lookup used with Series.map.
name_mapper = {
    alias: full_name
    for full_name, aliases in _team_aliases.items()
    for alias in aliases
}

In [14]:
# Translate the team spellings in both team columns to full team names.
# Series.map leaves NaN wherever a value is missing from the dict, and a NaN
# team can never match in a later join, so list any spelling that has no
# entry in name_mapper instead of letting those games vanish silently.
team_columns = ['Home Team', 'Away Team']
unmapped_names = sorted(
    {
        name
        for column in team_columns
        for name in season[column].dropna().unique()
    } - set(name_mapper),
    key=str,
)
if unmapped_names:
    print(f'Warning: no entry in name_mapper for {unmapped_names}; these become NaN')

season = season.assign(
    **{column: season[column].map(name_mapper) for column in team_columns}
)

In [15]:
def date_helper(x, season_year=2020):
    """Convert an ``MMDD`` date code into an ``MMDDYY`` string.

    The input carries no year and may have lost its leading zero (e.g. the
    integer ``910`` for September 10th), so the year is inferred from the
    month: months 6-12 are placed in ``season_year`` and every other month
    in the following calendar year.

    Parameters
    ----------
    x : int or str
        Date code in ``MMDD`` form, with or without a leading zero.
    season_year : int, default 2020
        Calendar year used for months 6-12. The default reproduces the
        previously hard-coded '20' / '21' suffixes.

    Returns
    -------
    str
        Six-character ``MMDDYY`` string, parseable with ``format='%m%d%y'``.
    """
    # Restore the leading zero dropped when the code was read as a number.
    mmdd = str(x).zfill(4)
    month = int(mmdd[:2])
    year = season_year if 6 <= month <= 12 else season_year + 1
    # Two-digit year, zero padded, to match the %y directive.
    return f'{mmdd}{year % 100:02d}'

In [16]:
# Build real timestamps from the MMDD codes (date_helper yields MMDDYY text),
# then shorten the key column names to 'Date', 'Home' and 'Away'.
game_dates = pd.to_datetime(season['Home Date'].apply(date_helper), format='%m%d%y')
season = (
    season
    .assign(**{'Home Date': game_dates})
    .rename(columns={'Home Date':'Date', 'Home Team':'Home', 'Away Team':'Away'})
)

In [17]:
# Preview the odds table after name mapping, date parsing and column renaming.
season.head()

Unnamed: 0,Date,Home,Away,Home Score,Away Score,Home ML,Away ML
0,2020-09-10,Kansas City Chiefs,Houston Texans,34,20,-450,375
1,2020-09-13,New England Patriots,Miami Dolphins,21,11,-350,290
2,2020-09-13,Baltimore Ravens,Cleveland Browns,38,6,-330,270
3,2020-09-13,Buffalo Bills,New York Jets,27,17,-300,250
4,2020-09-13,Carolina Panthers,Las Vegas Raiders,30,34,130,-150


In [18]:
# Show the game table again for side-by-side comparison with the odds table;
# both carry 'Date', 'Home' and 'Away' columns.
rest.head()

Unnamed: 0,Date,Time,Datetime,Home,Away,Home Win
0,2020-09-10,8:20PM,2020-09-10 20:20:00,Kansas City Chiefs,Houston Texans,1
1,2020-09-13,1:00PM,2020-09-13 13:00:00,Atlanta Falcons,Seattle Seahawks,0
2,2020-09-13,1:00PM,2020-09-13 13:00:00,Buffalo Bills,New York Jets,1
3,2020-09-13,1:00PM,2020-09-13 13:00:00,Washington Football Team,Philadelphia Eagles,1
4,2020-09-13,1:00PM,2020-09-13 13:00:00,Minnesota Vikings,Green Bay Packers,0


In [19]:
# Only the first 256 odds rows are joined (presumably the regular-season
# games; confirm against the odds file).
N_ODDS_ROWS_TO_MERGE = 256
MERGE_KEYS = ['Date', 'Home', 'Away']

odds_to_merge = season[:N_ODDS_ROWS_TO_MERGE]

# Attach scores and moneylines to each game. An inner join keeps only games
# whose (Date, Home, Away) triple appears identically in both tables.
full_data = rest.merge(odds_to_merge, how='inner', on=MERGE_KEYS, sort=False)

# The inner join discards non-matching odds rows without any notice, so list
# them explicitly; an unmatched row usually points at a team-name or date
# discrepancy between the two sources.
odds_match = odds_to_merge.merge(
    rest[MERGE_KEYS].drop_duplicates(),
    how='left',
    on=MERGE_KEYS,
    indicator=True,
)
unmatched_odds = odds_match.loc[odds_match['_merge'] == 'left_only', MERGE_KEYS]
if not unmatched_odds.empty:
    print(
        f'{len(unmatched_odds)} of {len(odds_to_merge)} odds rows have no '
        'matching game and were dropped by the merge:'
    )
    print(unmatched_odds.to_string(index=False))

In [20]:
# Display the merged table: game columns followed by scores and moneylines.
full_data

Unnamed: 0,Date,Time,Datetime,Home,Away,Home Win,Home Score,Away Score,Home ML,Away ML
0,2020-09-10,8:20PM,2020-09-10 20:20:00,Kansas City Chiefs,Houston Texans,1,34,20,-450,375
1,2020-09-13,1:00PM,2020-09-13 13:00:00,Atlanta Falcons,Seattle Seahawks,0,25,38,-115,-105
2,2020-09-13,1:00PM,2020-09-13 13:00:00,Buffalo Bills,New York Jets,1,27,17,-300,250
3,2020-09-13,1:00PM,2020-09-13 13:00:00,Washington Football Team,Philadelphia Eagles,1,27,17,195,-230
4,2020-09-13,1:00PM,2020-09-13 13:00:00,Minnesota Vikings,Green Bay Packers,0,34,43,-125,105
...,...,...,...,...,...,...,...,...,...,...
251,2021-01-03,4:25PM,2021-01-03 16:25:00,Chicago Bears,Green Bay Packers,0,16,35,180,-210
252,2021-01-03,4:25PM,2021-01-03 16:25:00,Kansas City Chiefs,Los Angeles Chargers,0,21,38,250,-300
253,2021-01-03,4:25PM,2021-01-03 16:25:00,Denver Broncos,Las Vegas Raiders,0,31,32,130,-150
254,2021-01-03,4:25PM,2021-01-03 16:25:00,Houston Texans,Tennessee Titans,0,38,41,280,-340


In [21]:
# Persist the merged table; the index is written as the first CSV column.
betting_csv_path = 'data/season_data/2020_with_betting.csv'
full_data.to_csv(betting_csv_path)

In [22]:
# Final look at the game table; no cell above reassigns `rest` after it is loaded.
rest.head()

Unnamed: 0,Date,Time,Datetime,Home,Away,Home Win
0,2020-09-10,8:20PM,2020-09-10 20:20:00,Kansas City Chiefs,Houston Texans,1
1,2020-09-13,1:00PM,2020-09-13 13:00:00,Atlanta Falcons,Seattle Seahawks,0
2,2020-09-13,1:00PM,2020-09-13 13:00:00,Buffalo Bills,New York Jets,1
3,2020-09-13,1:00PM,2020-09-13 13:00:00,Washington Football Team,Philadelphia Eagles,1
4,2020-09-13,1:00PM,2020-09-13 13:00:00,Minnesota Vikings,Green Bay Packers,0
