In [1]:
import math
import os
import pandas as pd
import numpy as np

In [2]:
# Load the 2019 odds table; later cells read its VH, Date, Team, Final and ML columns.
odds = pd.read_csv('nflodds2019_modified.csv')

In [3]:
# Split the odds rows on their VH flag: 'H' rows become `home`, 'V' rows become `away`.
venue_flag = odds['VH']
home = odds[venue_flag == 'H']
away = odds[venue_flag == 'V']

In [4]:
# Keep the same four columns from each side and prefix them so the two sides
# stay distinguishable once they sit in one frame.
odds_columns = ['Date', 'Team', 'Final', 'ML']

home_data = home[odds_columns].rename(columns={
    'Date': 'Home Date',
    'Team': 'Home Team',
    'Final': 'Home Score',
    'ML': 'Home ML',
})
away_data = away[odds_columns].rename(columns={
    'Date': 'Away Date',
    'Team': 'Away Team',
    'Final': 'Away Score',
    'ML': 'Away ML',
})

In [5]:
# Renumber both frames from 0 (discarding the old labels) so that rows can be
# paired by position in the column-wise concat that follows.
home_data = home_data.reset_index(drop=True)
away_data = away_data.reset_index(drop=True)

In [6]:
# Put each home row next to the away row that shares its index label (axis=1 concat).
season = pd.concat([home_data, away_data], axis=1)

In [7]:
# Cast the ML and date columns of both sides to integers in a single pass.
season = season.astype({
    'Home ML': 'int',
    'Away ML': 'int',
    'Home Date': 'int',
    'Away Date': 'int',
})

In [8]:
# Preview the paired home/away rows after the dtype casts.
season.head()

Unnamed: 0,Home Date,Home Team,Home Score,Home ML,Away Date,Away Team,Away Score,Away ML
0,905,Chicago,3.0,-160,905,GreenBay,10.0,140
1,908,Minnesota,28.0,-190,908,Atlanta,12.0,160
2,908,Philadelphia,32.0,-600,908,Washington,27.0,425
3,908,NYJets,16.0,-140,908,Buffalo,17.0,120
4,908,Miami,10.0,280,908,Baltimore,59.0,-360


In [9]:
# Sanity check on the positional pairing: positions where the home row and the away row
# carry different dates. An empty array means every pair shares one date.
np.where(season['Home Date'] != season['Away Date'])

(array([], dtype=int64),)

In [10]:
# 'Away Date' matched 'Home Date' on every row, so it is dropped here; the remaining
# columns are reordered as date, teams, scores, ML.
kept_columns = [
    'Home Date',
    'Home Team', 'Away Team',
    'Home Score', 'Away Score',
    'Home ML', 'Away ML',
]
season = season[kept_columns]

In [11]:
# Preview the trimmed, reordered frame.
season.head()

Unnamed: 0,Home Date,Home Team,Away Team,Home Score,Away Score,Home ML,Away ML
0,905,Chicago,GreenBay,3.0,10.0,-160,140
1,908,Minnesota,Atlanta,28.0,12.0,-190,160
2,908,Philadelphia,Washington,32.0,27.0,-600,425
3,908,NYJets,Buffalo,16.0,17.0,-140,120
4,908,Miami,Baltimore,10.0,59.0,280,-360


In [12]:
# Load the 2019 schedule/results table (first CSV column is the index) and parse its dates.
rest = pd.read_csv('data/season_data/2019_all_data.csv', index_col=0)
rest['Date'] = pd.to_datetime(rest['Date'])

# Rewrite older franchise names, in both team columns, to the names used further down.
franchise_renames = {
    'Washington Redskins': 'Washington Football Team',
    'San Diego Chargers': 'Los Angeles Chargers',
    'St Louis Rams': 'Los Angeles Rams',
    'Oakland Raiders': 'Las Vegas Raiders',
}
for side in ('Home', 'Away'):
    for old_name, new_name in franchise_renames.items():
        rest[side] = np.where(rest[side] == old_name, new_name, rest[side])

rest.head()

Unnamed: 0,Date,Time,Datetime,Home,Away,Home Win
0,2019-09-05,8:20PM,2019-09-05 20:20:00,Chicago Bears,Green Bay Packers,0
1,2019-09-08,1:00PM,2019-09-08 13:00:00,Minnesota Vikings,Atlanta Falcons,1
2,2019-09-08,1:00PM,2019-09-08 13:00:00,New York Jets,Buffalo Bills,0
3,2019-09-08,1:00PM,2019-09-08 13:00:00,Philadelphia Eagles,Washington Football Team,1
4,2019-09-08,1:00PM,2019-09-08 13:00:00,Cleveland Browns,Tennessee Titans,0


In [13]:
# Lookup from the odds file's space-free team labels to full franchise names.
# Several labels (alternate spellings, one misspelling, a former city) point at the
# same franchise.
name_mapper = {
    "Arizona": "Arizona Cardinals",
    "Atlanta": "Atlanta Falcons",
    "Baltimore": "Baltimore Ravens",
    "Buffalo": "Buffalo Bills",
    "Carolina": "Carolina Panthers",
    "Chicago": "Chicago Bears",
    "Cincinnati": "Cincinnati Bengals",
    "Cleveland": "Cleveland Browns",
    "Dallas": "Dallas Cowboys",
    "Denver": "Denver Broncos",
    "Detroit": "Detroit Lions",
    "GreenBay": "Green Bay Packers",
    "Houston": "Houston Texans",
    "Indianapolis": "Indianapolis Colts",
    "Jacksonville": "Jacksonville Jaguars",
    # Kansas City appears under three labels.
    "KCChiefs": "Kansas City Chiefs",
    "Kansas": "Kansas City Chiefs",
    "KansasCity": "Kansas City Chiefs",
    "LAChargers": "Los Angeles Chargers",
    "LARams": "Los Angeles Rams",
    # Raiders: both the Las Vegas labels and the Oakland label resolve to one name.
    "LasVegas": "Las Vegas Raiders",
    "Oakland": "Las Vegas Raiders",
    "LVRaiders": "Las Vegas Raiders",
    "Miami": "Miami Dolphins",
    "Minnesota": "Minnesota Vikings",
    "NYGiants": "New York Giants",
    "NYJets": "New York Jets",
    "NewEngland": "New England Patriots",
    "NewOrleans": "New Orleans Saints",
    "Philadelphia": "Philadelphia Eagles",
    "Pittsburgh": "Pittsburgh Steelers",
    "SanFrancisco": "San Francisco 49ers",
    "Seattle": "Seattle Seahawks",
    # Tampa Bay appears with and without the "Bay".
    "Tampa": "Tampa Bay Buccaneers",
    "TampaBay": "Tampa Bay Buccaneers",
    "Tennessee": "Tennessee Titans",
    "Washington": "Washington Football Team",
    # Misspelled label, mapped on purpose.
    "Washingtom": "Washington Football Team",
}

In [14]:
# Translate the odds file's short team labels into the full names used in `rest`.
# Series.map turns any label without a name_mapper entry into NaN, and the inner merge
# on team names further down would then drop that game without notice. Unknown labels
# are therefore rejected here, where the offending spelling is still visible.
for team_column in ('Home Team', 'Away Team'):
    unknown_labels = sorted(set(season[team_column].dropna()) - set(name_mapper), key=str)
    if unknown_labels:
        raise ValueError(
            'No name_mapper entry for {} label(s): {}'.format(team_column, unknown_labels)
        )
    season[team_column] = season[team_column].map(name_mapper)

In [15]:
def date_helper(x, season_start_year=2019):
    """Turn an ``MDD``/``MMDD`` date from the odds file into an ``MMDDYY`` string.

    Parameters
    ----------
    x : int or str
        Month and day without a year, e.g. ``905`` for 5 September or ``1229``
        for 29 December. A missing leading zero is restored.
    season_start_year : int, default 2019
        Calendar year in which the season begins. Months 6-12 are assigned to this
        year and every other month to the following one. The default reproduces
        the original hard-coded '19'/'20' suffixes.

    Returns
    -------
    str
        Six characters, ``MMDD`` followed by the two-digit year, suitable for
        ``pd.to_datetime(..., format='%m%d%y')``.
    """
    mmdd = str(x).zfill(4)
    month = int(mmdd[:2])
    # June through December fall in the season's first calendar year; anything else
    # is treated as the tail of the season in the next calendar year.
    if 6 <= month <= 12:
        year = season_start_year
    else:
        year = season_start_year + 1
    return mmdd + str(year % 100).zfill(2)

In [16]:
# Expand the integer dates to MMDDYY text, parse them into timestamps, then rename the
# key columns to the names `rest` uses ('Date', 'Home', 'Away').
mmddyy_text = season['Home Date'].apply(date_helper)
season['Home Date'] = pd.to_datetime(mmddyy_text, format='%m%d%y')
season = season.rename(columns={
    'Home Date': 'Date',
    'Home Team': 'Home',
    'Away Team': 'Away',
})

In [17]:
# Preview the odds frame with parsed dates and full team names.
season.head()

Unnamed: 0,Date,Home,Away,Home Score,Away Score,Home ML,Away ML
0,2019-09-05,Chicago Bears,Green Bay Packers,3.0,10.0,-160,140
1,2019-09-08,Minnesota Vikings,Atlanta Falcons,28.0,12.0,-190,160
2,2019-09-08,Philadelphia Eagles,Washington Football Team,32.0,27.0,-600,425
3,2019-09-08,New York Jets,Buffalo Bills,16.0,17.0,-140,120
4,2019-09-08,Miami Dolphins,Baltimore Ravens,10.0,59.0,280,-360


In [18]:
# Show `rest` again for a side-by-side look at the columns the merge will join on.
rest.head()

Unnamed: 0,Date,Time,Datetime,Home,Away,Home Win
0,2019-09-05,8:20PM,2019-09-05 20:20:00,Chicago Bears,Green Bay Packers,0
1,2019-09-08,1:00PM,2019-09-08 13:00:00,Minnesota Vikings,Atlanta Falcons,1
2,2019-09-08,1:00PM,2019-09-08 13:00:00,New York Jets,Buffalo Bills,0
3,2019-09-08,1:00PM,2019-09-08 13:00:00,Philadelphia Eagles,Washington Football Team,1
4,2019-09-08,1:00PM,2019-09-08 13:00:00,Cleveland Browns,Tennessee Titans,0


In [19]:
# Attach scores and ML columns to the schedule by matching on date and both team names.
# Only the first 256 odds rows take part (presumably the regular season; TODO confirm).
# Inner join: a game is kept only when it appears on both sides with identical keys.
full_data = rest.merge(
    season.head(256),
    on=['Date', 'Home', 'Away'],
    how='inner',
    sort=False,
)

In [20]:
# Display the merged frame (pandas truncates the middle rows in the rendered output).
full_data

Unnamed: 0,Date,Time,Datetime,Home,Away,Home Win,Home Score,Away Score,Home ML,Away ML
0,2019-09-05,8:20PM,2019-09-05 20:20:00,Chicago Bears,Green Bay Packers,0,3.0,10.0,-160,140
1,2019-09-08,1:00PM,2019-09-08 13:00:00,Minnesota Vikings,Atlanta Falcons,1,28.0,12.0,-190,160
2,2019-09-08,1:00PM,2019-09-08 13:00:00,New York Jets,Buffalo Bills,0,16.0,17.0,-140,120
3,2019-09-08,1:00PM,2019-09-08 13:00:00,Philadelphia Eagles,Washington Football Team,1,32.0,27.0,-600,425
4,2019-09-08,1:00PM,2019-09-08 13:00:00,Cleveland Browns,Tennessee Titans,0,13.0,43.0,-265,215
...,...,...,...,...,...,...,...,...,...,...
251,2019-12-29,4:25PM,2019-12-29 16:25:00,Dallas Cowboys,Washington Football Team,1,47.0,16.0,-750,525
252,2019-12-29,4:25PM,2019-12-29 16:25:00,Denver Broncos,Las Vegas Raiders,1,16.0,15.0,-250,200
253,2019-12-29,4:25PM,2019-12-29 16:25:00,Baltimore Ravens,Pittsburgh Steelers,1,28.0,10.0,110,-130
254,2019-12-29,4:25PM,2019-12-29 16:25:00,Houston Texans,Tennessee Titans,0,14.0,35.0,350,-450


In [21]:
# Build a "date + home + away" key for each game in `rest` and for each game that
# survived the merge, so the two collections can be diffed afterwards.
#
# Rows are taken by position (at most the first 256) instead of by the labels 0..255.
# The inner merge may return fewer than 256 rows, and a label lookup on a missing row
# raises KeyError, which would abort this check in exactly the situation it exists
# to diagnose.
scheduled_games = rest.head(256)
true_strings = [
    str(game_date) + home_team + away_team
    for game_date, home_team, away_team in zip(
        scheduled_games['Date'], scheduled_games['Home'], scheduled_games['Away']
    )
]

# NOTE: despite its name, `missing` holds the keys of the games that WERE merged.
merged_games = full_data.head(256)
missing = [
    str(game_date) + home_team + away_team
    for game_date, home_team, away_team in zip(
        merged_games['Date'], merged_games['Home'], merged_games['Away']
    )
]

In [22]:
# Keys present in `rest` but absent from the merged frame; an empty set means every
# compared game received betting data.
set(true_strings)-set(missing) # original note: games in England

set()

In [23]:
# Save the merged schedule-plus-betting table; the DataFrame index is written as the first column.
full_data.to_csv('data/season_data/2019_with_betting.csv')