In [1]:
import pandas as pd
import numpy as np
import json
import itertools
import ast
from ast import literal_eval
from basketball_reference_web_scraper import readers as br

#### Exploration of the schedule data

In [2]:
# Load the combined 2012-2017 schedule. The stored start times are treated as UTC
# and expressed in US/Eastern so that late West-coast games keep the calendar date
# on which they were actually played.
full_df = pd.read_csv('2012_2017_schedules.csv', index_col=0).assign(
    start_time=lambda schedule: pd.to_datetime(schedule['start_time'])
    .dt.tz_localize('UTC')
    .dt.tz_convert('US/Eastern')
)
full_df.head()

Unnamed: 0,home_team_name,home_team_score,start_time,visiting_team_name,visiting_team_score
0,Cleveland Cavaliers,94.0,2012-10-30 19:00:00-04:00,Washington Wizards,84.0
1,Los Angeles Lakers,91.0,2012-10-30 19:30:00-04:00,Dallas Mavericks,99.0
2,Miami Heat,120.0,2012-10-30 20:00:00-04:00,Boston Celtics,107.0
3,Chicago Bulls,93.0,2012-10-31 19:00:00-04:00,Sacramento Kings,87.0
4,Detroit Pistons,96.0,2012-10-31 19:30:00-04:00,Houston Rockets,105.0


In [3]:
# Distinct calendar dates (taken from the US/Eastern start times) on which games were played.
full_df_unique_dates = full_df['start_time'].dt.date.unique()
print(full_df_unique_dates)

[datetime.date(2012, 10, 30) datetime.date(2012, 10, 31)
 datetime.date(2012, 11, 1) ..., datetime.date(2018, 4, 9)
 datetime.date(2018, 4, 10) datetime.date(2018, 4, 11)]


#### Get box scores for the 2012-2013 season

In [4]:
# Load the 2012-13 schedule; treat the stored start times as UTC and express them
# in US/Eastern so each game's calendar date is taken in Eastern time.
df_twelve = pd.read_csv('2012_schedule.csv', index_col=0).assign(
    start_time=lambda schedule: pd.to_datetime(schedule['start_time'])
    .dt.tz_localize('UTC')
    .dt.tz_convert('US/Eastern')
)
df_twelve.head(15)

Unnamed: 0,home_team_name,home_team_score,start_time,visiting_team_name,visiting_team_score
0,Cleveland Cavaliers,94,2012-10-30 19:00:00-04:00,Washington Wizards,84
1,Los Angeles Lakers,91,2012-10-30 19:30:00-04:00,Dallas Mavericks,99
2,Miami Heat,120,2012-10-30 20:00:00-04:00,Boston Celtics,107
3,Chicago Bulls,93,2012-10-31 19:00:00-04:00,Sacramento Kings,87
4,Detroit Pistons,96,2012-10-31 19:30:00-04:00,Houston Rockets,105
5,Los Angeles Clippers,101,2012-10-31 19:30:00-04:00,Memphis Grizzlies,92
6,New Orleans Hornets,95,2012-10-31 19:00:00-04:00,San Antonio Spurs,99
7,Philadelphia 76ers,84,2012-10-31 19:00:00-04:00,Denver Nuggets,75
8,Phoenix Suns,85,2012-10-31 19:00:00-04:00,Golden State Warriors,87
9,Portland Trail Blazers,116,2012-10-31 19:30:00-04:00,Los Angeles Lakers,106


In [5]:
# Distinct US/Eastern calendar dates with at least one game in the 2012-13 schedule.
df_twelve_unique_dates = df_twelve['start_time'].dt.date.unique()
print(df_twelve_unique_dates)

[datetime.date(2012, 10, 30) datetime.date(2012, 10, 31)
 datetime.date(2012, 11, 1) datetime.date(2012, 11, 2)
 datetime.date(2012, 11, 3) datetime.date(2012, 11, 4)
 datetime.date(2012, 11, 5) datetime.date(2012, 11, 6)
 datetime.date(2012, 11, 7) datetime.date(2012, 11, 8)
 datetime.date(2012, 11, 9) datetime.date(2012, 11, 10)
 datetime.date(2012, 11, 11) datetime.date(2012, 11, 12)
 datetime.date(2012, 11, 13) datetime.date(2012, 11, 14)
 datetime.date(2012, 11, 15) datetime.date(2012, 11, 16)
 datetime.date(2012, 11, 17) datetime.date(2012, 11, 18)
 datetime.date(2012, 11, 19) datetime.date(2012, 11, 20)
 datetime.date(2012, 11, 21) datetime.date(2012, 11, 23)
 datetime.date(2012, 11, 24) datetime.date(2012, 11, 25)
 datetime.date(2012, 11, 26) datetime.date(2012, 11, 27)
 datetime.date(2012, 11, 28) datetime.date(2012, 11, 29)
 datetime.date(2012, 11, 30) datetime.date(2012, 12, 1)
 datetime.date(2012, 12, 2) datetime.date(2012, 12, 3)
 datetime.date(2012, 12, 4) datetime.date(2

In [6]:
# Fetch the box scores for every 2012-13 game date, one result per date.
# NOTE(review): each result is presumably a list of player records, since the
# next cell flattens them with itertools.chain.from_iterable - confirm.
box_scores_twelve = [
    br.return_json_encoded_box_scores_for_date(game_date)
    for game_date in df_twelve_unique_dates
]



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


In [7]:
# Flatten the per-date results into a single list of player box-score records.
final_box_scores_twelve = list(itertools.chain.from_iterable(box_scores_twelve))

# Normalise every record to a Python object. Records that arrive as strings are
# decoded with the JSON parser, which understands true/false/null and never
# rewrites text inside values. The original replace + literal_eval path is kept
# only as a fallback for strings that are not strict JSON.
converts_twelve = []
for record in final_box_scores_twelve:
    if isinstance(record, str):
        try:
            record = json.loads(record)
        except ValueError:
            record = ast.literal_eval(record.replace('true', 'True').replace('false', 'False'))
    converts_twelve.append(record)

# Persist the decoded records (not the raw list) so the JSON file holds the same
# data as the DataFrame that is built from converts_twelve.
with open('box_scores_twelve.json', 'w') as _d:
    json.dump(converts_twelve, _d)

In [8]:
# Build one row per decoded 2012-13 box-score record and preview the first rows.
df_box_twelve = pd.DataFrame(converts_twelve)
df_box_twelve.head()

Unnamed: 0,assists,blocks,date,defensive_rebounds,field_goal_attempts,field_goals,first_name,free_throw_attempts,free_throws,is_home,...,opponent,personal_fouls,points,seconds_played,steals,team,three_point_field_goal_attempts,three_point_field_goals,total_rebounds,turnovers
0,3,0,2012-10-30,9,16,10,LeBron,5,4,True,...,BOS,2,26,1732.0,2,MIA,4,2,10,0
1,9,2,2012-10-30,11,7,3,Anderson,3,3,True,...,WAS,4,9,2242.0,0,CLE,0,0,23,1
2,6,3,2012-10-30,9,19,8,Pau,8,6,True,...,DAL,2,23,2419.0,0,LAL,1,1,13,2
3,5,0,2012-10-30,5,15,6,Paul,9,9,False,...,MIA,3,23,2434.0,2,BOS,4,2,5,0
4,4,1,2012-10-30,1,22,10,Dwyane,11,9,True,...,BOS,3,29,2107.0,2,MIA,0,0,3,4


In [11]:
# Save the 2012-13 box scores; the merge section at the end of the notebook reads this file back.
df_box_twelve.to_csv('2012_box_scores.csv')

#### Get box scores for the 2013-2014 season

In [12]:
# Load the 2013-14 schedule; treat the stored start times as UTC and express them
# in US/Eastern so each game's calendar date is taken in Eastern time.
df_thirteen = pd.read_csv('2013_schedule.csv', index_col=0).assign(
    start_time=lambda schedule: pd.to_datetime(schedule['start_time'])
    .dt.tz_localize('UTC')
    .dt.tz_convert('US/Eastern')
)
df_thirteen.head()

Unnamed: 0,home_team_name,home_team_score,start_time,visiting_team_name,visiting_team_score
0,Indiana Pacers,97,2013-10-29 19:00:00-04:00,Orlando Magic,87
1,Los Angeles Lakers,116,2013-10-29 22:30:00-04:00,Los Angeles Clippers,103
2,Miami Heat,107,2013-10-29 20:00:00-04:00,Chicago Bulls,95
3,Cleveland Cavaliers,98,2013-10-30 19:00:00-04:00,Brooklyn Nets,94
4,Dallas Mavericks,118,2013-10-30 20:30:00-04:00,Atlanta Hawks,109


In [13]:
# Distinct US/Eastern calendar dates with at least one game in the 2013-14 schedule.
df_thirteen_unique_dates = df_thirteen['start_time'].dt.date.unique()
print(df_thirteen_unique_dates)

[datetime.date(2013, 10, 29) datetime.date(2013, 10, 30)
 datetime.date(2013, 10, 31) datetime.date(2013, 11, 1)
 datetime.date(2013, 11, 2) datetime.date(2013, 11, 3)
 datetime.date(2013, 11, 4) datetime.date(2013, 11, 5)
 datetime.date(2013, 11, 6) datetime.date(2013, 11, 7)
 datetime.date(2013, 11, 8) datetime.date(2013, 11, 9)
 datetime.date(2013, 11, 10) datetime.date(2013, 11, 11)
 datetime.date(2013, 11, 12) datetime.date(2013, 11, 13)
 datetime.date(2013, 11, 14) datetime.date(2013, 11, 15)
 datetime.date(2013, 11, 16) datetime.date(2013, 11, 17)
 datetime.date(2013, 11, 18) datetime.date(2013, 11, 19)
 datetime.date(2013, 11, 20) datetime.date(2013, 11, 21)
 datetime.date(2013, 11, 22) datetime.date(2013, 11, 23)
 datetime.date(2013, 11, 24) datetime.date(2013, 11, 25)
 datetime.date(2013, 11, 26) datetime.date(2013, 11, 27)
 datetime.date(2013, 11, 29) datetime.date(2013, 11, 30)
 datetime.date(2013, 12, 1) datetime.date(2013, 12, 2)
 datetime.date(2013, 12, 3) datetime.date(

In [14]:
# Fetch the box scores for every 2013-14 game date, one result per date.
# Written like the other seasons' fetch cells; the leftover scratch variable
# `experiment` is no longer created.
# NOTE(review): each result is presumably a list of player records, since the
# next cell flattens them with itertools.chain.from_iterable - confirm.
box_scores_thirteen = [
    br.return_json_encoded_box_scores_for_date(game_date)
    for game_date in df_thirteen_unique_dates
]

In [15]:
# Flatten the per-date results into a single list of player box-score records.
final_box_scores_thirteen = list(itertools.chain.from_iterable(box_scores_thirteen))

# Normalise every record to a Python object. Records that arrive as strings are
# decoded with the JSON parser, which understands true/false/null and never
# rewrites text inside values. The original replace + literal_eval path is kept
# only as a fallback for strings that are not strict JSON.
converts_thirteen = []
for record in final_box_scores_thirteen:
    if isinstance(record, str):
        try:
            record = json.loads(record)
        except ValueError:
            record = ast.literal_eval(record.replace('true', 'True').replace('false', 'False'))
    converts_thirteen.append(record)

# Persist the decoded records (not the raw list) so the JSON file holds the same
# data as the DataFrame that is built from converts_thirteen.
with open('box_scores_thirteen.json', 'w') as _e:
    json.dump(converts_thirteen, _e)

In [16]:
# Build one row per decoded 2013-14 box-score record and preview the first rows.
df_box_thirteen = pd.DataFrame(converts_thirteen)
df_box_thirteen.head()

Unnamed: 0,assists,blocks,date,defensive_rebounds,field_goal_attempts,field_goals,first_name,free_throw_attempts,free_throws,is_home,...,opponent,personal_fouls,points,seconds_played,steals,team,three_point_field_goal_attempts,three_point_field_goals,total_rebounds,turnovers
0,2,1,2013-10-29,4,18,13,Carlos,5,5,False,...,MIA,4,31,1944.0,1,CHI,0,0,7,2
1,1,3,2013-10-29,7,10,8,DeAndre,2,1,False,...,LAL,5,17,2129.0,3,LAC,0,0,11,1
2,5,3,2013-10-29,5,16,8,Paul,7,5,True,...,ORL,0,24,2165.0,1,IND,6,3,6,4
3,11,0,2013-10-29,6,13,5,Chris,5,5,False,...,LAL,4,15,2158.0,5,LAC,2,0,6,1
4,3,1,2013-10-29,3,12,6,Jimmy,7,6,False,...,MIA,4,20,1795.0,5,CHI,4,2,3,2


In [17]:
# Save the 2013-14 box scores; the merge section at the end of the notebook reads this file back.
df_box_thirteen.to_csv('2013_box_scores.csv')

#### Get box scores for the 2014-2015 season

In [19]:
# Load the 2014-15 schedule; treat the stored start times as UTC and express them
# in US/Eastern so each game's calendar date is taken in Eastern time.
df_fourteen = pd.read_csv('2014_schedule.csv', index_col=0).assign(
    start_time=lambda schedule: pd.to_datetime(schedule['start_time'])
    .dt.tz_localize('UTC')
    .dt.tz_convert('US/Eastern')
)
df_fourteen.head()

Unnamed: 0,home_team_name,home_team_score,start_time,visiting_team_name,visiting_team_score
0,Los Angeles Lakers,90,2014-10-28 22:30:00-04:00,Houston Rockets,108
1,New Orleans Pelicans,101,2014-10-28 20:00:00-04:00,Orlando Magic,84
2,San Antonio Spurs,101,2014-10-28 20:00:00-04:00,Dallas Mavericks,100
3,Boston Celtics,121,2014-10-29 19:30:00-04:00,Brooklyn Nets,105
4,Charlotte Hornets,108,2014-10-29 19:00:00-04:00,Milwaukee Bucks,106


In [20]:
# Distinct US/Eastern calendar dates with at least one game in the 2014-15 schedule.
df_fourteen_unique_dates = df_fourteen['start_time'].dt.date.unique()
print(df_fourteen_unique_dates)

[datetime.date(2014, 10, 28) datetime.date(2014, 10, 29)
 datetime.date(2014, 10, 30) datetime.date(2014, 10, 31)
 datetime.date(2014, 11, 1) datetime.date(2014, 11, 2)
 datetime.date(2014, 11, 3) datetime.date(2014, 11, 4)
 datetime.date(2014, 11, 5) datetime.date(2014, 11, 6)
 datetime.date(2014, 11, 7) datetime.date(2014, 11, 8)
 datetime.date(2014, 11, 9) datetime.date(2014, 11, 10)
 datetime.date(2014, 11, 11) datetime.date(2014, 11, 12)
 datetime.date(2014, 11, 13) datetime.date(2014, 11, 14)
 datetime.date(2014, 11, 15) datetime.date(2014, 11, 16)
 datetime.date(2014, 11, 17) datetime.date(2014, 11, 18)
 datetime.date(2014, 11, 19) datetime.date(2014, 11, 20)
 datetime.date(2014, 11, 21) datetime.date(2014, 11, 22)
 datetime.date(2014, 11, 23) datetime.date(2014, 11, 24)
 datetime.date(2014, 11, 25) datetime.date(2014, 11, 26)
 datetime.date(2014, 11, 28) datetime.date(2014, 11, 29)
 datetime.date(2014, 11, 30) datetime.date(2014, 12, 1)
 datetime.date(2014, 12, 2) datetime.date

In [21]:
# Fetch the box scores for every 2014-15 game date, one result per date.
# NOTE(review): each result is presumably a list of player records, since the
# next cell flattens them with itertools.chain.from_iterable - confirm.
box_scores_fourteen = [
    br.return_json_encoded_box_scores_for_date(game_date)
    for game_date in df_fourteen_unique_dates
]

In [22]:
# Flatten the per-date results into a single list of player box-score records.
final_box_scores_fourteen = list(itertools.chain.from_iterable(box_scores_fourteen))

# Normalise every record to a Python object. Records that arrive as strings are
# decoded with the JSON parser, which understands true/false/null and never
# rewrites text inside values. The original replace + literal_eval path is kept
# only as a fallback for strings that are not strict JSON.
converts_fourteen = []
for record in final_box_scores_fourteen:
    if isinstance(record, str):
        try:
            record = json.loads(record)
        except ValueError:
            record = ast.literal_eval(record.replace('true', 'True').replace('false', 'False'))
    converts_fourteen.append(record)

# Persist the decoded records (not the raw list) so the JSON file holds the same
# data as the DataFrame that is built from converts_fourteen.
with open('box_scores_fourteen.json', 'w') as _f:
    json.dump(converts_fourteen, _f)

In [23]:
# Build one row per decoded 2014-15 box-score record and preview the first rows.
df_box_fourteen = pd.DataFrame(converts_fourteen)
df_box_fourteen.head()

Unnamed: 0,assists,blocks,date,defensive_rebounds,field_goal_attempts,field_goals,first_name,free_throw_attempts,free_throws,is_home,...,opponent,personal_fouls,points,seconds_played,steals,team,three_point_field_goal_attempts,three_point_field_goals,total_rebounds,turnovers
0,2,9,2014-10-28,8,22,10,Anthony,9,6,True,...,ORL,1,26,2180.0,3,NOP,0,0,17,0
1,6,0,2014-10-28,1,17,7,James,16,15,False,...,LAL,1,32,1846.0,1,HOU,6,3,1,0
2,6,0,2014-10-28,3,21,11,Monta,4,3,False,...,SAS,0,26,2233.0,1,DAL,2,1,4,3
3,1,4,2014-10-28,18,15,7,Nikola,2,1,False,...,NOP,2,15,2103.0,1,ORL,0,0,23,1
4,3,0,2014-10-28,2,15,9,Tony,2,1,True,...,DAL,2,23,2090.0,0,SAS,4,4,3,1


In [25]:
# Save the 2014-15 box scores; the merge section at the end of the notebook reads this file back.
df_box_fourteen.to_csv('2014_box_scores.csv')

#### Get box scores for the 2015-2016 season

In [26]:
# Load the 2015-16 schedule; treat the stored start times as UTC and express them
# in US/Eastern so each game's calendar date is taken in Eastern time.
df_fifteen = pd.read_csv('2015_schedule.csv', index_col=0).assign(
    start_time=lambda schedule: pd.to_datetime(schedule['start_time'])
    .dt.tz_localize('UTC')
    .dt.tz_convert('US/Eastern')
)
df_fifteen.head()

Unnamed: 0,home_team_name,home_team_score,start_time,visiting_team_name,visiting_team_score
0,Atlanta Hawks,94,2015-10-27 20:00:00-04:00,Detroit Pistons,106
1,Chicago Bulls,97,2015-10-27 20:00:00-04:00,Cleveland Cavaliers,95
2,Golden State Warriors,111,2015-10-27 22:30:00-04:00,New Orleans Pelicans,95
3,Boston Celtics,112,2015-10-28 19:30:00-04:00,Philadelphia 76ers,95
4,Brooklyn Nets,100,2015-10-28 19:30:00-04:00,Chicago Bulls,115


In [27]:
# Distinct US/Eastern calendar dates with at least one game in the 2015-16 schedule.
df_fifteen_unique_dates = df_fifteen['start_time'].dt.date.unique()
print(df_fifteen_unique_dates)

[datetime.date(2015, 10, 27) datetime.date(2015, 10, 28)
 datetime.date(2015, 10, 29) datetime.date(2015, 10, 30)
 datetime.date(2015, 10, 31) datetime.date(2015, 11, 1)
 datetime.date(2015, 11, 2) datetime.date(2015, 11, 3)
 datetime.date(2015, 11, 4) datetime.date(2015, 11, 5)
 datetime.date(2015, 11, 6) datetime.date(2015, 11, 7)
 datetime.date(2015, 11, 8) datetime.date(2015, 11, 9)
 datetime.date(2015, 11, 10) datetime.date(2015, 11, 11)
 datetime.date(2015, 11, 12) datetime.date(2015, 11, 13)
 datetime.date(2015, 11, 14) datetime.date(2015, 11, 15)
 datetime.date(2015, 11, 16) datetime.date(2015, 11, 17)
 datetime.date(2015, 11, 18) datetime.date(2015, 11, 19)
 datetime.date(2015, 11, 20) datetime.date(2015, 11, 21)
 datetime.date(2015, 11, 22) datetime.date(2015, 11, 23)
 datetime.date(2015, 11, 24) datetime.date(2015, 11, 25)
 datetime.date(2015, 11, 27) datetime.date(2015, 11, 28)
 datetime.date(2015, 11, 29) datetime.date(2015, 11, 30)
 datetime.date(2015, 12, 1) datetime.dat

In [28]:
# Fetch the box scores for every 2015-16 game date, one result per date.
# NOTE(review): each result is presumably a list of player records, since the
# next cell flattens them with itertools.chain.from_iterable - confirm.
box_scores_fifteen = [
    br.return_json_encoded_box_scores_for_date(game_date)
    for game_date in df_fifteen_unique_dates
]

In [29]:
# Flatten the per-date results into a single list of player box-score records.
final_box_scores_fifteen = list(itertools.chain.from_iterable(box_scores_fifteen))

# Normalise every record to a Python object. Records that arrive as strings are
# decoded with the JSON parser, which understands true/false/null and never
# rewrites text inside values. The original replace + literal_eval path is kept
# only as a fallback for strings that are not strict JSON.
converts_fifteen = []
for record in final_box_scores_fifteen:
    if isinstance(record, str):
        try:
            record = json.loads(record)
        except ValueError:
            record = ast.literal_eval(record.replace('true', 'True').replace('false', 'False'))
    converts_fifteen.append(record)

# Persist the decoded records (not the raw list) so the JSON file holds the same
# data as the DataFrame that is built from converts_fifteen.
with open('box_scores_fifteen.json', 'w') as _g:
    json.dump(converts_fifteen, _g)

In [30]:
# Build one row per decoded 2015-16 box-score record and preview the first rows.
df_box_fifteen = pd.DataFrame(converts_fifteen)
df_box_fifteen.head()

Unnamed: 0,assists,blocks,date,defensive_rebounds,field_goal_attempts,field_goals,first_name,free_throw_attempts,free_throws,is_home,...,opponent,personal_fouls,points,seconds_played,steals,team,three_point_field_goal_attempts,three_point_field_goals,total_rebounds,turnovers
0,7,0,2015-10-27,3,26,14,Stephen,7,7,True,...,NOP,1,40,2137.0,2,GSW,12,5,6,2
1,5,0,2015-10-27,10,22,12,LeBron,3,0,False,...,CHI,3,25,2164.0,1,CLE,5,1,10,1
2,4,0,2015-10-27,2,14,8,Dennis,2,2,True,...,DET,1,20,1522.0,2,ATL,5,2,3,1
3,3,2,2015-10-27,11,16,6,Andre,10,6,False,...,ATL,4,18,2229.0,1,DET,0,0,19,2
4,7,1,2015-10-27,4,15,7,Mo,3,2,False,...,CHI,2,19,2229.0,1,CLE,7,3,4,1


In [32]:
# Save the 2015-16 box scores; the merge section at the end of the notebook reads this file back.
df_box_fifteen.to_csv('2015_box_scores.csv')

#### Get box scores for the 2016-2017 season

In [33]:
# Load the 2016-17 schedule; treat the stored start times as UTC and express them
# in US/Eastern so each game's calendar date is taken in Eastern time.
df_sixteen = pd.read_csv('2016_schedule.csv', index_col=0).assign(
    start_time=lambda schedule: pd.to_datetime(schedule['start_time'])
    .dt.tz_localize('UTC')
    .dt.tz_convert('US/Eastern')
)
df_sixteen.head()

Unnamed: 0,home_team_name,home_team_score,start_time,visiting_team_name,visiting_team_score
0,Cleveland Cavaliers,117,2016-10-25 19:30:00-04:00,New York Knicks,88
1,Golden State Warriors,100,2016-10-25 22:30:00-04:00,San Antonio Spurs,129
2,Portland Trail Blazers,113,2016-10-25 22:00:00-04:00,Utah Jazz,104
3,Boston Celtics,122,2016-10-26 19:30:00-04:00,Brooklyn Nets,117
4,Indiana Pacers,130,2016-10-26 19:00:00-04:00,Dallas Mavericks,121


In [34]:
# Distinct US/Eastern calendar dates with at least one game in the 2016-17 schedule.
df_sixteen_unique_dates = df_sixteen['start_time'].dt.date.unique()
print(df_sixteen_unique_dates)

[datetime.date(2016, 10, 25) datetime.date(2016, 10, 26)
 datetime.date(2016, 10, 27) datetime.date(2016, 10, 28)
 datetime.date(2016, 10, 29) datetime.date(2016, 10, 30)
 datetime.date(2016, 10, 31) datetime.date(2016, 11, 1)
 datetime.date(2016, 11, 2) datetime.date(2016, 11, 3)
 datetime.date(2016, 11, 4) datetime.date(2016, 11, 5)
 datetime.date(2016, 11, 6) datetime.date(2016, 11, 7)
 datetime.date(2016, 11, 8) datetime.date(2016, 11, 9)
 datetime.date(2016, 11, 10) datetime.date(2016, 11, 11)
 datetime.date(2016, 11, 12) datetime.date(2016, 11, 13)
 datetime.date(2016, 11, 14) datetime.date(2016, 11, 15)
 datetime.date(2016, 11, 16) datetime.date(2016, 11, 17)
 datetime.date(2016, 11, 18) datetime.date(2016, 11, 19)
 datetime.date(2016, 11, 20) datetime.date(2016, 11, 21)
 datetime.date(2016, 11, 22) datetime.date(2016, 11, 23)
 datetime.date(2016, 11, 25) datetime.date(2016, 11, 26)
 datetime.date(2016, 11, 27) datetime.date(2016, 11, 28)
 datetime.date(2016, 11, 29) datetime.da

In [41]:
# Fetch the box scores for every 2016-17 game date, one result per date.
# NOTE(review): each result is presumably a list of player records, since the
# next cell flattens them with itertools.chain.from_iterable - confirm.
box_scores_sixteen = [
    br.return_json_encoded_box_scores_for_date(game_date)
    for game_date in df_sixteen_unique_dates
]

In [42]:
# Flatten the per-date results into a single list of player box-score records.
final_box_scores_sixteen = list(itertools.chain.from_iterable(box_scores_sixteen))

# Normalise every record to a Python object. Records that arrive as strings are
# decoded with the JSON parser, which understands true/false/null and never
# rewrites text inside values. The original replace + literal_eval path is kept
# only as a fallback for strings that are not strict JSON.
converts_sixteen = []
for record in final_box_scores_sixteen:
    if isinstance(record, str):
        try:
            record = json.loads(record)
        except ValueError:
            record = ast.literal_eval(record.replace('true', 'True').replace('false', 'False'))
    converts_sixteen.append(record)

# Persist the decoded records (not the raw list) so the JSON file holds the same
# data as the DataFrame that is built from converts_sixteen.
with open('box_scores_sixteen.json', 'w') as _h:
    json.dump(converts_sixteen, _h)

In [43]:
# Build one row per decoded 2016-17 box-score record and preview the first rows.
df_box_sixteen = pd.DataFrame(converts_sixteen)
df_box_sixteen.head()

Unnamed: 0,assists,blocks,date,defensive_rebounds,field_goal_attempts,field_goals,first_name,free_throw_attempts,free_throws,is_home,...,opponent,personal_fouls,points,seconds_played,steals,team,three_point_field_goal_attempts,three_point_field_goals,total_rebounds,turnovers
0,6,0,2016-10-25,8,20,13,Damian,9,9,True,...,UTA,3,39,2125.0,0,POR,6,4,9,4
1,3,0,2016-10-25,2,21,10,Kawhi,15,15,False,...,GSW,1,35,1923.0,5,SAS,3,0,5,2
2,4,2,2016-10-25,10,18,11,Kevin,5,4,True,...,SAS,2,27,2196.0,2,GSW,3,1,10,0
3,3,0,2016-10-25,4,16,12,Joe,2,2,False,...,POR,0,29,1830.0,1,UTA,4,3,4,2
4,4,0,2016-10-25,1,22,12,Kyrie,1,1,True,...,NYK,1,29,1788.0,2,CLE,7,4,2,0


In [45]:
# Save the 2016-17 box scores; the merge section at the end of the notebook reads this file back.
df_box_sixteen.to_csv('2016_box_scores.csv')

#### Get box scores for the 2017-2018 season (incomplete)

In [46]:
# Load the 2017-18 schedule; treat the stored start times as UTC and express them
# in US/Eastern so each game's calendar date is taken in Eastern time.
df_seventeen = pd.read_csv('2017_schedule.csv', index_col=0).assign(
    start_time=lambda schedule: pd.to_datetime(schedule['start_time'])
    .dt.tz_localize('UTC')
    .dt.tz_convert('US/Eastern')
)
df_seventeen.head()

Unnamed: 0,home_team_name,home_team_score,start_time,visiting_team_name,visiting_team_score
0,Cleveland Cavaliers,102.0,2017-10-17 20:01:00-04:00,Boston Celtics,99.0
1,Golden State Warriors,121.0,2017-10-17 22:30:00-04:00,Houston Rockets,122.0
2,Boston Celtics,100.0,2017-10-18 19:30:00-04:00,Milwaukee Bucks,108.0
3,Dallas Mavericks,111.0,2017-10-18 20:30:00-04:00,Atlanta Hawks,117.0
4,Detroit Pistons,102.0,2017-10-18 19:00:00-04:00,Charlotte Hornets,90.0


In [47]:
# Distinct US/Eastern calendar dates with at least one game in the 2017-18 schedule.
df_seventeen_unique_dates = df_seventeen['start_time'].dt.date.unique()
print(df_seventeen_unique_dates)

[datetime.date(2017, 10, 17) datetime.date(2017, 10, 18)
 datetime.date(2017, 10, 19) datetime.date(2017, 10, 20)
 datetime.date(2017, 10, 21) datetime.date(2017, 10, 22)
 datetime.date(2017, 10, 23) datetime.date(2017, 10, 24)
 datetime.date(2017, 10, 25) datetime.date(2017, 10, 26)
 datetime.date(2017, 10, 27) datetime.date(2017, 10, 28)
 datetime.date(2017, 10, 29) datetime.date(2017, 10, 30)
 datetime.date(2017, 10, 31) datetime.date(2017, 11, 1)
 datetime.date(2017, 11, 2) datetime.date(2017, 11, 3)
 datetime.date(2017, 11, 4) datetime.date(2017, 11, 5)
 datetime.date(2017, 11, 6) datetime.date(2017, 11, 7)
 datetime.date(2017, 11, 8) datetime.date(2017, 11, 9)
 datetime.date(2017, 11, 10) datetime.date(2017, 11, 11)
 datetime.date(2017, 11, 12) datetime.date(2017, 11, 13)
 datetime.date(2017, 11, 14) datetime.date(2017, 11, 15)
 datetime.date(2017, 11, 16) datetime.date(2017, 11, 17)
 datetime.date(2017, 11, 18) datetime.date(2017, 11, 19)
 datetime.date(2017, 11, 20) datetime.da

In [48]:
# Fetch the box scores for every 2017-18 game date, one result per date.
# NOTE(review): each result is presumably a list of player records, since the
# next cell flattens them with itertools.chain.from_iterable - confirm.
box_scores_seventeen = [
    br.return_json_encoded_box_scores_for_date(game_date)
    for game_date in df_seventeen_unique_dates
]

In [49]:
# Flatten the per-date results into a single list of player box-score records.
final_box_scores_seventeen = list(itertools.chain.from_iterable(box_scores_seventeen))

# Normalise every record to a Python object. Records that arrive as strings are
# decoded with the JSON parser, which understands true/false/null and never
# rewrites text inside values. The original replace + literal_eval path is kept
# only as a fallback for strings that are not strict JSON.
converts_seventeen = []
for record in final_box_scores_seventeen:
    if isinstance(record, str):
        try:
            record = json.loads(record)
        except ValueError:
            record = ast.literal_eval(record.replace('true', 'True').replace('false', 'False'))
    converts_seventeen.append(record)

# Persist the decoded records (not the raw list) so the JSON file holds the same
# data as the DataFrame that is built from converts_seventeen.
with open('box_scores_seventeen.json', 'w') as _i:
    json.dump(converts_seventeen, _i)

In [50]:
# Build one row per decoded 2017-18 box-score record and preview the first rows.
df_box_seventeen = pd.DataFrame(converts_seventeen)
df_box_seventeen.head()

Unnamed: 0,assists,blocks,date,defensive_rebounds,field_goal_attempts,field_goals,first_name,free_throw_attempts,free_throws,is_home,...,opponent,personal_fouls,points,seconds_played,steals,team,three_point_field_goal_attempts,three_point_field_goals,total_rebounds,turnovers
0,9,2,2017-10-17,15,19,12,LeBron,4,4,True,...,BOS,3,29,2472.0,0,CLE,5,1,16,4
1,10,0,2017-10-17,2,17,8,Kyrie,2,2,False,...,CLE,4,22,2361.0,3,BOS,9,4,4,2
2,11,0,2017-10-17,5,23,10,James,4,3,False,...,GSW,2,27,2184.0,1,HOU,9,4,6,3
3,0,0,2017-10-17,1,9,8,Nick,1,1,True,...,HOU,4,23,1570.0,0,GSW,7,6,2,1
4,1,2,2017-10-17,1,16,9,Eric,8,6,False,...,GSW,1,24,1763.0,1,HOU,6,0,1,2


In [52]:
# Save the 2017-18 box scores; the merge section at the end of the notebook reads this file back.
df_box_seventeen.to_csv('2017_box_scores.csv')

#### Merge box scores into single dataframe

In [53]:
# Reload the saved 2012-13 box scores; index_col=0 restores the row labels written by to_csv.
df_twelve_c = pd.read_csv('2012_box_scores.csv', index_col=0)

In [54]:
# Reload the saved 2013-14 box scores; index_col=0 restores the row labels written by to_csv.
df_thirteen_c = pd.read_csv('2013_box_scores.csv', index_col=0)

In [55]:
# Reload the saved 2014-15 box scores; index_col=0 restores the row labels written by to_csv.
df_fourteen_c = pd.read_csv('2014_box_scores.csv', index_col=0)

In [56]:
# Reload the saved 2015-16 box scores; index_col=0 restores the row labels written by to_csv.
df_fifteen_c = pd.read_csv('2015_box_scores.csv', index_col=0)

In [57]:
# Reload the saved 2016-17 box scores; index_col=0 restores the row labels written by to_csv.
df_sixteen_c = pd.read_csv('2016_box_scores.csv', index_col=0)

In [58]:
# Reload the saved 2017-18 box scores; index_col=0 restores the row labels written by to_csv.
df_seventeen_c = pd.read_csv('2017_box_scores.csv', index_col=0)

In [59]:
# Stack the six per-season frames into one table, in chronological order.
# ignore_index=True assigns fresh, unique 0..N-1 row labels. Without it each
# season's labels restart at 0, so a label lookup such as .loc[0] would return
# one row per season and the saved index column would contain duplicates.
df_box_scores_final_all = pd.concat(
    [
        df_twelve_c,
        df_thirteen_c,
        df_fourteen_c,
        df_fifteen_c,
        df_sixteen_c,
        df_seventeen_c,
    ],
    ignore_index=True,
)
df_box_scores_final_all.head()

Unnamed: 0,assists,blocks,date,defensive_rebounds,field_goal_attempts,field_goals,first_name,free_throw_attempts,free_throws,is_home,...,opponent,personal_fouls,points,seconds_played,steals,team,three_point_field_goal_attempts,three_point_field_goals,total_rebounds,turnovers
0,3,0,2012-10-30,9,16,10,LeBron,5,4,True,...,BOS,2,26,1732.0,2,MIA,4,2,10,0
1,9,2,2012-10-30,11,7,3,Anderson,3,3,True,...,WAS,4,9,2242.0,0,CLE,0,0,23,1
2,6,3,2012-10-30,9,19,8,Pau,8,6,True,...,DAL,2,23,2419.0,0,LAL,1,1,13,2
3,5,0,2012-10-30,5,15,6,Paul,9,9,False,...,MIA,3,23,2434.0,2,BOS,4,2,5,0
4,4,1,2012-10-30,1,22,10,Dwyane,11,9,True,...,BOS,3,29,2107.0,2,MIA,0,0,3,4


In [60]:
# Write the combined 2012-2017 box scores to a single CSV file.
df_box_scores_final_all.to_csv('2012_2017_box_scores.csv')