In [27]:
import datetime
import time
import functools

import pandas as pd
import numpy as np

import nba_py
import nba_py.game
import nba_py.player
import nba_py.team

import pymysql
from sqlalchemy import create_engine

In [15]:
conn = create_engine('mysql+pymysql://root:@localhost:3306/nba_stats')

try:
    # read sql table of game header
    game_header = pd.read_sql_table('game_header', conn)
    length_1 = len(game_header)
    print(str(length_1) + ' games loaded.')
    # set begin date to the newest date in sql table
    begin = datetime.datetime.strptime(game_header.iloc[-1]['GAME_DATE_EST'][:10], "%Y-%m-%d").date()
except ValueError:
    print('no table yet!')
    length_1 = 0
    # if no table yet, set begin date to 2012-10-29
    begin = datetime.date(2012, 10, 29)
    # grab game headers of begining date
    game_header = nba_py.Scoreboard(month = begin.month, 
                                        day = begin.day, 
                                        year = begin.year, league_id = '00', offset = 0).game_header()

# set end date to today
end = datetime.date.today()

for i in range((end - begin).days + 1):
    # grab game headers from begin date to end date
    day = begin + datetime.timedelta(days = i)
    game_header = game_header.append(nba_py.Scoreboard(month = day.month, 
                                                       day = day.day, 
                                                       year = day.year, 
                                                       league_id = '00', 
                                                       offset = 0).game_header())
    print(str(day) + ' finished!    ' + str(datetime.datetime.now().time())[:8])

length_2 = len(game_header)
# drop the duplicate by game id
game_header = game_header.drop_duplicates('GAME_ID')
length_3 = len(game_header)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' games added.')

# sort game headers by game id ascending
game_header = game_header.sort_values('GAME_ID')

# commit new game headers to sql table
game_header.to_sql('game_header', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' game headers commit complete!')

7019 games loaded.
2017-06-12 finished!
2017-06-13 finished!
2017-06-14 finished!
2017-06-15 finished!
2017-06-16 finished!
2017-06-17 finished!
2017-06-18 finished!
2017-06-19 finished!
2017-06-20 finished!
2017-06-21 finished!
2017-06-22 finished!
2017-06-23 finished!
2017-06-24 finished!
2017-06-25 finished!
2017-06-26 finished!
2017-06-27 finished!
2017-06-28 finished!
2017-06-29 finished!
2017-06-30 finished!
2017-07-01 finished!
2017-07-02 finished!
2017-07-03 finished!
2017-07-04 finished!
2017-07-05 finished!
2017-07-06 finished!
2017-07-07 finished!
2017-07-08 finished!
2017-07-09 finished!
2017-07-10 finished!
2017-07-11 finished!
2017-07-12 finished!
2017-07-13 finished!
2017-07-14 finished!
2017-07-15 finished!
2017-07-16 finished!
2017-07-17 finished!
2017-07-18 finished!
2017-07-19 finished!
2017-07-20 finished!
2017-07-21 finished!
2017-07-22 finished!
2017-07-23 finished!
2017-07-24 finished!
2017-07-25 finished!
2017-07-26 finished!
2017-07-27 finished!
2017-07-28 fini

In [16]:
game_header

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,GAME_STATUS_ID,GAME_STATUS_TEXT,GAMECODE,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,LIVE_PERIOD,LIVE_PC_TIME,NATL_TV_BROADCASTER_ABBREVIATION,LIVE_PERIOD_TIME_BCAST,WH_STATUS
1315,2013-10-05T00:00:00,1,0011300001,3,Final,20131005/OKCFBU,12321,1610612760,2013,4,,NBA TV,Q4 - NBA TV,1
1316,2013-10-05T00:00:00,2,0011300002,3,Final,20131005/CHIIND,1610612754,1610612741,2013,4,,NBA TV,Q4 - NBA TV,1
1317,2013-10-05T00:00:00,3,0011300003,3,Final,20131005/NOPHOU,1610612745,1610612740,2013,4,,,Q4 -,1
1318,2013-10-05T00:00:00,4,0011300004,3,Final,20131005/GSWLAL,1610612747,1610612744,2013,4,,NBA TV,Q4 - NBA TV,1
1319,2013-10-06T00:00:00,1,0011300005,3,Final,20131006/PHIUBB,12324,1610612755,2013,4,,NBA TV,Q4 - NBA TV,1
1320,2013-10-06T00:00:00,2,0011300006,3,Final,20131006/DENLAL,1610612747,1610612743,2013,4,,NBA TV,Q4 - NBA TV,1
1323,2013-10-07T00:00:00,3,0011300007,3,Final,20131007/MEMCHI,1610612741,1610612763,2013,4,,,Q4 -,1
1324,2013-10-07T00:00:00,4,0011300008,3,Final,20131007/MOSMIN,1610612750,12308,2013,5,,,Q5 -,1
1326,2013-10-07T00:00:00,6,0011300009,3,Final,20131007/MACPHX,1610612756,93,2013,4,,,Q4 -,1
1327,2013-10-07T00:00:00,7,0011300010,3,Final,20131007/LACPOR,1610612757,1610612746,2013,4,,,Q4 -,1


In [49]:
try:
    # read sql table of game stats logs
    game_stats_logs = pd.read_sql_table('game_stats_logs', conn)
    length_1 = len(game_stats_logs)
    print(str(length_1) + ' player stats loaded.')
except ValueError:
    print('no table yet!')
    length_1 = 0
    # if there is no table yet, create one by getting the first game stats
    initial_game = game_header['GAME_ID'].min()
    game_stats = nba_py.game.Boxscore(initial_game).player_stats()
    # get home team stats
    home_team_id = int(game_header[game_header['GAME_ID'] == initial_game]['HOME_TEAM_ID'])
    home_stats_logs = game_stats[game_stats['TEAM_ID'] == int(home_team_id)].copy()
    # set home team location
    home_stats_logs['LOCATION'] = 'HOME'
    # set home team against team id
    home_stats_logs['AGAINST_TEAM_ID'] = int(game_header[game_header['GAME_ID'] == initial_game]['VISITOR_TEAM_ID'])
    # get away team stats
    away_team_id = int(game_header[game_header['GAME_ID'] == initial_game]['VISITOR_TEAM_ID'])
    away_stats_logs = game_stats[game_stats['TEAM_ID'] == int(away_team_id)].copy()
    # set away team location
    away_stats_logs['LOCATION'] = 'AWAY'
    # set away team against team id
    away_stats_logs['AGAINST_TEAM_ID'] = int(game_header[game_header['GAME_ID'] == initial_game]['HOME_TEAM_ID'])
    # concat home team stats and away team stats
    game_stats_logs = pd.concat([home_stats_logs, away_stats_logs])
    # commit initialized game stats logs to sql table
    game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'replace')
    print('game stats logs initialized!')

# for game id in game headers from the max one in sql table
for i in game_header[game_header['GAME_ID'] >= game_stats_logs['GAME_ID'].max()]['GAME_ID'][:300]:
    game_stats = nba_py.game.Boxscore(i).player_stats()
    home_team_id = int(game_header[game_header['GAME_ID'] == i]['HOME_TEAM_ID'])
    home_stats_logs = game_stats[game_stats['TEAM_ID'] == int(home_team_id)].copy()
    home_stats_logs['LOCATION'] = 'HOME'
    home_stats_logs['AGAINST_TEAM_ID'] = int(game_header[game_header['GAME_ID'] == i]['VISITOR_TEAM_ID'])
    away_team_id = int(game_header[game_header['GAME_ID'] == i]['VISITOR_TEAM_ID'])
    away_stats_logs = game_stats[game_stats['TEAM_ID'] == int(away_team_id)].copy()
    away_stats_logs['LOCATION'] = 'AWAY'
    away_stats_logs['AGAINST_TEAM_ID'] = int(game_header[game_header['GAME_ID'] == i]['HOME_TEAM_ID'])
    game_stats_logs = game_stats_logs.append(home_stats_logs)
    game_stats_logs = game_stats_logs.append(away_stats_logs)
    print('game ' + i + ' added!    ' + str(datetime.datetime.now().time())[:8])

length_2 = len(game_stats_logs)
# drop duplicate game stats by game id and player id
game_stats_logs = game_stats_logs.drop_duplicates(['GAME_ID', 'PLAYER_ID'])
length_3 = len(game_stats_logs)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' player stats added.')

# commit new game stats logs to sql table
game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' player stats commit complete!')

40866 player stats loaded.
game 0021201043 added!    23:47:04
game 0021201044 added!    23:47:06
game 0021201045 added!    23:47:08
game 0021201046 added!    23:47:10
game 0021201047 added!    23:47:12
game 0021201048 added!    23:47:14
game 0021201049 added!    23:47:16
game 0021201050 added!    23:47:18
game 0021201051 added!    23:47:20
game 0021201052 added!    23:47:22
game 0021201053 added!    23:47:24
game 0021201054 added!    23:47:26
game 0021201055 added!    23:47:29
game 0021201056 added!    23:47:30
game 0021201057 added!    23:47:33
game 0021201058 added!    23:47:34
game 0021201059 added!    23:47:36
game 0021201060 added!    23:47:38
game 0021201061 added!    23:47:40
game 0021201062 added!    23:47:42
game 0021201063 added!    23:47:44
game 0021201064 added!    23:47:46
game 0021201065 added!    23:47:48
game 0021201066 added!    23:47:50
game 0021201067 added!    23:47:51
game 0021201068 added!    23:47:53
game 0021201069 added!    23:47:54
game 0021201070 added!    23

game 0021300048 added!    23:54:55
game 0021300049 added!    23:54:57
game 0021300050 added!    23:54:59
game 0021300051 added!    23:55:01
game 0021300052 added!    23:55:03
game 0021300053 added!    23:55:05
game 0021300054 added!    23:55:07
game 0021300055 added!    23:55:09
game 0021300056 added!    23:55:11
game 0021300057 added!    23:55:13
game 0021300058 added!    23:55:15
game 0021300059 added!    23:55:17
game 0021300060 added!    23:55:19
game 0021300061 added!    23:55:21
game 0021300062 added!    23:55:23
game 0021300063 added!    23:55:25
game 0021300064 added!    23:55:26
game 0021300065 added!    23:55:28
game 0021300066 added!    23:55:29
game 0021300067 added!    23:55:32
game 0021300068 added!    23:55:34
game 0021300069 added!    23:55:36
game 0021300070 added!    23:55:38
game 0021300071 added!    23:55:40
game 0021300072 added!    23:55:42
game 0021300073 added!    23:55:43
game 0021300074 added!    23:55:45
game 0021300075 added!    23:55:47
game 0021300076 adde

In [70]:
game_stats_logs

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,LOCATION,AGAINST_TEAM_ID
0,0021200001,1610612739,CLE,Cleveland,202087,Alonzo Gee,F,,33:53,2.0,...,3.0,2.0,2.0,0.0,3.0,5.0,4.0,14.0,HOME,1610612764
1,0021200001,1610612739,CLE,Cleveland,202684,Tristan Thompson,F,,31:31,5.0,...,10.0,5.0,1.0,0.0,2.0,2.0,12.0,20.0,HOME,1610612764
2,0021200001,1610612739,CLE,Cleveland,2760,Anderson Varejao,C,,37:22,3.0,...,23.0,9.0,0.0,2.0,1.0,4.0,9.0,7.0,HOME,1610612764
3,0021200001,1610612739,CLE,Cleveland,203079,Dion Waiters,G,,28:14,6.0,...,2.0,0.0,3.0,0.0,3.0,0.0,17.0,13.0,HOME,1610612764
4,0021200001,1610612739,CLE,Cleveland,202681,Kyrie Irving,G,,34:34,11.0,...,6.0,3.0,0.0,1.0,4.0,4.0,29.0,23.0,HOME,1610612764
5,0021200001,1610612739,CLE,Cleveland,101139,CJ Miles,,,17:42,1.0,...,4.0,1.0,0.0,0.0,3.0,0.0,2.0,2.0,HOME,1610612764
6,0021200001,1610612739,CLE,Cleveland,203092,Tyler Zeller,,,14:53,2.0,...,2.0,0.0,1.0,1.0,0.0,2.0,5.0,4.0,HOME,1610612764
7,0021200001,1610612739,CLE,Cleveland,200789,Daniel Gibson,,,16:11,3.0,...,3.0,1.0,0.0,1.0,0.0,2.0,10.0,-9.0,HOME,1610612764
8,0021200001,1610612739,CLE,Cleveland,2575,Luke Walton,,,12:14,1.0,...,1.0,0.0,0.0,0.0,2.0,0.0,2.0,-11.0,HOME,1610612764
9,0021200001,1610612739,CLE,Cleveland,202388,Donald Sloan,,,13:26,2.0,...,0.0,1.0,0.0,0.0,2.0,2.0,4.0,-13.0,HOME,1610612764
