In [1]:
import datetime
import time
import functools

import pandas as pd
import numpy as np

import nba_py
import nba_py.game
import nba_py.player
import nba_py.team

import pymysql
from sqlalchemy import create_engine

In [2]:
# Refresh the `game_header` SQL table: load what is already stored, fetch the
# daily scoreboards from the newest stored date through today, de-duplicate by
# GAME_ID and write the whole table back.
#
# NOTE(review): the connection URL hardcodes the MySQL root user with an empty
# password; consider reading credentials from the environment instead.
conn = create_engine('mysql+pymysql://root:@localhost:3306/nba_stats')

try:
    # read sql table of game header
    game_header = pd.read_sql_table('game_header', conn)
except ValueError:
    # read_sql_table raises ValueError when the table does not exist
    print('no table yet!')
    game_header = None
else:
    print(str(len(game_header)) + ' games loaded.')

if game_header is None or game_header.empty:
    # nothing stored yet: start from 2012-10-29 with no existing rows
    length_1 = 0
    begin = datetime.date(2012, 10, 29)
    header_frames = []
else:
    length_1 = len(game_header)
    # The table is stored sorted by GAME_ID, which is not chronological, so the
    # last row is not guaranteed to hold the newest date; take the max instead.
    # assumes GAME_DATE_EST sorts chronologically (ISO 'YYYY-MM-DD...' strings
    # or datetimes) — TODO confirm against the stored column type
    newest_date = game_header['GAME_DATE_EST'].dropna().max()
    begin = datetime.datetime.strptime(str(newest_date)[:10], "%Y-%m-%d").date()
    header_frames = [game_header]

# set end date to today
end = datetime.date.today()

for i in range((end - begin).days + 1):
    # grab game headers from begin date to end date (both inclusive)
    day = begin + datetime.timedelta(days = i)
    header_frames.append(nba_py.Scoreboard(month = day.month,
                                           day = day.day,
                                           year = day.year,
                                           league_id = '00',
                                           offset = 0).game_header())
    print(str(day) + ' finished!    ' + str(datetime.datetime.now().time())[:8])

# concatenate once instead of growing the frame inside the loop
game_header = pd.concat(header_frames)

length_2 = len(game_header)
# drop the duplicates by game id; the first occurrence wins, so a row already
# stored in the table is kept over a freshly fetched one
game_header = game_header.drop_duplicates('GAME_ID')
length_3 = len(game_header)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' games added.')

# sort game headers by game id ascending
game_header = game_header.sort_values('GAME_ID')

# commit new game headers to sql table (the whole table is rewritten)
game_header.to_sql('game_header', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' game headers commit complete!')

7019 games loaded.
2017-06-12 finished!    09:55:45
2017-06-13 finished!    09:55:47
2017-06-14 finished!    09:55:48
2017-06-15 finished!    09:55:50
2017-06-16 finished!    09:55:52
2017-06-17 finished!    09:55:54
2017-06-18 finished!    09:55:57
2017-06-19 finished!    09:55:58
2017-06-20 finished!    09:56:00
2017-06-21 finished!    09:56:02
2017-06-22 finished!    09:56:04
2017-06-23 finished!    09:56:06
2017-06-24 finished!    09:56:08
2017-06-25 finished!    09:56:09
2017-06-26 finished!    09:56:11
2017-06-27 finished!    09:56:13
2017-06-28 finished!    09:56:14
2017-06-29 finished!    09:56:16
2017-06-30 finished!    09:56:18
2017-07-01 finished!    09:56:20
2017-07-02 finished!    09:56:21
2017-07-03 finished!    09:56:23
2017-07-04 finished!    09:56:26
2017-07-05 finished!    09:56:28
2017-07-06 finished!    09:56:30
2017-07-07 finished!    09:56:32
2017-07-08 finished!    09:56:33
2017-07-09 finished!    09:56:35
2017-07-10 finished!    09:56:37
2017-07-11 finished!    

In [3]:
# preview only the first rows instead of rendering the whole table
game_header.head(10)

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,GAME_STATUS_ID,GAME_STATUS_TEXT,GAMECODE,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,LIVE_PERIOD,LIVE_PC_TIME,NATL_TV_BROADCASTER_ABBREVIATION,LIVE_PERIOD_TIME_BCAST,WH_STATUS
0,2013-10-05T00:00:00,1,0011300001,3,Final,20131005/OKCFBU,12321,1610612760,2013,4,,NBA TV,Q4 - NBA TV,1
1,2013-10-05T00:00:00,2,0011300002,3,Final,20131005/CHIIND,1610612754,1610612741,2013,4,,NBA TV,Q4 - NBA TV,1
2,2013-10-05T00:00:00,3,0011300003,3,Final,20131005/NOPHOU,1610612745,1610612740,2013,4,,,Q4 -,1
3,2013-10-05T00:00:00,4,0011300004,3,Final,20131005/GSWLAL,1610612747,1610612744,2013,4,,NBA TV,Q4 - NBA TV,1
4,2013-10-06T00:00:00,1,0011300005,3,Final,20131006/PHIUBB,12324,1610612755,2013,4,,NBA TV,Q4 - NBA TV,1
5,2013-10-06T00:00:00,2,0011300006,3,Final,20131006/DENLAL,1610612747,1610612743,2013,4,,NBA TV,Q4 - NBA TV,1
6,2013-10-07T00:00:00,3,0011300007,3,Final,20131007/MEMCHI,1610612741,1610612763,2013,4,,,Q4 -,1
7,2013-10-07T00:00:00,4,0011300008,3,Final,20131007/MOSMIN,1610612750,12308,2013,5,,,Q5 -,1
8,2013-10-07T00:00:00,6,0011300009,3,Final,20131007/MACPHX,1610612756,93,2013,4,,,Q4 -,1
9,2013-10-07T00:00:00,7,0011300010,3,Final,20131007/LACPOR,1610612757,1610612746,2013,4,,,Q4 -,1


In [61]:
# Refresh the `game_stats_logs` SQL table: load what is already stored, fetch
# the box score of every game in `game_header` that has no stored player rows,
# tag each row with LOCATION / AGAINST_TEAM_ID, de-duplicate and write back.

def _tag_home_away(game_stats, home_team_id, away_team_id):
    """Split one game's player stats by team and tag each side.

    Rows whose TEAM_ID equals ``home_team_id`` get LOCATION 'HOME' and
    AGAINST_TEAM_ID ``away_team_id``; rows whose TEAM_ID equals
    ``away_team_id`` get LOCATION 'AWAY' and AGAINST_TEAM_ID ``home_team_id``.
    Rows belonging to neither team are dropped.

    Returns a new DataFrame with the home rows followed by the away rows.
    """
    home_stats_logs = game_stats[game_stats['TEAM_ID'] == home_team_id].copy()
    home_stats_logs['LOCATION'] = 'HOME'
    home_stats_logs['AGAINST_TEAM_ID'] = away_team_id

    away_stats_logs = game_stats[game_stats['TEAM_ID'] == away_team_id].copy()
    away_stats_logs['LOCATION'] = 'AWAY'
    away_stats_logs['AGAINST_TEAM_ID'] = home_team_id

    return pd.concat([home_stats_logs, away_stats_logs])


def _fetch_game_stats_logs(game_id):
    """Download the player box score of ``game_id`` and tag home/away rows.

    The home and visitor team ids are looked up in the global ``game_header``
    frame, which is expected to hold a row for ``game_id``.
    """
    # take the first matching header row explicitly rather than calling int()
    # on a one-element Series
    header_row = game_header.loc[game_header['GAME_ID'] == game_id].iloc[0]
    home_team_id = int(header_row['HOME_TEAM_ID'])
    away_team_id = int(header_row['VISITOR_TEAM_ID'])
    game_stats = nba_py.game.Boxscore(game_id).player_stats()
    return _tag_home_away(game_stats, home_team_id, away_team_id)


try:
    # read sql table of game stats logs
    game_stats_logs = pd.read_sql_table('game_stats_logs', conn)
except ValueError:
    # read_sql_table raises ValueError when the table does not exist
    print('no table yet!')
    length_1 = 0
    # if there is no table yet, create one by getting the first game stats
    initial_game = game_header['GAME_ID'].min()
    game_stats_logs = _fetch_game_stats_logs(initial_game)
    # commit initialized game stats logs to sql table
    game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'replace')
    print('game stats logs initialized!')
else:
    length_1 = len(game_stats_logs)
    print(str(length_1) + ' player stats loaded.')

# fetch every game id that is in game header but not yet in game stats logs
not_logged = ~game_header['GAME_ID'].isin(game_stats_logs['GAME_ID'])
stats_frames = [game_stats_logs]
for i in game_header.loc[not_logged, 'GAME_ID']:
    fetched_logs = _fetch_game_stats_logs(i)
    # a game without player rows contributes nothing, so leave it out of the concat
    if not fetched_logs.empty:
        stats_frames.append(fetched_logs)
    print('game ' + i + ' added!    ' + str(datetime.datetime.now().time())[:8])

# concatenate once instead of growing the frame inside the loop
game_stats_logs = pd.concat(stats_frames)

length_2 = len(game_stats_logs)
# drop duplicate game stats by game id and player id
game_stats_logs = game_stats_logs.drop_duplicates(['GAME_ID', 'PLAYER_ID'])
length_3 = len(game_stats_logs)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' player stats added.')

# commit new game stats logs to sql table (the whole table is rewritten)
game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' player stats commit complete!')

182419 player stats loaded.
game 0011300114 added!    20:57:51
0 duplicates droped.
0 player stats added.
182419 player stats commit complete!


In [63]:
# preview only the first rows instead of rendering the whole table
game_stats_logs.head(10)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,LOCATION,AGAINST_TEAM_ID
0,0011300001,12321,FBU,Istanbul,42547,Ayberk Olmaz,,,6,0,...,3,1,0,0,1,0,0,,HOME,1610612760
1,0011300001,12321,FBU,Istanbul,42534,Baris Ermis,,,9,1,...,1,0,0,1,1,0,2,,HOME,1610612760
2,0011300001,12321,FBU,Istanbul,42546,Berk Ugurlu,,,2,0,...,0,0,0,0,0,0,0,,HOME,1610612760
3,0011300001,12321,FBU,Istanbul,42531,Bo McCalebb,,,24,5,...,2,3,4,0,0,2,13,,HOME,1610612760
4,0011300001,12321,FBU,Istanbul,42544,Bojan Bogdanovic,,,31,4,...,4,0,0,0,1,1,19,,HOME,1610612760
5,0011300001,12321,FBU,Istanbul,42545,Emir Preldzic,,,26,3,...,2,4,2,0,3,1,9,,HOME,1610612760
6,0011300001,12321,FBU,Istanbul,42538,Gasper Vidmar,,,25,1,...,4,1,0,0,1,5,4,,HOME,1610612760
7,0011300001,12321,FBU,Istanbul,42537,Izzet Turkyilmaz,,,10,1,...,1,1,1,0,0,1,3,,HOME,1610612760
8,0011300001,12321,FBU,Istanbul,42541,James Birsen,,,21,3,...,2,0,1,1,1,0,6,,HOME,1610612760
9,0011300001,12321,FBU,Istanbul,42542,Kenan Sipahi,,,22,1,...,1,0,2,0,2,4,6,,HOME,1610612760
