In [2]:
import datetime
import time
import functools

import pandas as pd
import numpy as np

import nba_py
import nba_py.game
import nba_py.player
import nba_py.team

import pymysql
from sqlalchemy import create_engine

In [7]:
# Incrementally scrape NBA scoreboard game headers day by day and persist them
# to the `game_header` SQL table (the table is rewritten in full on each run).

# NOTE(review): the connection URL (root user, empty password, fixed host) is
# hardcoded -- consider loading it from an environment variable or a config
# file that is kept out of version control.
conn = create_engine('mysql+pymysql://root:@118.190.202.87:3306/nba_stats')

# first date to scrape when nothing has been stored yet
first_date = datetime.date(2012, 10, 29)

# frames to be concatenated once at the end (stored rows first, then one per day)
frames = []

try:
    # read sql table of game header
    stored_header = pd.read_sql_table('game_header', conn)
except ValueError:
    # read_sql_table raised ValueError: treated as "table does not exist yet"
    print('no table yet!')
    length_1 = 0
    begin = first_date
else:
    length_1 = len(stored_header)
    print(str(length_1) + ' games loaded.')
    frames.append(stored_header)
    stored_dates = stored_header['GAME_DATE_EST'].dropna()
    if len(stored_dates) > 0:
        # resume from the newest stored date; the values are sliced as
        # 'YYYY-MM-DD...' strings, so the string maximum is the latest date
        # and the result does not depend on the row order of the table
        begin = datetime.datetime.strptime(stored_dates.max()[:10], "%Y-%m-%d").date()
    else:
        # table exists but holds no usable dates: start from scratch
        begin = first_date

# last date to scrape (inclusive)
end = datetime.date(2013, 12, 31)

for i in range((end - begin).days + 1):
    # grab game headers from begin date to end date
    day = begin + datetime.timedelta(days = i)
    frames.append(nba_py.Scoreboard(month = day.month,
                                    day = day.day,
                                    year = day.year,
                                    league_id = '00',
                                    offset = 0).game_header())
    print(str(day) + ' finished!    ' + str(datetime.datetime.now().time())[:8])

# single concat instead of DataFrame.append inside the loop
# (append copies the whole frame on every call and was removed in pandas 2.0)
game_header = pd.concat(frames)

length_2 = len(game_header)
# drop the duplicate by game id (the begin date is always re-fetched when resuming)
game_header = game_header.drop_duplicates('GAME_ID')
length_3 = len(game_header)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' games added.')

# rows are left in fetch order (stored rows first, then day by day)

# commit new game headers to sql table
game_header.to_sql('game_header', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' game headers commit complete!')

456 games loaded.
2012-12-31 finished!    22:33:11
2013-01-01 finished!    22:33:13
2013-01-02 finished!    22:33:15
2013-01-03 finished!    22:33:17
2013-01-04 finished!    22:33:21
2013-01-05 finished!    22:33:24
2013-01-06 finished!    22:33:25
2013-01-07 finished!    22:33:27
2013-01-08 finished!    22:33:28
2013-01-09 finished!    22:33:30
2013-01-10 finished!    22:33:33
2013-01-11 finished!    22:33:35
2013-01-12 finished!    22:33:37
2013-01-13 finished!    22:33:39
2013-01-14 finished!    22:33:41
2013-01-15 finished!    22:33:43
2013-01-16 finished!    22:33:45
2013-01-17 finished!    22:33:47
2013-01-18 finished!    22:33:48
2013-01-19 finished!    22:33:50
2013-01-20 finished!    22:33:52
2013-01-21 finished!    22:33:54
2013-01-22 finished!    22:33:56
2013-01-23 finished!    22:33:57
2013-01-24 finished!    22:33:59
2013-01-25 finished!    22:34:01
2013-01-26 finished!    22:34:04
2013-01-27 finished!    22:34:06
2013-01-28 finished!    22:34:08
2013-01-29 finished!    2

2013-09-05 finished!    22:41:03
2013-09-06 finished!    22:41:05
2013-09-07 finished!    22:41:07
2013-09-08 finished!    22:41:08
2013-09-09 finished!    22:41:10
2013-09-10 finished!    22:41:12
2013-09-11 finished!    22:41:14
2013-09-12 finished!    22:41:16
2013-09-13 finished!    22:41:17
2013-09-14 finished!    22:41:19
2013-09-15 finished!    22:41:22
2013-09-16 finished!    22:41:23
2013-09-17 finished!    22:41:26
2013-09-18 finished!    22:41:28
2013-09-19 finished!    22:41:30
2013-09-20 finished!    22:41:32
2013-09-21 finished!    22:41:34
2013-09-22 finished!    22:41:36
2013-09-23 finished!    22:41:38
2013-09-24 finished!    22:41:41
2013-09-25 finished!    22:41:43
2013-09-26 finished!    22:41:45
2013-09-27 finished!    22:41:47
2013-09-28 finished!    22:41:50
2013-09-29 finished!    22:41:52
2013-09-30 finished!    22:41:53
2013-10-01 finished!    22:41:55
2013-10-02 finished!    22:41:57
2013-10-03 finished!    22:41:58
2013-10-04 finished!    22:42:00
2013-10-05

In [5]:
# preview only the first rows instead of dumping the whole frame
game_header.head(10)

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,GAME_STATUS_ID,GAME_STATUS_TEXT,GAMECODE,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,LIVE_PERIOD,LIVE_PC_TIME,NATL_TV_BROADCASTER_ABBREVIATION,LIVE_PERIOD_TIME_BCAST,WH_STATUS
0,2012-10-30T00:00:00,1,0021200001,3,Final,20121030/WASCLE,1610612739,1610612764,2012,4,,,Q4 -,1
1,2012-10-30T00:00:00,2,0021200002,3,Final,20121030/BOSMIA,1610612748,1610612738,2012,4,,TNT,Q4 - TNT,1
2,2012-10-30T00:00:00,3,0021200003,3,Final,20121030/DALLAL,1610612747,1610612742,2012,4,,TNT,Q4 - TNT,1
0,2012-10-31T00:00:00,1,0021200004,3,Final,20121031/INDTOR,1610612761,1610612754,2012,4,,,Q4 -,1
1,2012-10-31T00:00:00,2,0021200005,3,Final,20121031/DENPHI,1610612755,1610612743,2012,4,,,Q4 -,1
2,2012-10-31T00:00:00,3,0021200006,3,Final,20121031/HOUDET,1610612765,1610612745,2012,4,,,Q4 -,1
3,2012-10-31T00:00:00,4,0021200007,3,Final,20121031/SACCHI,1610612741,1610612758,2012,4,,,Q4 -,1
4,2012-10-31T00:00:00,5,0021200008,3,Final,20121031/SASNOH,1610612740,1610612759,2012,4,,NBA TV,Q4 - NBA TV,1
5,2012-10-31T00:00:00,6,0021200009,3,Final,20121031/DALUTA,1610612762,1610612742,2012,4,,,Q4 -,1
6,2012-10-31T00:00:00,7,0021200010,3,Final,20121031/GSWPHX,1610612756,1610612744,2012,4,,,Q4 -,1


In [6]:
# Download per-player box score lines for every game in `game_header` that is
# not yet stored, tag each line with home/away information, and persist the
# result to the `game_stats_logs` SQL table (rewritten in full on each run).


def _label_home_away(game_stats, home_team_id, visitor_team_id):
    """Split one game's player stats by team and tag each side.

    Rows whose TEAM_ID equals `home_team_id` get LOCATION 'HOME' and
    AGAINST_TEAM_ID `visitor_team_id`; rows whose TEAM_ID equals
    `visitor_team_id` get LOCATION 'AWAY' and AGAINST_TEAM_ID `home_team_id`.
    Returns a new frame with the home rows followed by the away rows; the
    input frame is not modified.
    """
    home_stats_logs = game_stats[game_stats['TEAM_ID'] == home_team_id].copy()
    home_stats_logs['LOCATION'] = 'HOME'
    home_stats_logs['AGAINST_TEAM_ID'] = visitor_team_id

    away_stats_logs = game_stats[game_stats['TEAM_ID'] == visitor_team_id].copy()
    away_stats_logs['LOCATION'] = 'AWAY'
    away_stats_logs['AGAINST_TEAM_ID'] = home_team_id

    return pd.concat([home_stats_logs, away_stats_logs])


def _fetch_game_stats_logs(game_id):
    """Fetch the box score player stats of `game_id` and label them home/away.

    The home and visitor team ids are looked up in the module-level
    `game_header` frame.
    """
    game_stats = nba_py.game.Boxscore(game_id).player_stats()
    is_game = game_header['GAME_ID'] == game_id
    # .iloc[0] instead of int(<Series>): converting a single-element Series
    # with int() is deprecated in recent pandas versions
    home_team_id = int(game_header.loc[is_game, 'HOME_TEAM_ID'].iloc[0])
    visitor_team_id = int(game_header.loc[is_game, 'VISITOR_TEAM_ID'].iloc[0])
    return _label_home_away(game_stats, home_team_id, visitor_team_id)


try:
    # read sql table of game stats logs
    game_stats_logs = pd.read_sql_table('game_stats_logs', conn)
    length_1 = len(game_stats_logs)
    print(str(length_1) + ' player stats loaded.')
except ValueError:
    print('no table yet!')
    length_1 = 0
    # if there is no table yet, create one by getting the first game stats
    initial_game = game_header['GAME_ID'].min()
    game_stats_logs = _fetch_game_stats_logs(initial_game)
    # commit initialized game stats logs to sql table
    game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'replace')
    print('game stats logs initialized!')

# game ids present in game header but not yet in game stats logs
missing_game_ids = game_header['GAME_ID'][~game_header['GAME_ID'].isin(game_stats_logs['GAME_ID'])]

# collect the new frames and concatenate once
# (DataFrame.append copies the whole frame on every call and was removed in pandas 2.0)
new_stats_logs = []
for i in missing_game_ids:
    new_stats_logs.append(_fetch_game_stats_logs(i))
    print('game ' + i + ' added!    ' + str(datetime.datetime.now().time())[:8])

game_stats_logs = pd.concat([game_stats_logs] + new_stats_logs)

length_2 = len(game_stats_logs)
# drop duplicate game stats by game id and player id
game_stats_logs = game_stats_logs.drop_duplicates(['GAME_ID', 'PLAYER_ID'])
length_3 = len(game_stats_logs)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' player stats added.')

# commit new game stats logs to sql table
game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' player stats commit complete!')

no table yet!
game stats logs initialized!
game 0021200002 added!    22:18:10
game 0021200003 added!    22:18:12
game 0021200004 added!    22:18:14
game 0021200005 added!    22:18:15
game 0021200006 added!    22:18:17
game 0021200007 added!    22:18:19
game 0021200008 added!    22:18:21
game 0021200009 added!    22:18:23
game 0021200010 added!    22:18:25
game 0021200011 added!    22:18:26
game 0021200012 added!    22:18:28
game 0021200014 added!    22:18:30
game 0021200015 added!    22:18:31
game 0021200016 added!    22:18:33
game 0021200017 added!    22:18:35
game 0021200018 added!    22:18:37
game 0021200019 added!    22:18:38
game 0021200020 added!    22:18:40
game 0021200021 added!    22:18:42
game 0021200022 added!    22:18:44
game 0021200023 added!    22:18:46
game 0021200024 added!    22:18:47
game 0021200025 added!    22:18:49
game 0021200026 added!    22:18:50
game 0021200027 added!    22:18:52
game 0021200028 added!    22:18:53
game 0021200029 added!    22:18:55
game 0021200

game 0021200235 added!    22:24:53
game 0021200236 added!    22:24:55
game 0021200237 added!    22:24:57
game 0021200238 added!    22:24:58
game 0021200239 added!    22:25:00
game 0021200240 added!    22:25:01
game 0021200241 added!    22:25:03
game 0021200242 added!    22:25:05
game 0021200243 added!    22:25:06
game 0021200244 added!    22:25:09
game 0021200245 added!    22:25:10
game 0021200246 added!    22:25:12
game 0021200247 added!    22:25:13
game 0021200248 added!    22:25:15
game 0021200249 added!    22:25:16
game 0021200250 added!    22:25:18
game 0021200251 added!    22:25:19
game 0021200252 added!    22:25:21
game 0021200253 added!    22:25:22
game 0021200254 added!    22:25:24
game 0021200255 added!    22:25:26
game 0021200256 added!    22:25:28
game 0021200257 added!    22:25:30
game 0021200258 added!    22:25:31
game 0021200259 added!    22:25:32
game 0021200261 added!    22:25:34
game 0021200260 added!    22:25:35
game 0021200262 added!    22:25:37
game 0021200263 adde

In [63]:
# preview only the first rows instead of dumping the whole frame
game_stats_logs.head(10)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,LOCATION,AGAINST_TEAM_ID
0,0011300001,12321,FBU,Istanbul,42547,Ayberk Olmaz,,,6,0,...,3,1,0,0,1,0,0,,HOME,1610612760
1,0011300001,12321,FBU,Istanbul,42534,Baris Ermis,,,9,1,...,1,0,0,1,1,0,2,,HOME,1610612760
2,0011300001,12321,FBU,Istanbul,42546,Berk Ugurlu,,,2,0,...,0,0,0,0,0,0,0,,HOME,1610612760
3,0011300001,12321,FBU,Istanbul,42531,Bo McCalebb,,,24,5,...,2,3,4,0,0,2,13,,HOME,1610612760
4,0011300001,12321,FBU,Istanbul,42544,Bojan Bogdanovic,,,31,4,...,4,0,0,0,1,1,19,,HOME,1610612760
5,0011300001,12321,FBU,Istanbul,42545,Emir Preldzic,,,26,3,...,2,4,2,0,3,1,9,,HOME,1610612760
6,0011300001,12321,FBU,Istanbul,42538,Gasper Vidmar,,,25,1,...,4,1,0,0,1,5,4,,HOME,1610612760
7,0011300001,12321,FBU,Istanbul,42537,Izzet Turkyilmaz,,,10,1,...,1,1,1,0,0,1,3,,HOME,1610612760
8,0011300001,12321,FBU,Istanbul,42541,James Birsen,,,21,3,...,2,0,1,1,1,0,6,,HOME,1610612760
9,0011300001,12321,FBU,Istanbul,42542,Kenan Sipahi,,,22,1,...,1,0,2,0,2,4,6,,HOME,1610612760
