In [1]:
import datetime
import functools
import os
import time

import pandas as pd
import numpy as np

import nba_py
import nba_py.game
import nba_py.player
import nba_py.team

import pymysql
from sqlalchemy import create_engine

# Database password used when building the SQLAlchemy connection URL.
# The NBA_STATS_DB_PWD environment variable, when set, takes precedence so the
# secret can be supplied without editing the notebook.
# FIXME(security): the literal fallback below is a credential stored in the
# notebook; rotate it and delete the fallback once the variable is in place.
pwd = os.environ.get('NBA_STATS_DB_PWD', 'gaoJibie2014')

In [5]:
# Refresh the `game_header` SQL table: load the stored rows, download the
# scoreboard for every day from the newest stored date through today, drop
# duplicate games and write the combined frame back.
conn = create_engine('mysql+pymysql://root:%s@118.190.202.87:3306/nba_stats' % pwd)

# first date to download when nothing usable is stored yet
first_date = datetime.date(2012, 10, 29)

try:
    # read sql table of game header; a missing table surfaces as ValueError.
    # Only the read is guarded, so a date-parsing ValueError further down can
    # no longer be mistaken for "no table" and end up replacing stored data.
    game_header = pd.read_sql_table('game_header', conn)
except ValueError:
    print('no table yet!')
    length_1 = 0
    header_frames = []
    begin = first_date
else:
    length_1 = len(game_header)
    print(str(length_1) + ' games loaded.')
    header_frames = [game_header]
    stored_dates = game_header['GAME_DATE_EST'].dropna()
    if stored_dates.empty:
        # table exists but holds no dated rows: start from the very beginning
        begin = first_date
    else:
        # set begin date to the newest date in sql table; max() instead of the
        # last row so the result does not depend on the row order of the read
        begin = datetime.datetime.strptime(str(stored_dates.max())[:10], "%Y-%m-%d").date()

# set end date to today
end = datetime.date.today()

# grab game headers from begin date to end date (both inclusive); frames are
# collected in a list and concatenated once instead of growing a frame per day
for day_offset in range((end - begin).days + 1):
    day = begin + datetime.timedelta(days = day_offset)
    header_frames.append(nba_py.Scoreboard(month = day.month,
                                           day = day.day,
                                           year = day.year,
                                           league_id = '00',
                                           offset = 0).game_header())
    print(str(day) + ' finished!    ' + str(datetime.datetime.now().time())[:8])

game_header = pd.concat(header_frames)

length_2 = len(game_header)
# drop the duplicate by game id; the begin date is downloaded again on purpose,
# so keep the freshly downloaded row rather than the older stored one
game_header = game_header.drop_duplicates('GAME_ID', keep = 'last')
length_3 = len(game_header)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' games added.')

# commit new game headers to sql table (the whole table is rewritten)
game_header.to_sql('game_header', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' game headers commit complete!')

6215 games loaded.
2016-12-31 finished!    10:23:21
2017-01-01 finished!    10:23:23
2017-01-02 finished!    10:23:25
2017-01-03 finished!    10:23:27
2017-01-04 finished!    10:23:29
2017-01-05 finished!    10:23:31
2017-01-06 finished!    10:23:33
2017-01-07 finished!    10:23:34
2017-01-08 finished!    10:23:36
2017-01-09 finished!    10:23:38
2017-01-10 finished!    10:23:40
2017-01-11 finished!    10:23:42
2017-01-12 finished!    10:23:44
2017-01-13 finished!    10:23:46
2017-01-14 finished!    10:23:48
2017-01-15 finished!    10:23:50
2017-01-16 finished!    10:23:52
2017-01-17 finished!    10:23:53
2017-01-18 finished!    10:23:55
2017-01-19 finished!    10:23:57
2017-01-20 finished!    10:23:59
2017-01-21 finished!    10:24:01
2017-01-22 finished!    10:24:03
2017-01-23 finished!    10:24:05
2017-01-24 finished!    10:24:07
2017-01-25 finished!    10:24:08
2017-01-26 finished!    10:24:10
2017-01-27 finished!    10:24:12
2017-01-28 finished!    10:24:14
2017-01-29 finished!    

2017-09-05 finished!    10:31:01
2017-09-06 finished!    10:31:03
2017-09-07 finished!    10:31:05
2017-09-08 finished!    10:31:06
2017-09-09 finished!    10:31:08
2017-09-10 finished!    10:31:09
2017-09-11 finished!    10:31:11
2017-09-12 finished!    10:31:13
2017-09-13 finished!    10:31:15
2017-09-14 finished!    10:31:17
2017-09-15 finished!    10:31:18
2017-09-16 finished!    10:31:20
2017-09-17 finished!    10:31:22
2017-09-18 finished!    10:31:23
2017-09-19 finished!    10:31:25
6 duplicates droped.
804 games added.
7019 game headers commit complete!


In [5]:
# preview the first rows only; the full frame holds thousands of games
game_header.head(10)

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,GAME_STATUS_ID,GAME_STATUS_TEXT,GAMECODE,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,LIVE_PERIOD,LIVE_PC_TIME,NATL_TV_BROADCASTER_ABBREVIATION,LIVE_PERIOD_TIME_BCAST,WH_STATUS
0,2012-10-30T00:00:00,1,0021200001,3,Final,20121030/WASCLE,1610612739,1610612764,2012,4,,,Q4 -,1
1,2012-10-30T00:00:00,2,0021200002,3,Final,20121030/BOSMIA,1610612748,1610612738,2012,4,,TNT,Q4 - TNT,1
2,2012-10-30T00:00:00,3,0021200003,3,Final,20121030/DALLAL,1610612747,1610612742,2012,4,,TNT,Q4 - TNT,1
0,2012-10-31T00:00:00,1,0021200004,3,Final,20121031/INDTOR,1610612761,1610612754,2012,4,,,Q4 -,1
1,2012-10-31T00:00:00,2,0021200005,3,Final,20121031/DENPHI,1610612755,1610612743,2012,4,,,Q4 -,1
2,2012-10-31T00:00:00,3,0021200006,3,Final,20121031/HOUDET,1610612765,1610612745,2012,4,,,Q4 -,1
3,2012-10-31T00:00:00,4,0021200007,3,Final,20121031/SACCHI,1610612741,1610612758,2012,4,,,Q4 -,1
4,2012-10-31T00:00:00,5,0021200008,3,Final,20121031/SASNOH,1610612740,1610612759,2012,4,,NBA TV,Q4 - NBA TV,1
5,2012-10-31T00:00:00,6,0021200009,3,Final,20121031/DALUTA,1610612762,1610612742,2012,4,,,Q4 -,1
6,2012-10-31T00:00:00,7,0021200010,3,Final,20121031/GSWPHX,1610612756,1610612744,2012,4,,,Q4 -,1


In [33]:
# Bring the `game_stats_logs` SQL table up to date: load the stored player
# rows, download box scores for games listed in `game_header` (defined by an
# earlier cell) that are not stored yet, then rewrite the whole table.
conn = create_engine('mysql+pymysql://root:%s@118.190.202.87:3306/nba_stats' % pwd)

# upper bound on the number of box scores downloaded in one run of this cell
max_games_per_run = 500


def _game_stats_with_matchup(game_id, headers):
    """Download one game's player box score, tagged with venue and opponent.

    game_id -- GAME_ID value to look up in `headers` and to request.
    headers -- game header frame with GAME_ID, HOME_TEAM_ID and
               VISITOR_TEAM_ID columns.

    Returns a frame with the home team's player rows followed by the away
    team's, each with LOCATION ('HOME' / 'AWAY') and AGAINST_TEAM_ID added.
    Raises IndexError when `game_id` has no row in `headers`.
    """
    game_stats = nba_py.game.Boxscore(game_id).player_stats()
    # take the game's header row explicitly instead of int()-casting a
    # one-row Series
    header_row = headers[headers['GAME_ID'] == game_id].iloc[0]
    home_team_id = int(header_row['HOME_TEAM_ID'])
    away_team_id = int(header_row['VISITOR_TEAM_ID'])
    sides = []
    for team_id, opponent_id, location in ((home_team_id, away_team_id, 'HOME'),
                                           (away_team_id, home_team_id, 'AWAY')):
        team_stats = game_stats[game_stats['TEAM_ID'] == team_id].copy()
        team_stats['LOCATION'] = location
        team_stats['AGAINST_TEAM_ID'] = opponent_id
        sides.append(team_stats)
    return pd.concat(sides)


try:
    # read sql table of game stats logs; a missing table surfaces as ValueError
    game_stats_logs = pd.read_sql_table('game_stats_logs', conn)
    length_1 = len(game_stats_logs)
    print(str(length_1) + ' player stats loaded.')
except ValueError:
    print('no table yet!')
    length_1 = 0
    # if there is no table yet, create one from the stats of the first game
    game_stats_logs = _game_stats_with_matchup(game_header['GAME_ID'].min(), game_header)
    # commit initialized game stats logs to sql table
    game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'replace')
    print('game stats logs initialized!')

# games present in game header but not in game stats logs, capped per run
# (an alternative would be to resume from the largest stored GAME_ID)
stored_ids = game_stats_logs['GAME_ID'].drop_duplicates()
all_ids = game_header['GAME_ID']
missing_ids = all_ids[~all_ids.isin(stored_ids)].head(max_games_per_run)

# collect per-game frames and concatenate once, rather than re-copying the
# full stored frame for every appended team
new_frames = []
for i in missing_ids:
    new_frames.append(_game_stats_with_matchup(i, game_header))
    print('game ' + i + ' added!    ' + str(datetime.datetime.now().time())[:8])

game_stats_logs = pd.concat([game_stats_logs] + new_frames)

length_2 = len(game_stats_logs)
# drop duplicate game stats by game id and player id
game_stats_logs = game_stats_logs.drop_duplicates(['GAME_ID', 'PLAYER_ID'])
length_3 = len(game_stats_logs)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' player stats added.')

# commit new game stats logs to sql table (the whole table is rewritten)
game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' player stats commit complete!')

152374 player stats loaded.
game 0011300114 added!    15:52:52
game 0021600140 added!    15:52:54
game 0021600141 added!    15:52:56
game 0021600142 added!    15:52:58
game 0021600143 added!    15:53:00
game 0021600144 added!    15:53:02
game 0021600145 added!    15:53:04
game 0021600146 added!    15:53:05
game 0021600147 added!    15:53:07
game 0021600148 added!    15:53:09
game 0021600149 added!    15:53:10
game 0021600150 added!    15:53:12
game 0021600151 added!    15:53:14
game 0021600152 added!    15:53:15
game 0021600153 added!    15:53:17
game 0021600154 added!    15:53:19
game 0021600155 added!    15:53:20
game 0021600156 added!    15:53:22
game 0021600157 added!    15:53:24
game 0021600158 added!    15:53:26
game 0021600159 added!    15:53:28
game 0021600160 added!    15:53:30
game 0021600161 added!    15:53:32
game 0021600162 added!    15:53:34
game 0021600163 added!    15:53:35
game 0021600164 added!    15:53:37
game 0021600165 added!    15:53:40
game 0021600166 added!    1

game 0021600374 added!    16:00:26
game 0021600375 added!    16:00:29
game 0021600376 added!    16:00:31
game 0021600377 added!    16:00:33
game 0021600378 added!    16:00:35
game 0021600379 added!    16:00:37
game 0021600380 added!    16:00:39
game 0021600381 added!    16:00:41
game 0021600382 added!    16:00:43
game 0021600383 added!    16:00:45
game 0021600384 added!    16:00:47
game 0021600385 added!    16:00:49
game 0021600386 added!    16:00:51
game 0021600387 added!    16:00:53
game 0021600388 added!    16:00:55
game 0021600389 added!    16:00:57
game 0021600390 added!    16:00:59
game 0021600391 added!    16:01:01
game 0021600392 added!    16:01:04
game 0021600393 added!    16:01:06
game 0021600394 added!    16:01:08
game 0021600395 added!    16:01:10
game 0021600396 added!    16:01:11
game 0021600397 added!    16:01:13
game 0021600398 added!    16:01:15
game 0021600399 added!    16:01:17
game 0021600400 added!    16:01:18
game 0021600401 added!    16:01:23
game 0021600402 adde

game 0021600609 added!    16:08:06
game 0021600610 added!    16:08:08
game 0021600611 added!    16:08:10
game 0021600612 added!    16:08:12
game 0021600613 added!    16:08:14
game 0021600614 added!    16:08:16
game 0021600615 added!    16:08:18
game 0021600616 added!    16:08:20
game 0021600617 added!    16:08:22
game 0021600618 added!    16:08:24
game 0021600619 added!    16:08:25
game 0021600620 added!    16:08:28
game 0021600621 added!    16:08:30
game 0021600622 added!    16:08:32
game 0021600623 added!    16:08:33
game 0021600624 added!    16:08:35
game 0021600625 added!    16:08:38
game 0021600626 added!    16:08:40
game 0021600627 added!    16:08:42
game 0021600628 added!    16:08:44
game 0021600629 added!    16:08:46
game 0021600630 added!    16:08:48
game 0021600631 added!    16:08:49
game 0021600632 added!    16:08:52
game 0021600633 added!    16:08:54
game 0021600634 added!    16:08:56
game 0021600635 added!    16:08:58
game 0021600636 added!    16:09:00
game 0021600637 adde

In [63]:
# preview the first rows only rather than rendering the whole player-stats frame
game_stats_logs.head(10)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,LOCATION,AGAINST_TEAM_ID
0,0011300001,12321,FBU,Istanbul,42547,Ayberk Olmaz,,,6,0,...,3,1,0,0,1,0,0,,HOME,1610612760
1,0011300001,12321,FBU,Istanbul,42534,Baris Ermis,,,9,1,...,1,0,0,1,1,0,2,,HOME,1610612760
2,0011300001,12321,FBU,Istanbul,42546,Berk Ugurlu,,,2,0,...,0,0,0,0,0,0,0,,HOME,1610612760
3,0011300001,12321,FBU,Istanbul,42531,Bo McCalebb,,,24,5,...,2,3,4,0,0,2,13,,HOME,1610612760
4,0011300001,12321,FBU,Istanbul,42544,Bojan Bogdanovic,,,31,4,...,4,0,0,0,1,1,19,,HOME,1610612760
5,0011300001,12321,FBU,Istanbul,42545,Emir Preldzic,,,26,3,...,2,4,2,0,3,1,9,,HOME,1610612760
6,0011300001,12321,FBU,Istanbul,42538,Gasper Vidmar,,,25,1,...,4,1,0,0,1,5,4,,HOME,1610612760
7,0011300001,12321,FBU,Istanbul,42537,Izzet Turkyilmaz,,,10,1,...,1,1,1,0,0,1,3,,HOME,1610612760
8,0011300001,12321,FBU,Istanbul,42541,James Birsen,,,21,3,...,2,0,1,1,1,0,6,,HOME,1610612760
9,0011300001,12321,FBU,Istanbul,42542,Kenan Sipahi,,,22,1,...,1,0,2,0,2,4,6,,HOME,1610612760


In [23]:
# Incremental variant of the game stats update: read only the stored GAME_ID
# values, download box scores for games in `game_header` (defined by an
# earlier cell) that are not stored yet, and append just the new rows.
conn = create_engine('mysql+pymysql://root:%s@118.190.202.87:3306/nba_stats' % pwd)

# upper bound on the number of box scores downloaded in one run of this cell
max_games_per_run = 500

try:
    # read only the GAME_ID column of the game stats logs table; `columns`
    # expects a list of column names
    game_stats_logs_id = pd.read_sql_table('game_stats_logs', conn, columns = ['GAME_ID'])
    length_1 = len(game_stats_logs_id)
    print(str(length_1) + ' player stats loaded.')
except ValueError:
    print('no table yet!')
    length_1 = 0
    # nothing is stored yet: use an empty id list so every game counts as
    # missing. No placeholder table is written; the append at the end of the
    # cell creates the table with the real columns on the first write.
    game_stats_logs_id = pd.DataFrame(columns = ['GAME_ID'])

# games present in game header but not in game stats logs, capped per run
# (an alternative would be to resume from the largest stored GAME_ID)
stored_ids = game_stats_logs_id['GAME_ID'].drop_duplicates()
all_ids = game_header['GAME_ID']
missing_ids = all_ids[~all_ids.isin(stored_ids)].head(max_games_per_run)

# per-team frames are collected here and concatenated once after the loop
new_frames = []
for i in missing_ids:
    game_stats = nba_py.game.Boxscore(i).player_stats()
    # take the game's header row explicitly instead of int()-casting a
    # one-row Series
    header_row = game_header[game_header['GAME_ID'] == i].iloc[0]
    home_team_id = int(header_row['HOME_TEAM_ID'])
    away_team_id = int(header_row['VISITOR_TEAM_ID'])
    for team_id, opponent_id, location in ((home_team_id, away_team_id, 'HOME'),
                                           (away_team_id, home_team_id, 'AWAY')):
        team_stats = game_stats[game_stats['TEAM_ID'] == team_id].copy()
        team_stats['LOCATION'] = location
        team_stats['AGAINST_TEAM_ID'] = opponent_id
        new_frames.append(team_stats)
    print('game ' + i + ' added!    ' + str(datetime.datetime.now().time())[:8])

if new_frames:
    game_stats_logs = pd.concat(new_frames)
    length_2 = len(game_stats_logs)
    # drop duplicate game stats by game id and player id
    game_stats_logs = game_stats_logs.drop_duplicates(['GAME_ID', 'PLAYER_ID'])
else:
    # nothing was downloaded: skip the de-duplication, which needs the
    # GAME_ID / PLAYER_ID columns that an empty frame does not have
    game_stats_logs = pd.DataFrame()
    length_2 = 0
length_3 = len(game_stats_logs)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3) + ' player stats added.')

# commit new game stats logs to sql table; only the new rows are appended
if length_3:
    game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'append')
print(str(length_3) + ' player stats commit complete!')

In [2]:
# build the MySQL connection URL with the password filled in, then open the engine
db_url = 'mysql+pymysql://root:%s@118.190.202.87:3306/nba_stats' % pwd
conn = create_engine(db_url)

In [4]:
# load the complete stored game stats logs table into a DataFrame
game_stats_logs = pd.read_sql_table(table_name = 'game_stats_logs', con = conn)