In [1]:
import datetime
import functools
import os
import time

import numpy as np
import pandas as pd

import nba_py
import nba_py.game
import nba_py.player
import nba_py.team

import pymysql
from sqlalchemy import create_engine

# Database password used when building the SQLAlchemy connection URL.
# Read it from the NBA_DB_PASSWORD environment variable so the secret is never
# saved inside the notebook; falls back to the empty string, which is the value
# this cell previously hard-coded.
pwd = os.environ.get('NBA_DB_PASSWORD', '')

In [2]:
# Refresh the `game_header` SQL table:
#   1. load the rows that are already stored (if the table exists),
#   2. fetch the daily scoreboard from the newest stored date through today,
#   3. de-duplicate by GAME_ID and write the merged table back.
conn = create_engine('mysql+pymysql://root:%s@118.190.202.87:3306/nba_stats' % pwd)

try:
    # read sql table of game header
    game_header = pd.read_sql_table('game_header', conn)
except ValueError:
    # pandas raises ValueError when the requested table is not found
    print('no table yet!')
    game_header = pd.DataFrame()

length_1 = len(game_header)
if length_1:
    print(str(length_1) + ' games loaded.')
    # Resume from the newest stored date. Use max() rather than the last row:
    # the row order coming back from SQL is not guaranteed, and ISO-formatted
    # date strings compare correctly as plain strings.
    newest_date = game_header['GAME_DATE_EST'].dropna().max()
    begin = datetime.datetime.strptime(newest_date[:10], "%Y-%m-%d").date()
    frames = [game_header]
else:
    # no table (or an empty one): start from 2012-10-29
    begin = datetime.date(2012, 10, 29)
    frames = []

# set end date to today
end = datetime.date.today()

# Grab game headers for every day from begin to end (inclusive). Frames are
# collected in a list and concatenated once; growing a DataFrame inside the
# loop copies all rows on every iteration and DataFrame.append no longer
# exists in pandas 2.x.
for i in range((end - begin).days + 1):
    day = begin + datetime.timedelta(days = i)
    frames.append(nba_py.Scoreboard(month = day.month,
                                    day = day.day,
                                    year = day.year,
                                    league_id = '00',
                                    offset = 0).game_header())
    print(str(day) + ' finished!    ' + str(datetime.datetime.now().time())[:8])

game_header = pd.concat(frames)

length_2 = len(game_header)
# Drop duplicates by game id. keep='last' so that the freshly fetched header
# of a re-requested day replaces the (possibly stale) row loaded from SQL.
game_header = game_header.drop_duplicates('GAME_ID', keep = 'last')
length_3 = len(game_header)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3 - length_1) + ' games added.')

# commit the merged game headers to the sql table (the whole table is rewritten)
game_header.to_sql('game_header', conn, index = False, if_exists = 'replace')
print(str(length_3) + ' game headers commit complete!')

7019 games loaded.
2017-06-12 finished!    21:01:28
2017-06-13 finished!    21:01:30
2017-06-14 finished!    21:01:32
2017-06-15 finished!    21:01:33
2017-06-16 finished!    21:01:35
2017-06-17 finished!    21:01:36
2017-06-18 finished!    21:01:38
2017-06-19 finished!    21:01:39
2017-06-20 finished!    21:01:41
2017-06-21 finished!    21:01:44
2017-06-22 finished!    21:01:45
2017-06-23 finished!    21:01:47
2017-06-24 finished!    21:01:49
2017-06-25 finished!    21:01:50
2017-06-26 finished!    21:01:52
2017-06-27 finished!    21:01:54
2017-06-28 finished!    21:01:55
2017-06-29 finished!    21:01:57
2017-06-30 finished!    21:01:59
2017-07-01 finished!    21:02:01
2017-07-02 finished!    21:02:02
2017-07-03 finished!    21:02:04
2017-07-04 finished!    21:02:05
2017-07-05 finished!    21:02:07
2017-07-06 finished!    21:02:09
2017-07-07 finished!    21:02:11
2017-07-08 finished!    21:02:13
2017-07-09 finished!    21:02:15
2017-07-10 finished!    21:02:17
2017-07-11 finished!    

In [6]:
# Download player box scores for every game listed in `game_header` (built by
# the game-header cell) that is not yet present in the `game_stats_logs` SQL
# table, tag each row with LOCATION / AGAINST_TEAM_ID, and append the new rows
# to the table.
conn = create_engine('mysql+pymysql://root:%s@118.190.202.87:3306/nba_stats' % pwd)

try:
    # read only the GAME_ID column of the stored game stats logs
    game_stats_logs_id = pd.read_sql_table('game_stats_logs', conn, columns = ['GAME_ID'])
except ValueError:
    # pandas raises ValueError when the requested table is not found.
    # Nothing is stored yet, so every game counts as new; the table itself is
    # created by the append at the end of this cell.
    print('no table yet!')
    game_stats_logs_id = pd.DataFrame({'GAME_ID': []})
else:
    print(str(len(game_stats_logs_id)) + ' player stats loaded.')

length_1 = len(game_stats_logs_id)

# games that are in game header but not yet in game stats logs
known_game_ids = set(game_stats_logs_id['GAME_ID'])
new_games = game_header[~game_header['GAME_ID'].isin(known_game_ids)]

stats_frames = []
for game_id, home_team_id, away_team_id in zip(new_games['GAME_ID'],
                                               new_games['HOME_TEAM_ID'],
                                               new_games['VISITOR_TEAM_ID']):
    home_team_id = int(home_team_id)
    away_team_id = int(away_team_id)
    game_stats = nba_py.game.Boxscore(game_id).player_stats()
    # split the box score per team and record venue and opponent on each row
    for team_id, against_team_id, location in ((home_team_id, away_team_id, 'HOME'),
                                               (away_team_id, home_team_id, 'AWAY')):
        team_stats_logs = game_stats[game_stats['TEAM_ID'] == team_id].copy()
        team_stats_logs['LOCATION'] = location
        team_stats_logs['AGAINST_TEAM_ID'] = against_team_id
        stats_frames.append(team_stats_logs)
    print('game ' + game_id + ' added!    ' + str(datetime.datetime.now().time())[:8])

# Concatenate once instead of growing a DataFrame inside the loop; the
# original row index of each box score is kept, as before.
game_stats_logs = pd.concat(stats_frames) if stats_frames else pd.DataFrame()

length_2 = len(game_stats_logs)
if length_2:
    # drop duplicate game stats by game id and player id
    # (skipped when nothing was fetched: an empty frame has no such columns)
    game_stats_logs = game_stats_logs.drop_duplicates(['GAME_ID', 'PLAYER_ID'])
length_3 = len(game_stats_logs)
print(str(length_2 - length_3) + ' duplicates droped.')
print(str(length_3) + ' player stats added.')

# commit new game stats logs to sql table
if length_3:
    game_stats_logs.to_sql('game_stats_logs', conn, index = False, if_exists = 'append')
print(str(length_3) + ' player stats commit complete!')

177958 player stats loaded.
game 0011300114 added!    21:32:07
game 0021601138 added!    21:32:09
game 0021601139 added!    21:32:11
game 0021601140 added!    21:32:12
game 0021601141 added!    21:32:14
game 0021601145 added!    21:32:16
game 0021601142 added!    21:32:18
game 0021601143 added!    21:32:19
game 0021601144 added!    21:32:21
game 0021601151 added!    21:32:23
game 0021601146 added!    21:32:25
game 0021601147 added!    21:32:27
game 0021601148 added!    21:32:29
game 0021601149 added!    21:32:31
game 0021601150 added!    21:32:32
game 0021601152 added!    21:32:34
game 0021601153 added!    21:32:36
game 0021600940 added!    21:32:38
game 0021601154 added!    21:32:39
game 0021601155 added!    21:32:41
game 0021601156 added!    21:32:43
game 0021601157 added!    21:32:45
game 0021601158 added!    21:32:47
game 0021601159 added!    21:32:48
game 0021601160 added!    21:32:50
game 0021601161 added!    21:32:52
game 0021601164 added!    21:32:54
game 0021601162 added!    2

In [7]:
# Preview the newly fetched player stats; show only the first rows instead of
# rendering the whole frame.
game_stats_logs.head(10)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,LOCATION,AGAINST_TEAM_ID
13,0021601138,1610612741,CHI,Chicago,202703,Nikola Mirotic,F,,30:46,2.0,...,9.0,5.0,2.0,0.0,1.0,1.0,8.0,17.0,HOME,1610612737
14,0021601138,1610612741,CHI,Chicago,1627835,Paul Zipser,F,,26:31,3.0,...,2.0,1.0,0.0,2.0,0.0,4.0,10.0,1.0,HOME,1610612737
15,0021601138,1610612741,CHI,Chicago,201577,Robin Lopez,C,,36:16,4.0,...,4.0,1.0,0.0,0.0,2.0,2.0,8.0,8.0,HOME,1610612737
16,0021601138,1610612741,CHI,Chicago,202710,Jimmy Butler,G,,42:32,11.0,...,5.0,8.0,0.0,1.0,1.0,3.0,33.0,1.0,HOME,1610612737
17,0021601138,1610612741,CHI,Chicago,200765,Rajon Rondo,G,,35:33,11.0,...,11.0,6.0,3.0,0.0,7.0,3.0,25.0,5.0,HOME,1610612737
18,0021601138,1610612741,CHI,Chicago,1627756,Denzel Valentine,,,28:10,5.0,...,2.0,1.0,0.0,1.0,1.0,2.0,13.0,0.0,HOME,1610612737
19,0021601138,1610612741,CHI,Chicago,1626171,Bobby Portis,,,17:04,0.0,...,4.0,1.0,1.0,0.0,0.0,1.0,0.0,-15.0,HOME,1610612737
20,0021601138,1610612741,CHI,Chicago,203487,Michael Carter-Williams,,,11:24,2.0,...,2.0,2.0,0.0,0.0,0.0,1.0,5.0,-1.0,HOME,1610612737
21,0021601138,1610612741,CHI,Chicago,203530,Joffrey Lauvergne,,,11:44,2.0,...,1.0,0.0,0.0,0.0,0.0,0.0,4.0,-6.0,HOME,1610612737
22,0021601138,1610612741,CHI,Chicago,203477,Isaiah Canaan,,DNP - Coach's Decision,,,...,,,,,,,,,HOME,1610612737


In [23]:
# Label every row with the kind of game it belongs to, derived from the first
# three characters of GAME_ID. The prefix -> label mapping is defined right
# here so this cell does not depend on a cell that appears further down the
# notebook (which breaks Restart & Run All).
game_type = {'001': 'pre_season', '002': 'regular_season', '003': 'all_star', '004': 'play_offs'}
# .str[:3] is the vectorised form of slicing each id; prefixes missing from the
# mapping become NaN
game_stats_logs['GAME_TYPE'] = game_stats_logs['GAME_ID'].str[:3].map(game_type)

In [24]:
# Check the result of adding GAME_TYPE; show only the first rows instead of
# rendering the whole frame.
game_stats_logs.head(10)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,LOCATION,AGAINST_TEAM_ID,GAME_TYPE
13,0021601138,1610612741,CHI,Chicago,202703,Nikola Mirotic,F,,30:46,2.0,...,5.0,2.0,0.0,1.0,1.0,8.0,17.0,HOME,1610612737,regular_season
14,0021601138,1610612741,CHI,Chicago,1627835,Paul Zipser,F,,26:31,3.0,...,1.0,0.0,2.0,0.0,4.0,10.0,1.0,HOME,1610612737,regular_season
15,0021601138,1610612741,CHI,Chicago,201577,Robin Lopez,C,,36:16,4.0,...,1.0,0.0,0.0,2.0,2.0,8.0,8.0,HOME,1610612737,regular_season
16,0021601138,1610612741,CHI,Chicago,202710,Jimmy Butler,G,,42:32,11.0,...,8.0,0.0,1.0,1.0,3.0,33.0,1.0,HOME,1610612737,regular_season
17,0021601138,1610612741,CHI,Chicago,200765,Rajon Rondo,G,,35:33,11.0,...,6.0,3.0,0.0,7.0,3.0,25.0,5.0,HOME,1610612737,regular_season
18,0021601138,1610612741,CHI,Chicago,1627756,Denzel Valentine,,,28:10,5.0,...,1.0,0.0,1.0,1.0,2.0,13.0,0.0,HOME,1610612737,regular_season
19,0021601138,1610612741,CHI,Chicago,1626171,Bobby Portis,,,17:04,0.0,...,1.0,1.0,0.0,0.0,1.0,0.0,-15.0,HOME,1610612737,regular_season
20,0021601138,1610612741,CHI,Chicago,203487,Michael Carter-Williams,,,11:24,2.0,...,2.0,0.0,0.0,0.0,1.0,5.0,-1.0,HOME,1610612737,regular_season
21,0021601138,1610612741,CHI,Chicago,203530,Joffrey Lauvergne,,,11:44,2.0,...,0.0,0.0,0.0,0.0,0.0,4.0,-6.0,HOME,1610612737,regular_season
22,0021601138,1610612741,CHI,Chicago,203477,Isaiah Canaan,,DNP - Coach's Decision,,,...,,,,,,,,HOME,1610612737,regular_season


In [22]:
# Maps the first three characters of a GAME_ID to a game-type label.
game_type = {
    '001': 'pre_season',
    '002': 'regular_season',
    '003': 'all_star',
    '004': 'play_offs',
}