## Importing Packages

In [1]:
#Importing packages
from nba_api.stats.static import players
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import playbyplayv2
import requests, pandas as pd, sys
# !{sys.executable} -m pip install tqdm
from tqdm import tqdm

In [2]:
#Creating header var for api pull
headers  = {
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
}

In [3]:
# create function that gets pbp (play by play) logs from the 2020-21 season
def get_data(game_id):
    play_by_play_url = "https://cdn.nba.com/static/json/liveData/playbyplay/playbyplay_"+game_id+".json"
    response = requests.get(url=play_by_play_url, headers=headers).json()
    play_by_play = response['game']['actions']
    df = pd.DataFrame(play_by_play)
    df['gameid'] = game_id
    return df

### Defining parameters for data pull

In [4]:
gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable='2021-22',
                                               league_id_nullable='00',
                                               season_type_nullable='Regular Season')

### Retrieving Game Ids

In [5]:
games = gamefinder.get_data_frames()[0]

In [6]:
#getting a list of distinct game ids
game_ids = games['GAME_ID'].unique()

In [7]:
len(game_ids)

1230

In [8]:
pbpdata = []

In [9]:
#Here TDQM() creates a % completion bar
for game_id in tqdm(game_ids):
    game_data = get_data(game_id)
    pbpdata.append(game_data)

final_df = pd.concat(pbpdata, ignore_index=True)

100%|██████████| 1230/1230 [09:43<00:00,  2.11it/s]


## Instantiating Data Frame

In [10]:
final_df_copy = pd.DataFrame(final_df)

In [11]:
#Primary columns in the DF
final_df_copy.columns

Index(['actionNumber', 'clock', 'timeActual', 'period', 'periodType',
       'actionType', 'subType', 'qualifiers', 'personId', 'x', 'y',
       'possession', 'scoreHome', 'scoreAway', 'edited', 'orderNumber',
       'xLegacy', 'yLegacy', 'isFieldGoal', 'side', 'description',
       'personIdsFilter', 'teamId', 'teamTricode', 'descriptor',
       'jumpBallRecoveredName', 'jumpBallRecoverdPersonId', 'playerName',
       'playerNameI', 'jumpBallWonPlayerName', 'jumpBallWonPersonId',
       'jumpBallLostPlayerName', 'jumpBallLostPersonId', 'shotDistance',
       'shotResult', 'shotActionNumber', 'reboundTotal',
       'reboundDefensiveTotal', 'reboundOffensiveTotal', 'pointsTotal',
       'assistPlayerNameInitial', 'assistPersonId', 'assistTotal',
       'officialId', 'foulPersonalTotal', 'foulTechnicalTotal',
       'foulDrawnPlayerName', 'foulDrawnPersonId', 'turnoverTotal',
       'blockPlayerName', 'blockPersonId', 'stealPlayerName', 'stealPersonId',
       'value', 'gameid'],
      d

In [12]:
final_df_copy.subType.unique()

array(['start', 'recovered', 'Jump Shot', 'defensive', 'Layup',
       'offensive', 'personal', '1 of 2', '2 of 2', 'out-of-bounds',
       'DUNK', '1 of 1', 'full', nan, 'out', 'in', 'traveling',
       'lost ball', 'equipment issue', 'end', 'delay-of-game',
       'defensive goaltending', 'lane', 'bad pass', '1 of 3', '2 of 3',
       '3 of 3', 'Hook', 'challenge', 'request', 'shot clock', 'palming',
       'offensive foul', 'technical', 'offensive-kicked-ball',
       'offensive goaltending', 'kicked ball', 'altercationrequest',
       'blood rule', 'other', 'backcourt', '8-second-violation',
       '3-second-violation', 'double dribble', 'basket-from-below',
       'court clean up', 'illegal assist', 'discontinued dribble',
       '5-second-violation', 'LaneViolation', 'jumpball',
       'excess timeout', 'injury', 'inbound', 'double lane',
       'punched ball', '5-second-back-to-the-basket', 'too-many-players',
       'jumpball violation', 'shot', ''], dtype=object)

## Setting Pandas option to show all columns

In [13]:
pd.set_option('display.max_columns', None)

In [14]:
#unique timeactual values
timetest1 = final_df_copy.timeActual.unique()[0]
clocktest1 = final_df_copy.clock.unique()[0]

In [15]:
import dateutil.parser as parser

In [16]:
### Transforming data types
final_df_copy.dtypes

actionNumber                  int64
clock                        object
timeActual                   object
period                        int64
periodType                   object
actionType                   object
subType                      object
qualifiers                   object
personId                      int64
x                           float64
y                           float64
possession                    int64
scoreHome                    object
scoreAway                    object
edited                       object
orderNumber                   int64
xLegacy                     float64
yLegacy                     float64
isFieldGoal                   int64
side                         object
description                  object
personIdsFilter              object
teamId                      float64
teamTricode                  object
descriptor                   object
jumpBallRecoveredName        object
jumpBallRecoverdPersonId    float64
playerName                  

In [17]:
# Need to use columns:  Period and Clock to determine what time in the game it is (Clock counts down per period)
# period_to_total_time 

# create a fucntion and use map to apply it to the new column

def period_to_time_conversion(value):
    # final_df_copy['period_to_time'] = 0

    if value == 1:
        return 0
    elif value == 2:
        return 12
    elif value == 3:
        return 24
    else:
        return 36



final_df_copy['period_to_time'] = final_df_copy['period'].map(period_to_time_conversion)



# Creat column that is a combo of clock + period where each period = 12 minutes and count up

In [18]:
final_df_copy.loc[:, ['period', 'period_to_time']]
# period_to_time_conversion
final_df_copy.period_to_time.unique()

array([ 0, 12, 24, 36], dtype=int64)

In [19]:
import time
from datetime import datetime , timedelta

In [20]:
test = final_df_copy['clock'][1:2]
print(test)

1    PT11M56.00S
Name: clock, dtype: object


In [21]:
parsed_time = datetime.strptime('PT10M56.00S', 'PT%MM%S.00S')
print(parsed_time)
print(datetime(year =1900, month=1, day=1, hour=0, minute=12, second=0))
subtracts = datetime(year =1900, month=1, day=1, hour=0, minute=12, second=0)
# final = parsed_time - timedelta.total_seconds(minutes=12)
final2 = subtracts - parsed_time
final2.seconds


1900-01-01 00:10:56
1900-01-01 00:12:00


64

In [22]:
testarray = pd.to_datetime(final_df_copy['clock'].astype(str).str[0:7], format='PT%MM%S')

In [23]:
pd.merge(testarray.astype(str), pd.Series(final_df_copy['clock']), right_index=True, left_index=True)

Unnamed: 0,clock_x,clock_y
0,1900-01-01 00:12:00,PT12M00.00S
1,1900-01-01 00:11:56,PT11M56.00S
2,1900-01-01 00:11:43,PT11M43.00S
3,1900-01-01 00:11:40,PT11M40.00S
4,1900-01-01 00:11:29,PT11M29.00S
...,...,...
691870,1900-01-01 00:00:11,PT00M11.30S
691871,1900-01-01 00:00:00,PT00M00.00S
691872,1900-01-01 00:00:00,PT00M00.00S
691873,1900-01-01 00:00:00,PT00M00.00S


In [24]:
# def time_conversion(parsed_time):
#     x = datetime(year =1900, month=1, day=1, hour=0, minute=12, second=0)
#     time_passed_seconds = x - pd.to_datetime(final_df_copy['clock'], 'PT%MM%S.00S')
#     return time_passed_seconds

# final_df_copy['time_passed_in_seconds'] = final_df_copy['period_to_time'].map(time_conversion)

In [25]:
final_df_copy.head()

Unnamed: 0,actionNumber,clock,timeActual,period,periodType,actionType,subType,qualifiers,personId,x,y,possession,scoreHome,scoreAway,edited,orderNumber,xLegacy,yLegacy,isFieldGoal,side,description,personIdsFilter,teamId,teamTricode,descriptor,jumpBallRecoveredName,jumpBallRecoverdPersonId,playerName,playerNameI,jumpBallWonPlayerName,jumpBallWonPersonId,jumpBallLostPlayerName,jumpBallLostPersonId,shotDistance,shotResult,shotActionNumber,reboundTotal,reboundDefensiveTotal,reboundOffensiveTotal,pointsTotal,assistPlayerNameInitial,assistPersonId,assistTotal,officialId,foulPersonalTotal,foulTechnicalTotal,foulDrawnPlayerName,foulDrawnPersonId,turnoverTotal,blockPlayerName,blockPersonId,stealPlayerName,stealPersonId,value,gameid,period_to_time
0,2,PT12M00.00S,2022-04-10T19:40:56.4Z,1,REGULAR,period,start,[],0,,,0,0,0,2022-04-10T19:40:56Z,20000,,,0,,Period Start,[],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,22101221,0
1,4,PT11M56.00S,2022-04-10T19:41:00.0Z,1,REGULAR,jumpball,recovered,[],1628989,,,1610612737,0,0,2022-04-10T19:41:00Z,40000,,,0,,Jump Ball C. Capela vs. A. Sengun: Tip to K. H...,"[1628989, 203991, 1630578]",1610613000.0,ATL,startperiod,K. Huerter,1628989.0,Huerter,K. Huerter,Capela,203991.0,Sengun,1630578.0,,,,,,,,,,,,,,,,,,,,,,22101221,0
2,7,PT11M43.00S,2022-04-10T19:41:13.1Z,1,REGULAR,3pt,Jump Shot,[],1628989,7.178055,4.656863,1610612737,0,0,2022-04-10T19:41:20Z,70000,227.0,15.0,1,left,MISS K. Huerter 3PT,[1628989],1610613000.0,ATL,,,,Huerter,K. Huerter,,,,,22.72,Missed,,,,,,,,,,,,,,,,,,,,22101221,0
3,8,PT11M40.00S,2022-04-10T19:41:16.1Z,1,REGULAR,rebound,defensive,[],1629726,,,1610612745,0,0,2022-04-10T19:41:20Z,80000,,,0,,G. Mathews REBOUND (Off:0 Def:1),[1629726],1610613000.0,HOU,,,,Mathews,G. Mathews,,,,,,,7.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,22101221,0
4,9,PT11M29.00S,2022-04-10T19:41:26.0Z,1,REGULAR,3pt,Jump Shot,[],1629726,66.442181,56.372549,1610612745,3,0,2022-04-10T19:41:30Z,90000,32.0,263.0,1,right,G. Mathews 26' 3PT (3 PTS) (A. Sengun 1 AST),"[1629726, 1630578]",1610613000.0,HOU,,,,Mathews,G. Mathews,,,,,26.48,Made,,,,,3.0,A. Sengun,1630578.0,1.0,,,,,,,,,,,,22101221,0


In [26]:
#Notes for to dos for this data set
# --Look at % of makes of shots based on distance
# --What duo had the most assits to dunks in the NBA
# --Calculate true shooting % of all players? using apply? or creating a function... probably this
#     #TS% - True Shooting Percentage; the formula is PTS / (2 * TSA). True shooting percentage is a measure of shooting efficiency that takes into account field goals, 3-point field goals, and free throws.
#     #TSA - True Shooting Attempts; the formula is FGA + 0.44 * FTA.

# -- Most points in the clutch (under 3 minutes left when a game is within 5 points)
# -- Teams shooting % 2PT vs 3PT
# --best assist to turn over ratio
# --

#### - Might have to seperate TIMEACTUAL field into date and time

In [27]:
#Looking at shots taken from rounded distances
final_df_copy.loc[:,['shotDistance']].round().value_counts()#.sort_index()

shotDistance
26.0            22892
2.0             19249
25.0            18937
1.0             16445
3.0             13577
                ...  
82.0                1
83.0                1
84.0                1
88.0                1
89.0                1
Length: 87, dtype: int64

In [28]:
#breaking down shot distance and the % of total shots made from the distance
final_df_copy.loc[:, ['shotResult']].value_counts()

shotResult
Made          141587
Missed        128907
dtype: int64

In [29]:
final_df_copy['actionType'].unique()

array(['period', 'jumpball', '3pt', 'rebound', '2pt', 'foul', 'freethrow',
       'turnover', 'timeout', 'stoppage', 'block', 'substitution',
       'steal', 'violation', 'instantreplay', 'game', 'memo', 'ejection'],
      dtype=object)

In [30]:
#pulling rows where actionType = 'block'
final_df_copy[final_df_copy.actionType == 'block']

Unnamed: 0,actionNumber,clock,timeActual,period,periodType,actionType,subType,qualifiers,personId,x,y,possession,scoreHome,scoreAway,edited,orderNumber,xLegacy,yLegacy,isFieldGoal,side,description,personIdsFilter,teamId,teamTricode,descriptor,jumpBallRecoveredName,jumpBallRecoverdPersonId,playerName,playerNameI,jumpBallWonPlayerName,jumpBallWonPersonId,jumpBallLostPlayerName,jumpBallLostPersonId,shotDistance,shotResult,shotActionNumber,reboundTotal,reboundDefensiveTotal,reboundOffensiveTotal,pointsTotal,assistPlayerNameInitial,assistPersonId,assistTotal,officialId,foulPersonalTotal,foulTechnicalTotal,foulDrawnPlayerName,foulDrawnPersonId,turnoverTotal,blockPlayerName,blockPersonId,stealPlayerName,stealPersonId,value,gameid,period_to_time
51,71,PT05M51.00S,2022-04-10T19:53:02.1Z,1,REGULAR,block,,[],203991,,,1610612745,13,20,2022-04-10T19:53:04Z,690000,,,0,,C. Capela BLOCK (1 BLK),[203991],1.610613e+09,ATL,,,,Capela,C. Capela,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,0
64,86,PT04M08.00S,2022-04-10T19:55:13.0Z,1,REGULAR,block,,[],1629726,,,1610612737,15,25,2022-04-10T19:55:17Z,840000,,,0,,G. Mathews BLOCK (1 BLK),[1629726],1.610613e+09,HOU,,,,Mathews,G. Mathews,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,0
115,144,PT00M02.10S,2022-04-10T20:05:35.0Z,1,REGULAR,block,,[],1628021,,,1610612737,24,35,2022-04-10T20:06:47Z,1430000,,,0,,D. Nwaba BLOCK (1 BLK),[1628021],1.610613e+09,HOU,,,,Nwaba,D. Nwaba,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,0
261,319,PT10M38.00S,2022-04-10T20:51:18.6Z,3,REGULAR,block,,[],1630256,,,1610612737,64,73,2022-04-10T20:51:21Z,3170000,,,0,,J. Tate BLOCK (1 BLK),[1630256],1.610613e+09,HOU,,,,Tate,J. Tate,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,24
274,333,PT09M37.00S,2022-04-10T20:52:46.7Z,3,REGULAR,block,,[],1630578,,,1610612737,66,73,2022-04-10T20:52:49Z,3310000,,,0,,A. Sengun BLOCK (1 BLK),[1630578],1.610613e+09,HOU,,,,Sengun,A. Sengun,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
691748,547,PT09M34.00S,2021-10-20T01:31:36.3Z,4,REGULAR,block,,[],1626192,,,1610612751,99,90,2021-10-20T01:31:39Z,5410000,,,0,,P. Connaughton BLOCK (1 BLK),[1626192],1.610613e+09,MIL,,,,Connaughton,P. Connaughton,,,,,,,,,,,,,,,,,,,,,,,,,,0022100001,36
691773,578,PT07M46.00S,2021-10-20T01:37:35.2Z,4,REGULAR,block,,[],1629670,,,1610612751,106,93,2021-10-20T01:37:38Z,5680000,,,0,,J. Nwora BLOCK (1 BLK),[1629670],1.610613e+09,MIL,,,,Nwora,J. Nwora,,,,,,,,,,,,,,,,,,,,,,,,,,0022100001,36
691786,595,PT06M45.00S,2021-10-20T01:40:09.9Z,4,REGULAR,block,,[],203507,,,1610612751,112,95,2021-10-20T01:40:13Z,5850000,,,0,,G. Antetokounmpo BLOCK (2 BLK),[203507],1.610613e+09,MIL,,,,Antetokounmpo,G. Antetokounmpo,,,,,,,,,,,,,,,,,,,,,,,,,,0022100001,36
691804,613,PT05M14.00S,2021-10-20T01:45:13.8Z,4,REGULAR,block,,[],201142,,,1610612749,112,95,2021-10-20T01:45:17Z,6030000,,,0,,K. Durant BLOCK (2 BLK),[201142],1.610613e+09,BKN,,,,Durant,K. Durant,,,,,,,,,,,,,,,,,,,,,,,,,,0022100001,36


In [31]:
#take shotDistance, actionType is 2pt and 3pt, shotResult is Missed or Made
all_actions = pd.DataFrame(final_df_copy.loc[:, ["shotDistance", "actionType", "shotResult"]])

In [32]:
all_actions

Unnamed: 0,shotDistance,actionType,shotResult
0,,period,
1,,jumpball,
2,22.72,3pt,Missed
3,,rebound,
4,26.48,3pt,Made
...,...,...,...
691870,,turnover,
691871,29.71,3pt,Missed
691872,,rebound,
691873,,period,


In [33]:
shots_df = all_actions[(all_actions.shotResult.isin(['Missed', 'Made'])) & (all_actions.actionType.isin(['2pt', '3pt']))]

In [34]:
shots_df

Unnamed: 0,shotDistance,actionType,shotResult
2,22.72,3pt,Missed
4,26.48,3pt,Made
5,18.69,2pt,Made
6,19.20,2pt,Missed
8,5.10,2pt,Missed
...,...,...,...
691862,6.99,2pt,Missed
691864,0.00,2pt,Missed
691867,1.31,2pt,Made
691868,26.60,3pt,Missed


In [35]:
shots_df.sort_index(axis = 1, inplace = True)
shots_df = shots_df.round()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shots_df.sort_index(axis = 1, inplace = True)


In [36]:
#shots_df.shotResult.replace(['Missed', 0], ['Made', 1])
shots_df['shotResult'] = shots_df.shotResult.replace(['Missed','Made'], [0, 1])

In [37]:
shots_df.shotResult.value_counts()

0    116783
1     99930
Name: shotResult, dtype: int64

In [38]:
def percentcalc(x):
    for i in shots_df[shots_df.shotResult]:
        if shots_df[shots_df.shotResult == 'Made']:
            return 1
        else:
            return 0

In [39]:
# shots_df.map(percentcalc)

In [42]:
# shots_grouped.shotResult.sum()

## Analyzing what players has the most assits to a dunk 

In [None]:
#looking into what player has assisted the most dunks in the nba (or per team)
assists = final_df[(final_df.subType == 'DUNK') & (final_df.periodType == 'REGULAR')].loc[:,['assistPlayerNameInitial','playerNameI','teamTricode', 'teamId' ]]

In [None]:
#adding new col with combined player
assists = assists[pd.notnull(assists.assistPlayerNameInitial)]

In [None]:
assists['assistor-assiste'] = assists.assistPlayerNameInitial + '  to  ' + assists.playerNameI  

In [41]:
# assists.columns()

In [None]:
assistsfinal = assists.loc[: ,['teamTricode','assistor-assiste']].sort_values(by=['teamTricode'])

In [None]:
assists_grouped = assistsfinal.groupby(['teamTricode','assistor-assiste'])['assistor-assiste'].count()

In [None]:
assists_grouped

teamTricode  assistor-assiste              
ATL          B. Bogdanovic  to  C. Capela      10
             B. Bogdanovic  to  J. Collins      5
             B. Bogdanovic  to  K. Huerter      1
             B. Bogdanovic  to  N. Knight       3
             B. Bogdanovic  to  O. Okongwu      2
                                               ..
WAS          R. Westbrook  to  R. Hachimura    31
             R. Westbrook  to  R. Lopez        13
             R. Westbrook  to  T. Bryant        6
             T. Brown Jr.  to  R. Westbrook     1
             T. Brown Jr.  to  T. Bryant        1
Name: assistor-assiste, Length: 2248, dtype: int64