## Importing Packages

In [1]:
#Importing packages
from nba_api.stats.static import players
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import playbyplayv2
import requests, pandas as pd, sys
# !{sys.executable} -m pip install tqdm
from tqdm import tqdm
import dateutil.parser as parser
import time
from datetime import datetime,  timedelta

In [2]:
#Creating header var for api pull
headers: dict[str, str]  = {
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
}

In [4]:
# create function that gets pbp (play by play) logs from the 2020-21 season
def get_data(game_id) -> pd.DataFrame:
    play_by_play_url = "https://cdn.nba.com/static/json/liveData/playbyplay/playbyplay_"+game_id+".json"
    response = requests.get(url=play_by_play_url, headers=headers).json()
    play_by_play = response['game']['actions']
    df = pd.DataFrame(play_by_play)
    df['gameid'] = game_id
    return df

### Defining parameters for data pull

In [5]:
gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable='2021-22',
                                               league_id_nullable='00',
                                               season_type_nullable='Regular Season')

### Retrieving Game Ids

In [6]:
games = gamefinder.get_data_frames()[0]

In [7]:
#getting a list of distinct game ids
game_ids = games['GAME_ID'].unique()

In [8]:
len(game_ids)

1230

In [9]:
pbpdata = []

In [10]:
#Here TDQM() creates a % completion bar
for game_id in tqdm(game_ids):
    game_data = get_data(game_id)
    pbpdata.append(game_data)

final_df = pd.concat(pbpdata, ignore_index=True)

100%|██████████| 1230/1230 [05:35<00:00,  3.67it/s]


## Instantiating Data Frame

In [11]:
final_df_copy = pd.DataFrame(final_df)

## Reviewing Column Names

In [12]:
#Primary columns in the DF
final_df_copy.columns

Index(['actionNumber', 'clock', 'timeActual', 'period', 'periodType',
       'actionType', 'subType', 'qualifiers', 'personId', 'x', 'y',
       'possession', 'scoreHome', 'scoreAway', 'edited', 'orderNumber',
       'xLegacy', 'yLegacy', 'isFieldGoal', 'side', 'description',
       'personIdsFilter', 'teamId', 'teamTricode', 'descriptor',
       'jumpBallRecoveredName', 'jumpBallRecoverdPersonId', 'playerName',
       'playerNameI', 'jumpBallWonPlayerName', 'jumpBallWonPersonId',
       'jumpBallLostPlayerName', 'jumpBallLostPersonId', 'shotDistance',
       'shotResult', 'shotActionNumber', 'reboundTotal',
       'reboundDefensiveTotal', 'reboundOffensiveTotal', 'pointsTotal',
       'assistPlayerNameInitial', 'assistPersonId', 'assistTotal',
       'officialId', 'turnoverTotal', 'stealPlayerName', 'stealPersonId',
       'foulPersonalTotal', 'foulTechnicalTotal', 'foulDrawnPlayerName',
       'foulDrawnPersonId', 'blockPlayerName', 'blockPersonId', 'value',
       'gameid'],
      d

## Setting Pandas option to show all columns

In [14]:
pd.set_option('display.max_columns', None)

In [15]:
final_df_copy.head()

Unnamed: 0,actionNumber,clock,timeActual,period,periodType,actionType,subType,qualifiers,personId,x,y,possession,scoreHome,scoreAway,edited,orderNumber,xLegacy,yLegacy,isFieldGoal,side,description,personIdsFilter,teamId,teamTricode,descriptor,jumpBallRecoveredName,jumpBallRecoverdPersonId,playerName,playerNameI,jumpBallWonPlayerName,jumpBallWonPersonId,jumpBallLostPlayerName,jumpBallLostPersonId,shotDistance,shotResult,shotActionNumber,reboundTotal,reboundDefensiveTotal,reboundOffensiveTotal,pointsTotal,assistPlayerNameInitial,assistPersonId,assistTotal,officialId,turnoverTotal,stealPlayerName,stealPersonId,foulPersonalTotal,foulTechnicalTotal,foulDrawnPlayerName,foulDrawnPersonId,blockPlayerName,blockPersonId,value,gameid
0,2,PT12M00.00S,2022-04-11T01:41:52.8Z,1,REGULAR,period,start,[],0,,,0,0,0,2022-04-11T01:41:52Z,20000,,,0,,Period Start,[],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,22101219
1,4,PT11M56.00S,2022-04-11T01:41:55.9Z,1,REGULAR,jumpball,recovered,[],1629640,,,1610612759,0,0,2022-04-11T01:41:55Z,40000,,,0,,Jump Ball J. Poeltl vs. D. Powell: Tip to K. J...,"[1629640, 1627751, 203939]",1610613000.0,SAS,startperiod,K. Johnson,1629640.0,Johnson,K. Johnson,Poeltl,1627751.0,Powell,203939.0,,,,,,,,,,,,,,,,,,,,,,22101219
2,7,PT11M47.00S,2022-04-11T01:42:04.2Z,1,REGULAR,3pt,Jump Shot,[],1629640,31.882392,48.77451,1610612759,0,0,2022-04-11T01:42:28Z,70000,6.0,247.0,1,left,MISS K. Johnson 24' 3PT,[1629640],1610613000.0,SAS,,,,Johnson,K. Johnson,,,,,24.73,Missed,,,,,,,,,,,,,,,,,,,,22101219
3,8,PT11M45.00S,2022-04-11T01:42:06.2Z,1,REGULAR,rebound,defensive,[],1629029,,,1610612742,0,0,2022-04-11T01:42:28Z,80000,,,0,,L. Doncic REBOUND (Off:0 Def:1),[1629029],1610613000.0,DAL,,,,Doncic,L. Doncic,,,,,,,7.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,22101219
4,9,PT11M26.00S,2022-04-11T01:42:25.2Z,1,REGULAR,3pt,Jump Shot,[],1628973,95.088699,4.656863,1610612742,3,0,2022-04-11T01:42:30Z,90000,-227.0,-6.0,1,right,J. Brunson 3PT (3 PTS) (L. Doncic 1 AST),"[1628973, 1629029]",1610613000.0,DAL,,,,Brunson,J. Brunson,,,,,22.68,Made,,,,,3.0,L. Doncic,1629029.0,1.0,,,,,,,,,,,,22101219


# Data Manipulation and Formating

### Checking data types

In [16]:
### Transforming data types
final_df_copy.dtypes

actionNumber                  int64
clock                        object
timeActual                   object
period                        int64
periodType                   object
actionType                   object
subType                      object
qualifiers                   object
personId                      int64
x                           float64
y                           float64
possession                    int64
scoreHome                    object
scoreAway                    object
edited                       object
orderNumber                   int64
xLegacy                     float64
yLegacy                     float64
isFieldGoal                   int64
side                         object
description                  object
personIdsFilter              object
teamId                      float64
teamTricode                  object
descriptor                   object
jumpBallRecoveredName        object
jumpBallRecoverdPersonId    float64
playerName                  

### Converting time values (clock and period) to get total time passed in a game

In [17]:
# Need to use columns:  Period and Clock to determine what time in the game it is (Clock counts down per period)
# period_to_total_time 

# create a fucntion and use map to apply it to the new column

def period_to_time_conversion(value) -> int:
    # final_df_copy['period_to_time'] = 0

    if value == 1:
        return 0
    elif value == 2:
        return 12
    elif value == 3:
        return 24
    else:
        return 36

final_df_copy['period_to_time'] = final_df_copy['period'].map(period_to_time_conversion)

In [20]:
# Replaces clock
parsed_time = datetime.strptime('PT10M56.00S', 'PT%MM%S.00S')
print(parsed_time)
print(datetime(year =1900, month=1, day=1, hour=0, minute=12, second=0))
# This is a baseline for each period total time
subtracts = datetime(year =1900, month=1, day=1, hour=0, minute=12, second=0)
# final = parsed_time - timedelta.total_seconds(minutes=12)
final2 = subtracts - parsed_time
final2
print(timedelta(seconds = final2.seconds))


1900-01-01 00:10:56
1900-01-01 00:12:00
0:01:04


### This works and gets correct result!

In [23]:
x = datetime(year =1900, month=1, day=1, hour=0, minute=12, second=0)
values_array = final_df_copy.clock.astype(str).str[0:7]
time_passed_seconds = x - pd.to_datetime(values_array, format='PT%MM%S')
values_array
time_passed_seconds

0        0 days 00:00:00
1        0 days 00:00:04
2        0 days 00:00:13
3        0 days 00:00:15
4        0 days 00:00:34
               ...      
691870   0 days 00:11:49
691871   0 days 00:12:00
691872   0 days 00:12:00
691873   0 days 00:12:00
691874   0 days 00:12:00
Name: clock, Length: 691875, dtype: timedelta64[ns]

In [24]:
def time_conversion(unparsed_timearray):
    x = datetime(year =1900, month=1, day=1, hour=0, minute=12, second=0)
    values_array = unparsed_timearray[0:7]
    time_passed_total = x - pd.to_datetime(values_array, format='PT%MM%S')
    return time_passed_total

final_df_copy['time_passed_in_seconds'] = final_df_copy['clock'].map(time_conversion)


In [71]:
final_df_copy[final_df_copy.stealPlayerName != pd.notnull ].head(5)

Unnamed: 0,actionNumber,clock,timeActual,period,periodType,actionType,subType,qualifiers,personId,x,y,possession,scoreHome,scoreAway,edited,orderNumber,xLegacy,yLegacy,isFieldGoal,side,description,personIdsFilter,teamId,teamTricode,descriptor,jumpBallRecoveredName,jumpBallRecoverdPersonId,playerName,playerNameI,jumpBallWonPlayerName,jumpBallWonPersonId,jumpBallLostPlayerName,jumpBallLostPersonId,shotDistance,shotResult,shotActionNumber,reboundTotal,reboundDefensiveTotal,reboundOffensiveTotal,pointsTotal,assistPlayerNameInitial,assistPersonId,assistTotal,officialId,turnoverTotal,stealPlayerName,stealPersonId,foulPersonalTotal,foulTechnicalTotal,foulDrawnPlayerName,foulDrawnPersonId,blockPlayerName,blockPersonId,value,gameid,period_to_time,time_passed_in_seconds
0,2,PT12M00.00S,2022-04-11T01:41:52.8Z,1,REGULAR,period,start,[],0,,,0,0,0,2022-04-11T01:41:52Z,20000,,,0,,Period Start,[],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,22101219,0,0 days 00:00:00
1,4,PT11M56.00S,2022-04-11T01:41:55.9Z,1,REGULAR,jumpball,recovered,[],1629640,,,1610612759,0,0,2022-04-11T01:41:55Z,40000,,,0,,Jump Ball J. Poeltl vs. D. Powell: Tip to K. J...,"[1629640, 1627751, 203939]",1610613000.0,SAS,startperiod,K. Johnson,1629640.0,Johnson,K. Johnson,Poeltl,1627751.0,Powell,203939.0,,,,,,,,,,,,,,,,,,,,,,22101219,0,0 days 00:00:04
2,7,PT11M47.00S,2022-04-11T01:42:04.2Z,1,REGULAR,3pt,Jump Shot,[],1629640,31.882392,48.77451,1610612759,0,0,2022-04-11T01:42:28Z,70000,6.0,247.0,1,left,MISS K. Johnson 24' 3PT,[1629640],1610613000.0,SAS,,,,Johnson,K. Johnson,,,,,24.73,Missed,,,,,,,,,,,,,,,,,,,,22101219,0,0 days 00:00:13
3,8,PT11M45.00S,2022-04-11T01:42:06.2Z,1,REGULAR,rebound,defensive,[],1629029,,,1610612742,0,0,2022-04-11T01:42:28Z,80000,,,0,,L. Doncic REBOUND (Off:0 Def:1),[1629029],1610613000.0,DAL,,,,Doncic,L. Doncic,,,,,,,7.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,22101219,0,0 days 00:00:15
4,9,PT11M26.00S,2022-04-11T01:42:25.2Z,1,REGULAR,3pt,Jump Shot,[],1628973,95.088699,4.656863,1610612742,3,0,2022-04-11T01:42:30Z,90000,-227.0,-6.0,1,right,J. Brunson 3PT (3 PTS) (L. Doncic 1 AST),"[1628973, 1629029]",1610613000.0,DAL,,,,Brunson,J. Brunson,,,,,22.68,Made,,,,,3.0,L. Doncic,1629029.0,1.0,,,,,,,,,,,,22101219,0,0 days 00:00:34


In [35]:
final_df_copy.columns

Index(['actionNumber', 'clock', 'timeActual', 'period', 'periodType',
       'actionType', 'subType', 'qualifiers', 'personId', 'x', 'y',
       'possession', 'scoreHome', 'scoreAway', 'edited', 'orderNumber',
       'xLegacy', 'yLegacy', 'isFieldGoal', 'side', 'description',
       'personIdsFilter', 'teamId', 'teamTricode', 'descriptor',
       'jumpBallRecoveredName', 'jumpBallRecoverdPersonId', 'playerName',
       'playerNameI', 'jumpBallWonPlayerName', 'jumpBallWonPersonId',
       'jumpBallLostPlayerName', 'jumpBallLostPersonId', 'shotDistance',
       'shotResult', 'shotActionNumber', 'reboundTotal',
       'reboundDefensiveTotal', 'reboundOffensiveTotal', 'pointsTotal',
       'assistPlayerNameInitial', 'assistPersonId', 'assistTotal',
       'officialId', 'turnoverTotal', 'stealPlayerName', 'stealPersonId',
       'foulPersonalTotal', 'foulTechnicalTotal', 'foulDrawnPlayerName',
       'foulDrawnPersonId', 'blockPlayerName', 'blockPersonId', 'value',
       'gameid', 'period_

In [None]:
# Creating a function to create new column with total time passed in game
def total_time_passed(time_passed_in_period: datetime, period_passed_to_minutes: datetime):
    period_minutes = pd.to_datetime(final_df_copy.period_to_time, format='%M')
    minutes_in_period = final_df_copy.time_passed_in_seconds
    return period_minutes + minutes_in_period

In [None]:
final_df_copy['Total_time_passed_in_game'] = map(total_time_passed,final_df_copy.period_to_time, final_df_copy.time_passed_in_seconds)

# Tackle true shooting percentage of all players each game?
### This should be ppossible
# Formula is below:

<!-- TS% = PTS/(2*(FGA+.44*FTA)) -->

In [79]:
#Changing column to int data type
final_df_copy.assistPersonId = final_df_copy.assistPersonId.astype('Int64')

# Creating a dict for mapping object of names and ids
playerid_and_player_initials_dict_assists =dict(zip(final_df_copy.assistPlayerNameInitial.values, final_df_copy.assistPersonId))
playerid_and_player_initials_dict_playername_to_personid =dict(zip(final_df_copy.playerNameI.values, final_df_copy.personId))
playerid_and_player_initials_dict_combined = playerid_and_player_initials_dict_assists | playerid_and_player_initials_dict_playername_to_personid

In [80]:
playerid_and_player_initials_dict_combined

{nan: 0,
 'L. Doncic': 1629029,
 'J. Brunson': 1628973,
 'D. Murray': 1627749,
 'T. Jones': 1630200,
 'D. Finney-Smith': 1627827,
 'D. Vassell': 1630170,
 'Z. Collins': 1628380,
 'S. Dinwiddie': 203915,
 'K. Bates-Diop': 1628966,
 'D. Powell': 203939,
 'J. Richardson': 1626196,
 'J. Green': 1630224,
 'K. Johnson': 1629640,
 'R. Bullock': 203493,
 'J. Primo': 1630563,
 'T. Pinson': 1629033,
 'T. Burke': 203504,
 'B. Marjanovic': 1626246,
 'M. Wright': 1630589,
 'E. Fournier': 203095,
 'O. Anunoby': 1628384,
 'A. Burks': 202692,
 'S. Barnes': 1630567,
 'I. Quickley': 1630193,
 'G. Trent Jr.': 1629018,
 'C. Boucher': 1628449,
 'M. Flynn': 1630201,
 'F. Hunt': 1630624,
 'T. Young': 1629027,
 'J. Sims': 1630579,
 'O. Toppin': 1630167,
 'Y. Watanabe': 1629139,
 'K. Birch': 203920,
 'P. Achiuwa': 1630173,
 'R. Arcidiacono': 1627853,
 'D. Green': 203110,
 'N. Marshall': 1630230,
 'C. McCollum': 203468,
 'J. Poole': 1629673,
 'K. Looney': 1626172,
 'M. Moody': 1630541,
 'N. Bjelica': 202357,
 '

In [39]:
#Calculating TS%
# TS% = PTS/(2*(FGA+.44*FTA))

# final_df_copy[['actionType', 'subType', 'PlayerNameI', 'shotResult', 'assistPlayerNameInitial', 'Turnovers']]
# Need to pull description and parse out the persons name who commited the turn over
#The can be accomplished by parsing the sentence within the 'Description' and pulling B. Griffin as an example in the beginning character strings

# OR 

# Use actionType = "turnover" and grab the FIRST id from the personidsFilter and count the occurance of those ids per person


# below validates my theory that this is always the first person mentioned that is the one who commited the turnover
final_df_copy.description[final_df_copy.turnoverTotal == 1].head(30)


7                      L. Doncic bad pass TURNOVER (1 TO)
23       J. Poeltl bad pass out-of-bounds TURNOVER (1 TO)
32                    J. Brunson bad pass TURNOVER (1 TO)
51                     D. Powell bad pass TURNOVER (1 TO)
141                    D. Murray bad pass TURNOVER (1 TO)
160             Z. Collins offensive foul TURNOVER (1 TO)
203                   R. Bullock bad pass TURNOVER (1 TO)
252               J. Richardson lost ball TURNOVER (1 TO)
288                   D. Vassell bad pass TURNOVER (1 TO)
395                     J. Green bad pass TURNOVER (1 TO)
408                 S. Dinwiddie bad pass TURNOVER (1 TO)
468                    T. Pinson bad pass TURNOVER (1 TO)
515               B. Marjanovic lost ball TURNOVER (1 TO)
541                  I. Quickley bad pass TURNOVER (1 TO)
554                    A. Brooks bad pass TURNOVER (1 TO)
570                   O. Anunoby bad pass TURNOVER (1 TO)
661                     F. Hunt traveling TURNOVER (1 TO)
689           

In [26]:
#Notes for to dos for this data set
# --Look at % of makes of shots based on distance
# --What duo had the most assits to dunks in the NBA
# --Calculate true shooting % of all players? using apply? or creating a function... probably this
#     #TS% - True Shooting Percentage; the formula is PTS / (2 * TSA). True shooting percentage is a measure of shooting efficiency that takes into account field goals, 3-point field goals, and free throws.
#     #TSA - True Shooting Attempts; the formula is FGA + 0.44 * FTA.

# -- Most points in the clutch (under 3 minutes left when a game is within 5 points)
# -- Teams shooting % 2PT vs 3PT
# --best assist to turn over ratio
# --

#### - Might have to seperate TIMEACTUAL field into date and time

## THIS IS ANALYSIS

In [27]:
#Looking at shots taken from rounded distances
final_df_copy.loc[:,['shotDistance']].round().value_counts()#.sort_index()

shotDistance
26.0            22892
2.0             19249
25.0            18937
1.0             16445
3.0             13577
                ...  
82.0                1
83.0                1
84.0                1
88.0                1
89.0                1
Length: 87, dtype: int64

In [28]:
#breaking down shot distance and the % of total shots made from the distance
final_df_copy.loc[:, ['shotResult']].value_counts()

shotResult
Made          141587
Missed        128907
dtype: int64

In [29]:
final_df_copy['actionType'].unique()

array(['period', 'jumpball', '3pt', 'rebound', '2pt', 'foul', 'freethrow',
       'turnover', 'timeout', 'stoppage', 'block', 'substitution',
       'steal', 'violation', 'instantreplay', 'game', 'memo', 'ejection'],
      dtype=object)

In [30]:
#pulling rows where actionType = 'block'
final_df_copy[final_df_copy.actionType == 'block']

Unnamed: 0,actionNumber,clock,timeActual,period,periodType,actionType,subType,qualifiers,personId,x,y,possession,scoreHome,scoreAway,edited,orderNumber,xLegacy,yLegacy,isFieldGoal,side,description,personIdsFilter,teamId,teamTricode,descriptor,jumpBallRecoveredName,jumpBallRecoverdPersonId,playerName,playerNameI,jumpBallWonPlayerName,jumpBallWonPersonId,jumpBallLostPlayerName,jumpBallLostPersonId,shotDistance,shotResult,shotActionNumber,reboundTotal,reboundDefensiveTotal,reboundOffensiveTotal,pointsTotal,assistPlayerNameInitial,assistPersonId,assistTotal,officialId,foulPersonalTotal,foulTechnicalTotal,foulDrawnPlayerName,foulDrawnPersonId,turnoverTotal,blockPlayerName,blockPersonId,stealPlayerName,stealPersonId,value,gameid,period_to_time
51,71,PT05M51.00S,2022-04-10T19:53:02.1Z,1,REGULAR,block,,[],203991,,,1610612745,13,20,2022-04-10T19:53:04Z,690000,,,0,,C. Capela BLOCK (1 BLK),[203991],1.610613e+09,ATL,,,,Capela,C. Capela,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,0
64,86,PT04M08.00S,2022-04-10T19:55:13.0Z,1,REGULAR,block,,[],1629726,,,1610612737,15,25,2022-04-10T19:55:17Z,840000,,,0,,G. Mathews BLOCK (1 BLK),[1629726],1.610613e+09,HOU,,,,Mathews,G. Mathews,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,0
115,144,PT00M02.10S,2022-04-10T20:05:35.0Z,1,REGULAR,block,,[],1628021,,,1610612737,24,35,2022-04-10T20:06:47Z,1430000,,,0,,D. Nwaba BLOCK (1 BLK),[1628021],1.610613e+09,HOU,,,,Nwaba,D. Nwaba,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,0
261,319,PT10M38.00S,2022-04-10T20:51:18.6Z,3,REGULAR,block,,[],1630256,,,1610612737,64,73,2022-04-10T20:51:21Z,3170000,,,0,,J. Tate BLOCK (1 BLK),[1630256],1.610613e+09,HOU,,,,Tate,J. Tate,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,24
274,333,PT09M37.00S,2022-04-10T20:52:46.7Z,3,REGULAR,block,,[],1630578,,,1610612737,66,73,2022-04-10T20:52:49Z,3310000,,,0,,A. Sengun BLOCK (1 BLK),[1630578],1.610613e+09,HOU,,,,Sengun,A. Sengun,,,,,,,,,,,,,,,,,,,,,,,,,,0022101221,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
691748,547,PT09M34.00S,2021-10-20T01:31:36.3Z,4,REGULAR,block,,[],1626192,,,1610612751,99,90,2021-10-20T01:31:39Z,5410000,,,0,,P. Connaughton BLOCK (1 BLK),[1626192],1.610613e+09,MIL,,,,Connaughton,P. Connaughton,,,,,,,,,,,,,,,,,,,,,,,,,,0022100001,36
691773,578,PT07M46.00S,2021-10-20T01:37:35.2Z,4,REGULAR,block,,[],1629670,,,1610612751,106,93,2021-10-20T01:37:38Z,5680000,,,0,,J. Nwora BLOCK (1 BLK),[1629670],1.610613e+09,MIL,,,,Nwora,J. Nwora,,,,,,,,,,,,,,,,,,,,,,,,,,0022100001,36
691786,595,PT06M45.00S,2021-10-20T01:40:09.9Z,4,REGULAR,block,,[],203507,,,1610612751,112,95,2021-10-20T01:40:13Z,5850000,,,0,,G. Antetokounmpo BLOCK (2 BLK),[203507],1.610613e+09,MIL,,,,Antetokounmpo,G. Antetokounmpo,,,,,,,,,,,,,,,,,,,,,,,,,,0022100001,36
691804,613,PT05M14.00S,2021-10-20T01:45:13.8Z,4,REGULAR,block,,[],201142,,,1610612749,112,95,2021-10-20T01:45:17Z,6030000,,,0,,K. Durant BLOCK (2 BLK),[201142],1.610613e+09,BKN,,,,Durant,K. Durant,,,,,,,,,,,,,,,,,,,,,,,,,,0022100001,36


In [31]:
#take shotDistance, actionType is 2pt and 3pt, shotResult is Missed or Made
all_actions = pd.DataFrame(final_df_copy.loc[:, ["shotDistance", "actionType", "shotResult"]])

In [32]:
all_actions

Unnamed: 0,shotDistance,actionType,shotResult
0,,period,
1,,jumpball,
2,22.72,3pt,Missed
3,,rebound,
4,26.48,3pt,Made
...,...,...,...
691870,,turnover,
691871,29.71,3pt,Missed
691872,,rebound,
691873,,period,


In [33]:
shots_df = all_actions[(all_actions.shotResult.isin(['Missed', 'Made'])) & (all_actions.actionType.isin(['2pt', '3pt']))]

In [34]:
shots_df

Unnamed: 0,shotDistance,actionType,shotResult
2,22.72,3pt,Missed
4,26.48,3pt,Made
5,18.69,2pt,Made
6,19.20,2pt,Missed
8,5.10,2pt,Missed
...,...,...,...
691862,6.99,2pt,Missed
691864,0.00,2pt,Missed
691867,1.31,2pt,Made
691868,26.60,3pt,Missed


In [35]:
shots_df.sort_index(axis = 1, inplace = True)
shots_df = shots_df.round()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shots_df.sort_index(axis = 1, inplace = True)


In [36]:
#shots_df.shotResult.replace(['Missed', 0], ['Made', 1])
shots_df['shotResult'] = shots_df.shotResult.replace(['Missed','Made'], [0, 1])

In [37]:
shots_df.shotResult.value_counts()

0    116783
1     99930
Name: shotResult, dtype: int64

In [38]:
def percentcalc(x):
    for i in shots_df[shots_df.shotResult]:
        if shots_df[shots_df.shotResult == 'Made']:
            return 1
        else:
            return 0

In [39]:
# shots_df.map(percentcalc)

In [42]:
# shots_grouped.shotResult.sum()

## Analyzing what players has the most assits to a dunk 

In [None]:
#looking into what player has assisted the most dunks in the nba (or per team)
assists = final_df[(final_df.subType == 'DUNK') & (final_df.periodType == 'REGULAR')].loc[:,['assistPlayerNameInitial','playerNameI','teamTricode', 'teamId' ]]

In [None]:
#adding new col with combined player
assists = assists[pd.notnull(assists.assistPlayerNameInitial)]

In [None]:
assists['assistor-assiste'] = assists.assistPlayerNameInitial + '  to  ' + assists.playerNameI  

In [41]:
# assists.columns()

In [None]:
assistsfinal = assists.loc[: ,['teamTricode','assistor-assiste']].sort_values(by=['teamTricode'])

In [None]:
assists_grouped = assistsfinal.groupby(['teamTricode','assistor-assiste'])['assistor-assiste'].count()

In [None]:
assists_grouped

teamTricode  assistor-assiste              
ATL          B. Bogdanovic  to  C. Capela      10
             B. Bogdanovic  to  J. Collins      5
             B. Bogdanovic  to  K. Huerter      1
             B. Bogdanovic  to  N. Knight       3
             B. Bogdanovic  to  O. Okongwu      2
                                               ..
WAS          R. Westbrook  to  R. Hachimura    31
             R. Westbrook  to  R. Lopez        13
             R. Westbrook  to  T. Bryant        6
             T. Brown Jr.  to  R. Westbrook     1
             T. Brown Jr.  to  T. Bryant        1
Name: assistor-assiste, Length: 2248, dtype: int64