# Acquiring Data and Feature Engineering

In [55]:
import json
import pickle

import numpy as np
import pandas as pd
import plotly

from nba_api.stats.endpoints.leaguedashplayershotlocations import LeagueDashPlayerShotLocations
from nba_api.stats.endpoints.playerdashptshots import PlayerDashPtShots
from nba_api.stats.endpoints.playerprofilev2 import PlayerProfileV2
from nba_api.stats.endpoints.shotchartdetail import ShotChartDetail
from nba_api.stats.endpoints.shotchartdetail import ShotChartDetail
from nba_api.stats.static import players, teams
from nba_api.stats.static import players as nba_players

In [12]:
# Show every column when a DataFrame is displayed (None removes the column cap)
pd.set_option('display.max_columns', None)

## Notes on Data from NBA API
- **Closest Defender Shooting Data:**
    - Available starting with the 2013-14 season
    - Only includes data from arenas equipped with SportVU cameras

In [2]:
# Seasons available with closest defender data: '2013-14' through '2019-20',
# each written as the starting year plus the last two digits of the following year
seasons = ['{}-{:02d}'.format(start, (start + 1) % 100) for start in range(2013, 2020)]

In [5]:
def shooting_by_seasons(seasons, df_select):
    '''
    Fetch one PlayerDashPtShots table per season and stack the results
    ----------
    Parameters
    ----------
    seasons: Seasons (strings) requested from PlayerDashPtShots one at a time
    df_select: Position (int) of the frame to keep from get_data_frames()
    ----------
    Returns
    ----------
    One DataFrame built by concatenating the per-season frames, each tagged
    with a 'season' column holding the season it was requested for
    '''
    def fetch(season):
        # Request with team_id=0 and player_id=0, keep only the selected table,
        # and label its rows with the season
        frame = PlayerDashPtShots(team_id=0, player_id=0, season=season).get_data_frames()[df_select]
        frame['season'] = season
        return frame

    return pd.concat([fetch(season) for season in seasons])

In [6]:
# Fetch tables 4 and 5 of the PlayerDashPtShots response for every season.
# NOTE(review): going by the variable names, 4 is presumably the closest-defender
# table and 5 the closest-defender 10+ ft table — confirm against the API result order.
overall, tenplus = [shooting_by_seasons(seasons, table_idx) for table_idx in (4, 5)]

In [59]:
# Get active players.
# The static players module is accessed through its `nba_players` alias because
# this cell rebinds the name `players` to a list; calling `players.get_players()`
# here would raise AttributeError the second time the cell is run.
players = [player for player in nba_players.get_players() if player['is_active']]

In [90]:
def player_shooting(player_id, season):
    '''
    Fetch one player's regular-season shot chart rows for a single season
    ----------
    Parameters
    ----------
    player_id: player id (int) of player to evaluate
    season: season (string) in 'xxxx-xx' format (ex. '2013-14')
    ----------
    Returns
    ----------
    First DataFrame of the ShotChartDetail response with a 'season' column added,
    or None when that DataFrame has no rows
    '''
    shots = ShotChartDetail(
        team_id=0,
        player_id=player_id,
        context_measure_simple='FGA',
        season_nullable=season,
        season_type_all_star='Regular Season'
    ).get_data_frames()[0]

    # Nothing came back for this player/season: signal that with None
    if len(shots) == 0:
        return None

    shots['season'] = season
    return shots

def player_shot_chart(df):
    '''
    Function to total shot attempts and makes per shot zone
    ----------
    Parameters
    ----------
    df: DataFrame with SHOT_ZONE_AREA, SHOT_ZONE_RANGE, SHOT_ATTEMPTED_FLAG
        and SHOT_MADE_FLAG columns
    ----------
    Returns
    ----------
    DataFrame indexed by (SHOT_ZONE_AREA, SHOT_ZONE_RANGE) holding the summed
    SHOT_ATTEMPTED_FLAG and SHOT_MADE_FLAG values for each group
    '''
    # Select the columns with a list: indexing a GroupBy with a bare tuple of
    # column names is deprecated in pandas 1.x and raises in pandas 2.x
    flag_columns = ['SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG']
    return df.groupby(['SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE'])[flag_columns].sum()

In [91]:
# Peek at a single record to see which fields each player entry carries
players[1]

{'id': 1628389,
 'full_name': 'Bam Adebayo',
 'first_name': 'Bam',
 'last_name': 'Adebayo',
 'is_active': True}

In [92]:
# Shot data for player id 1628389 (Bam Adebayo, per the record displayed above),
# one entry per season. player_shooting returns None when a season has no rows,
# so some entries may be None.
temp = [player_shooting(player_id=1628389, season=season) for season in seasons]

In [94]:
# Inspect the result for the last season in `seasons`
temp[-1]

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,EVENT_TYPE,ACTION_TYPE,SHOT_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM,season
0,Shot Chart Detail,0021900007,11,1628389,Bam Adebayo,1610612748,Miami Heat,1,11,5,Made Shot,Running Alley Oop Dunk Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,1,-8,10,1,1,20191023,MIA,MEM,2019-20
1,Shot Chart Detail,0021900007,21,1628389,Bam Adebayo,1610612748,Miami Heat,1,10,9,Missed Shot,Jump Shot,2PT Field Goal,Mid-Range,Center(C),16-24 ft.,17,-36,168,1,0,20191023,MIA,MEM,2019-20
2,Shot Chart Detail,0021900007,47,1628389,Bam Adebayo,1610612748,Miami Heat,1,8,14,Missed Shot,Alley Oop Dunk Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,-5,20,1,0,20191023,MIA,MEM,2019-20
3,Shot Chart Detail,0021900007,220,1628389,Bam Adebayo,1610612748,Miami Heat,2,9,42,Made Shot,Cutting Dunk Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,1,2,14,1,1,20191023,MIA,MEM,2019-20
4,Shot Chart Detail,0021900007,283,1628389,Bam Adebayo,1610612748,Miami Heat,2,5,44,Missed Shot,Tip Dunk Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,0,-6,1,0,20191023,MIA,MEM,2019-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
785,Shot Chart Detail,0021901306,20,1628389,Bam Adebayo,1610612748,Miami Heat,1,10,4,Missed Shot,Jump Shot,2PT Field Goal,Mid-Range,Center(C),16-24 ft.,18,-48,174,1,0,20200812,OKC,MIA,2019-20
786,Shot Chart Detail,0021901306,34,1628389,Bam Adebayo,1610612748,Miami Heat,1,9,11,Made Shot,Cutting Finger Roll Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,1,11,8,1,1,20200812,OKC,MIA,2019-20
787,Shot Chart Detail,0021901306,59,1628389,Bam Adebayo,1610612748,Miami Heat,1,6,51,Made Shot,Alley Oop Dunk Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,-6,21,1,1,20200812,OKC,MIA,2019-20
788,Shot Chart Detail,0021901306,238,1628389,Bam Adebayo,1610612748,Miami Heat,2,6,30,Made Shot,Dunk Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,-20,14,1,1,20200812,OKC,MIA,2019-20


In [73]:
# Mean of SHOT_MADE_FLAG within each SHOT_ZONE_BASIC group.
# NOTE(review): `jh` is not defined in any visible cell, so this fails on a fresh
# kernel; presumably it is a shot DataFrame like the ones player_shooting returns —
# add the cell that creates it or confirm where it comes from.
jh.groupby(['SHOT_ZONE_BASIC'])['SHOT_MADE_FLAG'].mean()

SHOT_ZONE_BASIC
Above the Break 3        0.369427
Backcourt                0.000000
In The Paint (Non-RA)    0.448399
Left Corner 3            0.257143
Mid-Range                0.419753
Restricted Area          0.587669
Right Corner 3           0.420000
Name: SHOT_MADE_FLAG, dtype: float64