# Seasonal PPS Variation

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from nba_api.stats.endpoints.playerdashptshots import PlayerDashPtShots

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

## Acquiring and Merging Data
Shooting data for all shots and for shots taken from >= 10ft are merged below to derive 3 distinct zones: 2 point shots < 10ft, 2 point shots >= 10ft, and 3 point shots.

In [28]:
def shooting_by_seasons(seasons, df_select):
    '''
    Fetch one PlayerDashPtShots table per season and stack the results.

    Parameters
    ----------
    seasons: List of seasons (strings)
    df_select: Position of the DataFrame to take from get_data_frames() (int)

    Returns
    -------
    DataFrame made of the selected table for every season, concatenated in
    the order given, with an added 'season' column. Each per-season frame
    keeps its own row index (the index is not reset).
    '''
    def fetch(season):
        # Pull the requested table for a single season and tag its rows
        frames = PlayerDashPtShots(team_id=0, player_id=0, season=season).get_data_frames()
        tagged = frames[df_select]
        tagged['season'] = season
        return tagged

    return pd.concat([fetch(season) for season in seasons])

### Note:
Data is available starting from the 2013-14 season

In [56]:
# Every season with available data: '2013-14' through '2019-20',
# labelled as '<start year>-<last two digits of the following year>'
seasons = [f'{start}-{(start + 1) % 100:02d}' for start in range(2013, 2020)]

In [57]:
# Result set 4 holds all shots, result set 5 holds shots from >=10ft
df_all, df_ten = (shooting_by_seasons(seasons, table) for table in (4, 5))

In [58]:
# Rename columns and only keep relevant data (all-shots table)
df_all = df_all.rename(columns={
    'PLAYER_ID': 'player_id',
    'PLAYER_NAME_LAST_FIRST': 'last_first',
    'CLOSE_DEF_DIST_RANGE': 'closest_def',
})

# Identifier columns plus the 2pt / 3pt makes and attempts
df1 = df_all.loc[:, [
    'player_id', 'last_first', 'season', 'GP', 'closest_def',
    'FG2M', 'FG2A', 'FG3M', 'FG3A',
]]

# Same identifier renames for the >=10ft table; its 2pt makes/attempts get
# a '>=10' marker so they stay distinct from the all-shots columns after merging
df_ten = df_ten.rename(columns={
    'PLAYER_ID': 'player_id',
    'PLAYER_NAME_LAST_FIRST': 'last_first',
    'CLOSE_DEF_DIST_RANGE': 'closest_def',
    'FG2M': 'FG2>=10M',
    'FG2A': 'FG2>=10A',
})

# Identifier columns plus the >=10ft 2pt makes and attempts
df2 = df_ten.loc[:, [
    'player_id', 'last_first', 'season', 'GP', 'closest_def',
    'FG2>=10M', 'FG2>=10A',
]]

In [59]:
# Left-join the >=10ft counts onto the all-shots rows
df = pd.merge(
    df1,
    df2,
    how='left',
    on=['player_id', 'last_first', 'season', 'GP', 'closest_def'],
)

# Rows without a >=10ft match come back as NaN; count them as 0 makes/attempts
df[['FG2>=10M', 'FG2>=10A']] = df[['FG2>=10M', 'FG2>=10A']].fillna(0)

In [60]:
# Count the missing values left in each column after the merge and fill
df.isnull().sum()

player_id      0
last_first     0
season         0
GP             0
closest_def    0
FG2M           0
FG2A           0
FG3M           0
FG3A           0
FG2>=10M       0
FG2>=10A       0
dtype: int64

In [61]:
# Derive the <10ft 2pt makes (M) and attempts (A):
# all 2pt shots minus the 2pt shots taken from >=10ft
for suffix in ('M', 'A'):
    df[f'FG2<10{suffix}'] = df[f'FG2{suffix}'] - df[f'FG2>=10{suffix}']

In [62]:
# The combined 2pt totals are no longer needed once split into <10ft and >=10ft
df = df.drop(columns=['FG2M', 'FG2A'])

In [64]:
# Total players: number of distinct 'last_first' values
df['last_first'].nunique(dropna=False)

1084

## Feature Engineering

In [78]:
# Cast GP (games played) to int so the groupby aggregate functions can use it
df = df.astype({'GP': 'int'})

In [96]:
# Season-level totals of makes/attempts for each shot zone.
# The columns are selected with a list: selecting several GroupBy columns with
# bare comma-separated keys (a tuple) is deprecated and fails on pandas >= 2.0.
a = df.groupby('season')[
    ['FG3M', 'FG3A', 'FG2>=10M', 'FG2>=10A', 'FG2<10M', 'FG2<10A']
].sum().reset_index()

# Mean of the GP column within each season
b = df.groupby('season')['GP'].mean().reset_index()

# Make shooting percentage features for the different shot zones
a['3PCT'] = a['FG3M'] / a['FG3A']
a['mid2PCT'] = a['FG2>=10M'] / a['FG2>=10A']
a['short2PCT'] = a['FG2<10M'] / a['FG2<10A']

# Add games played by season
a = a.merge(b, on='season')

# Make attempts-per-game features
a['3s per game'] = a['FG3A'] / a['GP']
a['midrange per game'] = a['FG2>=10A'] / a['GP']
a['short 2s per game'] = a['FG2<10A'] / a['GP']

# Make points per shot (PPS) features: make rate times the shot's point value
a['3s PPS'] = a['3PCT'] * 3
a['Short 2s PPS'] = a['short2PCT'] * 2
a['Midrange PPS'] = a['mid2PCT'] * 2


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



### Notes:
- A normal NBA season has a total of 1230 games played. Data is not available for games played in arenas without SportVU cameras, so GP can fall short of 1230.
- Starting in the 2018-19 season, the shot clock resets to 14 seconds instead of 24 seconds after an offensive rebound. This may have led to more "desperation" shots.

In [97]:
# Display the per-season shooting summary table
a

Unnamed: 0,season,FG3M,FG3A,FG2>=10M,FG2>=10A,FG2<10M,FG2<10A,3PCT,mid2PCT,short2PCT,GP,3s per game,midrange per game,short 2s per game,3s PPS,Short 2s PPS,Midrange PPS
0,2013-14,18962,52346,24150,59244,49281,80193,0.362244,0.407636,0.61453,1226,42.696574,48.323002,65.410277,1.086731,1.22906,0.815272
1,2014-15,19247,54588,23768,58168,49023,80493,0.352587,0.40861,0.609034,1227,44.488998,47.406683,65.601467,1.05776,1.218069,0.817219
2,2015-16,20914,58751,23183,56308,49795,80430,0.355977,0.411718,0.61911,1228,47.842834,45.85342,65.496743,1.067931,1.23822,0.823435
3,2016-17,23656,65747,21678,52033,50338,79900,0.359803,0.41662,0.630013,1226,53.627243,42.441272,65.171289,1.07941,1.260025,0.83324
4,2017-18,25585,70209,19505,46934,51546,81049,0.364412,0.415584,0.635986,1222,57.454173,38.407529,66.324877,1.093236,1.271971,0.831167
5,2018-19,27822,78395,17583,43194,55116,96689,0.354895,0.40707,0.570034,1226,63.943719,35.231648,78.865416,1.064685,1.140068,0.814141
6,2019-20,25671,71711,14140,34174,46046,80560,0.357979,0.413765,0.571574,1053,68.101614,32.453941,76.505223,1.073936,1.143148,0.82753


In [98]:
# Pull the plotting series out of the season summary, grouped by shot zone:
# attempts per game, make rate, and points per shot
season = a['season']
three_atts, three_pct, three_pps = a['3s per game'], a['3PCT'], a['3s PPS']
mid_atts, mid_pct, mid_pps = a['midrange per game'], a['mid2PCT'], a['Midrange PPS']
short_atts, short_pct, short_pps = a['short 2s per game'], a['short2PCT'], a['Short 2s PPS']

In [99]:
# Attempts per game for each shot zone, season by season
fig = go.Figure()
fig.add_trace(go.Scatter(x=season, y=three_atts, name='3pt A/G'))
fig.add_trace(go.Scatter(x=season, y=mid_atts, name='Midrange A/G'))
fig.add_trace(go.Scatter(x=season, y=short_atts, name='Short 2s A/G'))
# Title and axis labels so the figure can be read on its own
fig.update_layout(
    title='Shot Attempts per Game by Zone',
    xaxis_title='Season',
    yaxis_title='Attempts per game',
)
fig.show()

In [100]:
# Make rate (makes / attempts) for each shot zone, season by season
fig = go.Figure()
fig.add_trace(go.Scatter(x=season, y=three_pct, name='3pt%'))
fig.add_trace(go.Scatter(x=season, y=mid_pct, name='Midrange%'))
fig.add_trace(go.Scatter(x=season, y=short_pct, name='Short 2s%'))
# Title and axis labels so the figure can be read on its own
fig.update_layout(
    title='Shooting Percentage by Zone',
    xaxis_title='Season',
    yaxis_title='Makes / attempts',
)
fig.show()

In [101]:
# Points per shot (make rate times point value) for each shot zone, season by season
fig = go.Figure()
fig.add_trace(go.Scatter(x=season, y=three_pps, name='3s PPS'))
fig.add_trace(go.Scatter(x=season, y=mid_pps, name='Midrange PPS'))
fig.add_trace(go.Scatter(x=season, y=short_pps, name='Short 2s PPS'))
# Title and axis labels so the figure can be read on its own
fig.update_layout(
    title='Points per Shot by Zone',
    xaxis_title='Season',
    yaxis_title='Points per shot',
)
fig.show()

In [None]:
def plot_player(df, name):
    temp = df.loc[df['last_first'] == name]
    