# Analyzing a player's shot selection after going on a shooting streak.

In [27]:
# import packages
from nba_api.stats.endpoints import shotchartdetail
import json
import requests
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import math

# Grab player and team ids
# Load teams file
teams = json.loads(requests.get('https://raw.githubusercontent.com/bttmly/nba/master/data/teams.json').text)
# Load players file
players = json.loads(requests.get('https://lifeislinear.davidson.edu/sports/player_dicts.json').text)

### ID helper functions

In [2]:
# Get team ID based on team name
def get_team_id(teamInput):
    for team in teams:
        if team['teamName'] == teamInput:
            return team['teamId']
    return -1
# Get player ID based on player name
def get_player_id(first, last):
    for player in players:
        if player['firstName'] == first and player['lastName'] == last:
            return player['playerId']
    return -1
# Get Name based on player id
def get_player_name(playerId):
    for player in players:
        if player['playerId']==playerId:
            return player['firstName'], player['lastName']
    return -1

### Shooting percentage helper functions.

In [3]:
# Shooting percentage in each zone
def shooting_perc(group):
    made = group['SHOT_MADE_FLAG'].sum()
    attempted = group['SHOT_ATTEMPTED_FLAG'].sum()
    return round((made / attempted) * 100, 2)

# The percentage of total shots from each zone
def shot_perc(group, total):
    attempted = group['SHOT_ATTEMPTED_FLAG'].sum() # shots attempted in the zone
    return round((attempted / total) * 100, 2)

### Shot streak helper functions.

In [4]:
def find_hh(data, hh_thresh, shot_type=-1):
    # Data - the data frame to filter
    # player_id - the player we are investigating
    # hh_thresh - how many mae shots in a row to qualify for a "hot hand"

    if shot_type == 2:
        data = data[data["SHOT_TYPE"] == "2PT Field Goal"]
    elif shot_type == 3:
        data = data[data["SHOT_TYPE"] == "3PT Field Goal"]
    
    made_count = 0
    for index, row in data.iterrows():
        if row["SHOT_MADE_FLAG"] == 1:
            made_count += 1
        else:
            made_count = 0

        if made_count < hh_thresh:
            data.at[index, 'HOT_HAND'] = 0
        else:
            data.at[index, 'HOT_HAND'] = 1

    return data

def heat_check(data):
    # A function to find the misses following a hot hand
    
    # index -1 was throwing errors, so to get the previous hot hand shots we will use shift
    temp = data
    temp['p_hh'] = data["HOT_HAND"].shift(1).fillna(0)
    data['HEAT_CHECK'] = ((data['SHOT_MADE_FLAG'] == 0) & (temp['p_hh'] == 1)).astype(int)
    
    return data

### Load the data

In [5]:
# Create JSON request
ahead_json = shotchartdetail.ShotChartDetail(
            # team_id = get_team_id('Los Angeles Lakers'),
            team_id = 0,
            player_id = get_player_id('LeBron', 'James'),
            context_measure_simple = 'FGA',
            season_nullable = '2022-23',
            season_type_all_star = ['Regular Season', 'Playoffs'],
            ahead_behind_nullable = 'Ahead or Tied')

# Load JSON data into dictionary
ahead_data = json.loads(ahead_json.get_json())

behind_json = shotchartdetail.ShotChartDetail(
            # team_id = get_team_id('Los Angeles Lakers'),
            team_id = 0,
            player_id = get_player_id('LeBron', 'James'),
            context_measure_simple = 'FGA',
            season_nullable = '2022-23',
            season_type_all_star = ['Regular Season', 'Playoffs'],
            ahead_behind_nullable = 'Behind or Tied')

behind_data = json.loads(behind_json.get_json())

# Convert the data into a data frame.
ahead_df = pd.concat(
    pd.DataFrame(shot, columns=ahead_data['resultSets'][0]['headers'])
    for shot in [ahead_data['resultSets'][0]['rowSet']]
)
behind_df = pd.concat(
    pd.DataFrame(shot, columns=behind_data['resultSets'][0]['headers'])
    for shot in [behind_data['resultSets'][0]['rowSet']]
)

### Preparing the data helper function.

In [6]:
STREAK_THRESH = 2

def prepare_data(df):
    # Find the player's shooting percentage per zone.
    df['ZONE_SPECIFIC'] = df['SHOT_ZONE_AREA'] + ' ' + df['SHOT_ZONE_RANGE']
    shooting_zones = df.groupby('ZONE_SPECIFIC')
    shooting_zones = shooting_zones.apply(shooting_perc)

    # Find the shot attempts when a player is on a streak.
    df = find_hh(df, STREAK_THRESH)
    
    # Find the player's misses after a streak. Then find the misses per zone.
    df = heat_check(df)

    # Keep only 3 point shots
    df = df[df['SHOT_TYPE'] == '3PT Field Goal']
    
    heat_check_zones = df.groupby('ZONE_SPECIFIC').sum()['HEAT_CHECK']

    # Convert the Series into data frames, merge, and then return the resulting analyis.
    shooting_zones = pd.DataFrame(shooting_zones, columns=['SHOOTING_PERCENTAGE'])
    heat_check_zones = pd.DataFrame(heat_check_zones)

    # Do we need to find the percentage of total shots taken in each zone?

    return pd.merge(shooting_zones, heat_check_zones, on='ZONE_SPECIFIC'), df



### A function to find the exact distance of a shot. This function returns the avg. shot distance of a player, and then the shot distance after he goes on a streak.

In [21]:
def shot_distance(data):
    hh_data = data[(data['HOT_HAND'] == 1) | (data['HEAT_CHECK'] == 1)]
    no_streak = data[(data['HOT_HAND'] == 0) & data['HEAT_CHECK'] == 0]

    return no_streak['SHOT_DISTANCE'].mean(), hh_data['SHOT_DISTANCE'].mean(), np.var(no_streak['SHOT_DISTANCE']), np.var(hh_data['SHOT_DISTANCE'])

### Iterate over the top 3 point shooters over the past years.

In [28]:
seasons = ['2023-24', '2022-23', '2021-22', '2020-21', '2019-20']
poi = ['LeBron James', 'Stephen Curry', 'Kevin Durant', 'Jordan Poole', 'Klay Thompson',
       'Damian Lillard', 'Trae Young', 'Jayson Tatum', 'Fred VanVleet', 'Zach LaVine']

player_frames = {}

for p_name in poi:
    player_ahead = pd.DataFrame()
    player_behind = pd.DataFrame()
    p = p_name.split()
    
    for season in seasons:
        ahead_json = shotchartdetail.ShotChartDetail(
            # team_id = get_team_id('Los Angeles Lakers'),
            team_id = 0,
            player_id = get_player_id(p[0], p[1]),
            context_measure_simple = 'FGA',
            season_nullable = season,
            season_type_all_star = ['Regular Season', 'Playoffs'],
            ahead_behind_nullable = 'Ahead or Tied')

        # Load JSON data into dictionary
        ahead_data = json.loads(ahead_json.get_json())

        behind_json = shotchartdetail.ShotChartDetail(
                    # team_id = get_team_id('Los Angeles Lakers'),
                    team_id = 0,
                    player_id = get_player_id(p[0], p[1]),
                    context_measure_simple = 'FGA',
                    season_nullable = season,
                    season_type_all_star = ['Regular Season', 'Playoffs'],
                    ahead_behind_nullable = 'Behind or Tied')

        behind_data = json.loads(behind_json.get_json())

        # Convert the data into a data frame.
        ahead_df = pd.concat(
            pd.DataFrame(shot, columns=ahead_data['resultSets'][0]['headers'])
            for shot in [ahead_data['resultSets'][0]['rowSet']]
        )
        behind_df = pd.concat(
            pd.DataFrame(shot, columns=behind_data['resultSets'][0]['headers'])
            for shot in [behind_data['resultSets'][0]['rowSet']]
        )
        player_ahead = pd.concat([player_ahead, ahead_df], ignore_index=True)
        player_behind = pd.concat([player_behind, behind_df], ignore_index=True)

    ahead_analysis, ahead_df = prepare_data(player_ahead)
    behind_analysis, behind_df = prepare_data(player_behind)

    player_analysis = pd.merge(ahead_analysis, behind_analysis, on='ZONE_SPECIFIC', suffixes=('_AHEAD', '_BEHIND'))
    player_frames[p_name] = player_analysis

    # Find the average shot distance, then the average shot distance from a hot hand streak
    avg_shot_dist_a, hh_dist_a, no_streak_var_a, hh_var_a = shot_distance(ahead_df)
    avg_shot_dist_b, hh_dist_b, no_streak_var_b, hh_var_b = shot_distance(behind_df)
    avg_shot_dist = round((avg_shot_dist_a + avg_shot_dist_b) / 2, 2)
    hh_dist = round((hh_dist_a + hh_dist_b) / 2, 2)
    no_streak_var = round((no_streak_var_a + no_streak_var_b) / 2, 2)
    hh_var = round((hh_var_a + hh_var_b) / 2, 2)


    print(p_name)
    print(f'Average shot distance (Not on a streak): {avg_shot_dist}')
    print(f'Variance: {no_streak_var}')
    print(f'Average streak shot distance: {hh_dist}')
    print(f'Variance: {hh_var}')
    
    print()
        

LeBron James
Average shot distance (Not on a streak): 25.88
Variance: 4.16
Average streak shot distance: 26.15
Variance: 4.93

Stephen Curry
Average shot distance (Not on a streak): 26.64
Variance: 11.23
Average streak shot distance: 26.94
Variance: 11.94

Kevin Durant
Average shot distance (Not on a streak): 25.24
Variance: 1.87
Average streak shot distance: 25.34
Variance: 2.25

Jordan Poole
Average shot distance (Not on a streak): 26.32
Variance: 11.71
Average streak shot distance: 26.51
Variance: 9.98

Klay Thompson
Average shot distance (Not on a streak): 25.47
Variance: 5.2
Average streak shot distance: 25.54
Variance: 2.73

Damian Lillard
Average shot distance (Not on a streak): 27.08
Variance: 12.64
Average streak shot distance: 27.14
Variance: 7.49

Trae Young
Average shot distance (Not on a streak): 27.2
Variance: 11.46
Average streak shot distance: 27.24
Variance: 9.32

Jayson Tatum
Average shot distance (Not on a streak): 25.48
Variance: 5.49
Average streak shot distance: 2