# Experimental Metric: Shot Selection Intelligence (CraftedNBA Style)

"Good shots for them" - measuring whether players take more shots from zones where they excel relative to league average.

In [34]:
import pandas as pd
import numpy as np

# Load the shot data
df = pd.read_csv('../data/processed/shot_data_bbref_2024.csv', skiprows=1)

# Rename columns for clarity
df.columns = [
    'Rk', 'Player', 'Age', 'Team', 'Pos', 'G', 'GS', 'MP', 'FG_pct', 'Avg_Dist',
    # FGA Frequency by distance
    'FGA_freq_2P', 'FGA_freq_0_3', 'FGA_freq_3_10', 'FGA_freq_10_16', 'FGA_freq_16_3P', 'FGA_freq_3P',
    # FG% by distance  
    'FG_pct_2P', 'FG_pct_0_3', 'FG_pct_3_10', 'FG_pct_10_16', 'FG_pct_16_3P', 'FG_pct_3P',
    # Other stats
    'Ast_pct_2P', 'Ast_pct_3P', 'Dunk_freq', 'Dunk_made', 'Corner3_freq', 'Corner3_pct', 
    'Heave_att', 'Heave_made', 'Awards', 'bbref_id'
]

# Convert to numeric
numeric_cols = [col for col in df.columns if col not in ['Player', 'Team', 'Pos', 'Awards', 'bbref_id']]
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

print(f"Loaded {len(df)} players")

Loaded 735 players


In [35]:
# Calculate league averages for each zone (both efficiency and frequency)
league_avg_efficiency = {
    'FG_pct_0_3': df['FG_pct_0_3'].mean(),
    'FG_pct_3_10': df['FG_pct_3_10'].mean(),
    'FG_pct_10_16': df['FG_pct_10_16'].mean(),
    'FG_pct_16_3P': df['FG_pct_16_3P'].mean(),
    'FG_pct_3P': df['FG_pct_3P'].mean()
}

league_avg_frequency = {
    'FGA_freq_0_3': df['FGA_freq_0_3'].mean(),
    'FGA_freq_3_10': df['FGA_freq_3_10'].mean(),
    'FGA_freq_10_16': df['FGA_freq_10_16'].mean(),
    'FGA_freq_16_3P': df['FGA_freq_16_3P'].mean(),
    'FGA_freq_3P': df['FGA_freq_3P'].mean()
}

print("League averages:")
print("Efficiency by zone:")
for zone, avg in league_avg_efficiency.items():
    print(f"  {zone}: {avg:.3f}")
print("\nFrequency by zone:")
for zone, avg in league_avg_frequency.items():
    print(f"  {zone}: {avg:.3f}")

def calculate_shot_quality_score(player_data):
    """
    Calculate CraftedNBA-style shot quality score: 
    "Good shots for them" - compares player's efficiency advantage/disadvantage 
    to their shot frequency relative to league average.
    """
    # Define shooting zones with point values
    zones = [
        ('0-3 ft', 'FGA_freq_0_3', 'FG_pct_0_3', 2),
        ('3-10 ft', 'FGA_freq_3_10', 'FG_pct_3_10', 2),
        ('10-16 ft', 'FGA_freq_10_16', 'FG_pct_10_16', 2),
        ('16ft-3P', 'FGA_freq_16_3P', 'FG_pct_16_3P', 2),
        ('3P', 'FGA_freq_3P', 'FG_pct_3P', 3)
    ]
    
    total_score = 0
    zone_scores = {}
    
    for zone_name, freq_col, eff_col, points in zones:
        player_freq = player_data[freq_col]
        player_eff = player_data[eff_col]
        league_freq = league_avg_frequency[freq_col]
        league_eff = league_avg_efficiency[eff_col]
        
        # Only include zones with valid data
        if pd.notna(player_freq) and pd.notna(player_eff) and pd.notna(league_freq) and pd.notna(league_eff):
            # Calculate points per shot for player and league
            player_pps = player_eff * points
            league_pps = league_eff * points
            
            # Efficiency advantage (how much better/worse than league)
            efficiency_advantage = player_pps - league_pps
            
            # Frequency difference (how much more/less they shoot from this zone)
            freq_difference = player_freq - league_freq
            
            # Shot quality score for this zone
            zone_score = efficiency_advantage * freq_difference
            zone_scores[zone_name] = {
                'efficiency_advantage': efficiency_advantage,
                'freq_difference': freq_difference,
                'zone_score': zone_score,
                'player_pps': player_pps,
                'league_pps': league_pps,
                'player_freq': player_freq,
                'league_freq': league_freq
            }
            
            total_score += zone_score
    
    return total_score, zone_scores

# Test with Stephen Curry
curry = df[df['Player'] == 'Stephen Curry']
if curry.empty:
    curry = df[df['Player'].str.contains('Curry', na=False)]

if not curry.empty:
    curry_data = curry.iloc[0]
    curry_score, curry_zones = calculate_shot_quality_score(curry_data)
    
    print(f"\nStephen Curry Shot Quality Score (CraftedNBA Style): {curry_score:.4f}")
    
    print(f"\nCurry's Zone-by-Zone Breakdown:")
    print(f"{'Zone':<10} {'Player PPS':<11} {'League PPS':<11} {'Advantage':<10} {'Freq Diff':<10} {'Zone Score':<12}")
    print("-" * 75)
    
    for zone_name, data in curry_zones.items():
        print(f"{zone_name:<10} {data['player_pps']:<11.3f} {data['league_pps']:<11.3f} "
              f"{data['efficiency_advantage']:<10.3f} {data['freq_difference']:<10.3f} {data['zone_score']:<12.4f}")
    
    print(f"\nInterpretation:")
    print(f"- Positive zone scores = Taking more good shots 'for him'")
    print(f"- Negative zone scores = Taking more bad shots 'for him'")
    print(f"- Total score = {curry_score:.4f} (higher is better shot selection)")
        
else:
    print("Stephen Curry not found in dataset")

League averages:
Efficiency by zone:
  FG_pct_0_3: 0.675
  FG_pct_3_10: 0.436
  FG_pct_10_16: 0.385
  FG_pct_16_3P: 0.357
  FG_pct_3P: 0.316

Frequency by zone:
  FGA_freq_0_3: 0.247
  FGA_freq_3_10: 0.216
  FGA_freq_10_16: 0.080
  FGA_freq_16_3P: 0.038
  FGA_freq_3P: 0.419

Stephen Curry Shot Quality Score (CraftedNBA Style): 0.0506

Curry's Zone-by-Zone Breakdown:
Zone       Player PPS  League PPS  Advantage  Freq Diff  Zone Score  
---------------------------------------------------------------------------
0-3 ft     1.346       1.351       -0.005     -0.164     0.0008      
3-10 ft    0.976       0.871       0.105      -0.045     -0.0047     
10-16 ft   0.910       0.769       0.141      -0.010     -0.0015     
16ft-3P    1.134       0.713       0.421      0.015      0.0062      
3P         1.191       0.947       0.244      0.204      0.0498      

Interpretation:
- Positive zone scores = Taking more good shots 'for him'
- Negative zone scores = Taking more bad shots 'for him'
- T

In [36]:
# Calculate shot quality scores for all players
df['Shot_Quality_Score'] = df.apply(lambda row: calculate_shot_quality_score(row)[0], axis=1)

# Remove players with insufficient data
valid_scores = df.dropna(subset=['Shot_Quality_Score'])

print(f"Calculated shot quality scores for {len(valid_scores)} players")
print(f"Mean score: {valid_scores['Shot_Quality_Score'].mean():.4f}")
print(f"Std score: {valid_scores['Shot_Quality_Score'].std():.4f}")

# Calculate Curry's percentile
if not curry.empty:
    curry_percentile = (valid_scores['Shot_Quality_Score'] < curry_score).mean() * 100
    print(f"\nCurry's Shot Quality Score: {curry_score:.4f}")
    print(f"Curry's Percentile: {curry_percentile:.1f}th")

# Top and bottom shot selection
print("\nBest Shot Selection (Highest Scores - Taking Good Shots 'For Them'):")
top_selection = valid_scores.nlargest(10, 'Shot_Quality_Score')[['Player', 'Team', 'Shot_Quality_Score']]
display(top_selection)

print("\nWorst Shot Selection (Lowest Scores - Taking Bad Shots 'For Them'):")
bottom_selection = valid_scores.nsmallest(10, 'Shot_Quality_Score')[['Player', 'Team', 'Shot_Quality_Score']]
display(bottom_selection)

Calculated shot quality scores for 735 players
Mean score: 0.0378
Std score: 0.1477

Curry's Shot Quality Score: 0.0506
Curry's Percentile: 71.0th

Best Shot Selection (Highest Scores - Taking Good Shots 'For Them'):


Unnamed: 0,Player,Team,Shot_Quality_Score
654,Patrick Baldwin Jr.,LAC,1.192885
549,Jalen Hood-Schifino,LAL,1.131804
484,Reece Beekman,GSW,0.885605
694,PJ Dozier,MIN,0.612462
308,Jaxson Hayes,LAL,0.543216
42,Jarrett Allen,CLE,0.540171
585,Bones Hyland,MIN,0.516328
355,Adem Bona,PHI,0.508713
337,Trayce Jackson-Davis,GSW,0.50318
487,Kai Jones,LAC,0.502305



Worst Shot Selection (Lowest Scores - Taking Bad Shots 'For Them'):


Unnamed: 0,Player,Team,Shot_Quality_Score
729,Terry Taylor,SAC,-0.683339
733,Zyon Pullin,MEM,-0.683339
718,Tristen Newton,MIN,-0.570462
609,Cole Swider,DET,-0.550128
719,Daishen Nix,MIN,-0.550128
727,Riley Minix,SAS,-0.550128
541,Maxwell Lewis,LAL,-0.502385
725,Terence Davis,SAC,-0.399363
710,Malevy Leons,OKC,-0.324497
723,Mac McClung,ORL,-0.324497
