In [40]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import PlayerGameLog
import pandas as pd
import nba_api.stats.endpoints
import time
import random

In [13]:
draft_df = nba_api.stats.endpoints.drafthistory.DraftHistory().get_data_frames()[0]

In [None]:
draft_2003 = draft_df[draft_df["SEASON"] == "2003"]
draft_2003

In [None]:
players_name_id = draft_2003[["PLAYER_NAME", "PERSON_ID"]]

In [41]:
players_10_games = []
for player_id, player_name in zip(players_name_id["PERSON_ID"], players_name_id["PLAYER_NAME"]):
	game_log = PlayerGameLog(player_id=player_id, season="2003-04", season_type_all_star="Regular Season")
	df = game_log.get_data_frames()[0]
	df["GAME_DATE"] = pd.to_datetime(df["GAME_DATE"])
	df_sorted = df.sort_values("GAME_DATE")
	first_10_games = df_sorted.head(10)
	if not first_10_games.empty:	# Ensuring they have played at least a game
		player_stats = {
			"player_name": player_name,
			"player_id": player_id,
			"fta": first_10_games["FTA"].sum(),
			"ftm": first_10_games["FTM"].sum(),
			"ft_pct": first_10_games["FTM"].sum() / first_10_games["FTA"].sum() if first_10_games["FTA"].sum() > 0 else 0
		}
		players_10_games.append(player_stats)
	time.sleep(random.uniform(0.5, 1.5))  # To avoid hitting rate limits
df_players_10_games = pd.DataFrame(players_10_games)

In [None]:
# removing players who didnt shoot free throws in their first 10 games
# since they dont provide meaningful data for free throw percentage
df_players_10_games = df_players_10_games[df_players_10_games["fta"] > 0]
df_players_10_games

Unnamed: 0,player_name,player_id,fta,ftm,ft_pct
0,LeBron James,2544,38,23,0.605263
1,Darko Milicic,2545,3,1,0.333333
2,Carmelo Anthony,2546,60,43,0.716667
3,Chris Bosh,2547,27,23,0.851852
4,Dwyane Wade,2548,36,24,0.666667
5,Chris Kaman,2549,17,12,0.705882
6,Kirk Hinrich,2550,22,15,0.681818
7,T.J. Ford,2551,17,15,0.882353
8,Michael Sweetney,2552,6,5,0.833333
9,Jarvis Hayes,2553,11,9,0.818182


Because each player is a binomial and their free throw variance is different we need to apply shrinkage differently to each player.

Reason for applying different shrinkages is beacause if:
Player A:
- has shot 200FT

Player B:
- has shot 20FT

it is not accurate to apply the same shrinkage to both of them

shrinkage equation

$c = 1 - \frac{(k - 3)\sigma^2}{\sum (y - \bar{y})^2}$

$z = \bar{y} + c (y - \bar{y})$

where:
- z is the JS adjusted average
- $\bar{y}$ is the "grand" average


In [47]:
import numpy as np

In [61]:
# applying james stein
# lets try with changing shrinkage c
grand_average = df_players_10_games["ft_pct"].mean()
k = len(df_players_10_games)

def js_avg(df):
    # free throws are a binomial process
	variance = df["ft_pct"] * (1-df["ft_pct"]) / df["fta"]
	# variance = np.var(df["ft_pct"], ddof=1)
	shrinkage_c = 1 - ((k-3)*variance) / sum((df["ft_pct"] - grand_average)**2)
	df["variance"] = variance
	df["shrinkage_c"] = shrinkage_c
	df["js_ft_pct"] = grand_average + shrinkage_c * (df["ft_pct"] - grand_average)

	return df

df_players_10_games_js = js_avg(df_players_10_games)

In [62]:
df_players_10_games_js

Unnamed: 0,player_name,player_id,fta,ftm,ft_pct,shrinkage_c,js_ft_pct,variance
0,LeBron James,2544,38,23,0.605263,0.855504,0.619637,0.006287
1,Darko Milicic,2545,3,1,0.333333,-0.702367,0.965606,0.074074
2,Carmelo Anthony,2546,60,43,0.716667,0.922223,0.715739,0.003384
3,Chris Bosh,2547,27,23,0.851852,0.89258,0.836049,0.004674
4,Dwyane Wade,2548,36,24,0.666667,0.858136,0.672068,0.006173
5,Chris Kaman,2549,17,12,0.705882,0.719333,0.705562,0.012212
6,Kirk Hinrich,2550,22,15,0.681818,0.773375,0.687013,0.009861
7,T.J. Ford,2551,17,15,0.882353,0.859666,0.857428,0.006106
8,Michael Sweetney,2552,6,5,0.833333,0.46801,0.764924,0.023148
9,Jarvis Hayes,2553,11,9,0.818182,0.6892,0.782924,0.013524


In [57]:
grand_average

np.float64(0.7047410108871327)

In [63]:
k

38

In [None]:
# lets try transforming the data a lil
