# Fetching and Preparing Data from stats.nba.com 

In [1]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import playercareerstats

In [2]:
# Fetch the active-player list from nba_api's static player data.
# NOTE(review): the call takes no season argument, so the "2021_22" in the
# name presumably reflects when the notebook was run — confirm before reuse.
active_2021_22 = players.get_active_players()
# Sanity check: report the container type and how many players came back.
print(f"type: {type(active_2021_22)}\tlength: {len(active_2021_22)}")

type: <class 'list'>	length: 587


In [3]:
def get_career_stats(player_id: str, get_request: bool = True) -> str:
    """Fetch one player's career statistics as JSON text.

    Thin wrapper around ``playercareerstats.PlayerCareerStats`` with
    ``per_mode36`` fixed to ``"Per36"``.

    Args:
        player_id: Player identifier, forwarded unchanged to the endpoint.
            NOTE(review): callers pass the ``"id"`` field of a player record
            as-is; confirm whether that is really a ``str``.
        get_request: Forwarded unchanged to the endpoint. Presumably controls
            whether the HTTP request is sent on construction — confirm
            against the nba_api documentation.

    Returns:
        Whatever ``get_normalized_json()`` returns for the endpoint object.
        This is serialized JSON, not a dict: callers must decode it with
        ``json.loads`` before indexing into it.
    """
    career_stats = playercareerstats.PlayerCareerStats(
        player_id=player_id,
        per_mode36="Per36",
        get_request=get_request,
    )
    return career_stats.get_normalized_json()

In [4]:
import json

# achiuwa
# First entry of the active-player list; note that `player` holds only the id.
player = active_2021_22[0]["id"]
# get_career_stats returns JSON text, so decode it into a dict here.
career = json.loads(get_career_stats(player))
# Sanity check on the decoded payload.
print(f"type: {type(career)}\tlength: {len(career)}")

type: <class 'dict'>	length: 12


In [6]:
# steven adams
# Keep the whole player record so the name can be printed before fetching.
player1 = active_2021_22[1]
print(player1["full_name"])
# Decode the JSON text returned by get_career_stats into a dict.
career1 = json.loads(get_career_stats(player1["id"]))

# Same sanity check as for the first player, plus the payload's top-level keys.
print(f"type: {type(career1)}\tlength: {len(career1)}\tkeys: {career1.keys()}")

Steven Adams
type: <class 'dict'>	length: 12	keys: dict_keys(['SeasonTotalsRegularSeason', 'CareerTotalsRegularSeason', 'SeasonTotalsPostSeason', 'CareerTotalsPostSeason', 'SeasonTotalsAllStarSeason', 'CareerTotalsAllStarSeason', 'SeasonTotalsCollegeSeason', 'CareerTotalsCollegeSeason', 'SeasonTotalsShowcaseSeason', 'CareerTotalsShowcaseSeason', 'SeasonRankingsRegularSeason', 'SeasonRankingsPostSeason'])


Collecting the career stats into a single dict and pickling it to one file.

In [8]:
import pickle
from pathlib import Path

# NOTE(review): absolute, machine-specific path — this cell only runs where
# the directory already exists. Consider a configurable, repo-relative
# data directory instead.
data_dir = Path("/home/klang/project-weekly/frontend/data/")
fname = "bar.pkl"
careers = [career, career1]
# Key each career payload by the PLAYER_ID found in its first
# regular-season row.
archive = {
    dataset["SeasonTotalsRegularSeason"][0]["PLAYER_ID"]: dataset for dataset in careers
}
with open(data_dir / fname, "wb") as f_out:
    pickle.dump(archive, f_out)

# Round-trip check: read the archive straight back.
# pickle.load can execute arbitrary code embedded in the file, so only load
# archives that this notebook wrote itself.
with open(data_dir / fname, "rb") as f_in:
    arch_read = pickle.load(f_in)

# NOTE(review): the ")" after the last closing brace sits inside the f-string
# and therefore shows up in the printed output.
print(f"type: {type(arch_read)}\tlen: {len(arch_read)}\tkeys: {arch_read.keys()})")

type: <class 'dict'>	len: 2	keys: dict_keys([1630173, 203500]))


In [21]:
def get_jsons(career: dict, season: str = "2021-22") -> tuple:
    """Pick one season's regular- and post-season rows out of a career payload.

    Args:
        career: Mapping with "SeasonTotalsRegularSeason" and
            "SeasonTotalsPostSeason" entries, each an iterable of per-season
            dicts that carry a "SEASON_ID" key.
        season: "SEASON_ID" value to select. Defaults to "2021-22", the
            season this function was originally hard-wired to.

    Returns:
        ``(reg_dict, post_dict)``: the first matching row of the regular-season
        table and of the post-season table, in that order.
        NOTE(review): when a table holds several rows for the same season,
        only the first one is returned — confirm that is the intended row.

    Raises:
        IndexError: If either table has no row for ``season``. The message
            names the table and the season.
        KeyError: If ``career`` lacks one of the two tables.
    """

    def first_row(table: str) -> dict:
        """Return the first row of ``career[table]`` whose SEASON_ID matches."""
        match = next(
            (row for row in career[table] if row["SEASON_ID"] == season), None
        )
        if match is None:
            # Same exception type as indexing an empty list, but with a
            # message that says which table and season were missing.
            raise IndexError(f"no {season!r} row in career[{table!r}]")
        return match

    # The post-season table is checked first, so a player without a
    # post-season row fails on that table.
    post_dict = first_row("SeasonTotalsPostSeason")
    reg_dict = first_row("SeasonTotalsRegularSeason")
    return (reg_dict, post_dict)

In [22]:
# 1630173 is one of the PLAYER_ID keys of the archive read back from disk.
reg_dict, post_dict = get_jsons(arch_read[1630173])

In [23]:
# Display the post-season row selected by get_jsons.
post_dict

{'PLAYER_ID': 1630173,
 'SEASON_ID': '2021-22',
 'LEAGUE_ID': '00',
 'TEAM_ID': 1610612761,
 'TEAM_ABBREVIATION': 'TOR',
 'PLAYER_AGE': 22.0,
 'GP': 6,
 'GS': 1,
 'MIN': 167.0,
 'FGM': 5.4,
 'FGA': 11.2,
 'FG_PCT': 0.481,
 'FG3M': 1.1,
 'FG3A': 3.5,
 'FG3_PCT': 0.313,
 'FTM': 1.3,
 'FTA': 2.2,
 'FT_PCT': 0.6,
 'OREB': 1.7,
 'DREB': 4.5,
 'REB': 6.3,
 'AST': 1.3,
 'STL': 0.2,
 'BLK': 1.1,
 'TOV': 1.9,
 'PF': 3.0,
 'PTS': 13.2}

In [17]:
# Iterating the archive dict yields its keys, so `stats` is a PLAYER_ID here.
for stats in arch_read:
    print(stats)
    # `reg` and `post` are rebound on every pass; once the loop ends they hold
    # the rows of the last player visited.
    reg, post = get_jsons(arch_read[stats])

1630173
203500


Fold the post-season games into the regular-season stats in proportion to games played, but give the post-season games extra weight? The drawback is that this introduces an arbitrary coefficient (the post-season weight).

In [18]:
# Identifying (non-statistical) keys of a season row.
meta_cols = [
    "PLAYER_ID", "SEASON_ID", "LEAGUE_ID",
    "TEAM_ID", "TEAM_ABBREVIATION", "PLAYER_AGE",
]
# Statistical keys of a season row that are eligible for merging.
# "GP", "GS" and "MIN" are in neither list.
stat_cols = [
    "FGM", "FGA", "FG_PCT",
    "FG3M", "FG3A", "FG3_PCT",
    "FTM", "FTA", "FT_PCT",
    "OREB", "DREB", "REB",
    "AST", "STL", "BLK",
    "TOV", "PF", "PTS",
]


def fold_post_stats(
    reg_season: dict, post_season: dict, merge_stats: list, post_wt: float = 2.0
) -> dict:
    """Blend regular- and post-season stats, weighted by games played.

    Every stat named in ``merge_stats`` becomes a weighted mean of the two
    seasons' values. The regular season weighs in with its "GP"; the post
    season weighs in with ``post_wt`` times its "GP".

    Args:
        reg_season: Regular-season row. Must provide "GP", "PLAYER_ID" and
            every key in ``merge_stats``.
        post_season: Post-season row providing "GP" and every key in
            ``merge_stats``. ``None`` or an empty dict means the player has
            no post-season games; the regular-season values are then carried
            over on their own (as floats).
        merge_stats: Keys to blend.
        post_wt: Multiplier applied to the post-season game count.

    Returns:
        A new dict with one entry per merged stat plus "PLAYER_ID", which is
        taken from ``reg_season``.

    Raises:
        ZeroDivisionError: If the weighted game total is zero while there is
            at least one stat to merge.
    """
    has_post = bool(post_season)
    reg_gp = reg_season["GP"]
    # Weighted post-season game count; zero when there is no post season.
    post_gp_wtd = post_wt * post_season["GP"] if has_post else 0
    gp_tot = reg_gp + post_gp_wtd

    merged = {}
    for stat in merge_stats:
        blended = reg_gp / gp_tot * reg_season[stat]
        if has_post:
            blended += post_gp_wtd / gp_tot * post_season[stat]
        merged[stat] = blended

    merged["PLAYER_ID"] = reg_season["PLAYER_ID"]
    return merged

In [19]:
# `reg` and `post` are the rows left bound by the loop cell that calls
# get_jsons; post_wt is left at its default.
merged = fold_post_stats(reg_season=reg, post_season=post, merge_stats=stat_cols)
merged

{'FGM': 3.6444444444444444,
 'FGA': 6.8533333333333335,
 'FG_PCT': 0.5286444444444445,
 'FG3M': 0.0,
 'FG3A': 0.0,
 'FG3_PCT': 0.0,
 'FTM': 1.9,
 'FTA': 3.5844444444444443,
 'FT_PCT': 0.5433111111111111,
 'OREB': 6.05111111111111,
 'DREB': 7.7266666666666675,
 'REB': 13.777777777777779,
 'AST': 4.615555555555555,
 'STL': 1.0599999999999998,
 'BLK': 0.9755555555555556,
 'TOV': 1.9755555555555557,
 'PF': 2.9555555555555553,
 'PTS': 9.204444444444444,
 'PLAYER_ID': 203500}

In [20]:
import pandas as pd

# Toy rows for checking how per-player dicts land in a DataFrame.
reg_season = {"PLAYER_ID": 123, "GP": 30, "FGM": 1, "FGA": 10}
post_season = {"PLAYER_ID": 121, "GP": 10, "FGM": 9, "FGA": 10}
rows = [reg_season, post_season]

# One record per dict, dict keys become columns; PLAYER_ID becomes the index.
df = pd.DataFrame(rows).set_index("PLAYER_ID")
df.head()

Unnamed: 0_level_0,GP,FGM,FGA
PLAYER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
123,30,1,10
121,10,9,10
