## NBA API

### Description
- Ingest data via nba_api
- reformat data
- push data to Supabase
- This notebook is meant to explore the nba_api package and test-push some data to Supabase
- Future python files will be designed to package up consistent data pulls from the api and updates to Supabase tables
- Once data is consistently landed, I will work on building some ML models and Neural Networks to add Data Science flair to the project
- Final step will be packaging it up in a simple web app to allow others to interact

### Import Packages

In [1]:
## import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import nba_api
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.static import players
from nba_api.stats.library.parameters import SeasonAll
from nba_api.stats.static import teams as static_teams
from nba_api.stats.static import players as static_players
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import leaguedashplayerstats
import time
import requests
import json
import seaborn as sns
import sys
import os
from supabase import create_client, Client
from dotenv import load_dotenv

load_dotenv()

True

### Test pull to grab recent ATL games

In [2]:

# Smoke test of the endpoint: request the Hawks' games through LeagueGameFinder.
atl = 1610612737  # Atlanta Hawks team_id
# get_data_frames() returns a list of frames; index 0 is the game table previewed below.
games = leaguegamefinder.LeagueGameFinder(team_id_nullable=atl).get_data_frames()[0]
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22025,1610612737,ATL,Atlanta Hawks,22500327,2025-12-03,ATL vs. LAC,L,241,92,...,0.786,7,31,38,20,9.0,1,16,17,-23.0
1,22025,1610612737,ATL,Atlanta Hawks,22500309,2025-12-01,ATL @ DET,L,241,98,...,0.727,7,27,34,27,15.0,4,13,16,-1.0
2,22025,1610612737,ATL,Atlanta Hawks,22500304,2025-11-30,ATL @ PHI,W,288,142,...,0.756,14,44,58,36,10.0,6,21,27,8.0
3,22025,1610612737,ATL,Atlanta Hawks,22500069,2025-11-28,ATL vs. CLE,W,242,130,...,0.692,6,32,38,36,11.0,5,12,14,7.0
4,22025,1610612737,ATL,Atlanta Hawks,22500057,2025-11-25,ATL @ WAS,L,241,113,...,0.75,14,30,44,33,4.0,6,18,19,-19.0


In [3]:
games.columns

Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT',
       'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS'],
      dtype='object')

### Let's do some broader things to get data
- first use the static databases to get team and player IDs (these can be dim tables)
- then let's flesh out some broader functionality with useful endpoints

#### Grab team IDs

In [51]:
# All teams (list[dict]); the list itself is kept because the lookup maps
# built further down reuse it.
teams_list = static_teams.get_teams()
teams_df = (
    pd.DataFrame(teams_list)
    .sort_values("full_name")
    # Stamp the load time as a UTC string so every value stays JSON-friendly.
    # Timestamp.now(tz="UTC") replaces Timestamp.utcnow(), deprecated in pandas 2.2.
    .assign(date_time_processed=str(pd.Timestamp.now(tz="UTC")))
)
teams_df.head()

Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded,date_time_processed
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Georgia,1949,2025-12-05 01:59:00.683822+00:00
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946,2025-12-05 01:59:00.683822+00:00
14,1610612751,Brooklyn Nets,BKN,Nets,Brooklyn,New York,1976,2025-12-05 01:59:00.683822+00:00
29,1610612766,Charlotte Hornets,CHA,Hornets,Charlotte,North Carolina,1988,2025-12-05 01:59:00.683822+00:00
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966,2025-12-05 01:59:00.683822+00:00


In [52]:
def get_team_ids_df():
    """Build the team dimension frame from nba_api's static team list.

    Returns:
        pandas.DataFrame: the static team records sorted by ``full_name``,
        with ``id`` renamed to ``team_id`` and a ``date_time_processed``
        column holding the UTC load time as a string.
    """
    # Timestamp.now(tz="UTC") replaces Timestamp.utcnow(), which pandas 2.2
    # deprecated; str() yields the same "YYYY-MM-DD HH:MM:SS.ffffff+00:00" text
    # the previous astype(str) step produced.
    processed_at = str(pd.Timestamp.now(tz="UTC"))
    return (
        pd.DataFrame(static_teams.get_teams())
        .sort_values("full_name")
        .rename(columns={"id": "team_id"})
        .assign(date_time_processed=processed_at)
    )

In [53]:
# Rebind teams_df to the helper's output, whose key column is team_id rather than id.
teams_df = get_team_ids_df()
teams_df.head()

Unnamed: 0,team_id,full_name,abbreviation,nickname,city,state,year_founded,date_time_processed
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Georgia,1949,2025-12-05 01:59:15.764059+00:00
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946,2025-12-05 01:59:15.764059+00:00
14,1610612751,Brooklyn Nets,BKN,Nets,Brooklyn,New York,1976,2025-12-05 01:59:15.764059+00:00
29,1610612766,Charlotte Hornets,CHA,Hornets,Charlotte,North Carolina,1988,2025-12-05 01:59:15.764059+00:00
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966,2025-12-05 01:59:15.764059+00:00


In [54]:
teams_df.shape

(30, 8)

#### Create some helper dictionaries 

In [55]:
# Two lookup tables over the same team records: one keyed by abbreviation,
# one keyed by numeric id.
TEAM_BY_ABBR = {team["abbreviation"]: team for team in teams_list}
TEAM_BY_ID = {team["id"]: team for team in teams_list}

# Sample lookups (the notebook echoes only the last expression)
TEAM_BY_ABBR["ATL"]["id"]  # -> 1610612737
TEAM_BY_ID[1610612737]["full_name"]  # -> "Atlanta Hawks"


'Atlanta Hawks'

In [56]:
def get_team_id():
    """Return nba_api's static team records indexed by their numeric ``id``."""
    records_by_id = {}
    for team in static_teams.get_teams():
        records_by_id[team["id"]] = team
    return records_by_id

In [57]:
# Team records keyed by team id, built by the helper defined in the previous cell.
team_id_dicts = get_team_id()

#### Grab Player IDs

In [58]:
# All players (current + historical)
# The static player list is wrapped in a DataFrame so it can be previewed as a table.
players_list = static_players.get_players()
players_df = pd.DataFrame(players_list)
players_df.head()

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False


In [59]:
# Find by name (returns list; pick the first if unique)
static_players.find_players_by_full_name("LeBron James")

[{'id': 2544,
  'full_name': 'LeBron James',
  'first_name': 'LeBron',
  'last_name': 'James',
  'is_active': True}]

In [60]:
def get_player_df():
    """Build the player dimension frame from nba_api's static player list.

    Returns:
        pandas.DataFrame: the static player records with ``id`` renamed to
        ``player_id`` and a ``date_time_processed`` column holding the UTC
        load time as a string.
    """
    # Timestamp.now(tz="UTC") replaces Timestamp.utcnow(), which pandas 2.2
    # deprecated; str() yields the same text the previous astype(str) produced.
    processed_at = str(pd.Timestamp.now(tz="UTC"))
    return (
        pd.DataFrame(static_players.get_players())
        .rename(columns={"id": "player_id"})
        .assign(date_time_processed=processed_at)
    )

In [61]:
# Rebind players_df to the helper's output, whose key column is player_id rather than id.
players_df = get_player_df()
players_df.head()

Unnamed: 0,player_id,full_name,first_name,last_name,is_active,date_time_processed
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False,2025-12-05 01:59:35.006494+00:00
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False,2025-12-05 01:59:35.006494+00:00
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False,2025-12-05 01:59:35.006494+00:00
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False,2025-12-05 01:59:35.006494+00:00
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False,2025-12-05 01:59:35.006494+00:00


#### Let's look at the game logs endpoint

In [62]:
## here is one call to get all atl games
# (same request as the first test pull near the top of the notebook; `atl` and `games` are re-bound here)

atl = 1610612737  # Atlanta Hawks team_id
games = leaguegamefinder.LeagueGameFinder(team_id_nullable=atl).get_data_frames()[0]

# Echo the whole frame (pandas truncates the display) to see how far back the rows go.
games

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22025,1610612737,ATL,Atlanta Hawks,0022500327,2025-12-03,ATL vs. LAC,L,241,92,...,0.786,7,31,38,20,9.0,1,16,17,-23.0
1,22025,1610612737,ATL,Atlanta Hawks,0022500309,2025-12-01,ATL @ DET,L,241,98,...,0.727,7,27,34,27,15.0,4,13,16,-1.0
2,22025,1610612737,ATL,Atlanta Hawks,0022500304,2025-11-30,ATL @ PHI,W,288,142,...,0.756,14,44,58,36,10.0,6,21,27,8.0
3,22025,1610612737,ATL,Atlanta Hawks,0022500069,2025-11-28,ATL vs. CLE,W,242,130,...,0.692,6,32,38,36,11.0,5,12,14,7.0
4,22025,1610612737,ATL,Atlanta Hawks,0022500057,2025-11-25,ATL @ WAS,L,241,113,...,0.750,14,30,44,33,4.0,6,18,19,-19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3817,21983,1610612737,ATL,Atlanta Hawks,0028300041,1983-11-04,ATL vs. CHI,W,240,103,...,0.692,19,27,46,31,14.0,13,18,27,
3818,21983,1610612737,ATL,Atlanta Hawks,0028300027,1983-11-01,ATL vs. WAS,W,240,95,...,0.633,12,29,41,20,7.0,10,16,34,
3819,21983,1610612737,ATL,Atlanta Hawks,0028300014,1983-10-29,ATL vs. DET,W,240,117,...,0.633,27,21,48,28,14.0,7,23,35,
3820,21983,1610612737,ATL,Atlanta Hawks,0028300005,1983-10-28,ATL @ NJN,L,240,108,...,0.684,15,18,33,14,10.0,5,17,30,


In [66]:
# Plain Python list of every team_id in the team dimension frame.
team_id_list = teams_df['team_id'].tolist()

In [None]:
# Pull the game log for every team, one LeagueGameFinder request per team_id.
team_games = []
for t in teams_df['team_id']:
    games_df = leaguegamefinder.LeagueGameFinder(team_id_nullable=t).get_data_frames()[0]
    # Record when this team's rows were fetched, as a UTC string.
    # Timestamp.now(tz="UTC") replaces Timestamp.utcnow(), deprecated in pandas 2.2.
    games_df["date_time_processed"] = str(pd.Timestamp.now(tz="UTC"))
    team_games.append(games_df)
    # NOTE(review): brief pause between requests so 30 back-to-back calls are
    # less likely to be throttled or time out; tune or remove as needed.
    time.sleep(0.6)

# Stack the per-team frames; each keeps its own 0..n index, so index labels repeat.
team_games_df = pd.concat(team_games)

#### Make sure we got all the data

In [71]:
team_games_df['TEAM_NAME'].value_counts()

TEAM_NAME
Los Angeles Lakers                   4098
Boston Celtics                       4046
San Antonio Spurs                    3988
Utah Jazz                            3922
Chicago Bulls                        3893
Houston Rockets                      3889
Dallas Mavericks                     3888
Detroit Pistons                      3884
Phoenix Suns                         3877
Indiana Pacers                       3876
Portland Trail Blazers               3856
Cleveland Cavaliers                  3850
Philadelphia 76ers                   3850
Golden State Warriors                3849
Denver Nuggets                       3843
New York Knicks                      3839
Milwaukee Bucks                      3833
Atlanta Hawks                        3822
Sacramento Kings                     3557
Miami Heat                           3552
Orlando Magic                        3245
Minnesota Timberwolves               3216
Toronto Raptors                      2753
Los Angeles Clippers    

In [72]:
team_games_df.columns

Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT',
       'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS',
       'date_time_processed'],
      dtype='object')

In [73]:
# Step 1: rename columns to match Supabase schema
# (date_time_processed is already lower-case, so it needs no entry here)
fact_team_games_df = team_games_df.rename(columns={
    "SEASON_ID": "season_id",
    "TEAM_ID": "team_id",
    "GAME_ID": "game_id",
    "GAME_DATE": "game_date",
    "MATCHUP": "matchup",
    "WL": "wl",
    "PTS": "pts",
    "FG_PCT": "fg_pct",
    "FG3_PCT": "fg3_pct",
    "FT_PCT": "ft_pct",
    "REB": "reb",
    "AST": "ast",
    "STL": "stl",
    "BLK": "blk",
    "TOV": "tov",
    "PLUS_MINUS": "plus_minus",
})

# Step 2: define the exact column order your Supabase table uses
supabase_col_order = [
    "game_id",
    "team_id",
    "game_date",
    "matchup",
    "wl",
    "pts",
    "plus_minus",
    "season_id",
    "fg_pct",
    "fg3_pct",
    "ft_pct",
    "reb",
    "ast",
    "stl",
    "blk",
    "tov",
    "date_time_processed"
]

# Step 3: restrict to those columns and reorder.
# .copy() makes the result an independent frame, so the column assignment in
# step 4 does not write into a slice of the renamed frame (SettingWithCopyWarning).
fact_team_games_df = fact_team_games_df[supabase_col_order].copy()

# Step 4: parse game_date and keep only the calendar date (datetime.date objects).
# The conversion to ISO strings happens in a later cell, just before upload.
fact_team_games_df["game_date"] = pd.to_datetime(fact_team_games_df["game_date"]).dt.date

fact_team_games_df.head()


Unnamed: 0,game_id,team_id,game_date,matchup,wl,pts,plus_minus,season_id,fg_pct,fg3_pct,ft_pct,reb,ast,stl,blk,tov,date_time_processed
0,22500327,1610612737,2025-12-03,ATL vs. LAC,L,92,-23.0,22025,0.4,0.333,0.786,38.0,20,9.0,1,16,2025-12-05 01:46:00.605828+00:00
1,22500309,1610612737,2025-12-01,ATL @ DET,L,98,-1.0,22025,0.427,0.35,0.727,34.0,27,15.0,4,13,2025-12-05 01:46:00.605828+00:00
2,22500304,1610612737,2025-11-30,ATL @ PHI,W,142,8.0,22025,0.462,0.333,0.756,58.0,36,10.0,6,21,2025-12-05 01:46:00.605828+00:00
3,22500069,1610612737,2025-11-28,ATL vs. CLE,W,130,7.0,22025,0.527,0.372,0.692,38.0,36,11.0,5,12,2025-12-05 01:46:00.605828+00:00
4,22500057,1610612737,2025-11-25,ATL @ WAS,L,113,-19.0,22025,0.453,0.349,0.75,44.0,33,4.0,6,18,2025-12-05 01:46:00.605828+00:00


In [74]:
# Normalise game_date to datetime.date objects so it compares cleanly with the cutoff
fact_team_games_df["game_date"] = pd.to_datetime(fact_team_games_df["game_date"]).dt.date

# Define cutoff (exclusive): games played on or after this date are left out
cutoff_date = pd.to_datetime("2025-12-03").date()

# Filter rows; .copy() gives an independent frame so later cells can assign
# columns on it without triggering SettingWithCopyWarning
fact_team_games_df = fact_team_games_df[fact_team_games_df["game_date"] < cutoff_date].copy()

fact_team_games_df.head()

Unnamed: 0,game_id,team_id,game_date,matchup,wl,pts,plus_minus,season_id,fg_pct,fg3_pct,ft_pct,reb,ast,stl,blk,tov,date_time_processed
1,22500309,1610612737,2025-12-01,ATL @ DET,L,98,-1.0,22025,0.427,0.35,0.727,34.0,27,15.0,4,13,2025-12-05 01:46:00.605828+00:00
2,22500304,1610612737,2025-11-30,ATL @ PHI,W,142,8.0,22025,0.462,0.333,0.756,58.0,36,10.0,6,21,2025-12-05 01:46:00.605828+00:00
3,22500069,1610612737,2025-11-28,ATL vs. CLE,W,130,7.0,22025,0.527,0.372,0.692,38.0,36,11.0,5,12,2025-12-05 01:46:00.605828+00:00
4,22500057,1610612737,2025-11-25,ATL @ WAS,L,113,-19.0,22025,0.453,0.349,0.75,44.0,33,4.0,6,18,2025-12-05 01:46:00.605828+00:00
5,22500276,1610612737,2025-11-23,ATL vs. CHA,W,113,3.0,22025,0.495,0.355,0.75,37.0,32,6.0,5,7,2025-12-05 01:46:00.605828+00:00


In [75]:
# 1) Parse game_date, then render it as a plain ISO string like "2025-11-12"
# 2) (Recommended) Replace NaN/NaT with None so JSON can handle them
# assign() builds a new frame instead of writing a column into the filtered
# frame from the previous cell, which avoids pandas' SettingWithCopyWarning.
fact_team_games_df = (
    fact_team_games_df
    .assign(
        game_date=lambda frame: pd.to_datetime(frame["game_date"]).dt.strftime("%Y-%m-%d")
    )
    .replace({np.nan: None})
)

In [76]:
## de-duplicate the raw data: keep the first row seen for each (game_id, team_id)
## pair and renumber the index from zero
fact_team_games_df = fact_team_games_df.drop_duplicates(
    ["game_id", "team_id"], ignore_index=True
)

In [77]:
fact_team_games_df.shape

(109692, 17)

#### Grab player season stats

In [None]:

# Seasons to pull; each label is passed straight to the endpoint's season parameter.
SEASONS = ["2020-21", "2021-22", "2022-23", "2023-24", "2024-25"]
player_season_dfs = []

for SEASON in SEASONS:
    print(f"Processing season: {SEASON}")
    player_season_df = leaguedashplayerstats.LeagueDashPlayerStats(
        season=SEASON,
        season_type_all_star="Regular Season",   # or "Playoffs"
        per_mode_detailed="PerGame",            # "Totals", "Per36", "Per100Possessions", etc.
        measure_type_detailed_defense="Base",       # "Base", "Advanced", "Misc", "Scoring", etc.
    ).get_data_frames()[0]
    print(f"Season {SEASON} data shape: {player_season_df.shape}")
    # Tag every row with the season it came from; the frames are stacked below.
    player_season_df['season'] = SEASON
    # Load time as a UTC string. Timestamp.now(tz="UTC") replaces
    # Timestamp.utcnow(), deprecated in pandas 2.2.
    player_season_df["date_time_processed"] = str(pd.Timestamp.now(tz="UTC"))

    player_season_dfs.append(player_season_df)

player_season_df_compiled = pd.concat(player_season_dfs)

# Last expression of the cell, so this is what the notebook displays.
player_season_df_compiled.columns


Processing season: 2020-21
Season 2020-21 data shape: (540, 67)
Processing season: 2021-22
Season 2021-22 data shape: (605, 67)
Processing season: 2022-23
Season 2022-23 data shape: (539, 67)
Processing season: 2023-24
Season 2023-24 data shape: (572, 67)
Processing season: 2024-25
Season 2024-25 data shape: (569, 67)


Index(['PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'AGE', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M',
       'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
       'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS',
       'NBA_FANTASY_PTS', 'DD2', 'TD3', 'WNBA_FANTASY_PTS', 'GP_RANK',
       'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK',
       'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK',
       'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK',
       'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK',
       'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK', 'NBA_FANTASY_PTS_RANK',
       'DD2_RANK', 'TD3_RANK', 'WNBA_FANTASY_PTS_RANK', 'TEAM_COUNT', 'season',
       'date_time_processed'],
      dtype='object')

In [79]:
player_season_df_compiled.shape

(2825, 69)

In [80]:
player_season_df_compiled.head()

Unnamed: 0,player_id,player_name,nickname,team_id,team_abbreviation,age,gp,w,l,w_pct,...,pfd_rank,pts_rank,plus_minus_rank,nba_fantasy_pts_rank,dd2_rank,td3_rank,wnba_fantasy_pts_rank,team_count,season,date_time_processed
0,203932,Aaron Gordon,Aaron,1610612743,DEN,25.0,50,29,21,0.58,...,79,136,139,115,115,17,117,2,2020-21,2025-12-05 01:49:48.082823+00:00
1,1628988,Aaron Holiday,Aaron,1610612754,IND,24.0,66,30,36,0.455,...,283,276,228,338,178,29,328,1,2020-21,2025-12-05 01:49:48.082823+00:00
2,1630174,Aaron Nesmith,Aaron,1610612738,BOS,21.0,46,22,24,0.478,...,411,382,250,399,245,29,388,1,2020-21,2025-12-05 01:49:48.082823+00:00
3,1627846,Abdel Nader,Abdel,1610612756,PHX,27.0,24,16,8,0.667,...,216,302,143,348,245,29,347,1,2020-21,2025-12-05 01:49:48.082823+00:00
4,1629690,Adam Mokoka,Adam,1610612741,CHI,22.0,14,3,11,0.214,...,524,518,280,525,245,29,526,1,2020-21,2025-12-05 01:49:48.082823+00:00


In [95]:
# Report the dtype of every column, one printed line per column.
for c, column_dtype in player_season_df_compiled.dtypes.items():
    print(f"The data type for column, {c} is {column_dtype}")

The data type for column, player_id is int64
The data type for column, player_name is object
The data type for column, nickname is object
The data type for column, team_id is int64
The data type for column, team_abbreviation is object
The data type for column, age is float64
The data type for column, gp is int64
The data type for column, w is int64
The data type for column, l is int64
The data type for column, w_pct is float64
The data type for column, min is float64
The data type for column, fgm is float64
The data type for column, fga is float64
The data type for column, fg_pct is float64
The data type for column, fg3m is float64
The data type for column, fg3a is float64
The data type for column, fg3_pct is float64
The data type for column, ftm is float64
The data type for column, fta is float64
The data type for column, ft_pct is float64
The data type for column, oreb is float64
The data type for column, dreb is float64
The data type for column, reb is float64
The data type for colu

In [None]:
# Cast the three descriptive text columns to str in one astype call.
player_season_df_compiled = player_season_df_compiled.astype(
    {"PLAYER_NAME": str, "NICKNAME": str, "TEAM_ABBREVIATION": str}
)


In [36]:
# Maps each upper-case column name from the player-stats frame to its
# lower-case equivalent; every value is simply the key lower-cased.
rename_map = {
    "PLAYER_ID": "player_id",
    "PLAYER_NAME": "player_name",
    "NICKNAME": "nickname",
    "TEAM_ID": "team_id",
    "TEAM_ABBREVIATION": "team_abbreviation",
    "AGE": "age",
    "GP": "gp",
    "W": "w",
    "L": "l",
    "W_PCT": "w_pct",
    "MIN": "min",
    "FGM": "fgm",
    "FGA": "fga",
    "FG_PCT": "fg_pct",
    "FG3M": "fg3m",
    "FG3A": "fg3a",
    "FG3_PCT": "fg3_pct",
    "FTM": "ftm",
    "FTA": "fta",
    "FT_PCT": "ft_pct",
    "OREB": "oreb",
    "DREB": "dreb",
    "REB": "reb",
    "AST": "ast",
    "TOV": "tov",
    "STL": "stl",
    "BLK": "blk",
    "BLKA": "blka",
    "PF": "pf",
    "PFD": "pfd",
    "PTS": "pts",
    "PLUS_MINUS": "plus_minus",
    "NBA_FANTASY_PTS": "nba_fantasy_pts",
    "DD2": "dd2",
    "TD3": "td3",
    "WNBA_FANTASY_PTS": "wnba_fantasy_pts",
    "GP_RANK": "gp_rank",
    "W_RANK": "w_rank",
    "L_RANK": "l_rank",
    "W_PCT_RANK": "w_pct_rank",
    "MIN_RANK": "min_rank",
    "FGM_RANK": "fgm_rank",
    "FGA_RANK": "fga_rank",
    "FG_PCT_RANK": "fg_pct_rank",
    "FG3M_RANK": "fg3m_rank",
    "FG3A_RANK": "fg3a_rank",
    "FG3_PCT_RANK": "fg3_pct_rank",
    "FTM_RANK": "ftm_rank",
    "FTA_RANK": "fta_rank",
    "FT_PCT_RANK": "ft_pct_rank",
    "OREB_RANK": "oreb_rank",
    "DREB_RANK": "dreb_rank",
    "REB_RANK": "reb_rank",
    "AST_RANK": "ast_rank",
    "TOV_RANK": "tov_rank",
    "STL_RANK": "stl_rank",
    "BLK_RANK": "blk_rank",
    "BLKA_RANK": "blka_rank",
    "PF_RANK": "pf_rank",
    "PFD_RANK": "pfd_rank",
    "PTS_RANK": "pts_rank",
    "PLUS_MINUS_RANK": "plus_minus_rank",
    "NBA_FANTASY_PTS_RANK": "nba_fantasy_pts_rank",
    "DD2_RANK": "dd2_rank",
    "TD3_RANK": "td3_rank",
    "WNBA_FANTASY_PTS_RANK": "wnba_fantasy_pts_rank",
    "TEAM_COUNT": "team_count",
    # NOTE(review): the season loop adds 'season' and 'date_time_processed'
    # already lower-cased, so the two keys below match no column; rename()
    # skips keys it cannot find by default, so they are harmless.
    "SEASON": "season",
    "DATE_TIME_PROCESSED": "date_time_processed"
}

# rename() returns a new frame; rebinding the name replaces the upper-case version.
player_season_df_compiled = player_season_df_compiled.rename(columns=rename_map)

#### Test write to Supabase

#### Connect to Supabase

In [37]:
# Supabase connection settings come from the environment; either is None when unset.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_SERVICE_KEY")  # service key (or an anon key)

In [38]:
# Fail fast with a readable message when the credentials are missing, rather
# than handing None to create_client and getting an opaque error from it.
if not SUPABASE_URL or not SUPABASE_KEY:
    raise RuntimeError(
        "SUPABASE_URL and SUPABASE_SERVICE_KEY must be set (e.g. in .env) before connecting"
    )
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

In [39]:
def upsert_in_chunks(table_name, records, chunk_size=500, conflict_cols=None, client=None):
    """Send ``records`` to a Supabase table in fixed-size batches.

    Args:
        table_name: name of the destination table.
        records: sequence of row dicts (e.g. ``df.to_dict(orient="records")``).
        chunk_size: maximum number of rows per request; must be >= 1.
        conflict_cols: column names forming the conflict key. When given, each
            batch is upserted on that key; when empty or None, each batch is
            inserted instead.
        client: object exposing ``table(name)``; defaults to the notebook-level
            ``supabase`` client.

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        # A zero step breaks range() and a negative one silently sends nothing.
        raise ValueError("chunk_size must be a positive integer")
    if client is None:
        client = supabase  # module-level client created earlier in the notebook

    # Resolve the write mode once; it is the same for every batch.
    on_conflict = ",".join(conflict_cols) if conflict_cols else None
    verb = "upserted" if on_conflict else "inserted"

    for start in range(0, len(records), chunk_size):
        chunk = records[start:start + chunk_size]
        table = client.table(table_name)
        if on_conflict:
            query = table.upsert(chunk, on_conflict=on_conflict)
        else:
            query = table.insert(chunk)
        query.execute()
        # Inclusive row range of the batch that was just sent.
        print(f"{table_name}: {verb} rows {start}–{start + len(chunk) - 1}")

#### Convert dataframes to json

In [84]:
# One dict per team row, the record shape passed to upsert_in_chunks below.
dim_teams_records = teams_df.to_dict(orient="records")

In [85]:
# One dict per player row, the record shape passed to upsert_in_chunks below.
dim_players_records = players_df.to_dict(orient="records")

In [86]:
# One dict per team-game row; NaN values were already replaced with None in an earlier cell.
fact_team_games_records = fact_team_games_df.to_dict(orient="records")

In [87]:
# One dict per player-season row. NaN is mapped to None first, as is done for
# the team-games frame, so missing values serialise as JSON null.
fact_player_season_records = (
    player_season_df_compiled
    .replace({np.nan: None})
    .to_dict(orient="records")
)

In [88]:
player_season_df_compiled.columns

Index(['player_id', 'player_name', 'nickname', 'team_id', 'team_abbreviation',
       'age', 'gp', 'w', 'l', 'w_pct', 'min', 'fgm', 'fga', 'fg_pct', 'fg3m',
       'fg3a', 'fg3_pct', 'ftm', 'fta', 'ft_pct', 'oreb', 'dreb', 'reb', 'ast',
       'tov', 'stl', 'blk', 'blka', 'pf', 'pfd', 'pts', 'plus_minus',
       'nba_fantasy_pts', 'dd2', 'td3', 'wnba_fantasy_pts', 'gp_rank',
       'w_rank', 'l_rank', 'w_pct_rank', 'min_rank', 'fgm_rank', 'fga_rank',
       'fg_pct_rank', 'fg3m_rank', 'fg3a_rank', 'fg3_pct_rank', 'ftm_rank',
       'fta_rank', 'ft_pct_rank', 'oreb_rank', 'dreb_rank', 'reb_rank',
       'ast_rank', 'tov_rank', 'stl_rank', 'blk_rank', 'blka_rank', 'pf_rank',
       'pfd_rank', 'pts_rank', 'plus_minus_rank', 'nba_fantasy_pts_rank',
       'dd2_rank', 'td3_rank', 'wnba_fantasy_pts_rank', 'team_count', 'season',
       'date_time_processed'],
      dtype='object')

In [90]:
# dim_teams
upsert_in_chunks("dim_teams", dim_teams_records, chunk_size=200, conflict_cols=["team_id"])


dim_teams: upserted rows 0–29


In [91]:
# dim_teams
upsert_in_chunks("dim_players", dim_players_records, chunk_size=200, conflict_cols=["player_id"])


dim_players: upserted rows 0–199
dim_players: upserted rows 200–399
dim_players: upserted rows 400–599
dim_players: upserted rows 600–799
dim_players: upserted rows 800–999
dim_players: upserted rows 1000–1199
dim_players: upserted rows 1200–1399
dim_players: upserted rows 1400–1599
dim_players: upserted rows 1600–1799
dim_players: upserted rows 1800–1999
dim_players: upserted rows 2000–2199
dim_players: upserted rows 2200–2399
dim_players: upserted rows 2400–2599
dim_players: upserted rows 2600–2799
dim_players: upserted rows 2800–2999
dim_players: upserted rows 3000–3199
dim_players: upserted rows 3200–3399
dim_players: upserted rows 3400–3599
dim_players: upserted rows 3600–3799
dim_players: upserted rows 3800–3999
dim_players: upserted rows 4000–4199
dim_players: upserted rows 4200–4399
dim_players: upserted rows 4400–4599
dim_players: upserted rows 4600–4799
dim_players: upserted rows 4800–4999
dim_players: upserted rows 5000–5102


In [93]:
# fact_team_games
upsert_in_chunks(
    "fact_team_games",
    fact_team_games_records,
    chunk_size=400,
    conflict_cols=["game_id", "team_id"],
)

fact_team_games: upserted rows 0–399
fact_team_games: upserted rows 400–799
fact_team_games: upserted rows 800–1199
fact_team_games: upserted rows 1200–1599
fact_team_games: upserted rows 1600–1999
fact_team_games: upserted rows 2000–2399
fact_team_games: upserted rows 2400–2799
fact_team_games: upserted rows 2800–3199
fact_team_games: upserted rows 3200–3599
fact_team_games: upserted rows 3600–3999
fact_team_games: upserted rows 4000–4399
fact_team_games: upserted rows 4400–4799
fact_team_games: upserted rows 4800–5199
fact_team_games: upserted rows 5200–5599
fact_team_games: upserted rows 5600–5999
fact_team_games: upserted rows 6000–6399
fact_team_games: upserted rows 6400–6799
fact_team_games: upserted rows 6800–7199
fact_team_games: upserted rows 7200–7599
fact_team_games: upserted rows 7600–7999
fact_team_games: upserted rows 8000–8399
fact_team_games: upserted rows 8400–8799
fact_team_games: upserted rows 8800–9199
fact_team_games: upserted rows 9200–9599
fact_team_games: upsert

In [98]:
# fact_player_season_stats
upsert_in_chunks(
    "fact_player_season_stats",
    fact_player_season_records,
    chunk_size=500,
    conflict_cols=["player_id", "season"],
)

fact_player_season_stats: upserted rows 0–499
fact_player_season_stats: upserted rows 500–999
fact_player_season_stats: upserted rows 1000–1499
fact_player_season_stats: upserted rows 1500–1999
fact_player_season_stats: upserted rows 2000–2499
fact_player_season_stats: upserted rows 2500–2824
