In [1]:
from nba_api.stats.endpoints import ShotChartDetail
import pandas as pd

In [2]:
# Define the last completed season
season = "2023-24"  # Adjust if needed

# Fetch all shot data (League-wide)
shot_data = ShotChartDetail(
    player_id=0,  # 0 means all players
    team_id=0,  # 0 means all teams
    season_nullable=season,
    context_measure_simple="FGA",  # Field Goal Attempts
)

# Convert to DataFrame
df = shot_data.get_data_frames()[0]

# Save to CSV
df.to_csv("nba_shot_data_2023_24.csv", index=False)

In [3]:
import pandas as pd
from nba_api.stats.endpoints import LeagueGameFinder, PlayByPlayV2
import time

# Get all playoff games for a specific season
season = "2023-24"  # Change to desired season
gamefinder = LeagueGameFinder(season_type_nullable="Playoffs", league_id_nullable="00")
games = gamefinder.get_data_frames()[0]

# Extract game IDs
game_ids = games["GAME_ID"].tolist()

# Function to fetch play-by-play data in batches
def fetch_play_by_play_data_in_batches(game_ids, batch_size=10, output_file="nba_playoffs_play_by_play.csv"):
    play_by_play_data = []
    total_games = len(game_ids)

    # Loop through the game IDs in batches
    for i in range(0, total_games, batch_size):
        print(f"Processing batch {i // batch_size + 1} of {total_games // batch_size + 1}...")
        batch_game_ids = game_ids[i:i + batch_size]

        # Fetch play-by-play data for each game in the batch
        for game_id in batch_game_ids:
            try:
                pbp = PlayByPlayV2(game_id=game_id)
                df = pbp.get_data_frames()[0]
                df["GAME_ID"] = game_id  # Keep track of which game each row belongs to
                play_by_play_data.append(df)
                time.sleep(1)  # Sleep to avoid API rate limits

            except Exception as e:
                print(f"Error fetching data for game ID {game_id}: {e}")
                continue

        # Combine the batch data and append to CSV
        batch_df = pd.concat(play_by_play_data, ignore_index=True)
        batch_df.to_csv(output_file, mode='a', header=not bool(i), index=False)  # Append mode, write header once

        # Clear the batch data list for the next batch
        play_by_play_data.clear()

        # Sleep between batches to avoid overuse
        time.sleep(5)

    print(f"Play-by-play data for playoffs saved successfully to {output_file}!")

# Call the function to fetch data and save it
fetch_play_by_play_data_in_batches(game_ids, batch_size=10, output_file="nba_playoffs_play_by_play.csv")


Processing batch 1 of 642...
Processing batch 2 of 642...
Processing batch 3 of 642...
Processing batch 4 of 642...
Processing batch 5 of 642...
Processing batch 6 of 642...
Processing batch 7 of 642...
Processing batch 8 of 642...
Processing batch 9 of 642...
Processing batch 10 of 642...
Processing batch 11 of 642...
Processing batch 12 of 642...
Processing batch 13 of 642...
Processing batch 14 of 642...
Processing batch 15 of 642...
Processing batch 16 of 642...
Processing batch 17 of 642...
Processing batch 18 of 642...
Processing batch 19 of 642...
Processing batch 20 of 642...
Processing batch 21 of 642...
Processing batch 22 of 642...
Processing batch 23 of 642...
Processing batch 24 of 642...
Processing batch 25 of 642...
Processing batch 26 of 642...
Processing batch 27 of 642...
Processing batch 28 of 642...
Processing batch 29 of 642...
Processing batch 30 of 642...
Processing batch 31 of 642...
Processing batch 32 of 642...
Processing batch 33 of 642...
Processing batch 34

ValueError: No objects to concatenate

In [3]:
season_type = "Regular Season"  # Fetch data for Regular Season

# Fetch Player Tracking Data
player_tracking_full = PlayerDashPtStats(season=season, season_type_all_star=season_type)

# Convert to DataFrame
df_full = player_tracking_full.get_data_frames()[0]

# Save to CSV
df_full.to_csv("nba_full_season_player_tracking.csv", index=False)

NameError: name 'PlayerDashPtStats' is not defined

In [None]:
pip install nba_api pandas tqdm


from nba_api.stats.endpoints import teamdashlineups, leaguegamefinder
import pandas as pd
import time
from tqdm import tqdm  # For progress bar

# Fetch team lineup data for the full season
season = "2023-24"  # Change to the required season

all_team_data = []

teams = [str(team_id) for team_id in range(1610612737, 1610612766)]  # NBA team IDs

for team_id in tqdm(teams, desc="Fetching Team Lineup Data"):
    try:
        lineup_data = teamdashlineups.TeamDashLineups(team_id=team_id, season=season)
        df = lineup_data.get_data_frames()[0]  # Convert response to DataFrame

        all_team_data.append(df)

        time.sleep(1)  # Sleep to avoid rate limits
    except Exception as e:
        print(f"Error fetching Team ID {team_id}: {e}")

# Combine all data
lineup_df = pd.concat(all_team_data, ignore_index=True)

# Save to CSV
lineup_df.to_csv("team_lineups_season.csv", index=False)

print("✅ Team lineup data for the full season saved!")


In [21]:
from nba_api.stats.endpoints import leaguegamefinder, boxscoreplayertrackv2
import pandas as pd
import time
from tqdm import tqdm  # Progress bar for fetching data

# Get all games for the season (Regular + Playoffs)
# gamefinder = leaguegamefinder.LeagueGameFinder(
#     season_type_nullable="Regular Season", league_id_nullable="00"
# )
# regular_season_games = gamefinder.get_data_frames()[0]

gamefinder_playoffs = leaguegamefinder.LeagueGameFinder(
    season_type_nullable="Playoffs", league_id_nullable="00"
)
playoff_games = gamefinder_playoffs.get_data_frames()[0]

# Combine both datasets
# all_games = pd.concat([regular_season_games, playoff_games], ignore_index=True)

# Extract unique Game IDs
game_ids = playoff_games["GAME_ID"].unique().tolist()
print(f"Total Games in the Season: {len(game_ids)}")

all_tracking_data = []

for game_id in tqdm(game_ids, desc="Fetching Player Tracking Data"):
    try:
        tracking_data = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=game_id)
        df = tracking_data.get_data_frames()[0]  # Convert response to DataFrame
        
        all_tracking_data.append(df)

        time.sleep(1)  # Sleep to avoid rate limits
    except Exception as e:
        print(f"Error fetching Game ID {game_id}: {e}")

# Combine all data
tracking_df = pd.concat(all_tracking_data, ignore_index=True)

# Save to CSV
tracking_df.to_csv("full_season_player_tracking.csv", index=False)

print("✅ Full season player tracking data saved!")


Total Games in the Season: 3206


Fetching Player Tracking Data:   1%|          | 20/3206 [00:29<1:17:35,  1.46s/it]


KeyboardInterrupt: 

In [19]:
print(playoff_games.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6412 entries, 0 to 6411
Data columns (total 28 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   SEASON_ID          6412 non-null   object 
 1   TEAM_ID            6412 non-null   int64  
 2   TEAM_ABBREVIATION  6412 non-null   object 
 3   TEAM_NAME          6412 non-null   object 
 4   GAME_ID            6412 non-null   object 
 5   GAME_DATE          6412 non-null   object 
 6   MATCHUP            6412 non-null   object 
 7   WL                 6412 non-null   object 
 8   MIN                6412 non-null   int64  
 9   PTS                6412 non-null   int64  
 10  FGM                6412 non-null   int64  
 11  FGA                6412 non-null   int64  
 12  FG_PCT             6412 non-null   float64
 13  FG3M               6412 non-null   int64  
 14  FG3A               6412 non-null   int64  
 15  FG3_PCT            6390 non-null   float64
 16  FTM                6412 

In [20]:
print(regular_season_games.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 28 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   SEASON_ID          30000 non-null  object 
 1   TEAM_ID            30000 non-null  int64  
 2   TEAM_ABBREVIATION  30000 non-null  object 
 3   TEAM_NAME          30000 non-null  object 
 4   GAME_ID            30000 non-null  object 
 5   GAME_DATE          30000 non-null  object 
 6   MATCHUP            30000 non-null  object 
 7   WL                 29998 non-null  object 
 8   MIN                30000 non-null  int64  
 9   PTS                30000 non-null  int64  
 10  FGM                30000 non-null  int64  
 11  FGA                30000 non-null  int64  
 12  FG_PCT             30000 non-null  float64
 13  FG3M               30000 non-null  int64  
 14  FG3A               30000 non-null  int64  
 15  FG3_PCT            30000 non-null  float64
 16  FTM                300

In [10]:
import pandas as pd
import time
import os
from tqdm import tqdm
from nba_api.stats.endpoints import leaguegamefinder, playbyplayv2

# Fetch playoff games for the given season
season = "2023-24"  # Change this as needed

gamefinder = leaguegamefinder.LeagueGameFinder(
    season_type_nullable="Playoffs",
    league_id_nullable="00",
    season_nullable=season
)


In [14]:
print(gamefinder.get_data_frames()[0].info())  # Check the data structure

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 164 entries, 0 to 163
Data columns (total 28 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   SEASON_ID          164 non-null    object 
 1   TEAM_ID            164 non-null    int64  
 2   TEAM_ABBREVIATION  164 non-null    object 
 3   TEAM_NAME          164 non-null    object 
 4   GAME_ID            164 non-null    object 
 5   GAME_DATE          164 non-null    object 
 6   MATCHUP            164 non-null    object 
 7   WL                 164 non-null    object 
 8   MIN                164 non-null    int64  
 9   PTS                164 non-null    int64  
 10  FGM                164 non-null    int64  
 11  FGA                164 non-null    int64  
 12  FG_PCT             164 non-null    float64
 13  FG3M               164 non-null    int64  
 14  FG3A               164 non-null    int64  
 15  FG3_PCT            164 non-null    float64
 16  FTM                164 non

In [None]:
games = gamefinder.get_data_frames()[0]

# Filter out less relevant games (e.g., blowouts, low-possession games)
games["POINT_DIFF"] = abs(games["PTS"] - games["PTS_OPP"])  # Margin of victory
filtered_games = games[(games["POINT_DIFF"] <= 15) & (games["PTS"] >= 90)]  # Close & high-scoring games

game_ids = filtered_games["GAME_ID"].unique().tolist()

print(f"Total Playoff Games: {len(games)}")
print(f"Filtered Games for Analysis: {len(game_ids)} (Close & High-Scoring Games)")

return game_ids

In [None]:
SAVE_FILE = "filtered_playbyplay_playoffs.csv"

def fetch_playbyplay(game_ids, save_file, target_rows=15000):
    all_data = []

    # Load previously saved data if available
    if os.path.exists(save_file):
        existing_df = pd.read_csv(save_file)
        completed_games = existing_df["GAME_ID"].unique().tolist()
        print(f"🔄 Resuming from last saved progress ({len(completed_games)} games already processed).")
    else:
        existing_df = pd.DataFrame()
        completed_games = []

    # Filter out already processed games
    pending_game_ids = [gid for gid in game_ids if gid not in completed_games]
    print(f"🎯 Fetching {len(pending_game_ids)} remaining games...")

    total_entries = len(existing_df)  # Track total rows collected
    for game_id in tqdm(pending_game_ids, desc="Fetching Play-by-Play Data"):
        try:
            pbp = playbyplayv2.PlayByPlayV2(game_id=game_id)
            df = pbp.get_data_frames()[0]
            df["GAME_ID"] = game_id  # Add game ID for reference
            
            all_data.append(df)
            total_entries += len(df)

            # Save after every 5 games or if we reach the target row count
            if len(all_data) % 5 == 0 or total_entries >= target_rows:
                temp_df = pd.concat(all_data, ignore_index=True)
                final_df = pd.concat([existing_df, temp_df], ignore_index=True)
                final_df.to_csv(save_file, index=False)
                print(f"💾 Progress saved! {len(final_df)} total rows so far.")
                
                # Stop fetching if we've reached the target
                if total_entries >= target_rows:
                    print("✅ Target row count reached. Stopping data collection.")
                    return

            time.sleep(1.5)  # Sleep to avoid rate limits

        except Exception as e:
            print(f"⚠️ Error fetching game {game_id}: {e}")
    
    # Save final data
    if all_data:
        final_df = pd.concat([existing_df] + all_data, ignore_index=True)
        final_df.to_csv(save_file, index=False)
        print(f"✅ Data saved! Total rows collected: {len(final_df)}")

# Run the function
fetch_playbyplay(filtered_playoff_game_ids, SAVE_FILE, target_rows=15000)

In [19]:
import pandas as pd
import time
import os
from tqdm import tqdm
from nba_api.stats.endpoints import leaguegamefinder, playbyplayv2

# 🏀 CONFIGURATIONS
SEASON = "2023-24"  # Change this for different seasons
TARGET_ROWS = 20000  # Goal: 10,000 - 15,000 entries
SAVE_FILE = "filtered_playbyplay_playoffs.csv"


### 🔹 Step 1: Get All Playoff Game IDs (Filter for Relevance) ###
def get_filtered_playoff_game_ids(season):
    gamefinder = leaguegamefinder.LeagueGameFinder(
        season_nullable=season, season_type_nullable="Playoffs"
    )
    games = gamefinder.get_data_frames()[0]

    # 🚀 Add Opponent Stats by Self-Merging
    games_opponent = games.merge(
        games, on="GAME_ID", suffixes=("", "_OPP")
    )
    games_opponent = games_opponent[games_opponent["TEAM_ID"] != games_opponent["TEAM_ID_OPP"]]

    # 📌 Calculate Game Relevance
    games_opponent["POINT_DIFF"] = abs(games_opponent["PTS"] - games_opponent["PTS_OPP"])
    games_opponent["TOTAL_POINTS"] = games_opponent["PTS"] + games_opponent["PTS_OPP"]

    # 🔥 Filter Criteria: Close, High-Scoring Games
    filtered_games = games_opponent[
        (games_opponent["POINT_DIFF"] <= 15)  # Close Games
        & (games_opponent["TOTAL_POINTS"] >= 200)  # High Scoring
    ]

    game_ids = filtered_games["GAME_ID"].unique().tolist()

    print(f"🔹 Total Playoff Games: {len(games)}")
    print(f"✅ Filtered Games for Analysis: {len(game_ids)} (Close & High-Scoring Games)")

    return game_ids


### 🔹 Step 2: Fetch Play-by-Play Data with Resume Feature ###
def fetch_playbyplay(game_ids, save_file, target_rows=20000):
    all_data = []

    # 📌 Load Previously Saved Data
    if os.path.exists(save_file):
        existing_df = pd.read_csv(save_file)
        completed_games = existing_df["GAME_ID"].unique().tolist()
        print(f"🔄 Resuming from last saved progress ({len(completed_games)} games processed).")
    else:
        existing_df = pd.DataFrame()
        completed_games = []

    # 🚀 Exclude Already Processed Games
    pending_game_ids = [gid for gid in game_ids if gid not in completed_games]
    print(f"🎯 Fetching {len(pending_game_ids)} remaining games...")

    total_entries = len(existing_df)

    for game_id in tqdm(pending_game_ids, desc="Fetching Play-by-Play Data"):
        try:
            pbp = playbyplayv2.PlayByPlayV2(game_id=game_id)
            df = pbp.get_data_frames()[0]
            df["GAME_ID"] = game_id  # Add Game ID for reference
            all_data.append(df)
            total_entries += len(df)

            # 💾 Save Every 5 Games or on Target Row Count
            if len(all_data) % 5 == 0 or total_entries >= target_rows:
                temp_df = pd.concat(all_data, ignore_index=True)
                final_df = pd.concat([existing_df, temp_df], ignore_index=True)
                final_df.to_csv(save_file, index=False)
                print(f"💾 Progress saved! {len(final_df)} total rows.")

                # 🚀 Stop Fetching if Target Reached
                if total_entries >= target_rows:
                    print("✅ Target row count reached. Stopping data collection.")
                    return

            time.sleep(1.5)  # ⏳ Prevent Rate Limits

        except Exception as e:
            print(f"⚠️ Error fetching game {game_id}: {e}")

    # 💾 Final Save
    if all_data:
        final_df = pd.concat([existing_df] + all_data, ignore_index=True)
        final_df.to_csv(save_file, index=False)
        print(f"✅ Data saved! Total rows collected: {len(final_df)}")


### 🔹 Step 3: Run the Pipeline ###
filtered_playoff_game_ids = get_filtered_playoff_game_ids(SEASON)
# fetch_playbyplay(filtered_playoff_game_ids, SAVE_FILE, TARGET_ROWS)


🔹 Total Playoff Games: 190
✅ Filtered Games for Analysis: 48 (Close & High-Scoring Games)


In [3]:
print(filtered_playoff_game_ids)

['0042300403', '0042300402', '0042300314', '0042300304', '0042300313', '0042300303', '0042300312', '0042300311', '0042300301', '0042300226', '0042300216', '0042300205', '0042300235', '0042300204', '0042300234', '0042300223', '0042300213', '0042300222', '0042300212', '0042300211', '0042300137', '0042300231', '0042300176', '0042300116', '0042300115', '0042300135', '0042300155', '0042300164', '0042300174', '0042300124', '0042300154', '0042300123', '0042300113', '0042300153', '0042300102', '0042300112', '0042300152', '0042300121', '0042300171', '0042300151', '0042300111', '2042300311', '2042300201', '2042300221', '2042300211', '2042300121', '2042300102', '2042300131']


In [6]:
from nba_api.stats.endpoints import leaguegamefinder, boxscoreplayertrackv2
import pandas as pd
import time
from tqdm import tqdm  # Progress bar for fetching data

season = "2023-24"
gamefinder_playoffs = leaguegamefinder.LeagueGameFinder(
        season_nullable=season,
        season_type_nullable="Regular Season",
        league_id_nullable="00"  # NBA league ID
    )
playoff_games = gamefinder_playoffs.get_data_frames()[0]

game_ids = playoff_games["GAME_ID"].unique().tolist()
print(f"Total Games in the Season: {len(game_ids)}")

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

In [None]:
all_tracking_data = []

for game_id in tqdm(game_ids, desc="Fetching Player Tracking Data"):
    try:
        tracking_data = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=game_id)
        df = tracking_data.get_data_frames()[0]  # Convert response to DataFrame
        
        all_tracking_data.append(df)

        time.sleep(1)  # Sleep to avoid rate limits
    except Exception as e:
        print(f"Error fetching Game ID {game_id}: {e}")

# Combine all data
tracking_df = pd.concat(all_tracking_data, ignore_index=True)

# Save to CSV
tracking_df.to_csv("full_season_player_tracking.csv", index=False)

print("✅ Full season player tracking data saved!")

Fetching Player Tracking Data:  43%|████▎     | 530/1230 [17:32<2:00:39, 10.34s/it]

Error fetching Game ID 0022300671: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


Fetching Player Tracking Data:  43%|████▎     | 531/1230 [18:03<3:11:03, 16.40s/it]

Error fetching Game ID 0022300669: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


Fetching Player Tracking Data:  43%|████▎     | 531/1230 [18:33<24:25,  2.10s/it]  


KeyboardInterrupt: 

In [6]:
from nba_api.stats.endpoints import leaguegamefinder, boxscoreplayertrackv2
import pandas as pd
import time
import os
from tqdm import tqdm

# Define season and file name for saving progress
season = "2023-24"
file_name = "player_tracking_data.csv"

# Get the list of playoff games
gamefinder = leaguegamefinder.LeagueGameFinder(
    season_nullable=season,
    season_type_nullable="Regular Season",
    league_id_nullable="00"  # NBA league ID
)
games_df = gamefinder.get_data_frames()[0]
game_ids = games_df["GAME_ID"].unique().tolist()

print(f"Total Games in the Season: {len(game_ids)}")

# Load existing progress if file exists
if os.path.exists(file_name):
    existing_data = pd.read_csv(file_name)
    completed_games = set(existing_data["GAME_ID"].unique())
else:
    completed_games = set()

# Iterate over game IDs while handling rate limits & resuming progress

Total Games in the Season: 1230


In [7]:
print(len(completed_games))

1230


In [None]:
game_ids_int = [int(game_id) for game_id in game_ids]

['0022301196', '0022301188', '0022301192', '0022301187', '0022301190', '0022301195', '0022301200', '0022301197', '0022301189', '0022301194', '0022301193', '0022301199', '0022301186', '0022301191', '0022301198', '0022301171', '0022301183', '0022301182', '0022301184', '0022301177', '0022301179', '0022301176', '0022301175', '0022301178', '0022301173', '0022301180', '0022301172', '0022301181', '0022301185', '0022301174', '0022301166', '0022301167', '0022301170', '0022301168', '0022301169', '0022301159', '0022301160', '0022301158', '0022301162', '0022301165', '0022301163', '0022301164', '0022301161', '0022301156', '0022301153', '0022301146', '0022301157', '0022301148', '0022301144', '0022301154', '0022301147', '0022301152', '0022301149', '0022301145', '0022301151', '0022301155', '0022301150', '0022301140', '0022301142', '0022301136', '0022301143', '0022301141', '0022301134', '0022301135', '0022301132', '0022301139', '0022301138', '0022301137', '0022301131', '0022301133', '0022301130', '0022

In [13]:
print(completed_games)

{22300672, 22300673, 22300674, 22300675, 22300676, 22300677, 22300678, 22300679, 22300680, 22300681, 22300682, 22300683, 22300684, 22300685, 22300686, 22300687, 22300688, 22300689, 22300690, 22300691, 22300692, 22300693, 22300694, 22300695, 22300696, 22300697, 22300698, 22300699, 22300700, 22300701, 22300702, 22300703, 22300704, 22300705, 22300706, 22300707, 22300708, 22300709, 22300710, 22300711, 22300712, 22300713, 22300714, 22300715, 22300716, 22300717, 22300718, 22300719, 22300720, 22300721, 22300722, 22300723, 22300724, 22300725, 22300726, 22300727, 22300728, 22300729, 22300730, 22300731, 22300732, 22300733, 22300734, 22300735, 22300736, 22300737, 22300738, 22300739, 22300740, 22300741, 22300742, 22300743, 22300744, 22300745, 22300746, 22300747, 22300748, 22300749, 22300750, 22300751, 22300752, 22300753, 22300754, 22300755, 22300756, 22300757, 22300758, 22300759, 22300760, 22300761, 22300762, 22300763, 22300764, 22300765, 22300766, 22300767, 22300768, 22300769, 22300770, 22300771,

In [5]:
for game_id in tqdm(game_ids, desc="Fetching Player Tracking Data"):
    if int(game_id) in completed_games:
        continue  # Skip if already processed

    attempt = 0
    while attempt < 5:  # Retry up to 5 times if API call fails
        try:
            tracking_data = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=game_id)
            df = tracking_data.get_data_frames()[0]
            df["GAME_ID"] = game_id  # Ensure GAME_ID is included

            # Append directly to the CSV file
            df.to_csv(file_name, mode='a', header=not os.path.exists(file_name), index=False)

            time.sleep(1.5)  # Avoid hitting rate limits
            break  # Exit retry loop after success

        except Exception as e:
            attempt += 1
            wait_time = 2 ** attempt  # Exponential backoff
            print(f"⚠️ Error fetching {game_id}, retrying in {wait_time}s... ({attempt}/5)")
            time.sleep(wait_time)

print("✅ Player tracking data saved successfully!")


Fetching Player Tracking Data:   0%|          | 0/1230 [00:00<?, ?it/s]

Fetching Player Tracking Data: 100%|██████████| 1230/1230 [01:57<00:00, 10.45it/s] 

✅ Player tracking data saved successfully!





In [None]:
from nba_api.stats.endpoints import teamdashlineups, leaguegamefinder
import pandas as pd
import time
from tqdm import tqdm  # For progress bar

# Fetch team lineup data for the full season
season = "2023-24"  # Change to the required season

all_team_data = []

teams = [str(team_id) for team_id in range(1610612737, 1610612766)]  # NBA team IDs

for team_id in tqdm(teams, desc="Fetching Team Lineup Data"):
    try:
        lineup_data = teamdashlineups.TeamDashLineups(team_id=team_id, season=season)
        df = lineup_data.get_data_frames()[0]  # Convert response to DataFrame

        all_team_data.append(df)

        time.sleep(1)  # Sleep to avoid rate limits
    except Exception as e:
        print(f"Error fetching Team ID {team_id}: {e}")

# Combine all data
lineup_df = pd.concat(all_team_data, ignore_index=True)

# Save to CSV
lineup_df.to_csv("team_lineups_season.csv", index=False)

Fetching Team Lineup Data:   3%|▎         | 1/29 [00:19<09:07, 19.56s/it]

Error fetching Team ID 1610612737: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


Fetching Team Lineup Data:   7%|▋         | 2/29 [00:49<11:39, 25.90s/it]

Error fetching Team ID 1610612738: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


Fetching Team Lineup Data:  10%|█         | 3/29 [01:20<12:09, 28.07s/it]

Error fetching Team ID 1610612739: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


In [None]:
from nba_api.stats.endpoints import playbyplayv2

pbp = playbyplayv2.PlayByPlayV2(game_id='0022100123')
pbp_df = pbp.get_data_frames()[0]

# Filter for substitution events
subs = pbp_df[pbp_df['HOMEDESCRIPTION'].str.contains('SUB', na=False) | 
             pbp_df['VISITORDESCRIPTION'].str.contains('SUB', na=False)]