In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

In [3]:
!pip install nba_api



In [4]:
from nba_api.stats.static import teams #to get team ids
from nba_api.stats.endpoints import commonteamroster #from here we want the player ids (clusters + models)
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import shotchartlineupdetail #from here we want the shot charts for our lineups (for visualizations)
from nba_api.stats.endpoints import leaguelineupviz #from here we want lineup shot frequencies (for visualizations)
from nba_api.stats.endpoints import leaguedashlineups #from here we want league average lineup stats (might not use)
from nba_api.stats.endpoints import teamdashlineups #from here we want team lineup stats (for the main analysis)

import time
from multiprocessing import Pool
import logging
from time import sleep
from concurrent.futures import ThreadPoolExecutor
from requests.exceptions import ConnectionError, ReadTimeout
import warnings
from pandas.errors import PerformanceWarning

In [5]:
# Team records from nba_api's static `teams` module; each record is read
# for its 'id' and 'full_name' keys.
teams_list = teams.get_teams()
team_id_name_map = {entry['id']: entry['full_name'] for entry in teams_list}
team_ids = [entry['id'] for entry in teams_list]

# Season labels in "YYYY-YY" form, from 2015-16 through 2024-25.
seasons = ["%d-%02d" % (start, (start + 1) % 100) for start in range(2015, 2025)]

In [6]:
# Fetch five-man lineup stats for every (team, season) pair listed in
# Data/allstars_season_team.csv, keep only lineups that contain at least one
# of the target players, and save the result to Data/allstar_lineups_data.csv.

# Player ids whose lineups we want to keep.
allstar_ids = [203999, 201939, 201935, 202695, 2544, 1629029, 203507, 1630162, 1626164, 201142, 1628983]
season_team_df = pd.read_csv("Data/allstars_season_team.csv")
target_ids_str = [str(int(x)) for x in allstar_ids]
target_id_set = set(target_ids_str)  # constant-time membership test in the row filter below
# Unique (TEAM_ID, GROUP_VALUE) pairs; GROUP_VALUE is passed to the API as the season string.
combos = list(season_team_df[['TEAM_ID', 'GROUP_VALUE']].drop_duplicates().itertuples(index=False, name=None))

MAX_FETCH_ATTEMPTS = 3   # tries per team/season before giving up
BACKOFF_SECONDS = 0.6    # pause grows linearly: 0.6s, 1.2s, ...

results = []
failed_combos = []  # (team_id, season) pairs that never succeeded, so gaps are visible
for count, (team_id, season1) in enumerate(combos, start=1):
    for attempt in range(1, MAX_FETCH_ATTEMPTS + 1):
        try:
            obj = teamdashlineups.TeamDashLineups(
                team_id=int(team_id),
                season=str(season1),
                measure_type_detailed_defense="Advanced",
                per_mode_detailed="Per48",
                group_quantity=5,
            )
            # The second returned frame is the one used as the lineup table.
            lineups_df = obj.get_data_frames()[1]
        except Exception as e:
            # Broad on purpose: any failure of the request is treated as retryable.
            print(f"Fetch failed for {team_id} {season1} (attempt {attempt}): {e}")
            if attempt < MAX_FETCH_ATTEMPTS:
                sleep(BACKOFF_SECONDS * attempt)  # polite backoff; skipped when no retry follows
            continue
        if lineups_df is not None and not lineups_df.empty:
            results.append(lineups_df)
        print(f"Fetched lineups for team {team_id} season {season1} ({count}/{len(combos)})")
        break
    else:
        # Every attempt raised: record the gap instead of dropping it silently.
        failed_combos.append((team_id, season1))

if failed_combos:
    print(
        f"WARNING: gave up on {len(failed_combos)} team/season combos "
        f"after {MAX_FETCH_ATTEMPTS} attempts each: {failed_combos}"
    )

# single concat
if results:
    full_lineups_data = pd.concat(results, ignore_index=True)
else:
    full_lineups_data = pd.DataFrame()

if not full_lineups_data.empty:
    # Drop the wrapping dashes around GROUP_ID. strip('-') gives the same result
    # as slicing [1:-1] for dash-wrapped ids, but does not eat digits if the
    # wrapping dashes are absent.
    full_lineups_data['GROUP_ID'] = full_lineups_data['GROUP_ID'].str.strip('-')
    # Keep a lineup when any of its dash-separated player ids is a target id.
    mask = full_lineups_data['GROUP_ID'].apply(
        lambda s: not target_id_set.isdisjoint(s.split('-'))
    )
    full_lineups_data = full_lineups_data[mask].copy()
    full_lineups_data.to_csv("Data/allstar_lineups_data.csv", index=False)
    print("✓ Saved to Data/allstar_lineups_data.csv")
else:
    print("No lineup data was fetched; nothing saved.")


Fetch failed for 1610612743 2024-25 (attempt 1): HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Fetched lineups for team 1610612743 season 2024-25 (1/90)
Fetched lineups for team 1610612743 season 2023-24 (2/90)
Fetched lineups for team 1610612743 season 2022-23 (3/90)
Fetched lineups for team 1610612743 season 2021-22 (4/90)
Fetched lineups for team 1610612743 season 2020-21 (5/90)
Fetched lineups for team 1610612743 season 2019-20 (6/90)
Fetched lineups for team 1610612743 season 2018-19 (7/90)
Fetched lineups for team 1610612743 season 2017-18 (8/90)
Fetched lineups for team 1610612743 season 2016-17 (9/90)
Fetched lineups for team 1610612743 season 2015-16 (10/90)
Fetch failed for 1610612744 2024-25 (attempt 1): HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Fetched lineups for team 1610612744 season 2024-25 (11/90)
Fetched lineups for team 1610612744 season 2023-24 (12/90)
Fetched lineups for team 1610