In [2]:
import pandas as pd
import time
pd.set_option('display.max_columns', None)


In [3]:
def get_nba_team_abbreviations():
    """
    Get NBA team abbreviations from Wikipedia.

    Reads the first table on the WikiProject abbreviations page, renames its
    "Franchise" / "Abbreviation/ Acronym" columns to "Team" / "Abbreviation",
    and keeps only the first whitespace-separated token of each abbreviation.

    Returns:
        pd.DataFrame: Indexed by "Team", with an "Abbreviation" column.
    """
    abbreviations_url = (
        "https://en.wikipedia.org/wiki/Wikipedia:WikiProject_National_Basketball_Association/"
        "National_Basketball_Association_team_abbreviations"
    )
    nba_abbreviations = (
        pd.read_html(abbreviations_url)[0]
        .rename(columns={
            "Franchise": "Team",
            "Abbreviation/ Acronym": "Abbreviation"
        })
        .set_index("Team")
    )
    nba_abbreviations["Abbreviation"] = nba_abbreviations["Abbreviation"].str.split().str[0]

    # Hand-maintained overrides (presumably to match the codes basketball-reference
    # uses in its box-score URLs -- TODO confirm).
    # Target the "Abbreviation" column explicitly: a bare `.loc[team] = value`
    # would write the value into every column of that row.
    abbreviation_overrides = {
        "Phoenix Suns": "PHO",
        "Charlotte Hornets": "CHO",
        "Brooklyn Nets": "BRK",
    }
    for team, abbreviation in abbreviation_overrides.items():
        nba_abbreviations.loc[team, "Abbreviation"] = abbreviation
    return nba_abbreviations

# Fetch the team -> abbreviation lookup once; it is passed to set_up_schedule_and_results below.
nba_abbreviations = get_nba_team_abbreviations()

In [3]:
def get_raw_schedule_and_results(season_end_year=2025, season_months=None):
    """
    Fetch the played games of an NBA season from basketball-reference.

    One schedule page is read per month; rows without a home score (games not
    yet played) are discarded, and a month with no played games is reported
    and skipped.

    Args:
        season_end_year (int): Year used in the ``NBA_<year>_games-<month>`` page URL.
            Defaults to 2025.
        season_months (list[str] | None): Lower-case month names to fetch, in order.
            Defaults to october through april.

    Returns:
        pd.DataFrame: Played games with "Away", "AwayPoints", "Home" and
        "HomePoints" columns (plus whatever other columns the page provides,
        minus the dropped ones). Empty if no month had any played game.
    """
    if season_months is None:
        season_months = ["october", "november", "december", "january", "february", "march", "april"]

    played_games_by_month = []
    for month in season_months:
        # Get schedule data
        schedule_month = (pd.read_html(
            f"https://www.basketball-reference.com/leagues/NBA_{season_end_year}_games-{month}.html",
            flavor='html5lib',
            header=0)[0]
            .rename(columns={
                "Visitor/Neutral": "Away",
                "Home/Neutral": "Home",
                "PTS": "AwayPoints",
                "PTS.1": "HomePoints"
            }))
        played_games = schedule_month[schedule_month["HomePoints"].notna()]
        if played_games.empty:
            print(f"No data found for {month}")
            continue
        played_games_by_month.append(played_games)

    # Nothing played yet: return an empty frame instead of failing on the drop below
    if not played_games_by_month:
        return pd.DataFrame()

    # Clean up columns; errors="ignore" tolerates pages that lack one of them
    columns_to_drop = ["Unnamed: 6", "Unnamed: 7", "Notes", "Attend.", "LOG", "Arena", "Start (ET)"]
    # Concatenate once rather than growing a frame inside the loop
    return pd.concat(played_games_by_month).drop(columns=columns_to_drop, errors="ignore")
    
# Download the schedule pages; months without any played game print a "No data found" line.
schedule_and_results_raw = get_raw_schedule_and_results()


No data found for december
No data found for january
No data found for february
No data found for march
No data found for april


In [4]:
def set_up_schedule_and_results(schedule_and_results_raw, nba_abbreviations):
    """
    Turn the raw schedule into a per-game frame with box-score URLs and results.

    Each team's first game (home or away) is removed, then URL, consistent
    team-ordering, winner and point-differential columns are added. The input
    frame is not modified.

    Args:
        schedule_and_results_raw (pd.DataFrame): Needs "Date", "Home", "Away",
            "HomePoints" and "AwayPoints" columns.
        nba_abbreviations (pd.DataFrame): Indexed by team name with an
            "Abbreviation" column.

    Returns:
        pd.DataFrame: Processed schedule and results data with game URLs and team matchups
    """
    # Work on a copy so the caller's frame is untouched and the cell can be re-run safely
    schedule = schedule_and_results_raw.copy()

    # Process dates and add team abbreviations
    schedule["Date"] = pd.to_datetime(schedule["Date"]).dt.strftime("%Y%m%d")
    abbreviation_by_team = nba_abbreviations["Abbreviation"]
    schedule["HomeAbbreviation"] = schedule["Home"].map(abbreviation_by_team)
    schedule["AwayAbbreviation"] = schedule["Away"].map(abbreviation_by_team)

    # Generate game URLs
    base_game_url = "https://www.basketball-reference.com/boxscores/{date}0{home_team}.html"
    schedule["GameUrl"] = [
        base_game_url.format(date=game_date, home_team=home_abbreviation)
        for game_date, home_abbreviation in zip(schedule["Date"], schedule["HomeAbbreviation"])
    ]

    # Earliest date on which each team appears, whether as home or away side.
    # Stacking both roles keeps teams that so far have only home or only away games.
    # "%Y%m%d" strings sort chronologically, so a string minimum is the earliest date.
    appearances = pd.concat([
        schedule[["Home", "Date"]].rename(columns={"Home": "Team"}),
        schedule[["Away", "Date"]].rename(columns={"Away": "Team"}),
    ])
    first_game_by_team = appearances.groupby("Team")["Date"].min()

    # Remove first games of the season (a game counts if it is the first for either team)
    is_first_game = (
        schedule["Date"].eq(schedule["Home"].map(first_game_by_team))
        | schedule["Date"].eq(schedule["Away"].map(first_game_by_team))
    )
    schedule = schedule[~is_first_game.to_numpy()].reset_index(drop=True)

    # Add consistent team ordering columns (alphabetically first / last of the two teams)
    schedule["A_Team"] = schedule[["Home", "Away"]].min(axis=1)
    schedule["B_Team"] = schedule[["Home", "Away"]].max(axis=1)

    # Add winner and point differential columns (differential is from the home side's view)
    home_won = schedule["HomePoints"] > schedule["AwayPoints"]
    schedule["Winner"] = home_won.map({True: "Home", False: "Away"})
    schedule["PointDifferential"] = schedule["HomePoints"] - schedule["AwayPoints"]

    return schedule

# Processed schedule: first games removed, box-score URLs and result columns added.
schedule_and_results = set_up_schedule_and_results(schedule_and_results_raw, nba_abbreviations)

In [5]:
def scrape_game_page(url):
    """
    Read a basketball-reference box-score page and extract each team's totals.

    Args:
        url (str): URL of the basketball-reference game page to scrape

    Returns:
        dict: One single-row DataFrame of "Team Totals" per key:
            'away_basic_stats', 'home_basic_stats',
            'away_advanced_stats', 'home_advanced_stats'.
            Columns carry the matching 'away_basic_' / 'home_basic_' /
            'away_advanced_' / 'home_advanced_' prefix.
    """
    def process_team_stats(table, prefix):
        """Reduce one box-score table to its numeric 'Team Totals' row."""
        totals = table.droplevel(0, axis=1)
        totals = totals.query("Starters == 'Team Totals'")
        # Columns that are empty in the totals row are discarded
        totals = totals.dropna(axis=1).drop(columns=['Starters'])
        totals = totals.add_prefix(prefix).reset_index(drop=True)

        # Anything that does not parse as a number becomes NaN
        for column_name in totals.columns:
            totals[column_name] = pd.to_numeric(totals[column_name], errors='coerce')

        return totals

    # Every table found on the page, in page order
    tables = pd.read_html(url)
    table_count = len(tables)

    # Position of each wanted table. The away team's tables are taken to fill the
    # first half of the list and the home team's the second half, with the basic
    # table first and the advanced table last in each half -- TODO confirm layout.
    table_layout = (
        ('away_basic_stats', 0, 'away_basic_'),
        ('home_basic_stats', int(table_count / 2), 'home_basic_'),
        ('away_advanced_stats', int(table_count / 2 - 1), 'away_advanced_'),
        ('home_advanced_stats', int(table_count - 1), 'home_advanced_'),
    )

    return {
        stat_key: process_team_stats(tables[position], prefix)
        for stat_key, position, prefix in table_layout
    }
    

In [6]:
def combine_schedule_and_game_stats(schedule_and_results, request_delay=4, error_delay=10):
    """
    Scrape the box score of every scheduled game and attach it to its schedule row.

    Games whose page cannot be scraped are reported and skipped (best effort),
    so the result may contain fewer rows than the schedule.

    Args:
        schedule_and_results (pd.DataFrame): Schedule with at least "GameUrl",
            "Away" and "Home" columns.
        request_delay (float): Seconds to wait before each request. Defaults to 4.
        error_delay (float): Extra seconds to wait after a failed game. Defaults to 10.

    Returns:
        pd.DataFrame: One row per successfully scraped game, holding the away/home
        basic and advanced stats followed by all schedule columns. Empty if no
        game could be scraped.
    """
    # The cached result has exactly 80 columns; more than that signals an unexpected page layout
    expected_max_columns = 80

    all_game_stats = []
    num_games = schedule_and_results.shape[0]
    # Count positions with enumerate so progress is right even if the index is not 0..n-1
    for position, (_, game) in enumerate(schedule_and_results.iterrows()):
        if position % 10 == 0:
            print(f"{position/num_games * 100:.2f}% complete")
        try:
            # Add delay between requests to avoid rate limiting
            time.sleep(request_delay)

            # Scrape stats from the game URL
            game_stats = scrape_game_page(game['GameUrl'])

            # Combine all stats DataFrames for this game
            combined_stats = pd.concat([
                game_stats['away_basic_stats'],
                game_stats['away_advanced_stats'],
                game_stats['home_basic_stats'],
                game_stats['home_advanced_stats']
            ], axis=1)

            # Add all columns from the original schedule row
            for col in schedule_and_results.columns:
                combined_stats[col] = game[col]

            all_game_stats.append(combined_stats)

            if len(combined_stats.columns) > expected_max_columns:
                print(combined_stats.columns.shape)
                print(game["GameUrl"])

        except Exception as e:
            # Deliberately broad: one bad page must not abort the whole scrape
            print(f"Error processing game {game['Away']} vs {game['Home']}: {str(e)}")
            print(game["GameUrl"])
            # On error, wait longer before next request
            time.sleep(error_delay)
            continue

    print("100% complete")

    # pd.concat raises on an empty list, so return an empty frame when nothing was scraped
    if not all_game_stats:
        return pd.DataFrame()

    # Combine all games into one DataFrame
    return pd.concat(all_game_stats, ignore_index=True)


In [31]:
# Scraping waits several seconds per game, so reuse the cached CSV when it exists.
# If it is missing (e.g. a fresh checkout), scrape once and write the cache so the
# notebook still runs top to bottom.
combined_stats_cache_path = "combined_schedule_and_game_stats.csv"
try:
    combined_schedule_and_game_stats = pd.read_csv(combined_stats_cache_path, index_col=0)
except FileNotFoundError:
    combined_schedule_and_game_stats = combine_schedule_and_game_stats(schedule_and_results)
    combined_schedule_and_game_stats.to_csv(combined_stats_cache_path)

# Dates are stored in YYYYMMDD form; convert them to real datetimes for sorting and comparison
combined_schedule_and_game_stats["Date"] = pd.to_datetime(combined_schedule_and_game_stats["Date"], format="%Y%m%d")
combined_schedule_and_game_stats


Unnamed: 0,away_basic_MP,away_basic_FG,away_basic_FGA,away_basic_FG%,away_basic_3P,away_basic_3PA,away_basic_3P%,away_basic_FT,away_basic_FTA,away_basic_FT%,away_basic_ORB,away_basic_DRB,away_basic_TRB,away_basic_AST,away_basic_STL,away_basic_BLK,away_basic_TOV,away_basic_PF,away_basic_PTS,away_advanced_MP,away_advanced_TS%,away_advanced_eFG%,away_advanced_3PAr,away_advanced_FTr,away_advanced_ORB%,away_advanced_DRB%,away_advanced_TRB%,away_advanced_AST%,away_advanced_STL%,away_advanced_BLK%,away_advanced_TOV%,away_advanced_USG%,away_advanced_ORtg,away_advanced_DRtg,home_basic_MP,home_basic_FG,home_basic_FGA,home_basic_FG%,home_basic_3P,home_basic_3PA,home_basic_3P%,home_basic_FT,home_basic_FTA,home_basic_FT%,home_basic_ORB,home_basic_DRB,home_basic_TRB,home_basic_AST,home_basic_STL,home_basic_BLK,home_basic_TOV,home_basic_PF,home_basic_PTS,home_advanced_MP,home_advanced_TS%,home_advanced_eFG%,home_advanced_3PAr,home_advanced_FTr,home_advanced_ORB%,home_advanced_DRB%,home_advanced_TRB%,home_advanced_AST%,home_advanced_STL%,home_advanced_BLK%,home_advanced_TOV%,home_advanced_USG%,home_advanced_ORtg,home_advanced_DRtg,Date,Away,AwayPoints,Home,HomePoints,HomeAbbreviation,AwayAbbreviation,GameUrl,A_Team,B_Team,Winner,PointDifferential
0,240,35,83,0.422,14,37,0.378,17,21,0.810,8,29,37,22,7,3,19,31,101,240,0.547,0.506,0.446,0.253,19.0,85.3,48.7,62.9,7.1,6.7,17.1,100.0,103.0,118.3,240,39,75,0.520,15,30,0.500,23,33,0.697,5,34,39,28,10,7,13,19,116,240,0.648,0.620,0.400,0.440,14.7,81.0,51.3,71.8,10.2,15.2,12.7,100.0,118.3,103.0,2024-10-25,Brooklyn Nets,101.0,Orlando Magic,116.0,ORL,BRK,https://www.basketball-reference.com/boxscores...,Brooklyn Nets,Orlando Magic,Home,15.0
1,240,29,76,0.382,9,29,0.310,40,53,0.755,10,18,28,19,11,5,16,32,107,240,0.539,0.441,0.382,0.697,21.7,50.0,34.1,65.5,10.9,9.4,13.9,100.0,106.1,114.0,240,35,76,0.461,6,23,0.261,39,46,0.848,18,36,54,25,11,13,27,34,115,240,0.597,0.500,0.303,0.605,50.0,78.3,65.9,71.4,10.9,27.7,21.9,100.0,114.0,106.1,2024-10-25,Philadelphia 76ers,107.0,Toronto Raptors,115.0,TOR,PHI,https://www.basketball-reference.com/boxscores...,Philadelphia 76ers,Toronto Raptors,Home,8.0
2,240,37,96,0.385,21,51,0.412,25,31,0.806,22,31,53,22,7,4,18,28,120,240,0.547,0.495,0.531,0.323,40.7,81.6,57.6,59.5,6.9,9.3,14.1,100.0,118.5,123.4,240,39,81,0.481,14,38,0.368,33,38,0.868,7,32,39,25,11,9,14,28,125,240,0.640,0.568,0.469,0.469,18.4,59.3,42.4,64.1,10.9,20.0,12.5,100.0,123.4,118.5,2024-10-25,Charlotte Hornets,120.0,Atlanta Hawks,125.0,ATL,CHO,https://www.basketball-reference.com/boxscores...,Atlanta Hawks,Charlotte Hornets,Home,5.0
3,240,38,81,0.469,13,32,0.406,12,17,0.706,8,34,42,25,3,5,22,22,101,240,0.571,0.549,0.395,0.210,19.0,87.2,51.9,65.8,3.0,10.4,19.9,100.0,101.1,113.1,240,42,82,0.512,13,34,0.382,16,23,0.696,5,34,39,28,13,4,13,15,113,240,0.613,0.591,0.415,0.280,12.8,81.0,48.1,66.7,13.0,8.2,12.4,100.0,113.1,101.1,2024-10-25,Detroit Pistons,101.0,Cleveland Cavaliers,113.0,CLE,DET,https://www.basketball-reference.com/boxscores...,Cleveland Cavaliers,Detroit Pistons,Home,12.0
4,240,41,83,0.494,3,30,0.100,13,20,0.650,9,24,33,28,8,6,16,24,98,240,0.534,0.512,0.361,0.241,22.0,68.6,43.4,68.3,8.4,10.5,14.8,100.0,102.5,128.6,240,44,84,0.524,12,27,0.444,23,29,0.793,11,32,43,19,5,4,12,18,123,240,0.636,0.595,0.321,0.345,31.4,78.0,56.6,43.2,5.2,7.5,11.0,100.0,128.6,102.5,2024-10-25,Indiana Pacers,98.0,New York Knicks,123.0,NYK,IND,https://www.basketball-reference.com/boxscores...,Indiana Pacers,New York Knicks,Home,25.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,240,39,90,0.433,7,26,0.269,9,13,0.692,10,34,44,18,8,3,14,12,94,240,0.491,0.472,0.289,0.144,20.4,81.0,48.4,46.2,8.2,8.1,12.8,100.0,95.8,128.4,240,45,88,0.511,22,51,0.431,14,20,0.700,8,39,47,33,10,11,11,12,126,240,0.651,0.636,0.580,0.227,19.0,79.6,51.6,73.3,10.2,17.2,10.2,100.0,128.4,95.8,2024-11-25,Los Angeles Clippers,94.0,Boston Celtics,126.0,BOS,LAC,https://www.basketball-reference.com/boxscores...,Boston Celtics,Los Angeles Clippers,Home,32.0
239,240,34,102,0.333,8,46,0.174,22,28,0.786,19,25,44,22,10,7,11,21,98,240,0.429,0.373,0.451,0.275,30.2,61.0,42.3,64.7,9.8,11.9,8.8,100.0,96.3,120.9,240,43,87,0.494,12,28,0.429,25,34,0.735,16,44,60,35,7,13,19,20,123,240,0.603,0.563,0.322,0.391,39.0,69.8,57.7,81.4,6.9,23.2,15.7,100.0,120.9,96.3,2024-11-25,Portland Trail Blazers,98.0,Memphis Grizzlies,123.0,MEM,POR,https://www.basketball-reference.com/boxscores...,Memphis Grizzlies,Portland Trail Blazers,Home,25.0
240,240,56,92,0.609,19,36,0.528,14,16,0.875,8,35,43,45,5,4,11,23,145,240,0.732,0.712,0.391,0.174,25.8,77.8,56.6,80.4,5.0,6.8,10.0,100.0,144.2,117.3,240,41,92,0.446,11,33,0.333,25,27,0.926,10,23,33,23,5,3,11,18,118,240,0.568,0.505,0.359,0.293,22.2,74.2,43.4,56.1,5.0,5.4,9.6,100.0,117.3,144.2,2024-11-25,New York Knicks,145.0,Denver Nuggets,118.0,DEN,NYK,https://www.basketball-reference.com/boxscores...,Denver Nuggets,New York Knicks,Away,-27.0
241,240,41,83,0.494,20,45,0.444,26,30,0.867,7,35,42,23,6,3,12,18,128,240,0.665,0.614,0.542,0.361,17.9,76.1,49.4,56.1,6.0,6.4,11.1,100.0,128.4,120.3,240,45,93,0.484,19,46,0.413,11,17,0.647,11,32,43,32,8,5,13,21,120,240,0.597,0.586,0.495,0.183,23.9,82.1,50.6,71.1,8.0,13.2,11.5,100.0,120.3,128.4,2024-11-25,Brooklyn Nets,128.0,Golden State Warriors,120.0,GSW,BRK,https://www.basketball-reference.com/boxscores...,Brooklyn Nets,Golden State Warriors,Away,-8.0


In [32]:
def split_home_away_games(combined_stats_df):
    """
    Separate a combined per-game frame into its home-side and away-side halves.

    Each half keeps only the columns carrying that side's prefix ('home_' or
    'away_') and is indexed by ('Date', 'Team'), where 'Team' is taken from the
    'Home' or 'Away' column respectively. The input frame is left unchanged.

    Args:
        combined_stats_df (pd.DataFrame): DataFrame containing both home and away game statistics

    Returns:
        tuple: (home_games_df, away_games_df) containing separated home and away statistics
    """
    def extract_side(prefix, team_column):
        """Select one side's stat columns and index them by date and team."""
        stat_columns = [name for name in combined_stats_df.columns if name.startswith(prefix)]
        return (
            combined_stats_df[stat_columns + [team_column, 'Date']]
            .rename(columns={team_column: 'Team'})
            .set_index(['Date', 'Team'])
        )

    return extract_side('home_', 'Home'), extract_side('away_', 'Away')

# Split the combined per-game stats into one frame of home-side rows and one of away-side rows
home_games_df, away_games_df = split_home_away_games(combined_schedule_and_game_stats)

# Optional inspection of the two frames (left disabled):
# print("Home Games:")
# display(home_games_df)
# print("\nAway Games:")
# display(away_games_df)


In [33]:
def calculate_cumulative_team_stats(home_games_df, away_games_df):
    """
    Calculate cumulative statistics for both home and away games and combine them.

    For each team, every numeric column is replaced by its expanding (running)
    mean in date order. Home and away games are averaged separately: a home row
    averages only that team's home games so far, an away row only its away
    games. The 'home_' / 'away_' column prefixes are then stripped so both
    halves share column names, and the rows are stacked.

    Args:
        home_games_df (pd.DataFrame): DataFrame containing home game statistics,
            indexed by ('Date', 'Team') with 'home_'-prefixed columns
        away_games_df (pd.DataFrame): DataFrame containing away game statistics,
            indexed by ('Date', 'Team') with 'away_'-prefixed columns

    Returns:
        pd.DataFrame: Combined cumulative statistics for all teams, sorted by index.
        The inputs are not modified.
    """
    def strip_leading_prefix(column_name, prefix):
        """Remove `prefix` only from the start of the name, never from its middle."""
        if column_name.startswith(prefix):
            return column_name[len(prefix):]
        return column_name

    def running_team_averages(games_df, prefix):
        """Running mean per team in date order, with the side prefix removed."""
        # sort_index returns a new frame, so the caller's data stays untouched
        ordered = games_df.sort_index(level=['Team', 'Date'])

        # 'number' covers every numeric width, not just int64/float64
        numeric_columns = ordered.select_dtypes(include='number').columns
        if len(numeric_columns) > 0:
            # groupby prepends the Team key to the index; drop it again so the
            # result aligns with the original (Date, Team) index on assignment
            ordered[numeric_columns] = (
                ordered.groupby('Team')[numeric_columns]
                .expanding()
                .mean()
                .reset_index(0, drop=True)
            )

        return ordered.rename(columns=lambda name: strip_leading_prefix(name, prefix))

    # Combine home and away stats
    all_team_stats = pd.concat([
        running_team_averages(home_games_df, 'home_'),
        running_team_averages(away_games_df, 'away_'),
    ])

    # Sort by the (Date, Team) index
    return all_team_stats.sort_index()

# Build the running per-team averages, indexed by (Date, Team), from the home and away frames
all_team_stats = calculate_cumulative_team_stats(home_games_df, away_games_df)

print("Combined Home and Away Statistics:")
display(all_team_stats)


Combined Home and Away Statistics:


Unnamed: 0_level_0,Unnamed: 1_level_0,basic_MP,basic_FG,basic_FGA,basic_FG%,basic_3P,basic_3PA,basic_3P%,basic_FT,basic_FTA,basic_FT%,basic_ORB,basic_DRB,basic_TRB,basic_AST,basic_STL,basic_BLK,basic_TOV,basic_PF,basic_PTS,advanced_MP,advanced_TS%,advanced_eFG%,advanced_3PAr,advanced_FTr,advanced_ORB%,advanced_DRB%,advanced_TRB%,advanced_AST%,advanced_STL%,advanced_BLK%,advanced_TOV%,advanced_USG%,advanced_ORtg,advanced_DRtg
Date,Team,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
2024-10-25,Atlanta Hawks,240.000000,39.000000,81.000000,0.481000,14.000000,38.000000,0.368000,33.0,38.000000,0.868000,7.000000,32.000000,39.000000,25.000000,11.000000,9.000000,14.000000,28.000000,125.000000,240.000000,0.640000,0.568000,0.469000,0.469000,18.400000,59.300000,42.400000,64.100000,10.900000,20.000000,12.500000,100.0,123.400000,118.500000
2024-10-25,Brooklyn Nets,240.000000,35.000000,83.000000,0.422000,14.000000,37.000000,0.378000,17.0,21.000000,0.810000,8.000000,29.000000,37.000000,22.000000,7.000000,3.000000,19.000000,31.000000,101.000000,240.000000,0.547000,0.506000,0.446000,0.253000,19.000000,85.300000,48.700000,62.900000,7.100000,6.700000,17.100000,100.0,103.000000,118.300000
2024-10-25,Charlotte Hornets,240.000000,37.000000,96.000000,0.385000,21.000000,51.000000,0.412000,25.0,31.000000,0.806000,22.000000,31.000000,53.000000,22.000000,7.000000,4.000000,18.000000,28.000000,120.000000,240.000000,0.547000,0.495000,0.531000,0.323000,40.700000,81.600000,57.600000,59.500000,6.900000,9.300000,14.100000,100.0,118.500000,123.400000
2024-10-25,Chicago Bulls,240.000000,46.000000,96.000000,0.479000,21.000000,47.000000,0.447000,20.0,23.000000,0.870000,9.000000,37.000000,46.000000,30.000000,10.000000,4.000000,10.000000,17.000000,133.000000,240.000000,0.627000,0.589000,0.490000,0.240000,18.800000,82.200000,49.500000,65.200000,9.300000,7.500000,8.600000,100.0,124.200000,113.900000
2024-10-25,Cleveland Cavaliers,240.000000,42.000000,82.000000,0.512000,13.000000,34.000000,0.382000,16.0,23.000000,0.696000,5.000000,34.000000,39.000000,28.000000,13.000000,4.000000,13.000000,15.000000,113.000000,240.000000,0.613000,0.591000,0.415000,0.280000,12.800000,81.000000,48.100000,66.700000,13.000000,8.200000,12.400000,100.0,113.100000,101.100000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-25,Oklahoma City Thunder,240.000000,44.166667,95.333333,0.465833,14.833333,42.000000,0.358667,15.5,18.833333,0.829500,10.500000,35.833333,46.333333,26.333333,12.833333,5.000000,13.000000,22.166667,118.666667,240.000000,0.575167,0.544833,0.440667,0.200667,23.433333,77.550000,50.500000,59.933333,12.266667,11.083333,11.216667,100.0,114.866667,103.850000
2024-11-25,Orlando Magic,240.000000,36.700000,85.600000,0.430700,10.500000,37.700000,0.279200,17.8,23.100000,0.766900,9.300000,30.000000,39.300000,23.800000,9.100000,5.700000,14.500000,19.800000,101.700000,240.000000,0.532500,0.491700,0.439300,0.273300,20.280000,76.110000,46.180000,64.810000,9.260000,11.480000,13.090000,100.0,104.150000,110.100000
2024-11-25,Portland Trail Blazers,240.000000,36.900000,90.900000,0.410100,10.900000,35.500000,0.303200,16.8,22.400000,0.756000,13.900000,31.700000,45.600000,18.300000,8.200000,6.400000,15.400000,19.100000,101.500000,240.000000,0.507500,0.470300,0.389700,0.252300,28.480000,71.540000,48.910000,50.070000,8.260000,12.320000,13.300000,100.0,103.080000,112.260000
2024-11-25,Sacramento Kings,242.777778,41.333333,83.888889,0.493778,12.222222,33.888889,0.352111,19.0,24.000000,0.790778,9.333333,32.111111,41.444444,24.666667,7.222222,5.111111,14.333333,21.000000,113.888889,242.777778,0.604111,0.566667,0.402889,0.287889,24.622222,75.333333,51.266667,59.622222,7.444444,10.255556,13.255556,100.0,117.366667,115.544444


In [35]:
# Create a copy of schedule_and_results to avoid modifying the original
enhanced_schedule = schedule_and_results.copy()


def get_latest_stats(team, date, stats_df, inclusive=True):
    """
    Return a team's most recent stats row up to a given date.

    Args:
        team (str): Team name, as found in the 'Team' level of `stats_df`'s index.
        date: Cut-off date; compared against the 'Date' index level.
        stats_df (pd.DataFrame): Stats indexed by ('Date', 'Team'), sorted by index.
        inclusive (bool): When True (default, the existing behaviour) rows dated
            exactly `date` are eligible; when False only strictly earlier rows are.

    Returns:
        pd.Series | None: The last eligible row, or None if the team has none.

    NOTE(review): with inclusive=True the row for a game played on `date` already
    averages in that game's own box score (in the saved output, Home_basic_PTS
    equals HomePoints for a team's first listed game). If the joined columns are
    meant to be pre-game features, pass inclusive=False -- confirm intended use.
    """
    team_stats = stats_df.loc[(slice(None), team), :]
    stat_dates = team_stats.index.get_level_values('Date')
    eligible_mask = stat_dates <= date if inclusive else stat_dates < date
    eligible_stats = team_stats[eligible_mask]
    return eligible_stats.iloc[-1] if not eligible_stats.empty else None


def _build_latest_stats_frame(schedule, team_column, prefix, stats_df):
    """Look up each game's latest stats for one side, once per row, as prefixed columns."""
    missing_row = [float('nan')] * len(stats_df.columns)
    stat_rows = []
    for team, date in zip(schedule[team_column], schedule['Date']):
        latest = get_latest_stats(team, date, stats_df)
        stat_rows.append(latest.to_numpy() if latest is not None else missing_row)
    return (
        pd.DataFrame(stat_rows, index=schedule.index, columns=stats_df.columns)
        .add_prefix(prefix)
    )


# One lookup per game and side (previously two lookups per game for every stat column).
# Home team stats columns come first, then away team stats columns.
enhanced_schedule = pd.concat(
    [
        enhanced_schedule,
        _build_latest_stats_frame(enhanced_schedule, 'Home', 'Home_', all_team_stats),
        _build_latest_stats_frame(enhanced_schedule, 'Away', 'Away_', all_team_stats),
    ],
    axis=1,
)

display(enhanced_schedule)


Unnamed: 0,Date,Away,AwayPoints,Home,HomePoints,HomeAbbreviation,AwayAbbreviation,GameUrl,A_Team,B_Team,Winner,PointDifferential,Home_basic_MP,Home_basic_FG,Home_basic_FGA,Home_basic_FG%,Home_basic_3P,Home_basic_3PA,Home_basic_3P%,Home_basic_FT,Home_basic_FTA,Home_basic_FT%,Home_basic_ORB,Home_basic_DRB,Home_basic_TRB,Home_basic_AST,Home_basic_STL,Home_basic_BLK,Home_basic_TOV,Home_basic_PF,Home_basic_PTS,Home_advanced_MP,Home_advanced_TS%,Home_advanced_eFG%,Home_advanced_3PAr,Home_advanced_FTr,Home_advanced_ORB%,Home_advanced_DRB%,Home_advanced_TRB%,Home_advanced_AST%,Home_advanced_STL%,Home_advanced_BLK%,Home_advanced_TOV%,Home_advanced_USG%,Home_advanced_ORtg,Home_advanced_DRtg,Away_basic_MP,Away_basic_FG,Away_basic_FGA,Away_basic_FG%,Away_basic_3P,Away_basic_3PA,Away_basic_3P%,Away_basic_FT,Away_basic_FTA,Away_basic_FT%,Away_basic_ORB,Away_basic_DRB,Away_basic_TRB,Away_basic_AST,Away_basic_STL,Away_basic_BLK,Away_basic_TOV,Away_basic_PF,Away_basic_PTS,Away_advanced_MP,Away_advanced_TS%,Away_advanced_eFG%,Away_advanced_3PAr,Away_advanced_FTr,Away_advanced_ORB%,Away_advanced_DRB%,Away_advanced_TRB%,Away_advanced_AST%,Away_advanced_STL%,Away_advanced_BLK%,Away_advanced_TOV%,Away_advanced_USG%,Away_advanced_ORtg,Away_advanced_DRtg
0,20241025,Brooklyn Nets,101.0,Orlando Magic,116.0,ORL,BRK,https://www.basketball-reference.com/boxscores...,Brooklyn Nets,Orlando Magic,Home,15.0,240.000000,39.000000,75.000000,0.520000,15.000000,30.000000,0.500000,23.000,33.000000,0.697000,5.000000,34.000000,39.000000,28.000000,10.000000,7.000000,13.000000,19.000000,116.000000,240.000000,0.648000,0.620000,0.400000,0.440000,14.700000,81.000000,51.300000,71.800000,10.200000,15.200000,12.700000,100.0,118.300000,103.000000,240.0,35.000000,83.000000,0.422000,14.000000,37.000000,0.378000,17.000000,21.000000,0.810000,8.000000,29.000000,37.000000,22.000000,7.000000,3.000000,19.000000,31.000000,101.000000,240.0,0.547000,0.506000,0.446000,0.253000,19.000000,85.300000,48.700000,62.900000,7.100000,6.700000,17.100000,100.0,103.000000,118.300000
1,20241025,Philadelphia 76ers,107.0,Toronto Raptors,115.0,TOR,PHI,https://www.basketball-reference.com/boxscores...,Philadelphia 76ers,Toronto Raptors,Home,8.0,240.000000,35.000000,76.000000,0.461000,6.000000,23.000000,0.261000,39.000,46.000000,0.848000,18.000000,36.000000,54.000000,25.000000,11.000000,13.000000,27.000000,34.000000,115.000000,240.000000,0.597000,0.500000,0.303000,0.605000,50.000000,78.300000,65.900000,71.400000,10.900000,27.700000,21.900000,100.0,114.000000,106.100000,240.0,29.000000,76.000000,0.382000,9.000000,29.000000,0.310000,40.000000,53.000000,0.755000,10.000000,18.000000,28.000000,19.000000,11.000000,5.000000,16.000000,32.000000,107.000000,240.0,0.539000,0.441000,0.382000,0.697000,21.700000,50.000000,34.100000,65.500000,10.900000,9.400000,13.900000,100.0,106.100000,114.000000
2,20241025,Charlotte Hornets,120.0,Atlanta Hawks,125.0,ATL,CHO,https://www.basketball-reference.com/boxscores...,Atlanta Hawks,Charlotte Hornets,Home,5.0,240.000000,39.000000,81.000000,0.481000,14.000000,38.000000,0.368000,33.000,38.000000,0.868000,7.000000,32.000000,39.000000,25.000000,11.000000,9.000000,14.000000,28.000000,125.000000,240.000000,0.640000,0.568000,0.469000,0.469000,18.400000,59.300000,42.400000,64.100000,10.900000,20.000000,12.500000,100.0,123.400000,118.500000,240.0,37.000000,96.000000,0.385000,21.000000,51.000000,0.412000,25.000000,31.000000,0.806000,22.000000,31.000000,53.000000,22.000000,7.000000,4.000000,18.000000,28.000000,120.000000,240.0,0.547000,0.495000,0.531000,0.323000,40.700000,81.600000,57.600000,59.500000,6.900000,9.300000,14.100000,100.0,118.500000,123.400000
3,20241025,Detroit Pistons,101.0,Cleveland Cavaliers,113.0,CLE,DET,https://www.basketball-reference.com/boxscores...,Cleveland Cavaliers,Detroit Pistons,Home,12.0,240.000000,42.000000,82.000000,0.512000,13.000000,34.000000,0.382000,16.000,23.000000,0.696000,5.000000,34.000000,39.000000,28.000000,13.000000,4.000000,13.000000,15.000000,113.000000,240.000000,0.613000,0.591000,0.415000,0.280000,12.800000,81.000000,48.100000,66.700000,13.000000,8.200000,12.400000,100.0,113.100000,101.100000,240.0,38.000000,81.000000,0.469000,13.000000,32.000000,0.406000,12.000000,17.000000,0.706000,8.000000,34.000000,42.000000,25.000000,3.000000,5.000000,22.000000,22.000000,101.000000,240.0,0.571000,0.549000,0.395000,0.210000,19.000000,87.200000,51.900000,65.800000,3.000000,10.400000,19.900000,100.0,101.100000,113.100000
4,20241025,Indiana Pacers,98.0,New York Knicks,123.0,NYK,IND,https://www.basketball-reference.com/boxscores...,Indiana Pacers,New York Knicks,Home,25.0,240.000000,44.000000,84.000000,0.524000,12.000000,27.000000,0.444000,23.000,29.000000,0.793000,11.000000,32.000000,43.000000,19.000000,5.000000,4.000000,12.000000,18.000000,123.000000,240.000000,0.636000,0.595000,0.321000,0.345000,31.400000,78.000000,56.600000,43.200000,5.200000,7.500000,11.000000,100.0,128.600000,102.500000,240.0,41.000000,83.000000,0.494000,3.000000,30.000000,0.100000,13.000000,20.000000,0.650000,9.000000,24.000000,33.000000,28.000000,8.000000,6.000000,16.000000,24.000000,98.000000,240.0,0.534000,0.512000,0.361000,0.241000,22.000000,68.600000,43.400000,68.300000,8.400000,10.500000,14.800000,100.0,102.500000,128.600000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,20241125,Los Angeles Clippers,94.0,Boston Celtics,126.0,BOS,LAC,https://www.basketball-reference.com/boxscores...,Boston Celtics,Los Angeles Clippers,Home,32.0,246.250000,41.000000,87.750000,0.468500,19.375000,51.000000,0.385750,15.375,19.250000,0.795625,8.250000,32.875000,41.125000,27.125000,5.625000,6.250000,12.375000,17.125000,116.750000,246.250000,0.608250,0.579625,0.580750,0.222000,19.462500,72.112500,46.950000,66.025000,5.750000,10.437500,11.412500,100.0,119.700000,113.675000,240.0,40.000000,86.250000,0.467000,12.375000,33.250000,0.371500,17.875000,22.875000,0.781125,11.875000,34.125000,46.000000,24.500000,7.500000,3.875000,17.250000,19.125000,110.250000,240.0,0.575125,0.539625,0.386250,0.268250,28.562500,76.437500,52.875000,60.500000,7.600000,8.087500,15.200000,100.0,111.862500,114.000000
239,20241125,Portland Trail Blazers,98.0,Memphis Grizzlies,123.0,MEM,POR,https://www.basketball-reference.com/boxscores...,Memphis Grizzlies,Portland Trail Blazers,Home,25.0,240.000000,43.800000,91.600000,0.480200,12.700000,35.700000,0.355700,18.600,23.800000,0.783600,12.200000,37.500000,49.700000,30.500000,7.600000,7.500000,17.800000,19.900000,118.900000,240.000000,0.585600,0.550000,0.389100,0.264900,27.940000,77.150000,54.210000,69.640000,7.340000,14.450000,14.900000,100.0,114.960000,105.680000,240.0,36.900000,90.900000,0.410100,10.900000,35.500000,0.303200,16.800000,22.400000,0.756000,13.900000,31.700000,45.600000,18.300000,8.200000,6.400000,15.400000,19.100000,101.500000,240.0,0.507500,0.470300,0.389700,0.252300,28.480000,71.540000,48.910000,50.070000,8.260000,12.320000,13.300000,100.0,103.080000,112.260000
240,20241125,New York Knicks,145.0,Denver Nuggets,118.0,DEN,NYK,https://www.basketball-reference.com/boxscores...,Denver Nuggets,New York Knicks,Away,-27.0,240.000000,43.125000,88.750000,0.487500,14.250000,32.750000,0.436375,21.125,27.375000,0.778750,12.500000,30.875000,43.375000,30.500000,8.000000,5.125000,14.625000,19.500000,121.625000,240.000000,0.605500,0.567875,0.370500,0.309875,28.262500,74.300000,50.825000,70.275000,7.937500,8.950000,12.637500,100.0,120.537500,119.462500,240.0,44.666667,89.444444,0.500556,15.555556,37.222222,0.411444,14.888889,17.444444,0.844667,9.333333,32.555556,41.888889,29.333333,8.111111,4.222222,12.333333,17.555556,119.777778,240.0,0.618111,0.587778,0.416111,0.199222,23.111111,77.944444,51.077778,65.455556,8.255556,8.511111,11.311111,100.0,122.122222,116.477778
241,20241125,Brooklyn Nets,128.0,Golden State Warriors,120.0,GSW,BRK,https://www.basketball-reference.com/boxscores...,Brooklyn Nets,Golden State Warriors,Away,-8.0,240.000000,43.571429,94.000000,0.463429,16.285714,42.857143,0.377429,13.000,19.285714,0.685857,12.857143,33.714286,46.571429,30.714286,10.000000,6.714286,14.714286,21.142857,116.428571,240.000000,0.568000,0.549857,0.456571,0.205143,26.785714,72.014286,48.828571,70.100000,9.814286,12.814286,12.471429,100.0,114.628571,107.885714,242.5,37.400000,82.500000,0.454800,16.100000,40.900000,0.391800,18.200000,21.800000,0.848300,8.900000,29.400000,38.300000,25.100000,5.400000,3.200000,15.100000,20.500000,109.100000,242.5,0.593900,0.553500,0.499100,0.267200,21.710000,76.130000,47.970000,67.650000,5.550000,6.550000,14.130000,100.0,113.790000,116.330000


In [36]:
# Persist the schedule with the joined Home_/Away_ stat columns (the row index is written as the first column)
enhanced_schedule.to_csv("enhanced_schedule.csv")