In [24]:
## script to fetch regular-season game data for the 2009-10 through 2023-24 seasons and save it to games.csv
import time
import pandas as pd
from nba_api.stats.endpoints import leaguegamefinder

def determine_winning_team(row):
    """Pick the winning side of a single game row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing
            'PLUS_MINUS_HOME', 'HOME_TEAM' and 'AWAY_TEAM'.

    Returns:
        row['HOME_TEAM'] when 'PLUS_MINUS_HOME' is positive,
        row['AWAY_TEAM'] when it is negative, and None otherwise
        (zero or NaN margin).
    """
    home_margin = row['PLUS_MINUS_HOME']
    if home_margin > 0:
        return row['HOME_TEAM']
    if home_margin < 0:
        return row['AWAY_TEAM']
    # Neither comparison held (0 or NaN): no winner can be named.
    return None

# Season labels in 'YYYY-YY' form, oldest first: '2009-10' through '2023-24'.
seasons = [f'{start_year}-{(start_year + 1) % 100:02d}' for start_year in range(2009, 2024)]



# Team names kept by the TEAM_NAME filter; rows for any other team are dropped.
# NOTE(review): these are current franchise names. If the API reports historical
# names for older seasons (renamed/relocated franchises), those rows — and, because
# the home/away pairing is an inner join, their games — are silently excluded. Confirm.
team_names = ['Atlanta Hawks', 'Boston Celtics', 'Brooklyn Nets', 'Charlotte Hornets', 'Chicago Bulls',
              'Cleveland Cavaliers', 'Dallas Mavericks', 'Denver Nuggets', 'Detroit Pistons',
              'Golden State Warriors', 'Houston Rockets', 'Indiana Pacers', 'LA Clippers',
              'Los Angeles Lakers', 'Memphis Grizzlies', 'Miami Heat', 'Milwaukee Bucks',
              'Minnesota Timberwolves', 'New Orleans Pelicans', 'New York Knicks',
              'Oklahoma City Thunder', 'Orlando Magic', 'Philadelphia 76ers', 'Phoenix Suns',
              'Portland Trail Blazers', 'Sacramento Kings', 'San Antonio Spurs', 'Toronto Raptors',
              'Utah Jazz', 'Washington Wizards']

# Per-team stat columns carried into the output (un-suffixed names, in output order).
stat_cols = ['PTS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
             'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB',
             'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS']

# Exact column order of games.csv: game identifiers, home block, away block, winner.
output_cols = (['SEASON_ID', 'GAME_ID', 'GAME_DATE', 'HOME_TEAM']
               + [f'{col}_HOME' for col in stat_cols]
               + ['AWAY_TEAM']
               + [f'{col}_AWAY' for col in stat_cols]
               + ['WINNING_TEAM'])


def _pair_home_and_away(games):
    """Turn per-team game rows into one row per game with home and away stats side by side.

    Args:
        games: DataFrame with one row per (team, game), as returned by
            LeagueGameFinder, containing at least TEAM_NAME, MATCHUP, GAME_ID,
            SEASON_ID, GAME_DATE, TEAM_ABBREVIATION and the columns in `stat_cols`.

    Returns:
        DataFrame with the columns of `output_cols`, sorted by GAME_DATE. Games
        without both a 'vs.' row and an '@' row among `team_names` are dropped.
    """
    games = games[games['TEAM_NAME'].isin(team_names)]

    # Literal matching: with the default regex=True the '.' in 'vs.' is a wildcard.
    # na=False keeps a missing MATCHUP from breaking the boolean mask.
    is_home = games['MATCHUP'].str.contains('vs.', regex=False, na=False)
    is_away = games['MATCHUP'].str.contains('@', regex=False, na=False)

    # Every non-key column exists on both sides, so each one gets a _HOME/_AWAY suffix.
    paired = pd.merge(games[is_home], games[is_away], on=['GAME_ID'], suffixes=('_HOME', '_AWAY'))
    if paired.empty:
        # DataFrame.apply(axis=1) on an empty frame returns a frame, not a Series,
        # which cannot be assigned to a single column; return the empty schema instead.
        return paired.reindex(columns=output_cols)

    games_df = paired.assign(
        SEASON_ID=paired['SEASON_ID_HOME'].combine_first(paired['SEASON_ID_AWAY']),
        GAME_DATE=paired['GAME_DATE_HOME'],
        HOME_TEAM=paired['TEAM_ABBREVIATION_HOME'],
        AWAY_TEAM=paired['TEAM_ABBREVIATION_AWAY'],
    )
    games_df['WINNING_TEAM'] = games_df.apply(determine_winning_team, axis=1)
    return games_df[output_cols].sort_values(by=['GAME_DATE'])


# One request per season; frames are collected and concatenated once at the end
# rather than re-copying the accumulated frame on every iteration.
season_frames = []

for season in seasons:
    gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable=season, league_id_nullable='00', season_type_nullable='Regular Season',
                                                   date_from_nullable='10/01/2009', date_to_nullable='04/30/2024')
    games = gamefinder.get_data_frames()[0].sort_values(by=['GAME_DATE'])
    games_df = _pair_home_and_away(games)
    season_frames.append(games_df)

combined_games_df = pd.concat(season_frames, ignore_index=True) if season_frames else pd.DataFrame()

combined_games_df.to_csv('games.csv', index=False)


In [None]:
## script to print team data and averages from the past few years + ortg, drtg 


In [26]:
## script to print player data by game - past 10 years

In [None]:
## script to print player data averages - past 10 years