In [1]:
from nba_api.stats.static import players, teams as teams_static
from nba_api.stats.endpoints import playergamelog, commonallplayers, commonteamroster
from tqdm.auto import tqdm
import pandas as pd
import time
import random


In [90]:
# получаем всех игроков для сезона
def get_all_players(season):
    teams_list = teams_static.get_teams()
    player_map = {}
    for team in tqdm(teams_list, desc ='Processing Teams'):
        team_id = team.get('id')
        try:
            time.sleep(2.5)
            roster = commonteamroster.CommonTeamRoster(team_id=team_id, season=season)
            roster_df = roster.get_data_frames()[0]
        except Exception as e:
            print(f"Error fetching roster for team ID {team['full_name']}: {e}")
            continue
        time.sleep(2.5)
        for _, row in roster_df.iterrows():
            player_map[row['PLAYER_ID']] = {
            'PLAYER_NAME' : row['PLAYER'],
            'TEAM_ID' : team_id,
            'TEAM_NAME' : team.get('full_name'),
            'POSITION' : row['POSITION'],
            'HEIGHT' : row['HEIGHT'],
            'WEIGHT' : row['WEIGHT'],
            'AGE' : row['AGE']
            }
    df_players = pd.DataFrame.from_dict(player_map, orient='index').reset_index().rename(columns={'index':'PLAYER_ID'})
    return df_players

# получаем лог игроков для сезона
def get_player_game_logs(player_ids, season):
    all_game_logs = []
    for player_id in tqdm(player_ids, desc='Processing Players'):
        try:
            time.sleep(random.uniform(2.5, 4.5))
            gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
            gamelog_df = gamelog.get_data_frames()[0]
            all_game_logs.append(gamelog_df)
        except Exception as e:
            print(f"Error fetching game log for player ID {player_id}: {e}")
            continue
    if all_game_logs:
        combined_game_logs = pd.concat(all_game_logs, ignore_index=True)
    else:
        combined_game_logs = pd.DataFrame()
    return combined_game_logs


In [37]:
players_2025 = get_all_players('2024-25')


Processing Teams:   0%|          | 0/30 [00:00<?, ?it/s]

Error fetching roster for team ID Houston Rockets: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)
Error fetching roster for team ID Los Angeles Clippers: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


KeyboardInterrupt: 

In [38]:
players_2025 = players_2025.rename(columns={'PLAYER_ID':'Player_ID'})

In [None]:
all_game_logs = []
for player_id in tqdm(players_2025['Player_ID'], desc='Processing Players'):
    try:
        time.sleep(1.6)
        gamelog = playergamelog.PlayerGameLog(player_id=player_id, season='2024-25')
        gamelog_df = gamelog.get_data_frames()[0]
        all_game_logs.append(gamelog_df)
    except Exception as e:
        print(f"Error fetching game log for player ID {player_id}: {e}")
        continue




Processing Players:   0%|          | 0/534 [00:00<?, ?it/s]

  combined_game_logs_2025 = pd.concat(all_game_logs, ignore_index=True)


Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22024,1631210,0022401186,"Apr 13, 2025",ATL vs. ORL,W,27,6,12,0.50,...,3,4,2,0,0,1,0,17,-7,1
1,22024,1631210,0022400692,"Feb 01, 2025",NYK vs. LAL,L,2,0,1,0.00,...,1,1,0,0,0,0,0,0,0,1
2,22024,1631210,0022400653,"Jan 27, 2025",NYK vs. MEM,W,4,0,0,0.00,...,1,1,0,0,0,0,1,0,3,1
3,22024,1631210,0022400641,"Jan 25, 2025",NYK vs. SAC,W,3,0,0,0.00,...,0,0,0,1,0,0,0,0,2,1
4,22024,1631210,0022400539,"Jan 12, 2025",NYK vs. MIL,W,5,1,2,0.50,...,1,1,0,1,0,1,1,2,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25835,22024,201959,0022400205,"Nov 10, 2024",CHA @ PHI,L,13,1,2,0.50,...,3,6,0,0,1,0,4,3,3,1
25836,22024,201959,0022400184,"Nov 08, 2024",CHA vs. IND,W,13,1,1,1.00,...,3,3,1,0,0,1,2,2,-15,1
25837,22024,201959,0022400169,"Nov 06, 2024",CHA vs. DET,W,15,3,4,0.75,...,3,4,0,0,2,1,3,7,-4,1
25838,22024,201959,0022400162,"Nov 04, 2024",CHA @ MIN,L,16,2,4,0.50,...,3,4,0,1,0,0,1,5,-4,1


In [74]:
combined_game_logs_2025 = pd.concat(all_game_logs, ignore_index=True)
combined_game_logs_2025['GAME_DATE']  = pd.to_datetime(combined_game_logs_2025['GAME_DATE'])
combined_game_logs_2025.set_index('GAME_DATE', inplace=True)
combined_game_logs_2025.reset_index(inplace=True)
combined_game_logs_2025 = combined_game_logs_2025.merge(players_2025[['Player_ID','PLAYER_NAME']], on='Player_ID', how='left')
# combined_game_logs_2025


  combined_game_logs_2025 = pd.concat(all_game_logs, ignore_index=True)


In [92]:
combined_game_logs_2025.to_csv('combined_game_logs_2025.csv', index=False)

In [75]:
combined_game_logs_2025

Unnamed: 0,GAME_DATE,SEASON_ID,Player_ID,Game_ID,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_NAME
0,2025-04-13,22024,1631210,0022401186,ATL vs. ORL,W,27,6,12,0.50,5,8,0.625,0,0,0.0,1,3,4,2,0,0,1,0,17,-7,1,Jacob Toppin
1,2025-02-01,22024,1631210,0022400692,NYK vs. LAL,L,2,0,1,0.00,0,0,0.000,0,0,0.0,0,1,1,0,0,0,0,0,0,0,1,Jacob Toppin
2,2025-01-27,22024,1631210,0022400653,NYK vs. MEM,W,4,0,0,0.00,0,0,0.000,0,0,0.0,0,1,1,0,0,0,0,1,0,3,1,Jacob Toppin
3,2025-01-25,22024,1631210,0022400641,NYK vs. SAC,W,3,0,0,0.00,0,0,0.000,0,0,0.0,0,0,0,0,1,0,0,0,0,2,1,Jacob Toppin
4,2025-01-12,22024,1631210,0022400539,NYK vs. MIL,W,5,1,2,0.50,0,0,0.000,0,0,0.0,0,1,1,0,1,0,1,1,2,4,1,Jacob Toppin
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25835,2024-11-10,22024,201959,0022400205,CHA @ PHI,L,13,1,2,0.50,0,0,0.000,1,2,0.5,3,3,6,0,0,1,0,4,3,3,1,Taj Gibson
25836,2024-11-08,22024,201959,0022400184,CHA vs. IND,W,13,1,1,1.00,0,0,0.000,0,0,0.0,0,3,3,1,0,0,1,2,2,-15,1,Taj Gibson
25837,2024-11-06,22024,201959,0022400169,CHA vs. DET,W,15,3,4,0.75,0,0,0.000,1,1,1.0,1,3,4,0,0,2,1,3,7,-4,1,Taj Gibson
25838,2024-11-04,22024,201959,0022400162,CHA @ MIN,L,16,2,4,0.50,1,2,0.500,0,0,0.0,1,3,4,0,1,0,0,1,5,-4,1,Taj Gibson


In [76]:
# features engineering

combined_game_logs_2025.sort_values(by=['Player_ID','PLAYER_NAME', 'GAME_DATE'], inplace=True)

stats = ['MIN','PTS', 'AST', 'REB', 'STL', 'BLK', 'TOV', 'FG_PCT', 'FG3_PCT', 'FT_PCT', 'PLUS_MINUS']
for stat in stats:
    combined_game_logs_2025[f'PREV_10_{stat}'] = combined_game_logs_2025.groupby('Player_ID')[stat].transform(lambda x: x.shift(1).rolling(window=10, min_periods=10).mean())

for stat in stats:
    combined_game_logs_2025[f'PREV_3_{stat}'] = combined_game_logs_2025.groupby('Player_ID')[stat].transform(lambda x: x.shift(1).rolling(window=3, min_periods=3).mean())

for stat in stats:
    combined_game_logs_2025[f'last_v_{stat}'] = combined_game_logs_2025.groupby('Player_ID')[stat].shift(1)

combined_game_logs_2025['stdt PTS 10'] = combined_game_logs_2025.groupby('Player_ID')['PTS'].transform(lambda x: x.shift(1).rolling(window=10, min_periods=10).std())  
# combined_game_logs_2025['stdt PTS 3'] = combined_game_logs_2025.groupby('Player_ID')['PTS'].transform(lambda x: x.shift(1).rolling(window=3, min_periods=3).std())      

combined_game_logs_2025 = combined_game_logs_2025.reset_index(drop=True)


In [77]:
combined_game_logs_2025.head(20)
combined_game_logs_2025 = combined_game_logs_2025.dropna(subset='PREV_10_PTS').reset_index(drop=True)
combined_game_logs_2025

Unnamed: 0,GAME_DATE,SEASON_ID,Player_ID,Game_ID,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_NAME,PREV_10_MIN,PREV_10_PTS,PREV_10_AST,PREV_10_REB,PREV_10_STL,PREV_10_BLK,PREV_10_TOV,PREV_10_FG_PCT,PREV_10_FG3_PCT,PREV_10_FT_PCT,PREV_10_PLUS_MINUS,PREV_3_MIN,PREV_3_PTS,PREV_3_AST,PREV_3_REB,PREV_3_STL,PREV_3_BLK,PREV_3_TOV,PREV_3_FG_PCT,PREV_3_FG3_PCT,PREV_3_FT_PCT,PREV_3_PLUS_MINUS,last_v_MIN,last_v_PTS,last_v_AST,last_v_REB,last_v_STL,last_v_BLK,last_v_TOV,last_v_FG_PCT,last_v_FG3_PCT,last_v_FT_PCT,last_v_PLUS_MINUS,stdt PTS 10
0,2024-11-13,22024,2544,0022400225,LAL vs. MEM,W,37,13,22,0.591,4,7,0.571,5,8,0.625,1,11,12,14,1,0,6,2,35,3,1,LeBron James,34.9,23.2,8.9,7.7,0.6,0.6,2.8,0.5091,0.3997,0.8542,-1.4,34.666667,26.333333,11.666667,9.666667,0.666667,1.0,3.333333,0.565667,0.348333,0.916667,1.333333,35,19,16,10,1,0,3,0.429,0.000,1.000,13,8.080154
1,2024-11-15,22024,2544,0022400015,LAL @ SAS,W,38,6,12,0.500,1,4,0.250,2,4,0.500,1,15,16,12,1,0,7,0,15,3,1,LeBron James,35.1,25.1,9.9,8.4,0.7,0.4,3.2,0.5244,0.4318,0.8167,-0.5,35.333333,25.000000,14.333333,11.333333,0.666667,1.0,3.666667,0.554333,0.357000,0.875000,9.333333,37,35,14,12,1,0,6,0.591,0.571,0.625,3,8.425490
2,2024-11-16,22024,2544,0022400231,LAL @ NOP,W,37,8,19,0.421,2,7,0.286,3,5,0.600,4,3,7,5,1,0,6,1,21,1,1,LeBron James,35.4,24.5,10.3,9.6,0.8,0.4,3.7,0.5206,0.4168,0.8042,-1.6,36.666667,23.000000,14.000000,12.666667,1.000000,0.0,5.333333,0.506667,0.273667,0.708333,6.333333,38,15,12,16,1,0,7,0.500,0.250,0.500,3,8.947377
3,2024-11-19,22024,2544,0022400026,LAL vs. UTA,W,34,9,17,0.529,1,5,0.200,7,11,0.636,0,2,2,12,0,0,4,1,26,-5,1,LeBron James,35.7,23.4,9.8,8.9,0.9,0.3,3.8,0.5082,0.4010,0.7642,-2.8,37.333333,23.666667,10.333333,11.666667,1.000000,0.0,6.333333,0.504000,0.369000,0.575000,2.333333,37,21,5,7,1,0,6,0.421,0.286,0.600,1,8.591986
4,2024-11-21,22024,2544,0022400263,LAL vs. ORL,L,37,12,22,0.545,5,9,0.556,2,4,0.500,2,8,10,7,0,1,4,0,31,-8,1,LeBron James,35.5,24.9,10.2,8.6,0.8,0.3,4.0,0.5397,0.3810,0.7778,-1.6,36.333333,20.666667,9.666667,8.333333,0.666667,0.0,5.666667,0.483333,0.245333,0.578667,-0.333333,34,26,12,2,0,0,4,0.529,0.200,0.636,-5,7.415449
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20729,2025-02-03,22024,1642530,0022400703,MEM vs. SAS,W,2,0,0,0.000,0,0,0.000,0,0,0.000,0,1,1,1,0,0,0,0,0,-2,1,Yuki Kawamura,3.7,1.9,1.0,0.4,0.1,0.0,0.1,0.2633,0.1167,0.1000,0.9,5.000000,3.333333,1.333333,1.333333,0.000000,0.0,0.000000,0.266667,0.222333,0.000000,1.333333,2,0,0,0,0,0,0,0.000,0.000,0.000,-2,3.212822
20730,2025-02-05,22024,1642530,0022400722,MEM @ TOR,W,4,0,1,0.000,0,1,0.000,0,0,0.000,0,0,0,1,0,0,0,0,0,3,1,Yuki Kawamura,3.1,1.6,0.7,0.5,0.1,0.0,0.1,0.2300,0.1167,0.0500,-0.1,2.000000,0.000000,0.666667,0.666667,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,-1.000000,2,0,1,1,0,0,0,0.000,0.000,0.000,-2,3.238655
20731,2025-02-12,22024,1642530,0022400783,MEM @ LAC,L,1,0,0,0.000,0,0,0.000,0,0,0.000,0,0,0,0,0,0,0,0,0,1,1,Yuki Kawamura,3.5,1.6,0.8,0.5,0.1,0.0,0.1,0.2300,0.1167,0.0500,0.2,2.666667,0.000000,0.666667,0.333333,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,-0.333333,4,0,1,0,0,0,0,0.000,0.000,0.000,3,3.238655
20732,2025-04-10,22024,1642530,0022401170,MEM vs. MIN,L,1,0,0,0.000,0,0,0.000,0,0,0.000,0,0,0,0,0,0,0,0,0,-2,1,Yuki Kawamura,3.2,1.6,0.7,0.5,0.1,0.0,0.1,0.2300,0.1167,0.0500,0.4,2.333333,0.000000,0.666667,0.333333,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.666667,1,0,0,0,0,0,0,0.000,0.000,0.000,1,3.238655


In [78]:
# разделение выборки на тренировочную, тестовую и валидационную
from sklearn.model_selection import train_test_split
df = combined_game_logs_2025.copy()

df = df.sort_values(by=['PLAYER_NAME','GAME_DATE']).reset_index(drop=True)

df['game_number'] = df.groupby('PLAYER_NAME').cumcount() + 1
df['total_games'] = df.groupby('PLAYER_NAME')['Game_ID'].transform('count')

test_df = df[df['game_number'] > (df['total_games'] - 3)]
train_val_df = df[df['game_number'] <= (df['total_games'] - 3)]
train_val_df = train_val_df.sort_values(by=['PLAYER_NAME','GAME_DATE']).reset_index(drop=True)
train_df = (
    train_val_df.groupby('PLAYER_NAME')
    .apply(lambda x: x.iloc[:-int(len(x) * 0.2)] if len(x) > 5 else x.iloc[:-1])
    .reset_index(drop=True)
)

val_df = (
    train_val_df.groupby('PLAYER_NAME')
    .apply(lambda x: x.iloc[-int(len(x) * 0.2):] if len(x) > 5 else x.iloc[-1:])
    .reset_index(drop=True)
)

  .apply(lambda x: x.iloc[:-int(len(x) * 0.2)] if len(x) > 5 else x.iloc[:-1])
  .apply(lambda x: x.iloc[-int(len(x) * 0.2):] if len(x) > 5 else x.iloc[-1:])


In [79]:
print(train_df.shape, val_df.shape, test_df.shape)

(15605, 64) (3670, 64) (1459, 64)


In [81]:
train_df.head(10)

Unnamed: 0,GAME_DATE,SEASON_ID,Player_ID,Game_ID,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_NAME,PREV_10_MIN,PREV_10_PTS,PREV_10_AST,PREV_10_REB,PREV_10_STL,PREV_10_BLK,PREV_10_TOV,PREV_10_FG_PCT,PREV_10_FG3_PCT,PREV_10_FT_PCT,PREV_10_PLUS_MINUS,PREV_3_MIN,PREV_3_PTS,PREV_3_AST,PREV_3_REB,PREV_3_STL,PREV_3_BLK,PREV_3_TOV,PREV_3_FG_PCT,PREV_3_FG3_PCT,PREV_3_FT_PCT,PREV_3_PLUS_MINUS,last_v_MIN,last_v_PTS,last_v_AST,last_v_REB,last_v_STL,last_v_BLK,last_v_TOV,last_v_FG_PCT,last_v_FG3_PCT,last_v_FT_PCT,last_v_PLUS_MINUS,stdt PTS 10,game_number,total_games
0,2025-03-12,22024,1630639,22400947,TOR vs. PHI,W,36,10,20,0.5,4,9,0.444,4,6,0.667,1,5,6,4,0,0,3,4,28,1,1,A.J. Lawson,14.7,6.9,0.4,2.4,0.3,0.2,0.2,0.4265,0.3504,0.2636,1.9,30.666667,15.0,1.0,6.0,1.0,0.0,0.333333,0.255,0.251333,0.712,7.0,33,32,0,12,1,0,1,0.429,0.5,0.636,15,9.723397,1,16
1,2025-03-14,22024,1630639,22400966,TOR @ UTA,W,24,6,10,0.6,2,5,0.4,4,6,0.667,1,4,5,0,0,0,1,2,18,10,1,A.J. Lawson,17.9,9.4,0.8,3.0,0.3,0.2,0.5,0.3765,0.2948,0.3303,1.5,33.666667,22.333333,2.0,7.0,0.666667,0.0,1.333333,0.370333,0.362333,0.767667,2.666667,36,28,4,6,0,0,3,0.5,0.444,0.667,1,11.635196,2,16
2,2025-03-16,22024,1630639,22400980,TOR @ POR,L,33,4,14,0.286,1,6,0.167,0,0,0.0,2,6,8,6,0,0,1,3,9,-10,1,A.J. Lawson,19.8,10.6,0.8,3.3,0.3,0.2,0.6,0.3698,0.2348,0.347,2.9,31.0,26.0,1.333333,7.666667,0.333333,0.0,1.666667,0.509667,0.448,0.656667,8.666667,24,18,0,5,0,0,1,0.6,0.4,0.667,10,11.862171,3,16
3,2025-03-17,22024,1630639,22400991,TOR @ PHX,L,25,4,9,0.444,2,5,0.4,2,2,1.0,0,3,3,0,1,0,1,2,12,-32,1,A.J. Lawson,22.7,11.5,1.4,4.0,0.3,0.2,0.7,0.3984,0.2515,0.347,2.7,31.0,18.333333,3.333333,6.333333,0.0,0.0,1.666667,0.462,0.337,0.444667,0.333333,33,9,6,8,0,0,1,0.286,0.167,0.0,-10,11.296509,4,16
4,2025-03-20,22024,1630639,22401005,TOR @ GSW,L,10,0,2,0.0,0,1,0.0,0,0,0.0,0,2,2,0,0,0,0,1,0,0,1,A.J. Lawson,25.1,12.5,1.4,4.3,0.4,0.2,0.8,0.3428,0.2915,0.447,-0.2,27.333333,13.0,2.0,5.333333,0.333333,0.0,1.0,0.443333,0.322333,0.555667,-10.666667,25,12,0,3,1,0,1,0.444,0.4,1.0,-32,10.793517,5,16
5,2025-03-24,22024,1630639,22401039,TOR @ WAS,W,23,3,7,0.429,1,3,0.333,1,2,0.5,0,2,2,2,2,0,0,1,8,6,1,A.J. Lawson,26.0,12.5,1.4,4.4,0.4,0.2,0.8,0.3428,0.2915,0.447,-0.2,22.666667,7.0,2.0,4.333333,0.333333,0.0,0.666667,0.243333,0.189,0.333333,-14.0,10,0,0,2,0,0,0,0.0,0.0,0.0,0,10.793517,6,16
6,2025-03-26,22024,1630639,22401054,TOR @ BKN,W,16,1,6,0.167,0,4,0.0,0,0,0.0,0,3,3,0,0,0,0,1,2,1,1,A.J. Lawson,26.9,13.3,1.6,4.6,0.6,0.2,0.7,0.3857,0.3248,0.497,-1.2,19.333333,6.666667,0.666667,2.333333,1.0,0.0,0.333333,0.291,0.244333,0.5,-8.666667,23,8,2,2,2,0,0,0.429,0.333,0.5,6,10.033832,7,16
7,2025-03-28,22024,1630639,22401069,TOR vs. CHA,W,9,1,3,0.333,0,2,0.0,0,0,0.0,0,1,1,0,0,2,0,0,2,-7,1,A.J. Lawson,25.9,12.2,1.5,4.7,0.6,0.0,0.7,0.3191,0.2498,0.497,-0.3,16.333333,3.333333,0.666667,2.333333,0.666667,0.0,0.0,0.198667,0.111,0.166667,2.333333,16,2,0,3,0,0,0,0.167,0.0,0.0,1,10.654159,8,16
8,2025-03-30,22024,1630639,22401087,TOR @ PHI,W,5,0,2,0.0,0,2,0.0,0,0,0.0,1,2,3,0,0,0,0,0,0,-1,1,A.J. Lawson,24.1,11.8,1.4,4.5,0.5,0.2,0.7,0.337,0.2387,0.447,-2.4,16.0,4.0,0.666667,2.0,0.666667,0.666667,0.0,0.309667,0.111,0.166667,0.0,9,2,0,1,0,2,0,0.333,0.0,0.0,-7,10.982815,9,16
9,2025-04-01,22024,1630639,22401099,TOR @ CHI,L,13,5,6,0.833,2,3,0.667,1,2,0.5,1,5,6,1,0,0,0,2,13,-4,1,A.J. Lawson,21.4,11.1,1.2,4.5,0.4,0.2,0.7,0.3188,0.2244,0.347,-1.7,10.0,1.333333,0.0,2.333333,0.0,0.666667,0.0,0.166667,0.0,0.0,-2.333333,5,0,0,3,0,0,0,0.0,0.0,0.0,-1,11.532081,10,16


In [85]:
# Обучаем модель catboost на сгенерированных фичах
from catboost import CatBoostRegressor, Pool
#  --- IGNORE ---
print(train_df.shape, val_df.shape, test_df.shape)

X = ['PREV_10_MIN','PREV_3_MIN','last_v_MIN','PREV_10_PTS','PREV_10_AST', 'PREV_10_REB', 'PREV_10_STL', 'PREV_10_BLK',
       'PREV_10_TOV', 'PREV_10_FG_PCT', 'PREV_10_FG3_PCT', 'PREV_10_FT_PCT',
       'PREV_10_PLUS_MINUS', 'PREV_3_PTS', 'PREV_3_AST', 'PREV_3_REB',
       'PREV_3_STL', 'PREV_3_BLK', 'PREV_3_TOV', 'PREV_3_FG_PCT',
       'PREV_3_FG3_PCT', 'PREV_3_FT_PCT', 'PREV_3_PLUS_MINUS', 'last_v_PTS',
       'last_v_AST', 'last_v_REB', 'last_v_STL', 'last_v_BLK', 'last_v_TOV',
       'last_v_FG_PCT', 'last_v_FG3_PCT', 'last_v_FT_PCT', 'stdt PTS 10']

y = ['PTS']

train_pool = Pool(data=train_df[X], label=train_df[y])
val_pool = Pool(data=val_df[X], label=val_df[y])
model = CatBoostRegressor(
    iterations=1000,
    learning_rate=0.01,
    depth=6,
    eval_metric='RMSE',
    random_seed=42,
    early_stopping_rounds=50,
    verbose=100
)

model.fit(train_pool, eval_set=val_pool)
preds = model.predict(test_df[X])
test_df['predicted_PTS'] = preds
test_df[['PLAYER_NAME', 'GAME_DATE', 'PTS', 'predicted_PTS']]


(15605, 64) (3670, 64) (1459, 65)
0:	learn: 8.8007264	test: 8.8085706	best: 8.8085706 (0)	total: 4.43ms	remaining: 4.43s
100:	learn: 6.6170261	test: 6.7353809	best: 6.7353809 (100)	total: 246ms	remaining: 2.19s
200:	learn: 6.1667398	test: 6.3440193	best: 6.3440193 (200)	total: 501ms	remaining: 1.99s
300:	learn: 6.0598325	test: 6.2704745	best: 6.2704745 (300)	total: 758ms	remaining: 1.76s
400:	learn: 6.0146178	test: 6.2516391	best: 6.2516391 (400)	total: 997ms	remaining: 1.49s
500:	learn: 5.9860893	test: 6.2448651	best: 6.2448651 (500)	total: 1.23s	remaining: 1.22s
600:	learn: 5.9603539	test: 6.2414565	best: 6.2414403 (599)	total: 1.46s	remaining: 970ms
700:	learn: 5.9393362	test: 6.2397094	best: 6.2396861 (699)	total: 1.69s	remaining: 722ms
Stopped by overfitting detector  (50 iterations wait)

bestTest = 6.239115931
bestIteration = 736

Shrink model to first 737 iterations.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['predicted_PTS'] = preds


Unnamed: 0,PLAYER_NAME,GAME_DATE,PTS,predicted_PTS
13,A.J. Lawson,2025-04-09,14,8.465293
14,A.J. Lawson,2025-04-11,12,9.067434
15,A.J. Lawson,2025-04-13,14,10.040820
76,AJ Green,2025-04-10,3,8.427660
77,AJ Green,2025-04-11,9,7.584470
...,...,...,...,...
20712,Ziaire Williams,2025-04-08,9,10.488359
20713,Ziaire Williams,2025-04-10,9,9.991686
20731,Zion Williamson,2025-03-13,20,24.531169
20732,Zion Williamson,2025-03-17,30,23.859442


In [86]:
# смотрим на features importance
feature_importances = model.get_feature_importance()
feature_names = X
importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
importance_df = importance_df.sort_values(by='Importance', ascending=False)
importance_df


Unnamed: 0,Feature,Importance
3,PREV_10_PTS,58.965895
13,PREV_3_PTS,5.953569
1,PREV_3_MIN,4.559802
0,PREV_10_MIN,3.542622
2,last_v_MIN,3.201622
10,PREV_10_FG3_PCT,1.749175
23,last_v_PTS,1.728493
11,PREV_10_FT_PCT,1.642786
8,PREV_10_TOV,1.335799
20,PREV_3_FG3_PCT,1.285218


In [87]:
# загрузим результаты сезона 2025-2026
players_2026 = get_all_players('2025-26')
players_2026 = players_2026.rename(columns={'PLAYER_ID':'Player_ID'})

Processing Teams:   0%|          | 0/30 [00:00<?, ?it/s]

In [93]:
all_game_logs = []
for player_id in tqdm(players_2026['Player_ID'], desc='Processing Players'):
    try:
        time.sleep(1.6)
        gamelog = playergamelog.PlayerGameLog(player_id=player_id, season='2025-26')
        gamelog_df = gamelog.get_data_frames()[0]
        all_game_logs.append(gamelog_df)
    except Exception as e:
        print(f"Error fetching game log for player ID {player_id}: {e}")
        continue

Processing Players:   0%|          | 0/526 [00:00<?, ?it/s]

Error fetching game log for player ID 1631210: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


KeyboardInterrupt: 