In [2]:
import pandas as pd


In [3]:
# Load the per-player game rows from CSV.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook
# will not run elsewhere until it is pointed at a local copy of the file.
PLAYERS_CSV_PATH = r"D:\nba prediction v2\games_2024-25_active_players.csv"
df_players_data = pd.read_csv(PLAYERS_CSV_PATH, encoding='utf-8')

In [4]:
# Remove the cap on displayed columns so wide frames are shown in full,
# then preview the first 20 rows of the raw data.
pd.options.display.max_columns = None
df_players_data.head(20)

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_NAME,PLAYER_ID
0,22024,1631210,22401186,"Apr 13, 2025",ATL vs. ORL,W,27,6,12,0.5,5,8,0.625,0,0,0.0,1,3,4,2,0,0,1,0,17,-7,1,Jacob Toppin,1631210
1,22024,1631210,22400692,"Feb 01, 2025",NYK vs. LAL,L,2,0,1,0.0,0,0,0.0,0,0,0.0,0,1,1,0,0,0,0,0,0,0,1,Jacob Toppin,1631210
2,22024,1631210,22400653,"Jan 27, 2025",NYK vs. MEM,W,4,0,0,0.0,0,0,0.0,0,0,0.0,0,1,1,0,0,0,0,1,0,3,1,Jacob Toppin,1631210
3,22024,1631210,22400641,"Jan 25, 2025",NYK vs. SAC,W,3,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,1,0,0,0,0,2,1,Jacob Toppin,1631210
4,22024,1631210,22400539,"Jan 12, 2025",NYK vs. MIL,W,5,1,2,0.5,0,0,0.0,0,0,0.0,0,1,1,0,1,0,1,1,2,4,1,Jacob Toppin,1631210
5,22024,1631210,22400529,"Jan 10, 2025",NYK vs. OKC,L,6,1,2,0.5,0,0,0.0,0,0,0.0,1,2,3,0,0,0,0,0,2,-3,1,Jacob Toppin,1631210
6,22024,1631210,22400511,"Jan 08, 2025",NYK vs. TOR,W,2,0,2,0.0,0,0,0.0,0,0,0.0,1,0,1,0,0,0,0,0,0,-12,1,Jacob Toppin,1631210
7,22024,1631210,22400482,"Jan 04, 2025",NYK @ CHI,L,2,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,1,Jacob Toppin,1631210
8,22024,1631210,22400443,"Dec 30, 2024",NYK @ WAS,W,2,0,0,0.0,0,0,0.0,0,0,0.0,0,1,1,2,0,0,0,0,0,4,1,Jacob Toppin,1631210
9,22024,1631210,22400397,"Dec 23, 2024",NYK vs. TOR,W,1,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,1,0,-1,1,Jacob Toppin,1631210


In [10]:
# Per-player rolling means over 5 consecutive games (the current game is included
# in each window). Rows are ordered by player name, then by game date.
df = (
    df_players_data
    .assign(GAME_DATE=lambda frame: pd.to_datetime(frame['GAME_DATE']))
    .sort_values(['PLAYER_NAME', 'GAME_DATE'])
    .set_index('GAME_DATE')
)

rolling_stats = ['PTS', 'AST', 'REB', 'MIN', 'FG3_PCT']
window_means = (
    df.groupby('PLAYER_NAME')[rolling_stats]
      .rolling(window=5, min_periods=5)  # NaN until a player has 5 games in the window
      .mean()
)

# reset_index moves the (PLAYER_NAME, GAME_DATE) index levels back into columns, so
# rolling_idx holds PLAYER_NAME, GAME_DATE and the rolling means of
# PTS, AST, REB, MIN and FG3_PCT.
rolling_idx = window_means.reset_index()

In [11]:
# Display the rolling-average frame; a player's rows stay NaN until the window
# holds 5 games (min_periods=5).
rolling_idx

Unnamed: 0,PLAYER_NAME,GAME_DATE,PTS,AST,REB,MIN,FG3_PCT
0,A.J. Lawson,2024-12-23,,,,,
1,A.J. Lawson,2024-12-26,,,,,
2,A.J. Lawson,2024-12-31,,,,,
3,A.J. Lawson,2025-01-21,,,,,
4,A.J. Lawson,2025-02-23,2.2,0.0,0.8,3.0,0.4
...,...,...,...,...,...,...,...
24076,Zion Williamson,2025-03-17,22.4,5.2,7.2,29.8,0.0
24077,Zion Williamson,2025-03-19,24.2,6.2,6.2,29.4,0.0
24078,Zyon Pullin,2025-02-03,,,,,
24079,Zyon Pullin,2025-02-08,,,,,


In [17]:
# Build features and targets for the model.
# Features: the mean of each stat over the previous ROLLING_WINDOW games (the
# current game is excluded). Targets: the stat values of the current game, i.e.
# the game that immediately follows the ROLLING_WINDOW games used for the mean.

ROLLING_WINDOW = 5  # number of preceding games averaged into each feature

df = df_players_data.copy()
df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])
df = df.sort_values(['PLAYER_NAME','GAME_DATE']).reset_index(drop=True)
stats = ['PTS','AST','REB', 'MIN', 'FG3_PCT']
feature_cols = [f'{s}_{ROLLING_WINDOW}avg' for s in stats]
target_cols = [f'{s}_target' for s in stats]


def _prev_games_mean(series):
    """Return the mean of the preceding ROLLING_WINDOW values of `series`.

    shift(1) drops the current row from its own window, so the feature never
    sees the value it is meant to predict. The result is NaN until
    ROLLING_WINDOW earlier values exist.
    """
    return series.shift(1).rolling(window=ROLLING_WINDOW, min_periods=ROLLING_WINDOW).mean()


# Features: group by PLAYER_ID rather than PLAYER_NAME so that two players who
# share a display name are never merged into a single rolling series.
for s, feature in zip(stats, feature_cols):
    df[feature] = df.groupby('PLAYER_ID')[s].transform(_prev_games_mean)

# Targets: the current game's values are what we want to predict.
for s, target in zip(stats, target_cols):
    df[target] = df[s]

# Keep only rows where every rolling feature is available (not just the first
# stat), and take a copy so later edits to df_model do not touch a slice of df.
df_model = df.dropna(subset=feature_cols).copy()

# Show an example: features next to their targets.
cols = ['GAME_DATE','PLAYER_NAME'] + feature_cols + target_cols
df_model[cols].head(20)


Unnamed: 0,GAME_DATE,PLAYER_NAME,PTS_5avg,AST_5avg,REB_5avg,MIN_5avg,FG3_PCT_5avg,PTS_target,AST_target,REB_target,MIN_target,FG3_PCT_target
5,2025-03-02,A.J. Lawson,2.2,0.0,0.8,3.0,0.4,0,0,0,14,0.0
6,2025-03-04,A.J. Lawson,1.6,0.0,0.8,5.0,0.2,13,1,2,26,0.75
7,2025-03-07,A.J. Lawson,3.0,0.2,0.8,9.2,0.15,6,1,3,27,0.111
8,2025-03-08,A.J. Lawson,4.2,0.4,1.2,13.8,0.1722,7,2,3,32,0.143
9,2025-03-10,A.J. Lawson,5.2,0.8,1.8,20.0,0.2008,32,0,12,33,0.5
10,2025-03-12,A.J. Lawson,11.6,0.8,4.0,26.4,0.3008,28,4,6,36,0.444
11,2025-03-14,A.J. Lawson,17.2,1.6,5.2,30.8,0.3896,18,0,5,24,0.4
12,2025-03-16,A.J. Lawson,18.2,1.4,5.8,30.4,0.3196,9,6,8,33,0.167
13,2025-03-17,A.J. Lawson,18.8,2.4,6.8,31.6,0.3308,12,0,3,25,0.4
14,2025-03-20,A.J. Lawson,19.8,2.0,6.8,30.2,0.3822,0,0,2,10,0.0


In [18]:
# Display the modelling frame: the original columns plus the *_5avg feature and
# *_target columns, restricted to rows that passed the dropna filter.
df_model

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_NAME,PLAYER_ID,PTS_5avg,AST_5avg,REB_5avg,MIN_5avg,FG3_PCT_5avg,PTS_target,AST_target,REB_target,MIN_target,FG3_PCT_target
5,22024,1630639,22400870,2025-03-02,TOR @ ORL,W,14,0,2,0.000,0,0,0.000,0,0,0.000,0,0,0,0,0,0,1,2,0,16,1,A.J. Lawson,1630639,2.2,0.0,0.8,3.0,0.4000,0,0,0,14,0.000
6,22024,1630639,22400883,2025-03-04,TOR @ ORL,W,26,5,6,0.833,3,4,0.750,0,0,0.000,0,2,2,1,0,2,0,3,13,-8,1,A.J. Lawson,1630639,1.6,0.0,0.8,5.0,0.2000,13,1,2,26,0.750
7,22024,1630639,22400905,2025-03-07,TOR vs. UTA,W,27,2,13,0.154,1,9,0.111,1,2,0.500,2,1,3,1,1,0,0,3,6,14,1,A.J. Lawson,1630639,3.0,0.2,0.8,9.2,0.1500,6,1,3,27,0.111
8,22024,1630639,22400915,2025-03-08,TOR vs. WAS,L,32,2,11,0.182,1,7,0.143,2,2,1.000,2,1,3,2,1,0,0,3,7,-8,1,A.J. Lawson,1630639,4.2,0.4,1.2,13.8,0.1722,7,2,3,32,0.143
9,22024,1630639,22400932,2025-03-10,TOR vs. WAS,W,33,9,21,0.429,7,14,0.500,7,11,0.636,3,9,12,0,1,0,1,4,32,15,1,A.J. Lawson,1630639,5.2,0.8,1.8,20.0,0.2008,32,0,12,33,0.500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24073,22024,1629627,22400913,2025-03-08,NOP @ HOU,L,28,10,19,0.526,0,0,0.000,0,2,0.000,2,0,2,3,2,0,4,2,20,-37,1,Zion Williamson,1629627,25.2,6.4,7.2,29.4,0.0000,20,3,2,28,0.000
24074,22024,1629627,22400943,2025-03-11,NOP vs. LAC,W,33,8,13,0.615,0,0,0.000,6,6,1.000,1,9,10,12,0,1,5,2,22,6,1,Zion Williamson,1629627,25.6,6.4,6.4,29.2,0.0000,22,12,10,33,0.000
24075,22024,1629627,22401141,2025-03-13,NOP vs. ORL,L,28,8,15,0.533,0,0,0.000,4,6,0.667,3,5,8,5,0,0,2,2,20,-14,1,Zion Williamson,1629627,24.6,6.6,6.4,29.6,0.0000,20,5,8,28,0.000
24076,22024,1629627,22400987,2025-03-17,NOP vs. DET,L,28,9,18,0.500,0,0,0.000,12,15,0.800,2,4,6,3,0,2,3,3,30,-18,1,Zion Williamson,1629627,23.8,5.8,6.8,29.4,0.0000,30,3,6,28,0.000
