In [1]:
import pandas as pd


In [2]:
df_players_data = pd.read_csv(r"D:\nba prediction v2\games_2024-25_active_players.csv", encoding='utf-8')

In [8]:
pd.set_option('display.max_columns', None)
df_players_data.head(8)


Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_NAME,PLAYER_ID
0,22024,1631210,22401186,"Apr 13, 2025",ATL vs. ORL,W,27,6,12,0.5,5,8,0.625,0,0,0.0,1,3,4,2,0,0,1,0,17,-7,1,Jacob Toppin,1631210
1,22024,1631210,22400692,"Feb 01, 2025",NYK vs. LAL,L,2,0,1,0.0,0,0,0.0,0,0,0.0,0,1,1,0,0,0,0,0,0,0,1,Jacob Toppin,1631210
2,22024,1631210,22400653,"Jan 27, 2025",NYK vs. MEM,W,4,0,0,0.0,0,0,0.0,0,0,0.0,0,1,1,0,0,0,0,1,0,3,1,Jacob Toppin,1631210
3,22024,1631210,22400641,"Jan 25, 2025",NYK vs. SAC,W,3,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,1,0,0,0,0,2,1,Jacob Toppin,1631210
4,22024,1631210,22400539,"Jan 12, 2025",NYK vs. MIL,W,5,1,2,0.5,0,0,0.0,0,0,0.0,0,1,1,0,1,0,1,1,2,4,1,Jacob Toppin,1631210
5,22024,1631210,22400529,"Jan 10, 2025",NYK vs. OKC,L,6,1,2,0.5,0,0,0.0,0,0,0.0,1,2,3,0,0,0,0,0,2,-3,1,Jacob Toppin,1631210
6,22024,1631210,22400511,"Jan 08, 2025",NYK vs. TOR,W,2,0,2,0.0,0,0,0.0,0,0,0.0,1,0,1,0,0,0,0,0,0,-12,1,Jacob Toppin,1631210
7,22024,1631210,22400482,"Jan 04, 2025",NYK @ CHI,L,2,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,1,Jacob Toppin,1631210


In [24]:
# Формируем признаки: среднее по предыдущим 5 играм (exclude current), и целевое значение — значение текущей игры (то, что мы хотим предсказывать)
# Таким образом для строки с рассчитанным 5-игровым средним target — это «6‑е» значение (следующая после 5 использованных).

df = df_players_data.copy()
df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])
df = df.sort_values(['PLAYER_NAME','GAME_DATE']).reset_index(drop=True)
stats = ['PTS','AST','REB', 'MIN', 'FG3_PCT']

# Признаки: среднее по предыдущим 5 играм (shift(1) чтобы исключить текущую игру)
for s in stats:
    df[f'{s}_10avg'] = df.groupby('PLAYER_NAME')[s].transform(lambda x: x.shift(1).rolling(window=10, min_periods=1).mean())

for s in stats:
    df[f'{s}_3avg'] = df.groupby('PLAYER_NAME')[s].transform(lambda x: x.shift(1).rolling(window=3, min_periods=3).mean())

for s in stats:
    df[f'{s}_trend'] = df[f'{s}_3avg'] - df[f'{s}_10avg']

# Целевые значения: текущая игра — это то, что мы хотим предсказать ("11‑е" значение относительно предыдущих 5)
for s in stats:
    df[f'{s}_target'] = df[s]

# Отфильтруем только те строки, где есть полноценное 5-игровое среднее (min_periods=5)
df_model = df.dropna(subset=[f'{stats[0]}_10avg'])
# Показать пример: признаки и цели
cols = ['GAME_DATE','PLAYER_NAME'] + [f'{s}_10avg' for s in stats] + [f'{s}_target' for s in stats] + [f'{s}_3avg' for s in stats] + [f'{s}_trend' for s in stats] 
df_model[cols].head(20)


Unnamed: 0,GAME_DATE,PLAYER_NAME,PTS_10avg,AST_10avg,REB_10avg,MIN_10avg,FG3_PCT_10avg,PTS_target,AST_target,REB_target,MIN_target,FG3_PCT_target,PTS_3avg,AST_3avg,REB_3avg,MIN_3avg,FG3_PCT_3avg,PTS_trend,AST_trend,REB_trend,MIN_trend,FG3_PCT_trend
1,2024-12-26,A.J. Lawson,3.0,0.0,0.0,4.0,1.0,6,0,2,5,1.0,,,,,,,,,,
2,2024-12-31,A.J. Lawson,4.5,0.0,1.0,4.5,1.0,0,0,1,4,0.0,,,,,,,,,,
3,2025-01-21,A.J. Lawson,3.0,0.0,1.0,4.333333,0.666667,2,0,0,1,0.0,3.0,0.0,1.0,4.333333,0.666667,0.0,0.0,0.0,0.0,0.0
4,2025-02-23,A.J. Lawson,2.75,0.0,0.75,3.5,0.5,0,0,1,1,0.0,2.666667,0.0,1.0,3.333333,0.333333,-0.083333,0.0,0.25,-0.166667,-0.166667
5,2025-03-02,A.J. Lawson,2.2,0.0,0.8,3.0,0.4,0,0,0,14,0.0,0.666667,0.0,0.666667,2.0,0.0,-1.533333,0.0,-0.133333,-1.0,-0.4
6,2025-03-04,A.J. Lawson,1.833333,0.0,0.666667,4.833333,0.333333,13,1,2,26,0.75,0.666667,0.0,0.333333,5.333333,0.0,-1.166667,0.0,-0.333333,0.5,-0.333333
7,2025-03-07,A.J. Lawson,3.428571,0.142857,0.857143,7.857143,0.392857,6,1,3,27,0.111,4.333333,0.333333,1.0,13.666667,0.25,0.904762,0.190476,0.142857,5.809524,-0.142857
8,2025-03-08,A.J. Lawson,3.75,0.25,1.125,10.25,0.357625,7,2,3,32,0.143,6.333333,0.666667,1.666667,22.333333,0.287,2.583333,0.416667,0.541667,12.083333,-0.070625
9,2025-03-10,A.J. Lawson,4.111111,0.444444,1.333333,12.666667,0.333778,32,0,12,33,0.5,8.666667,1.333333,2.666667,28.333333,0.334667,4.555556,0.888889,1.333333,15.666667,0.000889
10,2025-03-12,A.J. Lawson,6.9,0.4,2.4,14.7,0.3504,28,4,6,36,0.444,15.0,1.0,6.0,30.666667,0.251333,8.1,0.6,3.6,15.966667,-0.099067


In [21]:
df[df['PLAYER_NAME'] =='A.J. Lawson']

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_NAME,PLAYER_ID,PTS_10avg,AST_10avg,REB_10avg,MIN_10avg,FG3_PCT_10avg,PTS_3avg,AST_3avg,REB_3avg,MIN_3avg,FG3_PCT_3avg,PTS_target,AST_target,REB_target,MIN_target,FG3_PCT_target
0,22024,1630639,22400397,2024-12-23,TOR @ NYK,L,4,1,1,1.0,1,1,1.0,0,0,0.0,0,0,0,0,0,0,0,0,3,5,1,A.J. Lawson,1630639,,,,,,,,,,,3,0,0,4,1.0
1,22024,1630639,22400414,2024-12-26,TOR @ MEM,L,5,2,3,0.667,1,1,1.0,1,2,0.5,0,2,2,0,0,0,0,0,6,-4,1,A.J. Lawson,1630639,,,,,,,,,,,6,0,2,5,1.0
2,22024,1630639,22400449,2024-12-31,TOR @ BOS,L,4,0,3,0.0,0,3,0.0,0,0,0.0,1,0,1,0,0,0,0,0,0,-8,1,A.J. Lawson,1630639,,,,,,,,,,,0,0,1,4,0.0
3,22024,1630639,22400608,2025-01-21,TOR vs. ORL,W,1,1,1,1.0,0,0,0.0,0,1,0.0,0,0,0,0,0,0,0,0,2,-3,1,A.J. Lawson,1630639,,,,,,3.0,0.0,1.0,4.333333,0.666667,2,0,0,1,0.0
4,22024,1630639,22400816,2025-02-23,TOR vs. PHX,W,1,0,0,0.0,0,0,0.0,0,0,0.0,0,1,1,0,0,0,0,0,0,0,1,A.J. Lawson,1630639,,,,,,2.666667,0.0,1.0,3.333333,0.333333,0,0,1,1,0.0
5,22024,1630639,22400870,2025-03-02,TOR @ ORL,W,14,0,2,0.0,0,0,0.0,0,0,0.0,0,0,0,0,0,0,1,2,0,16,1,A.J. Lawson,1630639,,,,,,0.666667,0.0,0.666667,2.0,0.0,0,0,0,14,0.0
6,22024,1630639,22400883,2025-03-04,TOR @ ORL,W,26,5,6,0.833,3,4,0.75,0,0,0.0,0,2,2,1,0,2,0,3,13,-8,1,A.J. Lawson,1630639,,,,,,0.666667,0.0,0.333333,5.333333,0.0,13,1,2,26,0.75
7,22024,1630639,22400905,2025-03-07,TOR vs. UTA,W,27,2,13,0.154,1,9,0.111,1,2,0.5,2,1,3,1,1,0,0,3,6,14,1,A.J. Lawson,1630639,,,,,,4.333333,0.333333,1.0,13.666667,0.25,6,1,3,27,0.111
8,22024,1630639,22400915,2025-03-08,TOR vs. WAS,L,32,2,11,0.182,1,7,0.143,2,2,1.0,2,1,3,2,1,0,0,3,7,-8,1,A.J. Lawson,1630639,,,,,,6.333333,0.666667,1.666667,22.333333,0.287,7,2,3,32,0.143
9,22024,1630639,22400932,2025-03-10,TOR vs. WAS,W,33,9,21,0.429,7,14,0.5,7,11,0.636,3,9,12,0,1,0,1,4,32,15,1,A.J. Lawson,1630639,,,,,,8.666667,1.333333,2.666667,28.333333,0.334667,32,0,12,33,0.5


In [28]:
# prediction model code here
features = [f'{s}_10avg' for s in stats] + [f'{s}_3avg' for s in stats] + [f'{s}_trend' for s in stats]
targets = ['PTS_target']

In [29]:
targets

['PTS_target']

In [30]:
features

['PTS_10avg',
 'AST_10avg',
 'REB_10avg',
 'MIN_10avg',
 'FG3_PCT_10avg',
 'PTS_3avg',
 'AST_3avg',
 'REB_3avg',
 'MIN_3avg',
 'FG3_PCT_3avg',
 'PTS_trend',
 'AST_trend',
 'REB_trend',
 'MIN_trend',
 'FG3_PCT_trend']