In [2]:
import pandas as pd
import sklearn as skl
from sklearn.model_selection import TimeSeriesSplit
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import RidgeClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

In [3]:
rr = RidgeClassifier(alpha=1)
split = TimeSeriesSplit(n_splits=3)
sfs = SequentialFeatureSelector(rr, n_features_to_select=30, direction='forward', cv=split)

In [4]:
df = pd.read_csv('nba_games.csv')

df.drop('Unnamed: 0', axis=1, inplace=True)
df = df.sort_values('date').reset_index(drop=True)

del df['mp.1']
del df['mp_opp.1']
del df['index_opp']
del df['gmsc']
del df['gmsc_max']
del df['gmsc_max_opp']
del df['gmsc_opp']

In [5]:
def add_target(team):
    team['target'] = team['won'].shift(-1)
    return team

df = df.groupby('team', group_keys=False).apply(add_target)

df['target'][pd.isnull(df['target'])] = 2
df['target'] = df['target'].astype(int, errors='ignore')

nulls = pd.isnull(df).sum()
nulls = nulls[nulls > 0]

valid_columns = df.columns[~df.columns.isin(nulls.index)]
df = df[valid_columns].copy()

removed_columns = ['season', 'date', 'won', 'target', 'team', 'team_opp']
selected_columns = df.columns[~df.columns.isin(removed_columns)]

scaler = MinMaxScaler()
df[selected_columns] = scaler.fit_transform(df[selected_columns])

df_rolling = df[list(selected_columns) + ['won', 'team', 'season']]

def find_team_averages(team):
    numeric_cols = team.select_dtypes(include=['float64', 'int64'])

    rolling = numeric_cols.rolling(10).mean()
    return rolling

df_rolling = df_rolling.groupby(['team', 'season'], group_keys=False).apply(find_team_averages)

rolling_cols = [f'{col}_10' for col in df_rolling.columns]
df_rolling.columns = rolling_cols

df = pd.concat([df, df_rolling], axis=1)

df = df.dropna()
df.reset_index(drop=True, inplace=True)

def shift_col(team, col_name):
    next_col = team[col_name].shift(-1)
    return next_col

def add_col(df, col_name):
    return df.groupby('team', group_keys=False).apply(lambda x: shift_col(x, col_name))

df['home_next'] = add_col(df, 'home')
df['team_opp_next'] = add_col(df, 'team_opp')
df['date_next'] = add_col(df, 'date')

  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target'] = team['won'].shift(-1)
  team['target']

In [6]:
df.tail(5)

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,orb,...,tov%_max_opp_10,usg%_max_opp_10,ortg_max_opp_10,drtg_max_opp_10,total_opp_10,home_opp_10,season_10,home_next,team_opp_next,date_next
20605,0.0,0.413043,0.323529,0.464115,0.586207,0.636364,0.43943,0.295455,0.21875,0.206897,...,0.506289,0.152821,0.531754,0.580682,0.348214,0.5,2024.0,0.0,DAL,2024-06-14
20606,0.0,0.586957,0.455882,0.564593,0.517241,0.5,0.480998,0.340909,0.34375,0.448276,...,0.258386,0.159359,0.461611,0.545455,0.3625,0.5,2024.0,0.0,BOS,2024-06-17
20607,0.0,0.217391,0.294118,0.22488,0.482759,0.560606,0.404988,0.272727,0.203125,0.137931,...,0.590566,0.170769,0.521801,0.547727,0.374107,0.5,2024.0,1.0,DAL,2024-06-17
20608,0.0,0.413043,0.426471,0.37799,0.448276,0.530303,0.395487,0.386364,0.3125,0.517241,...,0.610587,0.196795,0.511374,0.552273,0.361607,0.4,2024.0,,,
20609,0.0,0.347826,0.264706,0.430622,0.37931,0.5,0.352732,0.159091,0.203125,0.241379,...,0.248428,0.163077,0.503318,0.518182,0.353571,0.6,2024.0,,,


In [11]:
df.iloc[-1,-1] = ?

'2024-06-17'

In [12]:
full = df.merge(
    df[rolling_cols + ['team_opp_next', 'team', 'date_next']],
    left_on=['team', 'date_next'], 
    right_on=['team_opp_next', 'date_next']
)

In [13]:
full = full.dropna()
full = full.copy()

In [14]:
full.to_csv('full.csv')