In [2]:
import pickle
from pathlib import Path

import pandas as pd
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier


train = pd.read_csv("../data/mm_games_diff_augmented.csv")

features = ['KADJ T', 'KADJ O', 'KADJ D', 'EFG%', 'EFG%D', 'FTR', 'FTRD', 
            'TOV%', 'TOV%D', 'OREB%', 'DREB%', '2PT%', '2PT%D', '3PT%', 
            '3PT%D', 'AST%', '2PTR', '3PTR', '2PTRD', '3PTRD', 'EFF HGT', 
            'EXP', 'FT%', 'ELITE SOS']

years = train['YEAR'].unique()
max_year = train['YEAR'].max()

for year in range(years.min(), max_year + 2):
    print(f"Training model for year {year}...")
    
    data_train = train[train["YEAR"] < year]
    
    if len(data_train) == 0:
        continue
        
    X_train = data_train[features].values
    y_train = data_train["Winner"].values
    
    model = RandomForestClassifier(
        n_estimators=500,           
        max_depth=4,                
        min_samples_split=10,       
        min_samples_leaf=5,         
        max_features='sqrt',        
        max_samples=0.8,            
        class_weight='balanced',    
        random_state=42,
        n_jobs=1
    )
    
    cal_model = CalibratedClassifierCV(
        estimator=model,
        method='sigmoid',      
        cv=5,                  
        n_jobs=1
    )
    
    cal_model.fit(X_train, y_train)
    
    with open(f'RFmodels/rf_model_{year}.pkl', 'wb') as f:
        pickle.dump(cal_model, f)
    
    print(f"Model for year {year} saved!")

Training model for year 2008...
Training model for year 2009...
Model for year 2009 saved!
Training model for year 2010...
Model for year 2010 saved!
Training model for year 2011...
Model for year 2011 saved!
Training model for year 2012...
Model for year 2012 saved!
Training model for year 2013...
Model for year 2013 saved!
Training model for year 2014...
Model for year 2014 saved!
Training model for year 2015...
Model for year 2015 saved!
Training model for year 2016...
Model for year 2016 saved!
Training model for year 2017...
Model for year 2017 saved!
Training model for year 2018...
Model for year 2018 saved!
Training model for year 2019...
Model for year 2019 saved!
Training model for year 2020...
Model for year 2020 saved!
Training model for year 2021...
Model for year 2021 saved!
Training model for year 2022...
Model for year 2022 saved!
Training model for year 2023...
Model for year 2023 saved!
Training model for year 2024...
Model for year 2024 saved!
Training model for year 