In [42]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
import pandas as pd
from sklearn.model_selection import cross_val_score
import optuna
from pathlib import Path
import joblib

In [43]:
# Read the training dataset and discard the columns that are not part of the
# feature list selected for the model further down.
gamesDF = (
    pd.read_csv('./datasets/training_dataset_l10_wp.csv')
    .drop(columns=['HOME_L10_LOSSES', 'AWAY_L10_LOSSES', 'PERIOD', 'POINT_DIFF'])
)

In [44]:
# Target: the HOME_WIN column. Features: clock, score, season records and
# last-10 win counts for both teams.
y = gamesDF['HOME_WIN']
X = gamesDF.loc[
    :,
    [
        'SECONDS_REMAINING',
        'HOME_SCORE',
        'AWAY_SCORE',
        'HOME_WINS',
        'HOME_LOSSES',
        'AWAY_WINS',
        'AWAY_LOSSES',
        'HOME_L10_WINS',
        'AWAY_L10_WINS',
    ],
]

# Hold out 30% of the rows as a test set (fixed seed). Unlike the later
# validation split, this split is not stratified on the label.
# NOTE(review): if the CSV holds several rows per game, a row-level random
# split can place rows of the same game on both sides -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [45]:
def save_model(model, model_path_name):
    """Serialize ``model`` to disk with joblib and print where it was written.

    Args:
        model: Any object that ``joblib.dump`` can serialize.
        model_path_name: Destination file, as a string or path-like object.

    Missing parent directories of the destination are created first, so
    saving into a not-yet-existing output folder does not raise
    ``FileNotFoundError``.
    """
    model_path = Path(model_path_name)
    # For a bare file name the parent is the current directory; exist_ok
    # makes this a no-op in that case.
    model_path.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(model, model_path)
    print(f"Model saved to {model_path.absolute()}")

# Carve a validation set (20% of the training rows) out of the training split.
# It is stratified on the label and uses a fixed seed; the tuning objective
# trains on (X_tr, y_tr) and scores on (X_val, y_val).
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

def objective(trial):
    """Optuna objective: fit one XGBoost candidate and return its validation log-loss.

    Hyper-parameters are sampled from ``trial``; the model is trained on
    ``(X_tr, y_tr)`` with early stopping monitored on ``(X_val, y_val)``.

    Because early stopping may halt training before ``n_estimators`` rounds,
    the sampled ``n_estimators`` is only an upper bound on the number of
    trees. The model's ``best_iteration`` is therefore stored on the trial as
    the ``"best_iteration"`` user attribute so that the effective number of
    boosting rounds is not lost once the trial finishes.

    Args:
        trial: The Optuna trial used to sample hyper-parameters.

    Returns:
        Log-loss of the predicted positive-class probabilities on the
        validation split.
    """
    # Keep the suggest_* calls in this order: the sampler draws values
    # sequentially, so reordering would change what a seeded study explores.
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 300, 800),
        "max_depth": trial.suggest_int("max_depth", 2, 4),
        "learning_rate": trial.suggest_float("learning_rate", 0.02, 0.08, log=True),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-3, 10.0, log=True),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-3, 10.0, log=True),
        # Fixed (non-tuned) settings.
        "objective": "binary:logistic",
        "eval_metric": "logloss",
        "n_jobs": -1,
        "early_stopping_rounds": 50,
        "min_child_weight": trial.suggest_int("min_child_weight", 50, 300),
        "gamma": trial.suggest_float("gamma", 0.5, 5.0),
    }

    model = XGBClassifier(**params)
    model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
        verbose=False,
    )

    # Record the round early stopping settled on; it is a fitted quantity, not
    # a sampled parameter, so it would otherwise not appear on the trial.
    trial.set_user_attr("best_iteration", int(model.best_iteration))

    y_proba = model.predict_proba(X_val)[:, 1]
    return log_loss(y_val, y_proba)

# Minimise validation log-loss over 10 trials.
# The sampler is seeded so that re-running the notebook explores the same
# hyper-parameter sequence, in line with the fixed random_state=42 used for
# both data splits. TPESampler is Optuna's default single-objective sampler,
# so only the seeding changes, not the search algorithm.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)

print(study.best_params)

[32m[I 2026-02-10 10:43:04,806][0m A new study created in memory with name: no-name-3bcf4e44-d5a4-4251-b8a3-67685e82a649[0m
[32m[I 2026-02-10 10:43:19,473][0m Trial 0 finished with value: 0.4513880505477987 and parameters: {'n_estimators': 669, 'max_depth': 2, 'learning_rate': 0.06336610670711006, 'subsample': 0.9422679645388454, 'colsample_bytree': 0.7462234726096552, 'reg_lambda': 0.02994972186733143, 'reg_alpha': 0.04147623480440184, 'min_child_weight': 137, 'gamma': 0.5193928008977384}. Best is trial 0 with value: 0.4513880505477987.[0m
[32m[I 2026-02-10 10:43:28,225][0m Trial 1 finished with value: 0.4689162788732399 and parameters: {'n_estimators': 318, 'max_depth': 2, 'learning_rate': 0.07017531088150633, 'subsample': 0.6355474543932951, 'colsample_bytree': 0.7937608358864334, 'reg_lambda': 1.0262383951777676, 'reg_alpha': 5.422973240692295, 'min_child_weight': 146, 'gamma': 2.7551338164701367}. Best is trial 0 with value: 0.4513880505477987.[0m
[32m[I 2026-02-10 10:43

{'n_estimators': 521, 'max_depth': 4, 'learning_rate': 0.061200500952725015, 'subsample': 0.9330262359837351, 'colsample_bytree': 0.6477852219870596, 'reg_lambda': 0.002291261713927826, 'reg_alpha': 0.20599208702049462, 'min_child_weight': 193, 'gamma': 3.9571590189846457}


In [46]:
# Refit on the whole training split with the best trial's sampled
# hyper-parameters, then persist the fitted model.
# NOTE: study.best_params contains only the values sampled by the trial; the
# fixed settings used during the search (eval metric, early stopping) are not
# passed here, so this fit runs for the full n_estimators rounds.
bst = XGBClassifier(**study.best_params).fit(X_train, y_train)
save_model(bst, 'xgboost.joblib')


Model saved to /Users/lemons/Documents/universidad/cs/pj09-sports-betting/ml/xgboost.joblib
