In [1]:
import numpy as np
import pandas as pd
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
import optuna
import gc



In [5]:
train_x = pd.read_csv('../../TPS_2021/input/tabular-playground-series-nov-2021/xgtrain.csv')
test_x = pd.read_csv('../../TPS_2021/input/tabular-playground-series-nov-2021/xgval.csv')

In [6]:
train_y = train_x['target']
train_x = train_x[train_x.columns.difference(['target'])]

test_y = test_x['target']
test_x = test_x[test_x.columns.difference(['target'])]


In [10]:
train_x.head()

Unnamed: 0,f0,f1,f10,f11,f12,f13,f14,f15,f16,f17,...,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99
0,-0.28209,-0.011114,0.507483,-1.102086,0.250591,-0.779308,-1.122699,-0.646795,-1.073323,-0.16689,...,-0.960254,0.412144,0.311905,0.893324,0.350115,0.593789,0.568979,0.491097,0.149572,-0.602242
1,-0.194928,0.215904,-0.141631,0.325356,0.368926,0.217543,-0.016966,-0.019834,-0.167348,0.977802,...,0.063751,0.614283,-0.029259,0.326728,0.04838,0.348578,0.196977,0.238384,-0.509677,-0.641434
2,5.7368,-0.884513,0.519139,-0.513603,0.355739,0.424652,0.88749,0.910991,0.369959,0.264538,...,-0.125968,0.612566,0.172555,0.745312,-1.303931,-0.168392,0.112347,-0.634083,-0.7034,0.719828
3,-0.046304,0.018744,0.815991,0.024154,-0.854849,0.381389,0.664803,0.743505,-0.6874,0.046986,...,0.426436,0.804224,0.917206,0.554493,-0.424074,-0.091703,-0.145119,0.433999,0.821814,-2.553369
4,1.56757,0.410281,-0.136002,-0.600729,0.285202,-0.790657,0.091353,0.505161,-0.481196,0.293772,...,-0.499046,-0.930463,-0.441759,-0.264764,-2.48973,-0.964765,0.960865,-0.858346,-0.540128,-1.347045


In [11]:
def objective(trial):
    
    params = {
        'C': trial.suggest_float('C', 0.000000001, 1.0),
        'random_state': 0,
        'n_jobs': -1,
    }

    model = LogisticRegression(**params)
    model.fit(train_x, train_y)
    predictions = model.predict_proba(test_x)[:,1]
    auc = roc_auc_score(test_y, predictions)
    
    return auc

In [12]:
%%time
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=3)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-12-15 08:21:49,637][0m A new study created in memory with name: no-name-32c04526-8c3a-4991-832f-3d684988a42d[0m
[32m[I 2021-12-15 08:22:25,876][0m Trial 0 finished with value: 0.750663777763448 and parameters: {'C': 0.40272193880083407}. Best is trial 0 with value: 0.750663777763448.[0m
[32m[I 2021-12-15 08:23:02,286][0m Trial 1 finished with value: 0.7506639105698588 and parameters: {'C': 0.15281436487084918}. Best is trial 1 with value: 0.7506639105698588.[0m
[32m[I 2021-12-15 08:23:39,037][0m Trial 2 finished with value: 0.7506638736174474 and parameters: {'C': 0.20607762586010325}. Best is trial 1 with value: 0.7506639105698588.[0m


Number of finished trials: 3
Best trial: {'C': 0.15281436487084918}
CPU times: user 1.42 s, sys: 3.67 s, total: 5.08 s
Wall time: 1min 49s


In [13]:
study.best_trial.params

{'C': 0.15281436487084918}