In [1]:
from sklearnex import patch_sklearn, config_context
patch_sklearn()

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
import numpy as np
import pandas as pd
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler
import optuna
import logging
import gc



In [3]:
train_x = pd.read_csv('../../TPS_2021/input/tabular-playground-series-nov-2021/xgtrain.csv')
test_x = pd.read_csv('../../TPS_2021/input/tabular-playground-series-nov-2021/xgval.csv')

In [4]:
train_y = train_x['target']
train_x = train_x[train_x.columns.difference(['target'])]

test_y = test_x['target']
test_x = test_x[test_x.columns.difference(['target'])]


In [5]:
train_x.head()

Unnamed: 0,f0,f1,f10,f11,f12,f13,f14,f15,f16,f17,...,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99
0,-0.28209,-0.011114,0.507483,-1.102086,0.250591,-0.779308,-1.122699,-0.646795,-1.073323,-0.16689,...,-0.960254,0.412144,0.311905,0.893324,0.350115,0.593789,0.568979,0.491097,0.149572,-0.602242
1,-0.194928,0.215904,-0.141631,0.325356,0.368926,0.217543,-0.016966,-0.019834,-0.167348,0.977802,...,0.063751,0.614283,-0.029259,0.326728,0.04838,0.348578,0.196977,0.238384,-0.509677,-0.641434
2,5.7368,-0.884513,0.519139,-0.513603,0.355739,0.424652,0.88749,0.910991,0.369959,0.264538,...,-0.125968,0.612566,0.172555,0.745312,-1.303931,-0.168392,0.112347,-0.634083,-0.7034,0.719828
3,-0.046304,0.018744,0.815991,0.024154,-0.854849,0.381389,0.664803,0.743505,-0.6874,0.046986,...,0.426436,0.804224,0.917206,0.554493,-0.424074,-0.091703,-0.145119,0.433999,0.821814,-2.553369
4,1.56757,0.410281,-0.136002,-0.600729,0.285202,-0.790657,0.091353,0.505161,-0.481196,0.293772,...,-0.499046,-0.930463,-0.441759,-0.264764,-2.48973,-0.964765,0.960865,-0.858346,-0.540128,-1.347045


In [6]:
def objective(trial):
    
    params = {
        'C': trial.suggest_float('C', 0.000000001, 1.0),
        'random_state': 0,
        'n_jobs': -1,
    }

    model = LogisticRegression(**params)
    model.fit(train_x, train_y)
    predictions = model.predict_proba(test_x)[:,1]
    auc = roc_auc_score(test_y, predictions)
    
    return auc

In [7]:
%%time
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=3)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

[32m[I 2021-12-17 07:30:02,518][0m A new study created in memory with name: no-name-90576df4-a268-46ea-9895-4bff75fdaf40[0m
[32m[I 2021-12-17 07:30:03,518][0m Trial 0 finished with value: 0.750663671073779 and parameters: {'C': 0.5267313438233229}. Best is trial 0 with value: 0.750663671073779.[0m
[32m[I 2021-12-17 07:30:04,457][0m Trial 1 finished with value: 0.7506636774640456 and parameters: {'C': 0.3955671300417933}. Best is trial 1 with value: 0.7506636774640456.[0m
[32m[I 2021-12-17 07:30:05,398][0m Trial 2 finished with value: 0.7506636699624283 and parameters: {'C': 0.5812342875242269}. Best is trial 1 with value: 0.7506636774640456.[0m


Number of finished trials: 3
Best trial: {'C': 0.3955671300417933}
CPU times: user 18.9 s, sys: 162 ms, total: 19.1 s
Wall time: 2.88 s


In [8]:
study.best_trial.params

{'C': 0.3955671300417933}

In [9]:
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # Setup the root logger.
logger.addHandler(logging.FileHandler("optuna_lr_output_2.log", mode="w"))

optuna.logging.enable_propagation()  # Propagate logs to the root logger.
optuna.logging.disable_default_handler()  # Stop showing logs in sys.stderr.

study = optuna.create_study(storage="sqlite:///lr_optuna_tps_11_2021.db", study_name="lr_optuna_300_2")

INFO:optuna.storages._rdb.storage:A new study created in RDB with name: lr_optuna_300_2


In [12]:
%%time
study.optimize(objective, n_trials=300)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

INFO:root:sklearn.linear_model.LogisticRegression.fit: running accelerated version on CPU
INFO:root:sklearn.linear_model.LogisticRegression.predict: running accelerated version on CPU
INFO:root:sklearn.metrics.roc_auc_score: running accelerated version on CPU
INFO:optuna.study.study:Trial 0 finished with value: 0.7506636777418833 and parameters: {'C': 0.9497540917577507}. Best is trial 0 with value: 0.7506636777418833.
INFO:root:sklearn.linear_model.LogisticRegression.fit: running accelerated version on CPU
INFO:root:sklearn.linear_model.LogisticRegression.predict: running accelerated version on CPU
INFO:root:sklearn.metrics.roc_auc_score: running accelerated version on CPU
INFO:optuna.study.study:Trial 1 finished with value: 0.7506636794089093 and parameters: {'C': 0.9834183990785008}. Best is trial 0 with value: 0.7506636777418833.
INFO:root:sklearn.linear_model.LogisticRegression.fit: running accelerated version on CPU
INFO:root:sklearn.linear_model.LogisticRegression.predict: runni

Number of finished trials: 300
Best trial: {'C': 0.2293775814943481}
CPU times: user 34min 6s, sys: 12.6 s, total: 34min 18s
Wall time: 5min 23s


In [13]:
study.best_trial.params

{'C': 0.2293775814943481}

In [11]:
'Number of finished trials: 300Best trial: {\'C\': 0.6404117889793832} CPU times: user 32min 54s, sys: 12.4 s, total: 33min 6s Wall time: 5min 15s'


"Number of finished trials: 300Best trial: {'C': 0.6404117889793832} CPU times: user 32min 54s, sys: 12.4 s, total: 33min 6s Wall time: 5min 15s"