In [1]:
import lightgbm as lgb
import numpy as np
import optuna
import sklearn

In [2]:
train = np.loadtxt('train4.csv', delimiter=',', skiprows=1)
valid = np.loadtxt('valid4.csv', delimiter=',', skiprows=1)
test = np.loadtxt('test4.csv', delimiter=',', skiprows=1)

In [3]:
n_features = train.shape[1] - 1
    
X_train = train[:, :n_features]
X_valid = valid[:, :n_features]
X_test = test[:, :n_features]
    
y_train = train[:, n_features:]
y_valid = valid[:, n_features:]
y_test = test[:, n_features:]

In [4]:
eval_res = {}

def objective(trial):
    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_valid, label=y_valid)

    param = {
        "objective": "binary",
        "metric": "val_loss",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        'seed': 42
    }

    gbm = lgb.train(param, dtrain, valid_sets=dvalid)
    preds = gbm.predict(X_test)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(y_test, pred_labels)
    return accuracy

In [5]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

[32m[I 2021-11-22 14:52:55,088][0m A new study created in memory with name: no-name-03fbea7f-8013-455c-b851-773f879a7922[0m
[32m[I 2021-11-22 14:52:55,122][0m Trial 0 finished with value: 0.5979381443298969 and parameters: {'lambda_l1': 1.3595957072619465e-07, 'lambda_l2': 0.24058568451707682, 'num_leaves': 107, 'feature_fraction': 0.9178607734556815, 'bagging_fraction': 0.7292547642561444, 'bagging_freq': 2, 'min_child_samples': 98}. Best is trial 0 with value: 0.5979381443298969.[0m
[32m[I 2021-11-22 14:52:55,154][0m Trial 1 finished with value: 0.5979381443298969 and parameters: {'lambda_l1': 0.008892655711130454, 'lambda_l2': 6.187897447071488e-07, 'num_leaves': 154, 'feature_fraction': 0.7998898172498368, 'bagging_fraction': 0.7314733657864022, 'bagging_freq': 7, 'min_child_samples': 56}. Best is trial 0 with value: 0.5979381443298969.[0m
[32m[I 2021-11-22 14:52:55,180][0m Trial 2 finished with value: 0.5773195876288659 and parameters: {'lambda_l1': 0.010636164153550643

[32m[I 2021-11-22 14:52:55,816][0m Trial 15 finished with value: 0.5979381443298969 and parameters: {'lambda_l1': 0.3320230239237088, 'lambda_l2': 7.98288487722226e-05, 'num_leaves': 35, 'feature_fraction': 0.8863771629171865, 'bagging_fraction': 0.6245426604970971, 'bagging_freq': 6, 'min_child_samples': 38}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:55,898][0m Trial 16 finished with value: 0.6288659793814433 and parameters: {'lambda_l1': 8.62167444318173e-06, 'lambda_l2': 1.3182763010249702e-06, 'num_leaves': 90, 'feature_fraction': 0.7357038468447434, 'bagging_fraction': 0.8066036321058146, 'bagging_freq': 6, 'min_child_samples': 16}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:56,013][0m Trial 17 finished with value: 0.5876288659793815 and parameters: {'lambda_l1': 6.171225435283554e-07, 'lambda_l2': 6.11471928633591, 'num_leaves': 80, 'feature_fraction': 0.997372901760621, 'bagging_fraction': 0.815090899423173

[32m[I 2021-11-22 14:52:57,016][0m Trial 30 finished with value: 0.6185567010309279 and parameters: {'lambda_l1': 4.278249683486739e-07, 'lambda_l2': 1.4514264495832485, 'num_leaves': 147, 'feature_fraction': 0.8996458229146448, 'bagging_fraction': 0.7507662399959377, 'bagging_freq': 2, 'min_child_samples': 25}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:57,095][0m Trial 31 finished with value: 0.6082474226804123 and parameters: {'lambda_l1': 3.7338676685691273e-05, 'lambda_l2': 1.591859255730859e-07, 'num_leaves': 106, 'feature_fraction': 0.4811707622386017, 'bagging_fraction': 0.904242881540846, 'bagging_freq': 4, 'min_child_samples': 20}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:57,206][0m Trial 32 finished with value: 0.5773195876288659 and parameters: {'lambda_l1': 4.3867644627206586e-05, 'lambda_l2': 4.3895913724978774e-07, 'num_leaves': 158, 'feature_fraction': 0.4766851107930496, 'bagging_fraction': 0.850

[32m[I 2021-11-22 14:52:58,006][0m Trial 45 finished with value: 0.6597938144329897 and parameters: {'lambda_l1': 5.3732311601291164e-08, 'lambda_l2': 0.08558852871462508, 'num_leaves': 145, 'feature_fraction': 0.6710429820794592, 'bagging_fraction': 0.9051621339543836, 'bagging_freq': 6, 'min_child_samples': 91}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:58,049][0m Trial 46 finished with value: 0.6494845360824743 and parameters: {'lambda_l1': 1.0376029074704222e-08, 'lambda_l2': 0.503619367145951, 'num_leaves': 184, 'feature_fraction': 0.7840994794550504, 'bagging_fraction': 0.9619079256864517, 'bagging_freq': 7, 'min_child_samples': 85}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:58,090][0m Trial 47 finished with value: 0.6391752577319587 and parameters: {'lambda_l1': 4.2238560171683716e-08, 'lambda_l2': 0.002987644916951665, 'num_leaves': 169, 'feature_fraction': 0.6718888831746634, 'bagging_fraction': 0.912580

[32m[I 2021-11-22 14:52:58,661][0m Trial 60 finished with value: 0.5876288659793815 and parameters: {'lambda_l1': 1.4006559346598766e-06, 'lambda_l2': 0.00038237680175852467, 'num_leaves': 180, 'feature_fraction': 0.6570019486070904, 'bagging_fraction': 0.4167762215365646, 'bagging_freq': 5, 'min_child_samples': 28}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:58,693][0m Trial 61 finished with value: 0.5567010309278351 and parameters: {'lambda_l1': 5.112975804323173e-06, 'lambda_l2': 0.049199387528184826, 'num_leaves': 2, 'feature_fraction': 0.8577057061248662, 'bagging_fraction': 0.9999995384347473, 'bagging_freq': 6, 'min_child_samples': 97}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:58,735][0m Trial 62 finished with value: 0.6391752577319587 and parameters: {'lambda_l1': 4.520427172617077e-08, 'lambda_l2': 0.004105948771276782, 'num_leaves': 169, 'feature_fraction': 0.6165924728468839, 'bagging_fraction': 0.9106

[32m[I 2021-11-22 14:52:59,582][0m Trial 75 finished with value: 0.6082474226804123 and parameters: {'lambda_l1': 3.49453884241575e-08, 'lambda_l2': 0.01210678539674281, 'num_leaves': 222, 'feature_fraction': 0.8784670344396701, 'bagging_fraction': 0.8922396128161353, 'bagging_freq': 1, 'min_child_samples': 40}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:59,635][0m Trial 76 finished with value: 0.6597938144329897 and parameters: {'lambda_l1': 1.6842792918949188e-08, 'lambda_l2': 0.00019653849031981183, 'num_leaves': 178, 'feature_fraction': 0.7755588267402446, 'bagging_fraction': 0.843351228692369, 'bagging_freq': 5, 'min_child_samples': 47}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:52:59,696][0m Trial 77 finished with value: 0.6185567010309279 and parameters: {'lambda_l1': 1.01680570200375e-06, 'lambda_l2': 0.00024027165819894903, 'num_leaves': 175, 'feature_fraction': 0.7536666478160248, 'bagging_fraction': 0.8032

[32m[I 2021-11-22 14:53:00,494][0m Trial 90 finished with value: 0.5876288659793815 and parameters: {'lambda_l1': 1.6806207961381285e-06, 'lambda_l2': 6.467028044506913e-08, 'num_leaves': 144, 'feature_fraction': 0.6057246656792723, 'bagging_fraction': 0.8760633770327991, 'bagging_freq': 6, 'min_child_samples': 10}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:53:00,546][0m Trial 91 finished with value: 0.6391752577319587 and parameters: {'lambda_l1': 2.053056716595199e-07, 'lambda_l2': 2.721774883853869e-08, 'num_leaves': 128, 'feature_fraction': 0.699639617134872, 'bagging_fraction': 0.9097114020923097, 'bagging_freq': 4, 'min_child_samples': 54}. Best is trial 4 with value: 0.6907216494845361.[0m
[32m[I 2021-11-22 14:53:00,600][0m Trial 92 finished with value: 0.6288659793814433 and parameters: {'lambda_l1': 5.4765312985088413e-08, 'lambda_l2': 1.8143731960036184e-05, 'num_leaves': 164, 'feature_fraction': 0.6533879011958812, 'bagging_fraction': 0.

In [6]:
print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Number of finished trials: 100
Best trial:
  Value: 0.6907216494845361
  Params: 
    lambda_l1: 1.7223208762016493e-06
    lambda_l2: 0.0003938813884026949
    num_leaves: 23
    feature_fraction: 0.8164434077638283
    bagging_fraction: 0.9379967492292598
    bagging_freq: 6
    min_child_samples: 9


In [9]:
import json

with open('lgbm_params.json', 'w') as fp:
    json.dump(trial.params, fp)