In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import time
import re

In [None]:
from sklearn.svm import SVC

In [None]:
import sklearn.svm

In [None]:
import xgboost as xgb

In [None]:
! pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


In [None]:
!pip install optuna-integration

Collecting optuna-integration
  Downloading optuna_integration-3.6.0-py3-none-any.whl (93 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.4/93.4 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: optuna-integration
Successfully installed optuna-integration-3.6.0


In [None]:
import optuna

In [None]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from sklearn.model_selection import train_test_split

In [None]:
filename = "/content/drive/My Drive/Datasets/g_2k.csv"

df = pd.read_csv(filename,
                 encoding="utf-8", encoding_errors="replace")
# replace values
df.replace({'Not Governance': 0, 'Governance': 1}, inplace=True)

# Data Preprocessing

In this section we will prepare the dataset in order to use in SVM model. Before starting I want to explain processes with 1 line explanations



*   Cleaning Everything Except Alphabetical and Numerical Characters: We'll clean unrelevant digits using regular expressions
*   Tokenizing and Lemmatizing: We'll split texts into their words and convert words to their base form (dogs=>dog)
*   Removing Stopwords: We'll remove words that have no special meaning (such as the,will,was)
*   Joining Prepared Strings: We'll join the words
*   Bag of Words Approach: We'll create a bag of word. In bag of words approach each feature shows whether the text contains the word or not.







In [None]:
cleanedData = []

lemma = WordNetLemmatizer()
swords = stopwords.words("english")
for text in df["text"]:

    # Cleaning everything except alphabetical and numerical characters
    text = re.sub("[^a-zA-Z0-9]"," ",text)

    # Tokenizing and lemmatizing
    text = nltk.word_tokenize(text.lower())
    text = [lemma.lemmatize(word) for word in text]

    # Removing stopwords
    text = [word for word in text if word not in swords]

    # Joining
    text = " ".join(text)

    cleanedData.append(text)

In [None]:
# check if the data is cleaned
for i in range(0,5):
    print(cleanedData[i],end="\n\n")

balance included kind donation prepared accordance b4si guideline

internal control risk management board audit committee responsible ensuring suitable internal control system prevent detect fraud error designed implemented third party service provider company also responsible reviewing effectiveness control

analysis doe take account energy mix related electricity consumption ttv portfolio issuer

change chief financial officer plc board chair position clare bousfield stepped board effect 1 october 2021 transitioned role managing director retail saving business clare continued receive salary benefit reflective new role

consideration prioritisation climate risk also built decision making governance process requirement key strategic board risk assessment paper



Now we can create our bag of words

In [None]:
vectorizer = CountVectorizer(max_features=10000)
BOW = vectorizer.fit_transform(cleanedData)

we can split our set into train and test.

In [None]:
x_train = list()
x_test = list()
y_train = list()
y_test = list()
for label in [0, 1]:
  x_train,x_test,y_train,y_test = train_test_split(BOW, np.asarray(df["label"]),
                                                  train_size=250,
                                                  test_size=250,
                                                  random_state=42)

In [None]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(250, 4559)
(250, 4559)
(250,)
(250,)


Support Vector Machine Classifier Modeling

In [None]:
start_time = time.time()

model = SVC()
model.fit(x_train,y_train)

end_time = time.time()
process_time = round(end_time-start_time,2)
print("Fitting SVC took {} seconds".format(process_time))

Fitting SVC took 0.02 seconds


In [None]:
predictions = model.predict(x_test)

In [None]:
print("Accuracy of model is {}%".format(accuracy_score(y_test,predictions) * 100))

Accuracy of model is 76.0%


# Hyperparameter tuning using Optuna

In [None]:
clf = SVC(gamma="auto")
param_distributions = {
        "C": optuna.distributions.FloatDistribution(1e-10, 1e10, log=True),
        "degree": optuna.distributions.IntDistribution(1, 5),
    }

optuna_search = optuna.integration.OptunaSearchCV(
        clf, param_distributions, n_trials=100, timeout=600, verbose=2
    )


optuna_search.fit(x_train,y_train)

print("Best trial:")
trial = optuna_search.study_.best_trial

print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
  print("    {}: {}".format(key, value))

  optuna_search = optuna.integration.OptunaSearchCV(
[I 2024-04-30 03:08:43,542] A new study created in memory with name: no-name-7cfd390a-66a4-4a52-8a81-754741ecf2e2
INFO:optuna_integration.sklearn:Searching the best hyperparameters using 250 samples...
[I 2024-04-30 03:08:43,602] Trial 0 finished with value: 0.8240000000000001 and parameters: {'C': 5106.998423617558, 'degree': 1}. Best is trial 0 with value: 0.8240000000000001.
[I 2024-04-30 03:08:43,656] Trial 1 finished with value: 0.8240000000000001 and parameters: {'C': 72537.74955294652, 'degree': 4}. Best is trial 0 with value: 0.8240000000000001.
[I 2024-04-30 03:08:43,702] Trial 2 finished with value: 0.772 and parameters: {'C': 5.945285082062503, 'degree': 4}. Best is trial 0 with value: 0.8240000000000001.
[I 2024-04-30 03:08:43,759] Trial 3 finished with value: 0.8240000000000001 and parameters: {'C': 10505.933042022352, 'degree': 4}. Best is trial 0 with value: 0.8240000000000001.
[I 2024-04-30 03:08:43,818] Trial 4 finis

Best trial:
  Value:  0.8360000000000001
  Params: 
    C: 325.07602123357964
    degree: 4


In [None]:
trial.params

{'C': 325.07602123357964, 'degree': 4}

In [None]:
start_time = time.time()

model = SVC(gamma="auto", degree=trial.params['degree'], C=trial.params['C'])
model.fit(x_train,y_train)

end_time = time.time()
process_time = round(end_time-start_time,2)
print("Fitting SVC took {} seconds".format(process_time))

Fitting SVC took 0.02 seconds


In [None]:
trial.params['degree']

4

In [None]:
predictions = model.predict(x_test)

In [None]:
print("Accuracy of model is {}%".format(accuracy_score(y_test,predictions) * 100))

Accuracy of model is 77.2%


In [None]:
def evaluate(y_true, y_pred):
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')

    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels

    for label in unique_labels:
        label_indices = [i for i in range(len(y_true))
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [None]:
evaluate(y_true=y_test, y_pred=predictions)

Accuracy: 0.772
Accuracy for label 0: 0.893
Accuracy for label 1: 0.413

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.89      0.85       187
           1       0.57      0.41      0.48        63

    accuracy                           0.77       250
   macro avg       0.69      0.65      0.67       250
weighted avg       0.75      0.77      0.76       250


Confusion Matrix:
[[167  20]
 [ 37  26]]


# build model for XGBoost

In [None]:
start_time = time.time()

xgb_model = xgb.XGBClassifier()
xgb_model.fit(x_train,y_train)

end_time = time.time()
process_time = round(end_time-start_time,2)
print("Fitting XGBoost took {} seconds".format(process_time))

Fitting XGBoost took 1.03 seconds


In [None]:
predictions = model.predict(x_test)

In [None]:
print("Accuracy of model is {}%".format(accuracy_score(y_test,predictions) * 100))

Accuracy of model is 77.2%


# Hyperparameter tuning using Optuna

In [None]:
def objective(trial):
    dtrain = xgb.DMatrix(x_train, label=y_train)
    dvalid = xgb.DMatrix(x_test, label=y_test)

    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    }

    if param["booster"] == "gbtree" or param["booster"] == "dart":
        param["max_depth"] = trial.suggest_int("max_depth", 1, 9)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    # Add a callback for pruning.
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "validation-auc")
    bst = xgb.train(param, dtrain, evals=[(dvalid, "validation")], callbacks=[pruning_callback])
    preds = bst.predict(dvalid)
    pred_labels = np.rint(preds)
    accuracy = accuracy_score(y_test,predictions)
    return accuracy

In [None]:
study = optuna.create_study(
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=5), direction="maximize"
    )
study.optimize(objective, n_trials=100)
print(study.best_trial)

[I 2024-04-30 03:08:50,889] A new study created in memory with name: no-name-bf170eee-9242-456e-933c-ddcaa3c0b719


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000
[5]	validation-auc:0.50000
[6]	validation-auc:0.50000
[7]	validation-auc:0.50000
[8]	validation-auc:0.50000
[9]	validation-auc:0.50000


[I 2024-04-30 03:08:51,387] Trial 0 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 0.0007990125451767318, 'alpha': 0.616465210595442}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.80872
[1]	validation-auc:0.80872
[2]	validation-auc:0.80872
[3]	validation-auc:0.80150
[4]	validation-auc:0.80702
[5]	validation-auc:0.80634
[6]	validation-auc:0.80566
[7]	validation-auc:0.80643
[8]	validation-auc:0.80634
[9]	validation-auc:0.80566


[I 2024-04-30 03:08:51,720] Trial 1 finished with value: 0.772 and parameters: {'booster': 'gbtree', 'lambda': 0.004225324677069771, 'alpha': 0.13902716639794016, 'max_depth': 8, 'eta': 0.014452122661195197, 'gamma': 0.4488658235595789, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.80991
[1]	validation-auc:0.80957
[2]	validation-auc:0.80957
[3]	validation-auc:0.80957
[4]	validation-auc:0.80957
[5]	validation-auc:0.80957
[6]	validation-auc:0.80957
[7]	validation-auc:0.80957
[8]	validation-auc:0.80957
[9]	validation-auc:0.80957


[I 2024-04-30 03:08:53,493] Trial 2 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 2.1994000734070635e-06, 'alpha': 4.979303040564676e-06, 'max_depth': 6, 'eta': 1.779037892427607e-07, 'gamma': 0.011800527293551296, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 0.0011314017412099914, 'skip_drop': 3.579845296243907e-07}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.81962
[1]	validation-auc:0.82650
[2]	validation-auc:0.82633
[3]	validation-auc:0.82786
[4]	validation-auc:0.82896
[5]	validation-auc:0.82973
[6]	validation-auc:0.83066
[7]	validation-auc:0.83066
[8]	validation-auc:0.83100
[9]	validation-auc:0.83142


[I 2024-04-30 03:08:53,559] Trial 3 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 0.0026580773353329936, 'alpha': 0.0002568560566958203}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.80872
[1]	validation-auc:0.81967
[2]	validation-auc:0.82875
[3]	validation-auc:0.83168
[4]	validation-auc:0.83567
[5]	validation-auc:0.83575
[6]	validation-auc:0.83367
[7]	validation-auc:0.83134
[8]	validation-auc:0.83686
[9]	validation-auc:0.83906


[I 2024-04-30 03:08:53,683] Trial 4 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 1.4247088583536542e-07, 'alpha': 0.0011952454765430558, 'max_depth': 6, 'eta': 0.5628929301562029, 'gamma': 1.1015677696625452e-07, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 0.00010724673273104837, 'skip_drop': 4.519344298843916e-07}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000
[5]	validation-auc:0.50000


[I 2024-04-30 03:08:53,732] Trial 5 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81360
[1]	validation-auc:0.81177
[2]	validation-auc:0.81177
[3]	validation-auc:0.81177
[4]	validation-auc:0.81177
[5]	validation-auc:0.81177
[6]	validation-auc:0.81160
[7]	validation-auc:0.81101
[8]	validation-auc:0.81177
[9]	validation-auc:0.81101


[I 2024-04-30 03:08:53,828] Trial 6 finished with value: 0.772 and parameters: {'booster': 'gbtree', 'lambda': 8.132785956063742e-06, 'alpha': 1.2644629946840556e-07, 'max_depth': 8, 'eta': 2.480913693896981e-07, 'gamma': 0.0021101839165627726, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.71318
[1]	validation-auc:0.71089
[2]	validation-auc:0.72243
[3]	validation-auc:0.73296
[4]	validation-auc:0.74535
[5]	validation-auc:0.76284


[I 2024-04-30 03:08:53,876] Trial 7 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000


[I 2024-04-30 03:08:53,907] Trial 8 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81067
[1]	validation-auc:0.80558
[2]	validation-auc:0.80320
[3]	validation-auc:0.80345
[4]	validation-auc:0.80345
[5]	validation-auc:0.80345
[6]	validation-auc:0.80345
[7]	validation-auc:0.80345
[8]	validation-auc:0.80345
[9]	validation-auc:0.80261


[I 2024-04-30 03:08:54,001] Trial 9 finished with value: 0.772 and parameters: {'booster': 'gbtree', 'lambda': 0.23455403772860645, 'alpha': 3.618789564115521e-08, 'max_depth': 7, 'eta': 0.011684278701721946, 'gamma': 3.2851783305152454e-07, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.87183
[1]	validation-auc:0.87276
[2]	validation-auc:0.86903
[3]	validation-auc:0.86606
[4]	validation-auc:0.86504
[5]	validation-auc:0.86020
[6]	validation-auc:0.85867
[7]	validation-auc:0.85689
[8]	validation-auc:0.85528
[9]	validation-auc:0.85392


[I 2024-04-30 03:08:54,103] Trial 10 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 1.2681216010269263e-08, 'alpha': 0.008238893879479599}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.81742
[1]	validation-auc:0.81631
[2]	validation-auc:0.81687
[3]	validation-auc:0.81627
[4]	validation-auc:0.81636
[5]	validation-auc:0.81627
[6]	validation-auc:0.81636
[7]	validation-auc:0.81627
[8]	validation-auc:0.81636
[9]	validation-auc:0.81627


[I 2024-04-30 03:08:54,699] Trial 11 finished with value: 0.772 and parameters: {'booster': 'gbtree', 'lambda': 0.001374687083124439, 'alpha': 0.03782205786715263, 'max_depth': 9, 'eta': 0.00028567257546703215, 'gamma': 0.2622152833557725, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000


[I 2024-04-30 03:08:54,738] Trial 12 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.67634
[1]	validation-auc:0.67634
[2]	validation-auc:0.67634
[3]	validation-auc:0.67634
[4]	validation-auc:0.67634


[I 2024-04-30 03:08:54,799] Trial 13 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.53960
[1]	validation-auc:0.63929
[2]	validation-auc:0.63929
[3]	validation-auc:0.72604
[4]	validation-auc:0.72303


[I 2024-04-30 03:08:54,869] Trial 14 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000
[5]	validation-auc:0.50000


[I 2024-04-30 03:08:54,915] Trial 15 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.82964
[1]	validation-auc:0.81950
[2]	validation-auc:0.81852
[3]	validation-auc:0.83308
[4]	validation-auc:0.83138
[5]	validation-auc:0.83113
[6]	validation-auc:0.83113
[7]	validation-auc:0.83113
[8]	validation-auc:0.83096
[9]	validation-auc:0.83045


[I 2024-04-30 03:08:55,068] Trial 16 finished with value: 0.772 and parameters: {'booster': 'gbtree', 'lambda': 0.00024340520090948027, 'alpha': 0.0022826895632002504, 'max_depth': 9, 'eta': 0.006766855671927213, 'gamma': 4.062239303312728e-05, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.75753
[1]	validation-auc:0.75753
[2]	validation-auc:0.75753
[3]	validation-auc:0.75753
[4]	validation-auc:0.75753
[5]	validation-auc:0.75753


[I 2024-04-30 03:08:55,182] Trial 17 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000
[5]	validation-auc:0.50000


[I 2024-04-30 03:08:55,433] Trial 18 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81305
[1]	validation-auc:0.81058
[2]	validation-auc:0.81058
[3]	validation-auc:0.81058
[4]	validation-auc:0.80991
[5]	validation-auc:0.80991


[I 2024-04-30 03:08:55,872] Trial 19 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.79115
[1]	validation-auc:0.79115
[2]	validation-auc:0.79115
[3]	validation-auc:0.79115
[4]	validation-auc:0.79115


[I 2024-04-30 03:08:55,945] Trial 20 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81360
[1]	validation-auc:0.80957
[2]	validation-auc:0.80957
[3]	validation-auc:0.80957
[4]	validation-auc:0.80957


[I 2024-04-30 03:08:56,100] Trial 21 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81402
[1]	validation-auc:0.81402
[2]	validation-auc:0.81402
[3]	validation-auc:0.81402
[4]	validation-auc:0.81402


[I 2024-04-30 03:08:56,192] Trial 22 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000
[5]	validation-auc:0.50000


[I 2024-04-30 03:08:56,316] Trial 23 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.79115
[1]	validation-auc:0.79115
[2]	validation-auc:0.79115
[3]	validation-auc:0.79115
[4]	validation-auc:0.79115
[5]	validation-auc:0.79115


[I 2024-04-30 03:08:56,419] Trial 24 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81716
[1]	validation-auc:0.81373
[2]	validation-auc:0.80702
[3]	validation-auc:0.81411
[4]	validation-auc:0.81292
[5]	validation-auc:0.83312
[6]	validation-auc:0.83422
[7]	validation-auc:0.84411
[8]	validation-auc:0.83881
[9]	validation-auc:0.84488


[I 2024-04-30 03:08:56,546] Trial 25 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 7.182109737525394e-06, 'alpha': 4.274070928470397e-05, 'max_depth': 8, 'eta': 0.07280224615345009, 'gamma': 0.02902169996778015, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 0.001111594733760075, 'skip_drop': 0.007844458556503899}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.69493
[1]	validation-auc:0.69621
[2]	validation-auc:0.69773
[3]	validation-auc:0.69833
[4]	validation-auc:0.70028


[I 2024-04-30 03:08:56,584] Trial 26 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000


[I 2024-04-30 03:08:56,625] Trial 27 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.79832
[1]	validation-auc:0.79832
[2]	validation-auc:0.79832
[3]	validation-auc:0.79832
[4]	validation-auc:0.79832
[5]	validation-auc:0.79832


[I 2024-04-30 03:08:56,689] Trial 28 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.53960
[1]	validation-auc:0.53960
[2]	validation-auc:0.53960
[3]	validation-auc:0.53960
[4]	validation-auc:0.53960
[5]	validation-auc:0.53960


[I 2024-04-30 03:08:56,750] Trial 29 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.85324
[1]	validation-auc:0.85290
[2]	validation-auc:0.85061
[3]	validation-auc:0.84823
[4]	validation-auc:0.84670
[5]	validation-auc:0.84585
[6]	validation-auc:0.84483
[7]	validation-auc:0.84331
[8]	validation-auc:0.84186
[9]	validation-auc:0.84127


[I 2024-04-30 03:08:56,809] Trial 30 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 0.06590858133857348, 'alpha': 0.000432065502126031}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.84322
[1]	validation-auc:0.84611
[2]	validation-auc:0.84781
[3]	validation-auc:0.84789
[4]	validation-auc:0.85137
[5]	validation-auc:0.85502
[6]	validation-auc:0.85646
[7]	validation-auc:0.85689
[8]	validation-auc:0.85646
[9]	validation-auc:0.85850


[I 2024-04-30 03:08:56,868] Trial 31 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 0.0014315948076874267, 'alpha': 0.0028889153056948563}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.83321
[1]	validation-auc:0.83244
[2]	validation-auc:0.83431
[3]	validation-auc:0.83304
[4]	validation-auc:0.83524
[5]	validation-auc:0.83592
[6]	validation-auc:0.83669
[7]	validation-auc:0.83677
[8]	validation-auc:0.83821
[9]	validation-auc:0.83864


[I 2024-04-30 03:08:56,944] Trial 32 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 0.004449802769766497, 'alpha': 0.0003442990200159152}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.71089
[1]	validation-auc:0.71242
[2]	validation-auc:0.71522
[3]	validation-auc:0.71972
[4]	validation-auc:0.71921
[5]	validation-auc:0.71972


[I 2024-04-30 03:08:56,992] Trial 33 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.68487
[1]	validation-auc:0.68487
[2]	validation-auc:0.68487
[3]	validation-auc:0.68487
[4]	validation-auc:0.68487
[5]	validation-auc:0.68487


[I 2024-04-30 03:08:57,053] Trial 34 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72439
[1]	validation-auc:0.74688
[2]	validation-auc:0.76793
[3]	validation-auc:0.78915
[4]	validation-auc:0.80154
[5]	validation-auc:0.80613


[I 2024-04-30 03:08:57,082] Trial 35 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.82964
[1]	validation-auc:0.84199
[2]	validation-auc:0.84259
[3]	validation-auc:0.84127
[4]	validation-auc:0.83261
[5]	validation-auc:0.83142
[6]	validation-auc:0.83125
[7]	validation-auc:0.83024
[8]	validation-auc:0.83024
[9]	validation-auc:0.83024


[I 2024-04-30 03:08:57,197] Trial 36 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 1.997263688270002e-05, 'alpha': 3.4270214470864634e-06, 'max_depth': 9, 'eta': 0.0016910183571261538, 'gamma': 1.1099898887046528e-08, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 1.003497179344735e-05, 'skip_drop': 0.0014729617267778808}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.78941
[1]	validation-auc:0.80333
[2]	validation-auc:0.80587
[3]	validation-auc:0.80698
[4]	validation-auc:0.80859


[I 2024-04-30 03:08:57,245] Trial 37 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81305
[1]	validation-auc:0.81058
[2]	validation-auc:0.81058
[3]	validation-auc:0.80940
[4]	validation-auc:0.80872


[I 2024-04-30 03:08:57,309] Trial 38 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.70461
[1]	validation-auc:0.70461
[2]	validation-auc:0.70249
[3]	validation-auc:0.70249
[4]	validation-auc:0.70249


[I 2024-04-30 03:08:57,348] Trial 39 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.75741
[1]	validation-auc:0.75741
[2]	validation-auc:0.75741
[3]	validation-auc:0.75741
[4]	validation-auc:0.75741


[I 2024-04-30 03:08:57,414] Trial 40 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80872
[1]	validation-auc:0.81912
[2]	validation-auc:0.82298
[3]	validation-auc:0.84590
[4]	validation-auc:0.85489
[5]	validation-auc:0.85328
[6]	validation-auc:0.85417
[7]	validation-auc:0.85566
[8]	validation-auc:0.85761
[9]	validation-auc:0.85867


[I 2024-04-30 03:08:57,504] Trial 41 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 2.3210329177372203e-07, 'alpha': 0.0002604627973958004, 'max_depth': 6, 'eta': 0.3668401307683901, 'gamma': 3.444309033678842e-08, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 0.0001564423147044887, 'skip_drop': 5.706156840991007e-07}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.80872
[1]	validation-auc:0.82663
[2]	validation-auc:0.84624
[3]	validation-auc:0.85107
[4]	validation-auc:0.83957
[5]	validation-auc:0.84517
[6]	validation-auc:0.84942
[7]	validation-auc:0.85634
[8]	validation-auc:0.85786
[9]	validation-auc:0.85290


[I 2024-04-30 03:08:57,606] Trial 42 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 7.154619287003629e-08, 'alpha': 0.0010684064060060877, 'max_depth': 6, 'eta': 0.8985798286697347, 'gamma': 1.672903073360246e-06, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 0.010305659332269117, 'skip_drop': 7.723133799534878e-07}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.81742
[1]	validation-auc:0.81398
[2]	validation-auc:0.80727
[3]	validation-auc:0.81436
[4]	validation-auc:0.81360


[I 2024-04-30 03:08:57,687] Trial 43 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.79115
[1]	validation-auc:0.81733
[2]	validation-auc:0.82030
[3]	validation-auc:0.84178
[4]	validation-auc:0.84420
[5]	validation-auc:0.84751
[6]	validation-auc:0.85213
[7]	validation-auc:0.85795
[8]	validation-auc:0.85727
[9]	validation-auc:0.86266


[I 2024-04-30 03:08:57,776] Trial 44 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 0.0027954982127681256, 'alpha': 0.06803866179378178, 'max_depth': 5, 'eta': 0.8114019218139976, 'gamma': 2.740441300308794e-06, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 4.821035161309269e-06, 'skip_drop': 5.6146868146651144e-06}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.71318
[1]	validation-auc:0.71972
[2]	validation-auc:0.73279
[3]	validation-auc:0.75045
[4]	validation-auc:0.77557


[I 2024-04-30 03:08:57,810] Trial 45 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.79115
[1]	validation-auc:0.79115
[2]	validation-auc:0.79115
[3]	validation-auc:0.79115
[4]	validation-auc:0.79115


[I 2024-04-30 03:08:57,871] Trial 46 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81305
[1]	validation-auc:0.81305
[2]	validation-auc:0.81305
[3]	validation-auc:0.81305
[4]	validation-auc:0.81305
[5]	validation-auc:0.81305


[I 2024-04-30 03:08:57,947] Trial 47 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80604
[1]	validation-auc:0.81232
[2]	validation-auc:0.81266
[3]	validation-auc:0.81674
[4]	validation-auc:0.82022


[I 2024-04-30 03:08:57,987] Trial 48 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.63916
[1]	validation-auc:0.72591
[2]	validation-auc:0.72290
[3]	validation-auc:0.74913
[4]	validation-auc:0.79458
[5]	validation-auc:0.81525


[I 2024-04-30 03:08:58,042] Trial 49 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81182
[1]	validation-auc:0.81182
[2]	validation-auc:0.81182
[3]	validation-auc:0.81182
[4]	validation-auc:0.80447
[5]	validation-auc:0.80931


[I 2024-04-30 03:08:58,118] Trial 50 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81330
[1]	validation-auc:0.81330
[2]	validation-auc:0.81330
[3]	validation-auc:0.81330
[4]	validation-auc:0.81330


[I 2024-04-30 03:08:58,197] Trial 51 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50794
[1]	validation-auc:0.50794
[2]	validation-auc:0.50794
[3]	validation-auc:0.50794
[4]	validation-auc:0.50794


[I 2024-04-30 03:08:58,282] Trial 52 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81275
[1]	validation-auc:0.81275
[2]	validation-auc:0.81275
[3]	validation-auc:0.81275
[4]	validation-auc:0.81275


[I 2024-04-30 03:08:58,349] Trial 53 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80872
[1]	validation-auc:0.80872
[2]	validation-auc:0.80872
[3]	validation-auc:0.80872
[4]	validation-auc:0.80872


[I 2024-04-30 03:08:58,410] Trial 54 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80991
[1]	validation-auc:0.81780
[2]	validation-auc:0.81780
[3]	validation-auc:0.81780
[4]	validation-auc:0.81780


[I 2024-04-30 03:08:58,474] Trial 55 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.83236
[1]	validation-auc:0.82692
[2]	validation-auc:0.82582
[3]	validation-auc:0.82684
[4]	validation-auc:0.82701
[5]	validation-auc:0.82709


[I 2024-04-30 03:08:58,519] Trial 56 pruned. Trial was pruned at iteration 6.


[0]	validation-auc:0.60551
[1]	validation-auc:0.60551
[2]	validation-auc:0.60551
[3]	validation-auc:0.60551
[4]	validation-auc:0.60551


[I 2024-04-30 03:08:58,585] Trial 57 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80235
[1]	validation-auc:0.81305
[2]	validation-auc:0.81228
[3]	validation-auc:0.81373
[4]	validation-auc:0.81322
[5]	validation-auc:0.81296


[I 2024-04-30 03:08:58,649] Trial 58 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.78728
[1]	validation-auc:0.81555
[2]	validation-auc:0.81462
[3]	validation-auc:0.81241
[4]	validation-auc:0.81674


[I 2024-04-30 03:08:58,687] Trial 59 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80872
[1]	validation-auc:0.80872
[2]	validation-auc:0.80872
[3]	validation-auc:0.80872
[4]	validation-auc:0.80872


[I 2024-04-30 03:08:58,756] Trial 60 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.79819
[1]	validation-auc:0.79310
[2]	validation-auc:0.79246
[3]	validation-auc:0.79009
[4]	validation-auc:0.79009
[5]	validation-auc:0.79009


[I 2024-04-30 03:08:58,818] Trial 61 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80732
[1]	validation-auc:0.80222
[2]	validation-auc:0.80159
[3]	validation-auc:0.79921
[4]	validation-auc:0.79921
[5]	validation-auc:0.79947


[I 2024-04-30 03:08:58,890] Trial 62 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80222
[1]	validation-auc:0.80626
[2]	validation-auc:0.80528
[3]	validation-auc:0.80664
[4]	validation-auc:0.80562
[5]	validation-auc:0.80545


[I 2024-04-30 03:08:58,956] Trial 63 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81305
[1]	validation-auc:0.81058
[2]	validation-auc:0.81058
[3]	validation-auc:0.81058
[4]	validation-auc:0.80991
[5]	validation-auc:0.80991


[I 2024-04-30 03:08:59,033] Trial 64 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72897
[1]	validation-auc:0.75520
[2]	validation-auc:0.77549
[3]	validation-auc:0.79102
[4]	validation-auc:0.79756
[5]	validation-auc:0.79883


[I 2024-04-30 03:08:59,076] Trial 65 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81194
[1]	validation-auc:0.80761
[2]	validation-auc:0.81245
[3]	validation-auc:0.82866
[4]	validation-auc:0.83647
[5]	validation-auc:0.84742
[6]	validation-auc:0.84955
[7]	validation-auc:0.85349
[8]	validation-auc:0.85332
[9]	validation-auc:0.86011


[I 2024-04-30 03:08:59,178] Trial 66 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 0.4403307305431316, 'alpha': 5.816911852528353e-07, 'max_depth': 6, 'eta': 0.1920342705203484, 'gamma': 0.051514842037554236, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 2.1248980186860316e-08, 'skip_drop': 0.0002614602949321213}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.79386
[1]	validation-auc:0.79386
[2]	validation-auc:0.79386
[3]	validation-auc:0.79310
[4]	validation-auc:0.79395


[I 2024-04-30 03:08:59,242] Trial 67 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.85154
[1]	validation-auc:0.84925
[2]	validation-auc:0.84568
[3]	validation-auc:0.84543
[4]	validation-auc:0.84483
[5]	validation-auc:0.84331
[6]	validation-auc:0.84254
[7]	validation-auc:0.84152
[8]	validation-auc:0.83983
[9]	validation-auc:0.83932


[I 2024-04-30 03:08:59,303] Trial 68 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 0.037217387729242615, 'alpha': 2.504018010197455e-06}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.81742
[1]	validation-auc:0.81631
[2]	validation-auc:0.81687
[3]	validation-auc:0.81631
[4]	validation-auc:0.81636
[5]	validation-auc:0.81687


[I 2024-04-30 03:08:59,373] Trial 69 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81742
[1]	validation-auc:0.81631
[2]	validation-auc:0.81687
[3]	validation-auc:0.81631
[4]	validation-auc:0.81636
[5]	validation-auc:0.81627


[I 2024-04-30 03:08:59,482] Trial 70 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.83669
[1]	validation-auc:0.83321
[2]	validation-auc:0.83889
[3]	validation-auc:0.84254
[4]	validation-auc:0.84730
[5]	validation-auc:0.85086
[6]	validation-auc:0.85018
[7]	validation-auc:0.85494
[8]	validation-auc:0.85528
[9]	validation-auc:0.85494


[I 2024-04-30 03:08:59,540] Trial 71 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 1.5023270217045127e-08, 'alpha': 0.002969186292407903}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.85914
[1]	validation-auc:0.85846
[2]	validation-auc:0.85727
[3]	validation-auc:0.85235
[4]	validation-auc:0.84946
[5]	validation-auc:0.84742
[6]	validation-auc:0.84394
[7]	validation-auc:0.84131
[8]	validation-auc:0.84004
[9]	validation-auc:0.83953


[I 2024-04-30 03:08:59,608] Trial 72 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 4.527817470896501e-08, 'alpha': 0.010341474753281735}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.84267
[1]	validation-auc:0.83868
[2]	validation-auc:0.83741
[3]	validation-auc:0.83563
[4]	validation-auc:0.83308
[5]	validation-auc:0.82909
[6]	validation-auc:0.82926
[7]	validation-auc:0.82917
[8]	validation-auc:0.82909
[9]	validation-auc:0.82739


[I 2024-04-30 03:08:59,664] Trial 73 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 1.0222699331435929e-08, 'alpha': 0.01542569169490276}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.80706
[1]	validation-auc:0.81199
[2]	validation-auc:0.81402
[3]	validation-auc:0.81631
[4]	validation-auc:0.81929
[5]	validation-auc:0.82098


[I 2024-04-30 03:08:59,704] Trial 74 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.84848
[1]	validation-auc:0.84526
[2]	validation-auc:0.84450
[3]	validation-auc:0.84730
[4]	validation-auc:0.84857
[5]	validation-auc:0.84865
[6]	validation-auc:0.84899
[7]	validation-auc:0.84959
[8]	validation-auc:0.84848
[9]	validation-auc:0.84713


[I 2024-04-30 03:08:59,770] Trial 75 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 0.01005895447744348, 'alpha': 0.0006045766432332531}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.87594
[1]	validation-auc:0.87382
[2]	validation-auc:0.87085
[3]	validation-auc:0.86415
[4]	validation-auc:0.86134
[5]	validation-auc:0.86058
[6]	validation-auc:0.86024
[7]	validation-auc:0.85837
[8]	validation-auc:0.85778
[9]	validation-auc:0.85710


[I 2024-04-30 03:08:59,825] Trial 76 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 2.9848572803133914e-05, 'alpha': 0.0075368581533512445}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.79768
[1]	validation-auc:0.79284
[2]	validation-auc:0.79221
[3]	validation-auc:0.79709
[4]	validation-auc:0.79709


[I 2024-04-30 03:08:59,906] Trial 77 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80969
[1]	validation-auc:0.80872
[2]	validation-auc:0.80872
[3]	validation-auc:0.80872
[4]	validation-auc:0.80872


[I 2024-04-30 03:08:59,981] Trial 78 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.71216
[1]	validation-auc:0.72515
[2]	validation-auc:0.74764
[3]	validation-auc:0.77455
[4]	validation-auc:0.79246


[I 2024-04-30 03:09:00,019] Trial 79 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81742
[1]	validation-auc:0.81279
[2]	validation-auc:0.82035
[3]	validation-auc:0.83894
[4]	validation-auc:0.85404
[5]	validation-auc:0.86062
[6]	validation-auc:0.85651
[7]	validation-auc:0.85591
[8]	validation-auc:0.85651
[9]	validation-auc:0.85910


[I 2024-04-30 03:09:00,128] Trial 80 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 1.7922463446994046e-07, 'alpha': 0.026199059278236254, 'max_depth': 8, 'eta': 0.1294657353024714, 'gamma': 0.09400062637545978, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 0.006312420218387203, 'skip_drop': 2.0412534413954283e-05}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.81182
[1]	validation-auc:0.81182
[2]	validation-auc:0.81182
[3]	validation-auc:0.81182
[4]	validation-auc:0.81182
[5]	validation-auc:0.81182


[I 2024-04-30 03:09:00,207] Trial 81 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.79980
[1]	validation-auc:0.81614
[2]	validation-auc:0.81941
[3]	validation-auc:0.81933
[4]	validation-auc:0.81788


[I 2024-04-30 03:09:00,275] Trial 82 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81742
[1]	validation-auc:0.81631
[2]	validation-auc:0.81687
[3]	validation-auc:0.81636
[4]	validation-auc:0.81636
[5]	validation-auc:0.81636


[I 2024-04-30 03:09:00,341] Trial 83 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81305
[1]	validation-auc:0.84458
[2]	validation-auc:0.84216
[3]	validation-auc:0.85341
[4]	validation-auc:0.87200
[5]	validation-auc:0.86809
[6]	validation-auc:0.86457
[7]	validation-auc:0.86402
[8]	validation-auc:0.86359
[9]	validation-auc:0.86597


[I 2024-04-30 03:09:00,444] Trial 84 finished with value: 0.772 and parameters: {'booster': 'gbtree', 'lambda': 0.00016218538726588477, 'alpha': 0.04735237931250221, 'max_depth': 7, 'eta': 0.32277239828733917, 'gamma': 0.0002647516984831619, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.79115
[1]	validation-auc:0.79399
[2]	validation-auc:0.79441
[3]	validation-auc:0.79441
[4]	validation-auc:0.79441


[I 2024-04-30 03:09:00,513] Trial 85 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80872
[1]	validation-auc:0.80872
[2]	validation-auc:0.80872
[3]	validation-auc:0.80872
[4]	validation-auc:0.80872


[I 2024-04-30 03:09:00,593] Trial 86 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.84331
[1]	validation-auc:0.85078
[2]	validation-auc:0.85689
[3]	validation-auc:0.86071
[4]	validation-auc:0.86028
[5]	validation-auc:0.85952
[6]	validation-auc:0.86130
[7]	validation-auc:0.85952
[8]	validation-auc:0.85816
[9]	validation-auc:0.85782


[I 2024-04-30 03:09:00,652] Trial 87 finished with value: 0.772 and parameters: {'booster': 'gblinear', 'lambda': 3.026287002138648e-07, 'alpha': 0.004135828458185614}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.82964
[1]	validation-auc:0.84199
[2]	validation-auc:0.84259
[3]	validation-auc:0.83342
[4]	validation-auc:0.83248
[5]	validation-auc:0.83113


[I 2024-04-30 03:09:00,717] Trial 88 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.70427
[1]	validation-auc:0.70648
[2]	validation-auc:0.71861
[3]	validation-auc:0.72719
[4]	validation-auc:0.74510


[I 2024-04-30 03:09:00,758] Trial 89 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80872
[1]	validation-auc:0.80872
[2]	validation-auc:0.80872
[3]	validation-auc:0.80872
[4]	validation-auc:0.80872


[I 2024-04-30 03:09:00,830] Trial 90 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.82964
[1]	validation-auc:0.81950
[2]	validation-auc:0.81852
[3]	validation-auc:0.81835
[4]	validation-auc:0.81784
[5]	validation-auc:0.81759


[I 2024-04-30 03:09:00,904] Trial 91 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.82964
[1]	validation-auc:0.81950
[2]	validation-auc:0.81852
[3]	validation-auc:0.81835
[4]	validation-auc:0.81759


[I 2024-04-30 03:09:00,977] Trial 92 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81305
[1]	validation-auc:0.81305
[2]	validation-auc:0.81305
[3]	validation-auc:0.81305
[4]	validation-auc:0.81305
[5]	validation-auc:0.81305


[I 2024-04-30 03:09:01,052] Trial 93 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81716
[1]	validation-auc:0.81373
[2]	validation-auc:0.80821
[3]	validation-auc:0.81360
[4]	validation-auc:0.81283
[5]	validation-auc:0.81173


[I 2024-04-30 03:09:01,140] Trial 94 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.73750
[1]	validation-auc:0.73750
[2]	validation-auc:0.73750
[3]	validation-auc:0.73750
[4]	validation-auc:0.73750
[5]	validation-auc:0.73750


[I 2024-04-30 03:09:01,251] Trial 95 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80239
[1]	validation-auc:0.80379
[2]	validation-auc:0.81487
[3]	validation-auc:0.81479
[4]	validation-auc:0.81419


[I 2024-04-30 03:09:01,318] Trial 96 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80872
[1]	validation-auc:0.81457
[2]	validation-auc:0.82675
[3]	validation-auc:0.83821
[4]	validation-auc:0.84034
[5]	validation-auc:0.85065
[6]	validation-auc:0.84950
[7]	validation-auc:0.85689
[8]	validation-auc:0.85816
[9]	validation-auc:0.85409


[I 2024-04-30 03:09:01,428] Trial 97 finished with value: 0.772 and parameters: {'booster': 'dart', 'lambda': 7.867712856933287e-06, 'alpha': 0.06678905742262249, 'max_depth': 6, 'eta': 0.5066985967597514, 'gamma': 2.5150356648668164e-07, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 1.1686213399296837e-07, 'skip_drop': 3.7300333713588767e-06}. Best is trial 0 with value: 0.772.


[0]	validation-auc:0.74960
[1]	validation-auc:0.77557
[2]	validation-auc:0.78958
[3]	validation-auc:0.79586
[4]	validation-auc:0.80010


[I 2024-04-30 03:09:01,467] Trial 98 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.82964
[1]	validation-auc:0.81950
[2]	validation-auc:0.81733
[3]	validation-auc:0.81767
[4]	validation-auc:0.83630
[5]	validation-auc:0.85112
[6]	validation-auc:0.84925
[7]	validation-auc:0.85685
[8]	validation-auc:0.86075
[9]	validation-auc:0.86410


[I 2024-04-30 03:09:01,579] Trial 99 finished with value: 0.772 and parameters: {'booster': 'gbtree', 'lambda': 0.0018448647108756885, 'alpha': 0.015197810231905876, 'max_depth': 9, 'eta': 0.0809554197879324, 'gamma': 0.0007425061330350752, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.772.


FrozenTrial(number=0, state=TrialState.COMPLETE, values=[0.772], datetime_start=datetime.datetime(2024, 4, 30, 3, 8, 50, 890717), datetime_complete=datetime.datetime(2024, 4, 30, 3, 8, 51, 387358), params={'booster': 'gblinear', 'lambda': 0.0007990125451767318, 'alpha': 0.616465210595442}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.5, 1: 0.5, 2: 0.5, 3: 0.5, 4: 0.5, 5: 0.5, 6: 0.5, 7: 0.5, 8: 0.5, 9: 0.5}, distributions={'booster': CategoricalDistribution(choices=('gbtree', 'gblinear', 'dart')), 'lambda': FloatDistribution(high=1.0, log=True, low=1e-08, step=None), 'alpha': FloatDistribution(high=1.0, log=True, low=1e-08, step=None)}, trial_id=0, value=None)


In [None]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
  print("    {}: {}".format(key, value))

Best trial:
  Value:  0.772
  Params: 
    booster: gblinear
    lambda: 0.0007990125451767318
    alpha: 0.616465210595442


In [None]:
start_time = time.time()

xgb_model = xgb.XGBClassifier(**trial.params)
xgb_model.fit(x_train,y_train)

end_time = time.time()
process_time = round(end_time-start_time,2)
print("Fitting XGBoost took {} seconds".format(process_time))

Fitting XGBoost took 0.01 seconds


In [None]:
predictions = model.predict(x_test)

In [None]:
print("Accuracy of model is {}%".format(accuracy_score(y_test,predictions) * 100))

Accuracy of model is 77.2%


In [None]:
evaluate(y_true=y_test, y_pred=predictions)

Accuracy: 0.772
Accuracy for label 0: 0.893
Accuracy for label 1: 0.413

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.89      0.85       187
           1       0.57      0.41      0.48        63

    accuracy                           0.77       250
   macro avg       0.69      0.65      0.67       250
weighted avg       0.75      0.77      0.76       250


Confusion Matrix:
[[167  20]
 [ 37  26]]
