In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import time
import re

In [None]:
from sklearn.svm import SVC

In [None]:
import sklearn.svm

In [None]:
import xgboost as xgb

In [None]:
! pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


In [None]:
!pip install optuna-integration

Collecting optuna-integration
  Downloading optuna_integration-3.6.0-py3-none-any.whl (93 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/93.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.4/93.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: optuna-integration
Successfully installed optuna-integration-3.6.0


In [None]:
import optuna

In [None]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from sklearn.model_selection import train_test_split

In [None]:
filename = "/content/drive/My Drive/Datasets/s_2k.csv"

df = pd.read_csv(filename,
                 encoding="utf-8", encoding_errors="replace")
# replace values
df.replace({'Not Social': 0, 'Social': 1}, inplace=True)

# Data Preprocessing

In this section we will prepare the dataset in order to use in SVM model. Before starting I want to explain processes with 1 line explanations



*   Cleaning Everything Except Alphabetical and Numerical Characters: We'll clean unrelevant digits using regular expressions
*   Tokenizing and Lemmatizing: We'll split texts into their words and convert words to their base form (dogs=>dog)
*   Removing Stopwords: We'll remove words that have no special meaning (such as the,will,was)
*   Joining Prepared Strings: We'll join the words
*   Bag of Words Approach: We'll create a bag of word. In bag of words approach each feature shows whether the text contains the word or not.







In [None]:
cleanedData = []

lemma = WordNetLemmatizer()
swords = stopwords.words("english")
for text in df["text"]:

    # Cleaning everything except alphabetical and numerical characters
    text = re.sub("[^a-zA-Z0-9]"," ",text)

    # Tokenizing and lemmatizing
    text = nltk.word_tokenize(text.lower())
    text = [lemma.lemmatize(word) for word in text]

    # Removing stopwords
    text = [word for word in text if word not in swords]

    # Joining
    text = " ".join(text)

    cleanedData.append(text)

In [None]:
# check if the data is cleaned
for i in range(0,5):
    print(cleanedData[i],end="\n\n")

balance included kind donation prepared accordance b4si guideline

internal control risk management board audit committee responsible ensuring suitable internal control system prevent detect fraud error designed implemented third party service provider company also responsible reviewing effectiveness control

analysis doe take account energy mix related electricity consumption ttv portfolio issuer

change chief financial officer plc board chair position clare bousfield stepped board effect 1 october 2021 transitioned role managing director retail saving business clare continued receive salary benefit reflective new role

consideration prioritisation climate risk also built decision making governance process requirement key strategic board risk assessment paper



Now we can create our bag of words

In [None]:
vectorizer = CountVectorizer(max_features=10000)
BOW = vectorizer.fit_transform(cleanedData)

we can split our set into train and test.

In [None]:
x_train = list()
x_test = list()
y_train = list()
y_test = list()
for label in [0, 1]:
  x_train,x_test,y_train,y_test = train_test_split(BOW, np.asarray(df["label"]),
                                                  train_size=250,
                                                  test_size=250,
                                                  random_state=42)

In [None]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(250, 4596)
(250, 4596)
(250,)
(250,)


Support Vector Machine Classifier Modeling

In [None]:
start_time = time.time()

model = SVC()
model.fit(x_train,y_train)

end_time = time.time()
process_time = round(end_time-start_time,2)
print("Fitting SVC took {} seconds".format(process_time))

Fitting SVC took 0.02 seconds


In [None]:
predictions = model.predict(x_test)

In [None]:
print("Accuracy of model is {}%".format(accuracy_score(y_test,predictions) * 100))

Accuracy of model is 72.39999999999999%


# Hyperparameter tuning using Optuna

In [None]:
clf = SVC(gamma="auto")
param_distributions = {
        "C": optuna.distributions.FloatDistribution(1e-10, 1e10, log=True),
        "degree": optuna.distributions.IntDistribution(1, 5),
    }

optuna_search = optuna.integration.OptunaSearchCV(
        clf, param_distributions, n_trials=100, timeout=600, verbose=2
    )


optuna_search.fit(x_train,y_train)

print("Best trial:")
trial = optuna_search.study_.best_trial

print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
  print("    {}: {}".format(key, value))

  optuna_search = optuna.integration.OptunaSearchCV(
[I 2024-04-30 03:04:30,401] A new study created in memory with name: no-name-b5c85214-ebc5-4197-bb32-8f0cafbd30d2
INFO:optuna_integration.sklearn:Searching the best hyperparameters using 250 samples...
[I 2024-04-30 03:04:30,472] Trial 0 finished with value: 0.752 and parameters: {'C': 3947007136.0451965, 'degree': 5}. Best is trial 0 with value: 0.752.
[I 2024-04-30 03:04:30,525] Trial 1 finished with value: 0.62 and parameters: {'C': 2.07183680508273e-05, 'degree': 3}. Best is trial 0 with value: 0.752.
[I 2024-04-30 03:04:30,586] Trial 2 finished with value: 0.62 and parameters: {'C': 19.03021573814445, 'degree': 1}. Best is trial 0 with value: 0.752.
[I 2024-04-30 03:04:30,644] Trial 3 finished with value: 0.62 and parameters: {'C': 0.00012035364110391863, 'degree': 1}. Best is trial 0 with value: 0.752.
[I 2024-04-30 03:04:30,705] Trial 4 finished with value: 0.752 and parameters: {'C': 3507275185.989393, 'degree': 2}. Best is t

Best trial:
  Value:  0.788
  Params: 
    C: 237.12419168900635
    degree: 5


In [None]:
trial.params

{'C': 237.12419168900635, 'degree': 5}

In [None]:
start_time = time.time()

model = SVC(gamma="auto", degree=trial.params['degree'], C=trial.params['C'])
model.fit(x_train,y_train)

end_time = time.time()
process_time = round(end_time-start_time,2)
print("Fitting SVC took {} seconds".format(process_time))

Fitting SVC took 0.02 seconds


In [None]:
trial.params['degree']

5

In [None]:
predictions = model.predict(x_test)

In [None]:
print("Accuracy of model is {}%".format(accuracy_score(y_test,predictions) * 100))

Accuracy of model is 82.0%


In [None]:
def evaluate(y_true, y_pred):
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')

    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels

    for label in unique_labels:
        label_indices = [i for i in range(len(y_true))
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [None]:
evaluate(y_true=y_test, y_pred=predictions)

Accuracy: 0.820
Accuracy for label 0: 0.855
Accuracy for label 1: 0.765

Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.86      0.85       152
           1       0.77      0.77      0.77        98

    accuracy                           0.82       250
   macro avg       0.81      0.81      0.81       250
weighted avg       0.82      0.82      0.82       250


Confusion Matrix:
[[130  22]
 [ 23  75]]


# build model for XGBoost

In [None]:
start_time = time.time()

xgb_model = xgb.XGBClassifier()
xgb_model.fit(x_train,y_train)

end_time = time.time()
process_time = round(end_time-start_time,2)
print("Fitting XGBoost took {} seconds".format(process_time))

Fitting XGBoost took 1.64 seconds


In [None]:
predictions = model.predict(x_test)

In [None]:
print("Accuracy of model is {}%".format(accuracy_score(y_test,predictions) * 100))

Accuracy of model is 82.0%


# Hyperparameter tuning using Optuna

In [None]:
def objective(trial):
    dtrain = xgb.DMatrix(x_train, label=y_train)
    dvalid = xgb.DMatrix(x_test, label=y_test)

    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    }

    if param["booster"] == "gbtree" or param["booster"] == "dart":
        param["max_depth"] = trial.suggest_int("max_depth", 1, 9)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    # Add a callback for pruning.
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "validation-auc")
    bst = xgb.train(param, dtrain, evals=[(dvalid, "validation")], callbacks=[pruning_callback])
    preds = bst.predict(dvalid)
    pred_labels = np.rint(preds)
    accuracy = accuracy_score(y_test,predictions)
    return accuracy

In [None]:
study = optuna.create_study(
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=5), direction="maximize"
    )
study.optimize(objective, n_trials=100)
print(study.best_trial)

[I 2024-04-30 03:04:39,592] A new study created in memory with name: no-name-befdab8d-9ce5-4a64-b799-139cdb42533a


[0]	validation-auc:0.72449
[1]	validation-auc:0.72983
[2]	validation-auc:0.73724
[3]	validation-auc:0.73755
[4]	validation-auc:0.73677
[5]	validation-auc:0.73724
[6]	validation-auc:0.73755
[7]	validation-auc:0.73681
[8]	validation-auc:0.73724
[9]	validation-auc:0.73694


[I 2024-04-30 03:04:42,828] Trial 0 finished with value: 0.82 and parameters: {'booster': 'dart', 'lambda': 3.152751444479729e-05, 'alpha': 0.44697926911065755, 'max_depth': 9, 'eta': 0.0003680399756490309, 'gamma': 1.59824440623675e-05, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 6.843493261342992e-06, 'skip_drop': 0.5219650333137934}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.68414
[1]	validation-auc:0.72503
[2]	validation-auc:0.72503
[3]	validation-auc:0.72503
[4]	validation-auc:0.72503
[5]	validation-auc:0.72503
[6]	validation-auc:0.72503
[7]	validation-auc:0.72503
[8]	validation-auc:0.72503
[9]	validation-auc:0.72503


[I 2024-04-30 03:04:42,919] Trial 1 finished with value: 0.82 and parameters: {'booster': 'dart', 'lambda': 0.005040833677881936, 'alpha': 0.01493042611697866, 'max_depth': 6, 'eta': 1.0982120182546449e-07, 'gamma': 0.004709906605828802, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 0.15744478199070983, 'skip_drop': 3.460852506624258e-08}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.85446
[1]	validation-auc:0.85419
[2]	validation-auc:0.85305
[3]	validation-auc:0.85311
[4]	validation-auc:0.85238
[5]	validation-auc:0.85191
[6]	validation-auc:0.85184
[7]	validation-auc:0.85177
[8]	validation-auc:0.85171
[9]	validation-auc:0.85157


[I 2024-04-30 03:04:42,960] Trial 2 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.3272670161789703, 'alpha': 4.174338681607259e-05}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.68411
[1]	validation-auc:0.68599
[2]	validation-auc:0.73842
[3]	validation-auc:0.76108
[4]	validation-auc:0.76047
[5]	validation-auc:0.76497
[6]	validation-auc:0.76437
[7]	validation-auc:0.80032
[8]	validation-auc:0.79817
[9]	validation-auc:0.80105


[I 2024-04-30 03:04:43,034] Trial 3 finished with value: 0.82 and parameters: {'booster': 'dart', 'lambda': 1.1334688499244033e-06, 'alpha': 1.2841461509658426e-07, 'max_depth': 5, 'eta': 0.10275859978696862, 'gamma': 2.8953502584518517e-08, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 6.9563296068818645e-06, 'skip_drop': 0.0017746281544608203}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.74084
[1]	validation-auc:0.74272
[2]	validation-auc:0.74084
[3]	validation-auc:0.74084
[4]	validation-auc:0.74084
[5]	validation-auc:0.74084
[6]	validation-auc:0.74272
[7]	validation-auc:0.74272
[8]	validation-auc:0.74292
[9]	validation-auc:0.74272


[I 2024-04-30 03:04:43,187] Trial 4 finished with value: 0.82 and parameters: {'booster': 'dart', 'lambda': 2.9596626982754617e-05, 'alpha': 5.11558645976455e-08, 'max_depth': 9, 'eta': 0.0008108604710768795, 'gamma': 1.3408188326053206e-05, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 2.808232520026879e-07, 'skip_drop': 1.353967222792462e-06}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.73782
[1]	validation-auc:0.73782
[2]	validation-auc:0.73782
[3]	validation-auc:0.73782
[4]	validation-auc:0.73782


[I 2024-04-30 03:04:43,240] Trial 5 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000


[I 2024-04-30 03:04:43,266] Trial 6 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.65669
[1]	validation-auc:0.65669
[2]	validation-auc:0.65669
[3]	validation-auc:0.65669
[4]	validation-auc:0.65669
[5]	validation-auc:0.65669


[I 2024-04-30 03:04:43,329] Trial 7 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.67844
[1]	validation-auc:0.67864
[2]	validation-auc:0.67864
[3]	validation-auc:0.67844
[4]	validation-auc:0.67844
[5]	validation-auc:0.67830


[I 2024-04-30 03:04:43,367] Trial 8 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74043
[1]	validation-auc:0.73996
[2]	validation-auc:0.73936
[3]	validation-auc:0.76020
[4]	validation-auc:0.76557
[5]	validation-auc:0.76816
[6]	validation-auc:0.76876
[7]	validation-auc:0.76870
[8]	validation-auc:0.76870
[9]	validation-auc:0.77434


[I 2024-04-30 03:04:43,451] Trial 9 finished with value: 0.82 and parameters: {'booster': 'dart', 'lambda': 9.643953978584222e-05, 'alpha': 0.006243279460861, 'max_depth': 8, 'eta': 0.030035267701269817, 'gamma': 5.344773562746615e-05, 'grow_policy': 'lossguide', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 5.908121434008727e-08, 'skip_drop': 0.008192732198307037}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.61587
[1]	validation-auc:0.61587
[2]	validation-auc:0.61587
[3]	validation-auc:0.61587
[4]	validation-auc:0.61587


[I 2024-04-30 03:04:43,543] Trial 10 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.67052
[2]	validation-auc:0.68414
[3]	validation-auc:0.72194
[4]	validation-auc:0.72503


[I 2024-04-30 03:04:43,613] Trial 11 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.71241
[1]	validation-auc:0.71368
[2]	validation-auc:0.71368
[3]	validation-auc:0.71368
[4]	validation-auc:0.71368


[I 2024-04-30 03:04:43,691] Trial 12 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.67092
[1]	validation-auc:0.67092
[2]	validation-auc:0.67092
[3]	validation-auc:0.67092
[4]	validation-auc:0.67092


[I 2024-04-30 03:04:43,765] Trial 13 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.71848
[1]	validation-auc:0.71848
[2]	validation-auc:0.71848
[3]	validation-auc:0.71848
[4]	validation-auc:0.71848


[I 2024-04-30 03:04:43,844] Trial 14 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.59694
[5]	validation-auc:0.59694


[I 2024-04-30 03:04:43,955] Trial 15 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.71992
[1]	validation-auc:0.71992
[2]	validation-auc:0.71992
[3]	validation-auc:0.71992
[4]	validation-auc:0.71992
[5]	validation-auc:0.71992


[I 2024-04-30 03:04:44,031] Trial 16 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72110
[1]	validation-auc:0.79911
[2]	validation-auc:0.81059
[3]	validation-auc:0.81837
[4]	validation-auc:0.81821
[5]	validation-auc:0.81764
[6]	validation-auc:0.82546
[7]	validation-auc:0.82828
[8]	validation-auc:0.82697
[9]	validation-auc:0.82848


[I 2024-04-30 03:04:44,120] Trial 17 finished with value: 0.82 and parameters: {'booster': 'dart', 'lambda': 8.05286246932395e-06, 'alpha': 0.00022336424904460892, 'max_depth': 7, 'eta': 0.70960962484363, 'gamma': 0.000642597930540459, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'tree', 'rate_drop': 0.004093133587824211, 'skip_drop': 0.007076741614720613}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.84529
[1]	validation-auc:0.81938
[2]	validation-auc:0.80246
[3]	validation-auc:0.79884
[4]	validation-auc:0.79971
[5]	validation-auc:0.80226
[6]	validation-auc:0.80293
[7]	validation-auc:0.80394
[8]	validation-auc:0.80448
[9]	validation-auc:0.80501


[I 2024-04-30 03:04:44,166] Trial 18 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.0008207403399171946, 'alpha': 0.010579677113028216}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.68411
[1]	validation-auc:0.68411
[2]	validation-auc:0.68411
[3]	validation-auc:0.68411
[4]	validation-auc:0.68411
[5]	validation-auc:0.68411


[I 2024-04-30 03:04:44,222] Trial 19 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.71852
[1]	validation-auc:0.71852
[2]	validation-auc:0.71852
[3]	validation-auc:0.71852
[4]	validation-auc:0.71852


[I 2024-04-30 03:04:44,289] Trial 20 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.84519
[1]	validation-auc:0.84526
[2]	validation-auc:0.84519
[3]	validation-auc:0.84519
[4]	validation-auc:0.84492
[5]	validation-auc:0.84486
[6]	validation-auc:0.84472
[7]	validation-auc:0.84479
[8]	validation-auc:0.84472
[9]	validation-auc:0.84466


[I 2024-04-30 03:04:44,339] Trial 21 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.8239233376126994, 'alpha': 1.7019801554701035e-05}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.84392
[1]	validation-auc:0.84432
[2]	validation-auc:0.84412
[3]	validation-auc:0.84378
[4]	validation-auc:0.84365
[5]	validation-auc:0.84365
[6]	validation-auc:0.84365
[7]	validation-auc:0.84358
[8]	validation-auc:0.84352
[9]	validation-auc:0.84345


[I 2024-04-30 03:04:44,391] Trial 22 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.9942836505712808, 'alpha': 1.0176941040652033e-06}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.86520
[1]	validation-auc:0.86258
[2]	validation-auc:0.86077
[3]	validation-auc:0.85929
[4]	validation-auc:0.85835
[5]	validation-auc:0.85761
[6]	validation-auc:0.85728
[7]	validation-auc:0.85714
[8]	validation-auc:0.85708
[9]	validation-auc:0.85694


[I 2024-04-30 03:04:44,442] Trial 23 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.19027928748246392, 'alpha': 2.816971239688775e-05}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.87742
[1]	validation-auc:0.87218
[2]	validation-auc:0.86970
[3]	validation-auc:0.86842
[4]	validation-auc:0.86674
[5]	validation-auc:0.86567
[6]	validation-auc:0.86520
[7]	validation-auc:0.86506
[8]	validation-auc:0.86459
[9]	validation-auc:0.86439


[I 2024-04-30 03:04:44,539] Trial 24 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.05077412886794015, 'alpha': 0.0009891712239899011}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.86849
[1]	validation-auc:0.86144
[2]	validation-auc:0.85654
[3]	validation-auc:0.85238
[4]	validation-auc:0.85023
[5]	validation-auc:0.84855
[6]	validation-auc:0.84741
[7]	validation-auc:0.84680
[8]	validation-auc:0.84613
[9]	validation-auc:0.84573


[I 2024-04-30 03:04:44,670] Trial 25 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.015749328366444673, 'alpha': 0.004368593045675765}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.67092
[1]	validation-auc:0.67092
[2]	validation-auc:0.67092
[3]	validation-auc:0.67092
[4]	validation-auc:0.67092
[5]	validation-auc:0.67092


[I 2024-04-30 03:04:44,738] Trial 26 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74345
[1]	validation-auc:0.74359
[2]	validation-auc:0.74819
[3]	validation-auc:0.76282
[4]	validation-auc:0.76343
[5]	validation-auc:0.76618


[I 2024-04-30 03:04:44,813] Trial 27 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72503
[1]	validation-auc:0.72691
[2]	validation-auc:0.72503
[3]	validation-auc:0.72691
[4]	validation-auc:0.72691
[5]	validation-auc:0.72691


[I 2024-04-30 03:04:44,873] Trial 28 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.85439
[1]	validation-auc:0.86473
[2]	validation-auc:0.86768
[3]	validation-auc:0.86889
[4]	validation-auc:0.86809
[5]	validation-auc:0.86835
[6]	validation-auc:0.86835
[7]	validation-auc:0.86882
[8]	validation-auc:0.86889
[9]	validation-auc:0.86822


[I 2024-04-30 03:04:44,946] Trial 29 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.0029226503727379245, 'alpha': 1.1338261290992499e-08}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.72083
[1]	validation-auc:0.72191
[2]	validation-auc:0.72003
[3]	validation-auc:0.72083
[4]	validation-auc:0.72191
[5]	validation-auc:0.72137


[I 2024-04-30 03:04:45,021] Trial 30 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.68599
[1]	validation-auc:0.76715
[2]	validation-auc:0.79971
[3]	validation-auc:0.81663
[4]	validation-auc:0.81804


[I 2024-04-30 03:04:45,092] Trial 31 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.67092
[1]	validation-auc:0.67092
[2]	validation-auc:0.68649
[3]	validation-auc:0.68649
[4]	validation-auc:0.68649


[I 2024-04-30 03:04:45,163] Trial 32 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.65615
[1]	validation-auc:0.67092
[2]	validation-auc:0.68690
[3]	validation-auc:0.68663
[4]	validation-auc:0.73137


[I 2024-04-30 03:04:45,233] Trial 33 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.68673
[1]	validation-auc:0.68673
[2]	validation-auc:0.68673
[3]	validation-auc:0.68673
[4]	validation-auc:0.68673
[5]	validation-auc:0.68673


[I 2024-04-30 03:04:45,306] Trial 34 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72499
[1]	validation-auc:0.72499
[2]	validation-auc:0.72499
[3]	validation-auc:0.72499
[4]	validation-auc:0.72499


[I 2024-04-30 03:04:45,380] Trial 35 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.58882
[1]	validation-auc:0.58882
[2]	validation-auc:0.58882
[3]	validation-auc:0.58882
[4]	validation-auc:0.58882


[I 2024-04-30 03:04:45,426] Trial 36 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.65742
[1]	validation-auc:0.67092
[2]	validation-auc:0.68690
[3]	validation-auc:0.73771
[4]	validation-auc:0.76662


[I 2024-04-30 03:04:45,507] Trial 37 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72113
[1]	validation-auc:0.72113
[2]	validation-auc:0.72113
[3]	validation-auc:0.72113
[4]	validation-auc:0.72113


[I 2024-04-30 03:04:45,582] Trial 38 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.67018
[1]	validation-auc:0.67018
[2]	validation-auc:0.67018
[3]	validation-auc:0.67018
[4]	validation-auc:0.67018
[5]	validation-auc:0.67018


[I 2024-04-30 03:04:45,650] Trial 39 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000


[I 2024-04-30 03:04:45,687] Trial 40 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74084
[1]	validation-auc:0.74272
[2]	validation-auc:0.74084
[3]	validation-auc:0.74084
[4]	validation-auc:0.74272
[5]	validation-auc:0.74164


[I 2024-04-30 03:04:45,768] Trial 41 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.73916
[1]	validation-auc:0.74043
[2]	validation-auc:0.74043
[3]	validation-auc:0.74278
[4]	validation-auc:0.75050


[I 2024-04-30 03:04:45,870] Trial 42 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74084
[1]	validation-auc:0.74164
[2]	validation-auc:0.74218
[3]	validation-auc:0.74272
[4]	validation-auc:0.74218


[I 2024-04-30 03:04:45,974] Trial 43 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74050
[1]	validation-auc:0.74050
[2]	validation-auc:0.74050
[3]	validation-auc:0.74050
[4]	validation-auc:0.74050
[5]	validation-auc:0.74050


[I 2024-04-30 03:04:46,055] Trial 44 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.73788
[1]	validation-auc:0.73788
[2]	validation-auc:0.73788
[3]	validation-auc:0.73788
[4]	validation-auc:0.73788


[I 2024-04-30 03:04:46,128] Trial 45 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72449
[1]	validation-auc:0.72983
[2]	validation-auc:0.73775
[3]	validation-auc:0.73768
[4]	validation-auc:0.73681
[5]	validation-auc:0.73775


[I 2024-04-30 03:04:46,194] Trial 46 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.65669
[1]	validation-auc:0.68414
[2]	validation-auc:0.73261
[3]	validation-auc:0.72543
[4]	validation-auc:0.72503


[I 2024-04-30 03:04:46,269] Trial 47 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.68673
[1]	validation-auc:0.68673
[2]	validation-auc:0.68673
[3]	validation-auc:0.68673
[4]	validation-auc:0.68673
[5]	validation-auc:0.68673


[I 2024-04-30 03:04:46,346] Trial 48 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.71697
[1]	validation-auc:0.73782
[2]	validation-auc:0.73782
[3]	validation-auc:0.73782
[4]	validation-auc:0.73782
[5]	validation-auc:0.74215


[I 2024-04-30 03:04:46,426] Trial 49 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.81519
[1]	validation-auc:0.82646
[2]	validation-auc:0.83895
[3]	validation-auc:0.84909
[4]	validation-auc:0.85372
[5]	validation-auc:0.85822
[6]	validation-auc:0.86003
[7]	validation-auc:0.86117
[8]	validation-auc:0.86258
[9]	validation-auc:0.86372


[I 2024-04-30 03:04:46,477] Trial 50 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.00018163686492351556, 'alpha': 8.57832488966833e-05}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.74345
[1]	validation-auc:0.74359
[2]	validation-auc:0.74819
[3]	validation-auc:0.76195
[4]	validation-auc:0.76510
[5]	validation-auc:0.76504


[I 2024-04-30 03:04:46,562] Trial 51 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74050
[1]	validation-auc:0.74943
[2]	validation-auc:0.77867
[3]	validation-auc:0.78273
[4]	validation-auc:0.79817
[5]	validation-auc:0.82542


[I 2024-04-30 03:04:46,646] Trial 52 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72761
[1]	validation-auc:0.72761
[2]	validation-auc:0.72761
[3]	validation-auc:0.72761
[4]	validation-auc:0.72761


[I 2024-04-30 03:04:46,724] Trial 53 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72449
[1]	validation-auc:0.72973
[2]	validation-auc:0.73818
[3]	validation-auc:0.74980
[4]	validation-auc:0.74966


[I 2024-04-30 03:04:46,812] Trial 54 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74104
[1]	validation-auc:0.74050
[2]	validation-auc:0.73923
[3]	validation-auc:0.73923
[4]	validation-auc:0.74050


[I 2024-04-30 03:04:46,880] Trial 55 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000


[I 2024-04-30 03:04:46,922] Trial 56 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72110
[1]	validation-auc:0.75775
[2]	validation-auc:0.76668
[3]	validation-auc:0.78853
[4]	validation-auc:0.81804


[I 2024-04-30 03:04:47,016] Trial 57 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.59694
[1]	validation-auc:0.59694
[2]	validation-auc:0.59694
[3]	validation-auc:0.61715
[4]	validation-auc:0.61715
[5]	validation-auc:0.65742


[I 2024-04-30 03:04:47,089] Trial 58 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.87292
[1]	validation-auc:0.87426
[2]	validation-auc:0.87312
[3]	validation-auc:0.87272
[4]	validation-auc:0.87231
[5]	validation-auc:0.87191
[6]	validation-auc:0.87164
[7]	validation-auc:0.87171
[8]	validation-auc:0.87198
[9]	validation-auc:0.87178


[I 2024-04-30 03:04:47,157] Trial 59 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.009035552638386055, 'alpha': 3.976777511701934e-07}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.72765
[1]	validation-auc:0.72765
[2]	validation-auc:0.72765
[3]	validation-auc:0.72765
[4]	validation-auc:0.72765
[5]	validation-auc:0.72765


[I 2024-04-30 03:04:47,242] Trial 60 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72110
[1]	validation-auc:0.77558
[2]	validation-auc:0.79622
[3]	validation-auc:0.80730
[4]	validation-auc:0.81267
[5]	validation-auc:0.82737


[I 2024-04-30 03:04:47,329] Trial 61 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.71848
[1]	validation-auc:0.72036
[2]	validation-auc:0.71848
[3]	validation-auc:0.71929
[4]	validation-auc:0.71848
[5]	validation-auc:0.71848


[I 2024-04-30 03:04:47,416] Trial 62 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72761
[1]	validation-auc:0.76729
[2]	validation-auc:0.79401
[3]	validation-auc:0.81532
[4]	validation-auc:0.82710
[5]	validation-auc:0.81952


[I 2024-04-30 03:04:47,498] Trial 63 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.68545
[1]	validation-auc:0.68545
[2]	validation-auc:0.68673
[3]	validation-auc:0.68673
[4]	validation-auc:0.68673


[I 2024-04-30 03:04:47,589] Trial 64 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.55377
[4]	validation-auc:0.71294


[I 2024-04-30 03:04:47,675] Trial 65 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.79961
[1]	validation-auc:0.79914
[2]	validation-auc:0.79995
[3]	validation-auc:0.79927
[4]	validation-auc:0.79974


[I 2024-04-30 03:04:47,714] Trial 66 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74352
[1]	validation-auc:0.79679
[2]	validation-auc:0.82868
[3]	validation-auc:0.83576
[4]	validation-auc:0.83559


[I 2024-04-30 03:04:47,786] Trial 67 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.71848
[1]	validation-auc:0.71848
[2]	validation-auc:0.71848
[3]	validation-auc:0.71848
[4]	validation-auc:0.71848


[I 2024-04-30 03:04:47,868] Trial 68 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72761
[1]	validation-auc:0.76363
[2]	validation-auc:0.76749
[3]	validation-auc:0.79797
[4]	validation-auc:0.79545


[I 2024-04-30 03:04:47,949] Trial 69 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.61715
[1]	validation-auc:0.61715
[2]	validation-auc:0.65501
[3]	validation-auc:0.67092
[4]	validation-auc:0.66850
[5]	validation-auc:0.68676


[I 2024-04-30 03:04:48,035] Trial 70 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.86288
[1]	validation-auc:0.85959
[2]	validation-auc:0.85604
[3]	validation-auc:0.85107
[4]	validation-auc:0.84865
[5]	validation-auc:0.84368
[6]	validation-auc:0.84274
[7]	validation-auc:0.84140
[8]	validation-auc:0.84026
[9]	validation-auc:0.84019


[I 2024-04-30 03:04:48,107] Trial 71 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 1.9323577649955468e-05, 'alpha': 0.006171831291701405}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.69525
[1]	validation-auc:0.69613
[2]	validation-auc:0.68935
[3]	validation-auc:0.68908
[4]	validation-auc:0.68760
[5]	validation-auc:0.68753


[I 2024-04-30 03:04:48,156] Trial 72 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.86701
[1]	validation-auc:0.87171
[2]	validation-auc:0.87715
[3]	validation-auc:0.87889
[4]	validation-auc:0.87809
[5]	validation-auc:0.87654
[6]	validation-auc:0.87460
[7]	validation-auc:0.87211
[8]	validation-auc:0.87084
[9]	validation-auc:0.86970


[I 2024-04-30 03:04:48,219] Trial 73 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.00038373633163331155, 'alpha': 0.002568680062603379}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.86117
[1]	validation-auc:0.86661
[2]	validation-auc:0.86943
[3]	validation-auc:0.87064
[4]	validation-auc:0.87117
[5]	validation-auc:0.87044
[6]	validation-auc:0.87017
[7]	validation-auc:0.86990
[8]	validation-auc:0.87037
[9]	validation-auc:0.87023


[I 2024-04-30 03:04:48,296] Trial 74 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.003283620845390121, 'alpha': 0.00018432769659024542}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000


[I 2024-04-30 03:04:48,356] Trial 75 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72258
[1]	validation-auc:0.72258
[2]	validation-auc:0.72258
[3]	validation-auc:0.72258
[4]	validation-auc:0.72258


[I 2024-04-30 03:04:48,438] Trial 76 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000


[I 2024-04-30 03:04:48,470] Trial 77 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74406
[1]	validation-auc:0.74090
[2]	validation-auc:0.74225
[3]	validation-auc:0.74406
[4]	validation-auc:0.74352


[I 2024-04-30 03:04:48,559] Trial 78 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.68673
[1]	validation-auc:0.68673
[2]	validation-auc:0.68673
[3]	validation-auc:0.72761
[4]	validation-auc:0.72761


[I 2024-04-30 03:04:48,622] Trial 79 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72113
[1]	validation-auc:0.72113
[2]	validation-auc:0.72113
[3]	validation-auc:0.72113
[4]	validation-auc:0.72113
[5]	validation-auc:0.72113


[I 2024-04-30 03:04:48,702] Trial 80 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.84714
[1]	validation-auc:0.84707
[2]	validation-auc:0.84694
[3]	validation-auc:0.84680
[4]	validation-auc:0.84680
[5]	validation-auc:0.84627
[6]	validation-auc:0.84607
[7]	validation-auc:0.84600
[8]	validation-auc:0.84600
[9]	validation-auc:0.84586


[I 2024-04-30 03:04:48,764] Trial 81 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.6664717141316236, 'alpha': 1.5477916949492157e-05}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.50000
[1]	validation-auc:0.50000
[2]	validation-auc:0.50000
[3]	validation-auc:0.50000
[4]	validation-auc:0.50000


[I 2024-04-30 03:04:48,811] Trial 82 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.87634
[1]	validation-auc:0.87359
[2]	validation-auc:0.87104
[3]	validation-auc:0.86929
[4]	validation-auc:0.86755
[5]	validation-auc:0.86654
[6]	validation-auc:0.86567
[7]	validation-auc:0.86527
[8]	validation-auc:0.86506
[9]	validation-auc:0.86486


[I 2024-04-30 03:04:48,881] Trial 83 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.08692313784943463, 'alpha': 4.433207903628739e-08}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.85734
[1]	validation-auc:0.85627
[2]	validation-auc:0.85546
[3]	validation-auc:0.85452
[4]	validation-auc:0.85345
[5]	validation-auc:0.85311
[6]	validation-auc:0.85285
[7]	validation-auc:0.85244
[8]	validation-auc:0.85238
[9]	validation-auc:0.85238


[I 2024-04-30 03:04:48,954] Trial 84 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.28113253176297154, 'alpha': 5.631843910753335e-05}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.80726
[1]	validation-auc:0.81572
[2]	validation-auc:0.82579
[3]	validation-auc:0.83774
[4]	validation-auc:0.84586
[5]	validation-auc:0.85036
[6]	validation-auc:0.85426
[7]	validation-auc:0.85560
[8]	validation-auc:0.85768
[9]	validation-auc:0.85862


[I 2024-04-30 03:04:49,011] Trial 85 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.00011803879980619489, 'alpha': 1.9887551153639852e-05}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.72765
[1]	validation-auc:0.72765
[2]	validation-auc:0.72765
[3]	validation-auc:0.72765
[4]	validation-auc:0.72765
[5]	validation-auc:0.72765


[I 2024-04-30 03:04:49,103] Trial 86 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.67018
[1]	validation-auc:0.67018
[2]	validation-auc:0.67018
[3]	validation-auc:0.66964
[4]	validation-auc:0.66964
[5]	validation-auc:0.66964


[I 2024-04-30 03:04:49,190] Trial 87 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.74345
[1]	validation-auc:0.74359
[2]	validation-auc:0.74278
[3]	validation-auc:0.75087
[4]	validation-auc:0.75772
[5]	validation-auc:0.76269


[I 2024-04-30 03:04:49,279] Trial 88 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.80303
[1]	validation-auc:0.80055
[2]	validation-auc:0.79706
[3]	validation-auc:0.79934
[4]	validation-auc:0.80498


[I 2024-04-30 03:04:49,323] Trial 89 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.68411
[1]	validation-auc:0.68411
[2]	validation-auc:0.68411
[3]	validation-auc:0.68492
[4]	validation-auc:0.68599


[I 2024-04-30 03:04:49,405] Trial 90 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.85070
[1]	validation-auc:0.85030
[2]	validation-auc:0.84989
[3]	validation-auc:0.84909
[4]	validation-auc:0.84848
[5]	validation-auc:0.84801
[6]	validation-auc:0.84788
[7]	validation-auc:0.84795
[8]	validation-auc:0.84795
[9]	validation-auc:0.84795


[I 2024-04-30 03:04:49,468] Trial 91 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.4854592744133791, 'alpha': 6.910230479483492e-08}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.84533
[1]	validation-auc:0.84513
[2]	validation-auc:0.84486
[3]	validation-auc:0.84472
[4]	validation-auc:0.84466
[5]	validation-auc:0.84459


[I 2024-04-30 03:04:49,517] Trial 92 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.86151
[1]	validation-auc:0.86050
[2]	validation-auc:0.85889
[3]	validation-auc:0.85741
[4]	validation-auc:0.85687
[5]	validation-auc:0.85647
[6]	validation-auc:0.85580
[7]	validation-auc:0.85560
[8]	validation-auc:0.85560
[9]	validation-auc:0.85526


[I 2024-04-30 03:04:49,588] Trial 93 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.21557794327396407, 'alpha': 6.759931053899283e-06}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.87963
[1]	validation-auc:0.87567
[2]	validation-auc:0.87419
[3]	validation-auc:0.87178
[4]	validation-auc:0.87057
[5]	validation-auc:0.86970
[6]	validation-auc:0.86963
[7]	validation-auc:0.86903
[8]	validation-auc:0.86896
[9]	validation-auc:0.86896


[I 2024-04-30 03:04:49,649] Trial 94 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.06270861122957729, 'alpha': 2.1353096984872895e-06}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.72258
[1]	validation-auc:0.75366
[2]	validation-auc:0.77098
[3]	validation-auc:0.79602
[4]	validation-auc:0.81502


[I 2024-04-30 03:04:49,735] Trial 95 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.72449
[1]	validation-auc:0.72979
[2]	validation-auc:0.73724
[3]	validation-auc:0.73755
[4]	validation-auc:0.73677


[I 2024-04-30 03:04:49,824] Trial 96 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.88118
[1]	validation-auc:0.87829
[2]	validation-auc:0.87681
[3]	validation-auc:0.87621
[4]	validation-auc:0.87487
[5]	validation-auc:0.87399
[6]	validation-auc:0.87372
[7]	validation-auc:0.87325
[8]	validation-auc:0.87305
[9]	validation-auc:0.87299


[I 2024-04-30 03:04:49,892] Trial 97 finished with value: 0.82 and parameters: {'booster': 'gblinear', 'lambda': 0.02851648201253854, 'alpha': 2.575348921149522e-08}. Best is trial 0 with value: 0.82.


[0]	validation-auc:0.72765
[1]	validation-auc:0.72765
[2]	validation-auc:0.76004
[3]	validation-auc:0.75849
[4]	validation-auc:0.76054
[5]	validation-auc:0.76994


[I 2024-04-30 03:04:49,977] Trial 98 pruned. Trial was pruned at iteration 5.


[0]	validation-auc:0.73782
[1]	validation-auc:0.73862
[2]	validation-auc:0.73862
[3]	validation-auc:0.73970
[4]	validation-auc:0.73916
[5]	validation-auc:0.73916


[I 2024-04-30 03:04:50,096] Trial 99 pruned. Trial was pruned at iteration 5.


FrozenTrial(number=0, state=TrialState.COMPLETE, values=[0.82], datetime_start=datetime.datetime(2024, 4, 30, 3, 4, 39, 593677), datetime_complete=datetime.datetime(2024, 4, 30, 3, 4, 42, 827969), params={'booster': 'dart', 'lambda': 3.152751444479729e-05, 'alpha': 0.44697926911065755, 'max_depth': 9, 'eta': 0.0003680399756490309, 'gamma': 1.59824440623675e-05, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 6.843493261342992e-06, 'skip_drop': 0.5219650333137934}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.7244897959183674, 1: 0.7298267991407089, 2: 0.7372448979591837, 3: 0.737546992481203, 4: 0.7367749731471536, 5: 0.7372448979591837, 6: 0.737546992481203, 7: 0.7368085392051558, 8: 0.7372448979591837, 9: 0.7369428034371643}, distributions={'booster': CategoricalDistribution(choices=('gbtree', 'gblinear', 'dart')), 'lambda': FloatDistribution(high=1.0, log=True, low=1e-08, step=None), 'alpha': FloatDistribution(high=1.0, lo

In [None]:
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
  print("    {}: {}".format(key, value))

Best trial:
  Value:  0.82
  Params: 
    booster: dart
    lambda: 3.152751444479729e-05
    alpha: 0.44697926911065755
    max_depth: 9
    eta: 0.0003680399756490309
    gamma: 1.59824440623675e-05
    grow_policy: depthwise
    sample_type: weighted
    normalize_type: forest
    rate_drop: 6.843493261342992e-06
    skip_drop: 0.5219650333137934


In [None]:
start_time = time.time()

xgb_model = xgb.XGBClassifier(**trial.params)
xgb_model.fit(x_train,y_train)

end_time = time.time()
process_time = round(end_time-start_time,2)
print("Fitting XGBoost took {} seconds".format(process_time))

Fitting XGBoost took 24.25 seconds


In [None]:
predictions = model.predict(x_test)

In [None]:
print("Accuracy of model is {}%".format(accuracy_score(y_test,predictions) * 100))

Accuracy of model is 82.0%


In [None]:
evaluate(y_true=y_test, y_pred=predictions)

Accuracy: 0.820
Accuracy for label 0: 0.855
Accuracy for label 1: 0.765

Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.86      0.85       152
           1       0.77      0.77      0.77        98

    accuracy                           0.82       250
   macro avg       0.81      0.81      0.81       250
weighted avg       0.82      0.82      0.82       250


Confusion Matrix:
[[130  22]
 [ 23  75]]
