#### **Import libraries**

In [1]:
from pathlib import Path

import pandas as pd
import numpy as np

from keras import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.optimizers import SGD, Adam

import optuna

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import joblib

  from .autonotebook import tqdm as notebook_tqdm


#### **Loading training and validation data**

In [2]:
# Read the training and validation CSVs into DataFrames (paths are relative
# to the notebook's working directory).
df_train, df_valid = (
    pd.read_csv(csv_path)
    for csv_path in ('../databases/training.csv', '../databases/validation.csv')
)

In [3]:
# Separate the target column 'IND_BOM_1_1' from the remaining feature columns
# for both the training and the validation frame.
X_train = df_train.drop(columns=['IND_BOM_1_1'])
y_train = df_train['IND_BOM_1_1']

X_val = df_valid.drop(columns=['IND_BOM_1_1'])
y_val = df_valid['IND_BOM_1_1']

In [4]:
# Convert both feature sets to plain NumPy arrays. np.array also accepts an
# ndarray, so re-running this cell is harmless.
X_train, X_val = np.array(X_train), np.array(X_val)

#### **Hyperparameter selection**

In [5]:
def objective(trial, random_state=42):
    """Optuna objective: validation error of a decision tree for one trial.

    Samples the tree hyperparameters from ``trial``, fits a
    ``DecisionTreeClassifier`` on the notebook-level ``X_train`` / ``y_train``
    and scores its predictions on ``X_val`` / ``y_val``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample ``max_depth``, ``min_samples_split``,
        ``min_samples_leaf``, ``criterion`` and ``splitter``.
    random_state : int or None, default 42
        Seed forwarded to the classifier so that the same hyperparameters
        always yield the same score. Pass ``None`` to restore the previous
        unseeded behaviour.

    Returns
    -------
    float
        ``1 - accuracy`` on the validation set (lower is better).
    """
    # Dict literals evaluate in order, so the suggest_* calls happen in the
    # same sequence as before.
    params = {
        'max_depth': trial.suggest_int('max_depth', 2, 10),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5),
        'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
        'splitter': trial.suggest_categorical('splitter', ['best', 'random']),
    }

    # Without a fixed seed the tree is stochastic (most visibly with
    # splitter='random'), which turns trial scores into noisy measurements.
    model = DecisionTreeClassifier(random_state=random_state, **params)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_val)
    accuracy = accuracy_score(y_val, y_pred)

    return 1 - accuracy

In [6]:
n_trials = 150

# Seed the sampler so that Restart & Run All explores the same sequence of
# hyperparameters. TPESampler is also what create_study uses by default for
# single-objective studies, so only the seeding changes.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=n_trials)

[I 2023-04-16 14:33:09,830] A new study created in memory with name: no-name-1db9c78b-fcfb-42e1-841a-d384cef86341
[I 2023-04-16 14:33:35,937] Trial 0 finished with value: 0.3322321087716207 and parameters: {'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 5, 'criterion': 'entropy', 'splitter': 'best'}. Best is trial 0 with value: 0.3322321087716207.
[I 2023-04-16 14:33:52,204] Trial 1 finished with value: 0.34402255307902385 and parameters: {'max_depth': 5, 'min_samples_split': 9, 'min_samples_leaf': 4, 'criterion': 'entropy', 'splitter': 'best'}. Best is trial 0 with value: 0.3322321087716207.
[I 2023-04-16 14:34:17,823] Trial 2 finished with value: 0.32988282970663374 and parameters: {'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 5, 'criterion': 'gini', 'splitter': 'best'}. Best is trial 2 with value: 0.32988282970663374.
[I 2023-04-16 14:34:29,687] Trial 3 finished with value: 0.33430241094764046 a

#### **Saving study**

In [7]:
save_path = './optuna_studies/decision_tree_study.pkl'

# joblib.dump does not create missing folders. Make sure the target directory
# exists so the finished study is not lost to a FileNotFoundError on a fresh
# checkout.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)

# Kept as the last expression so the cell still displays the written file list.
joblib.dump(study, save_path)

['./optuna_studies/decision_tree_study.pkl']