In [None]:
# !python -m pip install lightning
# !pip install optuna-integration
# !pip install optuna-distributed
# %pip install -U optuna

In [1]:
import time
import os
import pickle
from pathlib import Path
import json
from typing import List, Optional
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm as notebook_tqdm

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

import optuna
from optuna.visualization.matplotlib import (plot_param_importances, plot_contour, plot_optimization_history,
                                            plot_slice, plot_parallel_coordinate, plot_rank)
from optuna.integration import PyTorchLightningPruningCallback
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
SEED = 440

DIR = os.getcwd()
TRAIN_FEATURES_PATH = DIR + "/data" + "/train_features_X4juyT6.csv"
TRAIN_LABELS_PATH = DIR + "/data" + "/train_labels_JxtENGl.csv"
RF_TUNING_DIR = DIR + "/tuning" + "/random_forest"

# encoded paths
DBERTA_CME_PATH = DIR + "/data" + "/cme_deberta_enc.npy"
GTE_CME_PATH = DIR + "/data" + "/cme_gte_enc.npy"
GTE_LE_PATH = DIR + "/data" + "/le_gte_enc.npy"

In [3]:
train_features_df = pd.read_csv(TRAIN_FEATURES_PATH)
train_labels_df = pd.read_csv(TRAIN_LABELS_PATH)

#dbert_cme_features = np.load(DBERTA_CME_PATH)

In [None]:
print("DeBERTa encoding has shape:", dbert_cme_features.shape)
print("The memory size of numpy array arr is:",dbert_cme_features.itemsize*dbert_cme_features.size,"bytes")

#### GTE CME Load

In [None]:
gte_cme_features = np.load(GTE_CME_PATH)

# add uid to encoded array as df
gte_cme_features_df = pd.DataFrame(gte_cme_features)
gte_cme_features_df["uid"] = train_labels_df["uid"].values

#### GTE LE Load

In [4]:
gte_le_features = np.load(GTE_LE_PATH)

# add uid to encoded array as df
gte_le_features_df = pd.DataFrame(gte_le_features)
gte_le_features_df["uid"] = train_labels_df["uid"].values

In [5]:
def save_study_charts(study: optuna.study,
                      study_name: str,
                      dir: str) -> None:

    # save charts of trials
    fig = plt.figure(figsize=(8,8))
    plot_optimization_history(study)
    plt.savefig(f"{dir}/{study_name}_opt_hist.png")

    plt.clf()
    fig = plt.figure(figsize=(12,12))
    plot_parallel_coordinate(study)
    plt.savefig(f"{dir}/{study_name}_parallel_coord.png")

    plt.clf()
    fig = plt.figure(figsize=(12,12))
    plot_param_importances(study)
    plt.savefig(f"{dir}/{study_name}_param_importances.png")

    plt.clf()
    fig = plt.figure(figsize=(12,12))
    plot_rank(study)
    plt.savefig(f"{dir}/{study_name}_rank.png")

    plt.clf()
    fig = plt.figure()
    plot_slice(study)
    plt.savefig(f"{dir}/{study_name}_slice.png")

In [6]:
# add uid to encoded array as df
dbert_cme_features_df = pd.DataFrame(dbert_cme_features)
dbert_cme_features_df["uid"] = train_labels_df["uid"].values

NameError: name 'dbert_cme_features' is not defined

In [7]:
def split_data(text_df, 
               labels_df,
               #features,
               test_perc=0.15, 
               val_perc=0.15, 
               test_seed=42, 
               val_seed=74) -> tuple[pd.DataFrame]:
    
    val_size = int(len(text_df) * val_perc)
    X_tr, X_te, y_tr, y_te = train_test_split(text_df, labels_df, test_size=test_perc, shuffle=True, 
                                             random_state=test_seed)
    #val_perc = val_size // len(X_tr)
    X_tr, X_val, y_tr, y_val = train_test_split(X_tr, y_tr, test_size=val_size, shuffle=True,
                                               random_state=val_seed)
    
    #y_tr_sub, y_val_sub, y_te_sub = y_tr[features], y_val[features], y_te[features]
    
    return X_tr, X_val, X_te, y_tr, y_val, y_te

In [None]:
X_train, X_val, X_test, y_train, y_val, y_test = split_data(dbert_cme_features_df, train_labels_df)

In [None]:
# scaling
dbert_cme_scaler = MinMaxScaler()

X_train.drop("uid", axis=1, inplace=True)
X_val.drop("uid", axis=1, inplace=True)
X_test.drop("uid", axis=1, inplace=True)

dbert_cme_scaler.fit(X_train)

# leaves as dataframes
dbert_cme_train = dbert_cme_scaler.transform(X_train.to_numpy())
dbert_cme_val = dbert_cme_scaler.transform(X_val.to_numpy())
dbert_cme_test = dbert_cme_scaler.transform(X_test.to_numpy())

#### GTE CME Encoded Data Split

In [None]:
X_train, X_val, X_test, y_train, y_val, y_test = split_data(gte_cme_features_df, train_labels_df)

In [None]:
# scaling
gte_cme_scaler = MinMaxScaler()

X_train.drop("uid", axis=1, inplace=True)
X_val.drop("uid", axis=1, inplace=True)
X_test.drop("uid", axis=1, inplace=True)

gte_cme_scaler.fit(X_train)

# leaves as dataframes
gte_cme_train = gte_cme_scaler.transform(X_train.to_numpy())
gte_cme_val = gte_cme_scaler.transform(X_val.to_numpy())
gte_cme_test = gte_cme_scaler.transform(X_test.to_numpy())

#### GTE LE Encoded Data Split

In [8]:
X_train, X_val, X_test, y_train, y_val, y_test = split_data(gte_le_features_df, train_labels_df)

In [9]:
# scaling
gte_le_scaler = MinMaxScaler()

X_train.drop("uid", axis=1, inplace=True)
X_val.drop("uid", axis=1, inplace=True)
X_test.drop("uid", axis=1, inplace=True)

gte_le_scaler.fit(X_train)

# leaves as dataframes
gte_le_train = gte_le_scaler.transform(X_train.to_numpy())
gte_le_val = gte_le_scaler.transform(X_val.to_numpy())
gte_le_test = gte_le_scaler.transform(X_test.to_numpy())

# Tree Methods

In [10]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier

import xgboost as xgb

# set average="binary"/"micro"
from sklearn.metrics import f1_score

In [None]:
# will have to fit separately for location and weapon each separately
y_train_binary, y_val_binary, y_test_binary = y_train.iloc[:, :-2], y_val.iloc[:, :-2], y_test.iloc[:, :-2]

In [11]:
def multi_f1_binary(preds: np.ndarray, actuals: np.ndarray) -> float:
    
    preds, actuals = preds.T, actuals.T # (tasks, samples)
    per_task_f1 = []
    for p, a in zip(preds, actuals):
        per_task_f1.append(f1_score(a, p, average="binary"))
    
    return np.sum(per_task_f1)

def multi_var_f1(binary_preds: np.ndarray, 
                 binary_actuals: np.ndarray,
                 weapon_preds: np.ndarray, 
                 weapon_actuals: np.ndarray,
                 location_preds: np.ndarray,
                 loaction_actuals: np.ndarray) -> float:
    
    num_tasks = binary_preds.shape[-1] + 2
    
    weapon_micro_f1 = f1_score(weapon_actuals, weapon_preds, average="micro")
    location_micro_f1 = f1_score(location_actuals, location_preds, average="micro")
    binary_f1 = multi_f1_binary(binary_preds, binary_actuals)
    
    return (weapon_micro_f1 + location_micro_f1 + binary_f1) / num_tasks

In [12]:
def sample_rf_params(trial: optuna.trial) -> dict:
    
    n_estimators = trial.suggest_int("n_estimators", low=700, high=1600, step=100)
    max_depth = trial.suggest_int("max_depth", low=1, high=60, step=1)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", low=1, high=4, step=1)
    
    return {"n_estimators": n_estimators, "max_depth": max_depth, "min_samples_leaf": min_samples_leaf}

def sample_xgb_params(trial: optuna.trial) -> dict:
    
    min_child_weight = trial.suggest_int("min_child_weight", low=1, high=10, step=1)
    subsample = trial.suggest_float("subsample", 0.5, 0.9, step=0.1)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.5, 0.9, step=0.1)
    eta = trial.suggest_float("eta", 0.01, 0.2, log=True)
    gamma = trial.suggest_int("gamma", 0, 10, 1)
    lamda = trial.suggest_float("lambda", 1, 20, log=True)
    alpha = trial.suggest_float("alpha", 1, 20, log=True)
    max_depth = trial.suggest_int("max_depth", 1, 32, step=1)
    
    return {
        "min_child_weight": min_child_weight,
        "subsample": subsample,
        "colsample_bytree": colsample_bytree,
        "eta": eta,
        "gamma": gamma,
        "lambda": lamda,
        "alpha": alpha,
        "max_depth": max_depth
        }

def TreeObjective(trial: optuna.trial,
                  seed: int,
                  X_train: np.ndarray,
                  X_val: np.ndarray,
                  y_train: np.ndarray,
                  y_val: np.ndarray,
                  algo) -> float:
    
    params = sample_rf_params(trial)
    
    if algo.__name__ == "RandomForestClassifier":
        model = MultiOutputClassifier(algo(**params, random_state=seed)).fit(X_train, y_train)
    else:
        model = algo(**params, multi_strategy="multi_output_tree", seed=seed).fit(X_train, y_train)
    
    preds = model.predict(X_val)
    
    return multi_f1_binary(preds=preds, actuals=y_val) / y_train.shape[-1]

## Random Forest

### Tuning Multi-Output w/ All Binaries

In [None]:
# study identifiers
study_name = "rf_study2"
storage_name = "sqlite:///{}.db".format(study_name)

# init classifier
clf = RandomForestClassifier

# data to numpy
y_train = y_train_binary.drop("uid", axis=1).to_numpy()
y_val = y_val_binary.drop("uid", axis=1).to_numpy()

# tuning directory for model
if not os.path.exists(RF_TUNING_DIR):
    os.makedirs(RF_TUNING_DIR)

# check for a saved sampler
try:
    sampler = pickle.load(open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "rb"))
except (OSError, IOError) as e:
    sampler = TPESampler(seed=SEED)

pruner = MedianPruner()
    
# create study and optimize
study = optuna.create_study(study_name=study_name, storage=storage_name,
                                sampler=sampler, pruner=pruner, direction="maximize",
                                load_if_exists=True)

study.optimize(lambda trial: TreeObjective(trial, seed=SEED, 
                                           X_train=dbert_cme_train, X_val=dbert_cme_val,
                                           y_train=y_train, y_val=y_val, algo=clf), 
               n_trials=100, timeout=1500)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


with open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "wb") as fout:
    pickle.dump(study.sampler, fout)

# saving charts from study
try:
    save_study_charts(study=study, study_name=study_name, dir=RF_TUNING_DIR)
except Exception as e:
    print(f"Error when trying to save charts: {e}")

### Tuning Multi-Output Subset Binaries
'DepressedMood', 'MentalIllnessTreatmentCurrnt','HistoryMentalIllnessTreatmnt', 'SuicideAttemptHistory', 'SuicideThoughtHistory'

In [None]:
feat_names = ['DepressedMood','MentalIllnessTreatmentCurrnt','HistoryMentalIllnessTreatmnt', 'SuicideAttemptHistory','SuicideThoughtHistory']
y_train_sub5 = y_train[feat_names].to_numpy()
y_val_sub5 = y_val[feat_names].to_numpy()
y_test_sub5 = y_test[feat_names].to_numpy()

In [None]:
# study identifiers
study_name = "rf_study_first5_binary_features"
#storage_name = "sqlite:///{}.db".format(study_name)
storage_name = optuna.storages.JournalStorage(
    optuna.storages.journal.JournalFileBackend("./optuna_journal_storage_rf_first5feats.log")
)

# init classifier
clf = RandomForestClassifier

# tuning directory for model
if not os.path.exists(RF_TUNING_DIR):
    os.makedirs(RF_TUNING_DIR)

# check for a saved sampler
try:
    sampler = pickle.load(open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "rb"))
except (OSError, IOError) as e:
    sampler = TPESampler(seed=SEED)

pruner = MedianPruner()
    
# create study and optimize
study = optuna.create_study(study_name=study_name, storage=storage_name,
                                sampler=sampler, pruner=pruner, direction="maximize",
                                load_if_exists=True)


study.optimize(lambda trial: TreeObjective(trial, seed=SEED, 
                                           X_train=dbert_cme_train, X_val=dbert_cme_val,
                                           y_train=y_train_sub5, y_val=y_val_sub5, algo=clf), 
               n_trials=55, timeout=1500)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


with open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "wb") as fout:
    pickle.dump(study.sampler, fout)

# saving charts from study
try:
    save_study_charts(study=study, study_name=study_name, dir=RF_TUNING_DIR)
except Exception as e:
    print(f"Error when trying to save charts: {e}")

### Tuning RF for Weapons Only

In [13]:
def MicroTreeObjective(trial: optuna.trial,
                  seed: int,
                  X_train: np.ndarray,
                  X_val: np.ndarray,
                  y_train: np.ndarray,
                  y_val: np.ndarray,
                  algo) -> float:
    
    params = sample_rf_params(trial)
    model = OneVsRestClassifier(algo(**params, random_state=seed)).fit(X_train, y_train)
    preds = model.predict(X_val)
    
    return f1_score(y_val, preds, average='micro')

In [14]:
feat_names = ['WeaponType1']
y_train_weapon = y_train[feat_names].to_numpy()
y_val_weapon = y_val[feat_names].to_numpy()
y_test_weapon = y_test[feat_names].to_numpy()

In [None]:
# study identifiers
study_name = "rf_study_weaponOnly"
#storage_name = "sqlite:///{}.db".format(study_name)
storage_name = optuna.storages.JournalStorage(
    optuna.storages.journal.JournalFileBackend("./optuna_journal_storage_rf_weaponOnly.log")
)

# init classifier
clf = RandomForestClassifier

# tuning directory for model
if not os.path.exists(RF_TUNING_DIR):
    os.makedirs(RF_TUNING_DIR)

# check for a saved sampler
try:
    sampler = pickle.load(open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "rb"))
except (OSError, IOError) as e:
    sampler = TPESampler(seed=SEED)

pruner = MedianPruner()
    
# create study and optimize
study = optuna.create_study(study_name=study_name, storage=storage_name,
                                sampler=sampler, pruner=pruner, direction="maximize",
                                load_if_exists=True)


study.optimize(lambda trial: MicroTreeObjective(trial, seed=SEED, 
                                           X_train=dbert_cme_train, X_val=dbert_cme_val,
                                           y_train=y_train_weapon, y_val=y_val_weapon, algo=clf), 
               n_trials=50, timeout=2000)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


with open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "wb") as fout:
    pickle.dump(study.sampler, fout)

# saving charts from study
try:
    save_study_charts(study=study, study_name=study_name, dir=RF_TUNING_DIR)
except Exception as e:
    print(f"Error when trying to save charts: {e}")

#### Trying GTE CME Encoded on Weapon

In [None]:
# study identifiers
study_name = "rf_study_weaponOnly_gte_cme"
#storage_name = "sqlite:///{}.db".format(study_name)
storage_name = optuna.storages.JournalStorage(
    optuna.storages.journal.JournalFileBackend("./optuna_journal_storage_rf_weaponOnly_gte_cme.log")
)

# init classifier
clf = RandomForestClassifier

# tuning directory for model
if not os.path.exists(RF_TUNING_DIR):
    os.makedirs(RF_TUNING_DIR)

# check for a saved sampler
try:
    sampler = pickle.load(open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "rb"))
except (OSError, IOError) as e:
    sampler = TPESampler(seed=SEED)

pruner = MedianPruner()
    
# create study and optimize
study = optuna.create_study(study_name=study_name, storage=storage_name,
                                sampler=sampler, pruner=pruner, direction="maximize",
                                load_if_exists=True)


study.optimize(lambda trial: MicroTreeObjective(trial, seed=SEED, 
                                           X_train=gte_cme_train, X_val=gte_cme_val,
                                           y_train=y_train_weapon, y_val=y_val_weapon, algo=clf), 
               n_trials=50, timeout=3000)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


with open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "wb") as fout:
    pickle.dump(study.sampler, fout)

# saving charts from study
try:
    save_study_charts(study=study, study_name=study_name, dir=RF_TUNING_DIR)
except Exception as e:
    print(f"Error when trying to save charts: {e}")


#### Trying GTE LE Encoded for Weapon

In [15]:
# study identifiers
study_name = "rf_study_weaponOnly_gte_le"
#storage_name = "sqlite:///{}.db".format(study_name)
storage_name = optuna.storages.JournalStorage(
    optuna.storages.journal.JournalFileBackend("./optuna_journal_storage_rf_weaponOnly_gte_le.log")
)

# init classifier
clf = RandomForestClassifier

# tuning directory for model
if not os.path.exists(RF_TUNING_DIR):
    os.makedirs(RF_TUNING_DIR)

# check for a saved sampler
try:
    sampler = pickle.load(open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "rb"))
except (OSError, IOError) as e:
    sampler = TPESampler(seed=SEED)

pruner = MedianPruner()
    
# create study and optimize
study = optuna.create_study(study_name=study_name, storage=storage_name,
                                sampler=sampler, pruner=pruner, direction="maximize",
                                load_if_exists=True)


study.optimize(lambda trial: MicroTreeObjective(trial, seed=SEED, 
                                           X_train=gte_le_train, X_val=gte_le_val,
                                           y_train=y_train_weapon, y_val=y_val_weapon, algo=clf), 
               n_trials=50, timeout=3000)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


with open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "wb") as fout:
    pickle.dump(study.sampler, fout)

# saving charts from study
try:
    save_study_charts(study=study, study_name=study_name, dir=RF_TUNING_DIR)
except Exception as e:
    print(f"Error when trying to save charts: {e}")


[I 2024-10-11 11:04:13,886] A new study created in Journal with name: rf_study_weaponOnly_gte_le
[I 2024-10-11 11:10:18,058] Trial 0 finished with value: 0.885 and parameters: {'n_estimators': 800, 'max_depth': 58, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.885.
[I 2024-10-11 11:18:16,774] Trial 1 finished with value: 0.8816666666666667 and parameters: {'n_estimators': 1000, 'max_depth': 37, 'min_samples_leaf': 3}. Best is trial 0 with value: 0.885.
[I 2024-10-11 11:24:15,456] Trial 2 finished with value: 0.8866666666666667 and parameters: {'n_estimators': 800, 'max_depth': 11, 'min_samples_leaf': 2}. Best is trial 2 with value: 0.8866666666666667.
[I 2024-10-11 11:32:11,081] Trial 3 finished with value: 0.8866666666666667 and parameters: {'n_estimators': 900, 'max_depth': 39, 'min_samples_leaf': 2}. Best is trial 2 with value: 0.8866666666666667.
[W 2024-10-11 11:44:48,757] Trial 4 failed with parameters: {'n_estimators': 1600, 'max_depth': 57, 'min_samples_leaf': 3} becaus

KeyboardInterrupt: 

### Tuning RF for 2 Suicide Qs Only

In [None]:
feat_names = ['SuicideAttemptHistory','SuicideThoughtHistory']
y_train_sui = y_train[feat_names].to_numpy()
y_val_sui = y_val[feat_names].to_numpy()
y_test_sui = y_test[feat_names].to_numpy()

In [None]:
# study identifiers
study_name = "rf_study_suicideOnly"
#storage_name = "sqlite:///{}.db".format(study_name)
storage_name = optuna.storages.JournalStorage(
    optuna.storages.journal.JournalFileBackend("./optuna_journal_storage_rf_suicideOnly.log")
)

# init classifier
clf = RandomForestClassifier

# tuning directory for model
if not os.path.exists(RF_TUNING_DIR):
    os.makedirs(RF_TUNING_DIR)

# check for a saved sampler
try:
    sampler = pickle.load(open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "rb"))
except (OSError, IOError) as e:
    sampler = TPESampler(seed=SEED)

pruner = MedianPruner()
    
# create study and optimize
study = optuna.create_study(study_name=study_name, storage=storage_name,
                                sampler=sampler, pruner=pruner, direction="maximize",
                                load_if_exists=True)


study.optimize(lambda trial: TreeObjective(trial, seed=SEED, 
                                           X_train=dbert_cme_train, X_val=dbert_cme_val,
                                           y_train=y_train_sui, y_val=y_val_sui, algo=clf), 
               n_trials=50, timeout=2000)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


with open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "wb") as fout:
    pickle.dump(study.sampler, fout)

# saving charts from study
try:
    save_study_charts(study=study, study_name=study_name, dir=RF_TUNING_DIR)
except Exception as e:
    print(f"Error when trying to save charts: {e}")

#### GTE Encoded 

In [None]:
# study identifiers
study_name = "rf_study_suicideOnly"
#storage_name = "sqlite:///{}.db".format(study_name)
storage_name = optuna.storages.JournalStorage(
    optuna.storages.journal.JournalFileBackend("./optuna_journal_storage_rf_suicideOnly.log")
)

# init classifier
clf = RandomForestClassifier

# tuning directory for model
if not os.path.exists(RF_TUNING_DIR):
    os.makedirs(RF_TUNING_DIR)

# check for a saved sampler
try:
    sampler = pickle.load(open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "rb"))
except (OSError, IOError) as e:
    sampler = TPESampler(seed=SEED)

pruner = MedianPruner()
    
# create study and optimize
study = optuna.create_study(study_name=study_name, storage=storage_name,
                                sampler=sampler, pruner=pruner, direction="maximize",
                                load_if_exists=True)


study.optimize(lambda trial: TreeObjective(trial, seed=SEED, 
                                           X_train=gte_cme_train, X_val=gte_cme_val,
                                           y_train=y_train_sui, y_val=y_val_sui, algo=clf), 
               n_trials=50, timeout=2000)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


with open(f"{RF_TUNING_DIR}/{study_name}_sampler.pkl", "wb") as fout:
    pickle.dump(study.sampler, fout)

# saving charts from study
try:
    save_study_charts(study=study, study_name=study_name, dir=RF_TUNING_DIR)
except Exception as e:
    print(f"Error when trying to save charts: {e}")

# MLP Optuna Hyperparameter Tuning

In [None]:
# how to have one optimizer and multiple sets of parameters
#Adam(itertools.chain(net1.parameters(), net2.parameters(), ...)

In [None]:
import torch
from torch import nn
import lightning.pytorch as pl

#from transformers import AutoModel, AutoTokenizer

In [None]:
class FCN(nn.Module):
    
    def __init__(self, 
                 in_features: int,
                 out_dims: List[int],
                 task_classes: List[int],
                 dropout: float):
        super().__init__()
        
        layers = []
        input_size = in_features
        for dim in out_dims:
            layers.append(nn.Linear(input_size, dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            input_size = dim
        
        self.base_model = nn.Sequential(*layers)
        self.classification_layers = []
        for task in task_classes:
            if task > 2: 
                self.classification_layers.append(nn.Sequential(nn.Linear(input_size, task),
                                                          nn.Softmax(dim=1)))
            else:
                self.classification_layers.append(nn.Sequential(nn.Linear(input_size, task),
                                                          nn.Sigmoid()))
    def forward(self, x: torch.tensor) -> List[torch.tensor]:
        
        out = self.base_model(x)
        output_labels = []
        for class_head in self.classification_layers:
            output_labels.append(class_head(out))
        
        return output_labels

class LightningNet(pl.LightningModule):
    
    def __init__(self, 
                 in_features: int,
                 out_dims: List[int],
                 task_classes: List[int],
                 dropout: float):
        super().__init__()
        
        self.model = FCN(in_features, out_dims, task_classes, dropout)
        
    def forward(self, x: torch.tensor) -> list[torch.tensor]:
        
        self.model(x)
    
    def training_step(self, batch):
        
        data, targets

In [None]:
def objective(trial: optuna.trial):
    
    n_layers = trial.suggest_int("n_layers", 2, 4)
    
    layers = []
    in_features = 1024
    for i in range(n_layers):
        out_features = trial.suggest_int