In [1]:
import os
import pickle

import optuna

from HPO_utils import make_objective
from Parser import parse_dataset, split_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Hyper-parameter search driver: for every Drain configuration in
# `drain_params`, re-parse and re-split the dataset, run an Optuna study on the
# objective built by `make_objective`, print the best result and pickle the
# study under `results_dir`.
dataset_name = "Windows"
results_dir = f"HPO_results/{dataset_name}"

# Create the output directory up front so the pickle dump at the end of a
# multi-minute study cannot fail with FileNotFoundError on a fresh checkout.
os.makedirs(results_dir, exist_ok=True)

# Full Drain grid for a complete sweep. It is NOT iterated in this run; it is
# kept under its own name so it is no longer silently overwritten by the
# reduced grid below.
drain_params_full = {
    'st': [0.4, 0.5, 0.6],
    'depth': [4, 5, 6]
}

# Reduced grid actually used by the loop (a single configuration).
# To run the complete sweep, set `drain_params = drain_params_full`.
drain_params = {
    'st': [0.4],
    'depth': [5]
}

for st in drain_params['st']:
    for depth in drain_params['depth']:
        print(f"Parsing dataset with Drain st={st} depth={depth}...")
        parse_dataset(
            dataset_name=dataset_name,
            st=st,
            depth=depth
        )
        # The split is redone after every parse with a fixed data seed.
        # NOTE(review): presumably split_dataset consumes the output that
        # parse_dataset just wrote -- confirm in Parser.
        split_dataset(
            dataset_name=dataset_name,
            data_seed=42
        )

        # A fresh study per Drain configuration; the sampler is re-seeded each
        # time so every configuration starts from the same sampler state.
        study = optuna.create_study(
            direction="maximize",
            sampler=optuna.samplers.TPESampler(seed=42)
        )

        study.optimize(make_objective(dataset_name), n_trials=10)

        print("Best TOP-5 accuracy:", study.best_value)
        print("Best params:", study.best_params)

        # One pickle per (st, depth) pair so runs do not overwrite each other.
        with open(f"{results_dir}/study_st{st}_depth{depth}.pkl", "wb") as f:
            pickle.dump(study, f)


Parsing dataset with Drain st=0.4 depth=5...
Parsing file: /home/ubuntu/bsc/BootDet/DeepLog-master/Data/Windows/Windows.log
Total lines:  19617
Processed 5.1% of log lines.
Processed 10.2% of log lines.
Processed 15.3% of log lines.
Processed 20.4% of log lines.
Processed 25.5% of log lines.
Processed 30.6% of log lines.
Processed 35.7% of log lines.
Processed 40.8% of log lines.
Processed 45.9% of log lines.
Processed 51.0% of log lines.
Processed 56.1% of log lines.
Processed 61.2% of log lines.
Processed 66.3% of log lines.
Processed 71.4% of log lines.
Processed 76.5% of log lines.
Processed 81.6% of log lines.
Processed 86.7% of log lines.
Processed 91.8% of log lines.
Processed 96.9% of log lines.
Processed 100.0% of log lines.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df.drop(columns=["label"], inplace=True)
[I 2026-01-03 14:02:09,906] A new study created in memory with name: no-name-8b51b579-c22e-4ac8-9558-3df87b85c5ba


Parsing done. [Time taken: 0:00:08.838273]
Train IDs: [  6 136 123 168  86  57  96  36 190  43  66 105 160  79  77  30 137  61
  52 166  39  32 174  13 110  42 133 101  27 129 187  99   3  47 138  91
 146  37 186  23 127 148  34  12 125 155   7  28 162   5  33 144 141  98
 140  11  63 182 142   1 175 149  71 115  65  45 126  29  41 183 151 193
  26  24 147 143  82 109  80  85  40  62 121  87  78 189 164  48  95 178
  44 157   4 106  54 134 176 153 181  50  81  35   8 111  92  84 172 177
  90   9  14  60 167 132  18  73 171 135 163 179  64  55 108  51 191  59
  49  89  22  58 161 188 130  38 158 184   2  53 150 131 152 104 100 117
  88  75 122 194  21  72 107  15  93 180 103]
Val IDs: [139  17 156  97  69 154  56  16 113 112 185  19  83  10 165 118  70 114
 192 120 124 145  67  46 159 116  68  94  31 102 119  76  25 173 128 170
  20 169  74]
Early stopping triggered.
[Epoch 1/1] average loss = 0.0000 ###########################............. ( 66.67%) runtime 0:00:00.1

[I 2026-01-03 14:03:44,569] Trial 0 finished with value: 0.9191337823867798 and parameters: {'window': 20, 'hidden_size': 32, 'batch_size': 32, 'lr': 0.0001}. Best is trial 0 with value: 0.9191337823867798.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:00.2
Early stopping triggered.
[Epoch 1/1] average loss = 0.0000 #####################################... ( 92.11%) runtime 0:00:02.0

[I 2026-01-03 14:04:27,237] Trial 1 finished with value: 0.8689693212509155 and parameters: {'window': 30, 'hidden_size': 256, 'batch_size': 256, 'lr': 0.0001}. Best is trial 0 with value: 0.9191337823867798.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:02.2
Early stopping triggered.
[Epoch 1/1] average loss = 0.0000 ####################################.... ( 91.23%) runtime 0:00:02.1

[I 2026-01-03 14:05:07,159] Trial 2 finished with value: 0.9780701994895935 and parameters: {'window': 30, 'hidden_size': 256, 'batch_size': 32, 'lr': 0.001}. Best is trial 2 with value: 0.9780701994895935.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:02.2
Early stopping triggered.
[Epoch 1/1] average loss = 0.0000 #########################............... ( 62.28%) runtime 0:00:00.1

[I 2026-01-03 14:05:56,598] Trial 3 finished with value: 0.9709429740905762 and parameters: {'window': 20, 'hidden_size': 32, 'batch_size': 64, 'lr': 0.001}. Best is trial 2 with value: 0.9780701994895935.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:00.2
Early stopping triggered.
[Epoch 1/1] average loss = 0.0000 ##################################...... ( 85.96%) runtime 0:00:01.2

[I 2026-01-03 14:06:32,091] Trial 4 finished with value: 0.9731359481811523 and parameters: {'window': 20, 'hidden_size': 256, 'batch_size': 128, 'lr': 0.001}. Best is trial 2 with value: 0.9780701994895935.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:01.4


[I 2026-01-03 14:07:02,974] Trial 5 finished with value: 0.8848684430122375 and parameters: {'window': 10, 'hidden_size': 32, 'batch_size': 256, 'lr': 0.0001}. Best is trial 2 with value: 0.9780701994895935.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:00.1
Early stopping triggered.
[Epoch 1/1] average loss = 0.0000 #####################################... ( 92.98%) runtime 0:00:00.5

[I 2026-01-03 14:07:17,769] Trial 6 finished with value: 0.9018640518188477 and parameters: {'window': 30, 'hidden_size': 64, 'batch_size': 256, 'lr': 0.0003}. Best is trial 2 with value: 0.9780701994895935.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:00.6
Early stopping triggered.
[Epoch 1/1] average loss = 0.0000 #####################################... ( 92.11%) runtime 0:00:01.3

[I 2026-01-03 14:08:20,214] Trial 7 finished with value: 0.9635416865348816 and parameters: {'window': 20, 'hidden_size': 256, 'batch_size': 128, 'lr': 0.0003}. Best is trial 2 with value: 0.9780701994895935.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:01.4
Early stopping triggered.
[Epoch 1/1] average loss = 0.0000 #################################....... ( 82.46%) runtime 0:00:00.5

[I 2026-01-03 14:08:36,538] Trial 8 finished with value: 0.969298243522644 and parameters: {'window': 10, 'hidden_size': 256, 'batch_size': 128, 'lr': 0.001}. Best is trial 2 with value: 0.9780701994895935.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:00.7
Early stopping triggered.
[Epoch 1/1] average loss = 0.0000 ###########################............. ( 67.54%) runtime 0:00:00.1

[I 2026-01-03 14:09:14,063] Trial 9 finished with value: 0.9646381735801697 and parameters: {'window': 20, 'hidden_size': 32, 'batch_size': 64, 'lr': 0.001}. Best is trial 2 with value: 0.9780701994895935.


[Epoch 1/1] average loss = 0.0000 ######################################## (100.00%) runtime 0:00:00.2
Best TOP-5 accuracy: 0.9780701994895935
Best params: {'window': 30, 'hidden_size': 256, 'batch_size': 32, 'lr': 0.001}
