In [1]:
import os
import pickle
from itertools import product

from HPO_utils import run_pareto, pick_solution
from Main import prepare_experiment
from Parser import parse_dataset, generate_embeddings
from GraphGeneration import generate_graphs

1. Przetwórz dataset dla danego st i depth (Drain)
2. Stwórz zanurzenia semantyczne (GloVe)
3. Wygeneruj grafy (graph_st_depth)
4. HPO
    - prepare_experiment()
    - run_experiment()
5. Wróć do pkt 1 dla kolejnej kombinacji (st, depth).

In [None]:
# Grid search over Drain parser settings: for every (st, depth) combination the
# dataset is re-parsed, embeddings and graphs are regenerated, a Pareto HPO
# study is run, and the selected trial is pickled to `results_path`.

# --- Configuration -----------------------------------------------------------
results_path = './HPO_results/'
dataset_name = 'Linux'

# Every combination of these two lists is evaluated.
drain_params = {
    'st': [0.4, 0.5, 0.6],
    'depth': [4, 5, 6]
}

# HPO settings, named once so the value passed to pick_solution() and the
# value reported in the "no solution" message cannot drift apart.
hpo_trials = 20
hpo_seed = 1213      # the same seed is reused for every (st, depth) combination
min_recall = 0.8

# Create the output directory up front: otherwise a missing folder would make
# open() fail only after a complete parse -> embed -> graph -> HPO pass.
os.makedirs(results_path, exist_ok=True)

# --- Grid search -------------------------------------------------------------
for st, depth in product(drain_params['st'], drain_params['depth']):
    print(f"\nProcessing dataset with Drain st={st}, depth={depth}...")

    # Only parse_dataset() receives st/depth; the later stages are called with
    # the dataset name alone.
    # NOTE(review): presumably each stage consumes the artifacts written by the
    # previous one, so the call order matters — confirm against the modules.
    parse_dataset(dataset_name, st=st, depth=depth)
    generate_embeddings(dataset_name)
    generate_graphs(dataset_name)
    prepare_experiment(dataset_name)

    study = run_pareto(n_trials=hpo_trials, seed=hpo_seed, dataset_name=dataset_name)
    best = pick_solution(study, min_recall=min_recall)

    # pick_solution() returning None is treated as "nothing met the recall
    # floor"; no file is written for this combination.
    if best is None:
        print(f"No solution found with recall >= {min_recall}")
        continue

    print("\nChosen solution:")
    print(f"  recall={best.values[0]:.4f}, precision={best.values[1]:.4f}, thr={best.user_attrs.get('thr')}")
    print(f"  params={best.params}")

    # os.path.join keeps the path valid whether or not results_path ends
    # with a separator.
    result_file = os.path.join(
        results_path,
        f'HPO_results_{dataset_name}_st{st}_depth{depth}.pkl',
    )
    with open(result_file, 'wb') as f:
        pickle.dump(best, f)


Processing dataset with Drain st=0.4, depth=4...
Parsing file: /home/ubuntu/bsc/BootDet/Log2Graph/Data/Linux/Linux.log
Total lines:  548633
Processed 0.2% of log lines.
Processed 0.4% of log lines.
Processed 0.5% of log lines.
Processed 0.7% of log lines.
Processed 0.9% of log lines.
Processed 1.1% of log lines.
Processed 1.3% of log lines.
Processed 1.5% of log lines.
Processed 1.6% of log lines.
Processed 1.8% of log lines.
Processed 2.0% of log lines.
Processed 2.2% of log lines.
Processed 2.4% of log lines.
Processed 2.6% of log lines.
Processed 2.7% of log lines.
Processed 2.9% of log lines.
Processed 3.1% of log lines.
Processed 3.3% of log lines.
Processed 3.5% of log lines.
Processed 3.6% of log lines.
Processed 3.8% of log lines.
Processed 4.0% of log lines.
Processed 4.2% of log lines.
Processed 4.4% of log lines.
Processed 4.6% of log lines.
Processed 4.7% of log lines.
Processed 4.9% of log lines.
Processed 5.1% of log lines.
Processed 5.3% of log lines.
Processed 5.5% of 

100%|██████████| 300/300 [03:25<00:00,  1.46it/s]


Saved GraphIndex → GroupId mapping to /home/ubuntu/bsc/BootDet/Log2Graph/Data/Linux/graph_ids.csv


[I 2025-12-29 02:32:31,244] A new study created in memory with name: no-name-a63fa47f-191e-4819-9014-719bbf3f7f6e
Processing...
