In [None]:
import traceback

from data.data import DataProcessor
from evaluation.evaluation import ResultsManager
from experiments.resnet_experiment import TabularResNetExperiment

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
CACHE_DIR = "./data/openml_cache"
RESULTS_DIR = "./results/resnet"
MODEL_NAME = "resnet"

SEED = 123
N_TRIALS = 50  # Optuna trials
CV_FOLDS = 10  # Cross-validation folds


def main():
    print("=" * 80)
    print("TABULAR RESNET - EXPERIMENT PIPELINE")
    print("=" * 80)
    print(f"Model: {MODEL_NAME}")
    print(f"Seed: {SEED}")
    print(f"Optuna trials: {N_TRIALS}")
    print(f"CV folds: {CV_FOLDS}")
    print(f"Results directory: {RESULTS_DIR}")
    print("=" * 80)

    # Initialize data processor and results manager
    processor = DataProcessor(seed=SEED, cache_dir=CACHE_DIR)
    results_manager = ResultsManager(save_dir=RESULTS_DIR, model_name=MODEL_NAME)

    # Find all processed PKL files
    pkl_files = list(processor.cache_dir.glob("*_dataset.pkl"))

    if not pkl_files:
        print("\nNo processed datasets found!")
        print(f"Looking in: {processor.cache_dir}")
        print("\nPlease run one of these first:")
        print("  1. python data.py (Pattern 1: to download and process)")
        print("  2. python data.py (Pattern 2: to process from raw CSVs)")
        return

    print(f"\n✓ Found {len(pkl_files)} processed datasets\n")

    # Run experiments on all datasets
    failed_datasets = []

    for idx, pkl_file in enumerate(pkl_files, 1):
        # Extract dataset name
        dataset_name = pkl_file.stem.replace("_dataset", "")

        print(f"\n{'#' * 80}")
        print(f"DATASET {idx}/{len(pkl_files)}: {dataset_name}")
        print(f"{'#' * 80}")

        try:
            # Load processed dataset (numpy arrays)
            X_train, y_train, X_test, y_test = processor.load_or_process_dataset(
                name=dataset_name
            )

            # Get number of classes
            n_classes = len(set(y_train))

            # Create experiment
            experiment = TabularResNetExperiment(
                dataset_name=dataset_name,
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                n_classes=n_classes,
                n_trials=N_TRIALS,
                cv_folds=CV_FOLDS,
                seed=SEED,
            )

            # Run complete experiment
            results = experiment.run_complete_experiment()

            # Extract metrics and info for saving
            metrics = {
                "accuracy": results["accuracy"],
                "auc_ovo": results["auc_ovo"],
                "gmean": results["gmean"],
                "cross_entropy": results["cross_entropy"],
            }

            dataset_info = {
                "n_samples_train": results["n_samples_train"],
                "n_samples_test": results["n_samples_test"],
                "n_features": results["n_features"],
                "n_classes": results["n_classes"],
            }

            timings = {
                "tuning_time": results["tuning_time"],
                "training_time": results["training_time"],
                "prediction_time": results["prediction_time"],
                "total_time": results["total_time"],
            }

            # Save results
            results_manager.save_dataset_result(
                dataset_name=dataset_name,
                metrics=metrics,
                dataset_info=dataset_info,
                timings=timings,
                hyperparameters=results["best_params"],
            )

            print(f"\n✓ Completed {dataset_name} ({idx}/{len(pkl_files)})")

        except Exception as e:
            print(f"\n✗ Failed: {dataset_name}")
            print(f"   Error: {e}")
            failed_datasets.append((dataset_name, e))
            traceback.print_exc()
            continue

    # ========================================================================
    # Save final results
    # ========================================================================
    print("\n" + "=" * 80)
    print("SAVING FINAL RESULTS")
    print("=" * 80)

    # Save metrics CSV (for hypothesis testing)
    results_manager.save_metrics_csv()

    # Print final summary
    results_manager.print_summary()

    # Print failed datasets if any
    if failed_datasets:
        print(f"\nFailed datasets ({len(failed_datasets)}):")
        for dataset, error in failed_datasets:
            print(f"  - {dataset}: {error}")

    print("\n" + "=" * 80)
    print("EXPERIMENT COMPLETE!")
    print("=" * 80)
    print(f"✓ Total datasets: {len(pkl_files)}")
    print(f"✓ Successful: {len(results_manager.all_results)}")
    print(f"✓ Failed: {len(failed_datasets)}")
    print(f"\n✓ Metrics CSV saved: {RESULTS_DIR}/{MODEL_NAME}_metrics.csv")
    print("=" * 80)

In [3]:
main()

TABULAR RESNET - EXPERIMENT PIPELINE
Model: resnet
Seed: 123
Optuna trials: 50
CV folds: 10
Results directory: ./results/resnet

✓ Found 30 processed datasets


################################################################################
DATASET 1/30: MiceProtein
################################################################################
Loading processed dataset from cache: MiceProtein
Using device: cuda
Dataset: MiceProtein
  Train: (756, 77), Test: (324, 77)
  Features: 77, Classes: 8


[I 2025-11-26 23:32:05,557] A new study created in memory with name: no-name-7e345a0e-abda-401a-887f-2e2cd25ff4ea


✓ Data loaded to cuda

EXPERIMENT: MiceProtein

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-26 23:32:15,050] Trial 0 finished with value: 0.9750431563456203 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.9750431563456203.
[I 2025-11-26 23:32:42,050] Trial 1 finished with value: 0.9905003006155368 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.9905003006155368.
[I 2025-11-26 23:33:02,677] Trial 2 finished with value: 0.9975073196879075 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 5354.08s
  Best CV G-Mean: 1.0000
  Best parameters:
    d: 512
    d_hidden_factor: 4.323646972058734
    n_layers: 10
    hidden_dropout: 0.1778976046973269
    residual_dropout: 0.18698998490625918
    learning_rate: 1.1452211301313298e-05
    batch_size: 128
    epochs: 136

Training final model...
    Epoch 10/136: Train Loss = 0.7731, Val Loss = 0.7337
    Epoch 20/136: Train Loss = 0.3239, Val Loss = 0.3143
    Epoch 30/136: Train Loss = 0.1632, Val Loss = 0.1508
    Epoch 40/136: Train Loss = 0.0960, Val Loss = 0.0822
    Epoch 50/136: Train Loss = 0.0665, Val Loss = 0.0534
    Epoch 60/136: Train Loss = 0.0503, Val Loss = 0.0402
    Epoch 70/136: Train Loss = 0.0368, Val Loss = 0.0317
    Epoch 80/136: Train Loss = 0.0343, Val Loss = 0.0249
    Epoch 90/136: Train Loss = 0.0285, Val Loss = 0.0219
    Epoch 100/136: Train Loss = 0.0223, Val Loss = 0.0182
    Epoch 110/136: Train Loss = 0.0204, Val Loss = 0.0163
    Epoch 120/136: Train Loss = 0.0

[I 2025-11-27 01:01:36,525] A new study created in memory with name: no-name-a6a6120b-7c8a-420d-bbf8-1ff834739cb6


✓ Training complete! Time: 16.84s

Evaluating on test set...

RESULTS FOR MiceProtein
Accuracy:        1.0000
AUC OVO:         1.0000
G-Mean:          1.0000
Cross-Entropy:   0.0151
✓ Saved results for MiceProtein

✓ Completed MiceProtein (1/30)

################################################################################
DATASET 2/30: diabetes
################################################################################
Loading processed dataset from cache: diabetes
Using device: cuda
Dataset: diabetes
  Train: (537, 8), Test: (231, 8)
  Features: 8, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: diabetes

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 01:01:41,542] Trial 0 finished with value: 0.6943502108945224 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.6943502108945224.
[I 2025-11-27 01:01:49,460] Trial 1 finished with value: 0.6947273251170855 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.6947273251170855.
[I 2025-11-27 01:01:57,507] Trial 2 finished with value: 0.6695510939875335 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 485.31s
  Best CV G-Mean: 0.7250
  Best parameters:
    d: 128
    d_hidden_factor: 3.468335578150428
    n_layers: 2
    hidden_dropout: 0.4995948185903601
    residual_dropout: 0.08493823160359984
    learning_rate: 2.1687768236505902e-05
    batch_size: 256
    epochs: 101

Training final model...
    Epoch 10/101: Train Loss = 0.7352, Val Loss = 0.7777
    Epoch 20/101: Train Loss = 0.7109, Val Loss = 0.7443
    Epoch 30/101: Train Loss = 0.6350, Val Loss = 0.7065
    Epoch 40/101: Train Loss = 0.6399, Val Loss = 0.6736
    Epoch 50/101: Train Loss = 0.6046, Val Loss = 0.6448
    Epoch 60/101: Train Loss = 0.5927, Val Loss = 0.6208


[I 2025-11-27 01:09:42,157] A new study created in memory with name: no-name-a5ca5606-e3b3-4804-92db-813b7e0a091a


    Epoch 70/101: Train Loss = 0.5587, Val Loss = 0.6010
    Epoch 80/101: Train Loss = 0.5889, Val Loss = 0.5824
    Epoch 90/101: Train Loss = 0.5505, Val Loss = 0.5701
    Epoch 100/101: Train Loss = 0.5293, Val Loss = 0.5570
✓ Training complete! Time: 0.32s

Evaluating on test set...

RESULTS FOR diabetes
Accuracy:        0.7835
AUC OVO:         0.8741
G-Mean:          0.6932
Cross-Entropy:   0.5058
✓ Saved results for diabetes

✓ Completed diabetes (2/30)

################################################################################
DATASET 3/30: pc1
################################################################################
Loading processed dataset from cache: pc1
Using device: cuda
Dataset: pc1
  Train: (776, 21), Test: (333, 21)
  Features: 21, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: pc1

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 01:09:50,823] Trial 0 finished with value: 0.41992670118414444 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.41992670118414444.
[I 2025-11-27 01:10:04,427] Trial 1 finished with value: 0.5027697944050372 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.5027697944050372.
[I 2025-11-27 01:10:23,058] Trial 2 finished with value: 0.3800912914840032 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_s


✓ Optimization complete! Time: 801.23s
  Best CV G-Mean: 0.5975
  Best parameters:
    d: 512
    d_hidden_factor: 1.031954150873541
    n_layers: 2
    hidden_dropout: 0.1785973443419468
    residual_dropout: 0.3654658770144939
    learning_rate: 0.00888036766804695
    batch_size: 256
    epochs: 131

Training final model...
    Epoch 10/131: Train Loss = 0.1738, Val Loss = 0.3982
    Early stopping at epoch 17
✓ Training complete! Time: 0.11s

Evaluating on test set...

RESULTS FOR pc1
Accuracy:        0.9099
AUC OVO:         0.8516
G-Mean:          0.4571
Cross-Entropy:   0.2713
✓ Saved results for pc1

✓ Completed pc1 (3/30)

################################################################################
DATASET 4/30: qsar-biodeg
################################################################################
Loading processed dataset from cache: qsar-biodeg
Using device: cuda
Dataset: qsar-biodeg
  Train: (738, 41), Test: (317, 41)
  Features: 41, Classes: 2
✓ Data loaded to cu

[I 2025-11-27 01:23:12,093] Trial 0 finished with value: 0.867448185762472 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.867448185762472.
[I 2025-11-27 01:23:25,936] Trial 1 finished with value: 0.8347228390247536 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.867448185762472.
[I 2025-11-27 01:23:40,866] Trial 2 finished with value: 0.8563149103310381 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_size':


✓ Optimization complete! Time: 1781.81s
  Best CV G-Mean: 0.8731
  Best parameters:
    d: 128
    d_hidden_factor: 2.5729864627729686
    n_layers: 6
    hidden_dropout: 0.32042016596789136
    residual_dropout: 0.17918429275508574
    learning_rate: 1.4981021007483853e-05
    batch_size: 64
    epochs: 97

Training final model...
    Epoch 10/97: Train Loss = 0.5010, Val Loss = 0.4872
    Epoch 20/97: Train Loss = 0.4384, Val Loss = 0.4274
    Epoch 30/97: Train Loss = 0.3876, Val Loss = 0.3939
    Epoch 40/97: Train Loss = 0.3699, Val Loss = 0.3754
    Epoch 50/97: Train Loss = 0.3438, Val Loss = 0.3474
    Epoch 60/97: Train Loss = 0.3134, Val Loss = 0.3307
    Epoch 70/97: Train Loss = 0.3141, Val Loss = 0.3266
    Epoch 80/97: Train Loss = 0.2974, Val Loss = 0.3118
    Epoch 90/97: Train Loss = 0.2912, Val Loss = 0.3023


[I 2025-11-27 01:52:50,025] A new study created in memory with name: no-name-6d04f2a4-7cbf-404b-9ce9-b279487fdb32


✓ Training complete! Time: 4.70s

Evaluating on test set...

RESULTS FOR qsar-biodeg
Accuracy:        0.8770
AUC OVO:         0.9204
G-Mean:          0.8678
Cross-Entropy:   0.3343
✓ Saved results for qsar-biodeg

✓ Completed qsar-biodeg (4/30)

################################################################################
DATASET 5/30: banknote-authentication
################################################################################
Loading processed dataset from cache: banknote-authentication
Using device: cuda
Dataset: banknote-authentication
  Train: (960, 4), Test: (412, 4)
  Features: 4, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: banknote-authentication

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 01:53:00,286] Trial 0 finished with value: 1.0 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 1.0.
[I 2025-11-27 01:53:32,510] Trial 1 finished with value: 1.0 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 1.0.
[I 2025-11-27 01:53:58,652] Trial 2 finished with value: 1.0 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_size': 64, 'epochs': 49}. Best is trial 0 with value: 1.0.
[I 2025-11-27 01:57


✓ Optimization complete! Time: 2413.93s
  Best CV G-Mean: 1.0000
  Best parameters:
    d: 64
    d_hidden_factor: 3.8778758791422523
    n_layers: 5
    hidden_dropout: 0.49038209919230774
    residual_dropout: 0.34241486929243165
    learning_rate: 0.00027720158198153483
    batch_size: 128
    epochs: 40

Training final model...
    Epoch 10/40: Train Loss = 0.1310, Val Loss = 0.0833
    Epoch 20/40: Train Loss = 0.0679, Val Loss = 0.0444
    Epoch 30/40: Train Loss = 0.0445, Val Loss = 0.0272


[I 2025-11-27 02:33:04,986] A new study created in memory with name: no-name-57d6154c-0dba-4d21-bcaf-2add0e25d619


    Epoch 40/40: Train Loss = 0.0295, Val Loss = 0.0166
✓ Training complete! Time: 1.02s

Evaluating on test set...

RESULTS FOR banknote-authentication
Accuracy:        1.0000
AUC OVO:         1.0000
G-Mean:          1.0000
Cross-Entropy:   0.0172
✓ Saved results for banknote-authentication

✓ Completed banknote-authentication (5/30)

################################################################################
DATASET 6/30: cnae-9
################################################################################
Loading processed dataset from cache: cnae-9
Using device: cuda
Dataset: cnae-9
  Train: (756, 856), Test: (324, 856)
  Features: 856, Classes: 9
✓ Data loaded to cuda

EXPERIMENT: cnae-9

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 02:33:13,625] Trial 0 finished with value: 0.8950627750601343 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.8950627750601343.
[I 2025-11-27 02:33:28,284] Trial 1 finished with value: 0.9049041293380278 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.9049041293380278.
[I 2025-11-27 02:33:49,168] Trial 2 finished with value: 0.8618572970758228 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 833.56s
  Best CV G-Mean: 0.9339
  Best parameters:
    d: 128
    d_hidden_factor: 3.1420110156483068
    n_layers: 2
    hidden_dropout: 0.43457026433287416
    residual_dropout: 0.37878978088039383
    learning_rate: 0.004142235254085385
    batch_size: 128
    epochs: 44

Training final model...
    Epoch 10/44: Train Loss = 0.0137, Val Loss = 0.3899


[I 2025-11-27 02:46:58,833] A new study created in memory with name: no-name-0b9aa3a5-ea8b-40e1-8ad2-5ae531f386c7


    Early stopping at epoch 19
✓ Training complete! Time: 0.25s

Evaluating on test set...

RESULTS FOR cnae-9
Accuracy:        0.9599
AUC OVO:         0.9950
G-Mean:          0.9587
Cross-Entropy:   0.1842
✓ Saved results for cnae-9

✓ Completed cnae-9 (6/30)

################################################################################
DATASET 7/30: car
################################################################################
Loading processed dataset from cache: car
Using device: cuda
Dataset: car
  Train: (1209, 21), Test: (519, 21)
  Features: 21, Classes: 4
✓ Data loaded to cuda

EXPERIMENT: car

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 02:47:12,228] Trial 0 finished with value: 0.6012162385280678 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.6012162385280678.
[I 2025-11-27 02:48:00,248] Trial 1 finished with value: 0.9782466430665664 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.9782466430665664.
[I 2025-11-27 02:48:34,829] Trial 2 finished with value: 0.9788115103106009 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 3397.87s
  Best CV G-Mean: 0.9966
  Best parameters:
    d: 256
    d_hidden_factor: 1.9451734361228792
    n_layers: 6
    hidden_dropout: 0.09924485140873582
    residual_dropout: 0.3971813140181491
    learning_rate: 0.0019114698762642767
    batch_size: 32
    epochs: 100

Training final model...
    Epoch 10/100: Train Loss = 0.1083, Val Loss = 0.1574
    Epoch 20/100: Train Loss = 0.0748, Val Loss = 0.0376
    Epoch 30/100: Train Loss = 0.0362, Val Loss = 0.0298
    Epoch 40/100: Train Loss = 0.0297, Val Loss = 0.0210
    Epoch 50/100: Train Loss = 0.0288, Val Loss = 0.1084
    Epoch 60/100: Train Loss = 0.0181, Val Loss = 0.0422


[I 2025-11-27 03:43:48,170] A new study created in memory with name: no-name-d88bde42-4dc7-4778-adbb-3e5dcd2c3b64


    Early stopping at epoch 67
✓ Training complete! Time: 11.45s

Evaluating on test set...

RESULTS FOR car
Accuracy:        0.9884
AUC OVO:         0.9998
G-Mean:          0.9713
Cross-Entropy:   0.0278
✓ Saved results for car

✓ Completed car (7/30)

################################################################################
DATASET 8/30: vehicle
################################################################################
Loading processed dataset from cache: vehicle
Using device: cuda
Dataset: vehicle
  Train: (592, 18), Test: (254, 18)
  Features: 18, Classes: 4
✓ Data loaded to cuda

EXPERIMENT: vehicle

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 03:43:55,245] Trial 0 finished with value: 0.7658490401430743 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.7658490401430743.
[I 2025-11-27 03:44:13,307] Trial 1 finished with value: 0.8207076658199954 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.8207076658199954.
[I 2025-11-27 03:44:29,334] Trial 2 finished with value: 0.8319052730494503 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 3487.25s
  Best CV G-Mean: 0.8553
  Best parameters:
    d: 512
    d_hidden_factor: 1.9966673641244546
    n_layers: 4
    hidden_dropout: 0.3370324956591667
    residual_dropout: 0.08425054757256112
    learning_rate: 2.2706013051919557e-05
    batch_size: 32
    epochs: 124

Training final model...
    Epoch 10/124: Train Loss = 0.5753, Val Loss = 0.5208
    Epoch 20/124: Train Loss = 0.4113, Val Loss = 0.3697
    Epoch 30/124: Train Loss = 0.3482, Val Loss = 0.3292
    Epoch 40/124: Train Loss = 0.3212, Val Loss = 0.2939
    Epoch 50/124: Train Loss = 0.2637, Val Loss = 0.2730
    Epoch 60/124: Train Loss = 0.2416, Val Loss = 0.2531
    Epoch 70/124: Train Loss = 0.2395, Val Loss = 0.2444
    Epoch 80/124: Train Loss = 0.2086, Val Loss = 0.2399
    Epoch 90/124: Train Loss = 0.1887, Val Loss = 0.2146
    Epoch 100/124: Train Loss = 0.1619, Val Loss = 0.2073
    Epoch 110/124: Train Loss = 0.1794, Val Loss = 0.2103


[I 2025-11-27 04:42:04,911] A new study created in memory with name: no-name-5d144a57-8bbb-4cd8-ab48-44bdce66f768


    Early stopping at epoch 114
✓ Training complete! Time: 9.48s

Evaluating on test set...

RESULTS FOR vehicle
Accuracy:        0.8268
AUC OVO:         0.9621
G-Mean:          0.8096
Cross-Entropy:   0.3877
✓ Saved results for vehicle

✓ Completed vehicle (8/30)

################################################################################
DATASET 9/30: breast-w
################################################################################
Loading processed dataset from cache: breast-w
Using device: cuda
Dataset: breast-w
  Train: (489, 9), Test: (210, 9)
  Features: 9, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: breast-w

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 04:42:10,355] Trial 0 finished with value: 0.9770070294658163 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.9770070294658163.
[I 2025-11-27 04:42:17,297] Trial 1 finished with value: 0.9632260146518874 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.9770070294658163.
[I 2025-11-27 04:42:27,760] Trial 2 finished with value: 0.9655452965824537 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 713.48s
  Best CV G-Mean: 0.9830
  Best parameters:
    d: 64
    d_hidden_factor: 2.395402113706958
    n_layers: 4
    hidden_dropout: 0.39090868317026173
    residual_dropout: 0.1488236073281246
    learning_rate: 1.001781929603626e-05
    batch_size: 128
    epochs: 170

Training final model...
    Epoch 10/170: Train Loss = 0.6607, Val Loss = 0.6469
    Epoch 20/170: Train Loss = 0.5963, Val Loss = 0.6042
    Epoch 30/170: Train Loss = 0.5713, Val Loss = 0.5630
    Epoch 40/170: Train Loss = 0.5258, Val Loss = 0.5315
    Epoch 50/170: Train Loss = 0.4960, Val Loss = 0.5047
    Epoch 60/170: Train Loss = 0.4639, Val Loss = 0.4773
    Epoch 70/170: Train Loss = 0.4455, Val Loss = 0.4513
    Epoch 80/170: Train Loss = 0.4151, Val Loss = 0.4320
    Epoch 90/170: Train Loss = 0.3993, Val Loss = 0.4151
    Epoch 100/170: Train Loss = 0.3752, Val Loss = 0.3969
    Epoch 110/170: Train Loss = 0.3678, Val Loss = 0.3809
    Epoch 120/170: Train Loss = 0.3470,

[I 2025-11-27 04:54:00,336] A new study created in memory with name: no-name-290bca94-d4de-4831-9284-b1d76a21f868


    Epoch 160/170: Train Loss = 0.2983, Val Loss = 0.3252
    Epoch 170/170: Train Loss = 0.2846, Val Loss = 0.3191
✓ Training complete! Time: 1.94s

Evaluating on test set...

RESULTS FOR breast-w
Accuracy:        0.9333
AUC OVO:         0.9837
G-Mean:          0.9327
Cross-Entropy:   0.3193
✓ Saved results for breast-w

✓ Completed breast-w (9/30)

################################################################################
DATASET 10/30: pc4
################################################################################
Loading processed dataset from cache: pc4
Using device: cuda
Dataset: pc4
  Train: (1020, 37), Test: (438, 37)
  Features: 37, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: pc4

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 04:54:12,169] Trial 0 finished with value: 0.6867256126218308 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.6867256126218308.
[I 2025-11-27 04:54:31,384] Trial 1 finished with value: 0.7753098403121366 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.7753098403121366.
[I 2025-11-27 04:54:56,762] Trial 2 finished with value: 0.7565310163373249 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 1160.09s
  Best CV G-Mean: 0.8153
  Best parameters:
    d: 512
    d_hidden_factor: 4.381464382577794
    n_layers: 3
    hidden_dropout: 0.32589678057363464
    residual_dropout: 0.4848682556407795
    learning_rate: 0.0013215005467429803
    batch_size: 256
    epochs: 189

Training final model...
    Epoch 10/189: Train Loss = 0.1892, Val Loss = 0.2428


[I 2025-11-27 05:13:21,079] A new study created in memory with name: no-name-2cb63d05-b099-4bc4-8ee5-81d0768b9107


    Early stopping at epoch 18
✓ Training complete! Time: 0.65s

Evaluating on test set...

RESULTS FOR pc4
Accuracy:        0.8927
AUC OVO:         0.9286
G-Mean:          0.7914
Cross-Entropy:   0.2362
✓ Saved results for pc4

✓ Completed pc4 (10/30)

################################################################################
DATASET 11/30: blood-transfusion-service-center
################################################################################
Loading processed dataset from cache: blood-transfusion-service-center
Using device: cuda
Dataset: blood-transfusion-service-center
  Train: (523, 4), Test: (225, 4)
  Features: 4, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: blood-transfusion-service-center

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 05:13:26,626] Trial 0 finished with value: 0.6232430811980498 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.6232430811980498.
[I 2025-11-27 05:13:34,454] Trial 1 finished with value: 0.6187614013659917 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.6232430811980498.
[I 2025-11-27 05:13:44,683] Trial 2 finished with value: 0.5503161468471659 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 824.19s
  Best CV G-Mean: 0.6694
  Best parameters:
    d: 64
    d_hidden_factor: 4.608687320984796
    n_layers: 10
    hidden_dropout: 0.1421723920289982
    residual_dropout: 0.48044060656136567
    learning_rate: 1.4058607575030626e-05
    batch_size: 64
    epochs: 57

Training final model...
    Epoch 10/57: Train Loss = 0.7246, Val Loss = 0.6935
    Epoch 20/57: Train Loss = 0.6932, Val Loss = 0.6711
    Epoch 30/57: Train Loss = 0.6606, Val Loss = 0.6553
    Epoch 40/57: Train Loss = 0.6574, Val Loss = 0.6424
    Epoch 50/57: Train Loss = 0.6256, Val Loss = 0.6284


[I 2025-11-27 05:27:08,196] A new study created in memory with name: no-name-4af7f7a7-025c-4c7a-840c-574cb68ac1db


✓ Training complete! Time: 2.92s

Evaluating on test set...

RESULTS FOR blood-transfusion-service-center
Accuracy:        0.6044
AUC OVO:         0.7173
G-Mean:          0.6539
Cross-Entropy:   0.6810
✓ Saved results for blood-transfusion-service-center

✓ Completed blood-transfusion-service-center (11/30)

################################################################################
DATASET 12/30: vowel
################################################################################
Loading processed dataset from cache: vowel
Using device: cuda
Dataset: vowel
  Train: (693, 27), Test: (297, 27)
  Features: 27, Classes: 11
✓ Data loaded to cuda

EXPERIMENT: vowel

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 05:27:15,326] Trial 0 finished with value: 0.38681356691016516 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.38681356691016516.
[I 2025-11-27 05:27:43,404] Trial 1 finished with value: 0.9715803877381827 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.9715803877381827.
[I 2025-11-27 05:28:02,011] Trial 2 finished with value: 0.97918806590052 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 2328.04s
  Best CV G-Mean: 0.9923
  Best parameters:
    d: 512
    d_hidden_factor: 1.9998262957693116
    n_layers: 10
    hidden_dropout: 0.19766733059471134
    residual_dropout: 0.2769147976227846
    learning_rate: 0.00015447176259137893
    batch_size: 128
    epochs: 170

Training final model...
    Epoch 10/170: Train Loss = 0.5046, Val Loss = 0.4654
    Epoch 20/170: Train Loss = 0.1569, Val Loss = 0.2062
    Epoch 30/170: Train Loss = 0.0860, Val Loss = 0.0995
    Epoch 40/170: Train Loss = 0.0619, Val Loss = 0.1226


[I 2025-11-27 06:05:58,647] A new study created in memory with name: no-name-d053f468-1442-4bb0-b559-f03345312ffe


    Early stopping at epoch 45
✓ Training complete! Time: 2.35s

Evaluating on test set...

RESULTS FOR vowel
Accuracy:        0.9697
AUC OVO:         0.9990
G-Mean:          0.9684
Cross-Entropy:   0.0999
✓ Saved results for vowel

✓ Completed vowel (12/30)

################################################################################
DATASET 13/30: semeion
################################################################################
Loading processed dataset from cache: semeion
Using device: cuda
Dataset: semeion
  Train: (1115, 256), Test: (478, 256)
  Features: 256, Classes: 10
✓ Data loaded to cuda

EXPERIMENT: semeion

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 06:06:10,400] Trial 0 finished with value: 0.9145440530542276 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.9145440530542276.
[I 2025-11-27 06:06:32,860] Trial 1 finished with value: 0.9170983560125634 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.9170983560125634.
[I 2025-11-27 06:07:02,442] Trial 2 finished with value: 0.9271567796022543 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 6035.20s
  Best CV G-Mean: 0.9487
  Best parameters:
    d: 512
    d_hidden_factor: 4.635623977297499
    n_layers: 9
    hidden_dropout: 0.14728268549306744
    residual_dropout: 0.022776439070110174
    learning_rate: 3.6930431163502905e-05
    batch_size: 256
    epochs: 170

Training final model...
    Epoch 10/170: Train Loss = 0.3291, Val Loss = 0.4738
    Epoch 20/170: Train Loss = 0.1093, Val Loss = 0.3726
    Epoch 30/170: Train Loss = 0.0541, Val Loss = 0.3325
    Epoch 40/170: Train Loss = 0.0347, Val Loss = 0.3228
    Epoch 50/170: Train Loss = 0.0244, Val Loss = 0.3247
    Epoch 60/170: Train Loss = 0.0196, Val Loss = 0.3151
    Epoch 70/170: Train Loss = 0.0150, Val Loss = 0.3119
    Epoch 80/170: Train Loss = 0.0133, Val Loss = 0.3126
    Epoch 90/170: Train Loss = 0.0110, Val Loss = 0.3109
    Epoch 100/170: Train Loss = 0.0099, Val Loss = 0.3092


[I 2025-11-27 07:46:45,484] A new study created in memory with name: no-name-92226afa-b39d-4334-9efa-3bbc1a0b029f


    Epoch 110/170: Train Loss = 0.0085, Val Loss = 0.3120
    Early stopping at epoch 111
✓ Training complete! Time: 11.57s

Evaluating on test set...

RESULTS FOR semeion
Accuracy:        0.9289
AUC OVO:         0.9968
G-Mean:          0.9268
Cross-Entropy:   0.2291
✓ Saved results for semeion

✓ Completed semeion (13/30)

################################################################################
DATASET 14/30: credit-g
################################################################################
Loading processed dataset from cache: credit-g
Using device: cuda
Dataset: credit-g
  Train: (700, 61), Test: (300, 61)
  Features: 61, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: credit-g

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 07:46:52,116] Trial 0 finished with value: 0.6818661095184958 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.6818661095184958.
[I 2025-11-27 07:46:59,857] Trial 1 finished with value: 0.6535302669534927 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.6818661095184958.
[I 2025-11-27 07:47:09,507] Trial 2 finished with value: 0.6776187314759733 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 896.12s
  Best CV G-Mean: 0.7091
  Best parameters:
    d: 512
    d_hidden_factor: 3.531048947593829
    n_layers: 3
    hidden_dropout: 0.4290806649523898
    residual_dropout: 0.08855270546440934
    learning_rate: 2.280785114165716e-05
    batch_size: 128
    epochs: 77

Training final model...
    Epoch 10/77: Train Loss = 0.5384, Val Loss = 0.5792
    Epoch 20/77: Train Loss = 0.4330, Val Loss = 0.5505
    Epoch 30/77: Train Loss = 0.3631, Val Loss = 0.5486


[I 2025-11-27 08:01:42,796] A new study created in memory with name: no-name-9f1a5ddc-cb8f-4690-942d-b2b2cd2ba6c7


    Epoch 40/77: Train Loss = 0.3234, Val Loss = 0.5755
    Early stopping at epoch 44
✓ Training complete! Time: 1.18s

Evaluating on test set...

RESULTS FOR credit-g
Accuracy:        0.7267
AUC OVO:         0.7611
G-Mean:          0.6667
Cross-Entropy:   0.5552
✓ Saved results for credit-g

✓ Completed credit-g (14/30)

################################################################################
DATASET 15/30: kc2
################################################################################
Loading processed dataset from cache: kc2
Using device: cuda
Dataset: kc2
  Train: (365, 21), Test: (157, 21)
  Features: 21, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: kc2

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 08:01:46,606] Trial 0 finished with value: 0.6577220711028889 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.6577220711028889.
[I 2025-11-27 08:01:53,617] Trial 1 finished with value: 0.7045497150818978 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.7045497150818978.
[I 2025-11-27 08:02:02,107] Trial 2 finished with value: 0.6559714502120249 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 470.93s
  Best CV G-Mean: 0.7402
  Best parameters:
    d: 128
    d_hidden_factor: 3.9221800273414082
    n_layers: 10
    hidden_dropout: 0.10670355338042883
    residual_dropout: 0.23525010738364932
    learning_rate: 0.003514908764779207
    batch_size: 256
    epochs: 137

Training final model...
    Epoch 10/137: Train Loss = 0.3574, Val Loss = 0.2481
    Epoch 20/137: Train Loss = 0.3039, Val Loss = 0.2617
    Epoch 30/137: Train Loss = 0.2512, Val Loss = 0.3258
    Epoch 40/137: Train Loss = 0.2206, Val Loss = 0.3252


[I 2025-11-27 08:09:34,146] A new study created in memory with name: no-name-39c0dd9b-4bab-4520-895e-75b14d29ff0f


    Early stopping at epoch 41
✓ Training complete! Time: 0.41s

Evaluating on test set...

RESULTS FOR kc2
Accuracy:        0.8344
AUC OVO:         0.7981
G-Mean:          0.6782
Cross-Entropy:   0.5290
✓ Saved results for kc2

✓ Completed kc2 (15/30)

################################################################################
DATASET 16/30: tic-tac-toe
################################################################################
Loading processed dataset from cache: tic-tac-toe
Using device: cuda
Dataset: tic-tac-toe
  Train: (670, 27), Test: (288, 27)
  Features: 27, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: tic-tac-toe

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 08:09:41,104] Trial 0 finished with value: 0.9018663654372185 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.9018663654372185.
[I 2025-11-27 08:09:59,190] Trial 1 finished with value: 0.9766105645113601 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.9766105645113601.
[I 2025-11-27 08:10:17,749] Trial 2 finished with value: 0.9734200262800025 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 1036.84s
  Best CV G-Mean: 0.9913
  Best parameters:
    d: 512
    d_hidden_factor: 4.892228722678056
    n_layers: 3
    hidden_dropout: 0.04557881882413451
    residual_dropout: 0.03936207928860028
    learning_rate: 0.0008021706379699085
    batch_size: 128
    epochs: 159

Training final model...
    Epoch 10/159: Train Loss = 0.0108, Val Loss = 0.0666
    Epoch 20/159: Train Loss = 0.0012, Val Loss = 0.0239
    Epoch 30/159: Train Loss = 0.0005, Val Loss = 0.0164
    Epoch 40/159: Train Loss = 0.0003, Val Loss = 0.0117
    Epoch 50/159: Train Loss = 0.0003, Val Loss = 0.0082
    Epoch 60/159: Train Loss = 0.0002, Val Loss = 0.0132


[I 2025-11-27 08:26:53,440] A new study created in memory with name: no-name-f052cb9f-4821-4b22-aa73-0519d84ead30


    Epoch 70/159: Train Loss = 0.0001, Val Loss = 0.0097
    Early stopping at epoch 70
✓ Training complete! Time: 2.44s

Evaluating on test set...

RESULTS FOR tic-tac-toe
Accuracy:        0.9965
AUC OVO:         0.9999
G-Mean:          0.9950
Cross-Entropy:   0.0145
✓ Saved results for tic-tac-toe

✓ Completed tic-tac-toe (16/30)

################################################################################
DATASET 17/30: credit-approval
################################################################################
Loading processed dataset from cache: credit-approval
Using device: cuda
Dataset: credit-approval
  Train: (483, 46), Test: (207, 46)
  Features: 46, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: credit-approval

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 08:26:58,850] Trial 0 finished with value: 0.8658239465250965 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.8658239465250965.
[I 2025-11-27 08:27:04,788] Trial 1 finished with value: 0.8572721186486936 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.8658239465250965.
[I 2025-11-27 08:27:12,865] Trial 2 finished with value: 0.8565016239703278 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 1577.42s
  Best CV G-Mean: 0.8777
  Best parameters:
    d: 256
    d_hidden_factor: 2.9321370570508174
    n_layers: 10
    hidden_dropout: 0.25974255962990467
    residual_dropout: 0.30644726288148383
    learning_rate: 2.300837769659881e-05
    batch_size: 32
    epochs: 82

Training final model...
    Epoch 10/82: Train Loss = 0.3486, Val Loss = 0.3393
    Epoch 20/82: Train Loss = 0.3226, Val Loss = 0.2894
    Epoch 30/82: Train Loss = 0.2430, Val Loss = 0.2555
    Epoch 40/82: Train Loss = 0.2738, Val Loss = 0.2233
    Epoch 50/82: Train Loss = 0.2371, Val Loss = 0.2125
    Epoch 60/82: Train Loss = 0.2059, Val Loss = 0.2060
    Epoch 70/82: Train Loss = 0.2042, Val Loss = 0.1969


[I 2025-11-27 08:53:18,902] A new study created in memory with name: no-name-42a1d912-872e-4086-971c-057476ce1380


    Early stopping at epoch 73
✓ Training complete! Time: 8.03s

Evaluating on test set...

RESULTS FOR credit-approval
Accuracy:        0.8357
AUC OVO:         0.9169
G-Mean:          0.8335
Cross-Entropy:   0.3980
✓ Saved results for credit-approval

✓ Completed credit-approval (17/30)

################################################################################
DATASET 18/30: mfeat-factors
################################################################################
Loading processed dataset from cache: mfeat-factors
Using device: cuda
Dataset: mfeat-factors
  Train: (1400, 216), Test: (600, 216)
  Features: 216, Classes: 10
✓ Data loaded to cuda

EXPERIMENT: mfeat-factors

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 08:53:33,948] Trial 0 finished with value: 0.9708687531957221 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.9708687531957221.
[I 2025-11-27 08:54:06,373] Trial 1 finished with value: 0.9763552709512002 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.9763552709512002.
[I 2025-11-27 08:54:44,763] Trial 2 finished with value: 0.981542869426416 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_size


✓ Optimization complete! Time: 6063.92s
  Best CV G-Mean: 0.9853
  Best parameters:
    d: 256
    d_hidden_factor: 2.3692840774683015
    n_layers: 6
    hidden_dropout: 0.3284770462087821
    residual_dropout: 0.2556897797211803
    learning_rate: 9.716444547420922e-05
    batch_size: 32
    epochs: 107

Training final model...
    Epoch 10/107: Train Loss = 0.1473, Val Loss = 0.1855
    Epoch 20/107: Train Loss = 0.0490, Val Loss = 0.1662
    Epoch 30/107: Train Loss = 0.0336, Val Loss = 0.1463
    Epoch 40/107: Train Loss = 0.0217, Val Loss = 0.1331


[I 2025-11-27 10:34:32,797] A new study created in memory with name: no-name-49cdb593-a17c-45c9-ba51-e902c6a8b85e


    Early stopping at epoch 49
✓ Training complete! Time: 9.93s

Evaluating on test set...

RESULTS FOR mfeat-factors
Accuracy:        0.9783
AUC OVO:         0.9992
G-Mean:          0.9780
Cross-Entropy:   0.0764
✓ Saved results for mfeat-factors

✓ Completed mfeat-factors (18/30)

################################################################################
DATASET 19/30: steel-plates-fault
################################################################################
Loading processed dataset from cache: steel-plates-fault
Using device: cuda
Dataset: steel-plates-fault
  Train: (1358, 27), Test: (583, 27)
  Features: 27, Classes: 7
✓ Data loaded to cuda

EXPERIMENT: steel-plates-fault

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 10:34:47,904] Trial 0 finished with value: 0.2819697622028472 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.2819697622028472.
[I 2025-11-27 10:35:18,677] Trial 1 finished with value: 0.7652942150274382 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.7652942150274382.
[I 2025-11-27 10:35:55,459] Trial 2 finished with value: 0.7739187315506323 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 2177.64s
  Best CV G-Mean: 0.7804
  Best parameters:
    d: 256
    d_hidden_factor: 3.9242921433782283
    n_layers: 2
    hidden_dropout: 0.30034928391679494
    residual_dropout: 0.4329322291516323
    learning_rate: 0.008924108207819248
    batch_size: 256
    epochs: 123

Training final model...
    Epoch 10/123: Train Loss = 0.5490, Val Loss = 0.6852


[I 2025-11-27 11:10:50,809] A new study created in memory with name: no-name-a75ae8b0-2ce9-4e99-8216-bcf743eda624


    Epoch 20/123: Train Loss = 0.4658, Val Loss = 0.6155
    Early stopping at epoch 27
✓ Training complete! Time: 0.34s

Evaluating on test set...

RESULTS FOR steel-plates-fault
Accuracy:        0.7479
AUC OVO:         0.9540
G-Mean:          0.8028
Cross-Entropy:   0.7581
✓ Saved results for steel-plates-fault

✓ Completed steel-plates-fault (19/30)

################################################################################
DATASET 20/30: wdbc
################################################################################
Loading processed dataset from cache: wdbc
Using device: cuda
Dataset: wdbc
  Train: (398, 30), Test: (171, 30)
  Features: 30, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: wdbc

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 11:10:54,746] Trial 0 finished with value: 0.9748143661185684 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.9748143661185684.
[I 2025-11-27 11:11:03,799] Trial 1 finished with value: 0.9697906892984761 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.9748143661185684.
[I 2025-11-27 11:11:14,086] Trial 2 finished with value: 0.9705914859133452 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 906.97s
  Best CV G-Mean: 0.9844
  Best parameters:
    d: 256
    d_hidden_factor: 3.679472268120411
    n_layers: 3
    hidden_dropout: 0.2890784603364066
    residual_dropout: 0.4083359142322675
    learning_rate: 0.0039888567934246826
    batch_size: 64
    epochs: 132

Training final model...
    Epoch 10/132: Train Loss = 0.0600, Val Loss = 0.0121


[I 2025-11-27 11:25:58,154] A new study created in memory with name: no-name-d4bce46a-7a78-4e34-bcd2-19053ba9c62e


    Epoch 20/132: Train Loss = 0.0341, Val Loss = 0.0197
    Early stopping at epoch 20
✓ Training complete! Time: 0.36s

Evaluating on test set...

RESULTS FOR wdbc
Accuracy:        0.9591
AUC OVO:         0.9896
G-Mean:          0.9579
Cross-Entropy:   0.1355
✓ Saved results for wdbc

✓ Completed wdbc (20/30)

################################################################################
DATASET 21/30: climate-model-simulation-crashes
################################################################################
Loading processed dataset from cache: climate-model-simulation-crashes
Using device: cuda
Dataset: climate-model-simulation-crashes
  Train: (378, 18), Test: (162, 18)
  Features: 18, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: climate-model-simulation-crashes

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 11:26:02,006] Trial 0 finished with value: 0.7974245415041891 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.7974245415041891.
[I 2025-11-27 11:26:11,003] Trial 1 finished with value: 0.6541101102640742 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.7974245415041891.
[I 2025-11-27 11:26:21,505] Trial 2 finished with value: 0.7848882665640848 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 467.07s
  Best CV G-Mean: 0.8937
  Best parameters:
    d: 512
    d_hidden_factor: 1.445023114555651
    n_layers: 3
    hidden_dropout: 0.06863452696561956
    residual_dropout: 0.4171196735446927
    learning_rate: 0.0001922883998359619
    batch_size: 256
    epochs: 59

Training final model...
    Epoch 10/59: Train Loss = 0.3087, Val Loss = 0.3669
    Epoch 20/59: Train Loss = 0.1437, Val Loss = 0.2657
    Epoch 30/59: Train Loss = 0.0722, Val Loss = 0.1589


[I 2025-11-27 11:33:45,589] A new study created in memory with name: no-name-1f7765fd-7597-4eef-8b32-4700b49ff84e


    Epoch 40/59: Train Loss = 0.0447, Val Loss = 0.1088
    Epoch 50/59: Train Loss = 0.0314, Val Loss = 0.0858
✓ Training complete! Time: 0.35s

Evaluating on test set...

RESULTS FOR climate-model-simulation-crashes
Accuracy:        0.8951
AUC OVO:         0.9102
G-Mean:          0.8434
Cross-Entropy:   0.3137
✓ Saved results for climate-model-simulation-crashes

✓ Completed climate-model-simulation-crashes (21/30)

################################################################################
DATASET 22/30: analcatdata_dmft
################################################################################
Loading processed dataset from cache: analcatdata_dmft
Using device: cuda
Dataset: analcatdata_dmft
  Train: (462, 7), Test: (199, 7)
  Features: 7, Classes: 5
✓ Data loaded to cuda

EXPERIMENT: analcatdata_dmft

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 11:33:50,348] Trial 0 finished with value: 0.15769268867733985 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.15769268867733985.
[I 2025-11-27 11:33:55,944] Trial 1 finished with value: 0.12462427641552247 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.15769268867733985.
[I 2025-11-27 11:34:01,304] Trial 2 finished with value: 0.09401563314565238 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batc


✓ Optimization complete! Time: 420.63s
  Best CV G-Mean: 0.2006
  Best parameters:
    d: 512
    d_hidden_factor: 3.5625424706553064
    n_layers: 4
    hidden_dropout: 0.11666435986352208
    residual_dropout: 0.23164147899615622
    learning_rate: 0.00018288138471389106
    batch_size: 64
    epochs: 86

Training final model...
    Epoch 10/86: Train Loss = 1.4190, Val Loss = 1.8750


[I 2025-11-27 11:40:46,993] A new study created in memory with name: no-name-6242da9b-6b25-4fb4-9514-5f45fc884bab


    Early stopping at epoch 16
✓ Training complete! Time: 0.75s

Evaluating on test set...

RESULTS FOR analcatdata_dmft
Accuracy:        0.2312
AUC OVO:         0.5561
G-Mean:          0.2270
Cross-Entropy:   1.7538
✓ Saved results for analcatdata_dmft

✓ Completed analcatdata_dmft (22/30)

################################################################################
DATASET 23/30: ilpd
################################################################################
Loading processed dataset from cache: ilpd
Using device: cuda
Dataset: ilpd
  Train: (408, 11), Test: (175, 11)
  Features: 11, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: ilpd

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 11:40:50,676] Trial 0 finished with value: 0.5438684519231419 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.5438684519231419.
[I 2025-11-27 11:40:57,823] Trial 1 finished with value: 0.4570219981551048 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.5438684519231419.
[I 2025-11-27 11:41:05,911] Trial 2 finished with value: 0.4807621203707889 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 512.77s
  Best CV G-Mean: 0.6142
  Best parameters:
    d: 512
    d_hidden_factor: 1.795851333002123
    n_layers: 8
    hidden_dropout: 0.3510491642133454
    residual_dropout: 0.2280213953889534
    learning_rate: 0.002883879979938879
    batch_size: 256
    epochs: 193

Training final model...
    Epoch 10/193: Train Loss = 0.5241, Val Loss = 1.3635


[I 2025-11-27 11:49:20,055] A new study created in memory with name: no-name-f1f95078-bdc7-4fb2-8e46-3b354ccb2e93


    Early stopping at epoch 16
✓ Training complete! Time: 0.27s

Evaluating on test set...

RESULTS FOR ilpd
Accuracy:        0.6800
AUC OVO:         0.8005
G-Mean:          0.7018
Cross-Entropy:   0.5278
✓ Saved results for ilpd

✓ Completed ilpd (23/30)

################################################################################
DATASET 24/30: cmc
################################################################################
Loading processed dataset from cache: cmc
Using device: cuda
Dataset: cmc
  Train: (1031, 9), Test: (442, 9)
  Features: 9, Classes: 3
✓ Data loaded to cuda

EXPERIMENT: cmc

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 11:49:31,768] Trial 0 finished with value: 0.5540176875081935 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.5540176875081935.
[I 2025-11-27 11:49:49,184] Trial 1 finished with value: 0.551519852542982 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.5540176875081935.
[I 2025-11-27 11:50:07,857] Trial 2 finished with value: 0.5402436753447036 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_size


✓ Optimization complete! Time: 924.60s
  Best CV G-Mean: 0.5641
  Best parameters:
    d: 512
    d_hidden_factor: 1.393822117101131
    n_layers: 8
    hidden_dropout: 0.0070454647817293735
    residual_dropout: 0.49603173000665035
    learning_rate: 0.00888036766804695
    batch_size: 256
    epochs: 198

Training final model...
    Epoch 10/198: Train Loss = 0.8636, Val Loss = 0.9030


[I 2025-11-27 12:04:45,575] A new study created in memory with name: no-name-dde7da60-63cf-47c2-aa5e-08db55617cc2


    Epoch 20/198: Train Loss = 0.8611, Val Loss = 0.9522
    Early stopping at epoch 25
✓ Training complete! Time: 0.90s

Evaluating on test set...

RESULTS FOR cmc
Accuracy:        0.5385
AUC OVO:         0.7192
G-Mean:          0.5284
Cross-Entropy:   0.9841
✓ Saved results for cmc

✓ Completed cmc (24/30)

################################################################################
DATASET 25/30: cylinder-bands
################################################################################
Loading processed dataset from cache: cylinder-bands
Using device: cuda
Dataset: cylinder-bands
  Train: (378, 119), Test: (162, 119)
  Features: 119, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: cylinder-bands

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 12:04:49,193] Trial 0 finished with value: 0.6899513459810391 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.6899513459810391.
[I 2025-11-27 12:04:55,238] Trial 1 finished with value: 0.7728282911648879 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.7728282911648879.
[I 2025-11-27 12:05:03,325] Trial 2 finished with value: 0.7392800598765724 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 779.54s
  Best CV G-Mean: 0.8065
  Best parameters:
    d: 512
    d_hidden_factor: 4.560107548527225
    n_layers: 9
    hidden_dropout: 0.4597693182168862
    residual_dropout: 0.0033274404135525426
    learning_rate: 4.596810624149129e-05
    batch_size: 128
    epochs: 174

Training final model...
    Epoch 10/174: Train Loss = 0.4801, Val Loss = 0.5887
    Epoch 20/174: Train Loss = 0.2690, Val Loss = 0.5812


[I 2025-11-27 12:17:46,720] A new study created in memory with name: no-name-7d2c4e91-b576-44da-984b-de2fc85999f6


    Epoch 30/174: Train Loss = 0.1445, Val Loss = 0.6926
    Early stopping at epoch 30
✓ Training complete! Time: 1.58s

Evaluating on test set...

RESULTS FOR cylinder-bands
Accuracy:        0.7407
AUC OVO:         0.8267
G-Mean:          0.7495
Cross-Entropy:   0.6391
✓ Saved results for cylinder-bands

✓ Completed cylinder-bands (25/30)

################################################################################
DATASET 26/30: eucalyptus
################################################################################
Loading processed dataset from cache: eucalyptus
Using device: cuda
Dataset: eucalyptus
  Train: (515, 91), Test: (221, 91)
  Features: 91, Classes: 5
✓ Data loaded to cuda

EXPERIMENT: eucalyptus

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 12:17:52,201] Trial 0 finished with value: 0.5580026445839533 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.5580026445839533.
[I 2025-11-27 12:18:01,106] Trial 1 finished with value: 0.5982383152600741 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.5982383152600741.
[I 2025-11-27 12:18:13,316] Trial 2 finished with value: 0.63080529408474 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_size'


✓ Optimization complete! Time: 1837.12s
  Best CV G-Mean: 0.6765
  Best parameters:
    d: 64
    d_hidden_factor: 4.721827878307432
    n_layers: 9
    hidden_dropout: 0.46751066172174116
    residual_dropout: 0.20297535460076244
    learning_rate: 0.00010919182475514198
    batch_size: 32
    epochs: 161

Training final model...
    Epoch 10/161: Train Loss = 1.2516, Val Loss = 1.2946
    Epoch 20/161: Train Loss = 1.0379, Val Loss = 1.0562
    Epoch 30/161: Train Loss = 0.8843, Val Loss = 0.9632
    Epoch 40/161: Train Loss = 0.7694, Val Loss = 0.9087
    Epoch 50/161: Train Loss = 0.6927, Val Loss = 0.8868
    Epoch 60/161: Train Loss = 0.6728, Val Loss = 0.8659


[I 2025-11-27 12:48:30,248] A new study created in memory with name: no-name-dfb0154c-be4c-4cec-9f50-9ead6574004b


    Epoch 70/161: Train Loss = 0.6373, Val Loss = 0.8764
    Early stopping at epoch 72
✓ Training complete! Time: 6.39s

Evaluating on test set...

RESULTS FOR eucalyptus
Accuracy:        0.6290
AUC OVO:         0.9010
G-Mean:          0.5759
Cross-Entropy:   0.8226
✓ Saved results for eucalyptus

✓ Completed eucalyptus (26/30)

################################################################################
DATASET 27/30: pc3
################################################################################
Loading processed dataset from cache: pc3
Using device: cuda
Dataset: pc3
  Train: (1094, 37), Test: (469, 37)
  Features: 37, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: pc3

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 12:48:42,003] Trial 0 finished with value: 0.3860075897048444 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.3860075897048444.
[I 2025-11-27 12:49:01,679] Trial 1 finished with value: 0.5287413205642593 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.5287413205642593.
[I 2025-11-27 12:49:25,485] Trial 2 finished with value: 0.5414278628798688 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 1286.61s
  Best CV G-Mean: 0.5901
  Best parameters:
    d: 256
    d_hidden_factor: 1.8103528312101083
    n_layers: 7
    hidden_dropout: 0.038609246415486895
    residual_dropout: 0.42625230198252084
    learning_rate: 0.004732405706482846
    batch_size: 256
    epochs: 104

Training final model...
    Epoch 10/104: Train Loss = 0.2218, Val Loss = 0.3786


[I 2025-11-27 13:09:57,394] A new study created in memory with name: no-name-a9d42d6f-680a-4ab2-804d-4a682be8a3a9


    Epoch 20/104: Train Loss = 0.1759, Val Loss = 0.3791
    Early stopping at epoch 28
✓ Training complete! Time: 0.52s

Evaluating on test set...

RESULTS FOR pc3
Accuracy:        0.8763
AUC OVO:         0.7885
G-Mean:          0.5757
Cross-Entropy:   0.3551
✓ Saved results for pc3

✓ Completed pc3 (27/30)

################################################################################
DATASET 28/30: dresses-sales
################################################################################
Loading processed dataset from cache: dresses-sales
Using device: cuda
Dataset: dresses-sales
  Train: (350, 141), Test: (150, 141)
  Features: 141, Classes: 2
✓ Data loaded to cuda

EXPERIMENT: dresses-sales

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 13:10:00,621] Trial 0 finished with value: 0.5580807666130643 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.5580807666130643.
[I 2025-11-27 13:10:04,503] Trial 1 finished with value: 0.5212502010870452 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.5580807666130643.
[I 2025-11-27 13:10:08,750] Trial 2 finished with value: 0.5605285293218245 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 576.76s
  Best CV G-Mean: 0.6007
  Best parameters:
    d: 256
    d_hidden_factor: 4.708421746705769
    n_layers: 5
    hidden_dropout: 0.09008699300455848
    residual_dropout: 0.36284830921241956
    learning_rate: 1.4797219999249465e-05
    batch_size: 32
    epochs: 137

Training final model...
    Epoch 10/137: Train Loss = 0.7267, Val Loss = 0.7867


[I 2025-11-27 13:19:34,908] A new study created in memory with name: no-name-914d39f2-bdc2-45a4-99ba-10d81e8d5ce4


    Early stopping at epoch 16
✓ Training complete! Time: 0.74s

Evaluating on test set...

RESULTS FOR dresses-sales
Accuracy:        0.5000
AUC OVO:         0.5864
G-Mean:          0.4480
Cross-Entropy:   0.7484
✓ Saved results for dresses-sales

✓ Completed dresses-sales (28/30)

################################################################################
DATASET 29/30: balance-scale
################################################################################
Loading processed dataset from cache: balance-scale
Using device: cuda
Dataset: balance-scale
  Train: (437, 4), Test: (188, 4)
  Features: 4, Classes: 3
✓ Data loaded to cuda

EXPERIMENT: balance-scale

Starting hyperparameter optimization...
  Trials: 50, CV Folds: 10


[I 2025-11-27 13:19:40,070] Trial 0 finished with value: 0.06933612743506347 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.06933612743506347.
[I 2025-11-27 13:19:51,202] Trial 1 finished with value: 0.7839030731129073 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 1 with value: 0.7839030731129073.
[I 2025-11-27 13:20:03,162] Trial 2 finished with value: 0.8038333749976619 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_s


✓ Optimization complete! Time: 503.65s
  Best CV G-Mean: 0.9510
  Best parameters:
    d: 256
    d_hidden_factor: 4.927212135393402
    n_layers: 1
    hidden_dropout: 0.4065623125080101
    residual_dropout: 0.005739073876571299
    learning_rate: 0.00852277869190327
    batch_size: 256
    epochs: 170

Training final model...
    Epoch 10/170: Train Loss = 0.1501, Val Loss = 0.1845
    Epoch 20/170: Train Loss = 0.1287, Val Loss = 0.1317
    Epoch 30/170: Train Loss = 0.1083, Val Loss = 0.1438
    Epoch 40/170: Train Loss = 0.0693, Val Loss = 0.0704
    Epoch 50/170: Train Loss = 0.0497, Val Loss = 0.0647
    Epoch 60/170: Train Loss = 0.0350, Val Loss = 0.0288
    Epoch 70/170: Train Loss = 0.0541, Val Loss = 0.0397
    Early stopping at epoch 73
✓ Training complete! Time: 0.17s

Evaluating on test set...

RESULTS FOR balance-scale
Accuracy:        0.9787
AUC OVO:         0.9968
G-Mean:          0.9659
Cross-Entropy:   0.0707
✓ Saved results for balance-scale

✓ Completed balance-

[I 2025-11-27 13:28:05,605] Trial 0 finished with value: 0.9975753094846148 and parameters: {'d': 64, 'd_hidden_factor': 3.8778758791422523, 'n_layers': 5, 'hidden_dropout': 0.49038209919230774, 'residual_dropout': 0.34241486929243165, 'learning_rate': 0.00027720158198153483, 'batch_size': 128, 'epochs': 40}. Best is trial 0 with value: 0.9975753094846148.
[I 2025-11-27 13:28:25,676] Trial 1 finished with value: 0.9954235137479704 and parameters: {'d': 128, 'd_hidden_factor': 3.1262054953673535, 'n_layers': 6, 'hidden_dropout': 0.31720047927566053, 'residual_dropout': 0.4247158970388948, 'learning_rate': 0.0014906166728108333, 'batch_size': 64, 'epochs': 69}. Best is trial 0 with value: 0.9975753094846148.
[I 2025-11-27 13:28:42,319] Trial 2 finished with value: 0.9966358590056631 and parameters: {'d': 128, 'd_hidden_factor': 2.723451053318575, 'n_layers': 5, 'hidden_dropout': 0.212915145147914, 'residual_dropout': 0.15613061148623264, 'learning_rate': 0.0001901314327577386, 'batch_siz


✓ Optimization complete! Time: 4799.10s
  Best CV G-Mean: 1.0000
  Best parameters:
    d: 64
    d_hidden_factor: 2.1444073493643163
    n_layers: 8
    hidden_dropout: 0.39773290288811947
    residual_dropout: 0.02489923440104569
    learning_rate: 6.731743934331164e-05
    batch_size: 32
    epochs: 146

Training final model...
    Epoch 10/146: Train Loss = 0.6102, Val Loss = 0.5120
    Epoch 20/146: Train Loss = 0.3209, Val Loss = 0.2760
    Epoch 30/146: Train Loss = 0.2039, Val Loss = 0.1956
    Epoch 40/146: Train Loss = 0.1517, Val Loss = 0.1337
    Epoch 50/146: Train Loss = 0.1163, Val Loss = 0.1005
    Epoch 60/146: Train Loss = 0.1006, Val Loss = 0.0773
    Epoch 70/146: Train Loss = 0.0760, Val Loss = 0.0619
    Epoch 80/146: Train Loss = 0.0682, Val Loss = 0.0495
    Epoch 90/146: Train Loss = 0.0671, Val Loss = 0.0433
    Epoch 100/146: Train Loss = 0.0353, Val Loss = 0.0347
    Epoch 110/146: Train Loss = 0.0521, Val Loss = 0.0296
    Epoch 120/146: Train Loss = 0.031