In [6]:
import os
import csv
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import optuna
import joblib
from tqdm import tqdm

### Import and Preprocess the Data

In [7]:
# Read the generated training set from disk.
df_transformed = pd.read_csv("data generation/train_dataset.csv")

# Inputs are the listed parameter columns; the target is the single IV column.
# Both are kept 2-D (n_samples, n_columns) by selecting with a list of names.
X = df_transformed.loc[:, ['alpha', 'rho', 'nu', 'T', 'K_relative']].values
Y = df_transformed.loc[:, ['IV']].values

# Column counts of the two arrays.
input_size, output_size = X.shape[1], Y.shape[1]

### Set Up the Optuna Hyperparameter Optimization

In [None]:
# Device setup: use CUDA when PyTorch reports it as available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [9]:
# One fixed train/validation split (30% held out for validation). The seed is
# pinned so the same split is produced on every run.
X_train_raw, X_val_raw, Y_train_raw, Y_val_raw = train_test_split(
    X,
    Y,
    shuffle=True,
    random_state=42,
    test_size=0.3,
)

In [10]:
# Prepare csv: the header row is written only when the results file does not
# exist yet, so an existing log is left untouched.
results_file = "optuna_results.csv"
if not os.path.exists(results_file):
    with open(results_file, mode="w", newline="") as f:
        csv.writer(f).writerow(["trial_number", "params", "mse"])

# Define model builder
def define_model(trial, input_size=5, output_size=1):
    """Build a fully connected network whose architecture is sampled from ``trial``.

    The trial picks the number of hidden layers ("depth", 1 to 5) and one width
    per hidden layer ("width_layer_{i}", 10 to 500 in steps of 10). Each hidden
    layer is ``Linear -> BatchNorm1d -> Softplus``; a final ``Linear`` maps the
    last hidden width to ``output_size``.

    Args:
        trial: Object providing ``suggest_int(name, low, high, step=...)``.
        input_size: Number of input features. Defaults to 5, the value that
            was previously hard-coded.
        output_size: Number of outputs. Defaults to 1, the value that was
            previously hard-coded.

    Returns:
        nn.Sequential: The assembled (untrained) model.
    """
    depth = trial.suggest_int("depth", 1, 5)
    layer_widths = [
        trial.suggest_int(f"width_layer_{i}", 10, 500, step=10)
        for i in range(depth)
    ]

    layers = []
    in_features = input_size
    for width in layer_widths:
        # A fresh activation module per layer keeps every entry of the
        # Sequential a distinct object; Softplus has no parameters, so the
        # state_dict layout is unaffected.
        layers.extend([
            nn.Linear(in_features, width),
            nn.BatchNorm1d(width),
            nn.Softplus(),
        ])
        in_features = width

    # Final layer
    layers.append(nn.Linear(in_features, output_size))

    return nn.Sequential(*layers)

In [None]:
# Objective function
def objective(trial):
    """Train one sampled network and return its validation MSE.

    The trial chooses the batch size and the optimizer (both optimizers are
    created with their library-default settings); the architecture is sampled
    inside ``define_model``. Training runs for at most 200 epochs with
    ReduceLROnPlateau scheduling and early stopping on the validation loss.
    The weights from the best validation epoch are restored before the final
    evaluation, and one row (trial number, params, mse) is appended to
    ``results_file``.

    Args:
        trial: The Optuna trial supplying hyperparameter suggestions.

    Returns:
        float: Mean squared error on the validation split, computed after
        undoing the target scaling.
    """
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512, 1024])
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop"])

    # Scalers are fitted on the training split only and then applied to both splits.
    scaler_X = MinMaxScaler().fit(X_train_raw)
    scaler_Y = StandardScaler().fit(Y_train_raw)

    X_train = torch.tensor(scaler_X.transform(X_train_raw), dtype=torch.float32).to(device)
    Y_train = torch.tensor(scaler_Y.transform(Y_train_raw), dtype=torch.float32).to(device)
    X_val = torch.tensor(scaler_X.transform(X_val_raw), dtype=torch.float32).to(device)
    Y_val = torch.tensor(scaler_Y.transform(Y_val_raw), dtype=torch.float32).to(device)

    # model, optimizer, and loss
    model = define_model(trial).to(device)
    optimizer = optim.Adam(model.parameters()) if optimizer_name == "Adam" else optim.RMSprop(model.parameters())
    criterion = nn.MSELoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.1, patience=15
    )

    def _snapshot_state():
        # state_dict() hands back references to the live parameter/buffer
        # tensors, which keep changing as training continues. Cloning freezes
        # the values so the checkpoint really is the best epoch.
        return {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    # Early stopping
    patience = 30
    best_val_loss = float("inf")
    patience_counter = 0
    best_model_state = _snapshot_state()

    # Training loop
    num_epochs = 200
    num_samples = X_train.size(0)
    progress_bar = tqdm(range(num_epochs), desc=f"Trial {trial.number}", leave=False)

    for epoch in progress_bar:
        model.train()
        # Build the permutation on the same device as the data it indexes.
        permutation = torch.randperm(num_samples, device=X_train.device)

        for i in range(0, num_samples, batch_size):
            indices = permutation[i:i + batch_size]
            # BatchNorm1d cannot compute batch statistics from a single sample
            # in training mode and raises; skip a trailing batch of size one.
            if indices.numel() < 2:
                continue

            optimizer.zero_grad()
            batch_X, batch_Y = X_train[indices], Y_train[indices]
            outputs = model(batch_X)
            loss = criterion(outputs, batch_Y)

            # Do not backpropagate a NaN/inf loss; move on to the next batch.
            if not torch.isfinite(loss):
                continue

            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val)
            val_loss = criterion(val_outputs, Y_val).item()

        scheduler.step(val_loss)
        progress_bar.set_postfix({"val_loss": f"{val_loss:.6f}"})

        # Early stopping check: only an improvement larger than 1e-4 counts.
        if val_loss < best_val_loss - 1e-4:
            best_val_loss = val_loss
            best_model_state = _snapshot_state()
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            break

    progress_bar.close()

    # Evaluate best model
    model.load_state_dict(best_model_state)
    model.eval()
    with torch.no_grad():
        Y_pred = model(X_val).cpu().numpy()
        # Undo the target scaling so the MSE is in the original target units.
        Y_val_orig = scaler_Y.inverse_transform(Y_val.cpu().numpy())
        Y_pred_orig = scaler_Y.inverse_transform(Y_pred)
        mse = mean_squared_error(Y_val_orig, Y_pred_orig)

    # Save results
    with open(results_file, mode="a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([trial.number, trial.params, mse])

    return mse

optuna.logging.set_verbosity(optuna.logging.INFO)

# Create the study (minimising the objective) backed by a local SQLite file;
# with load_if_exists=True an existing study of the same name is reused
# instead of raising.
study = optuna.create_study(
    storage="sqlite:///optuna_study.db",
    study_name="nn_study_softplus",
    load_if_exists=True,
    sampler=optuna.samplers.TPESampler(),
    direction="minimize",
)

In [None]:
optuna.logging.set_verbosity(optuna.logging.INFO)

# Run 100 trials of the objective, with up to three running concurrently.
study.optimize(objective, n_jobs=3, n_trials=100)

# Persist the study object to disk, then show the best trial found.
joblib.dump(study, "optuna_study.pkl")
print(study.best_trial)

