In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import joblib
import optuna
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error
from tqdm import trange

### Import and Preprocess Data

In [None]:
# Read the training set from disk (path is relative to the notebook's working directory)
file_path = 'data generation/train_dataset.csv'
df = pd.read_csv(file_path)

# Five input columns form the feature matrix; the single 'IV' column is the target.
# Both are kept 2-D (n_rows, n_cols) so their widths can be read from shape[1].
X = df.loc[:, ['alpha', 'rho', 'nu', 'T', 'K_relative']].to_numpy()
Y = df.loc[:, ['IV']].to_numpy()
input_size, output_size = X.shape[1], Y.shape[1]

### Train Final Model

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Re-open the persisted Optuna study and pull out its best trial's hyperparameters
study = optuna.load_study(
    storage="sqlite:///optuna_study.db",
    study_name="nn_study_softplus",
)
best_trial = study.best_trial
best_params = best_trial.params
print("Best parameters:", best_params)

# Fit the scalers on the full arrays: min-max scaling for the inputs,
# standardisation for the target
scaler_X = MinMaxScaler()
scaler_X.fit(X)
scaler_Y = StandardScaler()
scaler_Y.fit(Y)

# Scaled data as float32 tensors placed on the selected device
X_scaled = torch.tensor(scaler_X.transform(X), dtype=torch.float32, device=device)
Y_scaled = torch.tensor(scaler_Y.transform(Y), dtype=torch.float32, device=device)
# Rebuild model using best params
def build_best_model(params, input_size=5, output_size=1):
    """Build the feed-forward network described by a hyperparameter dict.

    The network is a stack of ``depth`` hidden blocks, each made of
    ``Linear -> BatchNorm1d -> Softplus``, followed by a final ``Linear``
    output layer.

    Args:
        params: Mapping with an integer ``"depth"`` entry and one
            ``"width_layer_{i}"`` entry for every ``i`` in ``range(depth)``.
        input_size: Number of input features (defaults to 5, the value that
            was previously hard-coded).
        output_size: Number of outputs (defaults to 1, the value that was
            previously hard-coded).

    Returns:
        An ``nn.Sequential`` whose module order (and therefore its
        ``state_dict`` keys) is ``Linear, BatchNorm1d, Softplus`` repeated
        ``depth`` times and then the output ``Linear``.

    Raises:
        ValueError: If ``depth`` is smaller than 1.
        KeyError: If ``"depth"`` or a required ``"width_layer_{i}"`` is missing.
    """
    depth = params["depth"]
    if depth < 1:
        raise ValueError(f"depth must be at least 1, got {depth}")
    layer_widths = [params[f"width_layer_{i}"] for i in range(depth)]

    layers = []
    in_features = input_size
    for width in layer_widths:
        # One hidden block; Softplus holds no parameters, so using a fresh
        # instance per block leaves the state_dict layout unchanged.
        layers.extend([
            nn.Linear(in_features, width),
            nn.BatchNorm1d(width),
            nn.Softplus(),
        ])
        in_features = width
    layers.append(nn.Linear(in_features, output_size))
    return nn.Sequential(*layers)

def _clone_state_dict(module):
    """Return a detached copy of ``module.state_dict()``.

    ``state_dict()`` hands back references to the live parameter/buffer
    tensors, so a plain assignment would keep changing as training continues.
    """
    return {name: tensor.detach().clone() for name, tensor in module.state_dict().items()}


model = build_best_model(best_params).to(device)
# NOTE(review): both optimizers are created with their default learning rates;
# if the study also tuned a learning rate it is not applied here — confirm.
if best_params["optimizer"] == "Adam":
    optimizer = optim.Adam(model.parameters())
else:
    optimizer = optim.RMSprop(model.parameters())
criterion = nn.MSELoss()

# Scheduler and early stopping (both driven by the training loss of each epoch)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.1, patience=15)
num_epochs = 200
patience = 30
best_loss = float("inf")
patience_counter = 0
best_model_state = _clone_state_dict(model)
batch_size = best_params["batch_size"]
num_samples = X_scaled.size(0)

# Training loop
progress_bar = trange(num_epochs, desc="Training Progress", unit="epoch")

for epoch in progress_bar:
    model.train()
    # Shuffle on the same device as the data to avoid per-batch index transfers
    permutation = torch.randperm(num_samples, device=X_scaled.device)
    epoch_loss = 0.0
    num_batches = 0  # batches that actually contributed to epoch_loss

    for i in range(0, num_samples, batch_size):
        indices = permutation[i:i + batch_size]
        # BatchNorm1d cannot compute batch statistics from a single sample in
        # training mode; drop a one-sample remainder instead of crashing.
        if indices.numel() == 1 and batch_size > 1:
            continue
        optimizer.zero_grad()
        batch_X, batch_Y = X_scaled[indices], Y_scaled[indices]
        outputs = model(batch_X)
        loss = criterion(outputs, batch_Y)
        if not torch.isfinite(loss):
            # Best-effort: ignore batches whose loss is NaN/inf
            continue
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1

    # Average over the batches really processed; an epoch with none is
    # reported as inf so it can never be mistaken for an improvement.
    epoch_loss = epoch_loss / num_batches if num_batches else float("inf")
    scheduler.step(epoch_loss)
    progress_bar.set_postfix({"epoch_loss": f"{epoch_loss:.6f}"})

    if epoch_loss < best_loss - 1e-4:
        best_loss = epoch_loss
        # Snapshot a copy so later optimizer steps do not overwrite the best weights
        best_model_state = _clone_state_dict(model)
        patience_counter = 0
    else:
        patience_counter += 1
    if patience_counter >= patience:
        break

progress_bar.close()

# Load best weights
model.load_state_dict(best_model_state)
model.eval()

# Save model and scalers
torch.save(model.state_dict(), "best_model.pth")
joblib.dump(scaler_X, "scaler_X.pkl")
joblib.dump(scaler_Y, "scaler_Y.pkl")