In [2]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

import sys
sys.path.append("../../")

import biked_commons
from biked_commons.prediction import clip_predictor, loaders
from biked_commons import resource_utils

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"

In [3]:
X_tv, Y_tv = loaders.load_clip()

⚠️  CLIP_X_train.csv not found in datasets folder. Performing first-time download from Harvard Dataverse...


Downloading CLIP_X_train.csv: 100%|██████████| 2.19G/2.19G [28:25<00:00, 1.38MB/s] 


✅ Download complete: ../../resources/datasets/split_datasets/CLIP_X_train.csv
⚠️  CLIP_Y_train.npy not found in datasets folder. Performing first-time download from Harvard Dataverse...


Downloading CLIP_Y_train.npy: 100%|██████████| 1.33G/1.33G [17:18<00:00, 1.38MB/s]


✅ Download complete: ../../resources/datasets/split_datasets/CLIP_Y_train.npy


In [None]:
X_tv_tens = torch.tensor(X_tv.values, dtype=torch.float32, device=device)
Y_tv_tens = torch.tensor(Y_tv.values, dtype=torch.float32, device=device)
mean = X_tv_tens.mean(axis=0)
std = X_tv_tens.std(axis=0)

In [5]:
X_train, X_val, Y_train, Y_val = train_test_split(X_tv_tens, Y_tv_tens, test_size=0.2, random_state=42)

In [None]:
# Define the model, loss function, and optimizer
input_dim = X_train.shape[1]
output_dim = 1
model = clip_predictor.ResidualNetwork(96, 512, 256, 2, 3)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 500
batch_size = 32
best_val_loss = float('inf')
model_path = resource_utils.resource_path("models")
save_path = model_path + '/clip.pth'

for epoch in range(num_epochs):
    model.train()
    permutation = torch.randperm(X_train.size()[0])
    
    for i in range(0, X_train.size()[0], batch_size):
        indices = permutation[i:i + batch_size]
        batch_x, batch_y = X_train[indices], Y_train[indices]

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, Y_val)

    # Save best model
    if val_loss.item() < best_val_loss:
        best_val_loss = val_loss.item()
        torch.save(model, save_path)

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f} (Best: {best_val_loss:.4f})')



Epoch [10/500], Loss: 3.6706, Val Loss: 1.0024 (Best: 1.0024)
Epoch [20/500], Loss: 3.2093, Val Loss: 0.4705 (Best: 0.4547)
Epoch [30/500], Loss: 2.3870, Val Loss: 0.3969 (Best: 0.3969)
Epoch [40/500], Loss: 2.2113, Val Loss: 0.4355 (Best: 0.3321)
Epoch [50/500], Loss: 2.2873, Val Loss: 0.3470 (Best: 0.3103)
Epoch [60/500], Loss: 1.4425, Val Loss: 0.3368 (Best: 0.3103)
Epoch [70/500], Loss: 1.8420, Val Loss: 0.2951 (Best: 0.2885)
Epoch [80/500], Loss: 2.1649, Val Loss: 0.4410 (Best: 0.2885)
Epoch [90/500], Loss: 1.9080, Val Loss: 0.3816 (Best: 0.2721)
Epoch [100/500], Loss: 1.3815, Val Loss: 0.4398 (Best: 0.2721)
Epoch [110/500], Loss: 1.9357, Val Loss: 0.5580 (Best: 0.2721)
Epoch [120/500], Loss: 1.7282, Val Loss: 0.3901 (Best: 0.2721)
Epoch [130/500], Loss: 1.5907, Val Loss: 0.5393 (Best: 0.2545)
Epoch [140/500], Loss: 1.6743, Val Loss: 0.3432 (Best: 0.2545)
Epoch [150/500], Loss: 1.7459, Val Loss: 0.3530 (Best: 0.2545)
Epoch [160/500], Loss: 1.1400, Val Loss: 0.4068 (Best: 0.2535)
E

In [7]:
model = torch.load(save_path)

  model = torch.load(save_path)


In [10]:
import evaluators

evaluators.evaluate_aero(model, device=device)

0.9547767043113708

In [22]:
import pandas as pd
import numpy as np
import dill
model_path = resource_utils.resource_path("models") + '/clip_old.pt'
scaler_path = resource_utils.resource_path("models") + '/clip_old_scaler.pk'

ref_model = ResidualNetwork(96, 512, 256, 2, 3)
ref_model.load_state_dict(torch.load(model_path))
with open(scaler_path, "rb") as file:
    scaler = dill.load(file)
X_test = pd.read_csv('../../resources/datasets/split_datasets/CLIP_X_test.csv', index_col=0)
Y_test = np.load("../../resources/datasets/split_datasets/CLIP_Y_test.npy")

X_test_scaled = scaler.transform(X_test.values)
Y_pred = ref_model(torch.tensor(X_test_scaled, dtype=torch.float32, device=device)).cpu().detach().numpy()


  ref_model.load_state_dict(torch.load(model_path))
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


FileNotFoundError: [Errno 2] No such file or directory: '../../resources/datasets/split_datasets/CLIP_Y_test.npy'

In [20]:
X_test.columns

Index(['SSB_Include', 'CSB_Include', 'CS Length', 'BB Drop', 'Stack', 'SS E',
       'ST Angle', 'BB OD', 'TT OD', 'HT OD', 'DT OD', 'CS OD', 'SS OD',
       'ST OD', 'CS F', 'HT LX', 'ST UX', 'HT UX', 'HT Angle', 'HT Length',
       'ST Length', 'BB Length', 'Dropout Offset', 'SSB OD', 'CSB OD',
       'Material', 'SSB Offset', 'CSB Offset', 'SS Z', 'SS Thickness',
       'CS Thickness', 'TT Thickness', 'BB Thickness', 'HT Thickness',
       'ST Thickness', 'DT Thickness', 'DT Length'],
      dtype='object')