In [1]:
import copy
import sys

import dill
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm, trange

sys.path.append("../../")

import biked_commons
from biked_commons import resource_utils
from biked_commons.prediction import clip_predictor, loaders
from biked_commons.resource_utils import models_and_scalers_path

# Prefer the GPU when CUDA is usable; otherwise keep everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
# Load the CLIP train/validation inputs and targets through the project loader.
# X_tv is accessed via `.values` in the next cell, so it is presumably a pandas
# DataFrame; Y_tv is presumably a NumPy array — TODO confirm against loaders.load_clip.
X_tv, Y_tv = loaders.load_clip()

✅ CLIP_X_train.csv already exists in datasets folder. Skipping download.
✅ CLIP_Y_train.npy already exists in datasets folder. Skipping download.


In [3]:
# Copy inputs and targets into float32 tensors that live on the selected device.
tensor_kwargs = {"dtype": torch.float32, "device": device}
X_tv_tens = torch.tensor(X_tv.values, **tensor_kwargs)
Y_tv_tens = torch.tensor(Y_tv, **tensor_kwargs)

In [4]:
# Run the inputs through clip_predictor.remove_wall_thickness before scaling.
# NOTE(review): presumably this drops the wall-thickness feature columns — confirm
# in clip_predictor. The cell rebinds X_tv_tens, so re-running it applies the step again.
X_tv_tens = clip_predictor.remove_wall_thickness(X_tv_tens)

In [5]:
# Fit a standard scaler on all loaded (pre-subset, pre-split) inputs, persist it
# for later inference, then replace the inputs with their scaled version.
features_np = X_tv_tens.cpu().numpy()

scaler = StandardScaler()
scaler.fit(features_np)

scaler_path = models_and_scalers_path("clip_scaler.pkl")
with open(scaler_path, "wb") as scaler_file:
    dill.dump(scaler, scaler_file)

scaled_np = scaler.transform(features_np)
X_tv_tens = torch.tensor(scaled_np, dtype=torch.float32, device=device)

In [6]:
# Keep only the first 10000 samples (inputs and targets stay aligned).
first_rows = slice(None, 10000)
X_tv_tens, Y_tv_tens = X_tv_tens[first_rows], Y_tv_tens[first_rows]

In [7]:
# Hold out 20% of the samples for validation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_val, Y_train, Y_val = train_test_split(X_tv_tens, Y_tv_tens, test_size=0.2, random_state=42)

In [None]:
# Define the model, loss function, and optimizer
input_dim = X_train.shape[1]
model = clip_predictor.ResidualNetwork(input_dim, 512, 256, 2, 3).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003)

# Training loop: mini-batch Adam on the training split, full-batch validation
# after every epoch, keeping a snapshot of the best-validation model.
num_epochs = 5
batch_size = 32
best_val_loss = float('inf')
best_model = None  # becomes an independent copy of the model at its best validation epoch

bar = trange(num_epochs, desc="Training")
for epoch in bar:
    model.train()
    # Fresh shuffle of the sample order every epoch.
    permutation = torch.randperm(X_train.size(0))
    for i in range(0, X_train.size(0), batch_size):
        indices = permutation[i : i + batch_size]
        batch_x, batch_y = X_train[indices], Y_train[indices]

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    # Validation pass: eval mode and no autograd bookkeeping.
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, Y_val)

    if val_loss.item() < best_val_loss:
        best_val_loss = val_loss.item()
        # Snapshot the weights. Assigning `model` itself would only alias the
        # live module, so later epochs would silently overwrite the "best" model.
        best_model = copy.deepcopy(model)

    # `loss` is the loss of the last mini-batch of this epoch, not an epoch average.
    bar.set_postfix({
        'loss': f'{loss.item():.4f}',
        'val_loss': f'{val_loss.item():.4f}',
        'best_val': f'{best_val_loss:.4f}'
    })




Training: 100%|██████████| 5/5 [00:02<00:00,  2.50it/s, loss=0.0131, val_loss=0.0132, best_val=0.0132]


In [9]:
# Persist the selected model under the models-and-scalers resource location.
# torch.save is given the model object itself (not a state_dict), so the file is
# a full pickle of the module. `save_path` is reused by the reload cell below.
save_path = models_and_scalers_path("clip.pth")
torch.save(best_model, save_path)

In [10]:
# Reload the checkpoint written above to check that it round-trips.
# The file holds a fully pickled module, so weights_only must be False; newer
# PyTorch versions default to True and would refuse to unpickle it.
# SECURITY: unpickling executes arbitrary code — only load checkpoints you trust.
# map_location keeps the load working on machines without the GPU it was saved from.
model = torch.load(save_path, map_location=device, weights_only=False)

  model = torch.load(save_path)


In [11]:
# Evaluate the reloaded model with the project's evaluation helper.
# NOTE(review): in the recorded run this call failed with FileNotFoundError for
# '../../resources/datasets/split_datasets/CLIP_Y_test.npy' — the test split
# files apparently need to be present at that path before this cell can run.
import evaluators

evaluators.evaluate_clip(model, device=device)

FileNotFoundError: [Errno 2] No such file or directory: '../../resources/datasets/split_datasets/CLIP_Y_test.npy'

In [None]:
# Run the previously trained reference predictor ("clip_old") on the CLIP test
# split. Uses `pd`, `np`, `dill`, `torch`, `resource_utils` and `clip_predictor`
# from the imports cell instead of names left over in the kernel.
model_path = resource_utils.resource_path("models") + '/clip_old.pt'
scaler_path = resource_utils.resource_path("models") + '/clip_old_scaler.pk'

# NOTE(review): assumes the bare `ResidualNetwork` this cell used to reference is
# clip_predictor.ResidualNetwork (same constructor arguments as the training cell) — confirm.
# NOTE(review): the input width is hard-coded to 96 — confirm it matches the
# number of columns in X_test after scaling.
ref_model = clip_predictor.ResidualNetwork(96, 512, 256, 2, 3).to(device)
# The checkpoint is consumed as a state_dict, so the restricted weights_only
# loader is sufficient; map_location lets a GPU-saved file load on any device.
ref_model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
ref_model.eval()

# SECURITY: dill.load executes arbitrary code from the file — only load trusted scalers.
# NOTE: this rebinds `scaler`, replacing the scaler fitted earlier in the notebook.
with open(scaler_path, "rb") as file:
    scaler = dill.load(file)
X_test = pd.read_csv('../../resources/datasets/split_datasets/CLIP_X_test.csv', index_col=0)
Y_test = np.load("../../resources/datasets/split_datasets/CLIP_Y_test.npy")

X_test_scaled = scaler.transform(X_test.values)
# Model and inputs are on the same device; inference needs no autograd graph.
with torch.no_grad():
    Y_pred = ref_model(torch.tensor(X_test_scaled, dtype=torch.float32, device=device)).cpu().numpy()


  ref_model.load_state_dict(torch.load(model_path))
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


FileNotFoundError: [Errno 2] No such file or directory: '../../resources/datasets/split_datasets/CLIP_Y_test.npy'

In [None]:
# Display the feature column names of the test inputs.
X_test.columns

Index(['SSB_Include', 'CSB_Include', 'CS Length', 'BB Drop', 'Stack', 'SS E',
       'ST Angle', 'BB OD', 'TT OD', 'HT OD', 'DT OD', 'CS OD', 'SS OD',
       'ST OD', 'CS F', 'HT LX', 'ST UX', 'HT UX', 'HT Angle', 'HT Length',
       'ST Length', 'BB Length', 'Dropout Offset', 'SSB OD', 'CSB OD',
       'Material', 'SSB Offset', 'CSB Offset', 'SS Z', 'SS Thickness',
       'CS Thickness', 'TT Thickness', 'BB Thickness', 'HT Thickness',
       'ST Thickness', 'DT Thickness', 'DT Length'],
      dtype='object')