In [186]:
import pandas as pd
from tqdm.notebook import tqdm
from sklearn import preprocessing

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

In [209]:
class WLNuDataset(torch.utils.data.Dataset):
    """In-memory dataset read from a CSV file.

    Each item is a 1-D float32 tensor whose leading entries are the
    standardized input features and whose final entry is the raw
    (unscaled) ``Nu_pz`` target.

    Args:
        df_path: Path of the CSV file to load. The ``L_E`` and ``Nu_E``
            columns are dropped; ``Nu_pz`` is used as the target and every
            remaining column is treated as an input feature.

    Attributes:
        scaler: The fitted ``StandardScaler``; keep it to apply the same
            transform to new inputs.
        data: Tensor of shape ``(n_rows, n_features + 1)``, target last.
    """

    def __init__(self, df_path="../data/gen.csv"):
        target_column = "Nu_pz"

        df = pd.read_csv(df_path)
        df = df.drop(labels=["L_E", "Nu_E"], axis=1)

        # Select features and target by name so the result does not depend on
        # where the target column happens to sit in the CSV.
        features = df.drop(columns=[target_column]).to_numpy()
        target = df[[target_column]].to_numpy()

        # Standardize input columns only; the target is left in original units.
        # NOTE(review): the scaler is fit on every row, i.e. before train()
        # splits off validation/test samples, so their statistics leak into
        # the scaling. Confirm whether that is acceptable.
        self.scaler = preprocessing.StandardScaler().fit(features)
        scaled = self.scaler.transform(features)

        # Target is always concatenated as the last column.
        self.data = torch.cat(
            [torch.from_numpy(scaled).float(), torch.from_numpy(target).float()],
            dim=1,
        )

    def __getitem__(self, idx):
        """Return row ``idx`` (features followed by the target)."""
        return self.data[idx]

    def __len__(self):
        """Return the number of rows."""
        return len(self.data)

In [241]:
class WLNuModel(nn.Module):
    """Fully connected regressor with four 128-unit ReLU hidden layers.

    Args:
        n_features: Number of input features per sample.

    Attributes:
        n_features: Stored copy of the constructor argument; ``evaluate`` and
            ``train`` use it to split a batch into inputs and target.
        model: The underlying ``nn.Sequential`` stack.
    """

    def __init__(self, n_features):
        super().__init__()
        self.n_features = n_features
        self.model = nn.Sequential(
            nn.Linear(n_features, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            # Linear output head with NO activation: a trailing ReLU would
            # clamp every prediction to >= 0 and pass zero gradient whenever
            # the pre-activation is negative, so negative targets could never
            # be fitted. (Presumably Nu_pz is signed -- confirm against data.)
            nn.Linear(128, 1),
        )

    def forward(self, x):
        """Map a ``(batch, n_features)`` tensor to ``(batch, 1)`` predictions."""
        return self.model(x)

In [None]:
def evaluate(model, loader, loss_fn):
    """Compute the mean per-batch loss of ``model`` over ``loader``.

    Args:
        model: Module exposing an ``n_features`` attribute; the first
            ``n_features`` columns of each batch are fed to it as inputs.
        loader: Sized iterable of 2-D batches; columns from index
            ``n_features`` onward are the targets.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar
            tensor.

    Returns:
        Sum of the per-batch losses divided by ``len(loader)``.

    Raises:
        ZeroDivisionError: If ``loader`` has length 0.
    """
    n_features = model.n_features

    # Remember the caller's mode so it can be restored afterwards instead of
    # unconditionally forcing training mode.
    was_training = model.training
    model.eval()

    total_loss = 0

    try:
        # No gradients are needed for evaluation; skipping autograd avoids
        # building a graph for every batch.
        with torch.no_grad():
            for batch in loader:
                X = batch[:, :n_features]
                Y = batch[:, n_features:]

                out = model(X)
                total_loss += loss_fn(out, Y).item()
    finally:
        model.train(was_training)

    return total_loss / len(loader)

In [None]:
def train(data_split=(0.9, 0.05, 0.05), batch_size=8192, n_epochs=100, learning_rate=1e-3):
    """Train a ``WLNuModel`` on ``WLNuDataset`` with Adam and MSE loss.

    Prints the train and validation loss after every epoch and the test loss
    once training has finished.

    Args:
        data_split: Three fractions ``(train, val, test)`` of the dataset.
            They must sum to 1.
        batch_size: Batch size used by all three data loaders.
        n_epochs: Number of passes over the training split.
        learning_rate: Learning rate handed to ``optim.Adam``.

    Returns:
        The trained ``WLNuModel``.

    Raises:
        ValueError: If the fractions in ``data_split`` do not sum to 1.
    """
    dataset = WLNuDataset()
    n_features = len(dataset[0]) - 1  # the last column is the single target; the rest are inputs
    n_samples = len(dataset)

    print(f"# of input features: {n_features}")
    print(f"Total number of samples: {n_samples}")

    if abs(sum(data_split) - 1.0) > 1e-6:
        raise ValueError(f"data_split fractions must sum to 1, got {list(data_split)}")

    # int() truncates, so the individual lengths can add up to fewer than
    # n_samples, which random_split rejects. Hand the leftover rows to the
    # training split so the lengths always cover the dataset exactly.
    split_lengths = [int(fraction * n_samples) for fraction in data_split]
    split_lengths[0] += n_samples - sum(split_lengths)
    train_set, val_set, test_set = torch.utils.data.random_split(dataset, lengths=split_lengths)

    print(f"Train set samples: {len(train_set)}")
    print(f"Val set samples: {len(val_set)}")
    print(f"Test set samples: {len(test_set)}")

    # Only the training data needs reshuffling each epoch; the averaged
    # evaluation loss does not depend on batch order.
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    model = WLNuModel(n_features)
    loss_fn = torch.nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    print(f"\n{'='*30}\n")
    for epoch in range(n_epochs):

        print(f"Epoch {epoch}")

        train_loss = 0

        for train_batch in train_loader:
            X = train_batch[:, :n_features]
            Y = train_batch[:, n_features:]

            out = model(X)
            loss = loss_fn(out, Y)
            train_loss += loss.item()

            optimizer.zero_grad()  # clear previous gradients
            loss.backward()  # compute gradients

            optimizer.step()  # update weights using computed gradients

        # Mean of the per-batch losses, matching what evaluate() reports.
        train_loss = train_loss / len(train_loader)
        print(f"Train loss: {train_loss}")

        val_loss = evaluate(model, val_loader, loss_fn)
        print(f"Val loss: {val_loss}")

        print(f"\n{'='*30}\n")

    # The held-out test split is scored once, after all training is done.
    test_loss = evaluate(model, test_loader, loss_fn)
    print(f"Test loss: {test_loss}")

    return model

In [None]:
# Run training with the default split fractions, batch size, epoch count and learning rate.
train()