In [186]:
import pandas as pd
from tqdm.notebook import tqdm
from sklearn import preprocessing

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

In [250]:
class WLNuDataset(torch.utils.data.Dataset):
    """Tabular dataset read from a CSV file, with standardized inputs.

    Each item is a 1-D float32 tensor laid out as
    ``[standardized feature columns..., target]``; the target is always the
    last element, whatever position it has in the CSV.

    Args:
        df_path: Path of the CSV file to load.
        target_col: Name of the regression target column. It is left
            unscaled and placed last in every row.

    Attributes:
        data: 2-D float32 tensor of shape ``(n_rows, n_features + 1)``.
        feature_names: Names of the input columns, in tensor column order.
        scaler: The fitted ``StandardScaler``; keep it to transform new inputs
            the same way at inference time.

    Raises:
        KeyError: If ``target_col`` is not a column of the CSV.
    """

    def __init__(self, df_path="../data/gen.csv", target_col="Nu_pz"):
        df = pd.read_csv(df_path)
        # These columns are removed so they are not fed to the model as inputs.
        df = df.drop(labels=["W_px", "W_py", "W_pz", "W_E", "L_E", "Nu_E"], axis=1)

        if target_col not in df.columns:
            raise KeyError(f"target column {target_col!r} not found in {df_path}")

        # Select the inputs by name instead of assuming the target is the last
        # CSV column; writing scaled values back positionally would otherwise
        # clobber the wrong columns.
        feature_names = [col for col in df.columns if col != target_col]
        features = df[feature_names].to_numpy(dtype="float64")
        target = df[[target_col]].to_numpy(dtype="float64")

        # NOTE(review): the scaling statistics are computed over every row of
        # the file, including rows a caller may later hold out for
        # validation/testing.
        self.scaler = preprocessing.StandardScaler().fit(features)
        features = self.scaler.transform(features)

        self.feature_names = feature_names
        self.data = torch.cat(
            [torch.from_numpy(features), torch.from_numpy(target)], dim=1
        ).float()

    def __getitem__(self, idx):
        """Return the row(s) at ``idx`` as a float32 tensor."""
        return self.data[idx]

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.data)

In [251]:
class WLNuModel(nn.Module):
    """Fully connected regressor mapping ``n_features`` inputs to one output.

    The network is four 128-unit linear layers, each followed by a ReLU,
    and a final linear layer producing a single value per sample.
    """

    def __init__(self, n_features):
        super().__init__()
        self.n_features = n_features

        hidden_width = 128
        # Input width followed by the four hidden widths; consecutive pairs
        # give the (in, out) sizes of the hidden linear layers.
        widths = [n_features] + [hidden_width] * 4

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        stack.append(nn.Linear(hidden_width, 1))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the prediction."""
        prediction = self.model(x)
        return prediction

In [252]:
def evaluate(model, loader, loss_fn):
    """Compute the mean per-batch loss of ``model`` over ``loader``.

    Args:
        model: Module exposing an ``n_features`` attribute; the first
            ``n_features`` columns of each batch are its inputs.
        loader: Sized iterable of 2-D batches whose remaining columns
            (from index ``n_features`` on) are the targets.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar
            tensor.

    Returns:
        The average of the per-batch loss values as a float, or NaN when
        ``loader`` yields no batches.
    """
    n_features = model.n_features

    n_batches = len(loader)
    if n_batches == 0:
        # Nothing to average; avoid a ZeroDivisionError on an empty split.
        return float("nan")

    # Remember the caller's mode so evaluation does not silently flip a model
    # that was already in eval mode back into training mode.
    was_training = model.training
    model.eval()

    total_loss = 0.0
    try:
        # No gradients are needed here; skipping graph construction saves
        # memory and time.
        with torch.no_grad():
            for batch in loader:
                X = batch[:, :n_features]
                Y = batch[:, n_features:]
                total_loss += loss_fn(model(X), Y).item()
    finally:
        model.train(was_training)

    return total_loss / n_batches

In [253]:
def train(data_split=(0.9, 0.05, 0.05), batch_size=8192, n_epochs=100, learning_rate=1e-3, seed=None):
    """Train a ``WLNuModel`` on ``WLNuDataset`` and report losses per epoch.

    Args:
        data_split: Three non-negative fractions (train, validation, test)
            that sum to 1.
        batch_size: Number of samples per batch for all three loaders.
        n_epochs: Number of passes over the training set.
        learning_rate: Learning rate passed to the Adam optimizer.
        seed: Optional integer; when given, seeds torch's global RNG so the
            split, weight initialisation and shuffling are repeatable.

    Returns:
        The trained ``WLNuModel``.

    Raises:
        ValueError: If ``data_split`` is not three valid fractions summing
            to 1, or if the resulting training set is empty.
    """
    if seed is not None:
        torch.manual_seed(seed)

    dataset = WLNuDataset()
    n_samples = len(dataset)
    n_features = len(dataset[0]) - 1 # every column except the last one (the target) is an input

    print(f"# of input features: {n_features}")
    print(f"Total number of samples: {n_samples}")

    fractions = list(data_split)
    if len(fractions) != 3:
        raise ValueError("data_split must contain exactly three fractions (train, val, test)")
    if any(frac < 0 for frac in fractions) or abs(sum(fractions) - 1.0) > 1e-6:
        raise ValueError("data_split fractions must be non-negative and sum to 1")

    # Truncating each fraction independently can leave the lengths short of
    # len(dataset), which random_split rejects. Size val/test by truncation
    # and hand every remaining sample to the training set instead.
    n_val = int(fractions[1] * n_samples)
    n_test = int(fractions[2] * n_samples)
    n_train = n_samples - n_val - n_test
    if n_train <= 0:
        raise ValueError("data_split leaves no samples for training")

    train_set, val_set, test_set = torch.utils.data.random_split(
        dataset, lengths=[n_train, n_val, n_test]
    )

    print(f"Train set samples: {len(train_set)}")
    print(f"Val set samples: {len(val_set)}")
    print(f"Test set samples: {len(test_set)}")

    # Only the training data needs shuffling; the order of evaluation batches
    # does not matter for an averaged loss.
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    model = WLNuModel(n_features)
    loss_fn = torch.nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    print(f"\n{'='*30}\n")
    for epoch in range(n_epochs):

        print(f"Epoch {epoch}")

        train_loss = 0

        for train_batch in train_loader:
            X = train_batch[:, :n_features]
            Y = train_batch[:, n_features:]

            out = model(X)
            loss = loss_fn(out, Y)
            train_loss += loss.item()

            optimizer.zero_grad() # clear previous gradients
            loss.backward() # compute gradients

            optimizer.step() # update weights using computed gradients

        train_loss = train_loss / len(train_loader)
        print(f"Train loss: {train_loss}")

        val_loss = evaluate(model, val_loader, loss_fn)
        print(f"Val loss: {val_loss}")

        print(f"\n{'='*30}\n")

    # Report the held-out loss once, after training; skip it when the test
    # split is empty so there is nothing to average.
    if len(test_set) > 0:
        test_loss = evaluate(model, test_loader, loss_fn)
        print(f"Test loss: {test_loss}")

    return model

In [254]:
# Run training with the default split, batch size, epoch count and learning rate.
train()

# of input features: 5
Total number of samples: 100000
Train set samples: 90000
Val set samples: 5000
Test set samples: 5000


Epoch 0
Train loss: 1167.9853404651988
Val loss: 1096.760498046875


Epoch 1
Train loss: 1164.7868208451705
Val loss: 1091.45556640625


Epoch 2
Train loss: 1151.069491299716
Val loss: 1076.5557861328125


Epoch 3
Train loss: 1129.7309681285512
Val loss: 1054.3587646484375


Epoch 4
Train loss: 1097.895419034091
Val loss: 1016.8220825195312


Epoch 5
Train loss: 1042.818076393821
Val loss: 951.0092163085938


Epoch 6
Train loss: 978.0543157404119
Val loss: 885.7515258789062


Epoch 7
Train loss: 931.6553455699574
Val loss: 848.2990112304688


Epoch 8
Train loss: 893.9603493430398
Val loss: 810.2698974609375


Epoch 9
Train loss: 854.3333573774858
Val loss: 782.2012939453125


Epoch 10
Train loss: 811.3327747691761
Val loss: 748.4339599609375


Epoch 11
Train loss: 778.6949240944602
Val loss: 725.5910034179688


Epoch 12
Train loss: 743.7549549449574
Val loss: 6

Train loss: 545.2250310724431
Val loss: 528.0106201171875


Epoch 82


KeyboardInterrupt: 