In [1]:
import pandas as pd
from tqdm.notebook import tqdm
from sklearn import preprocessing

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

In [18]:
class WLNuDataset(torch.utils.data.Dataset):
    """In-memory dataset backed by a CSV file.

    The CSV is read once at construction time, a fixed set of columns is
    discarded, and the remaining columns are stored row-wise as a single
    float32 tensor in ``self.data``. Indexing returns one row of that tensor.

    Note: input standardization (a ``StandardScaler`` fitted on every column
    except "Nu_pz") is currently not applied; rows hold the raw CSV values.
    """

    def __init__(self, df_path="../wlnu/data/gen.csv"):
        """Load ``df_path`` and keep every column except the dropped ones."""
        dropped_columns = ["W_px", "W_py", "W_pz", "W_m", "L_E", "Nu_E"]
        frame = pd.read_csv(df_path).drop(columns=dropped_columns)

        # One row per sample; cast to float32 so rows can feed nn.Linear directly.
        self.data = torch.from_numpy(frame.to_numpy()).float()

    def __getitem__(self, idx):
        """Return the row(s) of the data tensor selected by ``idx``."""
        return self.data[idx]

    def __len__(self):
        """Return the number of rows (samples)."""
        return self.data.shape[0]

In [19]:
class WLNuModel(nn.Module):
    """Fully connected regression network with a single scalar output.

    Architecture: ``n_features -> 128 -> 128 -> 128 -> 128 -> 1`` with a ReLU
    after every layer except the last. ``n_features`` is kept on the instance
    so callers can split a data row into inputs and targets.
    """

    def __init__(self, n_features):
        """Build the layer stack for inputs with ``n_features`` columns."""
        super().__init__()
        self.n_features = n_features

        width = 128
        # Input projection, then three identical hidden blocks, then the head.
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as a hand-written stack.
        stack = [nn.Linear(n_features, width), nn.ReLU()]
        for _ in range(3):
            stack.extend([nn.Linear(width, width), nn.ReLU()])
        stack.append(nn.Linear(width, 1))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the prediction."""
        prediction = self.model(x)
        return prediction

In [20]:
def evaluate(model, loader, loss_fn):
    """Return the mean per-batch loss of ``model`` over ``loader``.

    Each batch yielded by ``loader`` is indexed as a 2-D tensor: the first
    ``model.n_features`` columns are the inputs and the remaining columns are
    the targets.

    The model is switched to eval mode for the duration of the call and then
    put back into whatever mode it was in before, so calling this on a model
    that is already in eval mode does not flip it to training mode. Forward
    passes run under ``torch.no_grad()`` because no gradients are needed here.

    Args:
        model: Module exposing an ``n_features`` attribute.
        loader: Sized iterable of batches (``len(loader)`` must be non-zero).
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar
            tensor.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    n_features = model.n_features

    # Remember the caller's mode so it can be restored afterwards.
    was_training = model.training
    model.eval()

    total_loss = 0

    try:
        # No autograd graph is needed for evaluation; skipping it saves
        # memory and time on every batch.
        with torch.no_grad():
            for batch in loader:
                X = batch[:, :n_features]
                Y = batch[:, n_features:]

                out = model(X)
                loss = loss_fn(out, Y)
                total_loss += loss.item()
    finally:
        # Restore the previous mode even if a batch raised.
        model.train(was_training)

    return total_loss / len(loader)

In [21]:
def train(data_split=(0.9, 0.05, 0.05), batch_size=8192, n_epochs=100, learning_rate=1e-3):
    """Train a ``WLNuModel`` on ``WLNuDataset`` and print per-epoch losses.

    The dataset is randomly partitioned into train / validation / test
    subsets. After every epoch the mean per-batch training loss and the
    validation loss are printed; once training finishes the loss on the
    held-out test subset is printed as well.

    Args:
        data_split: Three fractions (train, validation, test) of the dataset.
            Each fraction is truncated to a whole number of samples; any
            samples left over by the truncation are added to the training
            subset so the three sizes always add up to the dataset size.
        batch_size: Number of rows per batch for every loader.
        n_epochs: Number of passes over the training subset.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The trained ``WLNuModel`` instance.
    """
    dataset = WLNuDataset()
    # Every column of a row except the last is an input; the last is the target.
    n_features = len(dataset[0]) - 1

    print(f"# of input features: {n_features}")
    print(f"Total number of samples: {len(dataset)}")

    # Truncating each fraction independently can leave a few samples
    # unassigned, and random_split rejects lengths that do not sum to
    # len(dataset). Hand the remainder to the training subset.
    split_lengths = [int(fraction * len(dataset)) for fraction in data_split]
    split_lengths[0] += len(dataset) - sum(split_lengths)
    train_set, val_set, test_set = torch.utils.data.random_split(dataset, lengths=split_lengths)

    print(f"Train set samples: {len(train_set)}")
    print(f"Val set samples: {len(val_set)}")
    print(f"Test set samples: {len(test_set)}")

    # Only the training data needs reshuffling each epoch; evaluation order
    # does not matter, so keep it fixed.
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    model = WLNuModel(n_features)
    loss_fn = torch.nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    print(f"\n{'='*30}\n")
    for epoch in range(n_epochs):

        print(f"Epoch {epoch}")

        train_loss = 0

        for train_batch in train_loader:
            X = train_batch[:, :n_features]
            Y = train_batch[:, n_features:]

            out = model(X)
            loss = loss_fn(out, Y)
            train_loss += loss.item()

            optimizer.zero_grad() # clear previous gradients
            loss.backward() # compute gradients

            optimizer.step() # update weights using computed gradients

        # Mean of the per-batch losses seen during this epoch.
        train_loss = train_loss / len(train_loader)
        print(f"Train loss: {train_loss}")

        val_loss = evaluate(model, val_loader, loss_fn)
        print(f"Val loss: {val_loss}")

        print(f"\n{'='*30}\n")

    # The random split is not reproducible outside this call, so report the
    # held-out test loss here while the subset is still available.
    test_loss = evaluate(model, test_loader, loss_fn)
    print(f"Test loss: {test_loss}")

    return model

In [23]:
# Run training with the default arguments of train() (data split, batch size, epochs, learning rate).
train()

# of input features: 5
Total number of samples: 100000
Train set samples: 90000
Val set samples: 5000
Test set samples: 5000


Epoch 0
Train loss: 1147.2495006214488
Val loss: 1148.6844482421875


Epoch 1
Train loss: 1125.5441450639205
Val loss: 1122.2547607421875


Epoch 2
Train loss: 1099.9109885475852
Val loss: 1103.3150634765625


Epoch 3
Train loss: 1067.02734375
Val loss: 1087.123046875


Epoch 4
Train loss: 1042.6960282759233
Val loss: 1058.099365234375


Epoch 5
Train loss: 1028.446660822088
Val loss: 1048.319580078125


Epoch 6
Train loss: 1008.5561689897017
Val loss: 1032.2677001953125


Epoch 7
Train loss: 986.9375554865056
Val loss: 1014.1881713867188


Epoch 8
Train loss: 969.2565585049716
Val loss: 991.2457275390625


Epoch 9
Train loss: 970.3288518732244
Val loss: 1011.42041015625


Epoch 10
Train loss: 955.773476340554
Val loss: 981.4016723632812


Epoch 11
Train loss: 952.3160677823154
Val loss: 988.5363159179688


Epoch 12
Train loss: 941.9237337979404
Val loss: 984.7

Train loss: 623.1235850941051
Val loss: 673.373046875


Epoch 82
Train loss: 620.037070534446
Val loss: 668.2620849609375


Epoch 83
Train loss: 622.7220348011364
Val loss: 679.9517211914062


Epoch 84
Train loss: 628.1897250088779
Val loss: 694.8091430664062


Epoch 85
Train loss: 643.0382135564631
Val loss: 697.4771118164062


Epoch 86
Train loss: 626.3065352006392
Val loss: 660.7698974609375


Epoch 87
Train loss: 610.8686079545455
Val loss: 662.1757202148438


Epoch 88
Train loss: 603.3119451349431
Val loss: 649.73388671875


Epoch 89
Train loss: 598.7633167613636
Val loss: 651.1747436523438


Epoch 90
Train loss: 599.2256469726562
Val loss: 656.9910278320312


Epoch 91
Train loss: 600.8044544566761
Val loss: 653.5519409179688


Epoch 92
Train loss: 602.8539373224431
Val loss: 656.1383056640625


Epoch 93
Train loss: 611.362138227983
Val loss: 680.7377319335938


Epoch 94
Train loss: 598.8529829545455
Val loss: 652.8258666992188


Epoch 95
Train loss: 592.238603071733
Val loss: 648