In [None]:
from getpass import getpass
import os

# Securely input your token
token = getpass('Paste your GitLab token: ')

# Set the repository URL
# repo_url = "https://oauth2:" + token + "@gitlab.ewi.tudelft.nl/dsait5000/tom-viering/msc-thesis-vasko.git"
username = "vdakov" 

# Syntax: https://<username>:<token>@<domain>/...
repo_url = f"https://{username}:{token}@gitlab.ewi.tudelft.nl/dsait5000/tom-viering/msc-thesis-vasko.git"

!git clone {repo_url}

# Clone the repository
# !git clone {repo_url}
# -7tPNnta8dSg6H33ZZcSz286MQp1OjhjeAk.01.0z1wd0yob
# Verify clone
os.chdir("msc-thesis-vasko")
%pip install -r requirements.txt
os.chdir("notebooks")

In [None]:
import sys
import os 

# Make the project's src/ directory importable. src/ is looked up next to the
# current working directory's parent.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
src_dir = os.path.join(parent_dir, 'src')

# Re-running the cell must not stack duplicate entries on sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Report the directory that was actually added (src/, not its parent).
print(f"Added to sys.path: {src_dir}")

In [None]:
import torch
from torch.distributions import Distribution

class DistributionSampler:
    """Thin wrapper that draws fixed-size batches from a torch distribution."""

    def __init__(self, distribution: Distribution):
        """
        :param distribution: An already-constructed torch.distributions object,
                             for instance Uniform(0.1, 3.0) or Gamma(2.0, 2.0).
        """
        self.distribution = distribution

    def sample(self, batch_size: int) -> torch.Tensor:
        """
        Draw ``batch_size`` samples from the wrapped distribution.

        For a distribution with scalar parameters the result has
        shape (batch_size,).
        """
        # The leading sample dimensions are handed over as a torch.Size.
        sample_shape = torch.Size((batch_size,))
        draws = self.distribution.sample(sample_shape)
        return draws

In [None]:
import numpy as np
import models.encoders as encoders
from training_util import get_uniform_single_eval_pos_sampler, get_weighted_single_eval_pos_sampler, get_cosine_schedule_with_warmup
import train
from criterion.bar_distribution import BarDistribution, get_bucket_limits
from models import positional_encodings
from prior_generation import gp_prior, gp_lengthscale_prior
import torch
import torch.distributions as dist

# ---- Optimisation schedule ----
epochs = 3
steps_per_epoch = 10
batch_size = 256
lr = 0.0001
# NOTE(review): warmup_epochs is larger than epochs, so the warmup presumably
# never completes within this run -- confirm this is intended.
warmup_epochs = 25
aggregate_k_gradients = 1
scheduler = get_cosine_schedule_with_warmup

# ---- Transformer architecture ----
sequence_length = 10
emsize = 512
nlayers = 6
nhead = 4
nhid = 1024
dropout = 0.2
fuse_x_y = False
input_normalization = True
encoder_type = 'linear'  # 'linear' or 'mlp'
y_encoder_type = 'linear'
pos_encoder_type = 'none'  # 'sinus', 'learned', 'none'

# ---- Output discretisation ----
num_buckets = 1000
min_y = -5
max_y = 5
num_outputs = 1000

# ---- Runtime ----
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ---- Prior ----
num_features = 1
my_prior_dist = dist.Uniform(low=0.1, high=1)
sampler = DistributionSampler(my_prior_dist)
# Both a fixed length_scale and a length-scale sampler are passed along;
# which one the prior uses is decided inside the prior generator (TODO confirm).
prior_hyperparameters = {
    'num_features': num_features,
    'num_outputs': num_outputs,
    'device': device,
    'kernel': "rbf",
    'length_scale': 0.5,
    "length_scale_sampling": sampler,
}

# ---- Task flags ----
prior_prediction = False
num_test_parameters = 1

def get_encoder_generator(encoder):
    """
    Map an encoder name to the matching class in the ``encoders`` module.

    :param encoder: 'linear' or 'mlp'.
    :return: ``encoders.LinearEncoder`` or ``encoders.MLPEncoder``.
    :raises NotImplementedError: for any other value.
    """
    if encoder == 'linear':
        return encoders.LinearEncoder
    if encoder == 'mlp':
        return encoders.MLPEncoder
    raise NotImplementedError(f'A {encoder} encoder is not valid.')

# Resolve the encoder names chosen in the configuration to encoder classes.
encoder_generator = get_encoder_generator(encoder_type)
y_encoder_generator = get_encoder_generator(y_encoder_type)

# Any value other than 'sinus' or 'learned' (including 'none') selects
# NoPositionalEncoding.
if pos_encoder_type == 'sinus':
    pos_encoder_generator = positional_encodings.PositionalEncoding
elif pos_encoder_type == 'learned':
    pos_encoder_generator = positional_encodings.LearnedPositionalEncoding
else:
    pos_encoder_generator = positional_encodings.NoPositionalEncoding

permutation_invariant_max_eval_pos = sequence_length - 1
permutation_invariant_sampling = 'uniform'

# Fail with an explicit message instead of a NameError on `get_sampler`
# further down when no maximum evaluation position is configured.
if permutation_invariant_max_eval_pos is None:
    raise ValueError(
        "permutation_invariant_max_eval_pos must be set to build the context delimiter sampler."
    )

if permutation_invariant_sampling == 'weighted':
    get_sampler = get_weighted_single_eval_pos_sampler
elif permutation_invariant_sampling == 'uniform':
    get_sampler = get_uniform_single_eval_pos_sampler
else:
    raise ValueError(
        f"Unknown permutation_invariant_sampling: {permutation_invariant_sampling!r} "
        "(expected 'weighted' or 'uniform')."
    )

context_delimiter_generator = get_sampler(permutation_invariant_max_eval_pos)

# Configuration tuples handed to train.train; the element order is kept exactly
# as is because the consumer presumably unpacks them positionally (TODO confirm).
transformer_configuration = (emsize, nhead, nhid, nlayers, dropout, num_features, num_outputs, input_normalization, y_encoder_generator, sequence_length, fuse_x_y, prior_prediction, num_test_parameters)
training_configuration = (epochs, steps_per_epoch, batch_size, sequence_length, lr, warmup_epochs, aggregate_k_gradients, scheduler, prior_prediction)
generators = (encoder_generator, y_encoder_generator, pos_encoder_generator)

# Fixed-hyperparameter GP prior; the hyper-prior alternative is
# gp_lengthscale_prior.GaussianProcessHyperPriorGenerator().
prior = gp_prior.GaussianProcessPriorGenerator()
print(prior.name)

# Bucketised output distribution over [min_y, max_y] with num_buckets buckets.
criterion = BarDistribution(borders=get_bucket_limits(num_buckets, full_range=(min_y, max_y)))

In [None]:
# Fixed-lengthscale RBF settings; the same dict is reused later when sampling
# evaluation datasets.
hyperparameters = {'kernel': "rbf", 'length_scale': 0.5}

# Visualise five datasets of 200 one-dimensional points drawn from the prior.
prior.visualize_datasets(
    number_of_datasets=5,
    num_points_per_dataset=200,
    num_features_per_dataset=1,
    device='cpu',
    **hyperparameters,
)

In [None]:
# 6. Run Training
# Trains from scratch (load_path=None) and does not write a checkpoint
# (save_path=None); the trained model only lives in this kernel.
print(f"Starting training on {device}...")
losses, positional_losses, val_losses,  model = train.train(
    prior_dataloader=prior,
    criterion=criterion, # Passing the wrapper
    transformer_configuration=transformer_configuration,
    generators = generators,
    training_configuration=training_configuration,
    prior_hyperparameters=prior_hyperparameters,
    load_path=None,
    context_delimiter_generator = context_delimiter_generator,
    device=device,
    verbose=True,
    save_path=None,
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


# Training and validation loss curves on a shared axis.
fig, ax = plt.subplots(figsize=(15, 5))

# Each curve gets an x-axis built from its own length, so the plot still works
# if the two loss lists are not equally long.
sns.lineplot(x=np.arange(0, len(losses)), y=np.array(losses), label="Training", ax=ax)
sns.lineplot(x=np.arange(0, len(val_losses)), y=np.array(val_losses), label="Validation", ax=ax)

ax.set_title("Training Loss")
ax.grid()
ax.legend()
ax.set_ylabel('Cross Entropy')
ax.set_xlabel('Epochs')
plt.show()

In [None]:
# Sizes: every dataset has 15 points, of which all but the last 5 act as context.
num_points_in_dataset = 15
num_training_points = num_points_in_dataset - 5

# The leading 9 is presumably the number of datasets (the plotting cell below
# draws 9 panels) and the trailing 1 the number of features -- TODO confirm.
train_X, train_Y, y_target = prior.get_datasets_from_prior(
    9,
    num_points_in_dataset,
    1,
    **hyperparameters,
)

# Move everything to the device the model runs on.
train_X, train_Y, y_target = train_X.to(device), train_Y.to(device), y_target.to(device)

In [None]:

model = model.to(device)

# One panel per sampled dataset, laid out as a 3x3 grid.
fig, axes = plt.subplots(3, 3, figsize=(15, 8))
axes = axes.flatten()

for batch_index, ax in enumerate(axes):
    # Context points and the full set of inputs for this dataset.
    train_x = train_X[:num_training_points, batch_index, :]
    train_y = train_Y[:num_training_points, batch_index]
    test_x = train_X[:, batch_index, :]
    num_test = len(test_x)

    with torch.no_grad():
        # Query inputs are appended after the context; their targets are
        # zero-filled placeholders.
        model_x = torch.cat((train_x, test_x))
        model_y = torch.cat((train_y, torch.zeros(num_test, device=device)))
        logits = model((model_x, model_y), context_pos=num_training_points - 1)

        # Keep only the trailing entries, one per query input.
        pred_means = model.criterion.mean(logits)[-num_test:]
        pred_confs = model.criterion.quantile(logits)[-num_test:]

    # Scatter the context points.
    ax.scatter(train_x[..., 0].cpu().numpy(), train_y.cpu().numpy(), label="Training Data")

    # Sort by x so the mean line and the band are drawn left to right.
    order_test_x = test_x[:, 0].cpu().argsort()
    sorted_x = test_x[order_test_x, 0].cpu().numpy()
    sorted_confs = pred_confs[order_test_x]

    ax.plot(sorted_x, pred_means[order_test_x].cpu().numpy(), color='green', label='pfn')
    ax.fill_between(
        sorted_x,
        sorted_confs[:, 0].cpu().numpy(),
        sorted_confs[:, 1].cpu().numpy(),
        alpha=.1,
        color='green',
    )
    ax.set_xlabel('X')
    ax.set_ylabel('Y')

# The legend is attached to the current (last) axes only.
plt.legend()
plt.show()


In [5]:
import numpy as np
import models.encoders as encoders
from training_util import get_uniform_single_eval_pos_sampler, get_weighted_single_eval_pos_sampler, get_cosine_schedule_with_warmup
import train
from criterion.bar_distribution import BarDistribution, get_bucket_limits
from models import positional_encodings
from prior_generation import gp_prior, gp_lengthscale_prior
import torch
import torch.distributions as dist

# ---- Optimisation schedule ----
epochs = 10
steps_per_epoch = 10
batch_size = 256
lr = 0.0001
# NOTE(review): warmup_epochs is larger than epochs, so the warmup presumably
# never completes within this run -- confirm this is intended.
warmup_epochs = 25
aggregate_k_gradients = 1
scheduler = get_cosine_schedule_with_warmup

# ---- Transformer architecture ----
sequence_length = 10
emsize = 512
nlayers = 6
nhead = 4
nhid = 1024
dropout = 0.2
fuse_x_y = False
input_normalization = True
encoder_type = 'linear'  # 'linear' or 'mlp'
y_encoder_type = 'linear'
pos_encoder_type = 'none'  # 'sinus', 'learned', 'none'

# ---- Output discretisation ----
num_buckets = 250
min_y = -5
max_y = 5
num_outputs = 250

# ---- Runtime ----
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ---- Prior: length scale drawn from Uniform(0.4, 0.6) ----
num_features = 1
my_prior_dist = dist.Uniform(low=0.4, high=0.6)
sampler = DistributionSampler(my_prior_dist)
prior_hyperparameters = {
    'num_features': num_features,
    'num_outputs': num_outputs,
    'device': device,
    'kernel': "rbf",
    "length_scale_sampling": sampler,
}

# ---- Task flags ----
prior_prediction = True
num_test_parameters = 1

def get_encoder_generator(encoder):
    """
    Map an encoder name to the matching class in the ``encoders`` module.

    :param encoder: 'linear' or 'mlp'.
    :return: ``encoders.LinearEncoder`` or ``encoders.MLPEncoder``.
    :raises NotImplementedError: for any other value.
    """
    if encoder == 'linear':
        return encoders.LinearEncoder
    if encoder == 'mlp':
        return encoders.MLPEncoder
    raise NotImplementedError(f'A {encoder} encoder is not valid.')

# Resolve the encoder names chosen in the configuration to encoder classes.
encoder_generator = get_encoder_generator(encoder_type)
y_encoder_generator = get_encoder_generator(y_encoder_type)

# Any value other than 'sinus' or 'learned' (including 'none') selects
# NoPositionalEncoding.
if pos_encoder_type == 'sinus':
    pos_encoder_generator = positional_encodings.PositionalEncoding
elif pos_encoder_type == 'learned':
    pos_encoder_generator = positional_encodings.LearnedPositionalEncoding
else:
    pos_encoder_generator = positional_encodings.NoPositionalEncoding

permutation_invariant_max_eval_pos = sequence_length - 1
permutation_invariant_sampling = 'uniform'

# Fail with an explicit message instead of a NameError on `get_sampler`
# further down when no maximum evaluation position is configured.
if permutation_invariant_max_eval_pos is None:
    raise ValueError(
        "permutation_invariant_max_eval_pos must be set to build the context delimiter sampler."
    )

if permutation_invariant_sampling == 'weighted':
    get_sampler = get_weighted_single_eval_pos_sampler
elif permutation_invariant_sampling == 'uniform':
    get_sampler = get_uniform_single_eval_pos_sampler
else:
    raise ValueError(
        f"Unknown permutation_invariant_sampling: {permutation_invariant_sampling!r} "
        "(expected 'weighted' or 'uniform')."
    )

context_delimiter_generator = get_sampler(permutation_invariant_max_eval_pos)

# Configuration tuples handed to train.train; the element order is kept exactly
# as is because the consumer presumably unpacks them positionally (TODO confirm).
transformer_configuration = (emsize, nhead, nhid, nlayers, dropout, num_features, num_outputs, input_normalization, y_encoder_generator, sequence_length, fuse_x_y, prior_prediction, num_test_parameters)
training_configuration = (epochs, steps_per_epoch, batch_size, sequence_length, lr, warmup_epochs, aggregate_k_gradients, scheduler, prior_prediction)
generators = (encoder_generator, y_encoder_generator, pos_encoder_generator)

# Hyper-prior GP generator; the fixed-hyperparameter alternative is
# gp_prior.GaussianProcessPriorGenerator().
prior = gp_lengthscale_prior.GaussianProcessHyperPriorGenerator()

# Bucketised output distribution over [min_y, max_y] with num_buckets buckets.
criterion = BarDistribution(borders=get_bucket_limits(num_buckets, full_range=(min_y, max_y)))

In [None]:
# 6. Run Training
# Trains from scratch (load_path=None) and does not write a checkpoint
# (save_path=None); the trained model only lives in this kernel.
print(f"Starting training on {device}...")
losses, positional_losses, val_losses,  model = train.train(
    prior_dataloader=prior,
    criterion=criterion, # Passing the wrapper
    transformer_configuration=transformer_configuration,
    generators = generators,
    training_configuration=training_configuration,
    prior_hyperparameters=prior_hyperparameters,
    load_path=None,
    context_delimiter_generator = context_delimiter_generator,
    device=device,
    verbose=True,
    save_path=None,
)

Starting training on cpu...
Using cpu:0 device
1
1


  0%|          | 0/10 [00:00<?, ?it/s]

DATA
[tensor([[[0.6282],
         [0.8171],
         [0.5943],
         ...,
         [0.5811],
         [0.8568],
         [0.2070]],

        [[0.8161],
         [0.0717],
         [0.2097],
         ...,
         [0.9800],
         [0.5665],
         [0.8293]],

        [[0.9888],
         [0.7096],
         [0.0462],
         ...,
         [0.5256],
         [0.5716],
         [0.1320]],

        ...,

        [[0.5721],
         [0.9375],
         [0.8083],
         ...,
         [0.2760],
         [0.0744],
         [0.7766]],

        [[0.7393],
         [0.7845],
         [0.5835],
         ...,
         [0.0931],
         [0.4794],
         [0.1998]],

        [[0.8190],
         [0.9413],
         [0.6583],
         ...,
         [0.9398],
         [0.5783],
         [0.1889]]]), tensor([[-0.6302,  1.9766, -0.0901,  ..., -0.2503,  1.0161, -0.0136],
        [-0.7733, -0.4807, -0.6840,  ..., -0.1500,  1.1801, -1.0125],
        [-0.4981,  1.5432, -0.5164,  ..., -0.0118,  1.0414,

loss  2.37 | pos loss   nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan, 2.37, lr 0.0val score 2.3476366996765137:  10%|█         | 1/10 [00:34<05:11, 34.62s/it]

DATA
[tensor([[[0.7692],
         [0.6764],
         [0.0274],
         ...,
         [0.8896],
         [0.5302],
         [0.0433]],

        [[0.9053],
         [0.1602],
         [0.7452],
         ...,
         [0.4109],
         [0.9114],
         [0.3410]],

        [[0.5344],
         [0.5166],
         [0.7969],
         ...,
         [0.5077],
         [0.9353],
         [0.7397]],

        ...,

        [[0.3890],
         [0.5768],
         [0.1094],
         ...,
         [0.9183],
         [0.4935],
         [0.2963]],

        [[0.2879],
         [0.0821],
         [0.7039],
         ...,
         [0.7517],
         [0.6685],
         [0.1523]],

        [[0.0511],
         [0.6607],
         [0.7053],
         ...,
         [0.1453],
         [0.7065],
         [0.2225]]]), tensor([[ 0.0334,  1.5493,  0.9855,  ...,  0.7701,  1.9386,  0.4798],
        [ 0.1857,  1.1372, -0.1315,  ..., -0.0415,  1.4705,  0.6061],
        [ 0.6023,  1.7908, -0.0174,  ...,  0.3178,  1.5595,

loss  2.29 | pos loss   nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan, 2.29, lr 4.000000000000001e-06val score 2.343047618865967:  20%|██        | 2/10 [01:10<04:43, 35.39s/it]

DATA
[tensor([[[0.4901],
         [0.6445],
         [0.6030],
         ...,
         [0.7056],
         [0.9211],
         [0.5458]],

        [[0.9196],
         [0.4595],
         [0.9915],
         ...,
         [0.8700],
         [0.2984],
         [0.9068]],

        [[0.3799],
         [0.4799],
         [0.3947],
         ...,
         [0.2265],
         [0.0464],
         [0.8466]],

        ...,

        [[0.7422],
         [0.3277],
         [0.0049],
         ...,
         [0.4451],
         [0.4158],
         [0.9764]],

        [[0.0288],
         [0.3158],
         [0.5698],
         ...,
         [0.2662],
         [0.9482],
         [0.5757]],

        [[0.5017],
         [0.6460],
         [0.9003],
         ...,
         [0.3074],
         [0.9748],
         [0.6291]]]), tensor([[ 0.1654, -0.8007, -0.0221,  ...,  0.7154, -0.4993, -0.4325],
        [ 0.1314, -1.0047,  0.1731,  ...,  0.9200, -0.9667, -1.1261],
        [-0.0900, -0.9175, -0.1321,  ...,  0.5026, -1.0194,

loss  2.00 | pos loss   nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan, 2.00, lr 8.000000000000001e-06val score 2.3392786979675293:  30%|███       | 3/10 [01:43<04:01, 34.49s/it]

DATA
[tensor([[[0.4298],
         [0.7739],
         [0.1066],
         ...,
         [0.0734],
         [0.3770],
         [0.1839]],

        [[0.3076],
         [0.7829],
         [0.3027],
         ...,
         [0.4197],
         [0.1904],
         [0.5613]],

        [[0.8042],
         [0.9277],
         [0.1672],
         ...,
         [0.8647],
         [0.9535],
         [0.3313]],

        ...,

        [[0.4805],
         [0.8661],
         [0.7055],
         ...,
         [0.9009],
         [0.7961],
         [0.1773]],

        [[0.6907],
         [0.3665],
         [0.0465],
         ...,
         [0.0591],
         [0.8992],
         [0.9130]],

        [[0.1794],
         [0.0756],
         [0.6778],
         ...,
         [0.7421],
         [0.1308],
         [0.2148]]]), tensor([[ 0.2560, -0.5357,  0.8831,  ..., -0.8748, -1.3622, -0.1066],
        [ 0.2249, -0.4623,  1.1304,  ..., -0.9588, -1.2924, -0.2136],
        [-0.6303, -0.4600,  0.8661,  ..., -2.1966,  0.7831,

loss  1.43 | pos loss   nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan, 1.43, lr 1.2e-05val score 2.3354156017303467:  40%|████      | 4/10 [02:29<03:52, 38.78s/it]              

DATA
[tensor([[[0.4407],
         [0.5559],
         [0.0921],
         ...,
         [0.5595],
         [0.8722],
         [0.6248]],

        [[0.0073],
         [0.1786],
         [0.7514],
         ...,
         [0.4918],
         [0.0072],
         [0.0605]],

        [[0.8888],
         [0.2167],
         [0.1449],
         ...,
         [0.3816],
         [0.1762],
         [0.9033]],

        ...,

        [[0.7838],
         [0.5929],
         [0.8853],
         ...,
         [0.7938],
         [0.7920],
         [0.9870]],

        [[0.8184],
         [0.5608],
         [0.6933],
         ...,
         [0.1598],
         [0.4521],
         [0.8974]],

        [[0.7680],
         [0.4381],
         [0.1312],
         ...,
         [0.3620],
         [0.2682],
         [0.5819]]]), tensor([[ 1.3778e+00, -1.7112e-04,  4.2705e-01,  ..., -7.4728e-01,
         -3.8520e-01, -1.7718e+00],
        [ 1.1191e+00,  3.7013e-01,  4.2322e-01,  ..., -6.8114e-01,
          8.4939e-01, -5.0149

In [None]:
# Sizes: the dataset has 15 points, of which all but the last 5 act as context.
num_points_in_dataset = 15
num_training_points = num_points_in_dataset - 5

# Length scale is sampled from the narrow interval Uniform(0.4, 0.41).
hyperparameters = {
    'kernel': "rbf",
    'length_scale': 0.4,
    "length_scale_sampling": DistributionSampler(dist.Uniform(low=0.4, high=0.41)),
}

# With this prior the call yields a fourth value, bound to `lengthscale`.
# The leading 1 is presumably the number of datasets and the trailing 1 the
# number of features -- TODO confirm.
train_X, train_Y, y_target, lengthscale = prior.get_datasets_from_prior(
    1,
    num_points_in_dataset,
    1,
    **hyperparameters,
)

# Move the tensors used below to the device the model runs on.
train_X, train_Y, y_target = train_X.to(device), train_Y.to(device), y_target.to(device)

In [None]:
import seaborn as sns 
model = model.to(device)

# Context portion of the sampled dataset.
train_x = train_X[:num_training_points]
train_y = train_Y[:num_training_points]


print(train_x.shape, train_y.shape)
with torch.no_grad():
    logits = model((train_x, train_y), context_pos=num_training_points - 1)
    print(logits.shape)
    # Normalise the logits over the last dimension into probabilities.
    # softmax is used directly instead of the exp(log_softmax(.)) round trip.
    outputs = torch.softmax(logits, -1)
outputs[1]

In [None]:
import matplotlib.pyplot as plt

# Bucket borders of the criterion, as a CPU numpy array.
borders = model.criterion.borders.detach().cpu().numpy()
# Drop the first border, presumably so that one border (the right edge) remains
# per bucket -- TODO confirm against BarDistribution.
borders = borders[1:]
print(borders.shape)

# outputs[1] because this is the lengthscale (original author's note).
# Converted to CPU numpy so the masking and plotting below also work when the
# model ran on CUDA.
values = torch.squeeze(outputs[1]).detach().cpu().numpy()
print(values.shape)

# Only show buckets whose probability exceeds the threshold.
threshold = 0.1
mask = values > threshold
plt.bar(borders[mask], values[mask], log=True, linewidth=0.1)
plt.show()