In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.utils.data import Dataset, DataLoader

import numpy as np

import os
import random

In [2]:
def seed_everything(seed=429):
    """Function to set reproducibility of results"""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

def clear_memory():
    """Frees up unused memory"""
    torch.cuda.empty_cache()
    gc.collect()

In [3]:
class config():
    run_id = 0
    seed = 0
    epoch = 100
    learning_rate = 0.1
    batch_size = 10
    load_W_from_file = False

    #NW architecture: Nhidden layers of Nnodes each, ip and op 1 node
    # n_in - [n_layers] x n_nodes - n_out
    n_in = 1
    n_layers = 1
    n_nodes = 5
    n_out = 1

    data_dir = './OU_q100/'
    q = 100
    hidden_q = 10
    Ntrain = 200
    Ntest = 100

In [4]:
data_dir = 'data'
train_x = np.loadtxt(os.path.join(data_dir, 'train_x.dat'))
train_y = np.loadtxt(os.path.join(data_dir, 'train_y.dat'))
test_x = np.loadtxt(os.path.join(data_dir, 'test_x.dat'))
test_y = np.loadtxt(os.path.join(data_dir, 'test_y.dat'))
train_x = train_x[:config.Ntrain].reshape((-1, 1, config.q))
train_y = train_y[:config.Ntrain].reshape((-1, 1, config.q))
test_x = test_x[:config.Ntest].reshape((-1, 1, config.q))
test_y = test_y[:config.Ntest].reshape((-1, 1, config.q))

class DistDataset(Dataset):
    def __init__(self, x_df, y_df):
        self.x_df = x_df
        self.y_df = y_df

    def __len__(self):
        return self.x_df.shape[0]

    def __getitem__(self, index):
        x = x_df[index]
        target = y_df[index] 
        
        return x, target

In [5]:
class JSDivLoss(nn.Module):
    def __init__(self):
        super(JSDivLoss, self).__init__()
        self.kl = nn.KLDivLoss(reduction='batchmean', log_target=True)

    def forward(self, p: torch.Tensor, q: torch.Tensor):
        p, q = p.view(-1, p.size(-1)), q.view(-1, q.size(-1))
        m = (0.5 * (p + q)).log()
        return 0.5 * (self.kl(m, p.log())) + self.kl(m, q.log())

class CustomLayer(nn.Module):
    def __init__(self, 
        n_lower : int, n_upper : int, 
        q_lower : int, q_upper : int,
        weight_init=0.1,
        weight_init_method='uniform'
    ):
        # factory_kwargs = {'device': torch.device, 'dtype': dtype}
        super(CustomLayer, self).__init__()
        self.n_lower = n_lower
        self.n_upper = n_upper
        self.q_lower = q_lower
        self.q_upper = q_upper
        self.weight_init = weight_init
        self.weight_init_method = weight_init_method

        self.D = self.initD(n_lower, q_upper, n_lower, n_upper)
        self.init_weights()
        
    def init_weights(self):
        # if self.weight_init_method == 'uniform': # IMPLEMENT LATER
        self.weights = nn.Parameter(init.uniform_(torch.empty(self.n_upper, self.n_lower), a=-self.weight_init, b=self.weight_init))
        self.ba = nn.Parameter(init.uniform_(torch.empty(self.n_upper, 1), a=-self.weight_init, b=self.weight_init))
        self.bq = nn.Parameter(init.uniform_(torch.empty(self.n_upper, 1), a=-self.weight_init, b=self.weight_init))
        self.lama = nn.Parameter(init.uniform_(torch.empty(self.n_upper, 1), a=0, b=1))
        self.lamq = nn.Parameter(init.uniform_(torch.empty(self.n_upper, 1), a=0, b=1))
        # elif self.weight_init_method == 'normal':
        # elif self.weight_init_method == 'glorot_normal':
    
    def initD(self, q_lower, q_upper, n_lower, n_upper):
        D_np = np.zeros((q_upper, q_lower))

        for s1 in range(q_upper):
            for s0 in range(q_lower):
                D_np[s1, s0] = np.exp(-((float(s0)/q_lower - float(s1)/q_upper) ** 2)) # suggest for improvement
        
        Dnp = D_np.reshape((q_upper, q_lower, 1, 1))
        D_tensor = torch.tensor(Dnp, dtype=torch.float32)
        D = torch.tile(D_tensor, [1, 1, n_upper, n_lower])
        return D
    
    # returns log(exp(B)) which is B
    def cal_logexp_bias(self, q):
        # each contains multiple nodes bias values, of size nu x 1
        s0 = torch.tensor(torch.arange(q).reshape((1, q)))

        # need account for multiple nodes in layer
        # s0 - b : (1 x q) x (nu x 1) = nu x q
        B = -(self.bq * torch.pow(s0 / q - self.lamq, 2) + self.ba * torch.abs(s0 / q - self.lama))
        return B

    def forward(self, P):
        # MIGHT HAVE PROLEMS HERE LATER WITH BATCH SIZE (BE VARY)
        Ptile = torch.tile(torch.reshape(P,[-1, 1, self.n_lower, self.q_lower, 1]), [1, self.n_upper, 1, 1, 1])  # bs x nu x nl x ql x 1
        T = torch.permute(torch.pow(self.D, self.weights), [2, 3, 0, 1])
        # T = torch.transpose(torch.pow(self.D, self.weights), [2, 3, 0, 1])  # nu x nl x qu x ql
        Pw_unclipped = torch.squeeze(torch.einsum('jklm,ijkmn->ijkln', T, Ptile), dim=4)   # bs x nu x nl x qu x 1 -> bs x nu x nl x qu
         # clip Pw by value to prevent zeros when weight is large
        Pw = torch.clamp(Pw_unclipped, 1e-15, 1e+15)
        
        # perform underflow handling (product of probabilities become small as no. neighbors increase)
        # 1. log each term in Pw
        logPw = torch.log(Pw)  # bs x nu x nl x qu
        # 2. sum over neighbors
        logsum = torch.sum(logPw, axis=2)       # bs x nu x qu
        # 3. log of exp of bias terms: log(expB) = exponent_B
        exponent_B = self.cal_logexp_bias(self.q_upper)  # nu x q
        # 4. add B to logsum
        logsumB = torch.add(logsum, exponent_B)          # bs x nu x qu
        # 5. find max over s0
        (max_logsum, max_logsum_indices) = torch.max(logsumB, dim=2, keepdim=True)    # bs x nu x qu
        # 6. subtract max_logsum and exponentiate (the max term will have a result of exp(0) = 1, preventing underflow)
        # Now all terms will have been multiplied by exp(-max)
        expm_P = torch.exp(torch.subtract(logsumB, max_logsum))        # bs x nu x qu
        # normalize
        Z = torch.sum(expm_P, dim=2, keepdim=True)
        y_normalised = torch.div(expm_P, Z)
        
        return y_normalised

class DRN(nn.Module):
    def __init__(self, 
        in_features: int = 1,
        num_layers: int = 1,
        num_nodes: int = 5,
        out_features: int = 1,
        q: int = 100, 
        hidden_q: int = 10
    ):
        super(DRN, self).__init__()

        if num_layers == 0:
            self.layer1 = CustomLayer(in_features, out_features, q, q)
        else: 
            self.layer_1 = CustomLayer(in_features, num_nodes, q, hidden_q)
            for layer in range (2, num_layers+1):
                setattr(self, f'layer_{layer}', CustomLayer(num_nodes, num_nodes, hidden_q, hidden_q))
            self.final_layer = CustomLayer(num_nodes, out_features, hidden_q, q)
            
    def forward(self, x):
        yout = x
        for layer in self.children():
            yout = layer(yout)
        return yout


In [6]:
seed_everything()
model = DRN()

In [7]:
layer = CustomLayer(1, 5, 100, 10)

In [8]:
x = torch.tensor(train_x[0:10])
# print(x)
# torch.reshape(x, [-1, 1, 1, 100, 1])
# print(x)
# print(x)
# layer(x).shape
model(x)

  s0 = torch.tensor(torch.arange(q).reshape((1, q)))


RuntimeError: einsum(): operands do not broadcast with remapped shapes [original->remapped]: [1, 5, 100, 5]->[1, 1, 5, 100, 1, 5] [10, 1, 5, 10, 1]->[10, 1, 5, 1, 1, 10]

In [None]:
seed_everything()
model = DRN()

train_set = DistDataset(x=train_x, y=train_y)
val_set = DistDataset(x=test_x, y=test_y)

seed_everything()
training_loader = DataLoader(
    train_set,
    batch_size=config.batch_size,
    pin_memory=True,
    drop_last=True,
    shuffle=True,
    num_workers=4
)
validation_loader = DataLoader(
    val_set,
    batch_size=config.batch_size,
    pin_memory=True,
    drop_last=True,
    shuffle=False,
    num_workers=4
)

seed_everything()
mse_loss = nn.MSELoss()
jds_loss = JSDivLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

TypeError: __init__() got an unexpected keyword argument 'x'

In [None]:
def train(epoch, model, training_loader, loss_fn, optimizer, Config):
    model.train()
    running_loss = 0
    pbar = tqdm(enumerate(training_loader), total=len(training_loader))
    for i, (x, target) in pbar:
        optimizer.zero_grad()

        output = model(x)

        loss = loss_fn(output, target)

        loss.backward()

        optimizer.step()

        running_loss = loss.item()
        description = f"Train Loss: {running_loss}"
        pbar.set_descriptio(description)

        clear_memory()


# def validate():
#     model.eval()
#     running_loss = 0
#     pbar = tqdm(enumerate(training_loader), total=len(training_loader))
#     for i, (x, target) in pbar:
#         optimizer.zero_grad()

#         output = model(x)

#         loss = loss_fn(output, target)

#         loss.backward()

#         optimizer.step()

#         running_loss = loss.item()
#         description = f"Train Loss: {running_loss}"
#         pbar.set_descriptio(description)

#         clear_memory()

In [None]:
import torch

# Create the input tensor
input_tensor = torch.randn(10, 1, 100)

# Reshape the tensor
reshaped_tensor = torch.reshape(input_tensor, (-1, 1, 1, 100, 1))
print(input_tensor.shape)
print(reshaped_tensor.shape)


torch.Size([10, 1, 100])
torch.Size([10, 1, 1, 100, 1])


In [None]:
print(input_tensor)

tensor([[[ 1.0483e+00, -7.0600e-02,  8.2982e-01, -2.7604e-01,  6.5104e-01,
          -1.0764e+00,  1.1277e+00, -1.2005e+00,  2.1778e-01,  6.5981e-02,
           2.6278e-01, -4.3101e-01,  1.9239e+00, -1.0497e+00,  6.3626e-01,
           1.0500e+00, -1.1182e-01, -2.7996e-01, -1.3441e+00, -8.7905e-01,
           2.6070e-02,  1.3441e-01,  4.2098e-01,  2.3166e-01, -1.3429e+00,
          -9.5078e-01, -2.2974e+00,  2.3790e-01, -1.4965e+00,  9.6083e-02,
          -4.2727e-01, -2.9522e+00,  5.6465e-01, -7.5040e-01, -1.5394e+00,
           2.1988e-01,  3.7527e-02, -8.9407e-02, -6.1276e-01,  1.0504e+00,
          -7.7325e-01, -7.0889e-01, -2.6448e-01,  3.3139e-01, -1.2745e+00,
          -7.4332e-01,  4.8268e-01, -3.2979e-01, -1.6409e+00,  3.3546e-02,
           1.5255e+00,  7.2267e-01, -7.5387e-01,  1.3723e+00, -1.0036e+00,
           2.2944e-01,  9.8739e-01,  1.0094e+00, -1.9446e+00, -2.0813e+00,
           3.8591e-02,  3.4954e-01,  6.8550e-01, -1.9231e-01,  6.9675e-01,
           8.5388e-01,  5

torch.Size([10, 1, 5, 1]) torch.Size([5, 1])


TypeError: transpose() received an invalid combination of arguments - got (Tensor, list), but expected one of:
 * (Tensor input, int dim0, int dim1)
 * (Tensor input, name dim0, name dim1)


In [None]:
for epoch in range(config.epoch):
    print(f'Epoch {epoch + 1}')
    print(config.learning_rate)
    train(epoch, model, training_loader, loss_fn, optimizer, Config)
    validate(epoch, model, validation_loader, loss_fn, Config)

In [None]:
qu, ql, nu, nl = 2, 4, 3, 6
D = torch.tensor((qu, ql, nu, nl))
W = torch.tensor((nu, nl))
torch.pow(D, W)

RuntimeError: The size of tensor a (4) must match the size of tensor b (2) at non-singleton dimension 0

In [None]:
torch.transpose(torch.pow(D, W), [2, 3, 0, 1])

RuntimeError: The size of tensor a (4) must match the size of tensor b (2) at non-singleton dimension 0