# Required Imports

In [1]:
import sys, random, math, pickle
from time import time
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn import MSELoss
import seaborn as sns
# NOTE(review): duplicate of the `torch.nn.functional as F` import a few lines above.
import torch.nn.functional as F
# Use the GPU with index 1 when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): "cuda:1" presumably requires a machine with at least two GPUs - confirm.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
# Make modules stored under the local DG/gan directory importable.
sys.path.append('DG/gan')
import gc
print(device)
from torch.nn import TransformerEncoder, TransformerEncoderLayer


cuda:1


In [2]:
from pynvml import *

# Query the memory counters of GPU index 1 through NVML.
nvmlInit()
h = nvmlDeviceGetHandleByIndex(1)
info = nvmlDeviceGetMemoryInfo(h)

# Print the three counters, one per line.
for report_line in (
    f'total    : {info.total}',
    f'free     : {info.free}',
    f'used     : {info.used}',
):
    print(report_line)

total    : 50962169856
free     : 50958106624
used     : 4063232


# Import Real Training Data to Generate New Data from it.

### Actual Distribution
- Class0: 6250
- Class1: 16124
- Class2: 21273
- Class3: 5278

In [3]:
def get_one_class(X,Y,mask,class_label):
    """Return the samples of one class, minus those whose mask row sums to exactly 1.

    Args:
        X: features of the whole (multi-class) dataset, indexed by sample on axis 0.
        Y: class label per sample. Y is NOT one-hot encoded.
        mask: per-sample mask, indexed by sample on axis 0 and summed over axis 1.
        class_label: the label value to keep.

    Returns:
        Tuple ``(X, Y, mask)`` restricted to the selected samples. The sample
        axis is always kept, even when zero or one sample is selected.

    NOTE(review): the notebook calls this with the inverted padding mask
    (True = padding). In that case the ``sum - 1`` filter drops samples with
    exactly one padded position rather than samples with one real timestep -
    confirm this is intended.
    """
    indices_class_label = np.where(Y==class_label)
    X,Y,mask = X[indices_class_label], Y[indices_class_label], mask[indices_class_label]
    # torch.nonzero returns shape (k, 1) here. Squeeze only that trailing axis:
    # a bare squeeze() would turn a single surviving index into a 0-dim tensor
    # and the indexing below would then silently drop the sample axis.
    indices_non_zero = torch.nonzero(torch.sum(mask,1)-1).squeeze(1)
    return X[indices_non_zero], Y[indices_non_zero], mask[indices_non_zero]

def get_n_samples(X,Y,mask,n_samples):
    """Pick `n_samples` distinct random samples and return their X, Y and mask rows.

    The row positions are drawn without replacement by ``random.sample`` over
    ``range(Y.shape[0])``, so the outcome depends on the state of Python's
    global ``random`` generator. The same positions index all three inputs.
    """
    candidate_rows = range(0, Y.shape[0])
    chosen_rows = random.sample(candidate_rows, n_samples)
    picked_X = X[chosen_rows]
    picked_Y = Y[chosen_rows]
    picked_mask = mask[chosen_rows]
    return picked_X, picked_Y, picked_mask

# In the real data a flag sum of 1 means the sample has no timestep at all, so
# those samples are removed from the dataset.
# In the real data there is no sample whose flags sum to ZERO.
def remove_zero_datapoints(X,Y,mask):
    """Drop every sample whose mask row sums to exactly 1.

    Args:
        X: features, indexed by sample on axis 0.
        Y: labels, indexed by sample on axis 0.
        mask: per-sample flags, summed over axis 1.

    Returns:
        Tuple ``(X, Y, mask)`` restricted to the kept samples. The sample axis
        is always kept, even when zero or one sample remains.
    """
    # torch.nonzero returns shape (k, 1) here. Squeeze only that trailing axis:
    # a bare squeeze() would collapse a single surviving index to a 0-dim
    # tensor and the indexing below would then drop the sample axis.
    indices_non_zero = torch.nonzero(torch.sum(mask,1)-1).squeeze(1)
    return X[indices_non_zero], Y[indices_non_zero], mask[indices_non_zero]

In [4]:
# Load the real training archive from disk.
training_real = np.load('data/google/data_train.npz')

real_train_flags = torch.from_numpy(training_real['data_gen_flag'])      # (50000, 2500)
real_train_X = torch.from_numpy(training_real['data_feature']).float()   # [50000, 2500, 9]
real_train_Y = torch.from_numpy(training_real['data_attribute'])         # [50000, 4]
# One integer class label per sample instead of the one-hot encoding.
real_train_Y_labels = real_train_Y.argmax(dim=1)                          # [50000,]

# ------------------------------ Drop samples without any timestep ------------------------------
(
    real_train_X,
    real_train_Y_labels,
    real_train_flags,
) = remove_zero_datapoints(real_train_X, real_train_Y_labels, real_train_flags)

# The padding mask is the inverse of the generation flags:
# True marks padding, False marks positions that are taken into account.
padding_mask = real_train_flags.eq(0)

# Number of set flags per sample.
real_train_lengths = real_train_flags.sum(dim=1).long()

# The Two Magic Rows

In [5]:
# p1: an independent copy of the generation flags with a trailing feature axis added.
p1 = real_train_flags.clone()[:, :, None]
# p2: same shape and dtype as p1, initially all zeros.
p2 = torch.zeros_like(p1)

In [6]:
# For every sample, mark its last timestep (position length-1): clear it in p1
# and set it in p2. This is done with one vectorised advanced-index assignment
# instead of a Python loop over every sample; the resulting p1/p2 are identical.
sample_rows = torch.arange(real_train_lengths.size(0))
last_steps = real_train_lengths - 1
p1[sample_rows, last_steps] = 0
p2[sample_rows, last_steps] = 1

In [7]:
# Stack the two flag columns side by side and cast them to float so they can be
# concatenated with the float features.
magic_rows = torch.cat((p1, p2), dim=2).float()
# Append them as the last two feature columns. `magic_rows` is already a float
# tensor, so the legacy torch.FloatTensor(...) wrapper is not needed.
# NOTE: this cell is not idempotent - re-running it appends two more columns.
real_train_X = torch.cat((real_train_X, magic_rows), dim=2)

In [8]:
# Sanity check: the last dimension now includes the two appended flag columns.
real_train_X.shape

torch.Size([48925, 2500, 11])

# PyTorch Transformer Model

- TODO: move the model definition out of this notebook into a separate module.

In [9]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a sequence-first input, then apply dropout.

    The table is precomputed for `max_len` positions and stored as the buffer
    ``pe`` with shape (max_len, 1, d_model). Even feature columns hold sines
    and odd columns hold cosines.

    Args:
        d_model: size of the feature dimension of the input.
        dropout: dropout probability applied after adding the encoding.
        max_len: number of positions to precompute.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns; slicing keeps the assignment shape-compatible. For even
        # d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])
        pe = pe.unsqueeze(0).transpose(0, 1)  # (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + pe[:S])`` for an input whose axis 0 is the sequence axis.

        The result stays on the device of `x` and the ``pe`` buffer; the module
        no longer moves it to a notebook-level global device.
        """
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [10]:
class TimeSeriesTransformer(nn.Module):
    """Transformer-encoder model that maps a sequence of feature vectors to a same-sized output.

    Pipeline: linear projection to `d_model` (scaled by sqrt(d_model)),
    positional encoding, a stack of `n_layers` encoder layers, and a linear
    projection back to `n_features`. The first ``n_features - 2`` outputs go
    through a sigmoid; the last two go through a softmax over the feature axis.

    Args:
        n_features: size of the input and output feature dimension.
        d_model: internal model width.
        n_heads: number of attention heads per encoder layer.
        n_hidden: width of the feed-forward sublayer of each encoder layer.
        n_layers: number of stacked encoder layers.
        dropout: dropout probability used by the positional encoding and the encoder layers.
    """

    def __init__(self, n_features=11, d_model=512, n_heads=8, n_hidden=512, n_layers=8, dropout=0.1):
        super().__init__()
        self.model_type = 'Time Series Transformer Model'
        self.InputLinear = nn.Linear(n_features, d_model)

        self.positional_encoding = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(d_model, n_heads, n_hidden, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, n_layers)

        self.d_model = d_model
        self.n_features = n_features

        # The encoder output has the same shape as the encoder input, so this
        # maps d_model back to n_features per position.
        self.OutputLinear = nn.Linear(d_model, n_features)

        self.init_weights()
        self.activation1 = nn.Sigmoid()
        self.activation2 = nn.Softmax(dim=2)

    def generate_square_subsequent_mask(self, sz, device=None):
        """Build the (sz, sz) additive causal attention mask.

        Entries strictly above the diagonal are -1e6 (a position may not attend
        to later positions); all other entries are 0.

        Args:
            sz: sequence length.
            device: where to place the mask. Defaults to the device of the
                model's parameters, so the method no longer depends on a
                notebook-level global.
        """
        if device is None:
            device = next(self.parameters()).device
        blocked = torch.full((sz, sz), float(-1e6))
        return torch.triu(blocked, diagonal=1).to(device)

    def init_weights(self):
        """Re-initialise the input/output projection weights uniformly in [-0.1, 0.1] and zero the output bias."""
        initrange = 0.1
        self.InputLinear.weight.data.uniform_(-initrange, initrange)
        self.OutputLinear.bias.data.zero_()
        self.OutputLinear.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, src_mask,padding_mask):
        """Run the model.

        Args:
            src: input sequence; the notebook passes it sequence-first, [S, B, n_features].
            src_mask: attention mask handed to the encoder as its ``mask``.
            padding_mask: handed to the encoder as ``src_key_padding_mask``;
                the notebook uses True for padded positions.

        Returns:
            Tensor shaped like `src`: sigmoid outputs for the first
            ``n_features - 2`` features followed by a 2-way softmax for the last two.
        """
        src = self.InputLinear(src) * math.sqrt(self.d_model)
        src = self.positional_encoding(src)
        output = self.transformer_encoder(src, mask=src_mask, src_key_padding_mask=padding_mask)
        output = self.OutputLinear(output)
        n_values = self.n_features - 2
        output1 = self.activation1(output[..., :n_values])  # the actual feature values
        output2 = self.activation2(output[..., n_values:])  # the two trailing flag columns
        return torch.cat((output1, output2), 2)
    

In [11]:

model = TimeSeriesTransformer().to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU can still be loaded when that GPU is not present (e.g. CPU-only).
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
model.load_state_dict(torch.load('class_all_weights_flags2_bce', map_location=device))
# Inference mode for the layers (disables dropout).
model.eval()
print()




In [12]:
# NOTE: sys.getsizeof reports only the size of the Python object itself, not the
# memory of the objects it refers to (such as the parameter tensors).
sys.getsizeof(model)

48

In [13]:
from pynvml import *

# Query the memory counters of GPU index 1 again, now that the model is loaded.
nvmlInit()
h = nvmlDeviceGetHandleByIndex(1)
info = nvmlDeviceGetMemoryInfo(h)

# Print the three counters, one per line.
for report_line in (
    f'total    : {info.total}',
    f'free     : {info.free}',
    f'used     : {info.used}',
):
    print(report_line)

total    : 50962169856
free     : 49526800384
used     : 1435369472


# Generating New Data

In [14]:
# All generated Data has at least three timesteps because the seed is 2

# We should stop at 2 or at least if S >= datapoint_len 

In [15]:
# Run a garbage-collection pass and ask PyTorch to release its cached GPU memory.
# The cell displays the tuple of both return values.
gc.collect(),torch.cuda.empty_cache()

(88, None)

In [22]:
################################# The following is the generating part #################################

# Produces: X (the generated data), appended to generated_dataset_X
# Produces: masks (e.g. [False,False,True,True,True,....,True]), appended to resulted_masks; False marks actual data, True marks padding

# Module-level accumulators that generate_dataset appends to (one entry per generated sample).
resulted_masks = []  # padding mask of each generated sample
generated_dataset_Y=[]  # class label of each generated sample
generated_dataset_X=[]  # feature array of each generated sample

def generate_dataset(X,Y,masks,n_seed,n_samples,max_length):
    """Autoregressively generate `n_samples` synthetic sequences seeded from real data.

    For each sample, one real datapoint is drawn at random with
    ``get_n_samples`` and its first `n_seed` timesteps are fed to the global
    ``model``. The prediction for the last position is appended as the next
    timestep. Generation stops when the sequence holds `max_length` timesteps,
    or as soon as the last feature of the new timestep is >= its
    second-to-last feature. Shorter sequences are zero-padded to `max_length`.

    Nothing is returned. Results are appended to the module-level lists:
      * ``generated_dataset_X`` - array of shape (max_length, E - 2), with the
        two trailing flag columns dropped;
      * ``generated_dataset_Y`` - the class label of the seed sample;
      * ``resulted_masks`` - padding mask of shape (1, max_length), where
        False marks seed/generated data and True marks padding.
    Every 1000 samples the lists are saved with ``np.savez`` to
    'class_all_generated_flags2_bce'.

    Args:
        X: real features, [N, T, E]; the last two features are the flag columns.
        Y: class label per real sample.
        masks: padding mask per real sample (False = real data, True = padding).
        n_seed: number of real timesteps used as the seed.
        n_samples: number of sequences to generate.
        max_length: length of every produced sequence, padding included.
    """
    for n in range(n_samples):
        # Draw one real sample; only its first n_seed timesteps seed the generation.
        datapoint, y, mask = get_n_samples(X, Y, masks, n_samples=1)
        datapoint = datapoint[:, :n_seed].permute(1, 0, 2)  # [B,S,E] -> [S,B,E]
        mask = mask[:, :n_seed]
        E = datapoint.size(2)

        # Inference only. Without no_grad every predicted timestep stays
        # attached to the autograd graph of all previous forward passes, so
        # memory grows with every generated step.
        with torch.no_grad():
            for _ in range(max_length - n_seed):  # at most up to max_length timesteps
                src_mask = model.generate_square_subsequent_mask(datapoint.size(0))
                predicted = model(datapoint.to(device), src_mask.to(device), mask.to(device))  # [S,B,E]
                # Only the prediction for the last position is needed; slice
                # before copying back to the CPU.
                one_new_timestep = predicted[-1:].cpu()
                datapoint = torch.cat((datapoint, one_new_timestep))  # add the forecasted timestep
                mask = torch.cat((mask, torch.tensor([[False]])), 1)

                # Stop signal: last flag column >= second-to-last flag column.
                if one_new_timestep[..., -1].item() >= one_new_timestep[..., -2].item():
                    break

        # Pad the remainder with zeros and mark it as padding in the mask.
        n_missing = max_length - datapoint.size(0)
        if n_missing > 0:
            datapoint = torch.cat((datapoint, torch.zeros((n_missing, 1, E))))
            mask = torch.cat((mask, torch.full((1, n_missing), True)), 1)

        resulted_masks.append(mask.numpy())
        # Drop the two flag columns, go back to batch-first and remove the batch axis of size 1.
        generated_dataset_X.append(datapoint[..., :-2].permute(1, 0, 2).squeeze(0).detach().numpy())
        generated_dataset_Y.append(y.item())
        gc.collect()
        torch.cuda.empty_cache()

        if (n%100==0):
            print('{}/{}'.format(n,n_samples))
        if (n%1000==0):
            # Periodic checkpoint of everything generated so far.
            np.savez('class_all_generated_flags2_bce',X=generated_dataset_X,masks= resulted_masks,Y=generated_dataset_Y)

# Length (in timesteps) of every generated sequence, zero padding included.
max_length = 400
# Number of real timesteps used to seed each generated sequence.
n_seed = 2
# The padding mask fed to generate_dataset uses False for real data; True marks padded positions, which are ignored.

In [25]:
# Split the real data (with its padding masks) into the four classes.
real_train_X0, real_train_Y_labels0, padding_mask0 = get_one_class(real_train_X, real_train_Y_labels, padding_mask, 0)
real_train_X1, real_train_Y_labels1, padding_mask1 = get_one_class(real_train_X, real_train_Y_labels, padding_mask, 1)
real_train_X2, real_train_Y_labels2, padding_mask2 = get_one_class(real_train_X, real_train_Y_labels, padding_mask, 2)
real_train_X3, real_train_Y_labels3, padding_mask3 = get_one_class(real_train_X, real_train_Y_labels, padding_mask, 3)

# For each class, in order, generate as many synthetic samples as there are real ones.
for class_X, class_Y_labels, class_padding_mask in (
    (real_train_X0, real_train_Y_labels0, padding_mask0),
    (real_train_X1, real_train_Y_labels1, padding_mask1),
    (real_train_X2, real_train_Y_labels2, padding_mask2),
    (real_train_X3, real_train_Y_labels3, padding_mask3),
):
    generate_dataset(
        class_X,
        class_Y_labels,
        class_padding_mask,
        n_seed=n_seed,
        n_samples=class_X.size(0),
        max_length=max_length,
    )

3 tensor(4)
0/21273
4 tensor(5)
4 tensor(6)
4 tensor(4)
9 tensor(10)
6 tensor(4)
242 tensor(4)
9 tensor(17)
7 tensor(5)
3 tensor(3)
15 tensor(5)
3 tensor(3)
176 tensor(4)
4 tensor(15)
4 tensor(4)
8 tensor(8)
9 tensor(6)
4 tensor(4)
359 tensor(5)
3 tensor(3)
5 tensor(19)
5 tensor(5)
4 tensor(8)
7 tensor(5)
4 tensor(14)
3 tensor(3)
3 tensor(3)
3 tensor(3)
9 tensor(4)
3 tensor(3)
5 tensor(5)
4 tensor(4)
9 tensor(32)


KeyboardInterrupt: 

In [None]:
# The variables are global variables

# Final save of everything accumulated in the module-level lists, to the same
# file that generate_dataset uses for its periodic checkpoints.
np.savez('class_all_generated_flags2_bce',X=generated_dataset_X,masks= resulted_masks,Y=generated_dataset_Y)

In [None]:
# Stack the generated samples into one array and show its shape.
print(np.array(generated_dataset_X).shape)