# Required Imports

In [4]:
import sys, random, math, pickle
from time import time
import numpy as np
import gc
import matplotlib
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import MSELoss
import torch.nn.functional as F
from datetime import timedelta
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import pytorch_lightning as pl
import torchmetrics.functional as FM
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.plugins import DDPPlugin
from pytorch_lightning.callbacks import ModelCheckpoint
sys.path.append('../DG/gan')

# Loading Real Train Data

In [5]:
def get_one_class(X,Y_labels,flag,class_label):
    indices_class_label = np.where(Y_labels==class_label)
    return X[indices_class_label], Y_labels[indices_class_label], flag[indices_class_label] 
    
def get_n_samples(X,Y_labels,flag,n_samples):
    randomList = random.sample(range(0, Y_labels.shape[0]), n_samples)
    return X[randomList], Y_labels[randomList], flag[randomList]

# In real data, if flag sum is 1 --> Then no timestep at all. 
            # So we do remove those ones by converting them to zeros, then return only non-zero flags indices
# In real data, there is no flag of length ZERO
def remove_zero_datapoints(X,Y_labels,flag):
    indices_non_zero = torch.nonzero(torch.sum(flag,1)-1).squeeze()
    return X[indices_non_zero], Y_labels[indices_non_zero], flag[indices_non_zero]


In [6]:
training_real = np.load('../data/google/data_train_reduced.npz')

real_train_X = torch.from_numpy(training_real['data_feature']).float() #[50000, 2500, 9]
real_train_Y = torch.from_numpy(training_real['data_attribute']) #[50000,4]
real_train_Y_labels = torch.argmax(real_train_Y,1) #[50000,]  returns a list of the class label, no one hot encoding any more
real_train_flags = torch.from_numpy(training_real['data_gen_flag'])   # (50000, 2500)

real_train_X,real_train_Y_labels,real_train_flags = remove_zero_datapoints(real_train_X,real_train_Y_labels,real_train_flags)

real_train_lengths = torch.sum(real_train_flags,1).long()

real_train_masks = real_train_flags == 0

In [7]:
val_real = np.load('../data/google/data_train_val.npz')

real_val_X = torch.from_numpy(val_real['data_feature']).float() #[50000, 2500, 9]
real_val_Y = torch.from_numpy(val_real['data_attribute']) #[50000,4]
real_val_Y_labels = torch.argmax(real_val_Y,1) #[50000,]  returns a list of the class label, no one hot encoding any more
real_val_flags = torch.from_numpy(val_real['data_gen_flag'])   # (50000, 2500)

real_val_X,real_val_Y_labels,real_val_flags = remove_zero_datapoints(real_val_X,real_val_Y_labels,real_val_flags)

real_val_masks = real_val_flags == 0

In [8]:
test_real = np.load('../data/google/data_test_reduced.npz')

real_test_X = torch.from_numpy(test_real['data_feature']).float() #[50000, 2500, 9]
real_test_Y = torch.from_numpy(test_real['data_attribute']) #[50000,4]
real_test_Y_labels = torch.argmax(real_test_Y,1) #[50000,]  returns a list of the class label, no one hot encoding any more
real_test_flags = torch.from_numpy(test_real['data_gen_flag'])   # (50000, 2500)

real_test_X,real_test_Y_labels,real_test_flags = remove_zero_datapoints(real_test_X,real_test_Y_labels,real_test_flags)

real_test_masks = real_test_flags == 0

# Dataset and DataLoader

In [9]:
B = real_train_X.size(0)
S = real_train_X.size(1)
E = real_train_X.size(2)

# 1- Shift the targets
Input_shifted = real_train_X[:,1:]
Zero_at_the_end = torch.zeros((B,1,E))
targets = torch.cat((Input_shifted,Zero_at_the_end),1) # real_train_X shifted to the left one timestep

targets=  targets[:,:400]
real_train_masks = real_train_masks[:,:400]
real_train_X = real_train_X[:,:400]
real_train_flags = real_train_flags[:,:400]
real_train_lengths = torch.sum(real_train_flags,1).long()


S = real_train_X.size(1)

params_dataloader = {'shuffle': True,'num_workers':8 ,'batch_size':128} # No need to shuffle rn, they are all the same class
# "num_workers" is how many subprocesses to use for data loading.
dataset = torch.utils.data.TensorDataset(real_train_X, targets, real_train_lengths, real_train_masks)
train_dataloader  = torch.utils.data.DataLoader(dataset, **params_dataloader)

In [10]:
# Validation Dataset and DataLoader 

B = real_val_X.size(0)
S = real_val_X.size(1)
E = real_val_X.size(2)

Input_shifted = real_val_X[:,1:]
Zero_at_the_end = torch.zeros((B,1,E))
targets = torch.cat((Input_shifted,Zero_at_the_end),1) # real_train_X shifted to the left one timestep

targets=  targets[:,:400]
real_val_masks = real_val_masks[:,:400]
real_val_X = real_val_X[:,:400]

real_val_flags = real_val_flags[:,:400]
real_val_lengths = torch.sum(real_val_flags,1).long()

S = real_val_X.size(1)

params_dataloader = {'shuffle': False,'num_workers':8 ,'batch_size':128} # No need to shuffle rn, they are all the same class
dataset = torch.utils.data.TensorDataset(real_val_X, targets, real_val_lengths, real_val_masks)
val_dataloader  = torch.utils.data.DataLoader(dataset, **params_dataloader)

# TST

In [11]:
class PositionalEncoding2(nn.Module):

    def __init__(self, d_model, max_len=5000):
        """
        Inputs
            d_model - Hidden dimensionality of the input.
            max_len - Maximum length of a sequence to expect.
        """
        super().__init__()

        # Create matrix of [SeqLen, HiddenDim] representing the positional encoding for max_len inputs
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)

        # register_buffer => Tensor which is not a parameter, but should be part of the modules state.
        # Used for tensors that need to be on the same device as the module.
        # persistent=False tells PyTorch to not add the buffer to the state dict (e.g. when we save the model)
        self.register_buffer('pe', pe) # ,persistent=False)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return x

In [12]:
class TimeSeriesTransformer(pl.LightningModule):

    def __init__(self, n_features=9, d_model=256, n_heads=8, n_hidden=256, n_layers=8, dropout=0.0, S=400):
        super().__init__()
        self.model_type = 'Time Series Transformer Model'
        self.InputLinear = nn.Linear(n_features, d_model)
        
        self.positional_encoding = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(d_model, n_heads, n_hidden, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, n_layers)
        
        self.d_model = d_model
        self.n_features = n_features
        
        self.OutputLinear = nn.Linear(d_model, n_features) # The output of the encoder is similar to the input of the encoder, both are (B,S,d_model)
        self.init_weights()
     
        
    def generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float(-1e6)).masked_fill(mask == 1, float(0.0))
        return mask 

    def init_weights(self):
        initrange = 0.1
        self.InputLinear.weight.data.uniform_(-initrange, initrange)
        self.OutputLinear.bias.data.zero_()
        self.OutputLinear.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, src_mask,padding_mask):
        src = self.InputLinear(src) * math.sqrt(self.d_model)
        src = self.positional_encoding(src)
        output = self.transformer_encoder(src, src_mask,padding_mask)
        output = self.OutputLinear(output.permute(1,0,2))
        return output 
    
    def training_step(self, batch, batch_idx):

        X,target,lengths,padding_mask = batch
        src_mask = self.generate_square_subsequent_mask(S).cuda()
        X = X.permute(1,0,2)
        padding_mask = torch.cat((torch.zeros((X.shape[1],2),dtype=torch.bool), (torch.ones((X.shape[1],398),dtype=torch.bool))),1).cuda()
        class_probs  = self(X,None,padding_mask)
        lengths -=1
        loss = nn.CrossEntropyLoss()(class_probs, lengths )
        
        return {'loss': loss,} # will call loss.backward() on what we return exactly. 
    
    def training_epoch_end(self, outputs):
        if((self.current_epoch+1)%100==0):
            torch.save(self.state_dict(), 'W_transformer_token_V9')
        print("Epoch Loss:",torch.stack([x["loss"] for x in outputs]).mean().item())

    # Lightning disables gradients, puts model in eval mode, and does everything needed for validation.
    def validation_step(self, batch, batch_idx):
        X,target,lengths,padding_mask = batch
        src_mask = self.generate_square_subsequent_mask(S).cuda()
        X = X.permute(1,0,2)
        padding_mask = torch.cat((torch.zeros((X.shape[1],2),dtype=torch.bool), (torch.ones((X.shape[1],398),dtype=torch.bool))),1).cuda()
        
        class_probs  = self(X,None,padding_mask)
        lengths -=1
        loss = nn.CrossEntropyLoss()(class_probs, lengths )
        
        self.log('val_loss', loss)
        return {'val_loss': loss,} # We may return the predictions themselves
    
    def validation_epoch_end(self, outputs):
        print("Validation Loss:",torch.stack([x["val_loss"] for x in outputs]).mean().item())
        
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.0001)
    


In [14]:
model.generate_square_subsequent_mask(S).shape

torch.Size([400, 400])

In [13]:

model = TimeSeriesTransformer() 
ck = torch.load('../lightning_logs/version_19/checkpoints/epoch=399-step=76799.ckpt')['state_dict']
model.load_state_dict(ck)
for param in model.parameters():
    param.requires_grad = False

model.OutputLinear  = nn.Sequential(nn.Flatten(1),nn.Linear(256*400,400) )

# Eval

In [13]:
# ck = torch.load('lightning_logs/version_108/checkpoints/epoch=2-step=575.ckpt')['state_dict']
# # The minimum loss was  1.4949455261230469, but if all data available not only the first two timesteps, it was 0.3219143748283386
# model.load_state_dict(ck)
# model.eval()

TimeSeriesTransformer(
  (InputLinear): Linear(in_features=9, out_features=256, bias=True)
  (positional_encoding): PositionalEncoding(
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=256, out_features=256, bias=True)
        )
        (linear1): Linear(in_features=256, out_features=256, bias=True)
        (dropout): Dropout(p=0.0, inplace=False)
        (linear2): Linear(in_features=256, out_features=256, bias=True)
        (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.0, inplace=False)
        (dropout2): Dropout(p=0.0, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=256

In [21]:
# a = 300
# b = 500
# src_mask = model.generate_square_subsequent_mask(S)
# x = torch.cat((real_train_X[a:b,:2],torch.zeros((200,398,9))),1)
# padding_mask = torch.cat((real_train_masks[a:b,:2],torch.ones((200,398),dtype=torch.bool)),1)

# # x= real_train_X[2400:2600]
# # padding_mask = real_train_masks[2400:2600]
# testt = model(x.permute(1,0,2),None,padding_mask)

In [23]:
# for i,j in zip(F.softmax(testt).argmax(1),real_train_lengths[a:b]):
#     print(i,j)

# Classifier - MLP (not pretrained)

In [17]:
class MLPModel(pl.LightningModule):
    def __init__(self,n_features=9,n_timesteps=2,n_hidden=128,n_output=400):
        super().__init__()
        # need to be (self) in order to be optimized and part of model
        self.model = nn.Sequential(
                  nn.Flatten(start_dim=1),
                  nn.Linear(n_features*n_timesteps,1024),
                  nn.ReLU(),
                    nn.Linear(1024,256),
                  nn.ReLU(),
                  nn.Linear(256,n_output),
                )
    def forward(self,x):
        return self.model(x)
    
     
    def training_step(self, batch, batch_idx):

        X,target,lengths,padding_mask = batch
        #padding_mask = torch.cat((torch.zeros((X.shape[1],2),dtype=torch.bool), (torch.ones((X.shape[1],398),dtype=torch.bool))),1).cuda()
        
        class_probs  = self(X[:,:2])
        lengths -=1
        loss = nn.CrossEntropyLoss()(class_probs, lengths )
        
        return {'loss': loss,} # will call loss.backward() on what we return exactly. 
    
    def training_epoch_end(self, outputs):
        if((self.current_epoch+1)%100==0):
            torch.save(self.state_dict(), 'W_transformer_token_V9.1')
        print("Epoch Loss:",torch.stack([x["loss"] for x in outputs]).mean().item())

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)

In [18]:
def main():
    time_all = time()
    mlp_model = MLPModel()
    checkpoint_callback = ModelCheckpoint()
    trainer = pl.Trainer(gpus=1,max_epochs=100, progress_bar_refresh_rate=50,check_val_every_n_epoch=3,
                        callbacks=[checkpoint_callback],)
    trainer.fit(mlp_model,train_dataloader)
    print("Total Time (in minutes) is {}".format( timedelta(seconds=(time()-time_all))))
    print(checkpoint_callback.best_model_path)

if __name__ == '__main__':
    main()

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 1.4 M 
-------------------------------------
1.4 M     Trainable params
0         Non-trainable params
1.4 M     Total params
5.737     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Epoch Loss: 2.7630443572998047
Epoch Loss: 2.2068989276885986
Epoch Loss: 2.123223304748535
Epoch Loss: 2.08376145362854
Epoch Loss: 2.04807186126709
Epoch Loss: 2.033076286315918
Epoch Loss: 2.014946460723877
Epoch Loss: 2.011458158493042
Epoch Loss: 1.9921491146087646
Epoch Loss: 1.981168508529663
Epoch Loss: 1.9713590145111084
Epoch Loss: 1.9637658596038818
Epoch Loss: 1.957294225692749
Epoch Loss: 1.9503129720687866
Epoch Loss: 1.9459199905395508
Epoch Loss: 1.9419441223144531
Epoch Loss: 1.9342927932739258
Epoch Loss: 1.9297717809677124
Epoch Loss: 1.9208972454071045
Epoch Loss: 1.9157543182373047
Epoch Loss: 1.9099369049072266
Epoch Loss: 1.8982248306274414
Epoch Loss: 1.8949275016784668
Epoch Loss: 1.8950470685958862
Total Time (in minutes) is 0:01:24.508482
/rdata/yelnady/DoppelGANger/Token/lightning_logs/version_116/checkpoints/epoch=24-step=4798.ckpt


# Training

**Notes on EarlyStopping:**
- The EarlyStopping callback runs at the **end of every validation epoch**, which, under the default configuration, happen after **every training epoch**.
-  However, the frequency of validation can be modified by setting various parameters in the Trainer, for example **check_val_every_n_epoch and val_check_interval**.
- Note that the **patience** parameter counts the number of **validation epochs with no improvement**, and **not the number of training epochs**. 
    - Therefore, with parameters **check_val_every_n_epoch=10 and patience=3**, the trainer will perform at least **40 training epochs before being stopped**. 

In [58]:
# RuntimeError: CUDA error: device-side assert triggered --> The problem it needs to be 0-399 not 1-400
def main():
    # pl.seed_everything(42, workers=True) --> sets seeds for numpy, torch, python.random and PYTHONHASHSEED.
    time_all = time()

    
    early_stop_callback = EarlyStopping(monitor='val_loss',patience=5, verbose=False, mode='min')
    checkpoint_callback = ModelCheckpoint()
#     trainer = pl.Trainer(gpus=2,max_epochs=400, progress_bar_refresh_rate=50,accelerator ='ddp',
#                         callbacks=[early_stop_callback,checkpoint_callback]
#                          ,plugins=DDPPlugin(find_unused_parameters=False,check_val_every_n_epoch=2))
    
    
    trainer = pl.Trainer(gpus=1,max_epochs=100, progress_bar_refresh_rate=50,check_val_every_n_epoch=3,
                        callbacks=[checkpoint_callback],)
    trainer.fit(model,train_dataloader,val_dataloader)
    print("Total Time (in minutes) is {}".format( timedelta(seconds=(time()-time_all))))
    print(checkpoint_callback.best_model_path)

if __name__ == '__main__':
    main()

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name                | Type               | Params
-----------------------------------------------------------
0 | InputLinear         | Linear             | 2.6 K 
1 | positional_encoding | PositionalEncoding | 0     
2 | transformer_encoder | TransformerEncoder | 3.2 M 
3 | OutputLinear        | Sequential         | 205 K 
-----------------------------------------------------------
205 K     Trainable params
3.2 M     Non-trainable params
3.4 M     Total params
13.496    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Validation Loss: 6.091458320617676


Training: 0it [00:00, ?it/s]

Epoch Loss: 3.2524185180664062
Epoch Loss: 2.2478199005126953
Epoch Loss: 2.161529541015625


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.3052306175231934
Epoch Loss: 2.11668062210083
Epoch Loss: 2.0964949131011963
Epoch Loss: 2.0840518474578857


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.475083351135254
Epoch Loss: 2.07138991355896
Epoch Loss: 2.0565011501312256
Epoch Loss: 2.05267071723938


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.5716495513916016
Epoch Loss: 2.044581174850464
Epoch Loss: 2.0346930027008057
Epoch Loss: 2.0320231914520264


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.625115156173706
Epoch Loss: 2.0270254611968994
Epoch Loss: 2.016049861907959
Epoch Loss: 2.0125222206115723


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.684926748275757
Epoch Loss: 2.0080485343933105
Epoch Loss: 2.0049023628234863
Epoch Loss: 1.998694658279419


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.730433464050293
Epoch Loss: 1.9966742992401123
Epoch Loss: 1.988674521446228
Epoch Loss: 1.9874091148376465


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.7565619945526123
Epoch Loss: 1.985229253768921
Epoch Loss: 1.9836041927337646
Epoch Loss: 1.9827523231506348


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.786376476287842
Epoch Loss: 1.976083755493164
Epoch Loss: 1.970283031463623
Epoch Loss: 1.9712257385253906


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.8001856803894043
Epoch Loss: 1.972574234008789
Epoch Loss: 1.9662559032440186
Epoch Loss: 1.9608850479125977


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.828634738922119
Epoch Loss: 1.9601223468780518
Epoch Loss: 1.9567625522613525
Epoch Loss: 1.9576104879379272


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.8663177490234375
Epoch Loss: 1.9549705982208252
Epoch Loss: 1.9528136253356934
Epoch Loss: 1.9532811641693115


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.875904083251953
Epoch Loss: 1.9513275623321533
Epoch Loss: 1.9467430114746094
Epoch Loss: 1.9440553188323975


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.906177520751953
Epoch Loss: 1.9465984106063843
Epoch Loss: 1.9427865743637085
Epoch Loss: 1.9402536153793335


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.890829086303711
Epoch Loss: 1.9435124397277832
Epoch Loss: 1.9408477544784546
Epoch Loss: 1.9376882314682007


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.9257469177246094
Epoch Loss: 1.9347093105316162
Epoch Loss: 1.9337749481201172
Epoch Loss: 1.9339559078216553


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.934978485107422
Epoch Loss: 1.931655764579773
Epoch Loss: 1.9308128356933594
Epoch Loss: 1.9263185262680054


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.948209285736084
Epoch Loss: 1.9263191223144531
Epoch Loss: 1.9267730712890625
Epoch Loss: 1.9249358177185059


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.9626994132995605
Epoch Loss: 1.928309440612793
Epoch Loss: 1.9228929281234741
Epoch Loss: 1.9222674369812012


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.9650092124938965
Epoch Loss: 1.9203437566757202
Epoch Loss: 1.9167022705078125
Epoch Loss: 1.9171184301376343


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.982018232345581
Epoch Loss: 1.9175441265106201
Epoch Loss: 1.9160118103027344
Epoch Loss: 1.9131884574890137


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.991971969604492
Epoch Loss: 1.9176900386810303
Epoch Loss: 1.910217046737671
Epoch Loss: 1.913451910018921


Validating: 0it [00:00, ?it/s]

Validation Loss: 3.9965872764587402
Epoch Loss: 1.9134769439697266
Epoch Loss: 1.9105905294418335
Epoch Loss: 1.9067248106002808


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.010469436645508
Epoch Loss: 1.9113296270370483
Epoch Loss: 1.9083633422851562
Epoch Loss: 1.9048479795455933


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.00966215133667
Epoch Loss: 1.90632963180542
Epoch Loss: 1.907663106918335
Epoch Loss: 1.9059743881225586


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.025118350982666
Epoch Loss: 1.9039463996887207
Epoch Loss: 1.9058631658554077
Epoch Loss: 1.8995327949523926


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.035505294799805
Epoch Loss: 1.902456283569336
Epoch Loss: 1.8992919921875
Epoch Loss: 1.8987665176391602


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.0524210929870605
Epoch Loss: 1.8947362899780273
Epoch Loss: 1.904262900352478
Epoch Loss: 1.8983545303344727


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.0392608642578125
Epoch Loss: 1.8942921161651611
Epoch Loss: 1.8970961570739746
Epoch Loss: 1.8928836584091187


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.052065372467041
Epoch Loss: 1.8952707052230835
Epoch Loss: 1.897621989250183
Epoch Loss: 1.8892712593078613


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.063403606414795
Epoch Loss: 1.8936184644699097
Epoch Loss: 1.8957316875457764
Epoch Loss: 1.8922133445739746


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.064950942993164
Epoch Loss: 1.891387939453125
Epoch Loss: 1.8935546875
Epoch Loss: 1.8899495601654053


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.088421821594238
Epoch Loss: 1.8908867835998535
Epoch Loss: 1.8894509077072144
Epoch Loss: 1.8870067596435547


Validating: 0it [00:00, ?it/s]

Validation Loss: 4.082433700561523
Epoch Loss: 1.8845741748809814
Total Time (in minutes) is 0:09:32.818214
/rdata/yelnady/DoppelGANger/Token/lightning_logs/version_111/checkpoints/epoch=2-step=575.ckpt


In [12]:
gc.collect()

7