In [1]:
import math
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import io
import torchtext
from torchtext.utils import download_from_url, extract_archive
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

import pytorch_lightning as pl
from pytorch_lightning.trainer.trainer import Trainer
from pytorch_lightning import loggers as pl_loggers

from pytorch_lightning import seed_everything
# Fix the global seed before any data or model is created. `seed_everything` is
# pytorch_lightning's helper (imported above); presumably it seeds the Python,
# NumPy and torch RNGs — confirm against the Lightning docs. The cell output
# shows it returning the seed (42).
seed_everything(42)

42

In [2]:
from sklearn.model_selection import train_test_split

class AlarmDataset(Dataset):
    """Non-overlapping next-token windows over a flat token sequence.

    Item ``i`` is the pair ``(x, y)`` where ``x = data[i*seq_len:(i+1)*seq_len]``
    and ``y`` is the same window shifted one position to the right, i.e. the
    next-token target for every position of ``x``.

    Args:
        data: Sliceable 1-D sequence of token ids (anything supporting
            ``len()`` and ``data[a:b]``).
        seq_len: Number of tokens per window.
        batch_size: Stored on the instance; the dataset itself never reads it.
    """

    def __init__(self,data,seq_len,batch_size):
        # Each window needs one extra trailing token for its shifted target, so
        # only len(data) - 1 tokens can be covered by full (x, y) pairs. Without
        # the "- 1" the last target is one token short whenever len(data) is an
        # exact multiple of seq_len. max() keeps empty data at length 0.
        self.length = max(0, (len(data) - 1) // seq_len)
        self.data = data
        self.seq_len = seq_len
        self.batch_size = batch_size

    def __getitem__(self, index: int):
        """Return the ``index``-th ``(input, target)`` window pair.

        Raises:
            IndexError: if ``index`` is outside ``[0, len(self))``.
        """
        if not 0 <= index < self.length:
            raise IndexError(f"index {index} out of range for {self.length} windows")
        # Use the instance's own window size rather than a module-level global.
        start = index * self.seq_len
        stop = start + self.seq_len
        x = self.data[start:stop]
        y = self.data[start + 1:stop + 1]
        return x,y

    def __len__(self) -> int:
        """Number of complete ``(x, y)`` window pairs available."""
        return self.length

class MyDataModule(pl.LightningDataModule):
    """Vocabulary, splits and windowed datasets built from one token file.

    The file at ``data_path`` is read line by line. The lines are split in
    file order (no shuffling) into roughly 70% train / 21% validation /
    9% test, each split is written to ``../.data/{train,val,test}.tokens``,
    and each split is flattened into one token-id tensor wrapped in an
    ``AlarmDataset`` of ``seq_len``-token windows.

    Note that the vocabulary is built from the whole file, i.e. it also
    covers tokens that only occur in the validation/test lines.

    Args:
        data_path: Path to the UTF-8 text file with whitespace-separated tokens.
        batch_size: Batch size used by all three dataloaders.
        seq_len: Window length passed to ``AlarmDataset``.
    """

    def __init__(self, data_path:str, batch_size:int, seq_len:int):
        super().__init__()
        self.batch_size = batch_size
        self.tokenizer = get_tokenizer('basic_english')

        # Build the vocabulary inside a context manager so the file handle is
        # closed once the iterator has been consumed.
        with io.open(data_path, encoding="utf8") as f:
            self.vocab = build_vocab_from_iterator(map(self.tokenizer, f))

        # Read with the same encoding as the vocabulary pass (the locale
        # default may differ from utf8).
        with open(data_path, encoding="utf8") as f:
            seqs = f.readlines()
        print(seqs[:4])

        # Two chained 70/30 splits, both order-preserving.
        train, valid = train_test_split(seqs,test_size=0.30,shuffle=False)
        valid, test = train_test_split(valid,test_size=0.30, shuffle=False)

        # Persist the splits (same file names as before), written as utf8 so
        # they round-trip with utf8 readers.
        for name, lines in (("train", train), ("val", valid), ("test", test)):
            with open(f"../.data/{name}.tokens", "w", encoding="utf8") as f:
                f.writelines(lines)

        # Tokenise the in-memory lines directly; they are exactly what was
        # just written, so re-opening the files is unnecessary.
        train_data = self.data_process(train)
        val_data = self.data_process(valid)
        test_data = self.data_process(test)

        self.train_dataset = AlarmDataset(train_data, seq_len,self.batch_size)
        self.valid_dataset = AlarmDataset(val_data,seq_len,self.batch_size)
        self.test_dataset = AlarmDataset(test_data, seq_len,self.batch_size)

    def data_process(self, raw_text_iter):
        """Tokenise every line and concatenate the ids into one 1-D long tensor.

        Lines that produce no tokens are dropped before concatenation.
        """
        data = [torch.tensor([self.vocab[token] for token in self.tokenizer(item)],dtype=torch.long) for item in raw_text_iter]
        return torch.cat(tuple(filter(lambda t: t.numel() > 0, data)))

    def setup(self, stage=None):
        """No-op: all preparation already happens in ``__init__``."""
        return None

    def _make_loader(self, dataset) -> DataLoader:
        """Shared DataLoader settings: ordered batches, incomplete last batch dropped."""
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=False,num_workers=1,drop_last=True, pin_memory=True)

    def train_dataloader(self) -> DataLoader:
        """Loader over the training windows."""
        return self._make_loader(self.train_dataset)

    def val_dataloader(self) -> DataLoader:
        """Loader over the validation windows."""
        return self._make_loader(self.valid_dataset)

    def test_dataloader(self):
        """Loader over the test windows."""
        return self._make_loader(self.test_dataset)

# Smoke-test the data pipeline: build the data module once with a small batch.
file_path = '../.data/seqs.tokens'

bsize, seq_len = 20, 35  # batch size, tokens per window
dm = MyDataModule(data_path=file_path, batch_size=bsize, seq_len=seq_len)


30551lines [00:00, 54856.64lines/s]
['A17 A17 A17 A75 A17 A57 A17 A17 A17 A98 A99 A56\n', 'A245 A246 A50 A243\n', 'A50 A59 A60 A64 A392 A726 A726 A726 A9 A725 A726 A726 A725 A725 A725 A243 A725\n', 'A746 A17 A266 A563 A204 A613 A367 A1094\n']


In [3]:
class TransformerModel(pl.LightningModule):
    """Causal Transformer-encoder language model trained with cross-entropy.

    Inputs to ``forward`` are sequence-first: the step methods transpose each
    ``(batch, seq)`` batch to ``(seq, batch)`` before calling the model.

    Args:
        ntoken: Vocabulary size (embedding rows / output logits).
        ninp: Embedding and model dimension.
        nhead: Number of attention heads.
        nhid: Feed-forward dimension inside each encoder layer.
        nlayers: Number of encoder layers.
        dropout: Dropout used by the positional encoding and encoder layers.
        seq_len: Expected sequence length. When given, the causal mask is
            built up front; when ``None`` it is built on the first batch.
    """

    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5, seq_len=None):
        super().__init__()
        self.model_type = 'Transformer'
        self.ntoken = ntoken
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = torch.nn.TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = torch.nn.TransformerEncoder(encoder_layers, nlayers)
        self.encoder = torch.nn.Embedding(ntoken, ninp)
        self.ninp = ninp
        self.decoder = torch.nn.Linear(ninp, ntoken)
        self.seq_len = seq_len
        # The default seq_len=None used to crash in torch.ones(None, None);
        # defer mask creation to the first batch in that case.
        self.src_mask = None if seq_len is None else self.generate_square_subsequent_mask(seq_len)
        self.init_weights()

    def generate_square_subsequent_mask(self, sz):
        """Return a ``(sz, sz)`` float mask: 0.0 on and below the diagonal, -inf above."""
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

    def init_weights(self):
        """Uniform(-0.1, 0.1) init for embedding and output weights; zero output bias."""
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, src_mask):
        """Map token ids ``src`` to per-position logits over the vocabulary.

        The embedding is scaled by ``sqrt(ninp)`` before the positional
        encoding is added; ``src_mask`` is moved to the module's device.
        """
        src_mask = src_mask.to(self.device)
        src = self.encoder(src) * math.sqrt(self.ninp)
        src = self.pos_encoder(src)

        output = self.transformer_encoder(src, src_mask)
        output = self.decoder(output)

        return output

    def configure_optimizers(self):
        """Adam with lr=1e-3 and weight decay 1e-7 over all parameters."""
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001, weight_decay=0.0000001)
        return optimizer

    def _mask_for(self, size):
        """Return a causal mask of side ``size``, rebuilding the cached one if needed.

        The cache is compared against the mask's actual size (not ``seq_len``),
        so a single odd-length batch cannot leave a wrong-sized mask behind for
        the following batches.
        """
        if self.src_mask is None or self.src_mask.size(0) != size:
            self.src_mask = self.generate_square_subsequent_mask(size)
        return self.src_mask

    def _shared_step(self, batch, stage):
        """Compute the loss for one batch and log ``{stage}_loss`` / ``{stage}_ppl``."""
        x,y = batch
        x = x.T                 # (batch, seq) -> (seq, batch)
        y = y.T.reshape(-1)     # flatten targets in the same seq-major order

        y_hat = self(x, self._mask_for(x.size(0)))

        loss = F.cross_entropy(y_hat.view(-1, self.ntoken),y)
        self.log(f'{stage}_loss', loss, on_step=True, prog_bar=True, logger=True)
        # NOTE(review): this logs a per-batch perplexity; any epoch aggregate of
        # it averages exp(loss) and is not exp(mean loss) — compare
        # val_ppl_epoch with val_loss_epoch in the recorded run.
        self.log(f'{stage}_ppl', math.exp(loss.item()), on_step=True, prog_bar=True, logger=True)
        return loss

    def training_step(self,batch,batch_idx):
        """Return the training loss for ``batch``."""
        return self._shared_step(batch, 'train')

    def validation_step(self,batch, batch_idx):
        """Return ``{'val_loss': loss}`` for ``batch``."""
        return {'val_loss': self._shared_step(batch, 'val')}

    def test_step(self,batch, batch_idx):
        """Return ``{'test_loss': loss}`` for ``batch``."""
        return {'test_loss': self._shared_step(batch, 'test')}

    def training_epoch_end(self, outputs):
        """Print the mean of the per-step training losses."""
        avg_loss = torch.stack([d['loss']  for d in outputs]).mean()
        print(f"> Avg Training loss = {avg_loss}")

    def validation_epoch_end(self, outputs):
        """Print the mean of the per-step validation losses."""
        avg_loss = torch.stack([d['val_loss'] for d in outputs]).mean()
        print(f"> Average Valid Loss = {avg_loss}")

    def test_epoch_end(self, outputs):
        """Print the mean of the per-step test losses."""
        avg_loss = torch.stack([d['test_loss'] for d in outputs]).mean()
        print(f"> Average Test Loss = {avg_loss}")
    
    
class PositionalEncoding(torch.nn.Module):
    """Adds fixed sinusoidal position encodings to a sequence-first tensor, then dropout.

    Args:
        d_model: Feature dimension of the inputs (even or odd).
        dropout: Dropout probability applied after adding the encodings.
        max_len: Number of positions precomputed; inputs must not be longer.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = torch.nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term          # (max_len, ceil(d_model / 2))
        pe[:, 0::2] = torch.sin(angles)
        # Odd columns number floor(d_model / 2); trim so an odd d_model no
        # longer fails with a shape mismatch. For even d_model this is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        # Stored as (max_len, 1, d_model) so it broadcasts over the batch axis.
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the first ``x.size(0)`` position encodings to ``x`` and apply dropout."""
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [4]:
# --- Data and logging -------------------------------------------------------
file_path = '../.data/seqs.tokens'
tb_logger = pl_loggers.TensorBoardLogger('logs/')

bsize, seq_len = 32, 35  # batch size, tokens per window
dm = MyDataModule(data_path=file_path, batch_size=bsize, seq_len=seq_len)

# --- Model hyperparameters --------------------------------------------------
ntokens = len(dm.vocab.stoi)  # vocabulary size
emsize = 200                  # embedding dimension
nhid = 200                    # feed-forward dimension inside each encoder layer
nlayers = 2                   # number of encoder layers
nhead = 2                     # number of attention heads
dropout = 0.2                 # dropout probability

model = TransformerModel(
    ntoken=ntokens,
    ninp=emsize,
    nhead=nhead,
    nhid=nhid,
    nlayers=nlayers,
    dropout=dropout,
    seq_len=seq_len,
)

# --- Training ---------------------------------------------------------------
trainer = Trainer(
    logger=tb_logger,
    gpus=1,
    precision=16,
    max_epochs=400,
    check_val_every_n_epoch=4,
    gradient_clip_val=0.5,
    deterministic=True,
)
trainer.fit(model, dm)  # training and validation

30551lines [00:00, 55218.51lines/s]
['A17 A17 A17 A75 A17 A57 A17 A17 A17 A98 A99 A56\n', 'A245 A246 A50 A243\n', 'A50 A59 A60 A64 A392 A726 A726 A726 A9 A725 A726 A726 A725 A725 A725 A243 A725\n', 'A746 A17 A266 A563 A204 A613 A367 A1094\n']
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name                | Type               | Params
-----------------------------------------------------------
0 | pos_encoder         | PositionalEncoding | 0     
1 | transformer_encoder | TransformerEncoder | 484 K 
2 | encoder             | Embedding          | 138 K 
3 | decoder             | Linear             | 139 K 
-----------------------------------------------------------
762 K     Trainable params
0         Non-trainable params
762 K     Total params


Validation sanity check: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 6.519143104553223


Training: |          | 0/? [00:00<?, ?it/s]

Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.987840414047241
> Avg Training loss = 1.9766099452972412


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.885150194168091
> Avg Training loss = 1.7431257963180542


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.885382890701294
> Avg Training loss = 1.6885005235671997


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.8332700729370117
> Avg Training loss = 1.646619200706482


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.8899593353271484
> Avg Training loss = 1.6184449195861816


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.878481864929199
> Avg Training loss = 1.5973347425460815


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.823499917984009
> Avg Training loss = 1.5788036584854126


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.7876315116882324
> Avg Training loss = 1.5707781314849854


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.760228157043457
> Avg Training loss = 1.562995433807373


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.7077765464782715
> Avg Training loss = 1.553634762763977


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.67106294631958
> Avg Training loss = 1.547193169593811


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.7941792011260986
> Avg Training loss = 1.5428318977355957


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.743276357650757
> Avg Training loss = 1.536960482597351


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.6819145679473877
> Avg Training loss = 1.5272958278656006


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.7427215576171875
> Avg Training loss = 1.5231577157974243


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.696821928024292
> Avg Training loss = 1.5184237957000732


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.6438236236572266
> Avg Training loss = 1.51895010471344


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.720628261566162
> Avg Training loss = 1.5160142183303833


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.7121009826660156
> Avg Training loss = 1.5146926641464233


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.744326591491699
> Avg Training loss = 1.5099523067474365


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.7432701587677
> Avg Training loss = 1.5083099603652954


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.7878241539001465
> Avg Training loss = 1.5060663223266602


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.6685454845428467
> Avg Training loss = 1.510250449180603


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.730907440185547
> Avg Training loss = 1.5051714181900024


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.725038766860962
> Avg Training loss = 1.5014500617980957


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.777764320373535
> Avg Training loss = 1.4997276067733765


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.734978437423706
> Avg Training loss = 1.49988853931427


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.851977586746216
> Avg Training loss = 1.4954900741577148


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.7137157917022705
> Avg Training loss = 1.4943643808364868


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.732642889022827
> Avg Training loss = 1.493984580039978


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.666787624359131
> Avg Training loss = 1.4919896125793457


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.6962642669677734
> Avg Training loss = 1.4927773475646973


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.6638636589050293
> Avg Training loss = 1.4900317192077637


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.6541759967803955
> Avg Training loss = 1.4900648593902588


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.6451241970062256
> Avg Training loss = 1.4868074655532837


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.663384199142456
> Avg Training loss = 1.4874776601791382


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.5947036743164062
> Avg Training loss = 1.4858081340789795


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.619311571121216
> Avg Training loss = 1.486391305923462


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.619338274002075
> Avg Training loss = 1.4847642183303833


Validating: |          | 0/? [00:00<?, ?it/s]

> Average Valid Loss = 2.6048290729522705
> Avg Training loss = 1.4864068031311035


1

In [5]:
# No model is passed here, only the data module; the recorded output below shows
# the test metrics alongside the last logged validation metrics.
trainer.test(datamodule=dm) # run the test loop on dm's test dataloader
# %%

Testing: |          | 0/? [00:00<?, ?it/s]

> Average Test Loss = 2.731191635131836
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': tensor(2.7468, device='cuda:0'),
 'test_loss_epoch': tensor(2.7312, device='cuda:0'),
 'test_ppl': 15.592896266917588,
 'test_ppl_epoch': tensor(19.3933),
 'val_loss': tensor(2.1163, device='cuda:0'),
 'val_loss_epoch': tensor(2.6048, device='cuda:0'),
 'val_ppl': 8.30071050411446,
 'val_ppl_epoch': tensor(21.3543)}
--------------------------------------------------------------------------------


[{'val_loss_epoch': 2.6048288345336914,
  'val_ppl_epoch': 21.354297637939453,
  'val_loss': 2.1163411140441895,
  'val_ppl': 8.30071050411446,
  'test_loss_epoch': 2.731191635131836,
  'test_ppl_epoch': 19.39333724975586,
  'test_loss': 2.7468154430389404,
  'test_ppl': 15.592896266917588}]