In [None]:
!pip install pytorch-lightning
!pip install torchtext



In [None]:
from torchtext.legacy.data import Field 

# Review text: a token sequence fixed to 200 positions. Because
# include_lengths=True, each batch carries a (tokens, lengths) pair.
text_field = Field(
    sequential=True,
    include_lengths=True,
    fix_length=200,
)
# Sentiment label: one non-sequential value per example.
label_field = Field(sequential=False)

In [None]:
from torchtext.legacy.datasets import IMDB

# Build the IMDB train/test splits, binding review text and label to the fields above.
train, test = IMDB.splits(text_field=text_field, label_field=label_field)

In [None]:
# Peek at the first training example: every attribute, then only its label.
first_example = vars(train.examples[0])
print(first_example)
print(first_example['label'])

{'text': ['Pushing', 'Daisies', 'truly', 'is', 'a', 'landmark', 'in', 'Television', 'as', 'an', 'art', 'form.', 'Everything', 'seems', 'to', 'pay', 'homage', 'to', 'Amelie', 'and', 'Tim', 'Burton,', 'but', 'so', 'what,', 'in', 'a', 'world', 'where', 'fresh', 'ideas', 'are', 'distinctly', 'rare,', 'this', 'show', 'will', 'guarantee', 'that', 'you', 'do', 'not', 'care', 'about', 'whether', 'its', 'fresh', 'or', 'not.', 'It', 'is', 'just', 'Brilliant.<br', '/><br', '/>I', 'have', 'been', 'captivated', 'from', 'the', 'start,', 'the', 'intelligent', 'writing,', 'the', 'Directing', 'to', 'the', 'backdrops', 'and', 'dialogue', 'make', 'this', 'show', 'the', 'most', 'incredible', 'masterpiece', 'since', 'The', 'Shield', 'and', 'The', 'Wire', '(ok', 'not', 'exactly', 'good', 'comparisons', 'but', 'the', 'beauty', 'of', 'Pushing', 'Daisies', 'is', 'that', 'it', 'has', 'no', 'comparisons', 'on', 'television).<br', '/><br', '/>Truly', 'addictive', 'and', 'an', 'absolute', 'pleasure.', 'Perhaps', '

In [None]:
# Token vocabulary comes from the training split only, with the pretrained
# 'fasttext.simple.300d' vectors attached to it.
text_field.build_vocab(
    train,
    vectors='fasttext.simple.300d',
)
# Label vocabulary, also from the training split.
label_field.build_vocab(train)

In [None]:
import torch
from torchtext.legacy.data import BucketIterator

# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
batch_size = 32

# One iterator per split, both configured with the same batch size and device.
train_iter, test_iter = BucketIterator.splits(
    (train, test),
    device=device,
    batch_size=batch_size,
)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import pytorch_lightning as pl

class RNNModel(pl.LightningModule):
    """LSTM text classifier on top of a fixed (non-trainable) embedding matrix.

    Token ids are looked up in ``embedding``, run through a single LSTM, passed
    through ELU and a linear layer at every time step, and the per-step logits
    are summed over the sequence to give one logit vector per example.

    The data loaders are the notebook-level ``train_iter`` and ``test_iter``
    iterators, which must exist before ``Trainer.fit`` is called.

    Args:
        embedding: 2-D tensor of word vectors, indexed by token id.
        lstm_input_size: Width of one embedding vector (LSTM input size).
        lstm_hidden_size: Hidden-state size of the LSTM.
        output_size: Number of logits produced per example.
            NOTE(review): the default of 3 presumably covers the label
            vocabulary's unknown entry plus the two sentiment classes - confirm.
    """

    def __init__(self, embedding, lstm_input_size=300, lstm_hidden_size=100, output_size=3):
        super().__init__()
        # Register the lookup table as a buffer instead of a plain attribute so
        # it moves with the module (e.g. to the GPU) while staying frozen.
        # persistent=False keeps the large matrix out of checkpoints; it is
        # supplied again through the constructor.
        self.register_buffer('embedding', embedding, persistent=False)
        self.lstm = nn.LSTM(lstm_input_size, lstm_hidden_size)
        self.lin = nn.Linear(lstm_hidden_size, output_size)
        self.loss_function = nn.CrossEntropyLoss()

        # Separate metric objects so training and validation statistics never mix.
        self.train_accuracy = pl.metrics.Accuracy()
        self.val_accuracy = pl.metrics.Accuracy()

    def forward(self, X: torch.Tensor):
        """Return one logit vector per example for a (batch, seq) tensor of token ids."""
        # Make sure the indices live on the same device as the lookup table.
        tokens = X.to(self.embedding.device)
        # (batch, seq, emb) -> (seq, batch, emb): nn.LSTM is sequence-first by default.
        x = self.embedding[tokens].permute(1, 0, 2)
        x, _ = self.lstm(x)
        # Back to (batch, seq, hidden) before the per-step projection.
        x = F.elu(x.permute(1, 0, 2))
        x = self.lin(x)
        # Collapse the sequence axis by summing the per-step logits.
        x = x.sum(dim=1)
        return x

    def training_step(self, batch, batch_idx):
        """Compute the training loss for one batch and log the training accuracy."""
        # batch.text is a (tokens, lengths) pair; the token tensor is transposed
        # so that the batch axis comes first.
        x, y = batch.text[0].T, batch.label
        y_hat = self(x)
        loss = self.loss_function(y_hat, y)
        # Use the training metric here, not the validation one.
        train_acc = self.train_accuracy(y_hat, y)
        self.log('train_acc', train_acc, prog_bar=True)
        return dict(loss=loss)

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss for one batch and log the validation accuracy."""
        x, y = batch.text[0].T, batch.label
        y_hat = self(x)
        loss = self.loss_function(y_hat, y)
        val_acc = self.val_accuracy(y_hat, y)
        self.log('val_acc', val_acc, prog_bar=True)
        return dict(validation_loss=loss)

    def train_dataloader(self):
        """Return the notebook-level training iterator."""
        return train_iter

    def val_dataloader(self):
        """Return the notebook-level test iterator, used here for validation."""
        return test_iter

    def configure_optimizers(self):
        """Optimise all trainable parameters with Adam at a learning rate of 0.01."""
        return Adam(self.parameters(), lr=0.01)

In [None]:
# Hand the vocabulary's pretrained vectors to the classifier as its embedding table.
model = RNNModel(embedding=text_field.vocab.vectors)

In [None]:
# Train on one GPU when PyTorch can see one; otherwise fall back to the CPU,
# matching the device chosen for the batch iterators, so the cell also runs
# on machines without CUDA.
trainer = pl.Trainer(
    gpus=1 if torch.cuda.is_available() else 0,
    max_epochs=3,
)
# The model supplies its own loaders via train_dataloader()/val_dataloader().
trainer.fit(model)

INFO:pytorch_lightning.utilities.distributed:GPU available: True, used: True
INFO:pytorch_lightning.utilities.distributed:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.distributed:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.core.lightning:
  | Name           | Type             | Params
----------------------------------------------------
0 | lstm           | LSTM             | 160 K 
1 | lin            | Linear           | 303   
2 | loss_function  | CrossEntropyLoss | 0     
3 | train_accuracy | Accuracy         | 0     
4 | val_accuracy   | Accuracy         | 0     
----------------------------------------------------
161 K     Trainable params
0         Non-trainable params
161 K     Total params
0.644     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]