In [None]:
# Installations
!pip install transformers==3.0.2

# imports
import os
import random

import torch
from torch.utils.data import Dataset, DataLoader
from torch import cuda

import transformers
from transformers import RobertaTokenizer, RobertaModel
from transformers import pipeline

from torch import cuda
from tqdm import tqdm
# Run on the GPU when torch can see one; otherwise stay on the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Optional (Google Colab only): mount Google Drive so the dataset files are reachable.
# from google.colab import drive
# drive.mount('/content/drive')


# Dataset locations, resolved relative to the current working directory.
# Both files are read by SST2_Basic, which skips the first line and then
# expects tab-separated "<text><TAB><label>" rows.
train_data_loc = 'SST-2/Few_Shot/train_1.tsv'
dev_data_loc = 'SST-2/dev.tsv'

In [None]:
# Hyperparameters and the tokenizer shared by the cells below.

# Sequence length every example is padded to by the dataset class.
max_len = 256

# Number of examples per batch for the training and validation loaders.
train_batch_size = 8
val_batch_size = 8

# Checkpoint name used for both the tokenizer and the encoder weights.
roberta_flavour = 'roberta-large'

# Step size handed to the optimizer.
learning_rate = 2e-5

# NOTE(review): `truncation` and `do_lower_case` are forwarded to the tokenizer
# constructor here; they look like encode-time / BERT-style options — confirm
# they have any effect on RobertaTokenizer.
tokenizer = RobertaTokenizer.from_pretrained(
    roberta_flavour,
    truncation=True,
    do_lower_case=True,
)

# Dataloader - Custom Dataset Class

class SST2_Basic(Dataset):
    """Map-style dataset over a tab-separated text-classification file.

    The file must start with one header line (which is discarded), followed by
    rows of the form ``text<TAB>label`` where ``label`` parses as an integer.
    Blank lines are ignored, and only the LAST tab on a row is treated as the
    separator, so a tab inside the text does not break parsing.

    Args:
        file_loc: Path of the TSV file to read.
        tokenizer: Object exposing ``encode_plus`` that returns a mapping with
            ``input_ids``, ``attention_mask`` and ``token_type_ids``.
        max_len: Length every encoded example is truncated/padded to.

    Raises:
        ValueError: If a non-blank row has no tab separator or its label is
            not an integer.
    """

    def __init__(self, file_loc, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.max_length = max_len
        self.docs = []
        self.targets = []

        # Explicit encoding so the result does not depend on the platform default.
        with open(file_loc, encoding="utf-8") as f:
            f.readline()  # discard the header row
            for line_no, raw_line in enumerate(f, start=2):
                row = raw_line.rstrip("\r\n")
                if not row.strip():
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                fields = row.rsplit("\t", 1)
                if len(fields) != 2:
                    raise ValueError(
                        f"{file_loc}:{line_no}: expected 'text<TAB>label', got {row!r}"
                    )
                text, label = fields
                self.docs.append(text)
                self.targets.append(int(label))

    def __len__(self):
        """Return the number of parsed examples."""
        return len(self.docs)

    def __getitem__(self, index):
        """Encode example ``index`` and return it as a dict of tensors.

        Returns:
            dict with ``'ids'``, ``'mask'`` and ``'token_type_ids'`` as
            ``torch.long`` tensors and ``'targets'`` as a ``torch.float``
            scalar tensor.
        """
        doc = str(self.docs[index])

        # Ask for truncation and fixed-length padding explicitly instead of
        # relying on the tokenizer's implicit/deprecated fallbacks.
        inputs = self.tokenizer.encode_plus(
            doc,
            None,
            add_special_tokens=True,
            max_length=self.max_length,
            truncation=True,
            padding="max_length",
            return_token_type_ids=True,
        )

        return {
            'ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
            'token_type_ids': torch.tensor(inputs['token_type_ids'], dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }
    

In [None]:

# Wrap each split in the custom dataset class.
training_set = SST2_Basic(train_data_loc, tokenizer, max_len)
validation_set = SST2_Basic(dev_data_loc, tokenizer, max_len)

# Loader settings are kept in named dicts so they can be inspected or reused.
# NOTE(review): shuffling is enabled for the validation loader as well;
# evaluation normally does not need it — confirm this is intended.
train_params = dict(batch_size=train_batch_size, shuffle=True, num_workers=0)
val_params = dict(batch_size=val_batch_size, shuffle=True, num_workers=0)

# Batched iterators consumed by train() and valid().
train_data = DataLoader(training_set, **train_params)
val_data = DataLoader(validation_set, **val_params)

In [None]:
# Model

# Step 1: Define the model

class RobertaClass(torch.nn.Module):
    """RoBERTa encoder followed by dropout and a linear two-class head.

    The encoder weights are loaded from the checkpoint named by the
    notebook-level ``roberta_flavour``. The head consumes the hidden state at
    the first sequence position.
    """

    def __init__(self):
        super(RobertaClass, self).__init__()

        self.l1 = RobertaModel.from_pretrained(roberta_flavour)
        self.dropout = torch.nn.Dropout(0.3)
        # Take the input width from the loaded checkpoint rather than
        # hard-coding 768: that value only matches base-sized checkpoints and
        # causes a shape mismatch in forward() for 'roberta-large' (1024).
        self.classifier = torch.nn.Linear(self.l1.config.hidden_size, 2)

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return unnormalised class scores of shape (batch, 2).

        Args:
            input_ids: Token id tensor passed to the encoder as ``input_ids``.
            attention_mask: Mask tensor passed to the encoder as ``attention_mask``.
            token_type_ids: Segment id tensor passed to the encoder as ``token_type_ids``.
        """
        main = self.l1(input_ids = input_ids, attention_mask = attention_mask, token_type_ids = token_type_ids)
        # First element of the encoder output: per-token hidden states.
        hidden_state = main[0]
        # Keep only the first position of every sequence as the sentence summary.
        pooler = hidden_state[:, 0]
        output = self.classifier(self.dropout(pooler))

        return output


In [None]:

# Build the classifier and move its parameters onto the selected device.
# Module.to() returns the module itself, so the assignment can be chained.
model = RobertaClass().to(device)
# Trailing expression keeps the architecture summary as the cell output.
model

In [None]:
# Training objective: cross-entropy over the model's raw class scores.
loss_function = torch.nn.CrossEntropyLoss()

# Adam over every model parameter (encoder and head alike).
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

def calculate_accuracy(preds, targets):
    """Count positions where ``preds`` equals ``targets``.

    Despite the name, this returns the number of matches as a Python scalar,
    not a ratio; callers divide by the number of examples themselves.
    """
    matches = preds == targets
    hit_count = matches.sum()
    return hit_count.item()

In [None]:
def train(model, epoch, train_data_loader, validation_data_loader):
    """Run one training epoch and print running and final loss/accuracy.

    Uses the notebook-level ``device``, ``loss_function``, ``optimizer`` and
    ``calculate_accuracy``.

    Args:
        model: Module called as ``model(ids, mask, token_type_ids)``; its
            output is fed to ``loss_function`` together with the targets.
        epoch: Epoch index, used only in the printed summary.
        train_data_loader: Iterable of batches, each a mapping with the keys
            ``'ids'``, ``'mask'``, ``'token_type_ids'`` and ``'targets'``.
        validation_data_loader: Currently unused; kept so existing callers
            keep working (the in-loop validation hook below is disabled).

    Raises:
        ValueError: If ``train_data_loader`` yields no batches.
    """
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()
    # Wrap the loader itself (not enumerate) so tqdm can read its length and
    # show a total / ETA instead of a bare counter.
    for step, data in enumerate(tqdm(train_data_loader)):

        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.long)

        outputs = model(ids, mask, token_type_ids)
        loss = loss_function(outputs, targets)
        tr_loss += loss.item()
        # Predictions are only used for bookkeeping, so read them from a
        # detached view of the scores.
        _, big_idx = torch.max(outputs.detach(), dim=1)
        n_correct += calculate_accuracy(big_idx, targets)

        nb_tr_steps += 1
        nb_tr_examples += targets.size(0)

        if step % 500 == 0:
            loss_step = tr_loss/nb_tr_steps
            accu_step = (n_correct*100)/nb_tr_examples
            print(f"Training Loss per 500 steps: {loss_step}")
            print(f"Training Accuracy per 500 steps: {accu_step}")
            #valid(model, validation_data_loader)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if nb_tr_steps == 0:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError("train_data_loader produced no batches; nothing to train on")

    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f'The Total Accuracy for Epoch {epoch}: {epoch_accu}')
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

    return

In [None]:
# Configure root logging at ERROR level; presumably this is meant to silence
# library warnings emitted while training — confirm intent.
import logging
logging.basicConfig(level=logging.ERROR)

In [None]:
# Number of full passes over train_data.
epochs = 20
# val_data is handed to train() on every epoch, but train() does not currently
# evaluate on it (its in-loop validation call is commented out).
for epoch in range(epochs):
    train(model, epoch, train_data, val_data)


In [None]:
def valid(model, testing_loader):
    """Evaluate ``model`` on ``testing_loader`` and return accuracy in percent.

    Uses the notebook-level ``device``, ``loss_function`` and
    ``calculate_accuracy``. Gradients are disabled for the whole pass.

    Args:
        model: Module called as ``model(ids, mask, token_type_ids)``.
        testing_loader: Iterable of batches, each a mapping with the keys
            ``'ids'``, ``'mask'``, ``'token_type_ids'`` and ``'targets'``.

    Returns:
        Percentage of examples whose highest-scoring class equals the target.

    Raises:
        ValueError: If ``testing_loader`` yields no batches.
    """
    model.eval()
    n_correct = 0
    tr_loss = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    with torch.no_grad():
        # Wrap the loader itself so tqdm can display a total.
        for data in tqdm(testing_loader):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
            targets = data['targets'].to(device, dtype = torch.long)
            # Do NOT squeeze the scores: for a batch holding a single example
            # that would drop the batch dimension and break both the loss and
            # the max over dim=1 below.
            outputs = model(ids, mask, token_type_ids)
            loss = loss_function(outputs, targets)
            tr_loss += loss.item()
            _, big_idx = torch.max(outputs, dim=1)
            n_correct += calculate_accuracy(big_idx, targets)

            nb_tr_steps += 1
            nb_tr_examples += targets.size(0)

    if nb_tr_steps == 0:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError("testing_loader produced no batches; nothing to evaluate")

    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Validation Loss Epoch per 1000 steps: {epoch_loss}")
    print(f"Validation Accuracy Epoch per 1000 steps: {epoch_accu}")

    return epoch_accu


In [None]:

# Evaluate the trained model on the validation loader; valid() returns the
# accuracy as a percentage (0-100).
vacc = valid(model, val_data)
print("Val Acc: ", vacc)