In [None]:
from google.colab import drive
drive.mount('/content/drive')

PROJECT_PATH = '/content/drive/MyDrive/Colab/ECE570/Project'

Mounted at /content/drive


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 23.7 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 75.2 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.0-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 74.3 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.0 tokenizers-0.13.2 transformers-4.24.0


In [None]:
import torch
device = torch.device('cuda' if torch.cuda.is_available else 'cpu')
print(device)

cuda


# Step 1: Setup DataLoader

In [None]:
from torch.utils.data import Dataset

# Have to redeclare this so when we torch.load we can create the object again
class IMDBDataset(Dataset):
  def __init__(self, ids, token_types, attn_masks, y):
    self.ids = ids
    self.token_types = token_types
    self.attn_masks = attn_masks
    self.y = y
  
  def __len__(self):
    return len(self.y)
    
  def __getitem__(self, idx):
    return self.ids[idx], self.token_types[idx], self.attn_masks[idx], self.y[idx]

In [None]:
from torch.utils.data import DataLoader

# We can load the previously created datasets from `build_dataset.py`
train_set = torch.load(f'{PROJECT_PATH}/dataset/train_set.pt')
test_set = torch.load(f'{PROJECT_PATH}/dataset/test_set.pt')

In [None]:
train_loader = DataLoader(train_set, batch_size = 32)
test_loader = DataLoader(test_set, batch_size = 100)

# Step 2: Create Model Architecture

In [None]:
from transformers import BertModel

bert_model = BertModel.from_pretrained('bert-base-uncased')

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [31]:
import torch.nn as nn

class SentimentClassifier(nn.Module):
  def __init__(self, bert_model, freeze_bert=True):
    super(SentimentClassifier, self).__init__()
    self.bert = bert_model
    self.dropout = nn.Dropout(p=0.2) # Zero out 20% of nodes while training
    self.cls = nn.Linear(768, 1) # Take output of model and reduce it to one class
  
  def forward(self, ids, token_types, attn_mask):
    bert_out = self.bert(ids, attention_mask=attn_mask, token_type_ids=token_types)
    pooler_out = bert_out.pooler_output
    dropout_out = self.dropout(pooler_out)
    cls_out = self.cls(dropout_out)
    return cls_out.squeeze(-1)
  
  # If we want to freeze the bert parameters later on
  def freeze_bert(self):
      for p in self.bert.parameters():
        p.requires_grad = False

net = SentimentClassifier(bert_model, freeze_bert=False).to(device)

# Step 3: Set Up Training and Testing

In [32]:
import torch.optim as optim

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam([
    {"params": net.cls.parameters(), "lr": 8e-4},
], lr=2e-5)

In [33]:
def train(net, criterion, loader):
  net.train()
  for idx, (ids, token_types, attn_mask, labels) in enumerate(loader):
    ids, token_types, attn_mask, labels = ids.to(device), token_types.to(device), attn_mask.to(device), labels.to(device)
    optimizer.zero_grad()

    logits = net(ids, token_types, attn_mask)
    loss = criterion(logits, labels.float())
    loss.backward()
    optimizer.step()

    if idx % 100 == 0:
      print(f'[{idx*len(ids)} / {len(loader.dataset)}]: loss = {loss.item()}')

def test(net, criterion, loader):
  net.eval()

  tot_loss = 0
  correct = 0

  with torch.no_grad():
    for (ids, token_types, attn_mask, labels) in loader:
      ids, token_types, attn_mask, labels = ids.to(device), token_types.to(device), attn_mask.to(device), labels.to(device)
  
      logits = net(ids, token_types, attn_mask)
      pred = torch.round(torch.sigmoid(logits))

      tot_loss += criterion(logits, labels.float())
      correct += torch.sum(pred == labels).item()

  tot_loss /= len(loader)
  accuracy = correct / len(loader.dataset) * 100.
  print(f'Test: avg loss = {tot_loss}, accuracy = {accuracy}%')
  return accuracy

# Step 4: Train the Model

In [34]:
epochs = 7

best_acc = 0.01

for epoch in range(1, epochs):
  print(f'Epoch {epoch}')
  print('=====================================================')
  train(net, criterion, train_loader)
  accuracy = test(net, criterion, test_loader)
  
  # If this is the most accurate model so far, we save it so we can further test it later
  if accuracy > best_acc:
    torch.save(net.state_dict(), f'{PROJECT_PATH}/model.pt')
    best_acc = accuracy

Epoch 1
[0 / 40000]: loss = 0.6995843052864075
[3200 / 40000]: loss = 0.6228442192077637
[6400 / 40000]: loss = 0.707646369934082
[9600 / 40000]: loss = 0.7700990438461304
[12800 / 40000]: loss = 0.7092512845993042
[16000 / 40000]: loss = 0.5351296663284302
[19200 / 40000]: loss = 0.5678591728210449
[22400 / 40000]: loss = 0.571395993232727
[25600 / 40000]: loss = 0.5643982887268066
[28800 / 40000]: loss = 0.6544989347457886
[32000 / 40000]: loss = 0.5337569117546082
[35200 / 40000]: loss = 0.5348870754241943
[38400 / 40000]: loss = 0.5609210133552551
Test: avg loss = 0.5372771620750427, accuracy = 72.89999999999999%
Epoch 2
[0 / 40000]: loss = 0.5364029407501221
[3200 / 40000]: loss = 0.4753051698207855
[6400 / 40000]: loss = 0.5958806276321411
[9600 / 40000]: loss = 0.6105954647064209
[12800 / 40000]: loss = 0.7464258074760437
[16000 / 40000]: loss = 0.5351036190986633
[19200 / 40000]: loss = 0.4948069155216217
[22400 / 40000]: loss = 0.5837639570236206
[25600 / 40000]: loss = 0.4833