# Assessment 2: Literature Review and Self-reflection #

### ConvBERT: Improving BERT with Span-based Dynamic Convolution ###

Because of limited computational resources, this article fine-tunes a ConvBERT model on a subset of the GLUE benchmark (MRPC, CoLA, STS-B and RTE) rather than on the full suite. CoLA, for example, asks the model to classify sentences based on their grammatical acceptability.

### 1. Install and Import Required Libraries

In [2]:
!pip install torch transformers datasets

Collecting datasets
  Downloading datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.1-py3-none-any.whl (471 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.6/471.6 kB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.0 MB/s[0m eta [36m0:00:

In [3]:
import torch
from torch.utils.data import DataLoader
from transformers import ConvBertTokenizer, ConvBertForSequenceClassification, AdamW
from datasets import load_dataset, concatenate_datasets
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef, mean_squared_error
from scipy.stats import spearmanr
from torch.nn.utils.rnn import pad_sequence

In [29]:
def Load_Glue_Task_Dataset(task_name, model_name='YituTech/conv-bert-base'):
  """Load one GLUE task and tokenize it with the ConvBERT tokenizer.

  Args:
    task_name: GLUE configuration name; must be one of the keys of
      `task_field_map` below.
    model_name: Hugging Face checkpoint whose tokenizer is loaded.

  Returns:
    A `(train_dataset, test_dataset)` tuple. The "test" dataset is the GLUE
    validation split; for 'mnli' it is the concatenation of
    `validation_matched` and `validation_mismatched`. Both datasets are set
    to torch format exposing `input_ids`, `attention_mask` and `label`.

  Raises:
    ValueError: if `task_name` is not a supported task.
  """
  # Input text column(s) of each task; single-sentence tasks have no second field
  task_field_map = {
    'mnli': ("premise", "hypothesis"),
    'qnli': ("question", "sentence"),
    'qqp': ('question1', 'question2'),
    'rte': ('sentence1', 'sentence2'),
    'sst2': ('sentence', None),
    'mrpc': ('sentence1', 'sentence2'),
    'cola': ('sentence', None),
    'stsb': ('sentence1', 'sentence2'),
  }

  # Validate first so an unsupported task fails before anything is downloaded
  if task_name not in task_field_map:
    raise ValueError(f"Task '{task_name}' is not supported.")
  first_field, second_field = task_field_map[task_name]

  # Load the chosen task dataset from Hugging Face datasets
  dataset = load_dataset('glue', task_name)
  train_dataset = dataset['train']
  if task_name == 'mnli':
    # MNLI has two validation datasets, validation_matched and validation_mismatched
    test_dataset = concatenate_datasets([dataset['validation_matched'], dataset['validation_mismatched']])
  else:
    test_dataset = dataset['validation']

  tokenizer = ConvBertTokenizer.from_pretrained(model_name)

  # Tokenize the data: one sentence, or a sentence pair when the task has two fields
  def tokenize(batch):
    if second_field is None:
      return tokenizer(batch[first_field], padding=True, truncation=True)
    return tokenizer(batch[first_field], batch[second_field], padding=True, truncation=True)

  train_dataset = train_dataset.map(tokenize, batched=True)
  test_dataset = test_dataset.map(tokenize, batched=True)

  # Convert dataset to PyTorch tensors
  train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
  test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

  return train_dataset, test_dataset

In [30]:
def Task_Score(task_name, preds, labels):
  """Score predictions against labels with the metric used for `task_name`.

  'stsb' is scored with the Spearman correlation, 'cola' with the Matthews
  correlation coefficient, and 'mnli', 'qnli', 'qqp', 'rte', 'sst2' and
  'mrpc' with plain accuracy.

  Raises:
    ValueError: if `task_name` is none of the tasks listed above.
  """
  if task_name == 'stsb':
    # Cast every prediction to a plain float before computing the correlation
    float_preds = list(map(float, preds))
    rho, _pvalue = spearmanr(labels, float_preds)
    return rho

  if task_name == 'cola':
    return matthews_corrcoef(labels, preds)

  if task_name in ('mnli', 'qnli', 'qqp', 'rte', 'sst2', 'mrpc'):
    return accuracy_score(labels, preds)

  raise ValueError(f"Task '{task_name}' is not supported.")

In [31]:
# Custom Collate Function for Sentence Padding
def collate_fn(batch):
  """Merge a list of examples into one padded batch.

  Each example is a mapping with 'input_ids', 'attention_mask' and 'label'.
  The two sequence fields are padded with `pad_sequence` (batch-first) to
  the longest sequence in the batch; the labels are gathered into a single
  tensor.
  """
  ids, masks, targets = [], [], []
  for example in batch:
    ids.append(example['input_ids'])
    masks.append(example['attention_mask'])
    targets.append(example['label'])

  return {
    'input_ids': pad_sequence(ids, batch_first=True),
    'attention_mask': pad_sequence(masks, batch_first=True),
    'label': torch.tensor(targets),
  }

In [32]:
# Model Evaluation
def Evaluate_Glue_Task(task_name, model, test_dataset, batch_size, device):
  """Run `model` over `test_dataset` and return its score for `task_name`.

  Args:
    task_name: GLUE task name; 'stsb' is treated as regression, every other
      task as classification.
    model: model called as `model(input_ids, attention_mask=...)` whose
      output exposes `.logits`.
    test_dataset: dataset whose examples are batched by `collate_fn`.
    batch_size: number of examples per evaluation batch.
    device: torch device the inputs are moved to before the forward pass.

  Returns:
    The value returned by `Task_Score` for the collected predictions.
  """
  dataloader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=collate_fn)

  model.eval()  # the model is left in eval mode when this function returns
  preds, reals = [], []

  with torch.no_grad():
    for batch in dataloader:
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)

      logits = model(input_ids, attention_mask=attention_mask).logits
      if task_name == 'stsb':
        # STS-B is a regression task. Drop only the trailing dimension: a bare
        # squeeze() would also remove the batch dimension of a one-example
        # batch, leaving a 0-d tensor that cannot be passed to extend().
        # (assumes the regression head emits one value per example)
        pred = logits.squeeze(-1)
      else:
        pred = torch.argmax(logits, dim=-1)  # Classification task: choose the maximum logit

      preds.extend(pred.cpu().numpy())
      # Labels are only compared on the host, so they are not moved to `device`
      reals.extend(batch['label'].cpu().numpy())

  return Task_Score(task_name, preds, reals)

In [33]:
def Fine_Tune_Glue_Task(task_name, model_name='YituTech/conv-bert-base', num_epochs=3, learning_rate=5e-5, batch_size=8):
  """Fine-tune ConvBERT on one GLUE task and return its validation score.

  Args:
    task_name: GLUE task name accepted by `Load_Glue_Task_Dataset`.
    model_name: Hugging Face checkpoint used for both tokenizer and model.
    num_epochs: number of passes over the training set.
    learning_rate: learning rate given to the AdamW optimizer.
    batch_size: batch size used for both training and evaluation.

  Returns:
    The score returned by `Evaluate_Glue_Task` on the task's evaluation set.

  Side effects:
    Prints the task name and the average training loss of every epoch.
  """
  # Load the Task Dataset
  train_dataset, test_dataset = Load_Glue_Task_Dataset(task_name, model_name)

  print(f"Working on Task {task_name}:")

  # Size the classification head for the task before loading, so the
  # checkpoint is loaded exactly once.
  if task_name == 'stsb':
    num_labels = 1  # STS-B is a regression task
  elif task_name == 'mnli':
    num_labels = 3  # MNLI is a three-way classification task
  else:
    num_labels = 2

  # Load Pre-trained ConvBERT
  model = ConvBertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  model.to(device)

  # Setup the optimizer
  # NOTE(review): transformers' AdamW is deprecated upstream in favour of
  # torch.optim.AdamW - confirm against the installed version before migrating.
  optimizer = AdamW(model.parameters(), lr=learning_rate)

  # Prepare DataLoader with the custom collate function
  dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

  # Fine-tune the model
  model.train()
  for epoch in range(num_epochs):
    total_loss = 0
    for batch in dataloader:
      optimizer.zero_grad()
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['label'].to(device)

      # Passing labels makes the model return the training loss itself
      outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
      loss = outputs.loss
      total_loss += loss.item()

      loss.backward()
      optimizer.step()
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

  return Evaluate_Glue_Task(task_name, model, test_dataset, batch_size, device)

In [35]:
# Fine-tune and evaluate ConvBERT on each selected GLUE task, one after another.
Task_Names = ['mrpc', 'cola', 'stsb', 'rte']
# Scores[i] is the value returned by Fine_Tune_Glue_Task for Task_Names[i].
Scores = []

for task_name in Task_Names:
  score = Fine_Tune_Glue_Task(task_name)
  Scores.append(score)

train-00000-of-00001.parquet:   0%|          | 0.00/649k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/75.7k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/308k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Working on Task mrpc:


Some weights of ConvBertForSequenceClassification were not initialized from the model checkpoint at YituTech/conv-bert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.4363
Epoch 2, Loss: 0.2159
Epoch 3, Loss: 0.0988




Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Working on Task cola:


Some weights of ConvBertForSequenceClassification were not initialized from the model checkpoint at YituTech/conv-bert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.4335
Epoch 2, Loss: 0.2474
Epoch 3, Loss: 0.1504


train-00000-of-00001.parquet:   0%|          | 0.00/502k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/151k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/114k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5749 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1379 [00:00<?, ? examples/s]



Map:   0%|          | 0/5749 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Working on Task stsb:


Some weights of ConvBertForSequenceClassification were not initialized from the model checkpoint at YituTech/conv-bert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of ConvBertForSequenceClassification were not initialized from the model checkpoint at YituTech/conv-bert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.7405
Epoch 2, Loss: 0.2727
Epoch 3, Loss: 0.1695


Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Working on Task rte:


Some weights of ConvBertForSequenceClassification were not initialized from the model checkpoint at YituTech/conv-bert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.6983
Epoch 2, Loss: 0.6197
Epoch 3, Loss: 0.4003


In [36]:
# One score per task, in the same order as Task_Names.
print(Scores)

[0.8676470588235294, 0.58690464101932, 0.9081852113502844, 0.7111913357400722]
