In [1]:
# Transformers installation
! pip install transformers datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.1-py3-none-any.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.10.1-py3-none-any.whl (469 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m469.0/469.0 KB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.2-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.2/199.2 KB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m108.5 MB/s[0m eta [36m0:00:00[0m
Collecti

## Loading Data

In [1]:
from datasets import load_dataset, Dataset
import pandas as pd

df_train= pd.read_csv('/content/train.csv')
df_dev= pd.read_csv('/content/dev.csv')

df_train.rename(columns= {'initial_request': 'text','clarification_need': 'label'}, inplace= True)
df_dev.rename(columns= {'initial_request': 'text','clarification_need': 'label'}, inplace= True)

df_train['label']= [df_train['label'][i]-1 for i in range(len(df_train['label']))]  #since model uses labels 0-3
df_dev['label']= [df_dev['label'][i]-1 for i in range(len(df_dev['label']))]


train_data = Dataset.from_pandas(df_train) #.select(range(184))
dev_data = Dataset.from_pandas(df_dev) #.select(range(48))

In [2]:
train_data[0]

{'text': 'Tell me about Obama family tree.', 'label': 1}

## Tokenizing Data

In [3]:
from transformers import AutoTokenizer, OpenAIGPTForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
from transformers import AutoModelForSequenceClassification, AutoModel

model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=4)

# tokenizer.pad_token = tokenizer.eos_token

# if tokenizer.pad_token is None:
#     tokenizer.add_special_tokens({'pad_token': '[PAD]'})
#     model.resize_token_embeddings(len(tokenizer))

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True) #padding="max_length", add this argument if needed

tokenized_train = train_data.map(tokenize_function, batched=True)
tokenized_dev = dev_data.map(tokenize_function, batched=True)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Map:   0%|          | 0/187 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [4]:
tokenized_train = tokenized_train.remove_columns(["text"])
tokenized_train = tokenized_train.rename_column("label", "labels")
tokenized_train.set_format("torch")

tokenized_dev = tokenized_dev.remove_columns(["text"])
tokenized_dev = tokenized_dev.rename_column("label", "labels")
tokenized_dev.set_format("torch")

### Dataloader

In [5]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(tokenized_train, batch_size=8)
eval_dataloader = DataLoader(tokenized_dev, batch_size=8)

### Model

In [6]:
import torch

In [7]:
# from transformers import AutoModelForSequenceClassification, AutoModel

# model = AutoModelForSequenceClassification.from_pretrained("gpt2", num_labels=4)

In [8]:
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=5e-5)

In [9]:
from transformers import get_scheduler

num_epochs = 20
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

In [10]:
import torch

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# device = torch.device("cpu")
model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerN

### Training

In [11]:
!pip install evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [12]:
import os
from datasets import load_metric
from tqdm.auto import tqdm
import evaluate

In [13]:
def train_one_batch(model, batch, optimizer, lr_scheduler):
  batch = {k: v.to(device) for k, v in batch.items()}
  outputs = model(**batch)
  loss = outputs.loss
  loss.backward()

  optimizer.step()
  lr_scheduler.step()
  optimizer.zero_grad()

  return loss, model, optimizer, lr_scheduler


def train_one_epoch(model, train_dataloader, optimizer, lr_scheduler):
  model.train()
  loss = 0
  progress_bar = tqdm(range(len(train_dataloader)))

  for batch in train_dataloader:
    running_loss, model, optimizer, lr_scheduler = train_one_batch(model, batch, optimizer, lr_scheduler)
    loss += running_loss
    progress_bar.update(1)

  loss = loss / len(train_dataloader)
  return loss, model, optimizer, lr_scheduler
  

def get_val_loss(model, val_dataloader):
  model.eval()
  loss = 0

  # metric = load_metric('f1')
  metric= evaluate.load('f1')
  with torch.no_grad():
    for batch in val_dataloader:
      batch = {k: v.to(device) for k, v in batch.items()}
      outputs = model(**batch)
      running_loss = outputs.loss
      loss += running_loss

      logits = outputs.logits
      predictions = torch.argmax(logits, dim=-1)
      metric.add_batch(predictions=predictions, references=batch["labels"])
  
  # f1_score = metric.compute()
  f1_score = metric.compute(average= 'weighted')
  
  return loss / len(val_dataloader), f1_score

In [14]:
# num_epochs=10
for epoch in range(num_epochs):
  print(f'Epoch: [{epoch+1} / {num_epochs}]:')
  t_loss, model, optimizer, lr_scheduler = train_one_epoch(model, train_dataloader, optimizer, lr_scheduler)
  v_loss, f1_score = get_val_loss(model, eval_dataloader)
  # print
  print(f"\tLoss -> Train: {t_loss:.5f} | Val: {v_loss} | F1 Score: {f1_score['f1']}")
  # save
  # DST Folder
  DST = '/content/checkpoints'
  if not os.path.isdir(DST):
    os.makedirs(DST)
  path = os.path.join(DST, f'epoch_{epoch+1}.pth')
  torch.save(model.state_dict(), path)

Epoch: [1 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 1.35152 | Val: 1.3064011335372925 | F1 Score: 0.24845070422535212
Epoch: [2 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 1.30945 | Val: 1.29379141330719 | F1 Score: 0.24845070422535212
Epoch: [3 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 1.30500 | Val: 1.2932981252670288 | F1 Score: 0.24845070422535212
Epoch: [4 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 1.31055 | Val: 1.2913224697113037 | F1 Score: 0.2896470588235294
Epoch: [5 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 1.29620 | Val: 1.273861289024353 | F1 Score: 0.3466666666666666
Epoch: [6 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 1.23857 | Val: 1.1944109201431274 | F1 Score: 0.30628205128205127
Epoch: [7 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 1.10044 | Val: 1.156795859336853 | F1 Score: 0.37055690072639225
Epoch: [8 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.72869 | Val: 1.6413390636444092 | F1 Score: 0.3272380952380952
Epoch: [9 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.42919 | Val: 1.7978808879852295 | F1 Score: 0.5182251710553597
Epoch: [10 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.18238 | Val: 2.281141757965088 | F1 Score: 0.41
Epoch: [11 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.19375 | Val: 2.3187923431396484 | F1 Score: 0.4471428571428572
Epoch: [12 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.17620 | Val: 2.1510119438171387 | F1 Score: 0.41616341251088174
Epoch: [13 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.06701 | Val: 2.3048908710479736 | F1 Score: 0.3237397085672948
Epoch: [14 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.04856 | Val: 2.0540380477905273 | F1 Score: 0.48344055944055947
Epoch: [15 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.02831 | Val: 2.0900721549987793 | F1 Score: 0.509010989010989
Epoch: [16 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.02283 | Val: 2.100780963897705 | F1 Score: 0.5162108262108261
Epoch: [17 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.01781 | Val: 2.1185455322265625 | F1 Score: 0.5017181929181929
Epoch: [18 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.01591 | Val: 2.1289565563201904 | F1 Score: 0.47631322751322747
Epoch: [19 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.01853 | Val: 2.141364574432373 | F1 Score: 0.47631322751322747
Epoch: [20 / 20]:


  0%|          | 0/24 [00:00<?, ?it/s]

	Loss -> Train: 0.01625 | Val: 2.1455869674682617 | F1 Score: 0.46110052910052907


### Testing

In [46]:
df_test= pd.read_csv('/content/test_with_labels.csv')

df_test.rename(columns= {'initial_request': 'text','clarification_need': 'label'}, inplace= True)
df_test['label']= [df_test['label'][i]-1 for i in range(len(df_test['label']))]
test_data = Dataset.from_pandas(df_test) #.select(range(184))
tokenized_test = test_data.map(tokenize_function, batched=True)

tokenized_test = tokenized_test.remove_columns(["text"])
tokenized_test = tokenized_test.rename_column("label", "labels")
tokenized_test.set_format("torch")
test_dataloader = DataLoader(tokenized_test, batch_size=8)

# model = TheModelClass(*args, **kwargs)
modelnew = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=4)
modelnew.to(device)
modelnew.load_state_dict(torch.load('/content/checkpoints/epoch_17.pth'))
modelnew.eval()
# modelnew= torch.load('/content/checkpoints/epoch_8.pth',weights_only= True)

test_loss, f1_score = get_val_loss(modelnew, test_dataloader)

print(f"Test loss is: {test_loss:.5f}, F1 score is: {f1_score['f1']:.5f}")

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Test loss is: 2.14675, F1 score is: 0.47569
