In [1]:
!pip install datasets transformers --quiet

In [2]:
import sys
import os
from torch.optim import AdamW
import torch
from tqdm.auto import tqdm
tqdm.pandas()

DRIVE_PATH = "/content/drive/MyDrive/transformers_layers/"
sys.path.append(DRIVE_PATH)

In [3]:
%pdb on

Automatic pdb calling has been turned ON


In [4]:
!pip install -Uqq ipdb
import ipdb

In [5]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [6]:
from data.dataloader_superglue import DataSuperglue
import lm_utils
from lm_utils import LM
import constants

In [7]:
class run_model():

  def __init__(self, _db_name, _db_class, _model):
    self.lm: LM = LM(_is_pretrain=False)
    self.db_name = _db_name
    self.train_dataloader, self.val_dataloader, self.test_dataloader = DataSuperglue(self.lm, _db_name, _db_class, device).get_db_dataloaders()
    self.model = _model(self.lm)
    self.optimizer = AdamW(self.model.parameters(), lr=5e-5)
    self.N_EPOCH = 1


  def train(self):
    # ipdb.set_trace()
    self.model.train()
    for epoch in range(self.N_EPOCH):
      for step, batch in tqdm(enumerate(self.train_dataloader)):
          self.optimizer.zero_grad()
          output, loss, labels = self.model(batch)
          loss.backward()
          self.optimizer.step()
  
  def evaluation(self):
    self.model.eval()
    accuracy = 0
    for step, batch in tqdm(enumerate(self.val_dataloader)):
        with torch.no_grad():
          output, loss, labels = self.model(batch)
          batch_success = torch.Tensor(output).argmax(1).eq(torch.Tensor(labels)).sum().item() 
          batch_accuracy = batch_success / len(batch[0].input_ids)
          accuracy += batch_accuracy
    print(f"{self.db_name} - eval result: {accuracy / len(self.val_dataloader):.2f}%")

  def test(self):
    self.model.eval()
    accuracy = 0
    for step, batch in tqdm(enumerate(self.test_dataloader)):
        with torch.no_grad():
          output, loss, labels = self.model(batch)
          batch_success = torch.Tensor(output).argmax(1).eq(torch.Tensor(labels)).sum().item() 
          batch_accuracy = batch_success / len(batch[0].input_ids)
          accuracy += batch_accuracy
    print(f"{self.db_name} - test result: {accuracy / len(self.val_dataloader):.2f}%")

In [8]:
def init_task(_task):
  db_name = _task
  if _task == constants.CB:
      from data.datasets.cb_ds import CbDataset
      from models.cb_model import CbModel
      db_class= CbDataset
      model = CbModel
  elif _task == constants.COPA:
      from data.datasets.copa_ds import CopaDataset
      from models.copa_model import CopaModel
      db_class= CopaDataset
      model = CopaModel
  else:
      from data.datasets.rte_ds import RteDataset
      from models.rte_model import RteModel
      db_name = constants.RTE
      db_class= RteDataset
      model = RteModel
  return db_name, db_class, model

In [9]:
def test_task(_task):
  db_name, db_class, model = init_task(_task)
  train_model = run_model(db_name, db_class, model)
  train_model.train()
  train_model.evaluation()
  # train_model.test()

In [10]:
test_task(constants.COPA)
test_task(constants.CB)
test_task(constants.RTE)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Reusing dataset super_glue (/root/.cache/huggingface/datasets/super_glue/copa/1.0.2/d040c658e2ddef6934fdd97deb45c777b6ff50c524781ea434e7219b56a428a7)


  0%|          | 0/3 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

copa - eval result: 0.38%


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Reusing dataset super_glue (/root/.cache/huggingface/datasets/super_glue/cb/1.0.2/d040c658e2ddef6934fdd97deb45c777b6ff50c524781ea434e7219b56a428a7)


  0%|          | 0/3 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

cb - eval result: 0.25%


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Reusing dataset super_glue (/root/.cache/huggingface/datasets/super_glue/rte/1.0.2/d040c658e2ddef6934fdd97deb45c777b6ff50c524781ea434e7219b56a428a7)


  0%|          | 0/3 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

rte - eval result: 0.42%


In [None]:
!zip -r 