In [1]:
%run ../nli/config.py
%run ../nli/nli_finetune.py
%run ../nli/mnli.py
# %run ../nli/nli_dataset.py

In [2]:
import pandas as pd

import torch
import transformers
import torchmetrics
import pytorch_lightning as pl

from transforms import ClinicalSynonymSubstitution

In [3]:
# transform=ClinicalSynonymSubstitution(substitution_probability=1.0,p=0.7)

In [4]:
# text="Patient has elevated BUN"
# transformed_text=transform(text)
# transformed_text

In [5]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
# Backbone model to evaluate. Change this value to evaluate a different model.
CONFIG["MODEL_NAME_OR_PATH"] = "dmis-lab/biobert-v1.1"

# Fine-tuned checkpoint matching the model above; change it together with the
# model name. Despite the key name, the value is a path to a single .ckpt
# file (it is later passed as `checkpoint_path`), not a directory.
CONFIG["CHECKPOINT_DIR"] = "../nli/checkpoints/biobert_v1-epoch=01-val_loss=0.45.ckpt"

In [7]:
# Seed the random number generators for a reproducible run. The return value
# is bound to `_` so the cell does not echo it as its output.
_ = pl.seed_everything(CONFIG['SEED'])

Global seed set to 13


In [8]:
# Restore the fine-tuned NLI model from the checkpoint file configured above.
# `num_labels` and `model_name_or_path` are forwarded to the model so it is
# rebuilt with the same classification head size and backbone.
trained_model = NLIFineTuningModel.load_from_checkpoint(
    checkpoint_path=CONFIG["CHECKPOINT_DIR"],
    num_labels=CONFIG['NUM_CLASSES'],
    model_name_or_path=CONFIG['MODEL_NAME_OR_PATH'],
)

# Evaluation only: freeze the model so its weights are not trained.
# NOTE(review): presumably NLIFineTuningModel is a LightningModule, in which
# case freeze() also switches it to eval mode -- confirm.
trained_model.freeze()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Load the tokenizer that belongs to the configured backbone so the inputs
# are encoded with the matching vocabulary.
tokenizer = transformers.AutoTokenizer.from_pretrained(CONFIG['MODEL_NAME_OR_PATH'])

In [10]:
class NLIDataset(torch.utils.data.Dataset):
    """Dataset of sentence pairs that are tokenized lazily, one item at a time.

    Each item is a dict with the keys ``labels``, ``input_ids``,
    ``attention_mask`` and ``token_type_ids``; the three encoded fields are
    flattened to 1-D tensors.
    """

    # Fields copied from the tokenizer output into every item, in this order.
    _ENCODED_FIELDS = ('input_ids', 'attention_mask', 'token_type_ids')

    def __init__(self, max_len: int, tokenizer, sentence1, sentence2, labels):
        """Store the tokenizer and the parallel sequences of inputs.

        Args:
            max_len: Length every encoded pair is padded/truncated to.
            tokenizer: Object exposing ``encode_plus`` (keyword arguments only
                are used).
            sentence1: Indexable collection of first sentences.
            sentence2: Indexable collection of second sentences.
            labels: Indexable collection of labels, aligned with the sentences.
        """
        super().__init__()
        self.max_len = max_len
        self.tokenizer = tokenizer
        self.sentence1 = sentence1
        self.sentence2 = sentence2
        self.labels = labels

    def __len__(self):
        """Return the number of examples, taken from ``sentence1``."""
        return len(self.sentence1)

    def __getitem__(self, idx):
        """Encode the ``idx``-th sentence pair and return it with its label."""
        encode_kwargs = dict(
            text=self.sentence1[idx],
            text_pair=self.sentence2[idx],
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            max_length=self.max_len,
            return_token_type_ids=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        encoding = self.tokenizer.encode_plus(**encode_kwargs)

        item = {'labels': torch.tensor(self.labels[idx])}
        # Flatten each encoded field to 1-D so every item has a fixed rank.
        for field in self._ENCODED_FIELDS:
            item[field] = encoding[field].view(-1)
        return item

In [11]:
# Load the 'test' split as a DataFrame-like table.
# NOTE(review): mnli_df is presumably defined by the `%run ../nli/mnli.py`
# cell at the top of the notebook -- confirm.
test_df = mnli_df('test')

In [12]:
# Wrap the test split in the dataset class; the column names for the two
# sentences and the label are looked up in CONFIG.
# (A `transforms=None` argument was previously commented out here; it is not
# passed.)
test_dataset = NLIDataset(
    max_len=CONFIG['MAX_LEN'],
    tokenizer=tokenizer,
    sentence1=test_df[CONFIG['sentence1']].values,
    sentence2=test_df[CONFIG['sentence2']].values,
    labels=test_df[CONFIG['labels']].values,
)

In [13]:
# Batch the test set for evaluation. Shuffling is disabled so batches follow
# dataset order.
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=CONFIG['VAL_BS'],
    shuffle=False,
    num_workers=CONFIG['NUM_WORKERS'],
)

In [14]:
# Accuracy and macro-averaged F1, accumulated over the test batches. The
# collection is moved to `device` so its state lives alongside the
# predictions it will be fed.
test_metrics = torchmetrics.MetricCollection([
    torchmetrics.Accuracy(),
    torchmetrics.F1(num_classes=CONFIG['NUM_CLASSES'], average='macro'),
]).to(device)

In [15]:
# Move the model to the device that the batches will be sent to.
trained_model = trained_model.to(device)

In [16]:
# Evaluate the model on the full test set and display the aggregated metrics.

# Make sure layers such as dropout are in inference mode at the point of use.
trained_model.eval()
# Clear accumulated metric state so that re-running this cell does not fold
# the previous run's batches into the result.
test_metrics.reset()

# Inference only: explicitly disable autograd tracking for the forward passes.
with torch.no_grad():
    for batch in test_dataloader:
        # Build a new dict on the target device instead of mutating the
        # batch yielded by the loader.
        batch = {key: value.to(device) for key, value in batch.items()}
        # The model output is indexed as (loss, logits, ...); only the logits
        # are needed for prediction.
        logits = trained_model(batch)[1]
        predictions = torch.argmax(logits, dim=1)
        # update() only accumulates state; the per-batch metric value that a
        # forward call would also compute is never used here.
        test_metrics.update(predictions, batch['labels'])

# Last expression of the cell: metrics aggregated over every test batch.
test_metrics.compute()

{'Accuracy': tensor(0.8235, device='cuda:0'),
 'F1': tensor(0.8230, device='cuda:0')}