# Classification using Auxiliary Sentences

In [1]:
# Import libraries

import numpy as np
import pandas as pd
import matplotlib
from   matplotlib import pyplot as plt
import seaborn as sns

from copy import deepcopy
import pickle
import json
from tqdm import tqdm
from pprint import pprint

import torch
from transformers import (
    BertTokenizer as Tokenizer,
    BertForSequenceClassification as Model,
    pipeline
) 

from torch.utils.data import DataLoader

In [2]:
# Setup for plotting
sns.set(style='darkgrid')
matplotlib.rcParams['figure.dpi'] = 120
matplotlib.rcParams['font.size'] = 18
matplotlib.rcParams['figure.figsize'] = (10, 5)

In [3]:
# For caching objects

def load_obj(file_path):
    """Load a pickled object from given path
    :param file_path: Path to the pickle file of the object
    :type file_path: string
    """
    with open(file_path, 'rb') as f:
        return pickle.load(f)

def save_obj(obj, file_path):
    """Save an object to given path via pickling
    :param obj: Object to pickle
    :param file_path: Path for pickling
    :type file_path: string
    """
    with open(file_path, 'wb') as f:
        return pickle.dump(obj, f)

In [4]:
# Load the LOGIC dataset

train_df = pd.read_csv('./dataset/train.csv')
dev_df   = pd.read_csv('./dataset/dev.csv')
test_df  = pd.read_csv('./dataset/test.csv')

In [5]:
train_texts        = list(train_df['text'])
train_labels       = list(train_df['label'])
train_masked_texts = list(train_df['masked_text'])

dev_texts        = list(dev_df['text'])
dev_labels       = list(dev_df['label'])
dev_masked_texts = list(dev_df['masked_text'])

test_texts        = list(test_df['text'])
test_labels       = list(test_df['label'])
test_masked_texts = list(test_df['masked_text'])

In [6]:
# Change value of `N` and `masked`

N = -1
masked = False

train_aux, test_aux, dev_aux = load_obj(f'./dataset/aux-sentences-n-{N}-masked-{masked}.pkl')

In [7]:
train_aux[3]

("senator randall is n't lying when she says she cares about her constituents — she would n't lie to people she cares about .",
 'clausal complement leads to nominal subject. nominal subject leads to clausal complement leads to nominal subject. prepositional modifier leads to nominal subject. auxiliary leads to nominal subject. negation modifier leads to nominal subject. adverbial clause modifier leads to nominal subject. auxiliary leads to clausal complement leads to nominal subject. negation modifier leads to clausal complement leads to nominal subject. prepositional modifier leads to clausal complement leads to nominal subject. nominal subject leads to adverbial clause modifier leads to nominal subject. clausal complement leads to adverbial clause modifier leads to nominal subject. object of preposition leads to prepositional modifier leads to clausal complement leads to nominal subject. nominal subject leads to clausal complement leads to adverbial clause modifier leads to nominal 

In [8]:
# Encoding labels as integer
label_map = {
    'faulty generalization': 0,
    'false causality': 1,
    'circular reasoning': 2, 
    'ad populum': 3,
    'ad hominem': 4,
    'fallacy of logic': 5,
    'appeal to emotion': 6,
    'false dilemma': 7,
    'equivocation': 8,
    'fallacy of extension': 9,
    'fallacy of relevance': 10,
    'fallacy of credibility': 11,
    'intentional': 12,
}

inverse_label_map = dict()
for k, v in label_map.items():
    inverse_label_map[v] = k

In [9]:
train_labels_encoded = [label_map[z] for z in train_labels]
test_labels_encoded  = [label_map[z] for z in test_labels]
dev_labels_encoded   = [label_map[z] for z in dev_labels]

In [10]:
tokenizer = Tokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

In [11]:
class TrainingDataset(torch.utils.data.Dataset):
    def __init__(self, texts1, texts2, labels, max_length=64):
        self.encodings = tokenizer(texts1, texts2, max_length=max_length, 
                                   truncation=True, padding="max_length")
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [12]:
train_texts1 = []
train_texts2 = []

for t1, t2 in train_aux:
    train_texts1.append(t1)
    if t2 == '.':
        t2 = ''
    train_texts2.append(t2)

In [13]:
train_dataset = TrainingDataset(train_texts1, train_texts2, train_labels_encoded, max_length=128)

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

In [14]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [15]:
model = Model.from_pretrained('bert-base-uncased',
                              num_labels=len(label_map))
model.to(device)
model.train()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [16]:
loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
optim = torch.optim.AdamW(model.parameters(), lr=5e-5)

In [17]:
for epoch in range(5):
    for batch in tqdm(loader):
        optim.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels_ = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels_)
        loss = outputs[0]
        loss.backward()
        optim.step()

100%|██████████| 29/29 [00:21<00:00,  1.34it/s]
100%|██████████| 29/29 [00:21<00:00,  1.35it/s]
100%|██████████| 29/29 [00:21<00:00,  1.35it/s]
100%|██████████| 29/29 [00:21<00:00,  1.35it/s]
100%|██████████| 29/29 [00:21<00:00,  1.35it/s]


In [18]:
# Creating inference pipeline
pipe = pipeline(task='text-classification',
                model=model,
                tokenizer=tokenizer,
                device=0)

In [19]:
class InferenceDataset(torch.utils.data.Dataset):
    def __init__(self, text_list):
        self._list = text_list

    def __len__(self):
        return len(self._list)

    def __getitem__(self, i):
        t = self._list[i][1]
        if t == '.':
            t = ''
        return {
            'text': self._list[i][0],
            'text_pair': t
        }

In [20]:
train_inf_dataset = InferenceDataset(train_aux)
test_inf_dataset  = InferenceDataset(test_aux)
dev_inf_dataset   = InferenceDataset(dev_aux)

In [21]:
def get_predicted_labels(inf_dataset):
    pred_labels = list()
    for out in tqdm(pipe(inf_dataset, batch_size=64, max_length=128, truncation=True), total=len(inf_dataset)):
        pred_labels.append(int(out['label'][6:]))
    return pred_labels

In [22]:
train_inf_labels_encoded = get_predicted_labels(train_inf_dataset)
test_inf_labels_encoded  = get_predicted_labels(test_inf_dataset)
dev_inf_labels_encoded   = get_predicted_labels(dev_inf_dataset)

  0%|          | 0/1849 [00:00<?, ?it/s]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the se

In [23]:
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

In [24]:
# F1-score
print('Train: ', f1_score(train_labels_encoded, train_inf_labels_encoded, average='macro'))
print('Test: ',  f1_score(test_labels_encoded , test_inf_labels_encoded,  average='macro'))
print('Dev: ',   f1_score(dev_labels_encoded,   dev_inf_labels_encoded,   average='macro'))


Train:  0.7781577174702707
Test:  0.4367367270684305
Dev:  0.4495890335936495


In [25]:
# Accuracy score
print('Train: ', accuracy_score(train_labels_encoded, train_inf_labels_encoded))
print('Test: ',  accuracy_score(test_labels_encoded , test_inf_labels_encoded))
print('Dev: ',   accuracy_score(dev_labels_encoded,   dev_inf_labels_encoded))


Train:  0.8285559762033532
Test:  0.5133333333333333
Dev:  0.56


In [26]:
# Precision score=
print('Train: ', precision_score(train_labels_encoded, train_inf_labels_encoded, average='macro'))
print('Test: ',  precision_score(test_labels_encoded , test_inf_labels_encoded,  average='macro'))
print('Dev: ',   precision_score(dev_labels_encoded,   dev_inf_labels_encoded,   average='macro'))

Train:  0.8555375547493431
Test:  0.519327588063258
Dev:  0.5077385019692712


  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
# Recall score=
print('Train: ', recall_score(train_labels_encoded, train_inf_labels_encoded, average='macro'))
print('Test: ',  recall_score(test_labels_encoded , test_inf_labels_encoded,  average='macro'))
print('Dev: ',   recall_score(dev_labels_encoded,   dev_inf_labels_encoded,   average='macro'))

Train:  0.772411670885397
Test:  0.44752043761301
Dev:  0.46422150176654564
