# No auxiliary sentences are used during classification!

In [1]:
# Import libraries

import numpy as np
import pandas as pd
import matplotlib
from   matplotlib import pyplot as plt
import seaborn as sns

from copy import deepcopy
import pickle
import json
from tqdm import tqdm
from pprint import pprint

import torch
from transformers import (
    BertTokenizer as Tokenizer,
    BertForSequenceClassification as Model,
    pipeline
) 

from torch.utils.data import DataLoader
from transformers import AdamW

In [2]:
# Plot styling: apply the seaborn theme, then override individual matplotlib rcParams
sns.set(style='darkgrid')
matplotlib.rcParams.update({
    'figure.dpi': 120,
    'font.size': 18,
    'figure.figsize': (10, 5),
})

In [3]:
# For caching objects

def load_obj(file_path):
    """Read back an object that was stored with pickle.

    :param file_path: Location of the pickle file to read
    :type file_path: string
    :return: The unpickled object
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(file_path, mode='rb') as handle:
        restored = pickle.load(handle)
    return restored

def save_obj(obj, file_path):
    """Pickle an object to disk, overwriting any existing file.

    :param obj: Object to serialize
    :param file_path: Destination path of the pickle file
    :type file_path: string
    :return: Whatever ``pickle.dump`` returns (``None``)
    """
    with open(file_path, mode='wb') as handle:
        outcome = pickle.dump(obj, handle)
    return outcome

In [4]:
# Read the train / dev / test splits of the LOGIC dataset from CSV

train_df, dev_df, test_df = map(
    pd.read_csv,
    ('./dataset/train.csv', './dataset/dev.csv', './dataset/test.csv'),
)

In [5]:
# Pull the text, label and masked-text columns of every split into plain lists
train_texts, train_labels, train_masked_texts = (
    train_df[column].tolist() for column in ('text', 'label', 'masked_text')
)

dev_texts, dev_labels, dev_masked_texts = (
    dev_df[column].tolist() for column in ('text', 'label', 'masked_text')
)

test_texts, test_labels, test_masked_texts = (
    test_df[column].tolist() for column in ('text', 'label', 'masked_text')
)

In [6]:
# Class name -> integer id; ids follow the order of the names listed below
label_map = {
    name: idx
    for idx, name in enumerate([
        'faulty generalization',
        'false causality',
        'circular reasoning',
        'ad populum',
        'ad hominem',
        'fallacy of logic',
        'appeal to emotion',
        'false dilemma',
        'equivocation',
        'fallacy of extension',
        'fallacy of relevance',
        'fallacy of credibility',
        'intentional',
    ])
}

# Reverse lookup: integer id -> class name
inverse_label_map = {idx: name for name, idx in label_map.items()}

In [7]:
# Translate the string labels of each split into their integer ids
train_labels_encoded = [label_map[name] for name in train_labels]
test_labels_encoded = [label_map[name] for name in test_labels]
dev_labels_encoded = [label_map[name] for name in dev_labels]

In [8]:
# Pretrained BertTokenizer (imported as `Tokenizer`) with lower-casing enabled
tokenizer = Tokenizer.from_pretrained(
    'bert-base-uncased',
    do_lower_case=True,
)

In [9]:
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenized texts with their labels.

    All texts are tokenized once, at construction time, with the notebook-level
    ``tokenizer``; every example is truncated/padded to ``max_length``.

    :param texts: Input strings to tokenize
    :type texts: list of string
    :param labels: Label for each entry of ``texts`` (same order, same length)
    :type labels: list
    :param max_length: Length every example is truncated/padded to
    :type max_length: int
    :raises ValueError: If ``texts`` and ``labels`` differ in length
    """

    def __init__(self, texts, labels, max_length=64):
        # __len__ is driven by the labels while the encodings are sized by the
        # texts, so a mismatch would otherwise surface as silent truncation or
        # a late IndexError inside the DataLoader.
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} and {len(labels)}"
            )
        self.encodings = tokenizer(texts, max_length=max_length,
                                   truncation=True, padding="max_length")
        self.labels = labels

    def __getitem__(self, idx):
        """Return the encoding fields and the label of example ``idx`` as tensors."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples, i.e. the number of labels."""
        return len(self.labels)

In [10]:
# Tokenized training split (default max_length) paired with the integer labels
train_dataset = CustomDataset(
    texts=train_texts,
    labels=train_labels_encoded,
)

In [11]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [12]:
# Pretrained BERT with a classification head sized to the label set, moved to `device`
model = Model.from_pretrained(
    'bert-base-uncased',
    num_labels=len(label_map),
).to(device)
# Switch to training mode; as the last expression this also displays the model
model.train()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [13]:
# Shuffled mini-batches of 64 training examples
loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
# AdamW over every model parameter
optim = torch.optim.AdamW(params=model.parameters(), lr=5e-5)

In [14]:
# Fine-tune for 5 passes over the training loader.
# Set training mode here rather than relying on the model-creation cell: if this
# cell is re-run after the model was put in eval mode, training would otherwise
# silently proceed with dropout disabled.
model.train()
for epoch in range(5):
    for batch in tqdm(loader):
        optim.zero_grad()
        # Move the batch tensors onto the same device as the model
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels_ = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels_)
        # With labels supplied, the first output element is taken as the loss
        loss = outputs[0]
        loss.backward()
        optim.step()

100%|██████████| 29/29 [00:11<00:00,  2.57it/s]
100%|██████████| 29/29 [00:11<00:00,  2.61it/s]
100%|██████████| 29/29 [00:11<00:00,  2.61it/s]
100%|██████████| 29/29 [00:11<00:00,  2.61it/s]
100%|██████████| 29/29 [00:11<00:00,  2.61it/s]


In [16]:
# Creating inference pipeline
# The model is still in training mode after fine-tuning. Switch it to eval mode
# explicitly so dropout is disabled at inference, instead of relying on
# pipeline() to change the mode of an already-instantiated model.
model.eval()
pipe = pipeline(task='text-classification',
                model=model,
                tokenizer=tokenizer,
                # Integer device index: 0 = first GPU, -1 = CPU. Follows the
                # notebook-level `device` so the CPU fallback keeps working.
                device=0 if device.type == 'cuda' else -1)

In [49]:
class InferenceDataset(torch.utils.data.Dataset):
    """Thin map-style dataset exposing a list of raw texts by index.

    :param text_list: Texts to serve, one item per index
    :type text_list: list of string
    """

    def __init__(self, text_list):
        # Kept by reference; the list is not copied
        self._list = text_list

    def __getitem__(self, i):
        """Return the ``i``-th stored text unchanged."""
        texts = self._list
        return texts[i]

    def __len__(self):
        """Number of stored texts."""
        texts = self._list
        return len(texts)

In [50]:
# Wrap the raw texts of each split for batched pipeline inference
train_inf_dataset, test_inf_dataset, dev_inf_dataset = map(
    InferenceDataset,
    (train_texts, test_texts, dev_texts),
)

In [51]:
def get_predicted_labels(inf_dataset, batch_size=64, max_length=64):
    """Run the notebook-level ``pipe`` over a dataset and return integer class ids.

    :param inf_dataset: Dataset of raw texts to classify (must support ``len``)
    :param batch_size: Number of texts sent through the pipeline per batch
    :type batch_size: int
    :param max_length: Length the pipeline truncates each text to
    :type max_length: int
    :return: Predicted class id for every item, in dataset order
    :rtype: list of int
    """
    # Resolve label names through the model config instead of slicing off a
    # hard-coded 'LABEL_' prefix, so this keeps working if the model is given
    # human-readable label names.
    id_by_name = {name: int(idx)
                  for idx, name in pipe.model.config.id2label.items()}
    predictions = pipe(inf_dataset, batch_size=batch_size,
                       max_length=max_length, truncation=True)
    return [id_by_name[out['label']]
            for out in tqdm(predictions, total=len(inf_dataset))]

In [52]:
# Predict class ids for every split (evaluated in train, test, dev order)
train_inf_labels_encoded, test_inf_labels_encoded, dev_inf_labels_encoded = map(
    get_predicted_labels,
    (train_inf_dataset, test_inf_dataset, dev_inf_dataset),
)

100%|██████████| 1849/1849 [00:04<00:00, 383.99it/s]
100%|██████████| 300/300 [00:00<00:00, 380.93it/s]
100%|██████████| 300/300 [00:00<00:00, 383.05it/s]


In [53]:
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

In [54]:
# Macro-averaged F1 score per split
for split_name, y_true, y_pred in (
    ('Train: ', train_labels_encoded, train_inf_labels_encoded),
    ('Test: ', test_labels_encoded, test_inf_labels_encoded),
    ('Dev: ', dev_labels_encoded, dev_inf_labels_encoded),
):
    print(split_name, f1_score(y_true, y_pred, average='macro'))


Train:  0.9030118490538459
Test:  0.5226626653029867
Dev:  0.5631260365815453


In [55]:
# Accuracy per split
for split_name, y_true, y_pred in (
    ('Train: ', train_labels_encoded, train_inf_labels_encoded),
    ('Test: ', test_labels_encoded, test_inf_labels_encoded),
    ('Dev: ', dev_labels_encoded, dev_inf_labels_encoded),
):
    print(split_name, accuracy_score(y_true, y_pred))


Train:  0.961060032449973
Test:  0.5966666666666667
Dev:  0.6366666666666667


In [56]:
# Precision score (macro-averaged)
# zero_division=0 makes explicit what the default does for a class that is never
# predicted (its precision counts as 0) without emitting sklearn's
# undefined-metric warning on every run. NOTE(review): the warning in the saved
# output suggests at least one class is never predicted — worth inspecting.
print('Train: ', precision_score(train_labels_encoded, train_inf_labels_encoded,
                                 average='macro', zero_division=0))
print('Test: ',  precision_score(test_labels_encoded, test_inf_labels_encoded,
                                 average='macro', zero_division=0))
print('Dev: ',   precision_score(dev_labels_encoded, dev_inf_labels_encoded,
                                 average='macro', zero_division=0))

Train:  0.9599852102959349
Test:  0.5463454217979105
Dev:  0.5880818958507789


  _warn_prf(average, modifier, msg_start, len(result))


In [57]:
# Macro-averaged recall per split
for split_name, y_true, y_pred in (
    ('Train: ', train_labels_encoded, train_inf_labels_encoded),
    ('Test: ', test_labels_encoded, test_inf_labels_encoded),
    ('Dev: ', dev_labels_encoded, dev_inf_labels_encoded),
):
    print(split_name, recall_score(y_true, y_pred, average='macro'))

Train:  0.9081465317328381
Test:  0.5168632339416779
Dev:  0.5727165872131107
