In [42]:
# Importing the libraries needed
import pandas as pd
import torch
import transformers
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaModel, RobertaTokenizer
import numpy as np
from tqdm import tqdm
from sklearn.metrics import classification_report,  confusion_matrix

In [45]:
# Select the compute device: use the GPU when PyTorch can see one, otherwise
# fall back to the CPU.
from torch import cuda

if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [46]:
import nvidia_smi

# Report the memory usage of GPU 0 through NVML.
nvidia_smi.nvmlInit()
try:
    # Card id 0 is hard-coded here; NVML also exposes a device count, so all
    # cards could be iterated instead.
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
    info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)

    print("Total memory:", info.total)
    print("Free memory:", info.free)
    print("Used memory:", info.used)
finally:
    # Release the NVML session even when one of the queries above raises.
    nvidia_smi.nvmlShutdown()


Total memory: 50962169856
Free memory: 444006400
Used memory: 50518163456


In [57]:
# Overview of GPU availability: PyTorch's view of the current device first,
# then one line per card as reported by NVML.
print("Get memory info", torch.cuda.mem_get_info(device=None))
print("Get number of devices available: ", torch.cuda.device_count())

print("Memory stats about which device is free: ")

nvidia_smi.nvmlInit()
try:
    deviceCount = nvidia_smi.nvmlDeviceGetCount()
    for i in range(deviceCount):
        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
        info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        print("Device {}: {}, Memory : ({:.2f}% free): {}(total), {} (free), {} (used)".format(i, nvidia_smi.nvmlDeviceGetName(handle), 100*info.free/info.total, info.total, info.free, info.used))
finally:
    # Release the NVML session even when one of the queries above raises.
    nvidia_smi.nvmlShutdown()



Get memory info (49782652928, 50962169856)
Get number of devices available:  4
Memory stats about which device is free: 
Device 0: b'Quadro RTX 8000', Memory : (2.32% free): 50962169856(total), 1180106752 (free), 49782063104 (used)
Device 1: b'Quadro RTX 8000', Memory : (6.16% free): 50962169856(total), 3138846720 (free), 47823323136 (used)
Device 2: b'Quadro RTX 8000', Memory : (57.40% free): 50962169856(total), 29251534848 (free), 21710635008 (used)
Device 3: b'Quadro RTX 8000', Memory : (97.69% free): 50962169856(total), 49782652928 (free), 1179516928 (used)


In [58]:
# Index of the GPU to run on. It was picked by hand from the per-device memory
# listing printed by the previous cell; adjust it for other machines.
GPU_INDEX = 3

if torch.cuda.is_available() and GPU_INDEX < torch.cuda.device_count():
    torch.cuda.set_device(GPU_INDEX)
    torch.cuda.empty_cache()
    # Report the device only after switching, so a fresh "Run All" prints the
    # device that is actually used rather than the previous default.
    print("Current Device:  ", torch.cuda.current_device())
else:
    # Avoid crashing on hosts without CUDA or with fewer GPUs than expected.
    print("CUDA device", GPU_INDEX, "is not available; keeping the default device")


Current Device:   3


In [59]:

# Shared hyper-parameters for the notebook.
# MAX_LEN is handed to LogicalFallacy as the tokenized sequence length.
MAX_LEN = 256
# Not referenced by the evaluation code in this notebook.
TRAIN_BATCH_SIZE = 8
# Batch size used for the test DataLoader built in driver_code.
VALID_BATCH_SIZE = 4
# Not referenced by the evaluation code in this notebook.
LEARNING_RATE = 1e-05
# NOTE(review): `truncation` and `do_lower_case` are passed at load time here;
# truncation is normally an encode-time option and roberta-base uses a cased
# vocabulary — confirm these kwargs have the intended effect.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', truncation=True, do_lower_case=True)

In [50]:
class LogicalFallacy(Dataset):
    """Torch dataset that tokenizes fallacy examples from a DataFrame on the fly.

    Args:
        dataset: pandas DataFrame exposing a ``clean_prompt`` text column and a
            ``label`` column.
        tokenizer: tokenizer object providing ``encode_plus``.
        max_len: number of token ids every example is padded/truncated to.
    """

    def __init__(self, dataset, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataset
        self.text = dataset.clean_prompt
        self.targets = dataset.label
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        """Return the tokenized example stored at position ``index``.

        Returns:
            dict with the whitespace-normalized ``sentence`` and the tensors
            ``ids``, ``mask``, ``token_type_ids`` (all ``torch.long``) and
            ``targets`` (``torch.float``).
        """
        # A DataLoader hands out positions 0..len-1. Use positional access so
        # frames with a non-default index (filtered, sliced or concatenated
        # data) do not raise KeyError or return the wrong row.
        text = str(self.text.iloc[index])
        # Collapse runs of whitespace into single spaces.
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            # Explicit replacements for the deprecated ``pad_to_max_length``:
            # every example has exactly ``max_len`` ids, so batches can be
            # stacked by the default collate function.
            padding='max_length',
            truncation=True,
            return_token_type_ids=True
        )

        return {
            'sentence': text,
            'ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
            'token_type_ids': torch.tensor(inputs['token_type_ids'], dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }

In [51]:
class RobertaClass(torch.nn.Module):
    """RoBERTa encoder followed by a small feed-forward classification head.

    Args:
        num_classes: number of logits produced per example. Defaults to 2,
            the value that used to be hard-coded.
        dropout: dropout probability applied before the output layer.

    Note:
        ``predict`` restores whole pickled models with ``torch.load``, which
        does not run ``__init__``. ``forward`` therefore relies only on the
        sub-modules ``l1``, ``pre_classifier``, ``dropout`` and ``classifier``
        so that previously saved checkpoints keep working.
    """

    def __init__(self, num_classes=2, dropout=0.3):
        super().__init__()
        self.l1 = RobertaModel.from_pretrained("roberta-base")
        # Take the width from the encoder config instead of repeating the
        # literal 768 that roberta-base uses.
        hidden_size = self.l1.config.hidden_size
        self.pre_classifier = torch.nn.Linear(hidden_size, hidden_size)
        self.dropout = torch.nn.Dropout(dropout)
        self.classifier = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return unnormalized class scores (logits) for a batch."""
        encoder_out = self.l1(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        hidden_state = encoder_out[0]
        # Keep position 0 of every sequence as the sentence representation
        # (assumes a (batch, sequence, hidden) layout — TODO confirm).
        pooled = hidden_state[:, 0]
        pooled = self.pre_classifier(pooled)
        # Functional ReLU: no new module attribute, see the class note.
        pooled = torch.relu(pooled)
        pooled = self.dropout(pooled)
        # No softmax here: callers take the arg-max of the raw scores.
        return self.classifier(pooled)



In [52]:
def calcuate_accu(big_idx, targets):
    """Count the positions where the predicted index equals the target.

    Args:
        big_idx: predicted class indices.
        targets: gold class indices, comparable element-wise with ``big_idx``.

    Returns:
        Number of matching positions as a plain Python number.
    """
    hits = big_idx == targets
    return hits.sum().item()

def generate_classification_report(preds, targets):
    """Print precision/recall/F1, the confusion matrix and per-class accuracy.

    Args:
        preds: predicted class indices (0, 1 or 2).
        targets: gold class indices, same length as ``preds``.
    """
    target_names = ['fallacy of relevance', 'component fallacy', 'fallacy of ambiguity']
    # Pin the label set. Otherwise sklearn derives it from the data, and a class
    # that is absent from both preds and targets makes classification_report
    # raise (target_names length mismatch) and shrinks the confusion matrix.
    labels = list(range(len(target_names)))

    print(classification_report(targets, preds, labels=labels, target_names=target_names, digits=4))
    cm = confusion_matrix(targets, preds, labels=labels)
    print("Confusion Matrix: ")
    print(cm)

    # Diagonal divided by the row sums of the confusion matrix. Classes without
    # any gold example are reported as nan instead of triggering a 0/0 warning.
    support = cm.sum(axis=1)
    per_class = np.divide(
        cm.diagonal(),
        support,
        out=np.full(len(labels), np.nan),
        where=support > 0,
    )
    print("Per class Accuracy: ", per_class)


In [53]:

def predict(loader, model_path, epochs=1):
    """Run a saved model over ``loader`` and collect its predictions.

    Args:
        loader: iterable of batches; each batch is a dict holding the tensors
            ``ids``, ``mask``, ``token_type_ids`` and ``targets``.
        model_path: path of a whole model object saved with ``torch.save``.
        epochs: number of passes over ``loader``; predictions of every pass
            are appended to the returned lists.

    Returns:
        Tuple ``(accuracy, val, og_val)``: accuracy in percent (``0.0`` when
        the loader yields no examples), the predicted class indices and the
        gold class indices, both as flat Python lists.
    """
    # SECURITY: torch.load unpickles arbitrary objects; only load checkpoints
    # from a trusted source.
    # NOTE(review): PyTorch releases where torch.load defaults to
    # weights_only=True need weights_only=False to load a whole pickled model.
    # map_location + .to(device) place the model on the same device the
    # batches are moved to, whichever GPU the checkpoint was saved from.
    model = torch.load(model_path, map_location=device)
    model.to(device)
    model.eval()

    val, og_val = [], []
    n_correct = 0
    nb_examples = 0

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for _ in range(epochs):
            for data in tqdm(loader):
                ids = data['ids'].to(device, dtype=torch.long)
                mask = data['mask'].to(device, dtype=torch.long)
                token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
                targets = data['targets'].to(device, dtype=torch.long)

                # Call the module itself (not .forward) so hooks still run.
                outputs = model(ids, mask, token_type_ids)
                _, max_indices = torch.max(outputs, dim=1)

                val.extend(max_indices.tolist())
                og_val.extend(targets.tolist())

                n_correct += calcuate_accu(max_indices, targets)
                nb_examples += targets.size(0)

    # Guard against an empty loader instead of raising ZeroDivisionError.
    accuracy = (n_correct * 100) / nb_examples if nb_examples else 0.0

    return accuracy, val, og_val
                                                                

       


In [54]:
def driver_code(test_file, model_path, model_name):
    """Evaluate one saved checkpoint on one test CSV and print its metrics.

    Args:
        test_file: path of the CSV file that is read into the test DataFrame.
        model_path: path of the saved model that is passed on to ``predict``.
        model_name: label printed in front of the results.
    """
    test_df = pd.read_csv(test_file)
    test_set = LogicalFallacy(test_df, tokenizer, MAX_LEN)

    # Evaluation gains nothing from shuffling; a fixed order keeps the
    # predictions aligned with the rows of the CSV and makes runs repeatable.
    test_loader = DataLoader(
        test_set,
        batch_size=VALID_BATCH_SIZE,
        shuffle=False,
        num_workers=0,
    )

    value, preds, targets = predict(test_loader, model_path)
    print("Model Name: ", model_name)
    print("Accuracy of the model: ", value)
    print("Classification Report: ")
    generate_classification_report(preds, targets)
    

In [63]:
# Evaluate the checkpoint broad_classifier_trained_roberta.pt on updated_edu_test.csv.
driver_code('../data/broad_classifier/updated_edu_test.csv','../models/broad_classifiers/broad_classifier_trained_roberta.pt',  "roBERTa -  - (only sentence)" )


137it [00:03, 39.67it/s]

Model Name:  roBERTa -  - (only sentence)
Accuracy of the model:  70.32967032967034
Classification Report: 
                      precision    recall  f1-score   support

fallacy of relevance     0.7773    0.6453    0.7052       265
   component fallacy     0.7162    0.7910    0.7518       268
fallacy of ambiguity     0.0333    0.0769    0.0465        13

            accuracy                         0.7033       546
           macro avg     0.5089    0.5044    0.5011       546
        weighted avg     0.7296    0.7033    0.7124       546

Confusion Matrix: 
[[171  75  19]
 [ 46 212  10]
 [  3   9   1]]
Per class Accuracy:  [0.64528302 0.79104478 0.07692308]





In [75]:
# Evaluate the sentence+prompt checkpoint (..._sentence_prompts_with_neg.pt) on updated_edu_test_with_neg.csv.
driver_code('../data/broad_classifier/updated_edu_test_with_neg.csv','../models/broad_classifiers/broad_classifier_trained_roberta_sentence_prompts_with_neg.pt',  "roBERTa -  - (only sentences + prompts)" )


137it [00:03, 39.29it/s]

Model Name:  roBERTa -  - (only sentences + prompts)
Accuracy of the model:  77.28937728937728
Classification Report: 
                      precision    recall  f1-score   support

fallacy of relevance     0.7854    0.7736    0.7795       265
   component fallacy     0.7660    0.8060    0.7855       268
fallacy of ambiguity     0.3333    0.0769    0.1250        13

            accuracy                         0.7729       546
           macro avg     0.6282    0.5522    0.5633       546
        weighted avg     0.7651    0.7729    0.7668       546

Confusion Matrix: 
[[205  58   2]
 [ 52 216   0]
 [  4   8   1]]
Per class Accuracy:  [0.77358491 0.80597015 0.07692308]





In [77]:
# Evaluate the prompt-only checkpoint (..._prompts_with_neg.pt) on updated_edu_test_with_neg.csv.
driver_code('../data/broad_classifier/updated_edu_test_with_neg.csv','../models/broad_classifiers/broad_classifier_trained_roberta_prompts_with_neg.pt',  "roBERTa -  - (only prompts)" )


137it [00:03, 41.36it/s]

Model Name:  roBERTa -  - (only prompts)
Accuracy of the model:  65.56776556776556
Classification Report: 
                      precision    recall  f1-score   support

fallacy of relevance     0.7019    0.5509    0.6173       265
   component fallacy     0.6272    0.7910    0.6997       268
fallacy of ambiguity     0.0000    0.0000    0.0000        13

            accuracy                         0.6557       546
           macro avg     0.4430    0.4473    0.4390       546
        weighted avg     0.6485    0.6557    0.6431       546

Confusion Matrix: 
[[146 119   0]
 [ 56 212   0]
 [  6   7   0]]
Per class Accuracy:  [0.5509434  0.79104478 0.        ]



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Calculating Baselines

In [33]:
# Count how often each label occurs in the dev split; value_counts orders the
# counts from most to least frequent.
dataset = pd.read_csv('../data/broad_classifier/updated_edu_dev_with_neg.csv')
label_counts = dataset['label'].value_counts()
distribution = label_counts.to_list()
distribution

[300, 259, 13]

In [34]:
# Share of each label among all counted examples, rounded to two decimals.
total_examples = sum(distribution)
percentage_distribution = [round(count / total_examples, 2) for count in distribution]
percentage_distribution

[0.52, 0.45, 0.02]

In [88]:
# Sum of the squared class proportions, entered by hand as literals.
# NOTE(review): the literals 0.56 / 0.42 / 0.02 differ from the
# percentage_distribution displayed above ([0.52, 0.45, 0.02]) — confirm which
# split these proportions were taken from.
0.56**2+0.42**2+0.02**2 

0.4904

In [62]:
# Evaluate the balanced sentence+prompt checkpoint (..._sentence_prompt_balanced.pt) on updated_edu_test_balanced.csv.
driver_code('../data/broad_classifier/updated_edu_test_balanced.csv','../models/broad_classifiers/broad_classifier_trained_roberta_sentence_prompt_balanced.pt',  "roBERTa -  - (only sentence+prompts) balanced" )


199it [00:04, 40.95it/s]

Model Name:  roBERTa -  - (only sentence+prompts) balanced
Accuracy of the model:  55.23329129886507
Classification Report: 
                      precision    recall  f1-score   support

fallacy of relevance     0.6596    0.7019    0.6801       265
   component fallacy     0.4725    0.8657    0.6113       268
fallacy of ambiguity     1.0000    0.0769    0.1429       260

            accuracy                         0.5523       793
           macro avg     0.7107    0.5482    0.4781       793
        weighted avg     0.7080    0.5523    0.4807       793

Confusion Matrix: 
[[186  79   0]
 [ 36 232   0]
 [ 60 180  20]]
Per class Accuracy:  [0.70188679 0.86567164 0.07692308]



