# **Using Fine-Tuned RoBERTa for Argumentative Sentence Detection on Debate 7138**


## Requirements

In [None]:
!pip install torch transformers datasets

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

In [None]:
!pip install scikit-learn



In [None]:
import torch
from datasets import load_dataset
from transformers import (
    RobertaTokenizerFast,
    RobertaForSequenceClassification,
    TrainingArguments,
    Trainer,
    AutoConfig,
)
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from google.colab import drive

In [None]:
from datasets import Dataset

## Loading UNSCUkrArg Dataset

In [None]:
# Load the UNSCUkrArg dataset from the Excel sheet under /content
data_unsc= pd.read_excel("/content/UNSC_arg_schemes_Task1.xlsx")
# Display the frame: one row per sentence, with its "arg" / "non_arg" label
data_unsc

Unnamed: 0,fileid,filename,sentence,annotated argument,prem_claim_label,label
0,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,Let me thank the Nigerian presidency for conve...,[],[],non_arg
1,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,I might refer to some of the comments by our c...,[],[],non_arg
2,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,I would also like to thank Under Secretary-Gen...,[],[],non_arg
3,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,"As so many of us have said here, the internati...",[],[],non_arg
4,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,"But, as we know today, the Geneva statement is...",['the Geneva statement is just another documen...,"['claim', 'premise']",arg
...,...,...,...,...,...,...
1491,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,We hope that the relevant parties will remain ...,[],[],non_arg
1492,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,Settling the question of Ukraine involves the ...,['Settling the question of Ukraine involves th...,['premise'],arg
1493,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,It should be considered in a balanced manner.,['It should be considered in a balanced manner.'],['claim'],arg
1494,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,Political and diplomatic channels should conti...,['Political and diplomatic channels should con...,"['claim', 'premise']",arg


# Loading Fine-Tuned RoBERTa Model

In [None]:
# Mount Google Drive at /content/drive
# (presumably where the model fine-tuned on the US debates dataset is stored -- confirm)
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the fine-tuned model and its tokenizer from the saved checkpoint directory.
# TODO: set this to the folder that holds the saved model and tokenizer files.
model_path = ""
if not model_path:
    # Fail with an actionable message; the bare `model_path =` left here before
    # was a SyntaxError that prevented the cell from running at all.
    raise ValueError("Set `model_path` to the directory of the saved model before running this cell.")

model = RobertaForSequenceClassification.from_pretrained(model_path)  # saved model weights
tokenizer = RobertaTokenizerFast.from_pretrained(model_path)  # saved tokenizer

# Set the model to evaluation mode (the returned module is shown as the cell output)
model.eval()

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Move the model to the selected device (the returned module is shown as the cell output)
model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

# *UNSCUkrArg* Dataset Classification

In [None]:
# Re-display the data while `label` still holds the original string values
data_unsc

Unnamed: 0,fileid,filename,sentence,annotated argument,prem_claim_label,label
0,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,Let me thank the Nigerian presidency for conve...,[],[],non_arg
1,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,I might refer to some of the comments by our c...,[],[],non_arg
2,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,I would also like to thank Under Secretary-Gen...,[],[],non_arg
3,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,"As so many of us have said here, the internati...",[],[],non_arg
4,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,"But, as we know today, the Geneva statement is...",['the Geneva statement is just another documen...,"['claim', 'premise']",arg
...,...,...,...,...,...,...
1491,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,We hope that the relevant parties will remain ...,[],[],non_arg
1492,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,Settling the question of Ukraine involves the ...,['Settling the question of Ukraine involves th...,['premise'],arg
1493,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,It should be considered in a balanced manner.,['It should be considered in a balanced manner.'],['claim'],arg
1494,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,Political and diplomatic channels should conti...,['Political and diplomatic channels should con...,"['claim', 'premise']",arg


In [None]:
# Map the string labels to the integer class ids used for evaluation
label_mapping = {"non_arg": 0, "arg": 1}

# Look every value up with a fallback to itself, so re-running this cell on
# already-mapped labels is a no-op (a plain .map(dict) would turn 0/1 into NaN).
data_unsc['label'] = data_unsc['label'].map(lambda value: label_mapping.get(value, value))

# Fail loudly on missing or unexpected labels instead of carrying them into the metrics
unexpected_labels = set(data_unsc['label'].unique()) - set(label_mapping.values())
assert not unexpected_labels, f"Unexpected label values: {unexpected_labels}"

data_unsc

Unnamed: 0,fileid,filename,sentence,annotated argument,prem_claim_label,label
0,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,Let me thank the Nigerian presidency for conve...,[],[],0
1,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,I might refer to some of the comments by our c...,[],[],0
2,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,I would also like to thank Under Secretary-Gen...,[],[],0
3,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,"As so many of us have said here, the internati...",[],[],0
4,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,"But, as we know today, the Geneva statement is...",['the Geneva statement is just another documen...,"['claim', 'premise']",1
...,...,...,...,...,...,...
1491,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,We hope that the relevant parties will remain ...,[],[],0
1492,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,Settling the question of Ukraine involves the ...,['Settling the question of Ukraine involves th...,['premise'],1
1493,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,It should be considered in a balanced manner.,['It should be considered in a balanced manner.'],['claim'],1
1494,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,Political and diplomatic channels should conti...,['Political and diplomatic channels should con...,"['claim', 'premise']",1


In [None]:
# Tokenization helper applied to the datasets via `.map`
def tokenize_function(examples):
    """Tokenize the ``sentence`` field of ``examples`` with the notebook-level ``tokenizer``.

    The tokenizer is called with ``padding="max_length"`` and ``truncation=True``,
    and its output is returned unchanged.
    """
    sentences = examples['sentence']
    encoded = tokenizer(sentences, truncation=True, padding="max_length")
    return encoded

In [None]:
# Convert the DataFrame into a Hugging Face `datasets.Dataset` for tokenization
# (`Dataset` is the `datasets` class here: its import comes after, and so shadows,
# the `torch.utils.data.Dataset` imported earlier)
unsc_test = Dataset.from_pandas(data_unsc)

In [None]:
# Inspect the converted dataset (its repr lists the features and the row count)
unsc_test

Dataset({
    features: ['fileid', 'filename', 'sentence', 'annotated argument', 'prem_claim_label', 'label'],
    num_rows: 1496
})

In [None]:
# Tokenize every sentence; with batched=True, tokenize_function receives batches of rows
unsc_tokenized_test = unsc_test.map(tokenize_function, batched=True)

Map:   0%|          | 0/1496 [00:00<?, ? examples/s]

In [None]:
# Return only the two tokenizer outputs that predict() reads, as PyTorch tensors
unsc_tokenized_test.set_format(type='torch', columns=['input_ids', 'attention_mask'])

In [None]:
# Batch the tokenized examples for inference, 16 per batch. No shuffling is requested,
# which matters because the predictions are later assigned back to the DataFrame by position.
arg_unsc_loader = DataLoader(unsc_tokenized_test, batch_size=16)

## Predictions on UNSCUkrArg dataset

In [None]:
# Function for predictions
def predict(model, dataloader):
    """Run batched inference and return the predicted class ids.

    Args:
        model: A ``torch.nn.Module`` that accepts ``input_ids`` and
            ``attention_mask`` keyword arguments and returns an object with a
            ``logits`` attribute.
        dataloader: Iterable of batches, each a mapping holding
            ``'input_ids'`` and ``'attention_mask'`` tensors.

    Returns:
        list: One NumPy array of argmax class indices per batch, in iteration
        order. Callers flatten it to get one prediction per example.
    """
    # Take the device from the model itself rather than from a notebook-level
    # global, so the function works wherever the model currently lives.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # Make sure inference runs in evaluation mode even if the caller forgot to set it.
    model.eval()

    predictions = []
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(model_device)
            attention_mask = batch['attention_mask'].to(model_device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Highest-scoring class per example, moved to the CPU as a NumPy array
            predictions.append(logits.argmax(dim=-1).cpu().numpy())

    return predictions


In [None]:
# Run inference and flatten the per-batch prediction arrays into one list of labels
unsc_predictions = [
    label
    for batch_labels in predict(model, arg_unsc_loader)
    for label in batch_labels
]

# Store the predictions next to the gold labels in the DataFrame
data_unsc['Predicted_Label'] = unsc_predictions


In [None]:
# Inspect the gold `label` column next to the model's `Predicted_Label`
data_unsc

Unnamed: 0,fileid,filename,sentence,annotated argument,prem_claim_label,label,Predicted_Label
0,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,Let me thank the Nigerian presidency for conve...,[],[],0,0
1,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,I might refer to some of the comments by our c...,[],[],0,0
2,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,I would also like to thank Under Secretary-Gen...,[],[],0,0
3,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,"As so many of us have said here, the internati...",[],[],0,1
4,UNSC_2014_SPV.7165_spch016,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,"But, as we know today, the Geneva statement is...",['the Geneva statement is just another documen...,"['claim', 'premise']",1,1
...,...,...,...,...,...,...,...
1491,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,We hope that the relevant parties will remain ...,[],[],0,1
1492,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,Settling the question of Ukraine involves the ...,['Settling the question of Ukraine involves th...,['premise'],1,1
1493,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,It should be considered in a balanced manner.,['It should be considered in a balanced manner.'],['claim'],1,1
1494,UNSC_2014_SPV.7154_spch012,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,Political and diplomatic channels should conti...,['Political and diplomatic channels should con...,"['claim', 'premise']",1,1


## Evaluation of UNSCUkrArg Dataset Predictions

In [None]:
# Gold labels and model predictions, both taken from the DataFrame
y_true, y_pred = data_unsc['label'], data_unsc['Predicted_Label']

In [None]:
# Calculating metrics
accuracy = accuracy_score(y_true, y_pred)
# Precision, recall and F1 are computed for class 1, which label_mapping assigns to "arg"
precision = precision_score(y_true, y_pred, pos_label=1)  # assuming 'Arg'=1 is the positive class
recall = recall_score(y_true, y_pred, pos_label=1)
f1 = f1_score(y_true, y_pred, pos_label=1)
# Confusion matrix: rows are the true classes, columns the predicted classes
conf_matrix = confusion_matrix(y_true, y_pred)


In [None]:
# Print the overall metrics, the confusion matrix, and the per-class report
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", conf_matrix)
# classification_report is computed here from the same y_true / y_pred
print("\nClassification Report:\n", classification_report(y_true, y_pred))

Accuracy: 0.6991978609625669
Precision: 0.6842496285289748
Recall: 0.9735729386892178
F1 Score: 0.8036649214659686
Confusion Matrix:
 [[125 425]
 [ 25 921]]

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.23      0.36       550
           1       0.68      0.97      0.80       946

    accuracy                           0.70      1496
   macro avg       0.76      0.60      0.58      1496
weighted avg       0.74      0.70      0.64      1496



In [None]:
# Save the complete UNSCUkrArg dataset, including the Predicted_Label column,
# to an Excel file in the current working directory
data_unsc.to_excel("RoBERTa_arg_non_arg_UNSC_predictions.xlsx")

# Debate 7138 Classification

In [None]:
# Load the Debate 7138 speeches. The sheet needs a `sentence` column,
# because tokenize_function reads examples['sentence'].
unsc_7138= pd.read_excel("Debate_7138_Speches.xlsx")

In [None]:
# Display the loaded Debate 7138 speeches
unsc_7138

In [None]:
# Wrap the Debate 7138 DataFrame in a Hugging Face dataset
unsc_7138_dataset = Dataset.from_pandas(unsc_7138)

# Tokenize the speeches and expose only the model inputs as PyTorch tensors
unsc_7138_dataset_tokenized = unsc_7138_dataset.map(
    tokenize_function,
    batched=True,
).with_format(type='torch', columns=['input_ids', 'attention_mask'])

# Batch the tokenized speeches for inference
unsc_7138_loader = DataLoader(dataset=unsc_7138_dataset_tokenized, batch_size=16)

In [None]:
# Run inference and flatten the per-batch prediction arrays into one list of labels
unsc_7138_predictions = [
    label
    for batch_labels in predict(model, unsc_7138_loader)
    for label in batch_labels
]

# Attach the predictions to the Debate 7138 DataFrame
unsc_7138['Predicted_Label'] = unsc_7138_predictions

In [None]:
# Save the Debate 7138 speeches together with their Predicted_Label column
# to an Excel file in the current working directory
unsc_7138.to_excel("7138_speches_with_Task1_predictions.xlsx")