# Task 4 - Argumentation Schemes Classification of Debate 7138
* Using UNSCUkrArg dataset for fine-tuning
* Predicting over Debate 7138 speeches
* Evaluating on 7138 manually annotated speeches


## Requirements

### Installs

In [None]:
!pip install torch transformers datasets

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

### General Requirements

In [None]:
import torch
from datasets import load_dataset, Dataset, ClassLabel
from transformers import (
    RobertaTokenizerFast,
    RobertaForSequenceClassification,
    TrainingArguments,
    Trainer,
    AutoConfig,
    EarlyStoppingCallback,
)
from huggingface_hub import HfFolder, notebook_login
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, precision_recall_fscore_support
from sklearn.utils.class_weight import compute_class_weight
import torch.nn as nn
import os
from torch.optim import AdamW
from torch.utils.data import DataLoader

KeyboardInterrupt: 

In [None]:
# Turn off Weights & Biases logging for the Trainer runs in this notebook.
# NOTE(review): the training output in this notebook reports this variable as deprecated
# and suggests `report_to="none"` instead.
os.environ["WANDB_DISABLED"] = "true"

## Loading UNSCUkrArg Dataset

In [None]:
# Load the UNSCUkrArg fine-tuning corpus (premise/claim pairs with their argumentation scheme).
arg_sch_data = pd.read_csv("/content/UNSCUkrArg.csv")

In [None]:
arg_sch_data['argumentation scheme'].value_counts()

Unnamed: 0_level_0,count
argumentation scheme,Unnamed: 1_level_1
evidence,346
negative_value,149
negative_consequence,142
positive_consequence,87
positive_value,65
rules,12
distress,2
bias,1
ad_hominem,1
threat,1


In [None]:
# Premise/claim pairs of debate 7138; only the first 3 speeches are manually annotated,
# the remaining rows carry the placeholder scheme "missing".
speeches_7138 = pd.read_csv("/content/premise_claim_7138_relationsChat.csv")

In [None]:
# Keep only argumentative pairs. The explicit copy makes the result an independent frame,
# so the "text" and "label" columns added further down are written to it directly instead
# of to a slice of the loaded frame (which triggers pandas' SettingWithCopyWarning).
speeches_7138 = speeches_7138[speeches_7138['Argument'] != 'non_arg'].copy()

In [None]:
speeches_7138

Unnamed: 0,file_name,premise,claim,similarity,relation,Argument,argumentation_scheme
0,UNSC_2014_SPV.7138_spch006_sentsplit_United_Ki...,It was about sending a clear signal that holdi...,The draft resolution (S/2014/l89) was designed...,0.626599,support,arg,missing
3,UNSC_2014_SPV.7138_spch006_sentsplit_United_Ki...,It was about sending a clear signal that holdi...,If Russia fails to respond to Ukraine's outstr...,0.532449,support,arg,missing
4,UNSC_2014_SPV.7138_spch006_sentsplit_United_Ki...,It was about sending a clear signal that holdi...,We ask Russia to hear the collective voice of ...,0.567920,support,arg,missing
5,UNSC_2014_SPV.7138_spch006_sentsplit_United_Ki...,Russia alone backs the referendum.,The resounding message from today's vote is th...,0.628725,support,arg,missing
6,UNSC_2014_SPV.7138_spch006_sentsplit_United_Ki...,Russia alone backs the referendum.,Russia alone is prepared to violate internatio...,0.633112,support,arg,missing
...,...,...,...,...,...,...,...
386,UNSC_2014_SPV.7138_spch005_sentsplit_France,"After all, Crimea was Russian for 170 years bu...",To accept the annexation of Crimea would be to...,,,arg,negative_consequence
387,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,The violation of international law is so obvio...,,,arg,negative_value
388,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,"Basically, it is simple: the Russian veto toda...",,,arg,negative_value
389,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,To accept the annexation of Crimea would be to...,,,arg,negative_consequence


In [None]:
speeches_7138['argumentation_scheme'].value_counts()

Unnamed: 0_level_0,count
argumentation_scheme,Unnamed: 1_level_1
missing,220
evidence,23
negative_value,8
negative_consequence,5
rules,4


## Transforming data premise-claims relations

In [None]:
# Combine premise and claim into one input string per pair.
# NOTE(review): "[SEP]" is BERT's separator token; the roberta-base tokenizer used below
# presumably treats it as ordinary text rather than a special token - confirm this is intended.
arg_sch_data["text"] = arg_sch_data["premise"] + " [SEP] " + arg_sch_data["claim"]
speeches_7138["text"] = speeches_7138["premise"] + " [SEP] " + speeches_7138["claim"]

In [None]:
#preds_7138["text"] = preds_7138["premise"] + " [SEP] " + preds_7138["claim"]

In [None]:
# Integer id of every argumentation scheme; ids follow the order of this list.
scheme_names = [
    "evidence",
    "negative_value",
    "negative_consequence",
    "positive_consequence",
    "positive_value",
    "rules",
    "distress",
    "bias",
    "ad_hominem",
    "threat",
]
label_mapping = {scheme: idx for idx, scheme in enumerate(scheme_names)}

# Encode the gold scheme of both corpora (the scheme column is named differently in each).
# Values absent from the mapping, such as "missing", become NaN.
speeches_7138["label"] = speeches_7138['argumentation_scheme'].map(label_mapping)
arg_sch_data["label"] = arg_sch_data['argumentation scheme'].map(label_mapping)

In [None]:
print('Arg complete size: ', arg_sch_data.shape)

Arg complete size:  (806, 10)


In [None]:
# Hold out 20% of the corpus for validation; the fixed seed keeps the split reproducible.
train_df, val_df = train_test_split(arg_sch_data, random_state=42, test_size=0.2)

# Report the size of each partition.
for split_name, split_df in (("Training", train_df), ("Validation", val_df)):
    print(f'{split_name} Set: {split_df.shape}')

## Loading Model
* Fine-tuning the model on the UNSCUkrArg dataset with 5-fold cross-validation

In [None]:
# Loading Tokenizer
model_name = 'roberta-base'
tokenizer = RobertaTokenizerFast.from_pretrained(model_name)


def tokenize_function(example):
    """Tokenize the ``text`` field, padding and truncating every sequence to 512 tokens."""
    encoding_options = dict(padding='max_length', truncation=True, max_length=512)
    return tokenizer(example['text'], **encoding_options)


# Stratified k-fold splitter used by the cross-validation loop.
k = 5
skf = StratifiedKFold(shuffle=True, random_state=42, n_splits=k)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [None]:
# Fine tuning with 5 fold cross validation.
# Each fold trains a fresh classifier; the per-fold metrics are collected in `fold_results`
# so the cross-validated score can be summarised once the loop has finished.
# NOTE(review): several schemes have fewer than k examples, so they cannot appear in the
# validation split of every fold.


def fold_accuracy(p):
    """Return the fraction of examples whose arg-max logit equals the gold label."""
    return {"accuracy": (p.predictions.argmax(-1) == p.label_ids).mean()}


fold_results = []

for fold, (train_idx, val_idx) in enumerate(skf.split(arg_sch_data, arg_sch_data['label'])):
    print(f"\n--- Fold {fold+1} ---")

    train_df = arg_sch_data.iloc[train_idx].reset_index(drop=True)
    val_df = arg_sch_data.iloc[val_idx].reset_index(drop=True)

    # Convert to HuggingFace Datasets
    train_dataset = Dataset.from_pandas(train_df)
    val_dataset = Dataset.from_pandas(val_df)

    # Tokenize
    train_dataset = train_dataset.map(tokenize_function, batched=True)
    val_dataset = val_dataset.map(tokenize_function, batched=True)

    # Set format for PyTorch
    train_dataset.set_format("torch")
    val_dataset.set_format("torch")

    # Load model (new weights for every fold so that folds do not leak into each other)
    model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=arg_sch_data['label'].nunique())

    # Define training arguments
    # NOTE(review): recent transformers releases rename `evaluation_strategy` to
    # `eval_strategy` - confirm against the installed version.
    training_args = TrainingArguments(
        output_dir=f'./results_fold_{fold+1}',
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=4,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        logging_strategy="epoch",
        logging_steps=10,
        save_total_limit=2,  # one checkpoint per epoch per fold would otherwise pile up on disk
        report_to="none",    # replacement for the deprecated WANDB_DISABLED switch
    )

    # Define Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        compute_metrics=fold_accuracy,
    )

    # Train and evaluate
    trainer.train()
    eval_results = trainer.evaluate()
    fold_results.append(eval_results)
    print(f"Fold {fold+1} evaluation:", eval_results)

# Cross-validated summary over all completed folds.
mean_accuracy = np.mean([result["eval_accuracy"] for result in fold_results])
print(f"\nMean accuracy over {len(fold_results)} folds: {mean_accuracy:.4f}")




--- Fold 1 ---




Map:   0%|          | 0/644 [00:00<?, ? examples/s]

Map:   0%|          | 0/162 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,1.6188,1.340105,0.5
2,1.1045,1.00676,0.635802
3,0.7739,0.97692,0.62963
4,0.542,0.903913,0.703704


Fold 1 evaluation: {'eval_loss': 0.9039127826690674, 'eval_accuracy': 0.7037037037037037, 'eval_runtime': 4.0859, 'eval_samples_per_second': 39.648, 'eval_steps_per_second': 5.14, 'epoch': 4.0}

--- Fold 2 ---


Map:   0%|          | 0/645 [00:00<?, ? examples/s]

Map:   0%|          | 0/161 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,1.6875,1.330575,0.47205


In [None]:
# Predicting on the validation split.
# `trainer` and `val_dataset` are the objects left behind by the last fold that ran.
predictions_val = trainer.predict(val_dataset)
y_true = predictions_val.label_ids
y_pred = predictions_val.predictions.argmax(axis=1)

In [None]:
# Converting DataFrame into Hugging Face Dataset.
# The cross-validation loop reuses the names `train_df`/`val_df`, so the 80/20 hold-out
# split is re-created here under its own names; with the same seed it reproduces the
# split made before cross-validation.
train_set_df, test_set_df = train_test_split(arg_sch_data, test_size=0.2, random_state=42)

# Rows whose scheme is "missing" have no label id (NaN) and cannot be scored.
# NOTE(review): assumes `test_7138` is meant to be the manually annotated subset of
# `speeches_7138` - confirm.
test_7138 = speeches_7138.dropna(subset=['label']).astype({'label': int})

train_dataset = Dataset.from_pandas(train_set_df[['text', 'label']])
test_dataset = Dataset.from_pandas(test_set_df[['text', 'label']])
test_7138_dataset = Dataset.from_pandas(test_7138[['text', 'label']])

In [None]:
# Unlabelled 7138 pairs to run inference on (text only).
# NOTE(review): `preds_7138` is not created by any visible cell (only a commented-out line
# refers to it) - it has to be loaded before this cell can run.
pred_7138_set = Dataset.from_pandas(preds_7138[['text']])

In [None]:
pred_7138_set

Dataset({
    features: ['text'],
    num_rows: 377
})

## Splitting datasets

In [None]:
train_dataset

Dataset({
    features: ['text', 'label', '__index_level_0__'],
    num_rows: 644
})

In [None]:
test_dataset

Dataset({
    features: ['text', 'label', '__index_level_0__'],
    num_rows: 162
})

In [None]:
# Tokenization of Datasets
# Setting up device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer Initialization
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')


def tokenize_function(examples):
    """Tokenize a batch of ``text`` strings into fixed-length (512-token) encodings.

    This redefinition replaces the ``tokenize_function`` from the cross-validation
    section; ``max_length`` is spelled out here as well so both versions pad and
    truncate in the same way.
    """
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=512)


# Apply tokenization to datasets
tokenized_train_set = train_dataset.map(tokenize_function, batched=True)
tokenized_test_set = test_dataset.map(tokenize_function, batched=True)
tokenized_test_7138 = test_7138_dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/644 [00:00<?, ? examples/s]

Map:   0%|          | 0/162 [00:00<?, ? examples/s]

Map:   0%|          | 0/377 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

In [None]:
# Tokenize the unlabelled 7138 pairs with the same function as the train/test sets.
tokenized_pred_7138_set = pred_7138_set.map(tokenize_function, batched=True)

Map:   0%|          | 0/377 [00:00<?, ? examples/s]

In [None]:
# Load model.
# The size of the classification head and the id/name tables are derived from
# `label_mapping`, so the saved config carries readable scheme names and stays in sync
# with the label set instead of relying on a hard-coded class count.
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=len(label_mapping),
    id2label={idx: scheme for scheme, idx in label_mapping.items()},
    label2id=label_mapping,
)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training configuration for the final fine-tuning run.
# NOTE(review): recent transformers releases rename `evaluation_strategy` to
# `eval_strategy` - confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",  # Evaluate at the end of each epoch
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    num_train_epochs=3,  # You can experiment with more epochs
    learning_rate=2e-5,  # Use a small learning rate
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    seed=42,
    report_to="none",
)

# Customizing the optimizer.
# The hyper-parameters are read from `training_args` so they are declared in one place;
# the resulting values (lr=2e-5, weight_decay=0.01) are the ones used before.
optimizer = AdamW(
    model.parameters(),
    lr=training_args.learning_rate,
    weight_decay=training_args.weight_decay,
)



In [None]:
# Define compute_metrics function
def compute_metrics(pred):
    """Compute accuracy and support-weighted precision/recall/F1 for a Trainer evaluation.

    Args:
        pred: prediction object exposing ``label_ids`` (gold class ids) and
            ``predictions`` (per-class scores, arg-maxed along axis 1).

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)

    # zero_division=0 scores classes without predictions as 0 (the value the default
    # setting also uses) without raising a warning on every evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='weighted', zero_division=0
    )
    acc = accuracy_score(labels, preds)

    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


In [None]:
# Trainer for the final run: trains on the tokenized training split and evaluates on the
# tokenized hold-out split after every epoch (see `training_args`).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_set,
    eval_dataset=tokenized_test_set,
    tokenizer=tokenizer,
    optimizers=(optimizer, None),        # passing the AdamW optimizer and using the default scheduler
    compute_metrics=compute_metrics
)

  trainer = Trainer(


In [None]:
# Train the model
trainer.train()

Epoch,Training Loss,Validation Loss
1,0.4211,0.720659
2,0.1625,0.803488
3,0.1444,0.654714


TrainOutput(global_step=243, training_loss=0.2691692034158196, metrics={'train_runtime': 228.336, 'train_samples_per_second': 8.461, 'train_steps_per_second': 1.064, 'total_flos': 508367071715328.0, 'train_loss': 0.2691692034158196, 'epoch': 3.0})

In [None]:
# Persist the fine-tuned weights and the tokenizer files side by side in one directory.
save_dir = "./fine_tuned_roberta_arg_schemes_FV"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('./fine_tuned_roberta_arg_schemes_FV/tokenizer_config.json',
 './fine_tuned_roberta_arg_schemes_FV/special_tokens_map.json',
 './fine_tuned_roberta_arg_schemes_FV/vocab.json',
 './fine_tuned_roberta_arg_schemes_FV/merges.txt',
 './fine_tuned_roberta_arg_schemes_FV/added_tokens.json',
 './fine_tuned_roberta_arg_schemes_FV/tokenizer.json')

In [None]:
# Saving fine tuned model in Google Drive.
# Colab-only step: `google.colab` is imported here rather than with the other imports,
# and mounting asks for interactive authorisation.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!mkdir -p /content/drive/MyDrive/ADD PATH # ADD PATH
!cp -r ./fine_tuned_roberta_arg_schemes_FV/* /content/drive/MyDrive/ADD PATH # ADD PATH

## Testing with the held-out UNSCUkrArg test set

In [None]:
# Test Loader: batches of 16 over the tokenized hold-out split.
# The loader is built before the dataset's output format is set in a later cell.
test_loader = DataLoader(tokenized_test_set, batch_size=16)

In [None]:
# Expose only the model inputs as torch tensors (the set is already tokenized).
tokenized_test_set.set_format(type='torch', columns=['input_ids', 'attention_mask'])

In [None]:
# Function for predictions
def predict(model, dataloader):
    """Run inference over ``dataloader`` and return the predicted class ids.

    Args:
        model: module whose forward accepts ``input_ids`` and ``attention_mask``
            keyword arguments and returns an object exposing ``.logits``.
        dataloader: iterable of batches, each a mapping holding ``input_ids`` and
            ``attention_mask`` tensors.

    Returns:
        A list with one NumPy array per batch containing the arg-max class id of
        every example in that batch; flatten it to get per-example predictions.
    """
    # Send the inputs to wherever the model's weights live instead of relying on a
    # notebook-level `device` variable that may not match the model.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # Dropout must be switched off for inference; the previous mode is restored afterwards.
    was_training = model.training
    model.eval()

    predictions = []
    try:
        with torch.no_grad():
            for batch in dataloader:
                input_ids = batch['input_ids'].to(target_device)
                attention_mask = batch['attention_mask'].to(target_device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs.logits

                predictions.append(logits.argmax(dim=-1).cpu().numpy())
    finally:
        model.train(was_training)

    return predictions


In [None]:
# Getting predictions for the hold-out split (one array of class ids per batch).
predictions = predict(model, test_loader)

In [None]:
# Converting the per-batch arrays into one flat list of class ids.
# This rebinds `predictions`, so the cell can only be run once per call to `predict`.
predictions = [item for sublist in predictions for item in sublist]

In [None]:
# Adding predictions to the test set DataFrame (relies on the row order being unchanged).
test_set_df['Predicted_Label'] = predictions

In [None]:
test_set_df

Unnamed: 0,fileid,filename,speaker,speech,premise,claim,argumentation scheme,argument target,text,label,Predicted_Label
699,UNSC_2014_SPV.7165_spch008,UNSC_2014_SPV.7165_spch008_sentsplit_United_St...,United States of America,8,"Also in Donetsk, pro-Russian groups continue t...",There we have seen a sharp deterioration in la...,negative_value,Russian Federation,"Also in Donetsk, pro-Russian groups continue t...",1,1
296,UNSC_2014_SPV.7154_spch019,UNSC_2014_SPV.7154_spch019_sentsplit_Ukraine.txt,Ukraine,19,There is substantial video evidence of armed a...,they are professional special forces appropria...,evidence,Russian Federation,There is substantial video evidence of armed a...,0,0
227,UNSC_2014_SPV.7219_spch006,UNSC_2014_SPV.7219_spch006_sentsplit_United_St...,United States of America,6,The flight was transmiting its assigned transp...,"We assess that Malaysia Airlines Flight 17, ca...",evidence,Other,The flight was transmiting its assigned transp...,0,0
336,UNSC_2014_SPV.7154_spch019,UNSC_2014_SPV.7154_spch019_sentsplit_Ukraine.txt,Ukraine,19,"distorting the truth about Ukraine,","Apparently, through these attempts, they hope ...",negative_consequence,Russian Federation,"distorting the truth about Ukraine, [SEP] Appa...",2,2
538,UNSC_2014_SPV.7154_spch005,UNSC_2014_SPV.7154_spch005_sentsplit_Lithuania...,Lithuania,5,"Again, considerable Russian troops are massed ...",The scenario is dangerously familiar.,evidence,Russian Federation,"Again, considerable Russian troops are massed ...",0,0
...,...,...,...,...,...,...,...,...,...,...,...
670,UNSC_2014_SPV.7165_spch011,UNSC_2014_SPV.7165_spch011_sentsplit_Luxembour...,Luxembourg,11,The alternative to dialogue and to de-escalati...,The Geneva declaration of 17 April cannot rema...,negative_consequence,Other,The alternative to dialogue and to de-escalati...,2,2
675,UNSC_2014_SPV.7219_spch014,UNSC_2014_SPV.7219_spch014_sentsplit_Nigeria.txt,Nigeria,14,it will mark a new dimension in the Ukraine cr...,it is of critical importance for the facts sur...,negative_consequence,Other,it will mark a new dimension in the Ukraine cr...,2,1
754,UNSC_2014_SPV.7219_spch011,UNSC_2014_SPV.7219_spch011_sentsplit_France.txt,France,11,"Moreover, this unprecedented deterioration is ...",The question of outside support in terms of re...,evidence,Russian Federation,"Moreover, this unprecedented deterioration is ...",0,0
752,UNSC_2014_SPV.7219_spch011,UNSC_2014_SPV.7219_spch011_sentsplit_France.txt,France,11,"Moreover, this unprecedented deterioration is ...",It is irrefutable.,evidence,Russian Federation,"Moreover, this unprecedented deterioration is ...",0,0


In [None]:
# Getting true labels and predicted labels from the test DataFrame
y_true = test_set_df['label']
y_pred = test_set_df['Predicted_Label']

In [None]:
# Calculating metrics.
# Macro averaging gives every argumentation-scheme class the same weight, whatever its support.
# zero_division=0 scores classes that are never predicted as 0 (the value the default
# setting also uses) without emitting a warning for each of them.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average="macro", zero_division=0)
recall = recall_score(y_true, y_pred, average="macro", zero_division=0)
f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)
conf_matrix = confusion_matrix(y_true, y_pred)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Printing the metrics.
# zero_division=0 keeps the report free of undefined-metric warnings for classes that are
# never predicted; the reported numbers are unchanged.
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", classification_report(y_true, y_pred, zero_division=0))

Accuracy: 0.8271604938271605
Precision: 0.5674162257495591
Recall: 0.5586765059028928
F1 Score: 0.555183539437264
Confusion Matrix:
 [[74  1  2  0  0  0  0]
 [ 4 20  5  0  0  0  0]
 [ 2  1 20  0  0  0  0]
 [ 0  0  4  8  3  0  0]
 [ 0  0  0  2 12  0  0]
 [ 0  2  1  0  0  0  0]
 [ 1  0  0  0  0  0  0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.96      0.94        77
           1       0.83      0.69      0.75        29
           2       0.62      0.87      0.73        23
           3       0.80      0.53      0.64        15
           4       0.80      0.86      0.83        14
           5       0.00      0.00      0.00         3
           9       0.00      0.00      0.00         1

    accuracy                           0.83       162
   macro avg       0.57      0.56      0.56       162
weighted avg       0.82      0.83      0.81       162



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Predicting on Debate 7138

In [None]:
# Expose only the model inputs of the 7138 prediction set as torch tensors.
tokenized_pred_7138_set.set_format(type='torch', columns=['input_ids', 'attention_mask'])

In [None]:
tokenized_pred_7138_set

Dataset({
    features: ['text', 'input_ids', 'attention_mask'],
    num_rows: 377
})

In [None]:
# Data loader over the 7138 prediction set (batches of 16)
loader_pred_7138_set = DataLoader(tokenized_pred_7138_set, batch_size=16)

In [None]:
tokenized_pred_7138_set

Dataset({
    features: ['text', 'input_ids', 'attention_mask'],
    num_rows: 377
})

In [None]:
# Getting predictions for debate 7138 (one array of class ids per batch)
predictions_unsc7138 = predict(model, loader_pred_7138_set)

In [None]:
# Converting the per-batch arrays into one flat list of class ids
predictions_unsc7138_list = [item for sublist in predictions_unsc7138 for item in sublist]

In [None]:
# Adding predictions to the 7138 DataFrame (relies on the row order being unchanged)
preds_7138['Predicted_Label'] = predictions_unsc7138_list

In [None]:
preds_7138['Predicted_Label'].unique()

array([1, 4, 0, 3, 2])

In [None]:
# Saving results: written to the current working directory, with the DataFrame index
# as the first (unnamed) column.
preds_7138.to_csv("predictions_RoBERTa_7138_speeches.csv")

# Evaluating on manually annotated sample of 7138 speeches

In [None]:
# Restrict the predictions to the three listed speeches, as an independent copy of those rows.
annotated_speeches = [
    'UNSC_2014_SPV.7138_spch002_sentsplit_Russian_Federation',
    'UNSC_2014_SPV.7138_spch004_sentsplit_United_States_Of_America',
    'UNSC_2014_SPV.7138_spch005_sentsplit_France',
]
is_annotated = preds_7138['file_name'].isin(annotated_speeches)
preds_7138_test_copy = preds_7138[is_annotated].copy()
preds_7138_test_copy

Unnamed: 0,file_name,premise,claim,similarity,relation,text,Predicted_Label
0,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,That principle is confirmed in the 1970 Declar...,The philosophy of the sponsors of the draft re...,0.550103,support,That principle is confirmed in the 1970 Declar...,1
1,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"With respect to Crimea, that case resulted fro...","Many times, we have put forth in this Chamber ...",0.500157,support,"With respect to Crimea, that case resulted fro...",1
2,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"With respect to Crimea, that case resulted fro...","We cannot go along with its basic assumption, ...",0.549051,support,"With respect to Crimea, that case resulted fro...",1
3,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"With respect to Crimea, that case resulted fro...","The political, legal and historic backdrop of ...",0.661576,support,"With respect to Crimea, that case resulted fro...",1
4,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"With respect to Crimea, that case resulted fro...",The View of the people of Crimea was once agai...,0.619084,support,"With respect to Crimea, that case resulted fro...",1
...,...,...,...,...,...,...,...
102,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,To accept the annexation of Crimea would be to...,0.672179,support,The annexation goes beyond Ukraine. [SEP] To a...,1
103,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,"The vast majority of Member States will prove,...",0.645621,support,The annexation goes beyond Ukraine. [SEP] The ...,1
104,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,The veto should be a defeat for Russia alone.,0.533680,support,The annexation goes beyond Ukraine. [SEP] The ...,2
105,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,We must remain steadfast in our commitment to ...,0.500540,support,The annexation goes beyond Ukraine. [SEP] We m...,3


In [None]:
# Load the evaluation file for debate 7138.
# NOTE(review): presumably the predictions above with manual gold labels added outside
# this notebook - no visible cell writes this file.
evaluation7138 = pd.read_csv("/content/RoBERTa_predictions_7138_test_evaluation.csv")

In [None]:
evaluation7138

Unnamed: 0,file_name,premise,claim,similarity,relation,text,Predicted_Label,argumentation scheme,label
0,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,That principle is confirmed in the 1970 Declar...,The philosophy of the sponsors of the draft re...,0.550103,support,That principle is confirmed in the 1970 Declar...,1,rules,5
1,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"With respect to Crimea, that case resulted fro...","We cannot go along with its basic assumption, ...",0.549051,support,"With respect to Crimea, that case resulted fro...",1,evidence,0
2,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,It should also be noted that generally agreed ...,The philosophy of the sponsors of the draft re...,0.51849,support,It should also be noted that generally agreed ...,1,rules,5
3,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,It was given to Ukraine in violation of the no...,"We cannot go along with its basic assumption, ...",0.523418,support,It was given to Ukraine in violation of the no...,1,evidence,0
4,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,It was given to Ukraine in violation of the no...,The View of the people of Crimea was once agai...,0.660728,support,It was given to Ukraine in violation of the no...,1,evidence,0
5,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"In January 1991, a referendum was conducted in...","We cannot go along with its basic assumption, ...",0.577526,support,"In January 1991, a referendum was conducted in...",1,evidence,0
6,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"In September 1991, the High Council of Crimea ...","We cannot go along with its basic assumption, ...",0.581408,support,"In September 1991, the High Council of Crimea ...",1,evidence,0
7,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"In 1992, the constitution of Crimea was adopte...","We cannot go along with its basic assumption, ...",0.543508,support,"In 1992, the constitution of Crimea was adopte...",1,evidence,0
8,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"However, in 1995, through a decision of the Uk...","We cannot go along with its basic assumption, ...",0.608566,support,"However, in 1995, through a decision of the Uk...",1,negative_consequence,2
9,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,"However, in 1995, through a decision of the Uk...",The View of the people of Crimea was once agai...,0.607235,support,"However, in 1995, through a decision of the Uk...",0,evidence,0


In [None]:
# Getting true labels and predicted labels from the 7138 evaluation DataFrame
y_true_7138 = evaluation7138['label']
y_pred_7138 = evaluation7138['Predicted_Label']

In [None]:

# Calculating metrics.
# Macro averaging gives every argumentation-scheme class the same weight, whatever its support.
# zero_division=0 scores classes that are never predicted or never occur as 0 (the value
# the default setting also uses) without emitting a warning for each of them.
accuracy_7138 = accuracy_score(y_true_7138, y_pred_7138)
precision_7138 = precision_score(y_true_7138, y_pred_7138, average="macro", zero_division=0)
recall_7138 = recall_score(y_true_7138, y_pred_7138, average="macro", zero_division=0)
f1_7138 = f1_score(y_true_7138, y_pred_7138, average="macro", zero_division=0)
conf_matrix_7138 = confusion_matrix(y_true_7138, y_pred_7138)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Printing the metrics.
# zero_division=0 keeps the report free of undefined-metric warnings for classes without
# predictions or without gold examples; the reported numbers are unchanged.
print("Accuracy:", accuracy_7138)
print("Precision:", precision_7138)
print("Recall:", recall_7138)
print("F1 Score:", f1_7138)
print("Confusion Matrix:\n", conf_matrix_7138)
print("\nClassification Report:\n", classification_report(y_true_7138, y_pred_7138, zero_division=0))

Accuracy: 0.225
Precision: 0.15636363636363634
Recall: 0.17608695652173914
F1 Score: 0.1013937282229965
Confusion Matrix:
 [[ 3 20  0  0  0]
 [ 2  6  0  0  0]
 [ 0  3  0  2  0]
 [ 0  0  0  0  0]
 [ 0  4  0  0  0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.13      0.21        23
           1       0.18      0.75      0.29         8
           2       0.00      0.00      0.00         5
           3       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         4

    accuracy                           0.23        40
   macro avg       0.16      0.18      0.10        40
weighted avg       0.38      0.23      0.18        40



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Evaluating on Augmented Evaluation Set

In [None]:
# Loading the augmented evaluation set for debate 7138; the file already contains gold
# labels (`label`) and model predictions (`predicted_label`).
data_25percent = pd.read_csv("/content/validation_set_debate7138.csv")

In [None]:
data_25percent

Unnamed: 0,file_name,premise,claim,similarity,relation,Argument,argumentation_scheme,text,label,predicted_label,argumentation_scheme_predicted
0,UNSC_2014_SPV.7138_spch006_sentsplit_United_Ki...,It was about sending a clear signal that holdi...,If Russia fails to respond to Ukraine's outstr...,0.532449,support,arg,negative_consequence,It was about sending a clear signal that holdi...,2,2,negative_consequence
1,UNSC_2014_SPV.7138_spch006_sentsplit_United_Ki...,Russia alone backs the referendum.,Russia alone is prepared to violate internatio...,0.633112,support,arg,rules,Russia alone backs the referendum. [SEP] Russi...,5,2,negative_consequence
2,UNSC_2014_SPV.7138_spch006_sentsplit_United_Ki...,The ball is now firmly in Russia's court.,If Russia fails to respond to Ukraine's outstr...,0.558896,support,arg,negative_consequence,The ball is now firmly in Russia's court. [SEP...,2,2,negative_consequence
3,UNSC_2014_SPV.7138_spch007_sentsplit_Lithuania,Our Russian colleague has spoken about self-de...,We also urge Russia to reaffirm its treaty obl...,0.572551,support,arg,positive_consequence,Our Russian colleague has spoken about self-de...,3,3,positive_consequence
4,UNSC_2014_SPV.7138_spch007_sentsplit_Lithuania,It is in that context that the Council has vot...,"Tomorrow a farce of a referendum will be held,...",0.635068,support,arg,negative_value,It is in that context that the Council has vot...,1,1,negative_value
...,...,...,...,...,...,...,...,...,...,...,...
59,UNSC_2014_SPV.7138_spch005_sentsplit_France,"After all, Crimea was Russian for 170 years bu...",To accept the annexation of Crimea would be to...,,,arg,negative_consequence,"After all, Crimea was Russian for 170 years bu...",2,2,negative_consequence
60,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,The violation of international law is so obvio...,,,arg,negative_value,The annexation goes beyond Ukraine. [SEP] The ...,1,1,negative_value
61,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,"Basically, it is simple: the Russian veto toda...",,,arg,negative_value,The annexation goes beyond Ukraine. [SEP] Basi...,1,0,evidence
62,UNSC_2014_SPV.7138_spch005_sentsplit_France,The annexation goes beyond Ukraine.,To accept the annexation of Crimea would be to...,,,arg,negative_consequence,The annexation goes beyond Ukraine. [SEP] To a...,2,2,negative_consequence


In [None]:
# Getting true labels and predicted labels from the augmented evaluation DataFrame.
# This rebinds `y_true`/`y_pred`, which held the hold-out test results earlier on.
y_true = data_25percent['label']
y_pred  = data_25percent['predicted_label']

In [None]:
# Calculating metrics.
# Macro averaging gives every argumentation-scheme class the same weight, whatever its support.
# zero_division=0 scores classes that are never predicted as 0 (the value the default
# setting also uses) without emitting a warning for each of them.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average="macro", zero_division=0)
recall = recall_score(y_true, y_pred, average="macro", zero_division=0)
f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)
conf_matrix = confusion_matrix(y_true, y_pred)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Printing the metrics.
# zero_division=0 keeps the report free of undefined-metric warnings for classes that are
# never predicted; the reported numbers are unchanged.
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", classification_report(y_true, y_pred, zero_division=0))

Accuracy: 0.375
Precision: 0.3513888888888889
Recall: 0.43543823326432013
F1 Score: 0.36694811896050283
Confusion Matrix:
 [[ 5 14  3  0  1  0]
 [ 4  5  0  2  3  0]
 [ 0  2  6  0  1  0]
 [ 0  0  0  4  1  0]
 [ 0  0  0  3  4  0]
 [ 1  3  1  1  0  0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.22      0.30        23
           1       0.21      0.36      0.26        14
           2       0.60      0.67      0.63         9
           3       0.40      0.80      0.53         5
           4       0.40      0.57      0.47         7
           5       0.00      0.00      0.00         6

    accuracy                           0.38        64
   macro avg       0.35      0.44      0.37        64
weighted avg       0.38      0.38      0.35        64



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
