# Task 3: Premise-Claim Relation Extraction and Validation

## Requirements

In [None]:
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics import classification_report

## Loading Data

In [None]:
# Read the Task 2 results workbook for debate 7138 into a DataFrame
df = pd.read_excel(io="/content/Task2_results_Debate_7138.xlsx")

In [None]:
# Dropping unnecessary columns.
# `df` is rebound to the trimmed frame, so on a second execution of this cell the
# columns are already gone; errors="ignore" keeps the cell re-runnable instead of
# raising KeyError for the missing labels.
df = df.drop(
    columns=['label', 'Attitude Fragment', 'Predicted Argument or Not', 'Predicted Argument', 'Predicted_Labels'],
    errors="ignore",
)
df

Unnamed: 0,File_name,Speech,claim_premise_predictions
0,UNSC_2014_SPV.7138_spch020_sentsplit_Russian_F...,Some colleagues today have achieved high level...,Premise
1,UNSC_2014_SPV.7138_spch020_sentsplit_Russian_F...,I must mention that the Ukrainian colleague ne...,Claim
2,UNSC_2014_SPV.7138_spch020_sentsplit_Russian_F...,"Ifone is to speak ofblood, indeed the blood is...",Premise
3,UNSC_2014_SPV.7138_spch020_sentsplit_Russian_F...,Colleagues that have taken the floor discussed...,Premise
4,UNSC_2014_SPV.7138_spch020_sentsplit_Russian_F...,I will not comment on everything heard today -...,Claim
...,...,...,...
310,UNSC_2014_SPV.7138_spch014_sentsplit_Nigeria,Our willing submission to the rule of the Inte...,Claim
311,UNSC_2014_SPV.7138_spch014_sentsplit_Nigeria,Nigeria is vehemently opposed to unilateral se...,Claim
312,UNSC_2014_SPV.7138_spch014_sentsplit_Nigeria,"Standing on that fundamental principle, Nigeri...",Claim
313,UNSC_2014_SPV.7138_spch014_sentsplit_Nigeria,It is therefore needless to restate that the i...,Claim


In [None]:
# One group per File_name, so premises and claims are only paired within the same document
grouped = df.groupby(by="File_name")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7e2ead6604d0>

## Loading Multi-QA Sentence Transformer Model

In [None]:
# Instantiate the Multi-QA MPNet sentence-transformer used to embed premises and claims
model = SentenceTransformer(model_name_or_path='sentence-transformers/multi-qa-mpnet-base-dot-v1')

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.71k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Extracting relations based on similarity scores with Sentence Transformer.
# Every (premise, claim) pair inside the same file is scored with cosine similarity
# and labelled "support" when the score exceeds the threshold, otherwise "attack".
SIMILARITY_THRESHOLD = 0.5

relations_2 = []

for file_name, group in grouped:
    premises = group[group["claim_premise_predictions"] == "Premise"]
    claims = group[group["claim_premise_predictions"] == "Claim"]

    if premises.empty or claims.empty:
        continue  # Skip if no premises or claims

    premise_texts = premises["Speech"].tolist()
    claim_texts = claims["Speech"].tolist()

    # Encode sentences using SBERT
    premise_embeddings = model.encode(premise_texts, convert_to_tensor=True)
    claim_embeddings = model.encode(claim_texts, convert_to_tensor=True)

    # Compute cosine similarity between each premise (rows) and claim (columns)
    similarity_matrix = util.pytorch_cos_sim(premise_embeddings, claim_embeddings)

    # Pull the whole matrix to Python floats once; calling .item() per pair inside
    # the nested loop would trigger a separate tensor-to-host transfer for every score.
    similarity_rows = similarity_matrix.tolist()

    # Determine relations
    for premise_text, row_scores in zip(premise_texts, similarity_rows):
        for claim_text, similarity in zip(claim_texts, row_scores):
            relation_type = "support" if similarity > SIMILARITY_THRESHOLD else "attack"
            relations_2.append((file_name, premise_text, claim_text, similarity, relation_type))

# Convert to DataFrame
relations_df_model = pd.DataFrame(relations_2, columns=["File_name", "Premise", "Claim", "Similarity", "Relation"])


In [None]:
# Display the extracted premise-claim pairs with their similarity score and relation label
relations_df_model

Unnamed: 0,File_name,Premise,Claim,Similarity,Relation
0,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,That principle is confirmed in the 1970 Declar...,"Many times, we have put forth in this Chamber ...",0.300350,attack
1,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,That principle is confirmed in the 1970 Declar...,That general context is important to understan...,0.419197,attack
2,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,That principle is confirmed in the 1970 Declar...,It is a secret to no one that the Russian Fede...,0.351075,attack
3,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,That principle is confirmed in the 1970 Declar...,"We cannot go along with its basic assumption, ...",0.336189,attack
4,UNSC_2014_SPV.7138_spch002_sentsplit_Russian_F...,That principle is confirmed in the 1970 Declar...,The philosophy of the sponsors of the draft re...,0.550103,support
...,...,...,...,...,...
1667,UNSC_2014_SPV.7138_spch020_sentsplit_Russian_F...,"To conclude, one of our colleagues said that K...",That does not apply to us; it is a phrase take...,0.413647,attack
1668,UNSC_2014_SPV.7138_spch020_sentsplit_Russian_F...,"To conclude, one of our colleagues said that K...",Ms. Power spoke of truth.,0.361243,attack
1669,UNSC_2014_SPV.7138_spch020_sentsplit_Russian_F...,"To conclude, one of our colleagues said that K...",The world would be very interested to know if ...,0.513300,support
1670,UNSC_2014_SPV.7138_spch020_sentsplit_Russian_F...,"To conclude, one of our colleagues said that K...",But the problem is not with Moscow; it has to ...,0.667055,support


In [None]:
# Write the scored premise-claim pairs to an Excel workbook (index column omitted)
relations_df_model.to_excel(
    excel_writer="Task3_Sent_Transformer_Multi-Qa_results.xlsx",
    index=False,
)

## Predicting similarity scores over UNSCUkrArg Dataset

In [None]:
# Load the annotated premise-claim pairs whose "Relation" column serves as the gold label
data_unsc_annotated = pd.read_excel(io="/content/Task3 file evaluations UNSC speeches.xlsx")
data_unsc_annotated

Unnamed: 0,filename,Premise,Claim,Relation
0,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,adding to the growing list of breaches of Russ...,the Geneva statement is just another document ...,support
1,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,Barricades in Maidan Square are being dismantled.,"Despite growing provocations, the Ukrainian au...",support
2,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,I recall my French colleague referring a numbe...,"Despite growing provocations, the Ukrainian au...",support
3,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,Users of those will see that there is a signif...,"Despite growing provocations, the Ukrainian au...",support
4,UNSC_2014_SPV.7165_spch016_sentsplit_Lithuania...,barricades are being dismantled.,"Despite growing provocations, the Ukrainian au...",support
...,...,...,...,...
803,UNSC_2014_SPV.7154_spch021_sentsplit_Russian_F...,That would have extremely significant conseque...,Let us ensure that we do not permit the Ukrain...,support
804,UNSC_2014_SPV.7154_spch021_sentsplit_Russian_F...,That would have extremely significant conseque...,That needs to be avoided.,support
805,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,Settling the question of Ukraine involves the ...,It should be considered in a balanced manner.,support
806,UNSC_2014_SPV.7154_spch012_sentsplit_China.txt,in order to narrow differences and settle the ...,Political and diplomatic channels should conti...,support


In [None]:
# Threshold above which a premise-claim pair is labelled "support" (otherwise "attack")
SIMILARITY_THRESHOLD = 0.5

# Encode premises and claims using SBERT.
# NOTE: this uses `model`, the sentence-transformer loaded earlier in the notebook;
# the previous reference to `model_2` is not defined anywhere and fails on a fresh kernel.
premise_embeddings = model.encode(data_unsc_annotated["Premise"].tolist(), convert_to_tensor=True)
claim_embeddings = model.encode(data_unsc_annotated["Claim"].tolist(), convert_to_tensor=True)

# Compute cosine similarity between each Premise and the Claim on the same row.
# Row-wise similarity avoids building the full N x N matrix only to read its diagonal.
similarity_scores = torch.nn.functional.cosine_similarity(
    premise_embeddings, claim_embeddings, dim=1
).tolist()

# Classify relations based on similarity threshold
data_unsc_annotated["Predicted_Relation"] = [
    "support" if sim > SIMILARITY_THRESHOLD else "attack" for sim in similarity_scores
]
data_unsc_annotated["Similarity"] = similarity_scores  # Store similarity for analysis

# Evaluate against gold standard Relation column.
# zero_division=0 reports 0.0 for metrics that are undefined (e.g. a label with no
# true samples) without emitting an UndefinedMetricWarning for each of them.
print(
    classification_report(
        data_unsc_annotated["Relation"].str.lower(),
        data_unsc_annotated["Predicted_Relation"],
        digits=4,
        zero_division=0,
    )
)


              precision    recall  f1-score   support

      attack     0.0000    0.0000    0.0000         0
     support     1.0000    0.4394    0.6105       808

    accuracy                         0.4394       808
   macro avg     0.5000    0.2197    0.3052       808
weighted avg     1.0000    0.4394    0.6105       808



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Save the annotated pairs together with the added Predicted_Relation and Similarity columns.
# index=False matches the earlier relations export and keeps the row index from being
# written as an extra unnamed column in the workbook.
data_unsc_annotated.to_excel("predicted_relations_unscannotations_senttransformer.xlsx", index=False)