In [1]:
!nvidia-smi

Mon Jan  3 15:52:29 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Create Datasets

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Specify path to the data folder from the argminint-21-keypoint-analysis-sahredtask-code repository:

In [3]:
cd /content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data

/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data


In [4]:
import pandas as pd
import numpy as np
import json

from sklearn.model_selection import train_test_split

kp_df     = pd.read_csv('../KPA_2021_shared_task/kpm_data/key_points_train.csv')
arg_df    = pd.read_csv('../KPA_2021_shared_task/kpm_data/arguments_train.csv')
labels_df = pd.read_csv('../KPA_2021_shared_task/kpm_data/labels_train.csv')

all_df = labels_df.merge(arg_df, how='inner', left_on='arg_id', right_on='arg_id')
all_df = all_df.merge(kp_df[['key_point_id', 'key_point']], how='inner', left_on='key_point_id', right_on='key_point_id')

all_df.to_csv('../data/training_df.csv')

In [5]:
kp_df     = pd.read_csv('../KPA_2021_shared_task/kpm_data/key_points_dev.csv')
arg_df    = pd.read_csv('../KPA_2021_shared_task/kpm_data/arguments_dev.csv')
labels_df = pd.read_csv('../KPA_2021_shared_task/kpm_data/labels_dev.csv')

all_df = labels_df.merge(arg_df, how='inner', left_on='arg_id', right_on='arg_id')
all_df = all_df.merge(kp_df[['key_point_id', 'key_point']], how='inner', left_on='key_point_id', right_on='key_point_id')

all_df.to_csv('../data/valid_df.csv')

In [6]:
dev_df = pd.read_csv('../data/valid_df.csv')
argument_ids = list(set(dev_df.arg_id.tolist()))
test_arg_ids, valid_arg_ids = train_test_split(argument_ids, test_size=0.7)
test_df  = dev_df[dev_df.arg_id.isin(test_arg_ids)]
valid_df = dev_df[dev_df.arg_id.isin(valid_arg_ids)]
valid_df.to_csv('../data/our_valid.csv')
test_df.to_csv('../data/our_test.csv')

Prepare Dataset for Contrastive Loss

In [7]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

pd.set_option('display.max_colwidth', None)

In [8]:
training_df = pd.read_csv('../data/training_df.csv')
valid_df = pd.read_csv('../data/our_valid.csv')

In [9]:
df = training_df.copy()
df['keypoint'] = df.apply(lambda x: x['topic'] + ' <SEP> ' + x['key_point'], axis=1)
df['label'] = df.label.apply(lambda x: int(x))
df[['argument', 'keypoint', 'label']].to_csv('training_df_contrastive.csv')

df = valid_df.copy()
df['keypoint'] = df.apply(lambda x: x['topic'] + ' <SEP> ' + x['key_point'], axis=1)
df['label'] = df.label.apply(lambda x: int(x))
df[['argument', 'keypoint', 'label']].to_csv('valid_df_contrastive.csv')

# Build Model Architecture

In [10]:
pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence-transformers-2.1.0.tar.gz (78 kB)
[K     |████████████████████████████████| 78 kB 4.9 MB/s 
[?25hCollecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)
[K     |████████████████████████████████| 3.4 MB 14.5 MB/s 
[?25hCollecting tokenizers>=0.10.3
  Downloading tokenizers-0.11.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 69.8 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 61.5 MB/s 
[?25hCollecting huggingface-hub
  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 626 kB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 53.4 MB/s 
Collecti

In [11]:
from sentence_transformers.evaluation import SentenceEvaluator, SimilarityFunction
from sentence_transformers import util
import logging
import os
import csv
from sklearn.metrics.pairwise import paired_cosine_distances, paired_euclidean_distances, paired_manhattan_distances
from typing import List


logger = logging.getLogger(__name__)

##### METHODS FROM THE KPA EVALUATION FILE ##########

import sys
import pandas as pd
from sklearn.metrics import precision_recall_curve, average_precision_score
import numpy as np
import os
import json

def get_ap(df, label_column, top_percentile=0.5):
    top = int(len(df)*top_percentile)
    df = df.sort_values('score', ascending=False).head(top)
    # after selecting top percentile candidates, we set the score for the dummy kp to 1, to prevent it from increasing the precision.
    df.loc[df['key_point_id'] == "dummy_id", 'score'] = 0.99
    return average_precision_score(y_true=df[label_column], y_score=df["score"])

def calc_mean_average_precision(df, label_column):
    precisions = [get_ap(group, label_column) for _, group in df.groupby(["topic", "stance"])]
    return np.mean(precisions)

def evaluate_predictions(merged_df):
    mAP_strict = calc_mean_average_precision(merged_df, "label_strict")
    mAP_relaxed = calc_mean_average_precision(merged_df, "label_relaxed")
    return mAP_strict, mAP_relaxed

def load_kpm_data(gold_data_dir, subset):
    arguments_file = os.path.join(gold_data_dir, f"arguments_{subset}.csv")
    key_points_file = os.path.join(gold_data_dir, f"key_points_{subset}.csv")
    labels_file = os.path.join(gold_data_dir, f"labels_{subset}.csv")

    arguments_df = pd.read_csv(arguments_file)
    key_points_df = pd.read_csv(key_points_file)
    labels_file_df = pd.read_csv(labels_file)

    return arguments_df, key_points_df, labels_file_df


def get_predictions(preds, labels_df, arg_df):
    arg_df = arg_df[["arg_id", "topic", "stance"]]
    predictions_df = load_predictions(preds)
    #make sure each arg_id has a prediction
    predictions_df = pd.merge(arg_df, predictions_df, how="left", on="arg_id")

    #handle arguements with no matching key point
    predictions_df["key_point_id"] = predictions_df["key_point_id"].fillna("dummy_id")
    predictions_df["score"] = predictions_df["score"].fillna(0)

    #merge each argument with the gold labels
    merged_df = pd.merge(predictions_df, labels_df, how="left", on=["arg_id", "key_point_id"])

    merged_df.loc[merged_df['key_point_id'] == "dummy_id", 'label'] = 0
    merged_df["label_strict"] = merged_df["label"].fillna(0)
    merged_df["label_relaxed"] = merged_df["label"].fillna(1)
    return merged_df


"""
this method chooses the best key point for each argument
and generates a dataframe with the matches and scores
"""
def load_predictions(predictions_dir):
    arg =[]
    kp = []
    scores = []
    # with open(predictions_dir, "r") as f_in:
    res = predictions_dir
    for arg_id, kps in res.items():
        best_kp = max(kps.items(), key=lambda x: x[1])
        arg.append(arg_id)
        kp.append(best_kp[0])
        scores.append(best_kp[1])
    print(f"loaded predictions for {len(arg)} arguments")
    return pd.DataFrame({"arg_id" : arg, "key_point_id": kp, "score": scores})


##### END OF METHODS FROM THE KPA EVALUATION FILE ##########

####### OUR METHODS #######

def match_argument_with_keypoints(result, kp_dict, arg_dict):
    
    for arg, arg_embedding in arg_dict.items():
        result[arg] = {}
        for kp, kp_embedding in kp_dict.items():
            result[arg][kp] = util.pytorch_cos_sim(arg_embedding, kp_embedding).item()
        
        #Applying softmax
        kp_scores = list(result[arg].items())
        kp_ids, kp_scores = zip(*kp_scores)
        #print(kp_ids)
        #print(kp_scores)
        #kp_scores = torch.softmax(torch.Tensor(kp_scores), 0).tolist()
        #print(kp_scores)
        result[arg] = {kp_id:score for kp_id, score in zip(kp_ids, kp_scores)}
        

    return result

def predict(model, argument_df, keypoint_df, output_path, append_topic=False):
    argument_keypoints = {}
    for topic in argument_df.topic.unique():
        for stance in [-1, 1]:
            topic_keypoints_ids = keypoint_df[(keypoint_df.topic==topic) & (keypoint_df.stance==stance)]['key_point_id'].tolist()
            topic_keypoints = keypoint_df[(keypoint_df.topic==topic) & (keypoint_df.stance==stance)]['key_point'].tolist()
            if append_topic:
                topic_keypoints = [topic + ' <SEP> ' + x for x in topic_keypoints]
                
            topic_keypoints_embeddings = model.encode(topic_keypoints)
            topic_kp_embed = dict(zip(topic_keypoints_ids, topic_keypoints_embeddings))

            topic_arguments_ids = argument_df[(argument_df.topic==topic) & (argument_df.stance==stance)]['arg_id'].tolist()
            topic_arguments = argument_df[(argument_df.topic==topic) & (argument_df.stance==stance)]['argument'].tolist()
            topic_arguments_embeddings = model.encode(topic_arguments)
            topic_arg_embed= dict(zip(topic_arguments_ids, topic_arguments_embeddings))

            argument_keypoints = match_argument_with_keypoints(argument_keypoints, topic_kp_embed, topic_arg_embed)
    
    json.dump(argument_keypoints, open(output_path, 'w'))
    
    return argument_keypoints


def perform_preds(model, arg_df, kp_df, append_topic):
    argument_keypoints = {}
    for topic in arg_df.topic.unique():
        for stance in [-1, 1]:
            topic_keypoints_ids = kp_df[(kp_df.topic==topic) & (kp_df.stance==stance)]['key_point_id'].tolist()
            topic_keypoints = kp_df[(kp_df.topic==topic) & (kp_df.stance==stance)]['key_point'].tolist()
            
            if append_topic:
                topic_keypoints = [topic + ' <SEP> ' + x for x in topic_keypoints]
                
            topic_keypoints_embeddings = model.encode(topic_keypoints, show_progress_bar=False)
            topic_kp_embed = dict(zip(topic_keypoints_ids, topic_keypoints_embeddings))

            topic_arguments_ids = arg_df[(arg_df.topic==topic)&(arg_df.stance==stance)]['arg_id'].tolist()
            topic_arguments = arg_df[(arg_df.topic==topic)&(arg_df.stance==stance)]['argument'].tolist()
            topic_arguments_embeddings = model.encode(topic_arguments, show_progress_bar=False)
            topic_arg_embed= dict(zip(topic_arguments_ids, topic_arguments_embeddings))

            argument_keypoints = match_argument_with_keypoints(argument_keypoints, topic_kp_embed, topic_arg_embed)

    return argument_keypoints


############################

class KeyPointEvaluator(SentenceEvaluator):
    """
    Evaluate a model based on a triplet: (sentence, positive_example, negative_example). Checks if distance(sentence,positive_example) < distance(sentence, negative_example).
    """
    def __init__(self, arg_df, kp_df, labels_df, append_topic, main_distance_function: SimilarityFunction = None, name: str = '', batch_size: int = 16, show_progress_bar: bool = False, write_csv: bool = True):
        """
        Constructs an evaluator based for the dataset


        :param dataloader:
            the data for the evaluation
        :param main_similarity:
            the similarity metric that will be used for the returned score

        """
        self.arg_df = arg_df
        self.kp_df = kp_df
        self.labels_df = labels_df
        self.name = name
        self.append_topic=append_topic
        self.main_distance_function = main_distance_function

        self.batch_size = batch_size
        if show_progress_bar is None:
            show_progress_bar = (logger.getEffectiveLevel() == logging.INFO or logger.getEffectiveLevel() == logging.DEBUG)
        self.show_progress_bar = show_progress_bar

        self.csv_file: str = "triplet_evaluation"+("_"+name if name else '')+"_results.csv"
        self.csv_headers = ["epoch", "steps", "mAP_relaxed", "mAP_strict"]
        self.write_csv = write_csv


    @classmethod
    def from_eval_data_path(cls, eval_data_path, subset_name, append_topic, **kwargs):
        arg_df, kp_df, labels_df = load_kpm_data(eval_data_path, subset=subset_name)
        return cls(arg_df, kp_df, labels_df, append_topic, **kwargs)

    def __call__(self, model, output_path: str = None, epoch: int = -1, steps: int = -1) -> float:
        if epoch != -1:
            if steps == -1:
                out_txt = " after epoch {}:".format(epoch)
            else:
                out_txt = " in epoch {} after {} steps:".format(epoch, steps)
        else:
            out_txt = ":"

        logger.info("TripletEvaluator: Evaluating the model on "+self.name+" dataset"+out_txt)

        
        #Perform prediction on the validation/test dataframes
        preds = perform_preds(model, self.arg_df, self.kp_df, self.append_topic)

        merged_df = get_predictions(preds, self.labels_df, self.arg_df)
        
        #Perform evaluation
        mAP_strict, mAP_relaxed = evaluate_predictions(merged_df)
        
        print(f"mAP strict= {mAP_strict} ; mAP relaxed = {mAP_relaxed}")
        
        logger.info("mAP strict:   \t{:.2f}".format(mAP_strict*100))
        logger.info("mAP relaxed:   \t{:.2f}".format(mAP_relaxed*100))
        
        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            if not os.path.isfile(csv_path):
                with open(csv_path, mode="w", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow(self.csv_headers)
                    writer.writerow([epoch, steps, mAP_relaxed, mAP_strict])

            else:
                with open(csv_path, mode="a", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow([epoch, steps, mAP_relaxed, mAP_strict])


        return (mAP_strict + mAP_relaxed)/2





In [12]:
from enum import Enum
import torch.nn.functional as F
class SiameseDistanceMetric(Enum):
    """
    The metric for the contrastive loss
    """
    EUCLIDEAN = lambda x, y: F.pairwise_distance(x, y, p=2)
    MANHATTAN = lambda x, y: F.pairwise_distance(x, y, p=1)
    COSINE_DISTANCE = lambda x, y: 1-F.cosine_similarity(x, y)

In [13]:
from sentence_transformers import SentenceTransformer, InputExample, LoggingHandler, losses, models, util
from torch.utils.data import DataLoader
from sentence_transformers.evaluation import TripletEvaluator
from datetime import datetime
from zipfile import ZipFile

from sentence_transformers.datasets import SentenceLabelDataset
from sentence_transformers.datasets import NoDuplicatesDataLoader

import csv
import logging
import os
import sys

sys.path.insert(0, '../../src-py/')

def train_model(dataset_path, eval_data_path, subset_name, output_path, model_name, num_epochs=3, train_batch_size=16, model_suffix='', data_file_suffix='', max_seq_length=256, 
                add_special_token=False, loss='ContrastiveLoss', sentence_transformer=False):
    ### Configure sentence transformers for training and train on the provided dataset
    # Use Huggingface/transformers model (like BERT, RoBERTa, XLNet, XLM-R) for mapping tokens to embeddings
    output_path = output_path+model_name+ "-" + model_suffix + "-"+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    if sentence_transformer:
        word_embedding_model = SentenceTransformer(model_name)
        word_embedding_model.max_seq_length = max_seq_length
        
        if add_special_token:
            word_embedding_model.tokenizer.add_tokens(['<SEP>'], special_tokens=True)
            word_embedding_model.resize_token_embeddings(len(word_embedding_model.tokenizer))

    else:
        word_embedding_model = models.Transformer(model_name)
        word_embedding_model.max_seq_length = max_seq_length
    
        if add_special_token:
            word_embedding_model.tokenizer.add_tokens(['<SEP>'], special_tokens=True)
            word_embedding_model.auto_model.resize_token_embeddings(len(word_embedding_model.tokenizer))

    # Apply mean pooling to get one fixed sized sentence vector
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                                   pooling_mode_mean_tokens=True,
                                   pooling_mode_cls_token=False,
                                   pooling_mode_max_tokens=False)

    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])


    logger.info("Read Triplet train dataset")
    train_examples = []
    with open(os.path.join(dataset_path, 'training_df_{}.csv'.format(data_file_suffix)), encoding="utf-8") as fIn:
        reader = csv.DictReader(fIn, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        for row in reader:
            if loss == 'ContrastiveLoss':
                train_examples.append(InputExample(texts=[row['argument'], row['keypoint']], label=int(row['label'])))
            else:
                train_examples.append(InputExample(texts=[row['anchor'], row['pos'], row['neg']], label=0))



    if loss == 'MultipleNegativesRankingLoss':
        # Special data loader that avoid duplicates within a batch
        train_dataloader = NoDuplicatesDataLoader(train_examples, shuffle=False, batch_size=train_batch_size)
        # Our training loss
        train_loss = losses.MultipleNegativesRankingLoss(model)
    elif loss == 'ContrastiveLoss':
        train_dataloader = DataLoader(train_examples, shuffle=False, batch_size=train_batch_size)
        train_loss = losses.ContrastiveLoss(model=model)
    else:
        train_dataloader = DataLoader(train_examples, shuffle=False, batch_size=train_batch_size)
        train_loss = losses.TripletLoss(model)
    

    evaluator = KeyPointEvaluator.from_eval_data_path(eval_data_path, subset_name, add_special_token, name='dev', show_progress_bar=False)


    warmup_steps = int(len(train_dataloader) * num_epochs * 0.1) #10% of train data


    # Train the model
    model.fit(train_objectives=[(train_dataloader, train_loss)],
              evaluator=evaluator,
              epochs=num_epochs,
              evaluation_steps=500,
              warmup_steps=warmup_steps,
              output_path=output_path)

In [14]:
train_model('',
            '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/KPA_2021_shared_task/kpm_data',
            'train',
            '',
            'roberta-base',
            model_suffix='contrastive-3-epochs-32batch-train', 
            data_file_suffix='contrastive',
            num_epochs=3, max_seq_length=70, add_special_token=True, train_batch_size=32, loss='ContrastiveLoss')

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/645 [00:00<?, ?it/s]

loaded predictions for 5583 arguments
mAP strict= 0.860426104101457 ; mAP relaxed = 0.9542743144470522
loaded predictions for 5583 arguments
mAP strict= 0.8630138268301515 ; mAP relaxed = 0.9657318046919702


Iteration:   0%|          | 0/645 [00:00<?, ?it/s]

loaded predictions for 5583 arguments
mAP strict= 0.8959813669588499 ; mAP relaxed = 0.9791412181693028
loaded predictions for 5583 arguments
mAP strict= 0.9180934315496282 ; mAP relaxed = 0.9840778454472229


Iteration:   0%|          | 0/645 [00:00<?, ?it/s]

loaded predictions for 5583 arguments
mAP strict= 0.929463289145001 ; mAP relaxed = 0.9901626571566918
loaded predictions for 5583 arguments
mAP strict= 0.9302954616606912 ; mAP relaxed = 0.9897420966721092


In [15]:
ls 


[0m[01;34marg-kp[0m/
[01;34marg-quality[0m/
arguments_our_test.csv
arguments_our_valid.csv
key_points_our_test.csv
key_points_our_valid.csv
labels_our_test.csv
labels_our_valid.csv
our_test.csv
our_valid.csv
pagerank-test-keypoints.pkl
[01;34mpredictions[0m/
[01;34mroberta-base-contrastive-10-epochs-2021-12-09_15-30-13[0m/
roberta-base-contrastive-10-epochs-2021-12-09_15-30-13-test-preds.json
roberta-base-contrastive-10-epochs-32batch-train_Euclidean-2021-12-28_21-36-32-test-preds.json
roberta-base-contrastive-10-epochs-32batch-train_Euclidean-2021-12-28_21-55-04-test-preds.json
[01;34mroberta-base-contrastive-10-epochs-64batch-train_28_12-2021-12-28_18-06-30[0m/
roberta-base-contrastive-10-epochs-64batch-train_28_12-2021-12-28_18-06-30-test-preds.json
roberta-base-contrastive-10-epochs-64batch-train_Euclidean-1margin-2021-12-29_08-37-32-test-preds.json
roberta-base-contrastive-10-epochs-64batch-train_Euclidean-2021-12-29_08-04-04-test-preds.json
roberta-base-contrastive-3-e

# Evaluation

Specify the model and the models_list with the models you trained

In [17]:
# model = SentenceTransformer('/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-10-epochs-2021-12-09_15-30-13')
model = SentenceTransformer('/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-32batch-train-2022-01-03_15-53-15')
models_list = [
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-10-epochs-2021-12-09_15-30-13',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-Special-Token-32batch-train-2021-12-09_16-11-51',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-Special-Token-16batch-train_28_12-2021-12-28_16-03-36',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-Special-Token-8batch-train_28_12-2021-12-28_16-25-50',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-16batch-train_28_12-2021-12-28_16-45-56',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-32batch-train_28_12-2021-12-28_17-00-37',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-Special-Tokens-32batch-train_28_12-2021-12-28_17-14-44/',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-5-epochs-32batch-train_28_12-2021-12-28_17-28-47',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-5-epochs-50batch-train_28_12-2021-12-28_17-45-52',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-10-epochs-64batch-train_28_12-2021-12-28_18-06-30',
               '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-32batch-train-2022-01-03_15-53-15',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-32batch-train-2021-12-30_13-47-54',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-32batch-train-2021-12-30_15-34-33',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-5-epochs-32batch-train-2021-12-30_14-45-23',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-3-epochs-32batch-train-2021-12-30_15-11-41',
              #  '/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/data/roberta-base-contrastive-10-epochs-64batch-train_Euclidean-1margin-2021-12-29_08-37-32',


]

pred_output_path = ''
subset_name= 'our_valid'

Methods are slighlty adjusted for evaluation purpose

In [18]:
import sys
import pandas as pd
from sklearn.metrics import precision_recall_curve, average_precision_score, precision_score
from matplotlib import pyplot
import numpy as np
import os
import json



def get_ap(df, label_column, top_percentile=0.5):
    top = int(len(df)*top_percentile)
    df = df.sort_values('score', ascending=False).head(top)
    # after selecting top percentile candidates, we set the score for the dummy kp to 1, to prevent it from increasing the precision.
    df.loc[df['key_point_id'] == "dummy_id", 'score'] = 0.99
    return average_precision_score(y_true=df[label_column], y_score=df["score"])

def calc_mean_average_precision(df, label_column):
    precisions = [get_ap(group, label_column) for _, group in df.groupby(["topic", "stance"])]
    return np.mean(precisions)

def evaluate_predictions(merged_df):
    mAP_strict = calc_mean_average_precision(merged_df, "label_strict")
    mAP_relaxed = calc_mean_average_precision(merged_df, "label_relaxed")
    print(f"mAP strict= {mAP_strict} ; mAP relaxed = {mAP_relaxed}")

def load_kpm_data(gold_data_dir, subset):
    arguments_file = os.path.join(gold_data_dir, f"arguments_{subset}.csv")
    key_points_file = os.path.join(gold_data_dir, f"key_points_{subset}.csv")
    labels_file = os.path.join(gold_data_dir, f"labels_{subset}.csv")

    arguments_df = pd.read_csv(arguments_file)
    key_points_df = pd.read_csv(key_points_file)
    labels_file_df = pd.read_csv(labels_file)

    return arguments_df, key_points_df, labels_file_df


def get_predictions(predictions_file, labels_df, arg_df):
    arg_df = arg_df[["arg_id", "topic", "stance"]]
    predictions_df = load_predictions(predictions_file)
    #make sure each arg_id has a prediction
    predictions_df = pd.merge(arg_df, predictions_df, how="left", on="arg_id")

    #handle arguements with no matching key point
    predictions_df["key_point_id"] = predictions_df["key_point_id"].fillna("dummy_id")
    predictions_df["score"] = predictions_df["score"].fillna(0)

    #merge each argument with the gold labels
    merged_df = pd.merge(predictions_df, labels_df, how="left", on=["arg_id", "key_point_id"])

    merged_df.loc[merged_df['key_point_id'] == "dummy_id", 'label'] = 0
    merged_df["label_strict"] = merged_df["label"].fillna(0)
    merged_df["label_relaxed"] = merged_df["label"].fillna(1)
    return merged_df


"""
this method chooses the best key point for each argument
and generates a dataframe with the matches and scores
"""
def load_predictions(predictions_dir):
    arg =[]
    kp = []
    scores = []
    with open(predictions_dir, "r") as f_in:
        res = json.load(f_in)
        for arg_id, kps in res.items():
            best_kp = max(kps.items(), key=lambda x: x[1])
            arg.append(arg_id)
            kp.append(best_kp[0])
            scores.append(best_kp[1])
        print(f"loaded predictions for {len(arg)} arguments")
        return pd.DataFrame({"arg_id" : arg, "key_point_id": kp, "score": scores})

def match_argument_with_keypoints(result, kp_dict, arg_dict):
    
    for arg, arg_embedding in arg_dict.items():
        result[arg] = {}
        for kp, kp_embedding in kp_dict.items():
            # print(type(util.pytorch_cos_sim(arg_embedding, kp_embedding).item()))
            # print(type(float(np.linalg.norm(arg_embedding - kp_embedding))))
            result[arg][kp] = util.pytorch_cos_sim(arg_embedding, kp_embedding).item()
            # result[arg][kp] = float(1-np.linalg.norm(arg_embedding - kp_embedding))
        
        #Applying softmax
        kp_scores = list(result[arg].items())
        kp_ids, kp_scores = zip(*kp_scores)
        #print(kp_ids)
        #print(kp_scores)
        #kp_scores = torch.softmax(torch.Tensor(kp_scores), 0).tolist()
        #print(kp_scores)
        result[arg] = {kp_id:score for kp_id, score in zip(kp_ids, kp_scores)}
        

    return result

def predict(model, argument_df, keypoint_df, output_path, append_topic=False):
    argument_keypoints = {}
    for topic in argument_df.topic.unique():
        for stance in [-1, 1]:
            topic_keypoints_ids = keypoint_df[(keypoint_df.topic==topic) & (keypoint_df.stance==stance)]['key_point_id'].tolist()
            topic_keypoints = keypoint_df[(keypoint_df.topic==topic) & (keypoint_df.stance==stance)]['key_point'].tolist()
            if append_topic:
                topic_keypoints = [topic + ' <SEP> ' + x for x in topic_keypoints]
                
            topic_keypoints_embeddings = model.encode(topic_keypoints)
            topic_kp_embed = dict(zip(topic_keypoints_ids, topic_keypoints_embeddings))

            topic_arguments_ids = argument_df[(argument_df.topic==topic) & (argument_df.stance==stance)]['arg_id'].tolist()
            topic_arguments = argument_df[(argument_df.topic==topic) & (argument_df.stance==stance)]['argument'].tolist()
            topic_arguments_embeddings = model.encode(topic_arguments)
            topic_arg_embed= dict(zip(topic_arguments_ids, topic_arguments_embeddings))

            argument_keypoints = match_argument_with_keypoints(argument_keypoints, topic_kp_embed, topic_arg_embed)
    print(argument_keypoints)
    print(type(argument_keypoints))
    json.dump(argument_keypoints, open(output_path, 'w'))
    return argument_keypoints

def predict_and_evaluate(argument_df, keypoint_df, gold_data_dir, subset_name):
    pred_df = {}
    for model_path in models_list:
        append_topic= 'topic_added' in model_path
        #Predict
        model = SentenceTransformer(model_path)
        model_name = model_path.split('/')[-1]
        predictions_file = pred_output_path+model_name+ '-' + subset_name + '-preds.json'
        json_preds = predict(model, argument_df, keypoint_df, predictions_file, append_topic)
        

        #Evaluate
        arg_df, kp_df, labels_df = load_kpm_data(gold_data_dir, subset=subset_name)
        merged_df = get_predictions(predictions_file, labels_df, arg_df)
        print('Evaluating {}:'.format(model_name))
        evaluate_predictions(merged_df)
        
        pred_df[model_name] = merged_df

    return pred_df

Load test data

In [19]:
test_arg_df = pd.read_csv('../KPA_2021_shared_task/test_data/arguments_test.csv')
test_keypoints_df = pd.read_csv('../KPA_2021_shared_task/test_data/key_points_test.csv')

In [20]:
results_df = predict_and_evaluate(test_arg_df, test_keypoints_df, '../KPA_2021_shared_task/test_data/', 'test')

{'arg_0_0': {'kp_0_0': 0.8495104312896729, 'kp_0_1': 0.48215460777282715, 'kp_0_2': 0.3403909504413605, 'kp_0_3': 0.8233962059020996}, 'arg_0_1': {'kp_0_0': 0.9214873313903809, 'kp_0_1': 0.5158599615097046, 'kp_0_2': 0.42015787959098816, 'kp_0_3': 0.8982327580451965}, 'arg_0_2': {'kp_0_0': 0.8449358940124512, 'kp_0_1': 0.4653307795524597, 'kp_0_2': 0.36464187502861023, 'kp_0_3': 0.8379727602005005}, 'arg_0_3': {'kp_0_0': 0.7612001299858093, 'kp_0_1': 0.47184908390045166, 'kp_0_2': 0.4090626537799835, 'kp_0_3': 0.7064131498336792}, 'arg_0_4': {'kp_0_0': 0.8598307371139526, 'kp_0_1': 0.6230531930923462, 'kp_0_2': 0.510854184627533, 'kp_0_3': 0.8649142384529114}, 'arg_0_5': {'kp_0_0': 0.5596566200256348, 'kp_0_1': 0.8137309551239014, 'kp_0_2': 0.8107937574386597, 'kp_0_3': 0.5775954127311707}, 'arg_0_6': {'kp_0_0': 0.8926764726638794, 'kp_0_1': 0.4827284812927246, 'kp_0_2': 0.36649149656295776, 'kp_0_3': 0.9011802673339844}, 'arg_0_7': {'kp_0_0': 0.9480359554290771, 'kp_0_1': 0.4999296367

# Own Siamese Model together with additional features

In [21]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [22]:
from keras import backend as K

def cosine_distance(vests):
    x, y = vests
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)
    # print("Cosine distance NORM: ")
    # print(x,y)
    return -K.mean(x * y, axis=-1, keepdims=True)

def cos_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0],1)



In [43]:
def loss(margin=1):
    """Provides 'constrastive_loss' an enclosing scope with variable 'margin'.

  Arguments:
      margin: Integer, defines the baseline for distance for which pairs
              should be classified as dissimilar. - (default is 1).

  Returns:
      'constrastive_loss' function with data ('margin') attached.
  """

    # Contrastive loss = mean( (1-true_value) * square(prediction) +
    #                         true_value * square( max(margin-prediction, 0) ))
    def contrastive_loss(y_true, y_pred):
        """Calculates the constrastive loss.

      Arguments:
          y_true: List of labels, each label is of type float32.
          y_pred: List of predictions of same length as of y_true,
                  each label is of type float32.

      Returns:
          A tensor containing constrastive loss as floating point value.
      """
        y_true = tf.cast(y_true, y_pred.dtype)
        square_pred = tf.math.square(y_pred)
        margin_square = tf.math.square(tf.math.maximum(margin - (y_pred), 0))
        # return tf.math.reduce_mean(
        #     (1 - y_true) * square_pred + (y_true) * margin_square
        # )
        return tf.math.reduce_mean(
            (1 - y_true) * y_pred + (y_true) * tf.math.maximum(margin - y_pred, 0)
        )

    return contrastive_loss

def euclidean_distance(vects):
    """Find the Euclidean distance between two vectors.

    Arguments:
        vects: List containing two tensors of same length.

    Returns:
        Tensor containing euclidean distance
        (as floating point value) between vectors.
    """
    
    x, y = vects
    # print(x)
    sum_square = tf.math.reduce_sum(tf.math.square(x - y), axis=1, keepdims=True)
    return tf.math.sqrt(tf.math.maximum(sum_square, tf.keras.backend.epsilon()))

def getCosineSim(vests):
    x, y = vests
    dot = tf.math.reduce_sum(tf.math.multiply(x,y), axis=1, keepdims=True)
    x_norm = tf.norm(x, axis=1, keepdims=True)
    y_norm = tf.norm(y, axis=1, keepdims=True)
    mult = x_norm * y_norm
    # mult = tf.math.maximum(mult, tf.keras.backend.epsilon())
    # print(dot)
    # print(x_norm)
    # print(y_norm)
    # print(x_norm * y_norm)
    # print(tf.math.divide(dot, mult))
    return tf.math.divide(dot,mult)
    # argument_embedding, key_point_embedding = vests
    # a = []
    # # print(argument_embedding)
    # # u = argument_embedding.numpy()
    # # v = key_point_embedding.numpy()
    # # print(argument_embedding.shape[0])
    # # for i in range(argument_embedding.shape[0]):
    # for i in range(argument_embedding.shape[0]):
    #     a.append([cosine(argument_embedding[i], key_point_embedding[i])])
    # return tf.convert_to_tensor(a, dtype=np.float)
def cosine(u, v):
    # print(u)
    dot_product = tf.reduce_sum(tf.multiply(u, v))
    u_norm = tf.norm(u, axis=-1)
    v_norm = tf.norm(v, axis=-1)
    # print("Cosinesim NORM: ")
    # print(u_norm, v_norm)
    return dot_product / (u_norm * v_norm)

def getCosineSimoriginal(argument_embedding, key_point_embedding):
    a = []
    for i in range(len(argument_embedding)):
        a.append([cosineoriginal(argument_embedding[i], key_point_embedding[i])])
    return a
def cosineoriginal(u, v):
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

In [24]:
def contrastive_loss(y, d):
    """ Contrastive loss from Hadsell-et-al.'06
        http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    """
    y= tf.cast(y, d.dtype)
    margin = 0.5
    return K.mean(y * K.square(d) + (1 - y) * K.square(K.maximum(margin - d, 0)))

## POS Data Preparation

Data Preparation:
  1. Get Sentence Embedding with model specified above
  2. Create POS Data

In [25]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [26]:
df_arguments = pd.read_csv("../KPA_2021_shared_task/kpm_data/arguments_train.csv")
df_keypoints = pd.read_csv("../KPA_2021_shared_task/kpm_data/key_points_train.csv")
df_labels = pd.read_csv("../KPA_2021_shared_task/kpm_data/labels_train.csv")
merged_dataset=df_labels.merge(df_arguments, left_on="arg_id", right_on="arg_id")
full_dataset=merged_dataset.merge(df_keypoints, left_on="key_point_id", right_on="key_point_id")
altdata_arguments = model.encode(full_dataset["argument"])
altdata_keypoints = model.encode(full_dataset["key_point"])
labels = np.array(full_dataset["label"])
labels=np.asarray(labels).astype(int)

array([0, 0, 0, ..., 1, 0, 0])

In [None]:
tags = set()
for el in full_dataset["argument"]:
  doc = nlp(el)
  for token in doc:
    tags.add(token.pos_)
for el in full_dataset["key_point"]:
  doc = nlp(el)
  for token in doc:
    tags.add(token.pos_)
pos_tags = {}
i = 0
for el in list(sorted(tags)): 
    pos_tags[el] = i
    i = i + 1
pos_tags

In [None]:
argument_pos = []
for el in full_dataset["argument"]:
  doc = nlp(el)
  pos_tags_res = [0 for _ in range(17)]
  for token in doc:
    ind = pos_tags[token.pos_]
    pos_tags_res[ind] = pos_tags_res[ind] + 1
  argument_pos.append(pos_tags_res)
keypoint_pos = []
for el in full_dataset["key_point"]:
  doc = nlp(el)
  pos_tags_res = [0 for _ in range(17)]
  for token in doc:
    ind = pos_tags[token.pos_]
    pos_tags_res[ind] = pos_tags_res[ind] + 1
  keypoint_pos.append(pos_tags_res)
arg_pos_df = pd.DataFrame(argument_pos)
kp_pos_df = pd.DataFrame(keypoint_pos)
for i in range(17):
  m = max(arg_pos_df[i].max(), kp_pos_df[i].max())
  arg_pos_df[i] = arg_pos_df[i] / m
  kp_pos_df[i] = kp_pos_df[i] / m
arg_pos_df = arg_pos_df.values.tolist()
kp_pos_df = kp_pos_df.values.tolist()
arguments_embedding_pos = np.array(np.hstack((altdata_arguments, arg_pos_df)))
kp_embedding_pos = np.array(np.hstack((altdata_keypoints, kp_pos_df)))

full_dataset["argument_pos"] = arg_pos_df
full_dataset["keypoint_pos"] = kp_pos_df
full_dataset

In [None]:
full_dataset.to_csv("training_pos.csv")

Create Pos Features for the Test dataset

In [30]:
df_arguments_test = pd.read_csv("/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/KPA_2021_shared_task/test_data/arguments_test.csv")
df_keypoints_test = pd.read_csv("/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/KPA_2021_shared_task/test_data/key_points_test.csv")
df_labels_test = pd.read_csv("/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/KPA_2021_shared_task/test_data/labels_test.csv")

In [31]:
merged_dataset_test=df_labels_test.merge(df_arguments_test, left_on="arg_id", right_on="arg_id")
# df_arguments.loc[df_arguments['arg_id'] == "arg_4_121"]
full_dataset_test=merged_dataset_test.merge(df_keypoints_test, left_on="key_point_id", right_on="key_point_id")
full_dataset_test
arguments_test = model.encode(full_dataset_test["argument"])
# altdata_arguments
keypoints_test = model.encode(full_dataset_test["key_point"])

In [None]:
argument_pos_test = []
for el in full_dataset_test["argument"]:
  doc = nlp(el)
  pos_tags_res = [0 for _ in range(17)]
  for token in doc:
    if token.pos_ in pos_tags:
      ind = pos_tags[token.pos_]
      pos_tags_res[ind] = pos_tags_res[ind] + 1
  argument_pos_test.append(pos_tags_res)
keypoint_pos_test = []
for el in full_dataset_test["key_point"]:
  doc = nlp(el)
  pos_tags_res = [0 for _ in range(17)]
  for token in doc:
    if token.pos_ in pos_tags:
      ind = pos_tags[token.pos_] 
      pos_tags_res[ind] = pos_tags_res[ind] + 1
  keypoint_pos_test.append(pos_tags_res)
arg_pos_df_test = pd.DataFrame(argument_pos_test)
kp_pos_df_test = pd.DataFrame(keypoint_pos_test)
for i in range(16):
  m = max(arg_pos_df_test[i].max(), kp_pos_df_test[i].max())
  arg_pos_df_test[i] = arg_pos_df_test[i] / m
  kp_pos_df_test[i] = kp_pos_df_test[i] / m
arg_pos_df_test = arg_pos_df_test.values.tolist()
kp_pos_df_test = kp_pos_df_test.values.tolist()
full_dataset_test["argument_pos"] = arg_pos_df_test
full_dataset_test["keypoint_pos"] = kp_pos_df_test
full_dataset_test

In [None]:
full_dataset_test.to_csv("test_pos.csv")

Get one pos feature embedding for each argument and each keypoint out of full dataset

In [None]:
kp_subset = full_dataset_test[["key_point_id", "keypoint_pos"]]
kp_pos_dic = {}
for i, row in kp_subset.iterrows():
  if row["key_point_id"] not in kp_pos_dic:
    kp_pos_dic[row["key_point_id"]] = row["keypoint_pos"]
kp_pos_dic
arg_subset = full_dataset_test[["arg_id", "argument_pos"]]
arg_pos_dic = {}
for i, row in arg_subset.iterrows():
  if row["arg_id"] not in arg_pos_dic:
    arg_pos_dic[row["arg_id"]] = row["argument_pos"]
arg_pos_dic


In [None]:
arguments_embedding_pos[0].shape

(785,)

# Siamese Neural Network Architecture

In [44]:
inputs = keras.Input(shape=(785,))
# dense = layers.Dense(768, activation="relu")
# x = dense(inputs)
outputs = layers.Dense(785, activation="tanh")(inputs)
normal_Outputs = tf.keras.layers.Normalization()(outputs)
embedding_network = keras.Model(inputs=inputs, outputs=normal_Outputs, name="shared_layers")
embedding_network.summary()

input_1 = keras.Input((785, 1))
input_2 = keras.Input((785, 1))

tower_1 = embedding_network(input_1)
tower_2 = embedding_network(input_2)

# merge_layer = layers.Lambda(euclidean_distance)([tower_1, tower_2])
merge_layer = layers.Lambda(getCosineSim, output_shape=cos_dist_output_shape)([tower_1, tower_2])

output_layer = layers.Dense(1, activation="sigmoid")(merge_layer)
siamese = keras.Model(inputs=[input_1, input_2], outputs=output_layer)
siamese.compile(loss=loss(0.5), optimizer="RMSprop", metrics=["accuracy"])
siamese.summary()

Model: "shared_layers"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_13 (InputLayer)       [(None, 785)]             0         
                                                                 
 dense_8 (Dense)             (None, 785)               617010    
                                                                 
 normalization_4 (Normalizat  (None, 785)              1571      
 ion)                                                            
                                                                 
Total params: 618,581
Trainable params: 617,010
Non-trainable params: 1,571
_________________________________________________________________
Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_14 (InputLayer)    

In [45]:
history = siamese.fit(
    [arguments_embedding_pos, kp_embedding_pos],
    labels,
    batch_size=1,
    epochs=15,
)

Epoch 1/15
 2405/20635 [==>...........................] - ETA: 46s - loss: 0.2157 - accuracy: 0.8653

KeyboardInterrupt: ignored

# Evaluation

Rewritten Evaluation Methods for different Evaluation procedures: 
  1: Cosine Similarity
  2: Model prediction
  3: Euclidean Distance

In [None]:
import sys
import pandas as pd
from sklearn.metrics import precision_recall_curve, average_precision_score, precision_score
from matplotlib import pyplot
import numpy as np
import os
import json



def get_ap(df, label_column, top_percentile=0.5):
    top = int(len(df)*top_percentile)
    df = df.sort_values('score', ascending=False).head(top)
    # after selecting top percentile candidates, we set the score for the dummy kp to 1, to prevent it from increasing the precision.
    df.loc[df['key_point_id'] == "dummy_id", 'score'] = 0.99
    return average_precision_score(y_true=df[label_column], y_score=df["score"])

def calc_mean_average_precision(df, label_column):
    precisions = [get_ap(group, label_column) for _, group in df.groupby(["topic", "stance"])]
    return np.mean(precisions)

def evaluate_predictions(merged_df):
    mAP_strict = calc_mean_average_precision(merged_df, "label_strict")
    mAP_relaxed = calc_mean_average_precision(merged_df, "label_relaxed")
    print(f"mAP strict= {mAP_strict} ; mAP relaxed = {mAP_relaxed}")

def load_kpm_data(gold_data_dir, subset):
    arguments_file = os.path.join(gold_data_dir, f"arguments_{subset}.csv")
    key_points_file = os.path.join(gold_data_dir, f"key_points_{subset}.csv")
    labels_file = os.path.join(gold_data_dir, f"labels_{subset}.csv")

    arguments_df = pd.read_csv(arguments_file)
    key_points_df = pd.read_csv(key_points_file)
    labels_file_df = pd.read_csv(labels_file)

    return arguments_df, key_points_df, labels_file_df


def get_predictions(predictions_file, labels_df, arg_df):
    arg_df = arg_df[["arg_id", "topic", "stance"]]
    predictions_df = load_predictions(predictions_file)
    #make sure each arg_id has a prediction
    predictions_df = pd.merge(arg_df, predictions_df, how="left", on="arg_id")

    #handle arguements with no matching key point
    predictions_df["key_point_id"] = predictions_df["key_point_id"].fillna("dummy_id")
    predictions_df["score"] = predictions_df["score"].fillna(0)

    #merge each argument with the gold labels
    merged_df = pd.merge(predictions_df, labels_df, how="left", on=["arg_id", "key_point_id"])

    merged_df.loc[merged_df['key_point_id'] == "dummy_id", 'label'] = 0
    merged_df["label_strict"] = merged_df["label"].fillna(0)
    merged_df["label_relaxed"] = merged_df["label"].fillna(1)
    return merged_df


"""
this method chooses the best key point for each argument
and generates a dataframe with the matches and scores
"""
def load_predictions(predictions_dir):
    arg =[]
    kp = []
    scores = []
    with open(predictions_dir, "r") as f_in:
        res = json.load(f_in)
        for arg_id, kps in res.items():
            best_kp = max(kps.items(), key=lambda x: x[1])
            arg.append(arg_id)
            kp.append(best_kp[0])
            scores.append(best_kp[1])
        print(f"loaded predictions for {len(arg)} arguments")
        return pd.DataFrame({"arg_id" : arg, "key_point_id": kp, "score": scores})

def match_argument_with_keypointsnn(modelnn, result, kp_dict, arg_dict):
    
    for arg, arg_embedding in arg_dict.items():
        result[arg] = {}
        for kp, kp_embedding in kp_dict.items():

          in1 = np.array(np.hstack((arg_embedding, arg_pos_dic[arg])))
          in2 = np.array(np.hstack((kp_embedding, kp_pos_dic[kp])))


          arg_input = np.expand_dims(in1, axis=0)  
          kp_input = np.expand_dims(in2, axis=0)
# Alternative Predict embedding and compute Cosine similarity
          argembed = embedding_network.predict(arg_input)
          argembed = tf.convert_to_tensor(argembed, dtype=np.float, dtype_hint=None, name=None)

          kpembed = embedding_network.predict(kp_input)
          kpembed = tf.convert_to_tensor(kpembed, dtype=np.float, dtype_hint=None, name=None)
          res = getCosineSim([argembed, kpembed])
          result[arg][kp] = float(res[0])
#  Alternative: Predict final Value with Network
          # res = modelnn.predict([arg_input, kp_input])
          # result[arg][kp] = float(res[0])
# Alternative 3 Euclidean distance
          # argembed = embedding_network.predict(arg_input)
          # print(type(argembed))

          # print("ARGEMBED")
          # print(argembed)
          
          # kpembed = embedding_network.predict(kp_input)
          # print(float(np.linalg.norm(argembed - kpembed)))
          # result[arg][kp] = float(20 - np.linalg.norm(argembed - kpembed))
        
        #Applying softmax
        kp_scores = list(result[arg].items())
        kp_ids, kp_scores = zip(*kp_scores)
        #print(kp_ids)
        #print(kp_scores)
        #kp_scores = torch.softmax(torch.Tensor(kp_scores), 0).tolist()
        #print(kp_scores)
        result[arg] = {kp_id:score for kp_id, score in zip(kp_ids, kp_scores)}
        

    return result



def predictnn(model, modelnn, argument_df, keypoint_df, output_path):
    argument_keypoints = {}
    for topic in argument_df.topic.unique():
        for stance in [-1, 1]:
            topic_keypoints_ids = keypoint_df[(keypoint_df.topic==topic) & (keypoint_df.stance==stance)]['key_point_id'].tolist()
            topic_keypoints = keypoint_df[(keypoint_df.topic==topic) & (keypoint_df.stance==stance)]['key_point'].tolist()
            
            topic_keypoints_embeddings = model.encode(topic_keypoints)
            topic_kp_embed = dict(zip(topic_keypoints_ids, topic_keypoints_embeddings))

            topic_arguments_ids = argument_df[(argument_df.topic==topic) & (argument_df.stance==stance)]['arg_id'].tolist()
            topic_arguments = argument_df[(argument_df.topic==topic) & (argument_df.stance==stance)]['argument'].tolist()
            topic_arguments_embeddings = model.encode(topic_arguments)
            topic_arg_embed= dict(zip(topic_arguments_ids, topic_arguments_embeddings))

            argument_keypoints = match_argument_with_keypointsnn(modelnn, argument_keypoints, topic_kp_embed, topic_arg_embed)
    print(argument_keypoints)
    print(type(argument_keypoints))
    json.dump(argument_keypoints, open(output_path, 'w'))
    return argument_keypoints
    
def predict_and_evaluatenn(modelnn, argument_df, keypoint_df, gold_data_dir, subset_name):
    pred_df = {}
    for model_path in models_list:
        #Predict
        model = SentenceTransformer(model_path)
        model_name = model_path.split('/')[-1]
        output_path = 'testing-preds.json'
        json_preds = predictnn(model, modelnn, argument_df, keypoint_df, output_path)
        

        #Evaluate
        arg_df, kp_df, labels_df = load_kpm_data(gold_data_dir, subset=subset_name)
        merged_df = get_predictions(output_path, labels_df, arg_df)
        print('Evaluating {}:'.format(model_name))
        evaluate_predictions(merged_df)
        
        pred_df[model_name] = merged_df

    return pred_df

BEST RESULT: 15 Epochs, loss 0.5, 786 Layer, Normalization, Pos, 17:00 Model, Contrastive Loss SQRT

In [None]:
# 15 Epochs sqrt Contrastive Loss 17:00
result_test_linear = predict_and_evaluatenn(siamese, df_arguments_test, df_keypoints_test, "/content/drive/MyDrive/argmining-21-keypoint-analysis-sharedtask-code-master/KPA_2021_shared_task/test_data", "test" )

{'arg_0_0': {'kp_0_0': 0.9949237704277039, 'kp_0_1': 0.14685878157615662, 'kp_0_2': 0.0012498873984441161, 'kp_0_3': 0.9914548397064209}, 'arg_0_1': {'kp_0_0': 0.999066174030304, 'kp_0_1': 0.14233550429344177, 'kp_0_2': 0.00031346461037173867, 'kp_0_3': 0.997951090335846}, 'arg_0_2': {'kp_0_0': 0.9958166480064392, 'kp_0_1': 0.07241039723157883, 'kp_0_2': 0.0014720788458362222, 'kp_0_3': 0.9912137389183044}, 'arg_0_3': {'kp_0_0': 0.783173143863678, 'kp_0_1': 0.05938571318984032, 'kp_0_2': 0.0022693369537591934, 'kp_0_3': 0.533928632736206}, 'arg_0_4': {'kp_0_0': 0.9934617280960083, 'kp_0_1': 0.6254062652587891, 'kp_0_2': 0.0004988983855582774, 'kp_0_3': 0.9977019429206848}, 'arg_0_5': {'kp_0_0': 0.04979861155152321, 'kp_0_1': 0.44443055987358093, 'kp_0_2': 0.1159706860780716, 'kp_0_3': 0.06244663894176483}, 'arg_0_6': {'kp_0_0': 0.9969191551208496, 'kp_0_1': 0.0924268364906311, 'kp_0_2': 0.0006552293780259788, 'kp_0_3': 0.9964907765388489}, 'arg_0_7': {'kp_0_0': 0.9996036887168884, 'kp_