In [4]:
import pandas as pd
import joblib
import numpy as np
import utils
import configs
from transformers import AutoTokenizer


In [5]:
# Run configuration: which dataset / backbone / attack this notebook analyses.
dataset = "dbpedia"
architecture = "BART"
data_dir = f"../datasets/{dataset}_dataset"
batch_size = 256
model_name = f"{dataset}_model"
attack = "textbugger"
# Destination of the per-example CSV written by the last cell.
output_file_name = f"artifacts/{dataset}/{model_name}/{dataset}_examples_with_closest_protos_{attack}.csv"

# Pick the tokenizer that matches the backbone architecture.
if architecture == "BART":
    tokenizer = AutoTokenizer.from_pretrained("ModelTC/bart-base-mnli")
elif architecture == "ELECTRA":
    tokenizer = AutoTokenizer.from_pretrained("google/electra-base-discriminator")
else:
    # Fail fast: merely printing here would leave `tokenizer` undefined and
    # surface later as an unrelated NameError in the load_dataset call below.
    raise ValueError(f"Invalid backbone architecture: {architecture}")

# Load every split for the dataset, tokenized with the dataset-specific max length.
all_datasets = utils.load_dataset(
    data_dir=data_dir,
    tokenizer=tokenizer,
    max_length=configs.dataset_to_max_length[dataset],
)

# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only load artifacts produced by a trusted run of this project.
best_protos_per_testeg = joblib.load(
    f"artifacts/{dataset}/{model_name}/best_protos_per_testeg.joblib"
)

# Show which splits have precomputed prototype information.
best_protos_per_testeg.keys()

Train data shape:  (24094, 2)


Map:   0%|          | 0/24094 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1281 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/1836 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/60794 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1281 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/1836 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

dict_keys(['test_textfooler', 'test_textbugger', 'adv_textfooler', 'adv_textbugger', 'test_textfooler_protocnn', 'adv_textfooler_protocnn', 'test_textbugger_protocnn', 'adv_textbugger_protocnn', 'adv_paraphrased', 'test_paraphrased'])

In [6]:

# Select the clean split that pairs with the configured attack.
test_dataset = f"test_{attack}"
num_examples = len(all_datasets[test_dataset])

# Entry [0] is read as prototype indices and entry [1] as the values paired
# with them (presumably distances, one row per example -- TODO confirm).
split_protos = best_protos_per_testeg[test_dataset]
indices, values = split_protos[0].tolist(), split_protos[1].tolist()

# For each example, pair every index with its value and order the pairs by
# ascending value, so position 0 holds the pair with the smallest value.
prototoypes_indices_values_per_point = [
    sorted(zip(indices[row], values[row]), key=lambda pair: pair[1])
    for row in range(num_examples)
]

In [7]:
# Convert the per-example sorted (index, value) pairs to an array once and
# slice it twice, instead of rebuilding the same array for each column.
# NOTE(review): if indices are ints and values are floats, numpy promotes the
# whole array to float, so `top_protos` would hold float-valued indices -- confirm
# that downstream consumers of the CSV are fine with that.
clean_pairs = np.array(prototoypes_indices_values_per_point)
top_protos = clean_pairs[:, 0, 0]  # index of the first (smallest-value) pair per example
top_dist = clean_pairs[:, 0, 1]  # value of that same pair

In [8]:
# Select the adversarial split produced by the configured attack.
test_dataset = f"adv_{attack}"
num_examples = len(all_datasets[test_dataset])

# Entry [0] is read as prototype indices and entry [1] as the values paired
# with them (presumably distances, one row per example -- TODO confirm).
split_protos = best_protos_per_testeg[test_dataset]
indices, values = split_protos[0].tolist(), split_protos[1].tolist()

# For each adversarial example, pair every index with its value and order the
# pairs by ascending value, so position 0 holds the pair with the smallest value.
adv_prototoypes_indices_values_per_point = [
    sorted(zip(indices[row], values[row]), key=lambda pair: pair[1])
    for row in range(num_examples)
]

In [9]:
# Convert the per-example sorted (index, value) pairs to an array once and
# slice it twice, instead of rebuilding the same array for each column.
# NOTE(review): if indices are ints and values are floats, numpy promotes the
# whole array to float, so `top_protos_adv` would hold float-valued indices --
# confirm that downstream consumers of the CSV are fine with that.
adv_pairs = np.array(adv_prototoypes_indices_values_per_point)
top_protos_adv = adv_pairs[:, 0, 0]  # index of the first (smallest-value) pair per example
top_dist_adv = adv_pairs[:, 0, 1]  # value of that same pair

In [10]:
# Walk the clean and adversarial splits in lockstep; zip stops at the shorter
# of the two, so any unmatched trailing rows are dropped.
clean_split = all_datasets[f"test_{attack}"]
adv_split = all_datasets[f"adv_{attack}"]
paired_rows = list(zip(clean_split, adv_split))

# Text and label come from the clean row; the corrupted text from its adversarial partner.
clean_text = [clean_row["text"] for clean_row, _ in paired_rows]
corrupted_text = [adv_row["text"] for _, adv_row in paired_rows]
labels = [clean_row["label"] for clean_row, _ in paired_rows]

In [11]:
# One row per example: clean text and label, the first-ranked prototype for the
# clean input, and the same information for its attacked counterpart.
report_columns = {
    "text": clean_text,
    "label": labels,
    "closest_prototype": top_protos,
    "closest_prototype_dist": top_dist,
    "permuted_text": corrupted_text,
    "permuted_closest_prototype": top_protos_adv,
    "permuted_closest_prototype_dist": top_dist_adv,
}
df = pd.DataFrame(report_columns)

# Flag examples whose first-ranked prototype differs between clean and attacked input.
df["prototype_changed"] = df["closest_prototype"].ne(df["permuted_closest_prototype"])

# Persist the table without the positional index column.
df.to_csv(output_file_name, index=False)