In [1]:
import pandas as pd
import numpy as np
import json
import os

In [2]:
# Root of the project checkout that contains the result JSON files read below.
# The original cluster path stays the default; set ROBUST_PROTOTYPE_LEARNING_DIR
# to run the notebook against a checkout in another location without editing it.
curr_directory = os.environ.get(
    "ROBUST_PROTOTYPE_LEARNING_DIR", "/scratch/zhivar/robust-prototype-learning"
)

In [3]:
# Load the list of PBN result records.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(
    os.path.join(
        curr_directory, "PBN_transformer", "all_results_from_pbn_models_static.json"
    ),
    encoding="utf-8",
) as f:
    pbns_data = json.load(f)

In [4]:
# Load the list of non-PBN (baseline) result records.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(
    os.path.join(
        curr_directory, "normal_models", "all_results_from_non_pbn_models_static.json"
    ),
    encoding="utf-8",
) as f:
    non_pbns_data = json.load(f)

In [5]:
# Load the result records of the non-PBN models from the "augmented training" run.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(
    os.path.join(
        curr_directory,
        "normal_models",
        "all_results_from_non_pbn_models_static_augmented_training.json",
    ),
    encoding="utf-8",
) as f:
    non_pbns_data_augmented = json.load(f)

In [6]:
# Flatten the PBN results into one row per (result record, condition).
# Rows are collected as dicts in a single list so the columns cannot drift out of
# step with each other, which ten separately appended lists would allow.
pbn_rows = []
for item in pbns_data:
    attack_type = item["attack_type"]
    results = item["results"]
    # Skip records whose accuracy under the "test" condition is below 0.5.
    if results["test"][f"test_{attack_type}"]["accuracy"] < 0.5:
        continue
    for condition in ["test", "adv"]:
        pbn_rows.append(
            {
                "comment": "PBN",
                "architecture": item["architecture"],
                "dataset": item["dataset"],
                "attack_type": attack_type,
                "p1_lamb": item["p1_lamb"],
                "p2_lamb": item["p2_lamb"],
                "p3_lamb": item["p3_lamb"],
                "num_protos": item["num_proto"],
                "condition": condition,
                "accuracy": results[condition][f"{condition}_{attack_type}"][
                    "accuracy"
                ],
            }
        )
# `columns` pins the column order and keeps the schema even if no record survives.
pbns_data_df = pd.DataFrame(
    pbn_rows,
    columns=[
        "comment",
        "architecture",
        "dataset",
        "attack_type",
        "p1_lamb",
        "p2_lamb",
        "p3_lamb",
        "num_protos",
        "condition",
        "accuracy",
    ],
)
pbns_data_df.head(3)

Unnamed: 0,comment,architecture,dataset,attack_type,p1_lamb,p2_lamb,p3_lamb,num_protos,condition,accuracy
0,PBN,BART,dbpedia,pwws,0.0,0.0,0.0,4,test,0.766595
1,PBN,BART,dbpedia,pwws,0.0,0.0,0.0,4,adv,0.478944
2,PBN,BART,dbpedia,pwws,0.0,0.0,0.9,8,test,0.942184


In [7]:
def get_architecture_from_full_name(checkpoint):
    """Return the architecture label for a model checkpoint name.

    The name is matched case-insensitively against known substrings.

    Args:
        checkpoint: Checkpoint / model name string.

    Returns:
        "BART", "ELECTRA" or "BERT".

    Raises:
        ValueError: If none of the known substrings occurs in the name.
    """
    lowered = checkpoint.lower()
    # Checked in this order; the first substring found decides the label.
    known_architectures = (
        ("bart", "BART"),
        ("electra", "ELECTRA"),
        ("bert", "BERT"),
    )
    for needle, label in known_architectures:
        if needle in lowered:
            return label
    raise ValueError("Unknown architecture")

In [8]:
# Flatten the non-PBN results into one row per (result record, condition).
# Rows are collected as dicts in a single list so the columns cannot drift out of
# step with each other, which separately appended lists would allow.
non_pbn_rows = []
for item in non_pbns_data:
    attack_type = item["attack_type"]
    results = item["results"]
    # Skip records whose accuracy under the "test" condition is below 0.5.
    if results["test"][f"test_{attack_type}"]["eval_accuracy"] < 0.5:
        continue
    # The record stores a full checkpoint name; reduce it to the architecture label.
    architecture = get_architecture_from_full_name(item["architecture"])
    for condition in ["test", "adv"]:
        non_pbn_rows.append(
            {
                "comment": "non_PBN",
                "architecture": architecture,
                "dataset": item["dataset"],
                "attack_type": attack_type,
                "condition": condition,
                "accuracy": results[condition][f"{condition}_{attack_type}"][
                    "eval_accuracy"
                ],
            }
        )
# `columns` pins the column order and keeps the schema even if no record survives.
non_pbns_data_df = pd.DataFrame(
    non_pbn_rows,
    columns=[
        "comment",
        "architecture",
        "dataset",
        "attack_type",
        "condition",
        "accuracy",
    ],
)
non_pbns_data_df.head(3)

Unnamed: 0,comment,architecture,dataset,attack_type,condition,accuracy
0,non_PBN,BART,dbpedia,textfooler,test,0.986928
1,non_PBN,BART,dbpedia,textfooler,adv,0.712963
2,non_PBN,BART,dbpedia,textbugger,test,0.960187


In [9]:
# Flatten the augmented-training results into one row per result record.
# NOTE(review): the value read from results["test"]["eval_accuracy"] is stored under
# condition "adv" -- presumably the augmented models were evaluated on the
# adversarial test set; confirm against the script that wrote the JSON.
augmented_rows = []
for item in non_pbns_data_augmented:
    accuracy = item["results"]["test"]["eval_accuracy"]
    # Skip records whose accuracy is below 0.5.
    if accuracy < 0.5:
        continue
    augmented_rows.append(
        {
            "comment": "non_PBN_augmented",
            # The record stores a full checkpoint name; reduce it to the label.
            "architecture": get_architecture_from_full_name(item["architecture"]),
            "dataset": item["dataset"],
            "attack_type": item["attack_type"],
            "condition": "adv",
            "accuracy": accuracy,
        }
    )
# `columns` pins the column order and keeps the schema even if no record survives.
non_pbns_data_augmented_df = pd.DataFrame(
    augmented_rows,
    columns=[
        "comment",
        "architecture",
        "dataset",
        "attack_type",
        "condition",
        "accuracy",
    ],
)
non_pbns_data_augmented_df.head(3)

Unnamed: 0,comment,architecture,dataset,attack_type,condition,accuracy
0,non_PBN_augmented,BART,dbpedia,textfooler,adv,0.939759
1,non_PBN_augmented,BART,dbpedia,textbugger,adv,0.887931
2,non_PBN_augmented,BART,dbpedia,deepwordbug,adv,0.796117


In [10]:
# Give the augmented frame "test"-condition rows by reusing the non-PBN test
# accuracies (relabelled as "non_PBN_augmented"), leaving out the sst2 dataset.
# NOTE(review): why sst2 is excluded is not visible in this notebook -- confirm.
sub_df = non_pbns_data_df[
    (non_pbns_data_df["condition"] == "test") & (non_pbns_data_df["dataset"] != "sst2")
].assign(comment="non_PBN_augmented")

# Start from the "adv" rows only, so that re-running this cell does not relabel the
# previously appended "test" rows as "adv" and then append them a second time.
augmented_adv_rows = non_pbns_data_augmented_df[
    non_pbns_data_augmented_df["condition"] == "adv"
]
non_pbns_data_augmented_df = pd.concat([augmented_adv_rows, sub_df])
non_pbns_data_augmented_df.head(3)

Unnamed: 0,comment,architecture,dataset,attack_type,condition,accuracy
0,non_PBN_augmented,BART,dbpedia,textfooler,adv,0.939759
1,non_PBN_augmented,BART,dbpedia,textbugger,adv,0.887931
2,non_PBN_augmented,BART,dbpedia,deepwordbug,adv,0.796117


In [11]:
# Sanity check: (rows, columns) of the augmented, non-PBN and PBN frames, in that order.
non_pbns_data_augmented_df.shape, non_pbns_data_df.shape, pbns_data_df.shape

((90, 6), (96, 6), (1444, 10))

# Comparing PBN with non-PBN models

In [12]:
# PBN (best case) vs. non-PBN.
# Per (comment, architecture, dataset, attack_type) group the PBN side keeps the
# highest accuracy of each condition over all of its rows, and the non-PBN side is
# averaged over its rows. The PBN "adv" and "test" maxima are taken independently,
# so they may come from different hyperparameter settings.
summary_keys = ["comment", "architecture", "dataset", "attack_type"]

pbns_data_df_summary = (
    pbns_data_df.groupby(summary_keys + ["condition"])["accuracy"]
    .max()
    .unstack("condition")
    # Guarantee both columns exist (all-NaN if a condition never occurs).
    .reindex(columns=["adv", "test"])
    .assign(difference=lambda wide: wide["adv"] - wide["test"])
    .rename_axis(columns=None)
    .reset_index()
)

non_pbns_data_df_summary = (
    non_pbns_data_df.groupby(summary_keys + ["condition"])["accuracy"]
    .mean()
    .unstack("condition")
    .reindex(columns=["adv", "test"])
    .assign(difference=lambda wide: wide["adv"] - wide["test"])
    .rename_axis(columns=None)
    .reset_index()
)

# Inner join: only groups present on both sides are compared.
merged_df = pd.merge(
    pbns_data_df_summary,
    non_pbns_data_df_summary,
    on=["architecture", "dataset", "attack_type"],
    how="inner",
    suffixes=("_pbn", "_non_pbn"),
)

# Share of groups where PBN adversarial accuracy beats the non-PBN one. The label
# names adv_non_pbn because that is the column actually compared against.
print(
    "ratio that adv_pbn is higher than adv_non_pbn using the best hyperparameters",
    (merged_df["adv_pbn"] > merged_df["adv_non_pbn"]).mean(),
)

ratio that adv_pbn is higher than test_pbn using the best hyperparameters 0.9375


In [13]:
# PBN (averaged) vs. non-PBN.
# Per (comment, architecture, dataset, attack_type) group both sides are averaged
# over all of their rows, separately for each condition.
summary_keys = ["comment", "architecture", "dataset", "attack_type"]

pbns_data_df_summary = (
    pbns_data_df.groupby(summary_keys + ["condition"])["accuracy"]
    .mean()
    .unstack("condition")
    # Guarantee both columns exist (all-NaN if a condition never occurs).
    .reindex(columns=["adv", "test"])
    .assign(difference=lambda wide: wide["adv"] - wide["test"])
    .rename_axis(columns=None)
    .reset_index()
)

non_pbns_data_df_summary = (
    non_pbns_data_df.groupby(summary_keys + ["condition"])["accuracy"]
    .mean()
    .unstack("condition")
    .reindex(columns=["adv", "test"])
    .assign(difference=lambda wide: wide["adv"] - wide["test"])
    .rename_axis(columns=None)
    .reset_index()
)

# Inner join: only groups present on both sides are compared.
merged_df = pd.merge(
    pbns_data_df_summary,
    non_pbns_data_df_summary,
    on=["architecture", "dataset", "attack_type"],
    how="inner",
    suffixes=("_pbn", "_non_pbn"),
)

# Share of groups where PBN adversarial accuracy beats the non-PBN one. The label
# names adv_non_pbn because that is the column actually compared against.
print(
    "ratio that adv_pbn is higher than adv_non_pbn averaging over all the hyperparameters",
    (merged_df["adv_pbn"] > merged_df["adv_non_pbn"]).mean(),
)

ratio that adv_pbn is higher than test_pbn averaging over all the hyperparameters 0.3125


# Comparing PBN with non-PBN models that also use augmented training

In [14]:
# PBN (best case) vs. non-PBN with augmented training.
# Per (comment, architecture, dataset, attack_type) group the PBN side keeps the
# highest accuracy of each condition over all of its rows, and the augmented
# non-PBN side is averaged over its rows. The PBN "adv" and "test" maxima are taken
# independently, so they may come from different hyperparameter settings.
summary_keys = ["comment", "architecture", "dataset", "attack_type"]

pbns_data_df_summary = (
    pbns_data_df.groupby(summary_keys + ["condition"])["accuracy"]
    .max()
    .unstack("condition")
    # Guarantee both columns exist (all-NaN if a condition never occurs).
    .reindex(columns=["adv", "test"])
    .assign(difference=lambda wide: wide["adv"] - wide["test"])
    .rename_axis(columns=None)
    .reset_index()
)

non_pbns_data_augmented_df_summary = (
    non_pbns_data_augmented_df.groupby(summary_keys + ["condition"])["accuracy"]
    .mean()
    .unstack("condition")
    .reindex(columns=["adv", "test"])
    .assign(difference=lambda wide: wide["adv"] - wide["test"])
    .rename_axis(columns=None)
    .reset_index()
)

# Inner join: only groups present on both sides are compared.
merged_df = pd.merge(
    pbns_data_df_summary,
    non_pbns_data_augmented_df_summary,
    on=["architecture", "dataset", "attack_type"],
    how="inner",
    suffixes=("_pbn", "_non_pbn"),
)

# Share of groups where PBN adversarial accuracy beats the augmented non-PBN one.
# The label names adv_non_pbn because that is the column actually compared against.
print(
    "ratio that adv_pbn is higher than adv_non_pbn (+augmented training) using the best hyperparameters",
    (merged_df["adv_pbn"] > merged_df["adv_non_pbn"]).mean(),
)

ratio that adv_pbn is higher than test_pbn (+augmented training) using the best hyperparameters 0.28888888888888886


In [15]:
# PBN (averaged) vs. non-PBN with augmented training.
# Per (comment, architecture, dataset, attack_type) group both sides are averaged
# over all of their rows, separately for each condition.
summary_keys = ["comment", "architecture", "dataset", "attack_type"]

pbns_data_df_summary = (
    pbns_data_df.groupby(summary_keys + ["condition"])["accuracy"]
    .mean()
    .unstack("condition")
    # Guarantee both columns exist (all-NaN if a condition never occurs).
    .reindex(columns=["adv", "test"])
    .assign(difference=lambda wide: wide["adv"] - wide["test"])
    .rename_axis(columns=None)
    .reset_index()
)

non_pbns_data_augmented_df_summary = (
    non_pbns_data_augmented_df.groupby(summary_keys + ["condition"])["accuracy"]
    .mean()
    .unstack("condition")
    .reindex(columns=["adv", "test"])
    .assign(difference=lambda wide: wide["adv"] - wide["test"])
    .rename_axis(columns=None)
    .reset_index()
)

# Inner join: only groups present on both sides are compared.
merged_df = pd.merge(
    pbns_data_df_summary,
    non_pbns_data_augmented_df_summary,
    on=["architecture", "dataset", "attack_type"],
    how="inner",
    suffixes=("_pbn", "_non_pbn"),
)

# Share of groups where PBN adversarial accuracy beats the augmented non-PBN one.
# The label names adv_non_pbn because that is the column actually compared against.
print(
    "ratio that adv_pbn is higher than adv_non_pbn (+augmented training) averaging over all the hyperparameters",
    (merged_df["adv_pbn"] > merged_df["adv_non_pbn"]).mean(),
)

ratio that adv_pbn is higher than test_pbn (+augmented training) averaging over all the hyperparameters 0.08888888888888889
