In [None]:
import numpy as np
import os
import os.path as osp
# from pdb import set_trace as st
import copy
from matplotlib import pyplot as plt
plt.ion()

def is_number(s):
    """Report whether ``s`` can be converted with ``float()``.

    Returns True when ``float(s)`` succeeds and False when it raises
    ``ValueError``. Other exceptions (for example ``TypeError`` for
    ``None``) are not caught and propagate to the caller.
    """
    try:
        float(s)
    except ValueError:
        return False
    else:
        return True

def load_similarity(fpath):
    """Parse a similarity log file into a list of experiment records.

    The first character of each line selects how it is handled:

    * a character accepted by ``float()``: header line ``<id> <name> <parent>``
      that starts a new record;
    * ``p``: ``parent_sim: <score> <parent>``; stores ``1 - score`` and the
      parent name on the most recent record;
    * ``r``: ``ref_sim: <score> <gap> <name>``; appends ``1 - score`` to the
      most recent record's ``"dissim"`` list;
    * ``[``: ``[ERROR] ref_sim: <score> <gap> <name>``; handled like ``r``;
    * anything else is ignored.

    After parsing, records are dropped when their name contains
    ``quantize(float16)``, or contains ``steal`` and the text between the
    first ``(`` and first ``,`` equals the text between the last ``(`` and
    last ``)``.

    Args:
        fpath: Path of the log file to read.

    Returns:
        A list of dicts with keys ``"id"``, ``"name"``, ``"parent_name"``,
        ``"parent_sim"`` and ``"dissim"``.

    Raises:
        ValueError: A handled line has the wrong number of fields or a
            score that ``float()`` rejects.
        AssertionError: A handled line does not carry the expected tag.
        IndexError: A score line appears before any header line.
    """
    def _parses_as_float(text):
        # Same check the header detection relies on: float() must accept it.
        try:
            float(text)
        except ValueError:
            return False
        return True

    def _is_filtered_out(model_name):
        # Steal models whose first and last parenthesised names coincide,
        # and float16-quantized models, are excluded from the result.
        if 'steal' in model_name:
            first_arch = model_name[model_name.find('(') + 1:model_name.find(',')]
            last_arch = model_name[model_name.rfind('(') + 1:model_name.rfind(')')]
            if first_arch == last_arch:
                return True
        return 'quantize(float16)' in model_name

    records = []
    with open(fpath) as f:
        for line in f:
            if not line:
                continue
            marker = line[0]
            if _parses_as_float(marker):
                exp_id, exp_name, _ = line.split()
                records.append({
                    "id": exp_id,
                    "name": exp_name,
                    "parent_name": None,
                    "parent_sim": None,
                    "dissim": [],
                })
            elif marker == "p":
                tag, raw_score, parent_name = line.split()
                assert tag == "parent_sim:"
                converted = 1 - float(raw_score)
                records[-1]["parent_sim"] = converted
                records[-1]["parent_name"] = parent_name
            elif marker == "r":
                tag, raw_score, _, _ = line.split()
                assert tag == "ref_sim:"
                converted = 1 - float(raw_score)
                records[-1]["dissim"].append(converted)
            elif marker == "[":
                error_tag, tag, raw_score, _, _ = line.split()
                assert tag == "ref_sim:" and error_tag == "[ERROR]"
                converted = 1 - float(raw_score)
                records[-1]["dissim"].append(converted)

    # filter unnecessary models
    return [rec for rec in records if not _is_filtered_out(rec['name'])]

# Load the default experiment log into the module-level `all_exp`, which the
# plotting cells below iterate over.
all_exp = load_similarity("modeldiff_correctness_high_divergence_high_diversity_eps0.06.txt")

In [None]:
# draw similarity distribution
#
# For each architecture, group every experiment in `all_exp` by the kind of
# its last derivation step (transfer / prune / quantize / distill / steal),
# then save one scatter plot per architecture: blue circles are the
# `parent_sim` values, red crosses are the individual `dissim` values.

FONTSIZE = 25

plt_setting = {
    'color': 'blue',
    'alpha': 0.5,
    'marker': 'o',
}
dis_plt_setting = {
    'color': 'red',
    'alpha': 0.5,
    'marker': 'x',
}

# One entry per x position: (tick label, key path into the score table,
# whether the parent-sim points are drawn after, i.e. on top of, the dissim
# points). The steal/"homo" bucket is collected but intentionally not plotted.
PLOT_COLUMNS = [
    ("Transfer-0.1", ("transfer", 0.1), False),
    ("Transfer-0.5", ("transfer", 0.5), False),
    ("Transfer-1.0", ("transfer", 1), False),
    ("Prune-0.2", ("prune", 0.2), False),
    ("Prune-0.5", ("prune", 0.5), False),
    ("Prune-0.8", ("prune", 0.8), True),
    ("Quant", ("quant",), False),
    ("Distill", ("distill",), True),
    ("Steal", ("steal", "heter"), True),
]


def _empty_score_table():
    """Return a fresh, empty score table keyed by derivation kind."""
    return {
        "transfer": {0.1: [], 0.5: [], 1: []},
        "prune": {0.2: [], 0.5: [], 0.8: []},
        "distill": [],
        "steal": {"homo": [], "heter": []},
        "quant": [],
    }


def _bucket(table, key_path):
    """Follow `key_path` through the nested `table` and return the list there."""
    node = table
    for key in key_path:
        node = node[key]
    return node


def _bucket_path(model_name, components):
    """Map a model's '-'-separated name components to a score-table key path.

    Raises ValueError when the last component matches no known derivation
    kind. The original cell called an undefined `st()` debugger hook here,
    which surfaced as a NameError.
    """
    last_type = components[-1] if components else ""
    if "transfer" in last_type:
        # e.g. "transfer(Flower102,0.5)" -> 0.5
        return ("transfer", float(last_type.split(',')[-1][:-1]))
    if "quant" in last_type:
        return ("quant",)
    if "prune" in last_type:
        # e.g. "prune(0.5)" -> 0.5
        return ("prune", float(last_type.split('(')[-1].split(')')[0]))
    if "distill" in last_type:
        return ("distill",)
    if "steal" in last_type:
        s_arch = last_type.split('(')[-1].split(')')[0]
        t_arch = components[0].split('(')[-1].split(')')[0].split(',')[0]
        return ("steal", "homo" if s_arch == t_arch else "heter")
    raise ValueError(f"unrecognized derivation type in model name: {model_name!r}")


for arch in ["mbnetv2", "resnet18"]:
    transfer_sim = _empty_score_table()
    # Every individual dissimilarity score (not the per-experiment mean).
    transfer_dissim_nomean = _empty_score_table()

    for info in all_exp:
        name = info["name"]
        root = name.split('-')[0]
        if arch not in root:
            continue
        components = name.split('-')[:-1]
        key_path = _bucket_path(name, components)
        if key_path[0] == "steal":
            # Keep the per-experiment console trace of the steal models.
            dissim_score = np.mean(info["dissim"])
            tag = " homo  " if key_path[1] == "homo" else "heter  "
            print(tag, name, dissim_score)
        # A ratio outside the predefined buckets raises KeyError here.
        _bucket(transfer_sim, key_path).append(info["parent_sim"])
        _bucket(transfer_dissim_nomean, key_path).extend(info["dissim"])

    fig, ax = plt.subplots(figsize=(8, 6))
    for idx, (label, key_path, sim_on_top) in enumerate(PLOT_COLUMNS, start=1):
        layers = [
            (_bucket(transfer_sim, key_path), plt_setting),
            (_bucket(transfer_dissim_nomean, key_path), dis_plt_setting),
        ]
        if sim_on_top:
            layers.reverse()
        for points, style in layers:
            ax.scatter([idx] * len(points), points, **style)

    ax.set_ylabel("Similarity", fontsize=FONTSIZE)
    # Tick positions are derived from the labels so the two always match
    # (the original had 10 positions for 9 labels).
    x_names = [label for label, _, _ in PLOT_COLUMNS]
    x_idx = list(range(1, len(x_names) + 1))
    ax.set_xticks(x_idx)
    ax.set_xticklabels(x_names, rotation=90, fontsize=FONTSIZE)
    ax.tick_params(axis="y", labelsize=FONTSIZE)

    fig.tight_layout()
    fig.savefig(f"similarity_dist_{arch}.png")
    # Close rather than clear, so figures do not accumulate across iterations.
    plt.close(fig)


In [None]:
import pickle

# Accuracy-vs-similarity scatter plots, one figure per dataset.

path = "Flower102_acc.pkl"
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
# NOTE(review): only the Flower102 pickle is read although SDog120 is plotted
# below as well; confirm this file also holds the SDog120 accuracies.
with open(path, "rb") as f:
    acc_dict = pickle.load(f)
dataset_names = ["Flower102", "SDog120"]

# Attach an accuracy to every experiment that has one. A quantized model
# whose parent has an accuracy entry takes the parent's accuracy, overriding
# any entry of its own.
all_exp_model_info = {}
model_info = {}
for item in all_exp:
    name = item["name"]
    all_exp_model_info[name] = item
    if name in acc_dict:
        item["acc"] = acc_dict[name]
        model_info[name] = item
    if 'quantize' in name:
        parent_name = name[:name.rfind('quantize')]
        if parent_name not in acc_dict:
            continue
        item['acc'] = acc_dict[parent_name]
        model_info[name] = item

FONTSIZE = 20
MARKERSIZE = 100

# Substring checks are applied in this order; names matching none of them are
# counted as "transfer".
DERIVATION_KINDS = ("prune", "distill", "steal", "quantize")

# (data key, legend label, marker, color), in drawing / legend order.
SERIES_STYLES = [
    ("quantize", "Quantize", 'o', 'blue'),
    ("transfer", "Transfer", 'x', 'red'),
    ("prune", "Prune", '*', 'green'),
    ("distill", "Distill", 'D', 'gray'),
    ("steal", "Steal", '^', 'orangered'),
]

for dataset_key in dataset_names:
    data = {
        kind: {"acc": [], "sim": []}
        for kind in ("transfer", "prune", "distill", "steal", "quantize", "train")
    }
    for model_name, info in model_info.items():
        if dataset_key not in model_name:
            continue
        kind = next((k for k in DERIVATION_KINDS if k in model_name), "transfer")
        if kind == "transfer":
            print(model_name)
        if kind == "quantize":
            acc_value = info["acc"]
        elif model_name in acc_dict:
            acc_value = acc_dict[model_name]
        else:
            # Admitted through its parent's accuracy only (quantized variant
            # of a pruned/distilled/stolen model); `acc_dict[model_name]`
            # would raise KeyError, so use the accuracy resolved above.
            acc_value = info["acc"]
        data[kind]["acc"].append(acc_value)
        data[kind]["sim"].append(info["parent_sim"])

    fig, ax = plt.subplots(figsize=(8, 6))
    for kind, label, marker, color in SERIES_STYLES:
        ax.scatter(data[kind]["sim"], data[kind]["acc"], marker=marker,
                   color=color, alpha=0.7, s=MARKERSIZE, label=label)

    ax.set_ylabel("Accuracy", fontsize=FONTSIZE)
    ax.set_xlabel("Similarity", fontsize=FONTSIZE)
    ax.tick_params(axis="both", labelsize=FONTSIZE)
    ax.legend(prop={"size": 20})

    fig.tight_layout()
    fig_name = f"sim_acc_{dataset_key}.png"
    print(fig_name)
    fig.savefig(fig_name)
    # Close rather than clear, so figures do not accumulate across iterations.
    plt.close(fig)

In [None]:
def compute_correctness(all_exp):
    """Compute the fraction of experiments whose parent score beats every reference.

    An experiment counts as correct when its ``"parent_sim"`` value is
    strictly greater than the maximum of its ``"dissim"`` list. The result
    and the number of experiments are also printed.

    Args:
        all_exp: Iterable of experiment dicts providing ``"parent_sim"`` and
            a non-empty ``"dissim"`` sequence.

    Returns:
        The fraction of correct experiments, or NaN when ``all_exp`` is empty.

    Raises:
        ValueError: An experiment has an empty ``"dissim"`` list
            (raised by ``np.max``).
    """
    # Only the two scores matter here; the model name is deliberately not
    # parsed, so names without a '-' separator no longer raise IndexError.
    correctness_arr = [
        info["parent_sim"] > np.max(info["dissim"]) for info in all_exp
    ]

    if correctness_arr:
        correctness = np.mean(correctness_arr)
    else:
        # Avoid the 0/0 division (and its RuntimeWarning) on empty input.
        correctness = np.float64("nan")
    print(f'correctness:{correctness:.4f} len:{len(correctness_arr)}')
    return correctness

# Report correctness for every experiment log, in the original order.
# `all_exp` is rebound on each iteration and ends up holding the last log.
CORRECTNESS_LOGS = [
    "modeldiff_correctness_high_divergence_high_diversity_eps0.06_randseed.txt",
    "modeldiff_correctness_high_divergence_high_diversity_eps0.06_10inputs.txt",
    "modeldiff_correctness_high_divergence_high_diversity_eps0.06_50inputs.txt",
    "modeldiff_correctness_high_divergence_high_diversity_eps0.06.txt",
    "modeldiff_correctness_high_divergence_high_diversity_eps0.06_200inputs.txt",
    "modeldiff_correctness_input_nodiversity_output_nodiversity.txt",
    "modeldiff_correctness_high_divergence_high_diversity_eps0.06_all_normal.txt",
    "modeldiff_correctness_high_divergence_high_diversity_eps0.06_all_adv.txt",
]

for log_path in CORRECTNESS_LOGS:
    all_exp = load_similarity(log_path)
    compute_correctness(all_exp)

In [None]:
# Scratch check of the two slices load_similarity uses in its name filter.
# In a notebook only the value of the last expression is echoed.
name = 'pretrain(resnet18,ImageNet)-transfer(Flower102,0.5)-quantize(float16)-'
# Text between the first '(' and the first ',' -> 'resnet18'.
name[name.find('(')+1:name.find(',')]
# Text between the last '(' and the last ')' -> 'float16'.
name[name.rfind('(')+1:name.rfind(')')]