In [1]:
import pandas
import os
import numpy as np
import torch
import gates_models as gm
import pickle
import utils as ut
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import pairwise_distances
from sklearn.linear_model import LogisticRegression

In [2]:
# --- Run configuration --------------------------------------------------------
round_suffix = 'r6'   # any value other than 'r6' selects the round-5 training folder below
subset = 'all'
use_amp = False       # mixed precision was only an attempt to speed up embedding conversion; kept off

# --- Dataset locations --------------------------------------------------------
path = f'models_{round_suffix}'
if round_suffix == 'r6':
    main_path = os.path.join(path, 'round6-train-dataset')
else:
    main_path = os.path.join(path, 'round5-train-dataset')
models_path = os.path.join(main_path, 'models')
metadata_file = 'METADATA.csv'

# --- Torch runtime ------------------------------------------------------------
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
torch.backends.cudnn.enabled = False

# --- Model metadata table -----------------------------------------------------
df = pandas.read_csv(os.path.join(main_path, metadata_file))

# Embedding export step, left disabled:
# ut.write_embeddings_on_file(df, main_path, models_path, round_suffix=round_suffix, use_amp=use_amp)

In [None]:
reg_types = ['sparse']
threshold = (0.95, 0.7); gg = 'all'
# threshold = (1, 0.7); gg = 'per_sample'


def _cdrp_params(gate_type, reg_type):
    """Build the CDRP parameter dict for one gate type / regulariser pair.

    Only 'gate_type' and 'reg_type' differ between the hidden-gate and
    input-gate runs; `threshold`, `gg` and `subset` are read from the
    notebook globals at call time. Key order matches the original literals.
    """
    return {'threshold': threshold, 'start': 0.05, 'iter': 50, 'lr': 0.1, 'eps': 1e-2,
            'gate_type': gate_type, 'reg_type': reg_type, 'gate_granularity': gg, 'subset': subset}


# Create the output folder *before* the CDRP runs: otherwise a missing folder
# would only surface as FileNotFoundError at the final open(), after all the
# computation has already been done.
cdrp_out_dir = f'data_{round_suffix}'
os.makedirs(cdrp_out_dir, exist_ok=True)

for reg_type in reg_types:
    print(f'Reg Type: {reg_type}')

    # Gates on the hidden representation.
    cdrp_hgates_all_params = _cdrp_params('hidden', reg_type)
    hgates_all, hgates_all_accs, hgates_all_gammas, class_indices, model_labels, trigger_targets = gm.apply_cdrp_on_all_models(df, main_path, models_path, cdrp_hgates_all_params, round_suffix, use_amp, device)

    # Gates on the input; the last three return values are discarded here and
    # the ones from the hidden-gate run are the ones that get saved.
    cdrp_igates_all_params = _cdrp_params('input', reg_type)
    igates_all, igates_all_accs, igates_all_gammas, _, _, _ = gm.apply_cdrp_on_all_models(df, main_path, models_path, cdrp_igates_all_params, round_suffix, use_amp, device)

    # One pickle per (granularity, regulariser); the tuple order below is the
    # index layout used when the file is read back.
    with open(os.path.join(cdrp_out_dir, f'{gg}_{reg_type}_data.pickle'), 'wb') as handle:
        pickle.dump((hgates_all, hgates_all_accs, hgates_all_gammas, igates_all, igates_all_accs, igates_all_gammas, class_indices, model_labels, trigger_targets), handle, protocol=pickle.HIGHEST_PROTOCOL)

In [3]:
gg = 'all'


def _load_gate_data(round_name, reg_name):
    """Unpickle the saved CDRP results for one round and one regulariser.

    Reads ``data_<round_name>/<gg>_<reg_name>_data.pickle`` (``gg`` is the
    notebook-level gate granularity) and returns whatever object was pickled.

    NOTE: pickle.load can execute arbitrary code embedded in the file, so only
    point this at files produced by this project.
    """
    file_path = os.path.join(f'data_{round_name}', f'{gg}_{reg_name}_data.pickle')
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)


# The round is passed explicitly instead of reassigning the config-level
# `round_suffix`, so this cell no longer changes state that other cells
# (paths, metadata) were derived from.
all_l1_r5 = _load_gate_data('r5', 'l1')
all_l2_r5 = _load_gate_data('r5', 'l2')
all_sp_r5 = _load_gate_data('r5', 'sparse')

all_l1_r6 = _load_gate_data('r6', 'l1')
all_l2_r6 = _load_gate_data('r6', 'l2')
all_sp_r6 = _load_gate_data('r6', 'sparse')

In [4]:
def d(x):
    """Return the 1-norm of (x - 1); for a 1-D x this is sum(|x_i - 1|)."""
    return np.linalg.norm(x - 1, ord=1)


def dd(x, y):
    """Return the 1-norm of (x - y); for 1-D inputs this is sum(|x_i - y_i|)."""
    return np.linalg.norm(x - y, ord=1)


def s(x):
    """Return the number of entries of x equal to 0, divided by len(x)."""
    return len(np.where(x == 0)[0]) / len(x)


# Layout of each loaded tuple (as written by the pickle.dump in the CDRP cell):
#   [0] hidden gates   [1] hidden-gate accs   [2] hidden-gate gammas
#   [3] input gates    [4] input-gate accs    [5] input-gate gammas
#   [7] model labels
model_labels_r5 = all_l1_r5[7]
_rows_r5 = range(len(model_labels_r5))

# ---- l1 results: 3 conf columns, 2 gamma columns, 3 gate-distance columns ----
hconfs_l1_r5 = np.array([[all_l1_r5[1][row][k][1] for k in (0, 1, 2)] for row in _rows_r5])
iconfs_l1_r5 = np.array([[all_l1_r5[4][row][k][1] for k in (0, 1, 2)] for row in _rows_r5])
hgammas_l1_r5 = np.array([[all_l1_r5[2][row][k] for k in (0, 1)] for row in _rows_r5])
igammas_l1_r5 = np.array([[all_l1_r5[5][row][k] for k in (0, 1)] for row in _rows_r5])
hgates_l1_r5 = np.array([[d(g[0]), d(g[1]), dd(g[0], g[1])] for g in (all_l1_r5[0][row] for row in _rows_r5)])
igates_l1_r5 = np.array([[d(g[0]), d(g[1]), dd(g[0], g[1])] for g in (all_l1_r5[3][row] for row in _rows_r5)])

# Only the input-gate features are stacked; the hidden-gate arrays above are
# computed but not used in the feature matrix.
l1_r5 = np.hstack((iconfs_l1_r5, igammas_l1_r5, igates_l1_r5))

# ---- l2 results: same column layout as l1 ----
hconfs_l2_r5 = np.array([[all_l2_r5[1][row][k][1] for k in (0, 1, 2)] for row in _rows_r5])
iconfs_l2_r5 = np.array([[all_l2_r5[4][row][k][1] for k in (0, 1, 2)] for row in _rows_r5])
hgammas_l2_r5 = np.array([[all_l2_r5[2][row][k] for k in (0, 1)] for row in _rows_r5])
igammas_l2_r5 = np.array([[all_l2_r5[5][row][k] for k in (0, 1)] for row in _rows_r5])
hgates_l2_r5 = np.array([[d(g[0]), d(g[1]), dd(g[0], g[1])] for g in (all_l2_r5[0][row] for row in _rows_r5)])
igates_l2_r5 = np.array([[d(g[0]), d(g[1]), dd(g[0], g[1])] for g in (all_l2_r5[3][row] for row in _rows_r5)])

l2_r5 = np.hstack((iconfs_l2_r5, igammas_l2_r5, igates_l2_r5))

# ---- sparse results: 3 conf columns, 3 gamma columns, 3 zero-fraction columns ----
hconfs_sp_r5 = np.array([[all_sp_r5[1][row][k][1] for k in (0, 1, 2)] for row in _rows_r5])
iconfs_sp_r5 = np.array([[all_sp_r5[4][row][k][1] for k in (0, 1, 2)] for row in _rows_r5])
hgammas_sp_r5 = np.array([[all_sp_r5[2][row][k] for k in (0, 1, 2)] for row in _rows_r5])
igammas_sp_r5 = np.array([[all_sp_r5[5][row][k] for k in (0, 1, 2)] for row in _rows_r5])
hgates_sp_r5 = np.array([[s(all_sp_r5[0][row][k]) for k in (0, 1, 2)] for row in _rows_r5])
igates_sp_r5 = np.array([[s(all_sp_r5[3][row][k]) for k in (0, 1, 2)] for row in _rows_r5])

sp_r5 = np.hstack((iconfs_sp_r5, igammas_sp_r5, igates_sp_r5))

# Final round-5 feature matrix: l1 (8 cols) + l2 (8 cols) + sparse (9 cols).
data_r5 = np.hstack((l1_r5, l2_r5, sp_r5))
print(data_r5.shape)

(1656, 25)


In [5]:
# Same feature construction as for round 5, applied to the round-6 tuples.
# Tuple layout (as written by the pickle.dump in the CDRP cell):
#   [0] hidden gates   [1] hidden-gate accs   [2] hidden-gate gammas
#   [3] input gates    [4] input-gate accs    [5] input-gate gammas
#   [7] model labels
# Relies on the d / dd / s helpers defined in the round-5 feature cell.
model_labels_r6 = all_l1_r6[7]
_rows_r6 = range(len(model_labels_r6))

# ---- l1 results: 3 conf columns, 2 gamma columns, 3 gate-distance columns ----
hconfs_l1_r6 = np.array([[all_l1_r6[1][row][k][1] for k in (0, 1, 2)] for row in _rows_r6])
iconfs_l1_r6 = np.array([[all_l1_r6[4][row][k][1] for k in (0, 1, 2)] for row in _rows_r6])
hgammas_l1_r6 = np.array([[all_l1_r6[2][row][k] for k in (0, 1)] for row in _rows_r6])
igammas_l1_r6 = np.array([[all_l1_r6[5][row][k] for k in (0, 1)] for row in _rows_r6])
hgates_l1_r6 = np.array([[d(g[0]), d(g[1]), dd(g[0], g[1])] for g in (all_l1_r6[0][row] for row in _rows_r6)])
igates_l1_r6 = np.array([[d(g[0]), d(g[1]), dd(g[0], g[1])] for g in (all_l1_r6[3][row] for row in _rows_r6)])

# Only the input-gate features are stacked; the hidden-gate arrays above are
# computed but not used in the feature matrix.
l1_r6 = np.hstack((iconfs_l1_r6, igammas_l1_r6, igates_l1_r6))

# ---- l2 results: same column layout as l1 ----
hconfs_l2_r6 = np.array([[all_l2_r6[1][row][k][1] for k in (0, 1, 2)] for row in _rows_r6])
iconfs_l2_r6 = np.array([[all_l2_r6[4][row][k][1] for k in (0, 1, 2)] for row in _rows_r6])
hgammas_l2_r6 = np.array([[all_l2_r6[2][row][k] for k in (0, 1)] for row in _rows_r6])
igammas_l2_r6 = np.array([[all_l2_r6[5][row][k] for k in (0, 1)] for row in _rows_r6])
hgates_l2_r6 = np.array([[d(g[0]), d(g[1]), dd(g[0], g[1])] for g in (all_l2_r6[0][row] for row in _rows_r6)])
igates_l2_r6 = np.array([[d(g[0]), d(g[1]), dd(g[0], g[1])] for g in (all_l2_r6[3][row] for row in _rows_r6)])

l2_r6 = np.hstack((iconfs_l2_r6, igammas_l2_r6, igates_l2_r6))

# ---- sparse results: 3 conf columns, 3 gamma columns, 3 zero-fraction columns ----
hconfs_sp_r6 = np.array([[all_sp_r6[1][row][k][1] for k in (0, 1, 2)] for row in _rows_r6])
iconfs_sp_r6 = np.array([[all_sp_r6[4][row][k][1] for k in (0, 1, 2)] for row in _rows_r6])
hgammas_sp_r6 = np.array([[all_sp_r6[2][row][k] for k in (0, 1, 2)] for row in _rows_r6])
igammas_sp_r6 = np.array([[all_sp_r6[5][row][k] for k in (0, 1, 2)] for row in _rows_r6])
hgates_sp_r6 = np.array([[s(all_sp_r6[0][row][k]) for k in (0, 1, 2)] for row in _rows_r6])
igates_sp_r6 = np.array([[s(all_sp_r6[3][row][k]) for k in (0, 1, 2)] for row in _rows_r6])

sp_r6 = np.hstack((iconfs_sp_r6, igammas_sp_r6, igates_sp_r6))

# Final round-6 feature matrix: l1 (8 cols) + l2 (8 cols) + sparse (9 cols).
data_r6 = np.hstack((l1_r6, l2_r6, sp_r6))
print(data_r6.shape)

(48, 25)


In [9]:
# Stratified, seeded hold-out split of the round-5 feature matrix.
X_train, X_test, y_train, y_test = train_test_split(data_r5, model_labels_r5, stratify=model_labels_r5, random_state=0)

# Hyper-parameter search kept for reference (presumably the origin of the
# settings used below -- TODO confirm):
# scoring = make_scorer(accuracy_score)
# parameters = {'learning_rate': [0.15,0.1,0.05,0.01,0.005,0.001],  'n_estimators': [100,250,500,750,1000,1250,1500], 'max_depth': [3,5,7]}
# clf = GridSearchCV(GradientBoostingClassifier(), parameters, scoring=scoring, refit=True, cv=2, n_jobs=-1).fit(X_train, y_train)
# print(f'Acc: {clf.score(X_test, y_test):.2f} - AUC: {roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]):.2f}')

# random_state is fixed so that, together with the seeded split above, the
# fitted model and the reported metrics are the same on every re-run.
clf = GradientBoostingClassifier(learning_rate=0.005, max_depth=3, n_estimators=1000, random_state=0).fit(X_train, y_train)

# Hold-out accuracy and ROC AUC; column 1 of predict_proba is used as the score.
print(f'Acc: {clf.score(X_test, y_test):.2f} - AUC: {roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]):.2f}')

# Alternative model tried earlier:
# clf = LogisticRegression(penalty='l2', C=10).fit(data, model_labels)

# save the model (written to the current working directory)
with open('clf.pickle', 'wb') as handle:
    pickle.dump(clf, handle, protocol=pickle.HIGHEST_PROTOCOL)

# load the model
# with open('clf.pickle', 'rb') as handle:
#     clf = pickle.load(handle)

Acc: 0.65 - AUC: 0.74


In [10]:
# Score the classifier (fitted on the round-5 training split) on the round-6
# feature matrix: accuracy first, then ROC AUC from the column-1 probabilities.
r6_accuracy = clf.score(data_r6, model_labels_r6)
r6_auc = roc_auc_score(model_labels_r6, clf.predict_proba(data_r6)[:, 1])
print(f'Acc: {r6_accuracy:.2f} - AUC: {r6_auc:.2f}')

Acc: 0.67 - AUC: 0.75


In [11]:
# Element-wise comparison: True where the round-6 prediction equals the label.
preds = clf.predict(data_r6) == model_labels_r6

# One report line per row of the metadata table; `preds` is indexed with the
# same row number, so the table order is assumed to match the feature rows.
for idx in range(len(df['model_name'])):
    params = ut.read_model(df, idx, main_path, models_path)
    poisoned = params[2]
    embedding_file = os.path.basename(params[6])
    arch = params[1]
    print(f'Idx: {idx} - Poisoned: {poisoned} - Embedding: {embedding_file} - Arch: {arch} Pred Correct: {preds[idx]}')


Idx: 0 - Poisoned: False - Embedding: GPT-2-gpt2.pt - Arch: LstmLinear Pred Correct: False
Idx: 1 - Poisoned: False - Embedding: GPT-2-gpt2.pt - Arch: GruLinear Pred Correct: False
Idx: 2 - Poisoned: False - Embedding: GPT-2-gpt2.pt - Arch: FCLinear Pred Correct: False
Idx: 3 - Poisoned: False - Embedding: GPT-2-gpt2.pt - Arch: LstmLinear Pred Correct: True
Idx: 4 - Poisoned: False - Embedding: GPT-2-gpt2.pt - Arch: GruLinear Pred Correct: True
Idx: 5 - Poisoned: False - Embedding: GPT-2-gpt2.pt - Arch: FCLinear Pred Correct: False
Idx: 6 - Poisoned: False - Embedding: DistilBERT-distilbert-base-uncased.pt - Arch: LstmLinear Pred Correct: True
Idx: 7 - Poisoned: False - Embedding: DistilBERT-distilbert-base-uncased.pt - Arch: GruLinear Pred Correct: False
Idx: 8 - Poisoned: False - Embedding: DistilBERT-distilbert-base-uncased.pt - Arch: FCLinear Pred Correct: True
Idx: 9 - Poisoned: False - Embedding: DistilBERT-distilbert-base-uncased.pt - Arch: LstmLinear Pred Correct: False
Idx: 10

In [12]:
# Display the predicted class probabilities for every row of the round-6
# feature matrix (one row per model, one column per class).
# NOTE(review): column order should follow clf.classes_ -- confirm which
# column is the "poisoned" class before reading these values.
clf.predict_proba(data_r6)

array([[0.4822843 , 0.5177157 ],
       [0.47775315, 0.52224685],
       [0.34523882, 0.65476118],
       [0.53922682, 0.46077318],
       [0.85626312, 0.14373688],
       [0.47316491, 0.52683509],
       [0.69386839, 0.30613161],
       [0.35771163, 0.64228837],
       [0.65394538, 0.34605462],
       [0.34195977, 0.65804023],
       [0.79910973, 0.20089027],
       [0.53117374, 0.46882626],
       [0.59982414, 0.40017586],
       [0.27931746, 0.72068254],
       [0.71099037, 0.28900963],
       [0.27444432, 0.72555568],
       [0.36757623, 0.63242377],
       [0.58488436, 0.41511564],
       [0.41184967, 0.58815033],
       [0.32395575, 0.67604425],
       [0.74898668, 0.25101332],
       [0.30931875, 0.69068125],
       [0.34949972, 0.65050028],
       [0.50056619, 0.49943381],
       [0.72874977, 0.27125023],
       [0.87205085, 0.12794915],
       [0.64650949, 0.35349051],
       [0.72610187, 0.27389813],
       [0.83036352, 0.16963648],
       [0.46993125, 0.53006875],
       [0.