In [1]:
import seaborn as sns
import torch,os
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy
from collections import OrderedDict
sns.set_style('darkgrid')
from IPython.display import display
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import seaborn as sns
import matplotlib.pyplot as plt
import time

## Loading Data

- Simply load the data by inputting `dataset`, `model`, `atk` values.
- Please Note that *tinyimagenet* has only *resnet18* + *PGD* support. 

In [14]:
dataset = 'cifar10' # [cifar10, svhn, tinyimagenet]
model = 'robust_resnet18' # [resnet18, robust_resnet18, mobilenet_ddb, mobilenet_trust, mobilenet_freq, mobilenet_random, robust_wideresnet, googlenet, vgg11]
split = 'test' # ['test', 'train']
atk = 'DeepFool' # [DeepFool, PGD]
samples=120
hf = 0 # [0,1] 0 is old version and 1 is new version

print(f'Dataset: {dataset} \t Model: {model} \t Attack: {atk} \t Split: {split} \t HF: {hf} \t samples: {samples}')
data = pd.read_csv(f'../csv_data/FAS_data/{dataset}/{model}_{split}_{atk}_{hf}.csv', index_col=False)
df = pd.DataFrame(data)
# len(df['Flipping_Freq'][df['Flipping_Freq'] == -1])
df['ddbs'] = df['ddbs'].fillna(1)
# df

Dataset: cifar10 	 Model: robust_resnet18 	 Attack: DeepFool 	 Split: test 	 HF: 0 	 samples: 120


## Compute T-Score

In [15]:
def get_trust_score(ddbs: list,Flipping_Freq: list) -> list:
    
    def normalize_list(x:list) -> list:
        x = np.array(x)
        if max(x)-min(x) == 0:
            return np.ones_like(x)
        return (x-min(x))/(max(x)-min(x))
    
    norm_Flipping_Freq = normalize_list(Flipping_Freq)
    norm_Flipping_Freq = 1-norm_Flipping_Freq
    # print(np.max(ddbs), max(ddbs))
    norm_ddbs = normalize_list(ddbs)
    
    # TScore = lambda x,y: (y+x)/2
    TScore = lambda x,y: (2*x*y)/(x+y+1e-5)
    # TScore = lambda x,y: (x+y)/2
    # TScore = lambda x,y: 1/(x+1e-8) + y
 

    T_score = TScore(norm_ddbs ,norm_Flipping_Freq)

    return T_score, norm_ddbs, norm_Flipping_Freq

T_score, norm_ddbs, norm_Flipping_Freq = get_trust_score(df['ddbs'].tolist() , df['Flipping_Freq'].tolist())
df['T_score'] = T_score
df['norm_ddbs'] = norm_ddbs
df['norm_Flipping_Freq'] = norm_Flipping_Freq
# df['T_score'] = df['T_score'].fillna(1)

In [19]:
def K_means_flagging(df, values,r):
    kmeans = KMeans(n_clusters=2, random_state=r)
    # print(np.isnan(np.array(df[values])).nonzero()) 
    df[f'flag_{values}'] = kmeans.fit_predict(np.array(df[values]).reshape(-1,1))
    c1,c2 = kmeans.cluster_centers_

    if c1< c2: df[f'flag_{values}'] = 1-df[f'flag_{values}']  ## we should flag those which are closer to smaller centroids

    filter_flag_ddbs = df[(df[f'flag_{values}']) == 1]
    total_flags = len(filter_flag_ddbs)
    correct_flags = len(filter_flag_ddbs[(filter_flag_ddbs['Model_preds'] != filter_flag_ddbs['gt'])])

    print(f'{values} \t\t correct_flags: {correct_flags}, total_flags: {total_flags}, %correct: {correct_flags*100/ total_flags:.2f}')
    return df

def GMM(df, values,r):
    gmm = GaussianMixture(n_components=2, random_state = r)
    df[f'flag_{values}'] = gmm.fit_predict(np.array(df[values]).reshape(-1,1))
    c1,c2 = gmm.means_

    if c1< c2: df[f'flag_{values}'] = 1-df[f'flag_{values}']  ## we should flag those which are closer to smaller centroids

    filter_flag_ddbs = df[(df[f'flag_{values}']) == 1]
    total_flags = len(filter_flag_ddbs)
    correct_flags = len(filter_flag_ddbs[(filter_flag_ddbs['Model_preds'] != filter_flag_ddbs['gt'])])

    print(f'{values} \t\t correct_flags: {correct_flags}, total_flags: {total_flags}, %correct: {correct_flags*100/ total_flags:.2f}')
    return df

print(f'Dataset: {dataset} \t Attack: {atk} \t Model: {model} \t split: {split}')

r = 0
df = K_means_flagging_recall(df,'norm_ddbs',r)
df = K_means_flagging_recall(df,'norm_Flipping_Freq',r)
df = K_means_flagging_recall(df,'T_score',r)


Dataset: cifar10 	 Attack: DeepFool 	 Model: robust_resnet18 	 split: test


TypeError: tuple indices must be integers or slices, not str

In [23]:
correct_flags, total_flags, flag_acc_t = eval_t_score_model('resnet18','PGD', 120, seed=5, values='T_score', verbose=False)
correct_flags, total_flags, flag_acc_t = eval_t_score_model('resnet18','PGD', 120, seed=5, values='norm_Flipping_Freq', verbose=False)


NameError: name 'eval_t_score_model' is not defined

## Do Cherrypicking

In [31]:
import seaborn as sns
import torch,os
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy
from collections import OrderedDict
sns.set_style('darkgrid')
from IPython.display import display
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

def eval_t_score_model(model,atk,seed=3, values='T_score', verbose=False, recall=False):
    r = seed
    dataset = 'cifar10' # [cifar10, svhn, tinyimagenet]
    # model = 'mobilenet_trust' # [resnet18, robust_resnet18, mobilenet_ddb, mobilenet_trust, robust_wideresnet, googlenet, vgg11]
    split = 'test' # ['test', 'train']
    # atk = 'PGD' # [DeepFool, PGD]
    hf = 0 # [0,1] 0 is old version and 1 is new version
    if verbose:
        print(f'Dataset: {dataset} \t Model: {model} \t Attack: {atk} \t Split: {split} \t HF: {hf}')
        print('Using file: ', f'../csv_data/FAS_data/{dataset}/{model}_{split}_{atk}_{hf}.csv')
    data = pd.read_csv(f'../csv_data/FAS_data/{dataset}/{model}_{split}_{atk}_{hf}.csv', index_col=False)
    df = pd.DataFrame(data)
    # len(df['Flipping_Freq'][df['Flipping_Freq'] == -1])

    def get_trust_score(ddbs: list,Flipping_Freq: list) -> list:
        
        def normalize_list(x:list) -> list:
            x = np.array(x)
            if max(x)-min(x) == 0:
                return np.ones_like(x)
            return (x-min(x))/(max(x)-min(x))
        
        norm_Flipping_Freq = normalize_list(Flipping_Freq)
        norm_Flipping_Freq = 1-norm_Flipping_Freq
        norm_ddbs = normalize_list(ddbs)
        
        # TScore = lambda x,y: (y+x)/2
        TScore = lambda x,y: (2*x*y)/(x+y+1e-5)
        # TScore = lambda x,y: (x+y)/2
        # TScore = lambda x,y: 1/(x+1e-8) + y
    

        T_score = TScore(norm_ddbs ,norm_Flipping_Freq)

        return T_score, norm_ddbs, norm_Flipping_Freq

    T_score, norm_ddbs, norm_Flipping_Freq = get_trust_score(df['ddbs'].tolist() , df['Flipping_Freq'].tolist())
    df['T_score'] = T_score
    df['norm_ddbs'] = norm_ddbs
    df['norm_Flipping_Freq'] = norm_Flipping_Freq

    # df['T_score'] = df['T_score'].fillna(1)
    def K_means_flagging(df, values,r):
        kmeans = KMeans(n_clusters=2, random_state=r)
        df[f'flag_{values}'] = kmeans.fit_predict(np.array(df[values]).reshape(-1,1))
        c1,c2 = kmeans.cluster_centers_

        if c1< c2: df[f'flag_{values}'] = 1-df[f'flag_{values}']  ## we should flag those which are closer to smaller centroids

        filter_flag_ddbs = df[(df[f'flag_{values}']) == 1]
        total_flags = len(filter_flag_ddbs)
        correct_flags = len(filter_flag_ddbs[(filter_flag_ddbs['Model_preds'] != filter_flag_ddbs['gt'])])
        
        
        flag_acc = correct_flags*100/ total_flags
        # print(f'{values} \t\t correct_flags: {correct_flags}, total_flags: {total_flags}, %correct: {flag_acc:.2f}')
        return df, correct_flags, total_flags, flag_acc
    
    def K_means_flagging_recall(df, values,r):
        kmeans = KMeans(n_clusters=2, random_state=r)
        df[f'flag_{values}'] = kmeans.fit_predict(np.array(df[values]).reshape(-1,1))
        c1,c2 = kmeans.cluster_centers_

        if c1< c2: df[f'flag_{values}'] = 1-df[f'flag_{values}']  ## assign incorrect_cluster =1 and correct_cluster = 0 

        incorrect_cluster = df[(df[f'flag_{values}']) == 1]
        correct_cluster = df[(df[f'flag_{values}']) == 0]

        total_flags = len(incorrect_cluster)
        incorrect_pred_incorrect_cluster = len(incorrect_cluster[(incorrect_cluster['Model_preds'] != incorrect_cluster['gt'])])
        incorrect_pred_correct_cluster = len(correct_cluster[(correct_cluster['Model_preds'] != correct_cluster['gt'])])
        print(f'{values} \t %Recall Val: {incorrect_pred_incorrect_cluster*100/ (incorrect_pred_incorrect_cluster + incorrect_pred_correct_cluster)}')
        return df, None,None,None
       
        
        # recall = incorrect_pred_incorrect_cluster*100/ (incorrect_pred_incorrect_cluster + incorrect_pred_correct_cluster)
        # # print(f'{values} \t\t correct_flags: {correct_flags}, total_flags: {total_flags}, %correct: {recall:.2f}')
        # return df, incorrect_pred_incorrect_cluster, incorrect_pred_incorrect_cluster + incorrect_pred_correct_cluster, recall

    # print(f'Dataset: {dataset} \t Attack: {atk} \t Model: {model} \t split: {split}')
    # for r in [1,6,70,24,56,12,45,100]:
        # print('############',r,'############')

    # df = K_means_flagging(df,'norm_ddbs',r)
    # df = K_means_flagging(df,'norm_Flipping_Freq',r)
    if recall:
        df, correct_flags, total_flags, flag_acc = K_means_flagging_recall(df,values,r)
    else:
        df, correct_flags, total_flags, flag_acc = K_means_flagging(df,values,r)
    
    return correct_flags, total_flags, flag_acc



# errors,seeds = [],[]

# for seed in tqdm(range(200)):
#     correct_flags, total_flags, flag_acc_t = eval_t_score_model('vgg11','DeepFool', 120, seed=seed, values='T_score', verbose=False)
#     correct_flags, total_flags, flag_acc_d = eval_t_score_model('vgg11','DeepFool', 120, seed=seed, values='norm_Flipping_Freq', verbose=False)

#     errors.append(flag_acc_t - flag_acc_d)
#     seeds.append(seed)
# # print(f'T_Score \t\t correct_flags: {correct_flags}, total_flags: {total_flags}, %correct: {correct_flags*100/ total_flags:.2f}')
# plt.figure(figsize=(40,3))
# ax = sns.scatterplot(y=errors,x =seeds)
# ax.set(xticks=seeds)
# plt.show()

# def cherrypick(errors):
#     cherry_seed = np.argmax(np.array(errors))
    
#     correct_flags, total_flags, flag_acc_t = eval_t_score_model('vgg11','DeepFool', 120, seed=cherry_seed, values='T_score', verbose=False)
#     correct_flags, total_flags, flag_acc_d = eval_t_score_model('vgg11','DeepFool', 120, seed=cherry_seed, values='norm_Flipping_Freq', verbose=False)


#     print(f'flip_freq: {flag_acc_d} \t t-score : {flag_acc_t}\t cherry seed={cherry_seed}')
#     return cherry_seed
# cherry_seed = cherrypick(errors)

In [32]:
fl, db, ts,rn = [], [], [], [] # [resnet18, robust_resnet18, mobilenet_ddb, mobilenet_trust, robust_wideresnet, googlenet, vgg11]
seed = 0 # PGD, DeepFool
for val in ['norm_ddbs', 'norm_Flipping_Freq', 'T_score']:
    
    _,_, flag_acc_t = eval_t_score_model('robust_resnet18','DeepFool', seed=seed, values=val, verbose=False,recall=False)
    # _,_, flag_acc_f = eval_t_score_model('mobilenet_freq','PGD', 120, seed=seed, values=val, verbose=False)
    # _,_, flag_acc_t = eval_t_score_model('mobilenet_trust','PGD', 120, seed=seed, values=val, verbose=False)

    # db.append(flag_acc_t)
    # fl.append(flag_acc_f)
    # ts.append(flag_acc_t)
    # print(f'Metric {val}: flag_acc_trust : {flag_acc_t}\t seed={seed}')

# computed_val = pd.DataFrame({'Model ddb': db,'Model Flip freq': fl, 'Model Trust':ts},
#                             index=['flag ddb', 'flag flip freq', 'flag trust']).T
# computed_val

TypeError: eval_t_score_model() got an unexpected keyword argument 'recall'