In [1]:
import collect_results as cr
import loss_rank_correlation as lrc

# Path of the JSON configuration file used by the cells of this notebook.
config_file = "config.json"

In [2]:
%run train_models.py {config_file} # to train the models as its specified in the config file

Using PyTorch version: 1.9.0+cu111 CUDA: True
fmnist_laug_no_0_daug_no_0_dp_nc_0_nm_0_epochs_35_run_1 exists
fmnist_laug_smooth_0.01_daug_no_0_dp_nc_0_nm_0_epochs_35_run_1 exists
fmnist_laug_smooth_0.05_daug_no_0_dp_nc_0_nm_0_epochs_35_run_1 exists
fmnist_laug_no_0_daug_no_0_dp_nc_0_nm_0_epochs_3_run_1 exists
fmnist_laug_no_0_daug_no_0_dp_nc_0_nm_0_epochs_4_run_1 exists
fmnist_laug_no_0_daug_no_0_dp_nc_0_nm_0_epochs_7_run_1 exists
fmnist_laug_no_0_daug_no_0_dp_nc_0_nm_0_epochs_35_run_2 exists
fmnist_laug_smooth_0.01_daug_no_0_dp_nc_0_nm_0_epochs_35_run_2 exists
fmnist_laug_smooth_0.05_daug_no_0_dp_nc_0_nm_0_epochs_35_run_2 exists
fmnist_laug_no_0_daug_no_0_dp_nc_0_nm_0_epochs_3_run_2 exists
fmnist_laug_no_0_daug_no_0_dp_nc_0_nm_0_epochs_4_run_2 exists
fmnist_laug_no_0_daug_no_0_dp_nc_0_nm_0_epochs_7_run_2 exists


In [3]:
%run mi_attacks.py {config_file} # apply membership inference attacks to the trained models with parameters specified in config file

Using PyTorch version: 1.9.0+cu111 CUDA: True
There are 15 models in ./models\fmnist.
Attacking fmnist_laug_distillation_1_daug_no_0_dp_nc_0_nm_0_epochs_35_run_1 - |A|: 100 - S: 0...
Train Top1: 100.000%, Train Top5: 100.000%, Test Top1: 93.863%, Test Top5: 99.863%
Avg Yeom MI Advantage: 8.70
Best Yeom MI Advantage: 11.10
Aware MI Advantage: 10.82 - Reduction: none
--------------------------------------------
Attacking fmnist_laug_no_0_daug_crop_1_dp_nc_0_nm_0_epochs_35_run_1 - |A|: 100 - S: 0...
Train Top1: 98.667%, Train Top5: 100.000%, Test Top1: 94.490%, Test Top5: 99.941%
Avg Yeom MI Advantage: 4.26
Best Yeom MI Advantage: 4.46
Aware MI Advantage: 5.76 - Reduction: std
--------------------------------------------
Attacking fmnist_laug_no_0_daug_no_0_dp_nc_0_nm_0_epochs_35_run_1 - |A|: 100 - S: 0...
Train Top1: 100.000%, Train Top5: 100.000%, Test Top1: 93.863%, Test Top5: 99.843%
Avg Yeom MI Advantage: 9.20
Best Yeom MI Advantage: 10.98
--------------------------------------------

In [4]:
# Read the config file so that this demonstration can reuse its settings.
import json

with open(config_file) as config_handle:
    cfg = json.load(config_handle)

# Dataset name and model directory used by the result-collection calls.
ds_name = 'fmnist'
models_path = cfg['models_path']

In [5]:
# Collect the statistics of the non-augmented models, used as the baseline.
#   n_attacker_train: number of samples the attacker has from the training set
#                     and the testing set of the model
#   num_epochs:       number of epochs the model is trained for
baseline_stats = cr.get_non_aug_stats(
    models_path=models_path,
    ds_name=ds_name,
    n_attacker_train=100,
    num_epochs=35,
)

# Printed fields: accuracy, avg attack, pow attack.
print(baseline_stats)

{'acc': 93.69934775159248, 'avg_mia': 8.86, 'pow_mia': 8.733333333333334}


In [6]:
# We are looking for models that keep at least 90% of the baseline accuracy.
relative_accuracy_drop_limit = 0.1
laug_type, daug_type = 'smooth', 'no'

# Returns the results (accuracies and MIA accuracies) for the trained models that match the given augmentation types.
# models_path: where the models are saved, specified in the config
# ds_name: the name of the dataset (fmnist, cifar10 or cifar100)
# laug_type: label augmentation type ('distillation', 'smooth', 'disturblabel'); 'no' is used here for no augmentation
# laug_param: the parameter for label augmentation
# daug_type: data augmentation type ('crop', 'cutout', 'noise', 'mixup'); 'no' is used here for no augmentation
# daug_param: the parameter for data augmentation
# n_attacker_train: number of samples the attacker has from the training set and the testing set of the model
# n_repeat: for augmentation aware membership inference attack, number of forward passes with randomly augmented input samples
# num_epochs: number of epochs the model is trained for
# sort_order: how the results are sorted
# the results are sorted based on the membership inference accuracy (increasing order) if sort_order is 'mia' else it will be sorted based on the accuracy
results = cr.get_mia_stats(
    models_path=models_path,
    ds_name=ds_name,
    laug_type=laug_type,
    daug_type=daug_type,
    n_attacker_train=100,
    n_repeat=25,
    num_epochs=35,
    collect_dp=False,
    sort_order='mia',
)

# Each result is for a specific parameter setting.
# The results are sorted by increasing MIA accuracy, so the first one that is
# within the accuracy drop limit is the model with the lowest MIA accuracy
# among those that are accurate enough.
min_accepted_acc = baseline_stats['acc'] * (1 - relative_accuracy_drop_limit)
for result in results:
    if result['acc'] > min_accepted_acc:
        print(result)
        # Stop only after a qualifying model is found. With the break at loop
        # level, only the very first result was ever examined.
        break
else:
    # No result passed the accuracy threshold (or there were no results).
    print('No model is within the accuracy drop limit.')


{'laug_type': 'smooth', 'daug_type': 'no', 'laug_param': 0.01, 'daug_param': 0, 'acc': 93.70915192248775, 'avg_mia': 7.443333333333335, 'pow_mia': 10.199999999999998, 'awa_mia': (14.319999999999999,)}


In [7]:
# Fetch the same results sorted by accuracy instead of MIA accuracy, then
# print the first entry, i.e. the model with the highest accuracy.
results = cr.get_mia_stats(
    models_path=models_path,
    ds_name=ds_name,
    laug_type=laug_type,
    daug_type=daug_type,
    n_attacker_train=100,
    n_repeat=25,
    num_epochs=35,
    collect_dp=False,
    sort_order='accuracy',
)
print(results[0])

{'laug_type': 'smooth', 'daug_type': 'no', 'laug_param': 0.05, 'daug_param': 0, 'acc': 93.86928284090328, 'avg_mia': 7.823333333333333, 'pow_mia': 17.139999999999997, 'awa_mia': (16.94333333333333,)}


In [8]:
# Collect the results for the early stopping models: one ('epochs:<n>', stats)
# pair for every epoch count listed under 'early_stopping_epochs' in the config.
early_stopping_epochs = cfg['early_stopping_epochs']
early_stopping_results = [
    (
        f'epochs:{n_epochs}',
        cr.get_non_aug_stats(
            models_path=models_path,
            ds_name=ds_name,
            n_attacker_train=100,
            num_epochs=n_epochs,
        ),
    )
    for n_epochs in early_stopping_epochs
]
print(early_stopping_results)

[('epochs:3', {'acc': 90.80065534005757, 'avg_mia': 0.4966666666666673, 'pow_mia': 0.6900000000000001}), ('epochs:4', {'acc': 91.83660255382263, 'avg_mia': 1.1900000000000002, 'pow_mia': 1.3866666666666632}), ('epochs:7', {'acc': 92.97712602802352, 'avg_mia': 1.80333333333333, 'pow_mia': 1.2666666666666682})]


In [9]:
# Loss rank correlation between regular models with no augmentation (fp) and
# models trained with smoothing, alpha=0.05 (sp).
fp = cr.get_models_dirs(
    models_path=models_path,
    ds_name=ds_name,
    laug_type='no',
    laug_param=0,
    daug_type='no',
    daug_param=0,
    num_epochs=35,
    dp_params=None,
)
sp = cr.get_models_dirs(
    models_path=models_path,
    ds_name=ds_name,
    laug_type='smooth',
    laug_param=0.05,
    daug_type='no',
    daug_param=0,
    num_epochs=35,
    dp_params=None,
)

# The losses are saved in the attack files, so the attack parameters
# (seed and n_attacker_train) are also given in order to fetch those files.
lrc_score = lrc.get_pairwise_lrc(fp, sp, n_attacker_train=100, seed=0)
print(lrc_score)

0.42644087544304643


In [10]:
# Loss rank correlation between early stopping models trained for
# 7 epochs (fp) and for 4 epochs (sp).
fp = cr.get_models_dirs(
    models_path=models_path,
    ds_name=ds_name,
    laug_type='no',
    laug_param=0,
    daug_type='no',
    daug_param=0,
    num_epochs=7,
    dp_params=None,
)
sp = cr.get_models_dirs(
    models_path=models_path,
    ds_name=ds_name,
    laug_type='no',
    laug_param=0,
    daug_type='no',
    daug_param=0,
    num_epochs=4,
    dp_params=None,
)
lrc_score = lrc.get_pairwise_lrc(fp, sp, n_attacker_train=100, seed=0)
print(lrc_score)

0.9722368259745489


In [11]:
# Loss rank correlation between early stopping models trained for 7 epochs (fp)
# and models trained with smoothing, alpha=0.01, for 35 epochs (sp).
fp = cr.get_models_dirs(
    models_path=models_path,
    ds_name=ds_name,
    laug_type='no',
    laug_param=0,
    daug_type='no',
    daug_param=0,
    num_epochs=7,
    dp_params=None,
)
sp = cr.get_models_dirs(
    models_path=models_path,
    ds_name=ds_name,
    laug_type='smooth',
    laug_param=0.01,
    daug_type='no',
    daug_param=0,
    num_epochs=35,
    dp_params=None,
)
lrc_score = lrc.get_pairwise_lrc(fp, sp, n_attacker_train=100, seed=0)
print(lrc_score)

0.3119577278240878
