This notebook contains results for additional models and datasets beyond our main setup.

We provide results across 5 combinations:
* RN-20, CIFAR-10
* WRN40-4, CIFAR-10
* WRN28-2, CIFAR-10
* WRN28-2, CIFAR-100
* WRN28-2, CINIC-10

Each model/dataset pair requires training a separate set of shadow models, which can be expensive. This notebook will 
report results for all experiments found in `MODEL_DIR`. To fully replicate the results reported in the paper you will 
need to run all 5, but any subset is also OK.

In [84]:
import os
from collections import defaultdict

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve

from loss_traces.attacks import AttackConfig, LiRAAttack
from loss_traces.results.final_model_metrics import get_final_model_metrics
from loss_traces.results.result_processing import get_trace_reduction
from loss_traces.results.utils import make_precision_recall_at_k_df_single_threshold

# Apply the matplotlib style sheet "plot_style.mplstyle". The path is relative,
# so the file must be reachable from the kernel's working directory.
plt.style.use("plot_style.mplstyle")

In [15]:
# Device selector forwarded to AttackConfig(gpu=...). The value is specific to
# the machine the notebook was authored on -- adjust it for the local setup.
gpu = ":1"

# Maps exp_id ("<arch>_<dataset>") to the per-sample metrics DataFrame of that
# experiment. Every later cell reads from this dict.
final_model_metrics = {}
for dataset in ("CIFAR10", "CIFAR100", "CINIC10"):
    for arch in ("wrn28-2", "wrn40-4", "rn-20"):
        # Only wrn28-2 is paired with datasets other than CIFAR10.
        if dataset != "CIFAR10" and arch != "wrn28-2":
            continue

        exp_id = f"{arch}_{dataset}"
        csv_path = f'data/final_model_metrics_{exp_id}.csv'

        if not os.path.exists(csv_path):
            # No cached metrics yet: run the attack and cache the merged result.
            try:
                config = AttackConfig(
                    exp_id=exp_id,
                    target_id="target",
                    checkpoint=None,
                    arch=arch,
                    dataset=dataset,
                    attack="LiRA",
                    augment=True,
                    batchsize=32,
                    num_workers=8,
                    gpu=gpu,
                )
                attack = LiRAAttack(config)
                results = attack.run()

                df_final_model_metrics = get_final_model_metrics(
                    model=attack.model, data_loader=attack.attack_loaders[0]
                )
                # Join the final-model metrics with the attack results on the index.
                df_final_model_metrics = pd.merge(
                    df_final_model_metrics, results, left_index=True, right_index=True
                )

                # Make sure the cache directory exists so the computed metrics
                # are not lost to a failed write.
                os.makedirs(os.path.dirname(csv_path), exist_ok=True)
                df_final_model_metrics.to_csv(csv_path)
            except Exception as exc:
                # Best effort: any subset of experiments is acceptable, so skip
                # this one -- but say why instead of failing silently.
                # KeyboardInterrupt is deliberately not caught.
                print(f"Skipping {exp_id}: {type(exc).__name__}: {exc}")
                continue

        # Always load from the CSV, so that freshly computed and previously
        # cached experiments are registered identically.
        final_model_metrics[exp_id] = pd.read_csv(csv_path, index_col=0)


In [18]:
# Summarize which experiment ids currently have metrics loaded.
found_setups = final_model_metrics.keys()
print(f"Found data for {len(found_setups)} setups: {found_setups}")

Found data for 5 setups: dict_keys(['wrn28-2_CIFAR10', 'wrn40-4_CIFAR10', 'rn-20_CIFAR10', 'wrn28-2_CIFAR100', 'wrn28-2_CINIC10'])


In [23]:
# Column added to each metrics frame -> value passed as `reduction` to
# get_trace_reduction. Insertion order fixes the order the columns are added.
trace_reductions = {
    "lt_iqr": "iqr",
    "lt_mean": "mean",
    "lt_delta": "mid-end",
    "lt_slope": "slope",
    "lt_l2norm": "norm2",
    "lt_linfnorm": "inf",
}

for dataset in ("CIFAR10", "CIFAR100", "CINIC10"):
    for arch in ("wrn28-2", "wrn40-4", "rn-20"):
        # Only wrn28-2 is paired with datasets other than CIFAR10.
        if not (dataset == "CIFAR10" or arch == "wrn28-2"):
            continue

        exp_id = f"{arch}_{dataset}"
        if exp_id not in final_model_metrics:
            continue

        # Attach one loss-trace summary column per reduction, in place.
        metrics_frame = final_model_metrics[exp_id]
        for column, reduction in trace_reductions.items():
            metrics_frame[column] = get_trace_reduction(exp_id, reduction=reduction)

# Table 2

In [54]:
# Scores compared in Table 2. Each name is looked up in the helper's output as
# "precision_<name>" and "recall_<name>".
metrics = [
    "loss_desc",
    "confidence_asc",
    "param_grad_norm_desc",
    "input_grad_norm_desc",
    "shap_norm_desc",
    "lt_iqr_desc"
]

# Row layout of the final table: every (type, metric) pair, in presentation order.
idx = pd.MultiIndex.from_product([['precision', 'recall'], metrics], names=['type', 'metric'])

# One record per CIFAR10 architecture that has metrics available.
all_results = []
for arch in ["rn-20", "wrn28-2", "wrn40-4"]:
    exp_id = f"{arch}_CIFAR10"
    if exp_id not in final_model_metrics:
        continue

    results = make_precision_recall_at_k_df_single_threshold(
        scores_df=final_model_metrics[exp_id],
        ground_truth_df=final_model_metrics[exp_id],
        fpr_threshold=0.001,
        k_frac=0.01
    )
    results['architecture'] = arch
    all_results.append(results)

if all_results:
    precision_columns = ["precision_" + m for m in metrics]
    recall_columns = ["recall_" + m for m in metrics]

    df = pd.DataFrame.from_records(all_results)[precision_columns + recall_columns + ['architecture']]

    # Long format: one row per (architecture, "<type>_<metric>") pair.
    df_pivot = df.melt(id_vars=['architecture'], var_name='metric', value_name='value')

    # Split on the first underscore only: "precision_loss_desc" becomes
    # type "precision" and metric "loss_desc".
    name_parts = df_pivot['metric'].str.split('_', n=1)
    df_pivot['type'] = name_parts.str[0]
    df_pivot['metric'] = name_parts.str[1]
    df_pivot = df_pivot.set_index(['type', 'metric'])

    # Wide format: one column per architecture, rows ordered as in `idx`.
    results_df = df_pivot.pivot(columns='architecture', values='value').reindex(idx)
else:
    # Any subset of experiments may have been run; with no CIFAR10 experiment
    # there is nothing to tabulate, so show an empty table instead of failing
    # on a column lookup.
    print("No CIFAR10 experiments found in final_model_metrics; Table 2 has no columns.")
    results_df = pd.DataFrame(index=idx)

results_df


Unnamed: 0_level_0,architecture,rn-20,wrn28-2,wrn40-4
type,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
precision,loss_desc,0.212,0.208,0.112
precision,confidence_asc,0.192,0.2,0.1
precision,param_grad_norm_desc,0.172,0.2,0.12
precision,input_grad_norm_desc,0.192,0.204,0.12
precision,shap_norm_desc,0.064,0.152,0.132
precision,lt_iqr_desc,0.788,0.92,0.912
recall,loss_desc,0.039057,0.021346,0.012957
recall,confidence_asc,0.035372,0.020525,0.011569
recall,param_grad_norm_desc,0.031688,0.020525,0.013882
recall,input_grad_norm_desc,0.035372,0.020936,0.013882


# Table 3

In [55]:
# Scores compared in Table 3. Each name is looked up in the helper's output as
# "precision_<name>" and "recall_<name>".
metrics = [
    "loss_desc",
    "confidence_asc",
    "param_grad_norm_desc",
    "input_grad_norm_desc",
    "shap_norm_desc",
    "lt_iqr_desc"
]

# Row layout of the final table: every (type, metric) pair, in presentation order.
idx = pd.MultiIndex.from_product([['precision', 'recall'], metrics], names=['type', 'metric'])

# One record per dataset for which the wrn28-2 experiment has metrics available.
all_results = []
for dataset in ["CIFAR10", "CIFAR100", "CINIC10"]:
    exp_id = f"wrn28-2_{dataset}"  # Use WRN-28-2 for all datasets
    if exp_id not in final_model_metrics:
        continue

    results = make_precision_recall_at_k_df_single_threshold(
        scores_df=final_model_metrics[exp_id],
        ground_truth_df=final_model_metrics[exp_id],
        fpr_threshold=0.001,
        k_frac=0.01
    )
    results['dataset'] = dataset
    all_results.append(results)

if all_results:
    precision_columns = ["precision_" + m for m in metrics]
    recall_columns = ["recall_" + m for m in metrics]

    df = pd.DataFrame.from_records(all_results)[precision_columns + recall_columns + ['dataset']]

    # Long format: one row per (dataset, "<type>_<metric>") pair.
    df_pivot = df.melt(id_vars=['dataset'], var_name='metric', value_name='value')

    # Split on the first underscore only: "precision_loss_desc" becomes
    # type "precision" and metric "loss_desc".
    name_parts = df_pivot['metric'].str.split('_', n=1)
    df_pivot['type'] = name_parts.str[0]
    df_pivot['metric'] = name_parts.str[1]
    df_pivot = df_pivot.set_index(['type', 'metric'])

    # Wide format: one column per dataset, rows ordered as in `idx`.
    results_df = df_pivot.pivot(columns='dataset', values='value').reindex(idx)
else:
    # Any subset of experiments may have been run; with no wrn28-2 experiment
    # there is nothing to tabulate, so show an empty table instead of failing
    # on a column lookup.
    print("No wrn28-2 experiments found in final_model_metrics; Table 3 has no columns.")
    results_df = pd.DataFrame(index=idx)

results_df


Unnamed: 0_level_0,dataset,CIFAR10,CIFAR100,CINIC10
type,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
precision,loss_desc,0.208,0.272,0.3
precision,confidence_asc,0.2,0.236,0.28
precision,param_grad_norm_desc,0.2,0.22,0.286667
precision,input_grad_norm_desc,0.204,0.228,0.297778
precision,shap_norm_desc,0.152,0.192,0.213333
precision,lt_iqr_desc,0.92,0.972,0.94
recall,loss_desc,0.021346,0.011296,0.023196
recall,confidence_asc,0.020525,0.009801,0.021649
recall,param_grad_norm_desc,0.020525,0.009136,0.022165
recall,input_grad_norm_desc,0.020936,0.009468,0.023024


# Table 4

In [63]:
# One record per experiment: LiRA AUC and the TPR at the ROC point whose FPR is
# closest to 0.001 (the nearest point, which may lie on either side of 0.001).
results = []

for exp_id, metrics_frame in final_model_metrics.items():
    membership = metrics_frame["target_trained_on"]
    lira_scores = metrics_frame["lira_score"]

    fpr, tpr, _ = roc_curve(membership, lira_scores)
    nearest = np.abs(fpr - 0.001).argmin()

    results.append(
        dict(
            exp_id=exp_id,
            lira_auc=roc_auc_score(membership, lira_scores),
            tpr_at_fpr=tpr[nearest],
        )
    )

df = pd.DataFrame(results).set_index('exp_id')
print("\nLiRA Metrics across all experiments:")
df



LiRA Metrics across all experiments:


Unnamed: 0_level_0,lira_auc,tpr_at_fpr
exp_id,Unnamed: 1_level_1,Unnamed: 2_level_1
wrn28-2_CIFAR10,0.740698,0.09736
wrn40-4_CIFAR10,0.733188,0.08568
rn-20_CIFAR10,0.702819,0.05368
wrn28-2_CIFAR100,0.919882,0.239
wrn28-2_CINIC10,0.800446,0.128378


# Table 7

In [67]:
# Loss-trace reductions compared in Table 7. Each name is looked up in the
# helper's output as "precision_<name>" and "recall_<name>".
metrics = [
    "lt_mean_desc",
    "lt_delta_desc",
    "lt_slope_asc",
    "lt_l2norm_desc",
    "lt_linfnorm_desc",
    "lt_iqr_desc"
]

# Row layout of the final table: every (type, metric) pair, in presentation order.
idx = pd.MultiIndex.from_product([['precision', 'recall'], metrics], names=['type', 'metric'])

# One record per dataset for which the wrn28-2 experiment has metrics available.
all_results = []
for dataset in ["CIFAR10", "CIFAR100", "CINIC10"]:
    exp_id = f"wrn28-2_{dataset}"  # Use WRN-28-2 for all datasets
    if exp_id not in final_model_metrics:
        continue

    results = make_precision_recall_at_k_df_single_threshold(
        scores_df=final_model_metrics[exp_id],
        ground_truth_df=final_model_metrics[exp_id],
        fpr_threshold=0.001,
        k_frac=0.01
    )
    results['dataset'] = dataset
    all_results.append(results)

if all_results:
    precision_columns = ["precision_" + m for m in metrics]
    recall_columns = ["recall_" + m for m in metrics]

    df = pd.DataFrame.from_records(all_results)[precision_columns + recall_columns + ['dataset']]

    # Long format: one row per (dataset, "<type>_<metric>") pair.
    df_pivot = df.melt(id_vars=['dataset'], var_name='metric', value_name='value')

    # Split on the first underscore only: "precision_lt_mean_desc" becomes
    # type "precision" and metric "lt_mean_desc".
    name_parts = df_pivot['metric'].str.split('_', n=1)
    df_pivot['type'] = name_parts.str[0]
    df_pivot['metric'] = name_parts.str[1]
    df_pivot = df_pivot.set_index(['type', 'metric'])

    # Wide format: one column per dataset, rows ordered as in `idx`.
    results_df = df_pivot.pivot(columns='dataset', values='value').reindex(idx)
else:
    # Any subset of experiments may have been run; with no wrn28-2 experiment
    # there is nothing to tabulate, so show an empty table instead of failing
    # on a column lookup.
    print("No wrn28-2 experiments found in final_model_metrics; Table 7 has no columns.")
    results_df = pd.DataFrame(index=idx)

results_df


Unnamed: 0_level_0,dataset,CIFAR10,CIFAR100,CINIC10
type,metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
precision,lt_mean_desc,0.908,0.956,0.96
precision,lt_delta_desc,0.508,0.616,0.775556
precision,lt_slope_asc,0.888,0.984,0.915556
precision,lt_l2norm_desc,0.872,0.968,0.948889
precision,lt_linfnorm_desc,0.416,0.132,0.191111
precision,lt_iqr_desc,0.92,0.972,0.94
recall,lt_mean_desc,0.093186,0.039701,0.074227
recall,lt_delta_desc,0.052135,0.025581,0.059966
recall,lt_slope_asc,0.091133,0.040864,0.07079
recall,lt_l2norm_desc,0.089491,0.040199,0.073368


# Table 9

In [80]:
# Mean of the "lt_iqr" column over the rows flagged in "target_trained_on",
# printed once per experiment.
for exp_id, metrics_frame in final_model_metrics.items():
    # Boolean-mask selection: keep only rows where target_trained_on is True.
    members = metrics_frame[metrics_frame["target_trained_on"]]
    # Single quotes inside the f-string: reusing the enclosing quote character
    # is a SyntaxError on Python versions before 3.12.
    print(f"{exp_id}: {members['lt_iqr'].mean():.2f}")

wrn28-2_CIFAR10: 0.31
wrn40-4_CIFAR10: 0.21
rn-20_CIFAR10: 0.55
wrn28-2_CIFAR100: 1.45
wrn28-2_CINIC10: 0.71


# Table 10

In [91]:
# Score whose precision/recall is tabulated; the helper reports it under
# "precision_<metric>" and "recall_<metric>".
metric = "lt_iqr_desc"

# Values passed as `k_frac` to the helper. Defined before the loop because the
# column ordering below needs it even when no experiment is available.
k_fracs = [0.01, 0.03, 0.05, 0.10, 0.20, 0.50]

# Column-label prefix for each k, e.g. 0.01 -> "k=1.0%".
k_labels = {k_frac: f'k={k_frac*100}%' for k_frac in k_fracs}

# exp_id -> {column label -> value}; becomes one table row per experiment.
results = defaultdict(dict)
for exp_id, df in final_model_metrics.items():
    for k_frac in k_fracs:
        stats = make_precision_recall_at_k_df_single_threshold(
            scores_df=df,
            ground_truth_df=df,
            fpr_threshold=0.001,
            k_frac=k_frac
        )

        results[exp_id][f'{k_labels[k_frac]} Precision'] = stats[f'precision_{metric}']
        results[exp_id][f'{k_labels[k_frac]} Recall'] = stats[f'recall_{metric}']

# Precision/recall pairs, in increasing k.
column_order = [
    f'{k_labels[k_frac]} {kind}'
    for k_frac in k_fracs
    for kind in ('Precision', 'Recall')
]

# reindex (rather than [] selection) also yields a well-formed, empty table
# when no experiment is available.
results_df = pd.DataFrame.from_dict(results, orient='index').reindex(columns=column_order)

display(results_df)


Unnamed: 0,k=1.0% Precision,k=1.0% Recall,k=3.0% Precision,k=3.0% Recall,k=5.0% Precision,k=5.0% Recall,k=10.0% Precision,k=10.0% Recall,k=20.0% Precision,k=20.0% Recall,k=50.0% Precision,k=50.0% Recall
wrn28-2_CIFAR10,0.92,0.094417,0.829333,0.255337,0.76,0.389984,0.596,0.611658,0.4214,0.864943,0.19408,0.995895
wrn40-4_CIFAR10,0.912,0.105507,0.832,0.288755,0.7472,0.432207,0.5956,0.689033,0.393,0.909301,0.17256,0.998149
rn-20_CIFAR10,0.788,0.145173,0.644,0.355932,0.5488,0.505527,0.386,0.711127,0.2498,0.920413,0.10848,0.999263
wrn28-2_CIFAR100,0.972,0.040365,0.944,0.117608,0.9,0.186877,0.8276,0.343688,0.711,0.590532,0.4572,0.949336
wrn28-2_CINIC10,0.94,0.07268,0.881481,0.204467,0.824444,0.318729,0.71,0.548969,0.534444,0.82646,0.257689,0.99622


# Table 11

In [98]:
def positives_at_fpr(results, target_fpr=0.001):
    """Count flagged rows with a LiRA score at or above the chosen ROC threshold.

    The threshold is taken from the ROC curve of ``results["lira_score"]``
    against ``results["target_trained_on"]``, at the curve point whose false
    positive rate is closest to ``target_fpr`` (the first such point on ties).

    Args:
        results: frame with a "lira_score" column and a boolean
            "target_trained_on" column.
        target_fpr: false positive rate used to pick the threshold.

    Returns:
        Number of rows where ``target_trained_on`` is set and the score is
        greater than or equal to the selected threshold.
    """
    is_member = results["target_trained_on"]
    scores = results["lira_score"]

    fpr, _tpr, thresholds = roc_curve(is_member, scores)
    cutoff = thresholds[np.abs(fpr - target_fpr).argmin()]

    flagged_members = (scores >= cutoff) & (is_member)
    return np.sum(flagged_members)

In [99]:
# Column label -> one value per experiment, in the iteration order of
# final_model_metrics. The "Max Recall k%" columns are appended on first use.
table_columns = {
    'Total Positives': [],
    'Positives at Threshold': [],
}
row_labels = []

for exp_id, metrics_frame in final_model_metrics.items():
    flagged_members = positives_at_fpr(metrics_frame)
    member_count = metrics_frame["target_trained_on"].sum()

    row_labels.append(exp_id)
    table_columns['Total Positives'].append(member_count)
    table_columns['Positives at Threshold'].append(flagged_members)

    for k in (1, 3, 5, 10, 20, 50):
        # k% of the flagged rows (integer division), relative to the number of
        # positives at the threshold, capped at 1.
        top_k_size = k * member_count // 100
        recall_cap = min(1, top_k_size / flagged_members)
        table_columns.setdefault(f'Max Recall {k}%', []).append(recall_cap)

results_df = pd.DataFrame(table_columns, index=row_labels)
display(results_df)

Unnamed: 0,Total Positives,Positives at Threshold,Max Recall 1%,Max Recall 3%,Max Recall 5%,Max Recall 10%,Max Recall 20%,Max Recall 50%
wrn28-2_CIFAR10,25000,2434,0.102712,0.308135,0.513558,1.0,1.0,1
wrn40-4_CIFAR10,25000,2142,0.116713,0.35014,0.583567,1.0,1.0,1
rn-20_CIFAR10,25000,1342,0.186289,0.558867,0.931446,1.0,1.0,1
wrn28-2_CIFAR100,25000,5975,0.041841,0.125523,0.209205,0.41841,0.83682,1
wrn28-2_CINIC10,45000,5777,0.077895,0.233685,0.389476,0.778951,1.0,1


# Table 14

In [106]:
# Rows of Table 14: keys selected from the helper's output for each experiment.
# NOTE(review): presumably Spearman correlations of each score -- confirm
# against make_precision_recall_at_k_df_single_threshold.
metrics = [
    "spearman_loss",
    "spearman_confidence",
    "spearman_param_grad_norm",
    "spearman_input_grad_norm",
    "spearman_shap_norm",
    "spearman_lt_iqr",
]

# exp_id -> full output of the helper for that experiment.
metrics_by_exp = {
    exp_id: make_precision_recall_at_k_df_single_threshold(
        scores_df=metrics_frame,
        ground_truth_df=metrics_frame,
        fpr_threshold=0.001,
        k_frac=0.01,
    )
    for exp_id, metrics_frame in final_model_metrics.items()
}

# One column per experiment, restricted to the rows listed in `metrics`.
results_df = pd.DataFrame(metrics_by_exp).loc[metrics]

display(results_df)




Unnamed: 0,wrn28-2_CIFAR10,wrn40-4_CIFAR10,rn-20_CIFAR10,wrn28-2_CIFAR100,wrn28-2_CINIC10
spearman_loss,0.163612,0.013881,0.325082,0.262193,0.213315
spearman_confidence,0.175863,0.01874,0.321111,0.247914,0.217877
spearman_param_grad_norm,0.2095,0.079165,0.323321,0.264883,0.246394
spearman_input_grad_norm,0.229175,0.151215,0.323732,0.269151,0.25409
spearman_shap_norm,0.146975,0.163064,0.032545,0.060551,0.182064
spearman_lt_iqr,0.615619,0.654473,0.495297,0.747859,0.693568
