In [1]:
import argparse
import json

import torch
import numpy as np
import random
import torch.optim as optim
import time

from sklearn.preprocessing import Normalizer, StandardScaler, RobustScaler
import stealing_verification.sort.mlp as mlp
import os
from torch.amp import autocast
import pickle

def split_train_test(vict_o, benign_o, scale=0.8, shuffle=False):
    """Randomly split victim and benign samples into train and test subsets.

    Args:
        vict_o: Indexable collection of victim samples.
        benign_o: Indexable collection of benign samples.
        scale: Fraction of each collection drawn (without replacement) for
            training; the count is ``int(len(x) * scale)``.
        shuffle: If True, additionally shuffle the drawn training indices.

    Returns:
        Four lists ``(train_vict_o, test_vict_o, train_benign_o,
        test_benign_o)``. Both test lists are truncated to the length of the
        shorter one so the two classes are balanced at test time.

    Note:
        Draws from the global ``np.random`` state; seed it beforehand for
        reproducible splits.
    """
    n_vict, n_benign = len(vict_o), len(benign_o)
    all_vict_idx = np.arange(n_vict)
    all_benign_idx = np.arange(n_benign)

    # Victim indices are drawn before benign ones; this order determines how
    # the global RNG stream is consumed.
    vict_train_idx = np.random.choice(all_vict_idx, int(n_vict * scale), replace=False)
    vict_test_idx = np.setdiff1d(all_vict_idx, vict_train_idx)

    benign_train_idx = np.random.choice(all_benign_idx, int(n_benign * scale), replace=False)
    benign_test_idx = np.setdiff1d(all_benign_idx, benign_train_idx)

    if shuffle:
        np.random.shuffle(vict_train_idx)
        np.random.shuffle(benign_train_idx)

    # Keep the same number of held-out samples for both classes.
    test_num = min(len(benign_test_idx), len(vict_test_idx))

    train_vict_o = [vict_o[k] for k in vict_train_idx]
    train_benign_o = [benign_o[k] for k in benign_train_idx]
    test_vict_o = [vict_o[k] for k in vict_test_idx[:test_num]]
    test_benign_o = [benign_o[k] for k in benign_test_idx[:test_num]]

    return train_vict_o, test_vict_o, train_benign_o, test_benign_o

def get_outputs_set(o_p, o_np, norm=False, scale=False, output_clf_dir=None):
    """Stack positive and negative samples into one labelled feature matrix.

    Args:
        o_p: Positive samples; their rows receive label 1.
        o_np: Negative samples; their rows receive label 0.
        norm: If True, pass the stacked matrix through
            ``Normalizer(norm='l2')``.
        scale: If True, fit a ``RobustScaler`` on the (possibly normalised)
            stacked matrix and apply it.
        output_clf_dir: Optional directory. When given, every transformer
            that was applied is pickled there (``normalizer.pkl`` /
            ``scaler.pkl``).

    Returns:
        ``(o_trainset, o_label)``: the stacked (and optionally transformed)
        matrix and the matching label vector.
    """
    o_trainset = np.vstack([o_p, o_np])
    o_label = np.concatenate([np.ones(len(o_p)), np.zeros(len(o_np))])

    # Create the directory once, before any transformer is saved. Previously
    # it was only created in the `norm` branch, so `scale=True, norm=False`
    # failed with FileNotFoundError when the directory did not exist yet.
    if output_clf_dir is not None and (norm or scale):
        os.makedirs(output_clf_dir, exist_ok=True)

    if norm:
        normalizer = Normalizer(norm='l2')
        o_trainset = normalizer.transform(o_trainset)
        if output_clf_dir is not None:
            save_dir = os.path.join(output_clf_dir, 'normalizer.pkl')
            with open(save_dir, 'wb') as f:
                pickle.dump(normalizer, f)

    if scale:
        scaler = RobustScaler()
        o_trainset = scaler.fit_transform(o_trainset)
        if output_clf_dir is not None:
            save_dir = os.path.join(output_clf_dir, 'scaler.pkl')
            with open(save_dir, 'wb') as f:
                pickle.dump(scaler, f)

    return o_trainset, o_label


In [2]:
# load outputset
seed = 42
softmax = False
norm = True
scale = True
output_clf_dir = './ckpt/clf/'
mlp_epoch = 300

# Paths get their own names so that `vict_output` / `poison_output` /
# `benign_output` always refer to arrays, never to path strings.
vict_output_path = './outputset-ckpt/victim/benign_sample_train_logits_list.npy'
poison_output_path = './outputset-ckpt/poisoned/benign_sample_train_logits_list.npy'
benign_output_path = './outputset-ckpt/benign/benign_sample_train_logits_list.npy'

# Seed every RNG used downstream so the split and training are repeatable.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

starttime = time.time()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print('load output of victim and benign')

vict_output = np.load(vict_output_path)
poison_output = np.load(poison_output_path)
benign_output = np.load(benign_output_path)

if softmax:
    # Use a separate name for the op: rebinding `softmax` to a module would
    # turn the boolean flag into an object that later cells still test.
    softmax_fn = torch.nn.Softmax(dim=1)
    vict_output = softmax_fn(torch.from_numpy(vict_output)).numpy()
    poison_output = softmax_fn(torch.from_numpy(poison_output)).numpy()
    benign_output = softmax_fn(torch.from_numpy(benign_output)).numpy()

# Features are the element-wise differences to the poisoned model's outputs.
vict_v = vict_output - poison_output
benign_v = benign_output - poison_output

# split train & test set for training clf
train_vict_o, test_vict_o, train_benign_o, test_benign_o = split_train_test(vict_v, benign_v)

outputs_trainset, outputs_trainlabel = get_outputs_set(train_vict_o, train_benign_o, norm=norm, scale=scale, output_clf_dir=output_clf_dir)

# NOTE(review): this call fits a fresh RobustScaler on the held-out data
# instead of reusing the scaler fitted on the training set above; confirm
# that is intended.
outputs_testset, outputs_testlabel = get_outputs_set(test_vict_o, test_benign_o, norm=norm, scale=scale)


load output of victim and benign


In [3]:
# train binary classifier
print('train meta-classifier')
clf = mlp.MLP5(len(outputs_trainset[0]), 2)
clf = clf.to(device)
print(clf)

optimizer = optim.SGD(clf.parameters(), lr=0.005, weight_decay=5e-4, momentum=0.9)
best_acc = 0

# The checkpoint name encodes which preprocessing flags were active.
# verification_test_2 looks for the substrings 'norm' / 'scale' in
# `ckpt_name`, so the naming scheme must stay as it is.
norm_name = 'norm' if norm else ''
scale_name = 'scale' if scale else ''
softmax_name = 'softmax' if softmax else ''
ckpt_name = 'clf' + '_' + norm_name + '_' + scale_name + '_' + softmax_name + '.pt'

print(norm)
print(scale)
print(ckpt_name)

os.makedirs(output_clf_dir, exist_ok=True)

# Built once, before the loop: avoids the doubled '/' from string
# concatenation and keeps the name defined even if `mlp_epoch` is 0.
best_mlp_path = os.path.join(output_clf_dir, ckpt_name)

# Mixed precision only when actually running on CUDA; on CPU the context
# is explicitly disabled instead of requesting a 'cuda' autocast there.
use_amp = device.type == 'cuda'

for epoch in range(mlp_epoch):
    with autocast(device_type=device.type, enabled=use_amp):
        mlp.train(clf, outputs_trainset, outputs_trainlabel, epoch, optimizer, device)
        # NOTE(review): accuracy is measured on the training set, so the
        # "best" checkpoint is selected by training accuracy and
        # `outputs_testset` is never used; confirm that is intended.
        acc = mlp.test(clf, outputs_trainset, outputs_trainlabel, device, epoch)
        if acc > best_acc:
            best_acc = acc
            torch.save(clf.state_dict(), best_mlp_path)

print("Test on Victim best acc=%.6f" % best_acc)
print('Save clf at ', best_mlp_path)

print('Time cost: {} sec'.format(round(time.time() - starttime, 2)))

train meta-classifier
MLP5(
  (linear): Sequential(
    (0): Linear(in_features=10, out_features=5, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=5, out_features=2, bias=True)
    (3): ReLU(inplace=True)
    (4): Linear(in_features=2, out_features=2, bias=True)
    (5): Tanh()
  )
)
True
True
clf_norm_scale_.pt




Train Epoch: 0 Loss: 0.649296

Epoch 0 Test set: Average loss: 0.6664, Accuracy: 4583/7606 (60.2551%)

laber 0 acc: 0.23
laber 1 acc: 0.97
Train Epoch: 10 Loss: 0.332008

Epoch 10 Test set: Average loss: 0.3374, Accuracy: 6730/7606 (88.4828%)

laber 0 acc: 0.90
laber 1 acc: 0.87
Train Epoch: 20 Loss: 0.247855

Epoch 20 Test set: Average loss: 0.3125, Accuracy: 6823/7606 (89.7055%)

laber 0 acc: 0.91
laber 1 acc: 0.88
Train Epoch: 30 Loss: 0.324523

Epoch 30 Test set: Average loss: 0.3030, Accuracy: 6870/7606 (90.3234%)

laber 0 acc: 0.92
laber 1 acc: 0.88
Train Epoch: 40 Loss: 0.210620

Epoch 40 Test set: Average loss: 0.2930, Accuracy: 6900/7606 (90.7179%)

laber 0 acc: 0.93
laber 1 acc: 0.88
Train Epoch: 50 Loss: 0.282198

Epoch 50 Test set: Average loss: 0.2820, Accuracy: 6949/7606 (91.3621%)

laber 0 acc: 0.93
laber 1 acc: 0.89
Train Epoch: 60 Loss: 0.282088

Epoch 60 Test set: Average loss: 0.2757, Accuracy: 6969/7606 (91.6250%)

laber 0 acc: 0.94
laber 1 acc: 0.89
Train Epoch: 70

In [4]:

from scipy.stats import hmean
from scipy import stats

def get_p_value(arrA, arrB, alternative='greater'):
    """Return the p-value of a two-sample t-test without the equal-variance assumption.

    Args:
        arrA: First sample (array-like).
        arrB: Second sample (array-like).
        alternative: Alternative hypothesis forwarded to
            ``scipy.stats.ttest_ind``.

    Returns:
        The p-value reported by ``stats.ttest_ind(..., equal_var=False)``.
    """
    result = stats.ttest_ind(
        np.array(arrA),
        np.array(arrB),
        alternative=alternative,
        equal_var=False,
    )
    return result.pvalue


def mult_test(prob_f, prob_nf, seed, m, mult_num=40, alternative='greater'):
    """Repeat a t-test on random paired subsamples of two probability arrays.

    In each of ``mult_num`` rounds, ``m`` distinct indices are drawn with
    ``random.sample`` and the same indices are used to subsample both
    ``prob_f`` and ``prob_nf``.

    Args:
        prob_f: Array that supports indexing by a list of integer positions
            (e.g. a NumPy array).
        prob_nf: Array indexed with the same positions as ``prob_f``.
        seed: Seed applied to the torch, NumPy and ``random`` generators.
        m: Number of indices drawn per round; ``random.sample`` raises
            ``ValueError`` if it exceeds ``len(prob_f)``.
        mult_num: Number of rounds.
        alternative: Alternative hypothesis forwarded to ``get_p_value``.

    Returns:
        ``(p_list, mu_list)``: per-round p-values and per-round differences
        ``mean(subprob_f) - mean(subprob_nf)``.

    Note:
        Reseeds the global RNGs and sets the cuDNN determinism flags as a
        side effect.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # The index population does not change between rounds, so build it once.
    population = list(range(len(prob_f)))

    p_list = []
    mu_list = []
    for _ in range(mult_num):
        sample_list = random.sample(population, m)

        subprob_f = prob_f[sample_list]
        subprob_nf = prob_nf[sample_list]
        p_list.append(get_p_value(subprob_f, subprob_nf, alternative))
        mu_list.append(np.mean(subprob_f) - np.mean(subprob_nf))
    return p_list, mu_list


def get_prob_pair(clf, sus_list, device):
    """Run the classifier on every sample and collect its raw outputs.

    Args:
        clf: Callable model applied to one tensor at a time.
        sus_list: Sequence of NumPy arrays, one per sample.
        device: Torch device each sample is moved to before the forward pass.

    Returns:
        List of NumPy arrays, one classifier output per input sample, in
        input order.
    """
    prob_sus = []
    # Inference only: disabling autograd avoids building (and holding on to)
    # a computation graph for every single sample.
    with torch.no_grad():
        for sample in sus_list:
            sus_o = torch.from_numpy(sample).to(device)
            out_sus = clf(sus_o)
            prob_sus.append(out_sus.cpu().detach().numpy())

    return prob_sus

def verification_test(sus_output, poison_output, steal_name='None'):
    """Print the ownership-test p-value and mean gap for one suspicious model.

    The difference ``sus_output - poison_output`` is fed sample by sample to
    the notebook-level ``clf`` (on the notebook-level ``device``), the
    outputs are turned into probabilities with a softmax, and ``mult_test``
    compares the class-1 column against the class-0 column using a fixed
    seed of 100 and subsample size of 100.

    Args:
        sus_output: Outputs of the suspicious model.
        poison_output: Outputs of the poisoned model, subtracted element-wise.
        steal_name: Label printed in front of the result.
    """
    seed = 100
    m = 100

    clf_outputs = get_prob_pair(clf, sus_output - poison_output, device)
    probs = torch.softmax(torch.from_numpy(np.array(clf_outputs)), dim=1)

    # (benign, 0) and (vict, 1)
    prob_stolen = probs[:, 1].numpy()
    prob_benign = probs[:, 0].numpy()

    p_list, mu_list = mult_test(prob_stolen, prob_benign, seed=seed, m=m, mult_num=40, alternative='greater')
    print('{}:  p-val: {} mu: {}'.format(steal_name, hmean(p_list), np.mean(mu_list)))

def verification_test_2(sus_output, poison_output, seed, m, steal_name='None'):
    """Print the ownership-test p-value and mean gap, with preprocessing.

    Same procedure as ``verification_test``, except that the seed and
    subsample size are parameters and the difference vectors are
    preprocessed according to the notebook-level ``ckpt_name``: L2
    normalisation if it contains ``'norm'``, robust scaling if it contains
    ``'scale'``. Uses the notebook-level ``clf`` and ``device``.

    Args:
        sus_output: Outputs of the suspicious model.
        poison_output: Outputs of the poisoned model, subtracted element-wise.
        seed: Seed applied to the torch, NumPy and ``random`` generators and
            forwarded to ``mult_test``.
        m: Subsample size forwarded to ``mult_test``.
        steal_name: Label printed in front of the result.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Preprocessing is inferred from the checkpoint file name.
    if_norm = 'norm' in ckpt_name
    if_scale = 'scale' in ckpt_name

    sus_diff = sus_output - poison_output

    if if_norm:
        normalizer = Normalizer(norm='l2')
        sus_diff = normalizer.transform(sus_diff)

    if if_scale:
        # NOTE(review): the scaler is re-fit on the suspicious data here
        # rather than loading the scaler.pkl that get_outputs_set saves at
        # training time; confirm that is intended.
        scaler = RobustScaler()
        sus_diff = scaler.fit_transform(sus_diff)

    sus_diff = get_prob_pair(clf, sus_diff, device)

    softmax = torch.nn.Softmax(dim=1)
    softmax_sus_diff = softmax(torch.from_numpy(np.array(sus_diff)))

    # (benign, 0) and (vict, 1)
    sus_diff_stolen = softmax_sus_diff[:, 1]
    sus_diff_benign = softmax_sus_diff[:, 0]

    p_list, mu_list = mult_test(sus_diff_stolen.numpy(), sus_diff_benign.numpy(), seed=seed, m=m, mult_num=40, alternative='greater')
    print('{}:  p-val: {} mu: {}'.format(steal_name, hmean(p_list), np.mean(mu_list)))


In [5]:
# verification

seed = 300
m = 100

# Saved outputs of every suspicious model to verify, keyed by the label
# that is printed next to its result.
outputset_dict = {
    'Direct-copy': './outputset-ckpt/stolen/Direct-copy/benign_sample_train_logits_list.npy',
    'Fine-tuning': './outputset-ckpt/stolen/Fine-tuning/benign_sample_train_logits_list.npy',
    'Data Distill': './outputset-ckpt/stolen/Data-distill/benign_sample_train_logits_list.npy',
    'Data-free Distill': './outputset-ckpt/stolen/Data-free-distill/benign_sample_train_logits_list.npy',
    'Hard Distill': './outputset-ckpt/stolen/Hard-distill/benign_sample_train_logits_list.npy',
    'Soft Distill': './outputset-ckpt/stolen/Soft-distill/benign_sample_train_logits_list.npy',
    'Independent': './outputset-ckpt/stolen/Independent/benign_sample_train_logits_list.npy',
}

# Run the verification once per suspicious model, in dictionary order.
for steal_name, sus_output_dir in outputset_dict.items():
    sus_output = np.load(sus_output_dir)
    verification_test_2(sus_output, poison_output, seed, m, steal_name)
    print()
    




Direct-copy:  p-val: 1.9865974516909697e-18 mu: 0.240617036819458

Fine-tuning:  p-val: 5.957258553445839e-16 mu: 0.2647797167301178

Data Distill:  p-val: 4.526801730669385e-09 mu: 0.1560070812702179

Data-free Distill:  p-val: 4.666670116955149e-11 mu: 0.1875937581062317

Hard Distill:  p-val: 1.0684542526591873e-05 mu: 0.06177513673901558

Soft Distill:  p-val: 0.0030668361297120695 mu: 0.054538749158382416

Independent:  p-val: 0.48637868587127725 mu: -0.08097676932811737

