## Tensorflow 2.x Setup


In [1]:
import random
import numpy as np
import math
import matplotlib.pyplot as plt

# tensorflow
import tensorflow as tf
print(tf.__version__)

# pytorch
import torch
print(torch.__version__)

# scikit-learn
import sklearn
from sklearn import cluster, decomposition, manifold
import pandas as pd

import scipy
from scipy.stats import entropy
from scipy.stats import dirichlet

cwd = 'rl-starter-files/storage/'

2.5.0
1.9.0


### Get action probabilities

In [None]:
# shadow (seed 0~4)
# IN: label_0 ... label_4
# OUT: label_5 ... label_9

# attack
# concat(shadow_seed_0, label_0) = IN
# concat(shadow_seed_5, label_5) = OUT

# attack on concat(shadow_seed_1, label_1)

# victim, whether seed 3 is used in training
# attack on concat(victim_seed_3, label_3) ==> IN/OUT

In [3]:
'''
You can change the file directories to your models
'''
df = pd.read_csv(cwd+'mr_shadow_1/probabilities.csv')
df0 = pd.read_csv(cwd+'mr_shadow_5/probabilities.csv')
df1 = pd.read_csv(cwd+'mr_label_1/probabilities.csv')
df2 = pd.read_csv(cwd+'mr_label_5/probabilities.csv')

In [None]:
def reshape_data(dataframe):
    # reshape the data into the shape that fit the attack model
    data = dataframe.to_numpy()
    data = data.reshape((len(data)//64, 64, 7))
    data = data.swapaxes(1,2)
    return data

data0 = reshape_data(df)
data00 = reshape_data(df0)
data1 = reshape_data(df1)
data2 = reshape_data(df2)
data0.shape

(3200, 7, 64)

In [None]:
# Put IN and OUT data together
# You can concatenate more data if you have
in_data = np.concatenate([np.concatenate([data0[:1600], data1[:1600]], axis=1),
                          np.concatenate([data00[:1600], data2[:1600]], axis=1)
                          # add more data if there are
                         ])

out_data = np.concatenate([np.concatenate([data0[:1600], data2[:1600]], axis=1),
                           np.concatenate([data00[:1600], data1[:1600]], axis=1)
                           # add more data if there are
                          ])
in_data.shape, out_data.shape

((3200, 14, 64), (3200, 14, 64))

In [None]:
def get_att_data(in_data, out_data):
    # Generate IN/OUT labels for the data
    in_label = [1.0]*len(in_data)
    out_label = [0.0]*len(out_data)
    labels = in_label + out_label
    in_data = [d for d in in_data]
    out_data = [d for d in out_data]
    data = in_data + out_data

    c = list(zip(data, labels))
    random.shuffle(c)
    data, labels = zip(*c)
    return np.array(data), np.array(labels)

def get_label_vector(labels):
    # Convert labels into vector form (one-hot embedding)
    label_vectors = np.zeros((len(labels), np.max(labels)+1))
    for i in range(len(labels)):
        label_vectors[i, labels[i]] = 1
    return label_vectors

In [None]:
data, labels = get_att_data(in_data, out_data)
label_vec = get_label_vector(np.array(labels, dtype=np.int))

test_size = 2000
data, test_data = data[:-test_size], data[-test_size:]
label_vec, test_label = label_vec[:-test_size], label_vec[-test_size:]

data.shape, label_vec.shape

((4400, 14, 64), (4400, 2))

### Attack Model

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Softmax
def build_att():
    model = Sequential()
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(2))
    model.add(Softmax())
    opt = keras.optimizers.Adam(learning_rate=0.001)
    precision = keras.metrics.Precision(class_id=0)
    recall = keras.metrics.Recall(class_id=0)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy', precision, recall])
    return model

In [None]:
attack_model = build_att()
attack_model.fit(x=data, y=label_vec, batch_size=64, epochs=15, verbose=True)
attack_model.evaluate(x=test_data, y=test_label, batch_size=64)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[0.18781711161136627,
 0.9194999933242798,
 0.8759757280349731,
 0.9824903011322021]

### Privacy Preserving

In [None]:
def attack_accuracy(model, victim, in_label, out_label):
    # Evaluate the attacking on the victim
    # in_label, out_label are the probalities collected from in/out model
    test = np.concatenate([victim, in_label], axis=1)
    pred1 = np.argmax(model.predict(test), axis=1)
    acc = sklearn.metrics.accuracy_score(np.ones((len(in_label,))), pred1)

    test = np.concatenate([victim, out_label], axis=1)
    pred2 = np.argmax(model.predict(test), axis=1)
    print('Number of predicted IN data: ', np.sum(pred1+pred2))
    acc += sklearn.metrics.accuracy_score(np.zeros((len(out_label,))), pred2)
    print('Accuracy: ', acc/2, '\n')
    return acc/2

In [None]:
'''
You can change the file directories to your models
'''
victim_df = pd.read_csv(cwd+'data/mr_0/probabilities.csv')
victim = reshape_data(victim_df)
print('Attack Model without Protection:')
attack_accuracy(attack_model, victim, reshape_data(pd.read_csv(cwd+'data/mr0/probabilities.csv')), data2)

Attack Model without Protection:
Number of predicted IN data:  2728
Accuracy:  0.92625 



0.92625

In [None]:
'''
You can change the file directories to your protected models
'''
prot_victim_df = pd.read_csv(cwd+'data/mr_k_1/probabilities.csv')
prot_victim = reshape_data(prot_victim_df)
print('Attack Model with Dirichlet k=1:')
attack_accuracy(attack_model, prot_victim, data1,data2)

prot_victim_df = pd.read_csv(cwd+'data/mr_k_10/probabilities.csv')
prot_victim = reshape_data(prot_victim_df)
print('Attack Model with Dirichlet k=10:')
attack_accuracy(attack_model, prot_victim, data1, data2)

prot_victim_df = pd.read_csv(cwd+'data/mr_k_100/probabilities.csv')
prot_victim = reshape_data(prot_victim_df)
print('Attack Model with Dirichlet k=100:')
attack_accuracy(attack_model, prot_victim, data1, data2)

Attack Model with Dirichlet k=1:
Number of predicted IN data:  0
Accuracy:  0.5 

Attack Model with Dirichlet k=10:
Number of predicted IN data:  0
Accuracy:  0.5 

Attack Model with Dirichlet k=100:
Number of predicted IN data:  654
Accuracy:  0.6021875 



0.6021875

### Train Attack Model to fit Dirichlet

In [None]:
def dirichlet_dist(traj, k=1, decay_rate=1, decay_step=10000):
    # apply dirichlet distribution to the trajectories
    for i in range(len(traj)):
        alpha = np.exp(traj[i]) * k * (decay_rate ** (i//decay_step))
        rv = dirichlet.rvs(alpha, size=1, random_state=None)[0]
        while np.min(rv)==0:
            rv = dirichlet.rvs(alpha, size=1, random_state=None)[0]
        traj[i] = np.log(rv)
    return traj

def reshape_data_(data):
    data = data.reshape((len(data)//64, 64, 7))
    data = data.swapaxes(1,2)
    return data

def apply_dirichlet(dataframe, k, decay_rate, decay_step):
    data = dataframe.to_numpy()
    data = dirichlet_dist(data, k, decay_rate, decay_step)
    return reshape_data_(data)

def dirichlet_experiment(k=1, decay_rate=1, decay_step=10000):
    # Experiments for exploring how constant k and its decay will affect
    # the attacking accuracy
    # You can change the file directories to run your own experiments
    print('Experiment k = ', k)
    df = pd.read_csv(cwd+'data/mr_1/probabilities.csv')
    df0 = pd.read_csv(cwd+'data/mr_5/probabilities.csv')
    df1 = pd.read_csv(cwd+'data/mr1/probabilities.csv')
    df2 = pd.read_csv(cwd+'data/mr5/probabilities.csv')
    data0 = apply_dirichlet(df, k, decay_rate, decay_step)
    data00 = apply_dirichlet(df0, k, decay_rate, decay_step)
    data1 = reshape_data(df1)
    data2 = reshape_data(df2)

    in_data = np.concatenate([np.concatenate([data0[:1600], data1[:1600]], axis=1),
                          np.concatenate([data00[:1600], data2[:1600]], axis=1)])

    out_data = np.concatenate([np.concatenate([data0[:1600], data2[:1600]], axis=1),
                              np.concatenate([data00[:1600], data1[:1600]], axis=1)])
    
    data, labels = get_att_data(in_data, out_data)
    label_vec = get_label_vector(np.array(labels, dtype=np.int))
    attack_model = build_att()
    attack_model.fit(x=data, y=label_vec, batch_size=64, epochs=10, verbose=True)

    victim_df = pd.read_csv(cwd+'data/mr_1/probabilities.csv')
    victim = reshape_data(victim_df)
    print('Attack Model without Protection:')
    attack_accuracy(attack_model, victim, data1,data2)

    prot_victim_df = pd.read_csv(cwd+'data/mr_k_1/probabilities.csv')
    prot_victim = reshape_data(prot_victim_df)
    print('Attack Model with Dirichlet k=1:')
    attack_accuracy(attack_model, prot_victim, data1, data2)

    prot_victim_df = pd.read_csv(cwd+'data/mr_k_10/probabilities.csv')
    prot_victim = reshape_data(prot_victim_df)
    print('Attack Model with Dirichlet k=10:')
    attack_accuracy(attack_model, prot_victim, data1, data2)

    prot_victim_df = pd.read_csv(cwd+'data/mr_k_100/probabilities.csv')
    prot_victim = reshape_data(prot_victim_df)
    print('Attack Model with Dirichlet k=100:')
    attack_accuracy(attack_model, prot_victim, data1, data2)
    print()

In [None]:
dirichlet_experiment(k=1)
dirichlet_experiment(k=10)
dirichlet_experiment(k=100)

Experiment k =  1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Attack Model without Protection:
Number of predicted IN data:  33
Accuracy:  0.50171875 

Attack Model with Dirichlet k=1:
Number of predicted IN data:  0
Accuracy:  0.5 

Attack Model with Dirichlet k=10:
Number of predicted IN data:  847
Accuracy:  0.50015625 

Attack Model with Dirichlet k=100:
Number of predicted IN data:  438
Accuracy:  0.503125 


Experiment k =  10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Attack Model without Protection:
Number of predicted IN data:  6329
Accuracy:  0.48890625 

Attack Model with Dirichlet k=1:
Number of predicted IN data:  0
Accuracy:  0.5 

Attack Model with Dirichlet k=10:
Number of predicted IN data:  1092
Accuracy:  0.5 

Attack Model with Dirichlet k=100:
Number of predicted IN data:  6297
Accuracy:  0.48453124999999997 


Experiment k =  100
E

### Dirichlet k decay

In [25]:
dirichlet_experiment(k=100, decay_rate=0.7)

Experiment k =  100
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Attack Model without Protection:
Number of predicted IN data:  0
Accuracy:  0.5 

Attack Model with Dirichlet k=1:
Number of predicted IN data:  0
Accuracy:  0.5 

Attack Model with Dirichlet k=10:
Number of predicted IN data:  0
Accuracy:  0.5 

Attack Model with Dirichlet k=100:
Number of predicted IN data:  0
Accuracy:  0.5 


