# Model recovery attack in split learning with multiple data owners

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the news-popularity table, shuffle the row order, and discard the
# "url" column; the cell's output is the (rows, columns) shape of what is left.
df = (
    pd.read_csv('../datasets/news-popularity.csv')
    .sample(frac=1)
    .drop(columns=["url"])
)
df.shape

(39644, 60)

In [3]:
def make_dataset(X, Y, f):
    """Pair features with targets in a shuffled ``tf.data.Dataset``.

    Args:
        X: Feature array, sliced along its first axis into one element per row.
        Y: Target array, sliced the same way; must have as many rows as ``X``.
        f: Function mapped over every feature element (targets are untouched).

    Returns:
        A dataset of ``(f(x), y)`` pairs, shuffled with a 10000-element buffer.
    """
    features = tf.data.Dataset.from_tensor_slices(X).map(f)
    targets = tf.data.Dataset.from_tensor_slices(Y)
    return tf.data.Dataset.zip((features, targets)).shuffle(10000)

# Load the table, shuffle the rows, and drop the "url" column so that only
# the remaining columns are converted to the array below.
df = pd.read_csv('../datasets/news-popularity.csv').sample(frac=1)
df = df.drop(columns=["url"])
xy = df.to_numpy()

# The last column is the regression target; every other column is a feature.
x = xy[:, :-1]
y = xy[:, -1].reshape((len(x), 1))

# Per-feature bounds read straight from the array. nanmin/nanmax skip NaN the
# same way DataFrame.describe() does, without building two summary tables.
min_values = np.nanmin(x, axis=0)
max_values = np.nanmax(x, axis=0)

# Min-max scale the features into [0, 1]. A constant column has zero range and
# would become NaN under a plain division, so its divisor is replaced by 1
# (such a column is mapped to 0).
feature_range = max_values - min_values
x = (x - min_values) / np.where(feature_range == 0, 1, feature_range)

# Same scaling for the target, using vectorised reductions instead of the
# Python builtins (which iterate over the array one row at a time).
y_min, y_max = y.min(), y.max()
y_range = y_max - y_min
y = (y - y_min) / (y_range if y_range != 0 else 1)

# Identity map: the features are used exactly as scaled above.
train_ds = make_dataset(x, y, lambda t: t)

train_size = len(x)

In [29]:
def make_f(input_shape):
    """Build the model ``f`` that maps raw features to a 256-wide representation.

    Architecture: BatchNormalization, then ReLU Dense layers of width
    64, 128 and 256.

    Args:
        input_shape: Shape of one input sample (without the batch axis).

    Returns:
        A ``tf.keras.Model`` from the input to the 256-unit Dense output.
    """
    dense = tf.keras.layers.Dense
    inputs = tf.keras.layers.Input(input_shape)
    hidden = tf.keras.layers.BatchNormalization()(inputs)
    for width in (64, 128):
        hidden = dense(width, activation="relu")(hidden)
    representation = dense(256, activation="relu")(hidden)
    return tf.keras.Model(inputs, representation)

def make_g(input_shape):
    """Build the model ``g`` that turns a representation into one scalar output.

    Architecture: Dense(128, ReLU), Dropout(0.5), Dense(1) with no activation.

    Args:
        input_shape: Shape of one input sample (without the batch axis).

    Returns:
        A ``tf.keras.Model`` from the input to the single-unit Dense output.
    """
    inputs = tf.keras.layers.Input(input_shape)
    hidden = tf.keras.layers.Dense(128, activation="relu")(inputs)
    regularised = tf.keras.layers.Dropout(0.5)(hidden)
    prediction = tf.keras.layers.Dense(1)(regularised)
    return tf.keras.Model(inputs, prediction)

# Shape of one feature sample, read from the (features, target) element spec.
input_shape = train_ds.element_spec[0].shape
f = make_f(input_shape)

# Ask the model for its output shape instead of reaching into its last layer:
# the value is the same, and it does not depend on the per-layer
# `output_shape` attribute, which newer Keras releases no longer provide.
# Dropping the first entry removes the batch axis.
intermediate_shape = f.output_shape[1:]
g = make_g(intermediate_shape)

In [30]:
# Optimiser step size used when training f and g.
learning_rate = 1e-3

# Number of samples per training batch.
batch_size = 32

# `iterations` counts batches, not epochs. It is fixed at 100 here; a run
# covering `epoches` full passes over the data would instead use
#     iterations = epoches * train_size // batch_size
epoches = 1
iterations = 100

In [31]:
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Take exactly `iterations` batches (not a separate hard-coded count) so that
# train_ref / z_ref always end up with `iterations` entries, which is what the
# attack cell assumes when it indexes them.
train_batches = train_ds.batch(batch_size=batch_size, drop_remainder=True).repeat(-1).take(iterations)

# Per-iteration record of the input batch and of f's output for that batch.
train_ref = []
z_ref = []

log = []
iter_count = 0
log_frequency = 100

# Built once and reused: the loss object is stateless across calls.
mse_loss = tf.keras.losses.MeanSquaredError()

for (x_batch, y_batch) in train_batches:

    # A single gradient is requested per step, so a non-persistent tape is
    # enough and its resources are released right after tape.gradient().
    with tf.GradientTape() as tape:
        z = f(x_batch, training=True)
        y_pred = g(z, training=True)
        loss = mse_loss(y_true=y_batch, y_pred=y_pred)

    # f and g are trained jointly with one optimizer.
    var = f.trainable_variables + g.trainable_variables
    grad = tape.gradient(loss, var)
    optimizer.apply_gradients(zip(grad, var))

    log.append(loss)
    iter_count += 1

    # z was computed before this step's weight update was applied.
    train_ref.append(x_batch)
    z_ref.append(z)

    # Prints on iterations 1, 1 + log_frequency, 1 + 2 * log_frequency, ...
    if (iter_count - 1) % log_frequency == 0:
        print("Iteration %04d: Training loss: %0.4f" % (iter_count, loss))

Iteration 0001: Training loss: 0.0554


In [32]:
def make_generator(input_shape, output_dim=59):
    """Build the network that maps a representation back to a feature vector.

    Architecture: ReLU Dense layers of width 1024, 512, 256 and 128, followed
    by a sigmoid Dense layer of width ``output_dim``. The sigmoid keeps every
    output in (0, 1), matching features that were min-max scaled to [0, 1].

    Args:
        input_shape: Shape of one input sample (without the batch axis).
        output_dim: Number of features to reconstruct. Defaults to 59, the
            feature count of the news-popularity table once "url" and the
            target column are removed.

    Returns:
        A ``tf.keras.Model`` from the input to the reconstructed features.
    """
    xin = tf.keras.layers.Input(input_shape)
    act = "relu"
    x = xin
    for units in (1024, 512, 256, 128):
        x = tf.keras.layers.Dense(units, activation=act)(x)
    x = tf.keras.layers.Dense(output_dim, activation="sigmoid")(x)
    return tf.keras.Model(xin, x)

In [34]:
# Working copy of f: same architecture, initialised with f's current weights.
f_temp = tf.keras.models.clone_model(f)
f_temp.set_weights(f.get_weights())

generator = make_generator(intermediate_shape)

# Separate optimizers: the generator moves 10x faster than the copy of f.
x_opt = tf.keras.optimizers.Adam(learning_rate=0.001)
f_opt = tf.keras.optimizers.Adam(learning_rate=0.0001)

# Alternating schedule: in every round the generator takes `generator_steps`
# optimizer steps, then f_temp takes `f_steps`.
attack_rounds = 100
generator_steps = 20
f_steps = 1

# Built once and reused for every loss / metric below.
attack_loss = tf.keras.losses.MeanSquaredError()

attack_iter_count = 0

for i in range(1):

    # Most recent recorded (representation, input) pair first. Negative
    # indexing selects the same element as `iterations - i - 1` when the lists
    # hold `iterations` entries, without depending on that constant.
    z = z_ref[-(i + 1)]
    x = train_ref[-(i + 1)]

    generator_vars = generator.trainable_variables
    f_temp_vars = f_temp.trainable_variables

    for _round in range(attack_rounds):

        # Generator update: make f_temp(generator(z)) reproduce z, f_temp fixed.
        for _step in range(generator_steps):
            with tf.GradientTape() as tape:
                x_temp = generator(z, training=True)
                loss_x = attack_loss(f_temp(x_temp, training=False), z)
            grad = tape.gradient(loss_x, generator_vars)
            x_opt.apply_gradients(zip(grad, generator_vars))

        # Re-evaluate the reconstruction with the generator's latest weights.
        # The x_temp left over from the loop above predates the final
        # optimizer step, so using it would train f_temp on (and finally
        # report) a stale reconstruction.
        x_temp = generator(z, training=False)

        # f_temp update: same objective, with the reconstruction held fixed.
        for _step in range(f_steps):
            with tf.GradientTape() as tape:
                loss_f = attack_loss(f_temp(x_temp, training=True), z)
            grad = tape.gradient(loss_f, f_temp_vars)
            f_opt.apply_gradients(zip(grad, f_temp_vars))

    # Reconstruction error against the true batch, next to the error of a
    # uniformly random guess in [0, 1) as a baseline.
    attack_mse = attack_loss(x_temp, x)
    rg_uniform = attack_loss(x, np.random.rand(*(x.numpy().shape)))
    attack_iter_count += 1
    print("Iteration %04d: RG: %0.4f reconstruction validation: %0.4f" % (attack_iter_count, rg_uniform, attack_mse))

Iteration 0001: RG: 0.2567 reconstruction validation: 0.2454


In [15]:
# Display the value of the last f_temp loss computed by the attack loop.
loss_f

<tf.Tensor: shape=(), dtype=float32, numpy=0.00038563198>