# Wisconsin Breast Cancer Membership Inference Attack

In [3]:
# lib imports
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from IPython.core.magic import register_cell_magic
from sklearn.datasets import load_breast_cancer

@register_cell_magic
def skip(line, cell):
    return

### Load and preprocess dataset, divide into victim and shadow subsets.

In [4]:
# load direct from csv
# df_full = pd.read_csv('datasets-csv/breast-cancer-wisconsin-data.csv')

# load from sklearn
data = load_breast_cancer()
df_full = pd.DataFrame(data.data, columns=data.feature_names)  
df_full['diagnosis'] = data.target

df_full.drop(columns=['Unnamed: 32', 'id'], inplace=True, errors='ignore')
label_encoder = LabelEncoder()
df_full['diagnosis'] = label_encoder.fit_transform(df_full['diagnosis'])

features_dim = 30

# Dividing the dataset into victim and shadow subsets. Is this the best way to do this?
df_v = df_full.sample(frac=0.75, replace=False)
df_s = df_full.sample(frac=0.75, replace=False)

### Define training regime with cross-entropy loss.

In [7]:
def train(model, X_train, y_train, num_epochs, learning_rate, batch_size, report_loss=False):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Training loop
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            # Zero the gradients
            optimizer.zero_grad()
        
            # Forward pass
            outputs = model(inputs)
        
            # Compute the loss
            loss = criterion(outputs, labels)
        
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            
            # Track the loss
            running_loss += loss.item()
        
        # Print the average loss for the epoch
        if report_loss: 
            print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")
    
    print("Training complete.")

### Disease classification architecture. 
*Fully-connected feed forward with 3 hidden layers with ReLU activation and Softmax outer layer.*

In [9]:
class DiseaseClassifier(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, hidden3_dim, output_dim):
        super(DiseaseClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden1_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)
        self.fc3 = nn.Linear(hidden2_dim, hidden3_dim)
        self.fc4 = nn.Linear(hidden3_dim, output_dim)
        self.logsoftmax = nn.LogSoftmax(dim=1)        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.relu(out)
        out = self.fc4(out)
        out = self.logsoftmax(out)
        return out

### Build the victim train/test sets and train the victim model.

In [13]:
features = df_v.drop(columns=['diagnosis']).values
labels = df_v['diagnosis'].values

X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(features, labels, test_size=0.5)
X_train_v = torch.tensor(X_train_raw, dtype=torch.float32)
X_test_v = torch.tensor(X_test_raw, dtype=torch.float32)
y_train_v = torch.tensor(y_train_raw, dtype=torch.long)
y_test_v = torch.tensor(y_test_raw, dtype=torch.long)

victim_model = DiseaseClassifier(features_dim, 1024, 1024, 512, 2)

train(victim_model, X_train_v, y_train_v, 50, .001, 32, True)

Epoch 1/50, Loss: 26.307646398033416
Epoch 2/50, Loss: 23.04261016845703
Epoch 3/50, Loss: 5.932648113795689
Epoch 4/50, Loss: 1.6294241973331995
Epoch 5/50, Loss: 0.8013491800853184
Epoch 6/50, Loss: 0.5050253974539893
Epoch 7/50, Loss: 0.5630670977490289
Epoch 8/50, Loss: 0.44457744913441793
Epoch 9/50, Loss: 0.350696362555027
Epoch 10/50, Loss: 0.29382953686373575
Epoch 11/50, Loss: 0.3229076990059444
Epoch 12/50, Loss: 0.2563119849988392
Epoch 13/50, Loss: 0.26652735045978
Epoch 14/50, Loss: 0.2408284502370017
Epoch 15/50, Loss: 0.22340301317828043
Epoch 16/50, Loss: 0.22129004555089132
Epoch 17/50, Loss: 0.24846419292901242
Epoch 18/50, Loss: 0.2903527042695454
Epoch 19/50, Loss: 0.2632482328585216
Epoch 20/50, Loss: 0.22504276356526784
Epoch 21/50, Loss: 0.21973119250365666
Epoch 22/50, Loss: 0.2112953258412225
Epoch 23/50, Loss: 0.23559836085353578
Epoch 24/50, Loss: 0.21252978593111038
Epoch 25/50, Loss: 0.19695965200662613
Epoch 26/50, Loss: 0.23349442332983017
Epoch 27/50, Lo

### Build the shadow train/test sets and train the shadow model.

In [12]:
features = df_s.drop(columns=['diagnosis']).values
labels = df_s['diagnosis'].values

X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(features, labels, test_size=0.5)
X_train_s = torch.tensor(X_train_raw, dtype=torch.float32)
X_test_s = torch.tensor(X_test_raw, dtype=torch.float32)
y_train_s = torch.tensor(y_train_raw, dtype=torch.long)
y_test_s = torch.tensor(y_test_raw, dtype=torch.long)

shadow_model = DiseaseClassifier(features_dim, 1024, 1024, 512, 2)

train(shadow_model, X_train_s, y_train_s, 2000, .001, 32)

Training complete.


### Define and test prediction accuracy of victim and shadow models. 

In [14]:
def output_predictions(outputs):
    _,predicted = torch.max(outputs.data, 1) 
    return predicted

def prediction_accuracy(outputs, y_test, report_predictions=False):
    with torch.no_grad():
        predicted = output_predictions(outputs)
        if report_predictions:
            print(predicted)
        accuracy = (predicted == y_test).sum().item() / y_test.size(0)
        print(f"Test Accuracy: {accuracy}")

victim_model.eval()
shadow_model.eval()

test_outputs_v = victim_model(X_test_v)
test_outputs_s = shadow_model(X_test_s)

prediction_accuracy(test_outputs_v, y_test_v)
prediction_accuracy(test_outputs_s, y_test_s)

Test Accuracy: 0.9532710280373832
Test Accuracy: 0.9439252336448598


### Build attack model train and test datasets.

In [16]:
# attack training dataset
train_outputs_s = shadow_model(X_train_s)

train_outputs_s_predictions = output_predictions(train_outputs_s)
test_outputs_s_predictions = output_predictions(test_outputs_s)

zerovec = torch.full([test_outputs_s.size(dim=0)], 0)
onevec = torch.full([train_outputs_s.size(dim=0)], 1)

pre_xta1 = torch.cat((train_outputs_s, test_outputs_s)).detach().numpy()
pre_xta2 = torch.cat((train_outputs_s_predictions, test_outputs_s_predictions)).detach().numpy().astype(np.float32)
pre_xta3 = np.hstack((pre_xta1, pre_xta2.reshape(len(pre_xta2), 1)))
X_train_a = torch.from_numpy(np.hstack((np.vstack((X_train_s.detach().numpy(),X_test_s.detach().numpy())), pre_xta3)))
y_train_a = torch.cat((onevec,zerovec))

# attack test dataset
# train_outputs_v = victim_model(X_train_v)
train_outputs_v = shadow_model(X_train_v)
    
train_outputs_v_predictions = output_predictions(train_outputs_v)
test_outputs_v_predictions = output_predictions(test_outputs_v)

zerovec = torch.full([test_outputs_v.size(dim=0)], 0)
onevec = torch.full([train_outputs_v.size(dim=0)], 1)

pre_xta1 = torch.cat((train_outputs_v, test_outputs_v)).detach().numpy()
pre_xta2 = torch.cat((train_outputs_v_predictions, test_outputs_v_predictions)).detach().numpy().astype(np.float32)
pre_xta3 = np.hstack((pre_xta1, pre_xta2.reshape(len(pre_xta2), 1)))
X_test_a = torch.from_numpy(np.hstack((np.vstack((X_train_v.detach().numpy(),X_test_v.detach().numpy())), pre_xta3)))
y_test_a = torch.cat((onevec,zerovec))

### Attack model architecture.

In [18]:
class AttackClassifier(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(AttackClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden1_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)
        self.relu = nn.ReLU()  # Do I need to do this a second time?
        self.fc3 = nn.Linear(hidden2_dim, output_dim)
        self.logsoftmax = nn.LogSoftmax(dim=1)        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.logsoftmax(out)
        return out

### Build and train the attack model.

In [20]:
attack_model = AttackClassifier(33, 64, 32, 2)

train(attack_model, X_train_a, y_train_a, 2000, .0001, 32, True)

Epoch 1/2000, Loss: 2.195226890700204
Epoch 2/2000, Loss: 1.601172047001975
Epoch 3/2000, Loss: 1.1351871362754278
Epoch 4/2000, Loss: 0.9158543348312378
Epoch 5/2000, Loss: 0.8677207274096352
Epoch 6/2000, Loss: 0.8190091635499682
Epoch 7/2000, Loss: 0.837008501802172
Epoch 8/2000, Loss: 0.8001594202859061
Epoch 9/2000, Loss: 0.7974970638751984
Epoch 10/2000, Loss: 0.7900895093168531
Epoch 11/2000, Loss: 0.7910087789808001
Epoch 12/2000, Loss: 0.7895462768418449
Epoch 13/2000, Loss: 0.7996593926634107
Epoch 14/2000, Loss: 0.738635493176324
Epoch 15/2000, Loss: 0.7927975739751544
Epoch 16/2000, Loss: 0.7791566933904376
Epoch 17/2000, Loss: 0.7589760550430843
Epoch 18/2000, Loss: 0.7395283920424325
Epoch 19/2000, Loss: 0.7496622971126011
Epoch 20/2000, Loss: 0.7417766749858856
Epoch 21/2000, Loss: 0.7204858873571668
Epoch 22/2000, Loss: 0.7346658153193337
Epoch 23/2000, Loss: 0.7796616554260254
Epoch 24/2000, Loss: 0.7198171615600586
Epoch 25/2000, Loss: 0.7289726606437138
Epoch 26/2000

### Evaluate accuracy of attack model.

In [22]:
attack_model.eval()

test_outputs_a = attack_model(X_test_a)

prediction_accuracy(test_outputs_a, y_test_a, False)

Test Accuracy: 0.6229508196721312
