In [1]:
# Import necessary libraries
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

# Load the dataset
diabetes_data = pd.read_csv("diabetes.csv")

# Separate the target ('Outcome') from the features
X = diabetes_data.drop('Outcome', axis=1)
y = diabetes_data['Outcome']

# Standardize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train_torch = torch.FloatTensor(X_train)
y_train_torch = torch.FloatTensor(y_train.values).view(-1, 1)
X_val_torch = torch.FloatTensor(X_val)
y_val_torch = torch.FloatTensor(y_val.values).view(-1, 1)

In [2]:
class DiabetesNN(nn.Module):
    def __init__(self, input_dim):
        super(DiabetesNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        return torch.sigmoid(self.fc4(x))

# Initialize the model
model = DiabetesNN(X_train.shape[1])

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

# Training loop
num_epochs = 10
start_time = time.time()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()
end_time = time.time()

# Calculate training time
training_time = end_time - start_time

# Model evaluation
model.eval()
with torch.no_grad():
    val_outputs = model(X_val_torch)
    val_predictions = (val_outputs > 0.5).float()
    correct = (val_predictions == y_val_torch).sum().item()
    accuracy = correct / len(y_val)

# Print results
print(f"Model Accuracy: {accuracy:.4f}")
print(f"Training Time: {training_time:.2f} seconds")

Model Accuracy: 0.7792
Training Time: 0.15 seconds


In [4]:
!pip install opacus

Collecting opacus
  Downloading opacus-1.4.0-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.8/224.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: opacus
Successfully installed opacus-1.4.0


In [5]:
from torch.utils.data import DataLoader, TensorDataset
from opacus import PrivacyEngine

In [8]:
# Differential Privacy Training
# Hyperparameters
noise_multipliers = [0.5, 1.0, 1.5, 2.0, 3.0]
batch_size = 64
results = []

for noise in noise_multipliers:
    # Create a fresh model, optimizer, and DataLoader for each iteration
    model_dp = DiabetesNN(X_train.shape[1])
    optimizer = optim.Adam(model_dp.parameters())
    train_dataset = TensorDataset(X_train_torch, y_train_torch)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Attach PrivacyEngine to the optimizer
    privacy_engine = PrivacyEngine()
    model_dp, optimizer, train_loader = privacy_engine.make_private(
        module=model_dp,
        optimizer=optimizer,
        data_loader=train_loader,
        noise_multiplier=noise,
        max_grad_norm=1.0,
    )

    # Training loop for DP model
    start_time_dp = time.time()
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs_dp = model_dp(inputs)
            loss_dp = criterion(outputs_dp, labels)
            loss_dp.backward()
            optimizer.step()
    end_time_dp = time.time()

    # Evaluation for DP model
    with torch.no_grad():
        val_outputs_dp = model_dp(X_val_torch)
        val_predictions_dp = (val_outputs_dp > 0.5).float()
        correct_dp = (val_predictions_dp == y_val_torch).sum().item()
        dp_accuracy = correct_dp / len(y_val)



    # Record results
    results.append((noise, dp_accuracy,  end_time_dp - start_time_dp))

# Display DP results
for noise, accuracy, time_spent in results:
    print(f"Noise: {noise}, DP Accuracy: {accuracy:.4f}, DP Training Time: {time_spent:.2f} seconds")

Noise: 0.5, DP Accuracy: 0.6883, DP Training Time: 1.16 seconds
Noise: 1.0, DP Accuracy: 0.6429, DP Training Time: 1.14 seconds
Noise: 1.5, DP Accuracy: 0.6494, DP Training Time: 1.13 seconds
Noise: 2.0, DP Accuracy: 0.6494, DP Training Time: 0.88 seconds
Noise: 3.0, DP Accuracy: 0.6429, DP Training Time: 0.87 seconds
