In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
import time

# Load the dataset

train_data = pd.read_csv("train.csv")



# Separate the target from features
X = train_data.drop('Loan Status', axis=1)
y = train_data['Loan Status']

# Identify categorical and numerical columns
categorical_columns = X.select_dtypes(include=['object']).columns.tolist()
numerical_columns = X.select_dtypes(exclude=['object']).columns.tolist()

# One-hot encode categorical columns
encoder = OneHotEncoder(drop='first', sparse=False)
encoded_features = encoder.fit_transform(X[categorical_columns])
encoded_df = pd.DataFrame(encoded_features, columns=encoder.get_feature_names_out(categorical_columns))

# Combine encoded and numerical features
X_encoded = pd.concat([X.drop(categorical_columns, axis=1), encoded_df], axis=1)

# Standardize numerical features
scaler = StandardScaler()
X_encoded[numerical_columns] = scaler.fit_transform(X_encoded[numerical_columns])

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_encoded, y, test_size=0.2, random_state=42)




In [2]:

# Convert data to PyTorch tensors
X_train_torch = torch.FloatTensor(X_train.values)
y_train_torch = torch.FloatTensor(y_train.values).view(-1, 1)
X_val_torch = torch.FloatTensor(X_val.values)
y_val_torch = torch.FloatTensor(y_val.values).view(-1, 1)

# Define the PyTorch model
class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        return torch.sigmoid(self.fc4(x))

# Initialize the model
model = SimpleNN(X_train.shape[1])

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

# Training loop
num_epochs = 10
start_time = time.time()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()
end_time = time.time()

# Calculate training time
training_time = end_time - start_time

# Model evaluation
model.eval()
with torch.no_grad():
    val_outputs = model(X_val_torch)
    val_predictions = (val_outputs > 0.5).float()
    correct = (val_predictions == y_val_torch).sum().item()
    accuracy = correct / len(y_val)

# Print results
print(f"Model Accuracy: {accuracy:.4f}")
print(f"Training Time: {training_time:.2f} seconds")

Model Accuracy: 0.9097
Training Time: 2.79 seconds


In [5]:
!pip install opacus

Collecting opacus
  Downloading opacus-1.4.0-py3-none-any.whl (224 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/224.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━[0m [32m133.1/224.8 kB[0m [31m3.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.8/224.8 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: opacus
Successfully installed opacus-1.4.0


In [6]:
from opacus import PrivacyEngine
from torch.utils.data import DataLoader, TensorDataset
import time

In [9]:
# Hyperparameters
noise_multipliers = [0.5, 1.0, 1.5, 2.0, 3.0]
num_epochs = 10
batch_size = 64
results = []

for noise in noise_multipliers:
    # Start the training time measurement
    start_time = time.time()

    # Create a fresh model, optimizer, and DataLoader for each iteration
    model_dp = SimpleNN(X_train.shape[1])
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model_dp.parameters())

    train_dataset = TensorDataset(X_train_torch, y_train_torch)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Attach PrivacyEngine to the optimizer
    privacy_engine = PrivacyEngine()
    model_dp, optimizer, train_loader = privacy_engine.make_private(
        module=model_dp,
        optimizer=optimizer,
        data_loader=train_loader,
        noise_multiplier=noise,
        max_grad_norm=1.0,
    )

    # Training loop
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model_dp(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # End the training time measurement
    end_time = time.time()

    # Calculate total training time
    training_time = end_time - start_time

    # Evaluation
    with torch.no_grad():
        val_outputs = model_dp(X_val_torch)
        val_predictions = (val_outputs > 0.5).float()
        correct = (val_predictions == y_val_torch).sum().item()
        dp_accuracy = correct / len(y_val)



    results.append((noise, dp_accuracy, training_time))

# Display results
for noise, accuracy, time_spent in results:
    print(f"Noise: {noise}, Accuracy: {accuracy:.4f}, Training Time: {time_spent:.2f} seconds")


Noise: 0.5, Accuracy: 0.9097, Training Time: 142.92 seconds
Noise: 1.0, Accuracy: 0.9097, Training Time: 143.16 seconds
Noise: 1.5, Accuracy: 0.9097, Training Time: 143.32 seconds
Noise: 2.0, Accuracy: 0.9097, Training Time: 144.56 seconds
Noise: 3.0, Accuracy: 0.9097, Training Time: 146.48 seconds
