# GMM Toy Model: Two Gaussians

In [43]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

## Case 1: Sample Size = 5000

In [58]:
# Parameters
n_samples = 5000
n_features = 10
n_components = 2

# Generate data from Gaussian components
X_gmm_1 = np.random.multivariate_normal(mean=np.random.rand(n_features) * 10,
                                        cov=np.eye(n_features) * 10,
                                        size=n_samples // n_components)

X_gmm_2 = np.random.multivariate_normal(mean=np.random.rand(n_features) * 10,
                                        cov=np.eye(n_features) * 10,
                                        size=n_samples // n_components)

x = np.random.multivariate_normal(mean=np.mean(X_gmm_1, axis=0),
                                   cov=np.eye(n_features) * 20,
                                   size=1)

y = 0

X = np.vstack([X_gmm_1, X_gmm_2])
Y = np.hstack([[i] * (n_samples // n_components) for i in range(n_components)])

# Define binary labels Z for (X_0, X_1)
def create_train_labels(X, x):
    X_0 = torch.cat((X, x), dim=0)  # Dataset including x
    Z_0 = torch.zeros(len(X_0))  # Labels for X_0
    X_1 = X  # Original dataset
    Z_1 = torch.ones(len(X_1))  # Labels for X_1
    return X_0, Z_0, X_1, Z_1

# Convert X and Y to PyTorch tensors
X = torch.tensor(X, dtype=torch.float32)
Y = torch.tensor(Y, dtype=torch.long)  # Ensure Y is long for CrossEntropyLoss
x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

X_0, Z_0, X_1, Z_1 = create_train_labels(X, x)

y = y.unsqueeze(0)  # Reshape y to a 1D tensor
Y_0 = torch.cat((Y, y), dim=0)
Y_0 = torch.tensor(Y_0, dtype=torch.long)
Y_1 = Y

# Concatenate datasets for gradient descent
X_train = torch.cat((X_0, X_1), dim=0)
Z_train = torch.cat((Z_0, Z_1), dim=0)

# Shuffle the combined dataset
train_indices = torch.randperm(len(X_train))
X_train_shuffled = X_train[train_indices]
Z_train_shuffled = Z_train[train_indices]
indices = torch.randperm(len(X))
X_shuffled = X[indices]
Y_shuffled = Y[indices]
original_indices = torch.randperm(len(X_0))
X_0_shuffled = X_0[original_indices]
Y_0_shuffled = Y_0[original_indices]


def compute_mutual_information_direct(predicted_outputs, labels):
    """
    Compute mutual information I(\hat{Y}; Z) using the original formula.
    """
    # Convert logits to probabilities using softmax
    probabilities = torch.softmax(predicted_outputs, dim=1)  # Shape: (N, 2)
    probabilities = torch.clamp(probabilities, 1e-10, 1.0 - 1e-10)  # Avoid numerical issues

    # Compute marginal probabilities of Z
    p_z = torch.mean(labels.float())  # Empirical P(Z=1)
    p_not_z = 1 - p_z                 # Empirical P(Z=0)

    # Compute joint probabilities P(\hat{y}, Z)
    joint_p_z = probabilities[:, 1] * p_z  # P(\hat{y}, Z=1)
    joint_p_not_z = probabilities[:, 0] * p_not_z  # P(\hat{y}, Z=0)

    # Compute marginal probabilities P(\hat{y})
    p_y_hat = joint_p_z + joint_p_not_z  # Marginal P(\hat{y})

    # Compute mutual information
    mutual_information = torch.mean(
        joint_p_z * torch.log(joint_p_z / (p_y_hat * p_z) + 1e-10) +
        joint_p_not_z * torch.log(joint_p_not_z / (p_y_hat * p_not_z) + 1e-10)
    )

    return mutual_information

# Define a normal classifier
class NormalClassifier(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(NormalClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


  Y_0 = torch.tensor(Y_0, dtype=torch.long)


## Original: None-regularizaed

In [315]:
train_acc_origin = []
unlearn_acc_origin = []

for i in range(100):
    # Instantiate the models
    model_original = NormalClassifier(input_dim=n_features, num_classes=2)

    # Loss functions and optimizers
    normal_criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for classifier
    normal_optimizer = optim.Adam(model_original.parameters(), lr=0.01)

    # Training loop with updated mutual information penalty
    num_epochs = 100
    target_mutual_information = []
    normal_classifier_loss = []
    #mutual_information_threshold = 0.1

    for epoch in range(num_epochs):
        # Training the normal classifier
        normal_optimizer.zero_grad()
        Y_train_shuffled = model_original(X_train_shuffled)  # Predicted outputs for \hat{Y}
        Y_hat = model_original(X_0_shuffled)
        normal_loss = normal_criterion(Y_hat, Y_0_shuffled)

        # Compute mutual information I(\hat{Y}; Z)
        mutual_info = compute_mutual_information_direct(Y_train_shuffled, Z_train_shuffled)
        mutual_info.backward(retain_graph=True)
        #for name, param in normal_model.named_parameters():

        # Total loss with mutual information penalty
        lambda_reg = 0
        total_loss = normal_loss + lambda_reg * mutual_info
        #total_loss = normal_loss
        #total_loss = mutual_info
        total_loss.backward()

        # Check gradient norms
        #for name, param in normal_model.named_parameters():
        #    if param.grad is not None:
        #        print(f"Epoch [{epoch + 1}], Gradient Norm ({name}): {param.grad.norm().item()}")

        # Update the normal classifier
        normal_optimizer.step()

        # Log the metrics
        target_mutual_information.append(mutual_info.item())
        normal_classifier_loss.append(normal_loss.item())

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Mutual Information: {mutual_info:.4f}, "
                f"Normal Classifier Loss: {normal_loss.item():.4f}, Lambda: {lambda_reg:.4f}")
            
    # Evaluate the model's accuracy
    with torch.no_grad():
        # Get the model's predictions (logits)
        logits = model_original(X)  # X is the dataset
        
        # Convert logits to predicted labels
        predictions = torch.argmax(logits, dim=1)  # Choose the class with the highest score
        
        # Compute the accuracy
        accuracy = (predictions == Y).float().mean().item()  # Compare predictions with ground truth Y

    # Compute probabilities using softmax
    probabilities = torch.softmax(model_original(x)[0], dim=0)
    # Extract the probability of the first class
    prob_class_0 = probabilities[0]

    train_acc_origin.append(accuracy)
    unlearn_acc_origin.append(prob_class_0.detach().numpy())

Epoch [10/100], Mutual Information: 0.2057, Normal Classifier Loss: 0.2027, Lambda: 0.0000
Epoch [20/100], Mutual Information: 0.1542, Normal Classifier Loss: 0.2392, Lambda: 0.0000
Epoch [30/100], Mutual Information: 0.1507, Normal Classifier Loss: 0.2388, Lambda: 0.0000
Epoch [40/100], Mutual Information: 0.1481, Normal Classifier Loss: 0.2396, Lambda: 0.0000
Epoch [50/100], Mutual Information: 0.1497, Normal Classifier Loss: 0.2361, Lambda: 0.0000
Epoch [60/100], Mutual Information: 0.1465, Normal Classifier Loss: 0.2380, Lambda: 0.0000
Epoch [70/100], Mutual Information: 0.1489, Normal Classifier Loss: 0.2344, Lambda: 0.0000
Epoch [80/100], Mutual Information: 0.1455, Normal Classifier Loss: 0.2368, Lambda: 0.0000
Epoch [90/100], Mutual Information: 0.1452, Normal Classifier Loss: 0.2362, Lambda: 0.0000
Epoch [100/100], Mutual Information: 0.1445, Normal Classifier Loss: 0.2360, Lambda: 0.0000
Epoch [10/100], Mutual Information: 0.2029, Normal Classifier Loss: 0.2123, Lambda: 0.000

In [316]:
np.mean(train_acc_origin), np.std(train_acc_origin)

(np.float64(0.9333900034427642), np.float64(0.001703261915304359))

In [317]:
np.mean(unlearn_acc_origin), np.std(unlearn_acc_origin)

(np.float32(0.96756), np.float32(0.030018877))

## Re-Training from Scratch

In [371]:
train_acc_retrain_scratch = []
unlearn_acc_retrain_scratch = []

for i in range(100):
    # Instantiate the models
    model_retrain = NormalClassifier(input_dim=n_features, num_classes=2)

    # Loss functions and optimizers
    normal_criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for classifier
    normal_optimizer = optim.Adam(model_retrain.parameters(), lr=0.01)

    # Training loop with updated mutual information penalty
    num_epochs = 100
    target_mutual_information = []
    normal_classifier_loss = []
    #mutual_information_threshold = 0.1

    for epoch in range(num_epochs):
        # Training the normal classifier
        normal_optimizer.zero_grad()
        Y_train_shuffled = model_retrain(X_train_shuffled)  # Predicted outputs for \hat{Y}
        Y_hat = model_retrain(X_shuffled)
        normal_loss = normal_criterion(Y_hat, Y_shuffled)

        # Compute mutual information I(\hat{Y}; Z)
        mutual_info = compute_mutual_information_direct(Y_train_shuffled, Z_train_shuffled)
        mutual_info.backward(retain_graph=True)

        # Total loss with mutual information penalty
        lambda_reg = 0
        total_loss = normal_loss + lambda_reg * mutual_info
        #total_loss = normal_loss
        #total_loss = mutual_info
        total_loss.backward()

        # Check gradient norms
        #for name, param in normal_model.named_parameters():
        #    if param.grad is not None:
        #        print(f"Epoch [{epoch + 1}], Gradient Norm ({name}): {param.grad.norm().item()}")

        # Update the normal classifier
        normal_optimizer.step()

        # Log the metrics
        target_mutual_information.append(mutual_info.item())
        normal_classifier_loss.append(normal_loss.item())

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Mutual Information: {mutual_info:.4f}, "
                f"Normal Classifier Loss: {normal_loss.item():.4f}, Lambda: {lambda_reg:.4f}")
            
    # Evaluate the model's accuracy
    with torch.no_grad():
        # Get the model's predictions (logits)
        logits = model_retrain(X)  # X is the dataset
        
        # Convert logits to predicted labels
        predictions = torch.argmax(logits, dim=1)  # Choose the class with the highest score
        
        # Compute the accuracy
        accuracy = (predictions == Y).float().mean().item()  # Compare predictions with ground truth Y


    # Compute probabilities using softmax
    probabilities = torch.softmax(model_retrain(x)[0], dim=0)
    # Extract the probability of the first class
    prob_class_0 = probabilities[0]

    train_acc_retrain_scratch.append(accuracy)
    unlearn_acc_retrain_scratch.append(prob_class_0.detach().numpy())


Epoch [10/100], Mutual Information: 0.1204, Normal Classifier Loss: 0.3579, Lambda: 0.0000
Epoch [20/100], Mutual Information: 0.1387, Normal Classifier Loss: 0.2486, Lambda: 0.0000
Epoch [30/100], Mutual Information: 0.1394, Normal Classifier Loss: 0.2489, Lambda: 0.0000
Epoch [40/100], Mutual Information: 0.1462, Normal Classifier Loss: 0.2394, Lambda: 0.0000
Epoch [50/100], Mutual Information: 0.1471, Normal Classifier Loss: 0.2365, Lambda: 0.0000
Epoch [60/100], Mutual Information: 0.1430, Normal Classifier Loss: 0.2389, Lambda: 0.0000
Epoch [70/100], Mutual Information: 0.1433, Normal Classifier Loss: 0.2373, Lambda: 0.0000
Epoch [80/100], Mutual Information: 0.1438, Normal Classifier Loss: 0.2357, Lambda: 0.0000
Epoch [90/100], Mutual Information: 0.1447, Normal Classifier Loss: 0.2339, Lambda: 0.0000
Epoch [100/100], Mutual Information: 0.1448, Normal Classifier Loss: 0.2330, Lambda: 0.0000
Epoch [10/100], Mutual Information: 0.1912, Normal Classifier Loss: 0.2226, Lambda: 0.000

In [372]:
np.mean(train_acc_retrain_scratch), np.std(train_acc_retrain_scratch)

(np.float64(0.9334120035171509), np.float64(0.0017721886747474346))

In [373]:
np.mean(unlearn_acc_retrain_scratch), np.std(unlearn_acc_retrain_scratch)

(np.float32(0.7848097), np.float32(0.19006258))

## Unlearning, gamma = 4.2, epoch = 5

In [336]:
from copy import deepcopy

In [374]:
train_acc_unlearn = []
unlearn_acc_unlearn = []

for i in range(100):
    # Instantiate the models
    model_reg = NormalClassifier(input_dim=n_features, num_classes=2)
    model_reg = deepcopy(model_original)

    # Loss functions and optimizers
    normal_criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for classifier
    normal_optimizer = optim.Adam(model_reg.parameters(), lr=0.01)

    # Training loop with updated mutual information penalty
    num_epochs = 5
    target_mutual_information = []
    normal_classifier_loss = []
    #mutual_information_threshold = 0.1

    for epoch in range(num_epochs):
        # Training the normal classifier
        normal_optimizer.zero_grad()
        Y_train_shuffled = model_reg(X_train_shuffled)  # Predicted outputs for \hat{Y}
        Y_hat = model_reg(X_0_shuffled)
        normal_loss = normal_criterion(Y_hat, Y_0_shuffled)

        # Compute mutual information I(\hat{Y}; Z)
        mutual_info = compute_mutual_information_direct(Y_train_shuffled, Z_train_shuffled)

        # Total loss with mutual information penalty
        lambda_reg = 4.2
        total_loss = normal_loss + lambda_reg * mutual_info
        #total_loss = normal_loss
        #total_loss = mutual_info
        total_loss.backward()

        # Check gradient norms
        #for name, param in normal_model.named_parameters():
        #    if param.grad is not None:
        #        print(f"Epoch [{epoch + 1}], Gradient Norm ({name}): {param.grad.norm().item()}")

        # Update the normal classifier
        normal_optimizer.step()

        # Log the metrics
        target_mutual_information.append(mutual_info.item())
        normal_classifier_loss.append(normal_loss.item())

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Mutual Information: {mutual_info:.4f}, "
                f"Normal Classifier Loss: {normal_loss.item():.4f}, Lambda: {lambda_reg:.4f}")
        
        # Evaluate the model's accuracy
    with torch.no_grad():
        # Get the model's predictions (logits)
        logits = model_reg(X)  # X is the dataset
        
        # Convert logits to predicted labels
        predictions = torch.argmax(logits, dim=1)  # Choose the class with the highest score
        
        # Compute the accuracy
        accuracy = (predictions == Y).float().mean().item()  # Compare predictions with ground truth Y

    # Compute probabilities using softmax
    probabilities = torch.softmax(model_reg(x)[0], dim=0)
    # Extract the probability of the first class
    prob_class_0 = probabilities[0]

    train_acc_unlearn.append(accuracy)
    unlearn_acc_unlearn.append(prob_class_0.detach().numpy())

In [375]:
np.mean(train_acc_unlearn), np.std(train_acc_unlearn)

(np.float64(0.9243999719619751), np.float64(0.0))

In [376]:
np.mean(unlearn_acc_unlearn), np.std(unlearn_acc_unlearn)

(np.float32(0.7805244), np.float32(5.9604645e-08))

## retrain from original

In [384]:
train_acc_retrain_origin = []
unlearn_acc_retrain_origin = []    

for i in range(100):
    # Instantiate the models
    model_retrain_original = NormalClassifier(input_dim=n_features, num_classes=2)
    model_retrain_original = deepcopy(model_original)

    # Loss functions and optimizers
    normal_criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for classifier
    normal_optimizer = optim.Adam(model_retrain_original.parameters(), lr=0.01)

    # Training loop with updated mutual information penalty
    num_epochs = 100
    target_mutual_information = []
    normal_classifier_loss = []
    #mutual_information_threshold = 0.1

    for epoch in range(num_epochs):
        # Training the normal classifier
        normal_optimizer.zero_grad()
        Y_train_shuffled = model_retrain_original(X_train_shuffled)  # Predicted outputs for \hat{Y}
        Y_hat = model_retrain_original(X_0_shuffled)
        normal_loss = normal_criterion(Y_hat, Y_0_shuffled)

        # Compute mutual information I(\hat{Y}; Z)
        mutual_info = compute_mutual_information_direct(Y_train_shuffled, Z_train_shuffled)

        # Total loss with mutual information penalty
        lambda_reg = 0
        total_loss = normal_loss + lambda_reg * mutual_info
        #total_loss = normal_loss
        #total_loss = mutual_info
        total_loss.backward()

        # Check gradient norms
        #for name, param in normal_model.named_parameters():
        #    if param.grad is not None:
        #        print(f"Epoch [{epoch + 1}], Gradient Norm ({name}): {param.grad.norm().item()}")

        # Update the normal classifier
        normal_optimizer.step()

        # Log the metrics
        target_mutual_information.append(mutual_info.item())
        normal_classifier_loss.append(normal_loss.item())

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Mutual Information: {mutual_info:.4f}, "
                f"Normal Classifier Loss: {normal_loss.item():.4f}, Lambda: {lambda_reg:.4f}")
            
    # Evaluate the model's accuracy
    with torch.no_grad():
        # Get the model's predictions (logits)
        logits = model_retrain_original(X)  # X is the dataset
        
        # Convert logits to predicted labels
        predictions = torch.argmax(logits, dim=1)  # Choose the class with the highest score
        
        # Compute the accuracy
        accuracy = (predictions == Y).float().mean().item()  # Compare predictions with ground truth Y

    # Compute probabilities using softmax
    probabilities = torch.softmax(model_retrain_original(x)[0], dim=0)
    # Extract the probability of the first class
    prob_class_0 = probabilities[0]

    train_acc_retrain_origin.append(accuracy)
    unlearn_acc_retrain_origin.append(prob_class_0.detach().numpy())


Epoch [10/100], Mutual Information: 0.2704, Normal Classifier Loss: 0.1760, Lambda: 0.0000
Epoch [20/100], Mutual Information: 0.2581, Normal Classifier Loss: 0.1701, Lambda: 0.0000
Epoch [30/100], Mutual Information: 0.2635, Normal Classifier Loss: 0.1662, Lambda: 0.0000
Epoch [40/100], Mutual Information: 0.2642, Normal Classifier Loss: 0.1623, Lambda: 0.0000
Epoch [50/100], Mutual Information: 0.2656, Normal Classifier Loss: 0.1575, Lambda: 0.0000
Epoch [60/100], Mutual Information: 0.2680, Normal Classifier Loss: 0.1525, Lambda: 0.0000
Epoch [70/100], Mutual Information: 0.2677, Normal Classifier Loss: 0.1496, Lambda: 0.0000
Epoch [80/100], Mutual Information: 0.2724, Normal Classifier Loss: 0.1424, Lambda: 0.0000
Epoch [90/100], Mutual Information: 0.2749, Normal Classifier Loss: 0.1376, Lambda: 0.0000
Epoch [100/100], Mutual Information: 0.2746, Normal Classifier Loss: 0.1368, Lambda: 0.0000
Epoch [10/100], Mutual Information: 0.2704, Normal Classifier Loss: 0.1760, Lambda: 0.000

In [385]:
np.mean(train_acc_retrain_origin), np.std(train_acc_retrain_origin)

(np.float64(0.9452000260353088), np.float64(0.0))

In [386]:
np.mean(unlearn_acc_retrain_origin), np.std(unlearn_acc_retrain_origin)

(np.float32(1.0), np.float32(0.0))