In [None]:
import torch
import numpy as np
import random
from torchvision import transforms
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torchvision.datasets as datasets
import torch.nn as nn
from torch.utils.data import Subset

# Set random seeds for reproducibility (PyTorch, Python's `random`, NumPy)
torch.manual_seed(42)
random.seed(42)
np.random.seed(42)
# Number of epochs used by the initial training loop
trainepochs = 60
# Presumably the epoch budget of a later fine-tuning stage; it is not used
# in the cells visible here.
fineepochs = 30

# Gaussian noise function
def apply_gaussian_noise(image, noise_level=3.0):
    """Return a noisy copy of ``image`` with values clipped to [0, 1].

    Parameters:
        image: Tensor to perturb. It is never modified in place.
        noise_level: Factor applied to standard-normal noise, i.e. the
            standard deviation of the added noise.

    Returns:
        A new tensor with the same shape as ``image`` whose values lie in
        [0, 1].
    """
    # Draw the noise on the image's own device so the addition also works
    # for tensors that do not live on the CPU.
    noise = torch.randn(image.shape, device=image.device) * noise_level
    # The out-of-place addition already allocates a new tensor, so the input
    # does not have to be cloned first.
    return torch.clamp(image + noise, 0, 1)

# CIFAR-10 data preprocessing: convert to tensors, then normalize per channel
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))  # CIFAR-10 normalization
])

# Load full CIFAR-10 dataset
full_trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

from sklearn.model_selection import train_test_split

# Get all labels. The dataset keeps the class ids in `targets`, so there is
# no need to decode and transform every image just to read its label.
labels = list(full_trainset.targets)

# Stratified sampling of a 10% subset
_, subset_indices = train_test_split(
    range(len(full_trainset)),
    test_size=1/10,
    stratify=labels,
    random_state=42
)
# NOTE: the stratified subset above is deliberately overridden here, so the
# experiment runs on the full training set. Remove the next line to train on
# the 10% subset instead.
subset_indices = range(len(full_trainset))
trainset = Subset(full_trainset, subset_indices)

# Check category distribution (read from the label list, not from the images)
from collections import Counter
print("category distribution:", Counter(labels[i] for i in subset_indices))

# Generate indices for attacked samples
num_train_attack = len(trainset) // 2  # 50% of training set to be attacked
attack_train_indices = set(random.sample(range(len(trainset)), num_train_attack))  # Randomly select samples to add noise
train_clean_indices = set(range(len(trainset))) - attack_train_indices  # Other half remains clean

# Custom dataset class
class NoisyCIFAR10Dataset(Dataset):
    """Dataset wrapper that adds Gaussian noise to a chosen set of samples.

    Samples whose index is contained in ``attacked_indices`` are passed
    through ``apply_gaussian_noise`` every time they are fetched; all other
    samples are returned exactly as the wrapped dataset provides them.
    """

    def __init__(self, dataset, attacked_indices, noise_level=0.1):
        # dataset: wrapped dataset yielding (image, label) pairs
        # attacked_indices: container of indices that receive noise
        # noise_level: forwarded to apply_gaussian_noise
        self.dataset, self.attacked_indices = dataset, attacked_indices
        self.noise_level = noise_level

    def __len__(self):
        # Same length as the wrapped dataset
        return len(self.dataset)

    def __getitem__(self, idx):
        sample, target = self.dataset[idx]
        # Clean samples go out untouched
        if idx not in self.attacked_indices:
            return sample, target
        # Attacked samples get fresh noise on every access
        return apply_gaussian_noise(sample, self.noise_level), target

# Create attacked training set
trainset_noisy = NoisyCIFAR10Dataset(trainset, attack_train_indices, noise_level=8)  # noise_level=8 overrides the class default of 0.1

# Create DataLoaders. shuffle=False keeps batches in dataset order; later
# cells index per-sample results (attributions, cluster labels) by position.
trainloader = DataLoader(trainset_noisy, batch_size=64, shuffle=False)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Print counts of attacked vs clean samples
print(f"Train Clean Samples: {len(train_clean_indices)}, Attacked Samples: {len(attack_train_indices)}")

# Visualization function
def show_images(dataset, indices, title):
    """Plot up to five samples of ``dataset`` side by side.

    Parameters:
        dataset: Indexable returning ``(image, label)``, where the image is a
            channel-first tensor.
        indices: Iterable of sample indices; only the first five are shown.
        title: Figure title.
    """
    _fig, axes = plt.subplots(1, 5, figsize=(10, 2))
    shown = 0
    # zip stops once the five axes are used up, so `indices` is never copied
    # in full just to take its first few elements.
    for ax, idx in zip(axes, indices):
        img, label = dataset[idx]
        # Adjust channel order for display: (C,H,W) -> (H,W,C)
        img = img.permute(1, 2, 0)
        ax.imshow(img.numpy().clip(0, 1))  # Ensure values between 0-1
        ax.set_title(f"Label: {label}")
        ax.axis('off')
        shown += 1
    # Hide the frames of axes that received no image (fewer than five indices)
    for ax in axes[shown:]:
        ax.axis('off')
    plt.suptitle(title)
    plt.show()

# Display sample attacked and clean images (first five of each index set)
show_images(trainset_noisy, attack_train_indices, "Attacked Train Samples")
show_images(trainset_noisy, train_clean_indices, "Clean Train Samples")

# Split training set into attacked and clean subsets.
# Both subsets wrap trainset_noisy, so attacked samples still receive noise
# when they are read through the subset.
train_attack_subset = Subset(trainset_noisy, list(attack_train_indices))  # Attacked samples
train_clean_subset = Subset(trainset_noisy, list(train_clean_indices))    # Clean samples

# Create corresponding DataLoaders (used for evaluation in later cells)
attackTrainloader = DataLoader(train_attack_subset, batch_size=64, shuffle=False)
cleanTrainloader = DataLoader(train_clean_subset, batch_size=64, shuffle=False)

# Print subset sizes
print(f"Train - Attack: {len(train_attack_subset)}, Clean: {len(train_clean_subset)}")

In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import matplotlib.pyplot as plt
# # import time
# import torch.nn.functional as F
#
# # Ensure the plot background is white
# plt.rcParams['figure.facecolor'] = 'white'
#
# # ---------- 1. Define Model Architecture ----------
# class BasicBlock(nn.Module):
#     expansion = 1
#     def __init__(self, in_channels, out_channels, stride=1, downsample=None):
#         super().__init__()
#         self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False)
#         self.bn1   = nn.BatchNorm2d(out_channels)
#         self.relu  = nn.ReLU(inplace=True)
#         self.conv2 = nn.Conv2d(out_channels, out_channels, 3, 1, 1, bias=False)
#         self.bn2   = nn.BatchNorm2d(out_channels)
#         self.downsample = downsample
#
#     def forward(self, x):
#         identity = x
#         out = self.relu(self.bn1(self.conv1(x)))
#         out = self.bn2(self.conv2(out))
#         if self.downsample:
#             identity = self.downsample(x)
#         out = self.relu(out + identity)
#         return out
#
# class ResNet18(nn.Module):
#     def __init__(self, num_classes=10):
#         super().__init__()
#         self.in_channels = 64
#         self.conv1 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
#         self.bn1   = nn.BatchNorm2d(64)
#         self.layer1 = self._make_layer(64, 2)
#         self.layer2 = self._make_layer(128, 2, stride=2)
#         self.layer3 = self._make_layer(256, 2, stride=2)
#         self.layer4 = self._make_layer(512, 2, stride=2)
#         self.avgpool = nn.AdaptiveAvgPool2d((1,1))
#         self.fc1 = nn.Linear(512 * BasicBlock.expansion, 256)
#         self.fc2 = nn.Linear(256, num_classes)
#
#     def _make_layer(self, out_channels, blocks, stride=1):
#         downsample = None
#         if stride != 1 or self.in_channels != out_channels * BasicBlock.expansion:
#             downsample = nn.Sequential(
#                 nn.Conv2d(self.in_channels, out_channels * BasicBlock.expansion, 1, stride, bias=False),
#                 nn.BatchNorm2d(out_channels * BasicBlock.expansion)
#             )
#         layers = [BasicBlock(self.in_channels, out_channels, stride, downsample)]
#         self.in_channels = out_channels * BasicBlock.expansion
#         for _ in range(1, blocks):
#             layers.append(BasicBlock(self.in_channels, out_channels))
#         return nn.Sequential(*layers)
#
#     def forward(self, x):
#         x = F.relu(self.bn1(self.conv1(x)))
#         x = self.layer1(x); x = self.layer2(x)
#         x = self.layer3(x); x = self.layer4(x)
#         x = self.avgpool(x)
#         x = torch.flatten(x, 1)
#         x = F.relu(self.fc1(x))
#         return self.fc2(x)
#
# # ---------- 2. Instantiate Model, Optimizer, etc. ----------
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = ResNet18(num_classes=10).to(device)
#
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
# scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[60, 120, 160], gamma=0.2)
#
# # ---------- 3. Training and Testing Parameters ----------
# num_epochs = 150
# train_losses = []
# test_accuracies = []
#
# # ---------- 4. Training + Testing Loop ----------
# for epoch in range(num_epochs):
#     model.train()
#     running_loss = 0.0
#     for inputs, targets in trainloader:
#         inputs, targets = inputs.to(device), targets.to(device)
#         optimizer.zero_grad()
#         outputs = model(inputs)
#         loss = criterion(outputs, targets)
#         loss.backward()
#         optimizer.step()
#         running_loss += loss.item()
#     avg_loss = running_loss / len(trainloader)
#     train_losses.append(avg_loss)
#
#     # Test
#     model.eval()
#     correct, total = 0, 0
#     with torch.no_grad():
#         for inputs, targets in testloader:
#             inputs, targets = inputs.to(device), targets.to(device)
#             outputs = model(inputs)
#             _, pred = outputs.max(1)
#             total += targets.size(0)
#             correct += pred.eq(targets).sum().item()
#     acc = 100.0 * correct / total
#     test_accuracies.append(acc)
#
#     scheduler.step()
#     print(f"Epoch {epoch+1:3d}/{num_epochs} → Loss: {avg_loss:.4f}, Test Acc: {acc:.2f}%")
#
# # ---------- 5. Plot and Save ----------
# # Test Accuracy
# plt.figure()
# plt.plot(range(1, num_epochs+1), test_accuracies)
# plt.title('Test Accuracy over Epochs')
# plt.xlabel('Epoch'); plt.ylabel('Accuracy (%)')
# plt.grid(True)
# plt.savefig('accuracy.png', dpi=300, facecolor='white')
# plt.show()
#
# # Training Loss
# plt.figure()
# plt.plot(range(1, num_epochs+1), train_losses)
# plt.title('Training Loss over Epochs')
# plt.xlabel('Epoch'); plt.ylabel('Loss')
# plt.grid(True)
# plt.savefig('loss.png', dpi=300, facecolor='white')
# plt.show()
#
# print("Generated and saved：accuracy.png, loss.png")


In [None]:
# Basic Residual Block
import torch
import torch.nn as nn
import torch.optim as optim
import time

import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a skip connection.

    The first convolution applies ``stride``; the second keeps the spatial
    size. ``downsample``, when given, transforms the input so that it can be
    added to the output of the convolutional branch.
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        # Conv2d positional arguments: kernel_size, stride, padding
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, 1, 1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Convolutional branch: conv -> bn -> relu -> conv -> bn
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        # Skip branch: identity unless a projection module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)

# ResNet18 Model
class ResNet18(nn.Module):
    """ResNet-style classifier with a 3x3 stem and a two-layer linear head.

    Parameters:
        block: Residual block class exposing an ``expansion`` attribute.
        layers: Number of blocks in each of the four stages.
        num_classes: Size of the final output layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        super().__init__()
        self.in_channels = 64

        # Stem: one 3x3 convolution with stride 1, followed by batch norm
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Four residual stages, registered as layer1..layer4
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for pos, (width, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, width, layers[pos], stride=stride)
            setattr(self, f"layer{pos + 1}", stage)

        # Head: global average pool, hidden layer of 256 units, class logits
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks as an nn.Sequential."""
        expanded = out_channels * block.expansion
        shortcut = None
        # A 1x1 projection is needed whenever the skip path changes shape
        if stride != 1 or self.in_channels != expanded:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channels, expanded, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(expanded)
            )

        # Only the first block of a stage applies the stride and projection
        stage = [block(self.in_channels, out_channels, stride, shortcut)]
        self.in_channels = expanded
        for _ in range(1, blocks):
            stage.append(block(self.in_channels, out_channels))

        return nn.Sequential(*stage)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        # Pool to 1x1 and flatten all dimensions except batch
        x = torch.flatten(self.avgpool(x), 1)
        hidden = F.relu(self.fc(x))
        return self.fc2(hidden)

def ResNet18_CIFAR10(num_classes=10):
    """Build the ResNet18 defined above with two BasicBlocks in each stage."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet18(block=BasicBlock, layers=blocks_per_stage, num_classes=num_classes)
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNet18_CIFAR10().to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
# The learning rate is multiplied by 0.2 at each milestone.
# NOTE(review): with trainepochs = 60 the first milestone is only reached
# after the final epoch, so the learning rate appears to stay at 0.1 for the
# whole run - confirm this is intended.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[60, 120, 160], gamma=0.2)

# Example training loop
def train(epoch):
    """Run one optimization pass over ``trainloader``.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``device``.

    Parameters:
        epoch: Index of the current epoch. It is not used inside the
            function and is kept only so existing calls stay valid.
    """
    model.train()
    for inputs, targets in trainloader:
        # Move the batch to the device the model was placed on instead of
        # unconditionally calling .cuda(), which fails on CPU-only hosts.
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()


# Test function
def test():
    """Evaluate the module-level ``model`` on ``testloader``.

    Prints the accuracy and returns it.

    Returns:
        Top-1 accuracy on the test loader, in percent.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in testloader:
            # Follow the selected device rather than hard-coding .cuda()
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    accuracy = 100.*correct/total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Training process
best_acc, total_train_time = 0, 0

for epoch in range(trainepochs):
    print(f"Epoch {epoch+1}/{trainepochs}")
    start_time = time.time()
    train(epoch)
    scheduler.step()
    # Keep the highest accuracy seen so far. Assigning test() directly would
    # report the accuracy of the last epoch as the "best" one.
    best_acc = max(best_acc, test())
    epoch_time = time.time() - start_time  # covers training and evaluation
    total_train_time += epoch_time

# Final output results
print(f"\nTraining completed with results:")
print(f"Total training time: {total_train_time:.2f} seconds")
print(f"Average time per epoch: {total_train_time/trainepochs:.2f}s")
print(f"Best test accuracy: {best_acc:.2f}%")

In [None]:
def compute_ffn_attribution(model, data_loader):
    """
    Compute attribution for the hidden fc layer (``model.fc``) of the network.

    For every sample, the gradient of each class logit with respect to the
    fc output is computed. The per-unit maximum over classes is multiplied
    with the fc output and the absolute value is taken.

    Parameters:
        model: Trained model exposing conv1, bn1, layer1..layer4, avgpool,
            fc and fc2
        data_loader: Data loader yielding (images, anything) pairs; the
            second element is ignored

    Returns:
        attributions: CPU tensor of shape (num_samples, fc output size)
    """
    model.eval()
    # Run on the device the model actually lives on
    model_device = next(model.parameters()).device
    attributions = []

    for images, _ in data_loader:  # Labels not needed
        images = images.to(model_device)

        # Gradients are only needed from the fc output onwards, so the
        # backbone runs without building an autograd graph.
        with torch.no_grad():
            x = model.conv1(images)
            x = F.relu(model.bn1(x))
            x = model.layer1(x)
            x = model.layer2(x)
            x = model.layer3(x)
            x = model.layer4(x)
            x = model.avgpool(x)
            x = torch.flatten(x, 1)  # (batch_size, fc input size)
            fc_output = model.fc(x)  # (batch_size, fc output size)

        # Make the fc output a leaf that gradients can be taken against
        fc_output.requires_grad_(True)

        # Continue to final output: (batch_size, num_classes)
        final_output = model.fc2(F.relu(fc_output))

        # One gradient per class logit; the class count is read from the
        # model output instead of being hard-coded.
        num_classes = final_output.shape[1]
        gradient_matrix = torch.stack([
            torch.autograd.grad(
                outputs=final_output[:, i],
                inputs=fc_output,
                grad_outputs=torch.ones_like(final_output[:, i]),
                retain_graph=True,
                create_graph=False
            )[0]  # (batch_size, fc output size)
            for i in range(num_classes)
        ], dim=1)  # (batch_size, num_classes, fc output size)

        # Use max aggregation over classes: (batch_size, fc output size)
        attribution_max = (gradient_matrix.max(dim=1)[0] * fc_output).abs()

        attributions.append(attribution_max.detach().cpu())

    if not attributions:
        # An empty loader yields an empty attribution matrix, not a crash
        return torch.empty(0, model.fc.out_features)

    return torch.cat(attributions, dim=0)  # (num_samples, fc output size)

In [None]:
# Place the already trained model on the selected device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Automatically select device
model.to(device)  # Move model to the selected device (GPU if available)
# Compute attribution over the unshuffled training loader, so row i of the
# result corresponds to sample i of trainloader.dataset
attributions = compute_ffn_attribution(model, trainloader)

# attributions shape is (num_samples, 256), representing each sample's importance on 256-dimensional features
attributions.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# `attributions` is the tensor returned by compute_ffn_attribution
# 1. Make sure the tensor is on the CPU
ffn_attributions_cpu = attributions.cpu()

# 2. Convert PyTorch tensor to NumPy array
ffn_attributions_np = ffn_attributions_cpu.numpy()

# 3. Standardize each feature column using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(ffn_attributions_np)

from sklearn.mixture import GaussianMixture

# GMM clustering into two components (one label per sample)
gmm = GaussianMixture(n_components=2, random_state=42)
gmm_labels = gmm.fit_predict(X_scaled)

# Get indices for two clusters (positions in the attribution matrix)
cluster_0_indices = np.where(gmm_labels == 0)[0]
cluster_1_indices = np.where(gmm_labels == 1)[0]

print(f"\nGMM classification results:")
print(f"Cluster 0 sample count: {len(cluster_0_indices)}")
print(f"Cluster 1 sample count: {len(cluster_1_indices)}")

In [None]:
# Calculate the matching between the GMM clustering and the manual partitioning
attack_set = set(attack_train_indices)  # True attack sample indices
clean_set = set(train_clean_indices)    # True clean sample indices

cluster_0_set = set(cluster_0_indices)  # Samples in GMM cluster 0
cluster_1_set = set(cluster_1_indices)  # Samples in GMM cluster 1

# Calculate intersections
attack_in_cluster_0 = len(attack_set & cluster_0_set)  # Number of true attack samples in Cluster 0
attack_in_cluster_1 = len(attack_set & cluster_1_set)  # Number of true attack samples in Cluster 1

clean_in_cluster_0 = len(clean_set & cluster_0_set)  # Number of true clean samples in Cluster 0
clean_in_cluster_1 = len(clean_set & cluster_1_set)  # Number of true clean samples in Cluster 1

# Share (in percent) of the attack / clean samples that fall into each cluster
attack_accuracy_cluster_0 = (attack_in_cluster_0 / len(attack_set)) * 100
attack_accuracy_cluster_1 = (attack_in_cluster_1 / len(attack_set)) * 100

clean_accuracy_cluster_0 = (clean_in_cluster_0 / len(clean_set)) * 100
clean_accuracy_cluster_1 = (clean_in_cluster_1 / len(clean_set)) * 100

# Print comparison results. The header names the GMM, because the cluster
# labels come from GaussianMixture and not from K-Means.
print("==== GMM Clustering vs Manual Labels ====")
print(f"Cluster 0: {len(cluster_0_indices)} samples")
print(f"Cluster 1: {len(cluster_1_indices)} samples\n")

print(f"Attack Samples in Cluster 0: {attack_in_cluster_0} ({attack_accuracy_cluster_0:.2f}%)")
print(f"Attack Samples in Cluster 1: {attack_in_cluster_1} ({attack_accuracy_cluster_1:.2f}%)\n")

print(f"Clean Samples in Cluster 0: {clean_in_cluster_0} ({clean_accuracy_cluster_0:.2f}%)")
print(f"Clean Samples in Cluster 1: {clean_in_cluster_1} ({clean_accuracy_cluster_1:.2f}%)")

# Overall accuracy in percent under the hypothesis "cluster 0 = attacked,
# cluster 1 = clean". A value below 50 means the clusters are the other way
# round.
total_correct = attack_in_cluster_0 + clean_in_cluster_1
overall_accuracy = (total_correct / len(trainset)) * 100  # Previously train_subset, now changed to trainset

print(f"\nOverall Clustering Accuracy: {overall_accuracy:.2f}%")

In [None]:
# Extract the attribution rows of the samples assigned to cluster 0
cluster_0_attributions = attributions[cluster_0_indices]

# Extract the attribution rows of the samples assigned to cluster 1
cluster_1_attributions = attributions[cluster_1_indices]
# Sanity check: the row counts should match the cluster sizes printed earlier
print(cluster_0_attributions.shape)
print(cluster_1_attributions.shape)

In [None]:
import torch
from torch.utils.data import DataLoader, Subset

# Ensure labels length matches dataset size gam_labels
#assert len(labels) == len(trainloader.dataset), "labels and trainloader.dataset size mismatch!"

# Convert indices to plain Python ints. The comprehension works for the
# NumPy arrays produced by np.where and for lists, so re-running this cell
# does not fail.
cluster_0_indices = [int(i) for i in cluster_0_indices]
cluster_1_indices = [int(i) for i in cluster_1_indices]

# Use Subset to split original dataset by indices
dataset_0 = Subset(trainloader.dataset, cluster_0_indices)
dataset_1 = Subset(trainloader.dataset, cluster_1_indices)

# Create new DataLoaders
trainloader_0 = DataLoader(dataset_0, batch_size=64, shuffle=True)
trainloader_1 = DataLoader(dataset_1, batch_size=64, shuffle=True)

# Final verification
print(f"Expected Cluster 0 size: {len(cluster_0_indices)}, Actual: {len(trainloader_0.dataset)}")
print(f"Expected Cluster 1 size: {len(cluster_1_indices)}, Actual: {len(trainloader_1.dataset)}")

# Assign datasets based on clustering accuracy.
# overall_accuracy is a percentage (0-100) measured under the hypothesis
# "cluster 0 = attacked", so the decision boundary is 50, not 0.5.
if overall_accuracy > 50:
    # Cluster 0 is mostly attacked: fine-tune on cluster 1
    fine_tuning_load = trainloader_1
    noisy_dataset = trainloader_0
else:
    # Clusters are the other way round
    fine_tuning_load = trainloader_0
    noisy_dataset = trainloader_1

In [None]:
import torch

def evaluate_model(model, dataloader, device):
    """
    Measure top-1 accuracy of a model over a data loader.

    Parameters:
    - model: PyTorch model to evaluate (switched to eval mode)
    - dataloader: Iterable of (inputs, labels) batches
    - device: Device the batches are moved to ('cuda' or 'cpu')

    Returns:
    - accuracy: Percentage of correctly classified samples (float)
    """
    model.eval()
    hits, seen = 0, 0

    # No gradients are needed for evaluation
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_inputs)
            # Index of the largest logit per row is the predicted class
            predicted = torch.max(logits.data, 1)[1]
            seen += batch_labels.size(0)
            hits += (predicted == batch_labels).sum().item()

    return 100 * hits / seen

In [None]:
import torch
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

def evaluate_model_with_metrics(model, dataloader, device, average='macro'):
    """
    Evaluate a classifier and report Accuracy, Precision, Recall, F1 and the
    confusion matrix.

    Parameters:
        model: PyTorch model to evaluate (switched to eval mode)
        dataloader: Iterable of (inputs, labels) batches
        device: Device the batches are moved to ('cuda' or 'cpu')
        average: Averaging mode passed to precision/recall/f1, e.g. 'macro',
            'micro' or 'weighted'

    Returns:
        metrics: dict with the keys
            - accuracy (fraction of correct predictions)
            - precision
            - recall
            - f1
            - confusion_matrix (as numpy array)
    """
    model.eval()
    pred_chunks, label_chunks = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Second element of torch.max(..., dim) holds the argmax indices
            preds = torch.max(model(inputs), dim=1)[1]
            pred_chunks.append(preds.cpu())
            label_chunks.append(labels.cpu())

    # Flatten the per-batch results into 1D arrays
    y_true = torch.cat(label_chunks).numpy()
    y_pred = torch.cat(pred_chunks).numpy()

    # Options shared by the three averaged scores
    score_options = dict(average=average, zero_division=0)

    return {
        'accuracy': (y_pred == y_true).mean(),
        'precision': precision_score(y_true, y_pred, **score_options),
        'recall': recall_score(y_true, y_pred, **score_options),
        'f1': f1_score(y_true, y_pred, **score_options),
        'confusion_matrix': confusion_matrix(y_true, y_pred)
    }

In [None]:
# Evaluate model on cluster_0 and cluster_1 datasets, and on the full
# (noisy) training loader
accuracy_cluster_0 = evaluate_model(model, trainloader_0, device)
accuracy_cluster_1 = evaluate_model(model, trainloader_1, device)
accuracy_cluster = evaluate_model(model,trainloader,device)

print(f"Accuracy on Cluster 0: {accuracy_cluster_0:.2f}%")
print(f"Accuracy on Cluster 1: {accuracy_cluster_1:.2f}%")
print(f"Accuracy on Cluster : {accuracy_cluster:.2f}%")

# Accuracy on the ground-truth attacked / clean training subsets and on the
# test set
accuracy_attack_train = evaluate_model(model, attackTrainloader , device)
accuracy_clean_train = evaluate_model(model, cleanTrainloader , device)
# accuracy_attack_test = evaluate_model(model, attackTestloader , device)
accuracy_test = evaluate_model(model, testloader , device)

print(f"Accuracy on attack_train : {accuracy_attack_train:.2f}%")
print(f"Accuracy on clean_train : {accuracy_clean_train:.2f}%")

print(f"Accuracy on test : {accuracy_test:.2f}%")

#print(f"Accuracy on clean_test : {accuracy_clean_test:.2f}%")
# Keep the test accuracy of the unmodified model under a separate name
accuracy_source = accuracy_test
# 'accuracy' in the metrics dict is a fraction, hence the *100 when printing
metric_test = evaluate_model_with_metrics(model,testloader,device,average='macro')
print(f"Accuracy:  {metric_test['accuracy']*100:.2f}%")
print(f"Precision: {metric_test['precision']:.4f}")
print(f"Recall:    {metric_test['recall']:.4f}")
print(f"F1-score:  {metric_test['f1']:.4f}")

In [None]:
import copy

# Create a deep copy of the model so later cells can modify the copy while
# the original `model` stays untouched
model_copy = copy.deepcopy(model)

# Ensure the new model is on the same device as the original
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_copy.to(device)

# Print the architecture of the copy as a quick visual check
print(model_copy)
# Operate on model-copy version2

In [None]:
import numpy as np
import torch

def compute_layer_features(model, data_loader, cluster_labels=None, layer_name='fc_512_256', overall_accuracy=1.0):
    """
    Extract the outputs of the hidden fully-connected layer (``model.fc``).

    Parameters:
        model: Model exposing conv1, bn1, layer1..layer4, avgpool and fc
        data_loader: DataLoader returning (images, indices) tuples. The
            indices are only used to look up ``cluster_labels``; they must
            be sample positions, not class labels.
        cluster_labels: Optional array of cluster labels (0 or 1)
        layer_name: Currently only supports 'fc_512_256' (the ``model.fc``
            layer)
        overall_accuracy: Labels are inverted (1 - label) when this value is
            greater than 0.5

    Returns:
        X: Feature matrix (n_samples, fc output size)
        y: Label array if cluster_labels provided, otherwise None

    Raises:
        ValueError: If ``layer_name`` is not supported.
    """
    # Reject unsupported layers up front, before any forward pass is run
    if layer_name != 'fc_512_256':
        raise ValueError(f"Unsupported layer name: {layer_name}. Only 'fc_512_256' is supported.")

    model.eval()
    # Run on the device the model actually lives on
    model_device = next(model.parameters()).device

    if cluster_labels is not None:
        if isinstance(cluster_labels, torch.Tensor):
            cluster_labels = cluster_labels.cpu().numpy()
        else:
            # Lists and other sequences need array semantics for indexing
            cluster_labels = np.asarray(cluster_labels)

    # The inversion decision does not depend on the batch
    invert = overall_accuracy > 0.5

    X_list = []
    y_list = []

    for images, indices in data_loader:
        images = images.to(model_device)

        with torch.no_grad():
            # Forward pass up to and including the fc layer
            x = model.conv1(images)
            x = F.relu(model.bn1(x))
            x = model.layer1(x)
            x = model.layer2(x)
            x = model.layer3(x)
            x = model.layer4(x)
            x = model.avgpool(x)
            x = torch.flatten(x, 1)
            features = model.fc(x)

        # Store features
        X_list.append(features.cpu().numpy())

        # Process labels if cluster_labels provided
        if cluster_labels is not None:
            batch_labels = cluster_labels[torch.as_tensor(indices).cpu().numpy()]
            y_list.append(1 - batch_labels if invert else batch_labels)

    if not X_list:
        # An empty loader yields empty outputs; np.concatenate would raise
        X = np.empty((0, model.fc.out_features), dtype=np.float32)
        y = np.empty((0,), dtype=cluster_labels.dtype) if cluster_labels is not None else None
        return X, y

    X = np.concatenate(X_list, axis=0)
    y = np.concatenate(y_list, axis=0) if cluster_labels is not None else None

    return X, y

In [None]:
# trainloader yields (image, class label) pairs, not dataset indices, so
# passing gmm_labels into compute_layer_features would look the cluster
# labels up by class id. The loader is unshuffled, so the feature rows are
# already in dataset order and the labels can be aligned by position.
X_1, _ = compute_layer_features(model_copy, trainloader)
# Same inversion the function applies for its default overall_accuracy=1.0
y_1 = 1 - np.asarray(gmm_labels)
#X_2,y_2 = compute_layer_features(model_copy, trainloader,gmm_labels,"fc_512_256")

In [None]:
# def solve_linear_regression_torch(X, y):
#     X = torch.tensor(X, dtype=torch.float32)
#     y = torch.tensor(y, dtype=torch.float32).view(-1, 1)
#
#     # Add bias term
#     ones = torch.ones(X.shape[0], 1)
#     X_bias = torch.cat([X, ones], dim=1)  # Shape becomes (60000, 65)
#
#     # Solve using least squares
#     W = torch.linalg.lstsq(X_bias, y).solution
#     return W[:-1], W[-1]  # W[:-1] are weights (n_features,), W[-1] is bias
# #W2_torch, b2_torch = solve_linear_regression_torch(X_2, y_2)
# W1_torch, b1_torch = solve_linear_regression_torch(X_1, y_1)
#
# print("W1 shape:", W1_torch.shape)  # (64,)
# print("b:", b1_torch)
# # To print all weights:
# # for i, w_i in enumerate(W2_torch):
# #     print(f"W[{i}] = {w_i}")

In [None]:
# # Modify fc1 according to fc2 with 40 neurons
# # Flatten W_torch into a one-dimensional tensor
# W_torch = W1_torch.flatten()  # shape is (128,)
#
# # Sort in descending order by absolute value
# W_abs_sorted, indices = torch.sort(torch.abs(W_torch), descending=True)
#
# # Take the indices of the top 10 largest neurons
# top_10_indices = indices[:40]  # Take the first 10 indices 40 / 128
# top_10_values = W_torch[top_10_indices]  ## Get the corresponding W values
#
# # Print the contents of top_10_values
# print("Top 10 values:", top_10_values)
#
# # output
# print("Top 10 neurons with highest absolute weights:")
# for rank, (idx, val) in enumerate(zip(top_10_indices.tolist(), top_10_values.tolist()), start=1):
#     print(f"Rank {rank}: Neuron {idx}, Weight = {float(val):.6f}")  # Ensure val is a float How to prune
# # Get the weights and biases of the fc2 layer
# fc1_weight = model_copy.fc.weight  # Weight matrix, shape (out_features, in_features)
# fc1_bias = model_copy.fc.bias      # Bias vector, shape (out_features,)
#
# # Assume top_10_indices are the indices of the top 10 neurons obtained earlier
# #top_10_indices = torch.tensor([12, 45, 3, 28, 7, 19, 33, 56, 22, 41])  # Example data
# neurons_to_zero = top_10_indices[:40]  # Take the first 7 neurons
# print("Neurons to zero:", neurons_to_zero)
#
# # Gradually set the weights and biases to 0
# for neuron_idx in neurons_to_zero:
#     # Set the corresponding neuron's weights to 0
#     fc1_weight.data[neuron_idx, :] = 0  # Set the weight row of this neuron to 0
#     # Set the corresponding neuron's bias to 0
#     #fc2_bias.data[neuron_idx] = 0
#     print(f"Neuron {neuron_idx} weight and bias set to 0.")

In [None]:
# model_copy.fc.weight.shape

In [None]:
# def magnitude_based_pruning(model, layer_name='fc', pruning_rate=0.2, pruning_type='global'):
#     """
#     Weight magnitude-based pruning method (applied to specified layer)
#
#     Args:
#         model: Model to be pruned
#         layer_name: Target layer name (consistent 'fc' layer as original method)
#         pruning_rate: Pruning ratio (0-1)
#         pruning_type: 'global' (global pruning) or 'local' (local pruning)
#     """
#     # Get target layer
#     layer = getattr(model, layer_name)
#     weight = layer.weight.data
#     print(layer)
#     # print(weight)
#     if pruning_type == 'global':
#          # ----------------------------
#         # Global pruning strategy (entire weight matrix)
#         # ----------------------------
#         # Flatten weights and calculate number of weights to prune
#         flat_weights = weight.view(-1)
#         num_prune = int(pruning_rate * flat_weights.numel())
#
#         # Find indices of k% smallest absolute weights
#         _, indices = torch.topk(flat_weights.abs(), num_prune, largest=False)
#
#         # Zero out selected weights
#         flat_weights[indices] = 0
#         layer.weight.data = flat_weights.view_as(weight)
#
#     elif pruning_type == 'local':
#         # ----------------------------
#         # Local pruning strategy (per-neuron pruning)
#         # ----------------------------
#         for neuron_idx in range(weight.size(0)):
#             # Get current neuron's input weights
#             neuron_weights = weight[neuron_idx]
#
#             # Calculate number of weights to prune
#             num_prune_neuron = int(pruning_rate * neuron_weights.numel())
#
#             # Find smallest absolute weights for current neuron
#             _, indices = torch.topk(neuron_weights.abs(), num_prune_neuron, largest=False)
#
#             # Zero out selected weights
#             neuron_weights[indices] = 0
#
#     else:
#         raise ValueError(f"Unsupported pruning type: {pruning_type}")
#
# # Usage example (applied to same layer as original method)
# # --------------------------------------------------
# # Global pruning: prune 40% of smallest absolute weights in fc layer
# magnitude_based_pruning(model_copy, layer_name='fc', pruning_rate=0.5, pruning_type='global')
#
# # Local pruning: prune 30% of input connections per neuron
# # magnitude_based_pruning(model_copy, layer_name='fc', pruning_rate=0.3, pruning_type='local')

In [None]:
def hessian_based_pruning(model, dataloader, layer_name='fc', pruning_rate=0.2, device='cuda'):
    """Zero out the least curvature-sensitive weights of one layer, in place.

    For every batch the cross-entropy loss (summed over the batch) is
    differentiated twice with respect to ``layer.weight``. The second pass
    back-propagates an all-ones tensor through the first gradient, so the
    quantity obtained is the Hessian-vector product with a vector of ones
    (an approximation, not the exact Hessian diagonal). Its absolute value,
    accumulated over all batches, is the importance score; the
    ``pruning_rate`` fraction of weights with the smallest score is set to 0.

    Args:
        model: Module exposing the target layer as attribute ``layer_name``.
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        layer_name: Attribute name of the layer whose ``weight`` is pruned.
        pruning_rate: Fraction (0..1) of the layer's weights to zero out.
        device: Device the batches are moved to before the forward pass.
            The model itself is not moved by this function.

    Returns:
        None. ``layer.weight`` is modified in place.

    Raises:
        ValueError: If ``pruning_rate`` is outside ``[0, 1]``.
    """
    if not 0 <= pruning_rate <= 1:
        raise ValueError(f"pruning_rate must be in [0, 1], got {pruning_rate}")

    model.eval()
    layer = getattr(model, layer_name)
    weight = layer.weight.data

    # Accumulated |second-order term| per weight, summed over all batches
    hessian_diag = torch.zeros_like(weight)
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        loss = torch.nn.functional.cross_entropy(outputs, targets, reduction='sum')
        # autograd.grad returns a tuple (one entry per input); unpack the
        # single tensor so it can be differentiated a second time.
        (grad,) = torch.autograd.grad(loss, layer.weight, create_graph=True)
        (second_order,) = torch.autograd.grad(
            grad, layer.weight, grad_outputs=torch.ones_like(grad)
        )
        hessian_diag += second_order.abs()

    # Select least important weights
    flat_hessian = hessian_diag.view(-1)
    num_prune = int(pruning_rate * flat_hessian.numel())
    _, indices = torch.topk(flat_hessian, num_prune, largest=False)

    # The flattened view shares storage with the parameter, so zeroing it
    # prunes the layer in place.
    flat_weights = weight.view(-1)
    flat_weights[indices] = 0

In [None]:
# Prune model_copy's default target layer ('fc') using the fine-tuning batches.
# The batches are sent to whichever device the model's parameters currently
# live on, instead of relying on the function's hard-coded 'cuda' default, so
# this cell also runs on CPU-only machines.
hessian_based_pruning(
    model_copy,
    fine_tuning_load,
    device=next(model_copy.parameters()).device,
)

In [None]:
# Evaluation of the pruned model_copy.
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model_copy.to(device)

# Accuracy on the two cluster loaders and on the complete training loader
# (what the clusters contain is defined elsewhere in the notebook).
accuracy_cluster_0, accuracy_cluster_1, accuracy_cluster = (
    evaluate_model(model_copy, loader, device)
    for loader in (trainloader_0, trainloader_1, trainloader)
)
print(
    f"Accuracy on Cluster 0: {accuracy_cluster_0:.2f}%",
    f"Accuracy on Cluster 1: {accuracy_cluster_1:.2f}%",
    f"Accuracy on Cluster : {accuracy_cluster:.2f}%",
    sep="\n",
)

# Accuracy on the attack / clean training loaders and on the test loader
accuracy_attack_train, accuracy_clean_train, accuracy_test = (
    evaluate_model(model_copy, loader, device)
    for loader in (attackTrainloader, cleanTrainloader, testloader)
)
print(
    f"Accuracy on attack_train : {accuracy_attack_train:.2f}%",
    f"Accuracy on clean_train : {accuracy_clean_train:.2f}%",
    sep="\n",
)

# Keep the post-pruning test accuracy under its own name for later cells
accuracy_prune = accuracy_test
print(f"Accuracy on test : {accuracy_test:.2f}%")

# Macro-averaged classification metrics on the test loader
metric_prune = evaluate_model_with_metrics(model_copy, testloader, device, average='macro')
print(
    f"Accuracy:  {metric_prune['accuracy']*100:.2f}%",
    f"Precision: {metric_prune['precision']:.4f}",
    f"Recall:    {metric_prune['recall']:.4f}",
    f"F1-score:  {metric_prune['f1']:.4f}",
    sep="\n",
)

In [None]:
# Notebook display: show the test accuracy of the pruned model stored in accuracy_prune
accuracy_prune

In [None]:
import copy

# Preparation for fine-tuning: take an independent deep copy of the current
# model_copy and place it on the active device (GPU when available, else CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_copy_fine_all = copy.deepcopy(model_copy).to(device)

# Print the copied architecture as a quick sanity check
print(model_copy_fine_all)

In [None]:
# Select the GPU when available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to device
model_copy = model_copy.to(device)

# Freeze every parameter whose name does not start with "fc", so only the
# fc* layer(s) stay trainable. A separate "fc2" prefix test is unnecessary:
# any name starting with "fc2" also starts with "fc".
for name, param in model_copy.named_parameters():
    if not name.startswith("fc"):
        param.requires_grad = False

# Report which parameters are frozen and which remain trainable
for name, param in model_copy.named_parameters():
    print(f"{name}: requires_grad = {param.requires_grad}")

# Define optimizer - only optimize parameters that still require gradients
optimizer = optim.Adam(
    [p for p in model_copy.parameters() if p.requires_grad],
    lr=0.001,
    weight_decay=1e-5,  # Add weight decay to prevent overfitting
)

# Define loss function
criterion = nn.CrossEntropyLoss()

import time

# Fine-tune model_copy for `fineepochs` epochs on fine_tuning_load, recording
# wall-clock time per batch and per epoch.
total_training_time = 0  # Accumulated wall-clock seconds over all epochs

for epoch in range(fineepochs):
    epoch_start_time = time.time()  # Record epoch start time
    model_copy.train()  # Set model to training mode
    running_loss = 0.0
    batch_times = []  # Wall-clock seconds of each batch in this epoch

    for inputs, labels in fine_tuning_load:
        batch_start_time = time.time()  # Record batch start time

        # Move input data and labels to device
        inputs, labels = inputs.to(device), labels.to(device)

        # Standard optimisation step: reset grads, forward, loss, backward, update
        optimizer.zero_grad()
        outputs = model_copy(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch mean loss
        running_loss += loss.item()

        # Record batch time
        batch_time = time.time() - batch_start_time
        batch_times.append(batch_time)

    # Calculate epoch time
    epoch_time = time.time() - epoch_start_time
    total_training_time += epoch_time

    # Calculate average batch time
    avg_batch_time = sum(batch_times)/len(batch_times) if batch_times else 0

    # Print training info (with time statistics).
    # The mean loss is taken over the batches actually iterated in this epoch
    # (one batch_times entry per fine_tuning_load batch); dividing by
    # len(trainloader) would misreport it whenever the two loaders differ in
    # length. max(..., 1) guards against an empty loader.
    print(f"Epoch [{epoch + 1}/{fineepochs}], "
          f"Loss: {running_loss / max(len(batch_times), 1):.4f}, "
          f"Epoch Time: {epoch_time:.2f}s, "
          f"Avg Batch Time: {avg_batch_time*1000:.1f}ms")

# Final time statistics
print(f"\nTraining completed!")
print(f"Total training time: {total_training_time:.2f} seconds")
print(f"Average epoch time: {total_training_time/fineepochs:.2f}s")
# Mean epoch duration of the fc-only fine-tuning run, kept for the final report
time_fc1_fc2 = total_training_time/fineepochs

# Measure the accuracy of the fine-tuned model_copy on testloader
model_copy.eval()  # Switch to evaluation mode
correct = 0
total = 0

with torch.no_grad():  # No gradients are needed while scoring
    for inputs, labels in testloader:
        # Bring the batch onto the model's device
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest output along dim 1
        outputs = model_copy(inputs)
        predicted = torch.max(outputs, dim=1).indices

        # Tally hits and the number of samples seen
        correct += int((predicted == labels).sum())
        total += labels.size(0)

# Print accuracy
print(f"Validation Accuracy: {100 * correct / total:.2f}%")

In [None]:
# Run on the GPU when one is present, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Place the full-fine-tuning copy on that device
model_copy_fine_all = model_copy_fine_all.to(device)

# Cross-entropy objective, optimised with Adam over every parameter of the model
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model_copy_fine_all.parameters(), lr=0.001)

import time

# Fine-tune model_copy_fine_all for `fineepochs` epochs on fine_tuning_load,
# recording wall-clock time per batch and per epoch.
total_training_time = 0  # Accumulated wall-clock seconds over all epochs

for epoch in range(fineepochs):
    epoch_start_time = time.time()  # Record epoch start time
    model_copy_fine_all.train()  # Set model to training mode
    running_loss = 0.0
    batch_times = []  # Wall-clock seconds of each batch in this epoch

    for inputs, labels in fine_tuning_load:
        batch_start_time = time.time()  # Record batch start time

        # Move inputs and labels to device
        inputs, labels = inputs.to(device), labels.to(device)

        # Standard optimisation step: reset grads, forward, loss, backward, update
        optimizer.zero_grad()
        outputs = model_copy_fine_all(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch mean loss
        running_loss += loss.item()

        # Track batch time
        batch_time = time.time() - batch_start_time
        batch_times.append(batch_time)

    # Calculate epoch time
    epoch_time = time.time() - epoch_start_time
    total_training_time += epoch_time

    # Calculate average batch time
    avg_batch_time = sum(batch_times)/len(batch_times) if batch_times else 0

    # Print training info (with timing).
    # The mean loss is taken over the batches actually iterated in this epoch
    # (one batch_times entry per fine_tuning_load batch); dividing by
    # len(trainloader) would misreport it whenever the two loaders differ in
    # length. max(..., 1) guards against an empty loader.
    print(f"Epoch [{epoch + 1}/{fineepochs}], "
          f"Loss: {running_loss / max(len(batch_times), 1):.4f}, "
          f"Epoch Time: {epoch_time:.2f}s, "
          f"Avg Batch Time: {avg_batch_time*1000:.1f}ms")

# Final timing statistics
print(f"\nTraining completed!")
print(f"Total training time: {total_training_time:.2f} seconds")
print(f"Average epoch time: {total_training_time/fineepochs:.2f}s")
# Mean epoch duration of the all-parameter fine-tuning run, kept for the final report
time_all = total_training_time/fineepochs

# Measure the accuracy of the fully fine-tuned model_copy_fine_all on testloader
model_copy_fine_all.eval()  # Switch to evaluation mode
correct = 0
total = 0

with torch.no_grad():  # No gradients are needed while scoring
    for inputs, labels in testloader:
        # Bring the batch onto the model's device
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest output along dim 1
        outputs = model_copy_fine_all(inputs)
        predicted = torch.max(outputs, dim=1).indices

        # Tally hits and the number of samples seen
        correct += int((predicted == labels).sum())
        total += labels.size(0)

# Print accuracy
print(f"Validation Accuracy: {100 * correct / total:.2f}%")

In [None]:
# Final report: compare model_copy (fine-tuned with only fc* parameters
# trainable) against model_copy_fine_all (fine-tuned on all parameters).
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model_copy.to(device)

# model_copy on the two cluster loaders and on the complete training loader
accuracy_cluster_0 = evaluate_model(model_copy, trainloader_0, device)
accuracy_cluster_1 = evaluate_model(model_copy, trainloader_1, device)
accuracy_cluster = evaluate_model(model_copy, trainloader, device)
print(
    f"Accuracy on Cluster 0: {accuracy_cluster_0:.2f}%",
    f"Accuracy on Cluster 1: {accuracy_cluster_1:.2f}%",
    f"Accuracy on Cluster : {accuracy_cluster:.2f}%",
    sep="\n",
)

# model_copy on the attack / clean training loaders, then both models on the test loader
accuracy_attack_train = evaluate_model(model_copy, attackTrainloader, device)
accuracy_clean_train = evaluate_model(model_copy, cleanTrainloader, device)
accuracy_test = evaluate_model(model_copy, testloader, device)
accuracy_test_all = evaluate_model(model_copy_fine_all, testloader, device)
print(
    f"Accuracy on attack_train : {accuracy_attack_train:.2f}%",
    f"Accuracy on clean_train : {accuracy_clean_train:.2f}%",
    sep="\n",
)

# Side-by-side summary. accuracy_source, accuracy_prune, time_fc1_fc2 and
# time_all are produced by other cells of the notebook.
print(f"Accuracy on test : {accuracy_source:.2f}%")
print(f"Accuracy on test_prune : {accuracy_prune:.2f}%")
print(f"Average epoch time on special: {time_fc1_fc2:.2f}s")
print(f"Accuracy on test_fin-tuning special : {accuracy_test:.2f}%")
print(f"Average epoch time on all: {time_all:.2f}s")
print(f"Accuracy on test_fin-tuning all : {accuracy_test_all:.2f}%")

# Detailed test-set metrics for both fine-tuned models
metric_fin_single = evaluate_model_with_metrics(model_copy, testloader, device)
metric_fin_all = evaluate_model_with_metrics(model_copy_fine_all, testloader, device)
for _banner, _scores in (
    ("single fine -tuning ", metric_fin_single),
    ("All fine -tuning ", metric_fin_all),
):
    print(_banner)
    print(f"Accuracy:  {_scores['accuracy']*100:.2f}%")
    print(f"Precision: {_scores['precision']:.4f}")
    print(f"Recall:    {_scores['recall']:.4f}")
    print(f"F1-score:  {_scores['f1']:.4f}")

In [None]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR

def ResNet18_CIFAR10(num_classes=10):
    """Construct a ResNet18 built from BasicBlock.

    Args:
        num_classes: Forwarded unchanged to ResNet18 as ``num_classes``.

    Returns:
        The object returned by ``ResNet18(...)``.
    """
    # Second positional argument of ResNet18; presumably the number of
    # blocks per stage - confirm against the ResNet18 definition.
    depth_config = [2, 2, 2, 2]
    return ResNet18(BasicBlock, depth_config, num_classes=num_classes)

# Build a new 10-class ResNet18 instance and move it to the active device
model_fresh = ResNet18_CIFAR10(num_classes=10)
model_fresh = model_fresh.to(device)

# Cross-entropy loss, SGD with momentum and weight decay
criterion_fresh = nn.CrossEntropyLoss()
optimizer_fresh = optim.SGD(
    model_fresh.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
)

# Multiply the learning rate by 0.2 at epochs 60, 120 and 160.
# NOTE(review): the training loop runs for `fineepochs` epochs; if that is
# below 60 no milestone is ever reached - confirm this is intended.
scheduler_fresh = MultiStepLR(optimizer_fresh, milestones=[60, 120, 160], gamma=0.2)

# Per-epoch history: training loss, test accuracy and wall-clock timings
train_losses_fresh, test_accuracies_fresh = [], []
epoch_times, train_times, test_times = [], [], []  # total / training / testing seconds

for epoch in range(fineepochs):
    start_time = time.time()  # Start of the whole epoch (train + test)

    # ---- Training pass over trainloader ----
    model_fresh.train()
    running_loss = 0.0
    train_start = time.time()
    for inputs, targets in trainloader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer_fresh.zero_grad()
        outputs = model_fresh(inputs)
        loss = criterion_fresh(outputs, targets)
        loss.backward()
        optimizer_fresh.step()
        running_loss += loss.item()
    # Mean of the per-batch losses for this epoch
    avg_loss = running_loss / len(trainloader)
    train_losses_fresh.append(avg_loss)
    train_time = time.time() - train_start
    train_times.append(train_time)

    # ---- Test pass over testloader ----
    model_fresh.eval()
    correct = 0
    total = 0
    test_start = time.time()
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model_fresh(inputs)
            # Predicted class = index of the largest output along dim 1
            pred = torch.max(outputs, dim=1).indices
            total += targets.size(0)
            correct += int((pred == targets).sum())
    acc = 100.0 * correct / total
    test_accuracies_fresh.append(acc)
    test_time = time.time() - test_start
    test_times.append(test_time)

    # Duration of the complete epoch
    epoch_time = time.time() - start_time
    epoch_times.append(epoch_time)

    # Advance the learning-rate schedule once per epoch
    scheduler_fresh.step()

    # Per-epoch progress line
    print(f"[Fresh] Epoch {epoch+1:3d}/{fineepochs} → "
          f"Loss: {avg_loss:.4f}, Test Acc: {acc:.2f}%, "
          f"Train Time: {train_time:.2f}s, Test Time: {test_time:.2f}s, "
          f"Epoch Time: {epoch_time:.2f}s")

# Mean per-epoch durations of the training pass, the test pass and the whole epoch
avg_train_time, avg_test_time, avg_epoch_time = (
    sum(series) / len(series)
    for series in (train_times, test_times, epoch_times)
)

print(f"\nAverage Time per Epoch → "
      f"Train: {avg_train_time:.2f}s, Test: {avg_test_time:.2f}s, "
      f"Total: {avg_epoch_time:.2f}s")

# Final test-set accuracy of the freshly trained model
accuracy_model_fresh = evaluate_model(model_fresh, testloader, device)
print(f"Accuracy on fresh model: {accuracy_model_fresh:.2f}%")

# Detailed test-set metrics of the freshly trained model
metric_fresh = evaluate_model_with_metrics(model_fresh, testloader, device)
print(
    "Fresh model ",
    f"Accuracy:  {metric_fresh['accuracy']*100:.2f}%",
    f"Precision: {metric_fresh['precision']:.4f}",
    f"Recall:    {metric_fresh['recall']:.4f}",
    f"F1-score:  {metric_fresh['f1']:.4f}",
    sep="\n",
)