# Set up for dataset and model

Package installation, loading, and dataloaders. There's also a resnet18 model defined.

In [1]:
# !pip install tensorboardX

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
import matplotlib.pyplot as plt
from tqdm import tqdm

from torchvision import datasets, transforms
# from tensorboardX import SummaryWriter

use_cuda = True
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 64

np.random.seed(42)
torch.manual_seed(42)


## Dataloaders
train_dataset = datasets.CIFAR10('cifar10_data/', train=True, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))
test_dataset = datasets.CIFAR10('cifar10_data/', train=False, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar10_data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 98347875.29it/s] 


Extracting cifar10_data/cifar-10-python.tar.gz to cifar10_data/
Files already downloaded and verified


In [2]:

def tp_relu(x, delta=1.):
    ind1 = (x < -1. * delta).float()
    ind2 = (x > delta).float()
    return .5 * (x + delta) * (1 - ind1) * (1 - ind2) + x * ind2

def tp_smoothed_relu(x, delta=1.):
    ind1 = (x < -1. * delta).float()
    ind2 = (x > delta).float()
    return (x + delta) ** 2 / (4 * delta) * (1 - ind1) * (1 - ind2) + x * ind2

class Normalize(nn.Module):
    def __init__(self, mu, std):
        super(Normalize, self).__init__()
        self.mu, self.std = mu, std

    def forward(self, x):
        return (x - self.mu) / self.std

class IdentityLayer(nn.Module):
    def forward(self, inputs):
        return inputs
    
class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1

    def __init__(self, in_planes, planes, bn, learnable_bn, stride=1, activation='relu'):
        super(PreActBlock, self).__init__()
        self.collect_preact = True
        self.activation = activation
        self.avg_preacts = []
        self.bn1 = nn.BatchNorm2d(in_planes, affine=learnable_bn) if bn else IdentityLayer()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=not learnable_bn)
        self.bn2 = nn.BatchNorm2d(planes, affine=learnable_bn) if bn else IdentityLayer()
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=not learnable_bn)

        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=not learnable_bn)
            )

    def act_function(self, preact):
        if self.activation == 'relu':
            act = F.relu(preact)
        elif self.activation[:6] == '3prelu':
            act = tp_relu(preact, delta=float(self.activation.split('relu')[1]))
        elif self.activation[:8] == '3psmooth':
            act = tp_smoothed_relu(preact, delta=float(self.activation.split('smooth')[1]))
        else:
            assert self.activation[:8] == 'softplus'
            beta = int(self.activation.split('softplus')[1])
            act = F.softplus(preact, beta=beta)
        return act

    def forward(self, x):
        out = self.act_function(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x  # Important: using out instead of x
        out = self.conv1(out)
        out = self.conv2(self.act_function(self.bn2(out)))
        out += shortcut
        return out

class PreActResNet(nn.Module):
    def __init__(self, block, num_blocks, n_cls, cuda=True, half_prec=False,
        activation='relu', fts_before_bn=False, normal='none'):
        super(PreActResNet, self).__init__()
        self.bn = True
        self.learnable_bn = True  # doesn't matter if self.bn=False
        self.in_planes = 64
        self.avg_preact = None
        self.activation = activation
        self.fts_before_bn = fts_before_bn
        if normal == 'cifar10':
            self.mu = torch.tensor((0.4914, 0.4822, 0.4465)).view(1, 3, 1, 1)
            self.std = torch.tensor((0.2471, 0.2435, 0.2616)).view(1, 3, 1, 1)
        else:
            self.mu = torch.tensor((0.0, 0.0, 0.0)).view(1, 3, 1, 1)
            self.std = torch.tensor((1.0, 1.0, 1.0)).view(1, 3, 1, 1)
            print('no input normalization')
        if cuda:
            self.mu = self.mu.cuda()
            self.std = self.std.cuda()
        if half_prec:
            self.mu = self.mu.half()
            self.std = self.std.half()

        self.normalize = Normalize(self.mu, self.std)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=not self.learnable_bn)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.bn = nn.BatchNorm2d(512 * block.expansion)
        self.linear = nn.Linear(512*block.expansion, n_cls)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, self.bn, self.learnable_bn, stride, self.activation))
            # layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, return_features=False):
        for layer in [*self.layer1, *self.layer2, *self.layer3, *self.layer4]:
            layer.avg_preacts = []

        out = self.normalize(x)
        out = self.conv1(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        if return_features and self.fts_before_bn:
            return out.view(out.size(0), -1)
        out = F.relu(self.bn(out))
        if return_features:
            return out.view(out.size(0), -1)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)

        return out


def PreActResNet18(n_cls, cuda=True, half_prec=False, activation='relu', fts_before_bn=False,
    normal='none'):
    #print('initializing PA RN-18 with act {}, normal {}'.format())
    return PreActResNet(PreActBlock, [2, 2, 2, 2], n_cls=n_cls, cuda=cuda, half_prec=half_prec,
        activation=activation, fts_before_bn=fts_before_bn, normal=normal)


# intialize the model
model = PreActResNet18(10, cuda=True, activation='softplus1').to(device)
model.eval()

no input normalization


PreActResNet(
  (normalize): Normalize()
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): PreActBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): PreActBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
  )
  (layer2): Sequential(
    (0): PreActBloc

# Implement the Attacks

Functions are given a simple useful signature that you can start with. Feel free to extend the signature as you see fit.

You may find it useful to create a 'batched' version of PGD that you can use to create the adversarial attack.

In [3]:
def pgd_linf_untargeted(model, x, labels, k, eps, eps_step):
    model.eval()
    ce_loss = torch.nn.CrossEntropyLoss()
    adv_x = x.clone().detach()
    adv_x.requires_grad_(True) 
    for _ in range(k):
        adv_x.requires_grad_(True)
        model.zero_grad()
        output = model(adv_x)
        # TODO: Calculate the loss
        loss = ce_loss(output, labels)
        loss.backward()
        # TODO: compute the adv_x
        # find delta, clamp with eps
        x_perturbation = eps_step * adv_x.grad.sign()
        adv_x = adv_x.detach() + x_perturbation
        delta = adv_x - x
        delta = torch.clamp(delta, min=-eps, max=eps)
        adv_x = torch.clamp(x + delta, min=0, max=1).detach()

    return adv_x

In [4]:
def pgd_l2_untargeted(model, x, labels, k, eps, eps_step):
    model.eval()
    ce_loss = torch.nn.CrossEntropyLoss()
    adv_x = x.clone().detach()
    adv_x.requires_grad_(True) 
    for _ in range(k):
        adv_x.requires_grad_(True)
        model.zero_grad()
        output = model(adv_x)
        batch_size = x.size()[0]
        # TODO: Calculate the loss
        loss = ce_loss(output, labels)
        loss.backward()
        grad = adv_x.grad.sign()
        # TODO: compute the adv_x
        # find delta, clamp with eps, project delta to the l2 ball
        # HINT: https://github.com/Harry24k/adversarial-attacks-pytorch/blob/master/torchattacks/attacks/pgdl2.py 
        grad_norms = torch.norm(grad.view(batch_size, -1), p=2, dim=1)
        grad = grad / grad_norms.view(batch_size, 1, 1, 1)

        # Take a step in the direction of the gradient
        adv_x = adv_x.detach() + eps_step * grad

        # Project back into L2 epsilon-ball
        delta = adv_x - x
        delta_norms = torch.norm(delta.view(batch_size, -1), p=2, dim=1)
        # Scaling factor to ensure ||delta||_2 <= eps
        factor = eps / delta_norms
        factor = torch.min(factor, torch.ones_like(delta_norms))
        delta = delta * factor.view(-1, 1, 1, 1)

        # Apply perturbation and clip to valid range [0, 1]
        adv_x = torch.clamp(x + delta, 0, 1).detach()
    return adv_x

# Evaluate Single and Multi-Norm Robust Accuracy

In this section, we evaluate the model on the Linf and L2 attacks as well as union accuracy.

In [11]:
def test_model_on_single_attack(model, attack='pgd_linf', eps=0.1):
    model.eval()
    tot_test, tot_acc = 0.0, 0.0
    ground_acc = 0
    for batch_idx, (x_batch, y_batch) in tqdm(enumerate(test_loader), total=len(test_loader), desc="Evaluating"):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        if attack == 'pgd_linf':
            # TODO: get x_adv untargeted pgd linf with eps, and eps_step=eps/4
            adv_x = pgd_linf_untargeted(model, x_batch, y_batch, 1, eps, eps/4)
            
        elif attack == 'pgd_l2':
            # TODO: get x_adv untargeted pgd l2 with eps, and eps_step=eps/4
            adv_x = pgd_l2_untargeted(model, x_batch, y_batch, 1, eps, eps/4)
        else:
            pass
        
        out = model(adv_x)
        pred = torch.max(out, dim=1)[1]
        # get the testing accuracy and update tot_test and tot_acc
        tot_acc += (pred == y_batch).sum().item()
        tot_test += y_batch.size(0)

        ground_out = model(x_batch)
        ground_pred = torch.max(ground_out, dim=1)[1]
        ground_acc += (ground_pred == y_batch).sum().item()
        
            
    print('Robust accuracy %.5lf' % (tot_acc/tot_test), f'on {attack} attack with eps = {eps}')
    print('Standard accuracy %.5lf' % (ground_acc/tot_test), f'on {attack} attack with eps = {eps}')



## Single-Norm Robust Accuracy

In [12]:
# # Evaluate on Linf attack with different models with eps = 8/255
# model.load_state_dict(torch.load('models/pretr_Linf.pth'))
# # Evaluate on Linf attack with model 1 with eps = 8/255
# test_model_on_single_attack(model, 'pgd_linf', eps=8/255)

model.load_state_dict(torch.load('models/pretr_RAMP.pth'))
# Evaluate on Linf attack with model 3 with eps = 8/255
test_model_on_single_attack(model, 'pgd_linf', eps=8/255)

model.load_state_dict(torch.load("adversarial_model_eps4.pth"))
test_model_on_single_attack(model, 'pgd_linf', eps=8/255)
model.load_state_dict(torch.load("adversarial_model_eps8.pth"))
test_model_on_single_attack(model, 'pgd_linf', eps=8/255)
model.load_state_dict(torch.load("adversarial_model_eps16.pth"))
test_model_on_single_attack(model, 'pgd_linf', eps=8/255)


# model.load_state_dict(torch.load('models/pretr_L2.pth'))
# # Evaluate on Linf attack with model 2 with eps = 8/255
# test_model_on_single_attack(model, 'pgd_linf', eps=8/255)

# model.load_state_dict(torch.load('models/pretr_RAMP.pth'))
# # Evaluate on Linf attack with model 3 with eps = 8/255
# test_model_on_single_attack(model, 'pgd_linf', eps=8/255)

  model.load_state_dict(torch.load('models/pretr_RAMP.pth'))
Evaluating: 100%|██████████| 157/157 [00:04<00:00, 37.46it/s]
  model.load_state_dict(torch.load("adversarial_model_eps4.pth"))


Robust accuracy 0.24600 on pgd_linf attack with eps = 0.03137254901960784
Standard accuracy 0.37920 on pgd_linf attack with eps = 0.03137254901960784


Evaluating: 100%|██████████| 157/157 [00:04<00:00, 38.33it/s]
  model.load_state_dict(torch.load("adversarial_model_eps8.pth"))


Robust accuracy 0.35800 on pgd_linf attack with eps = 0.03137254901960784
Standard accuracy 0.41860 on pgd_linf attack with eps = 0.03137254901960784


Evaluating: 100%|██████████| 157/157 [00:04<00:00, 38.02it/s]
  model.load_state_dict(torch.load("adversarial_model_eps16.pth"))


Robust accuracy 0.31970 on pgd_linf attack with eps = 0.03137254901960784
Standard accuracy 0.35790 on pgd_linf attack with eps = 0.03137254901960784


Evaluating: 100%|██████████| 157/157 [00:04<00:00, 37.85it/s]

Robust accuracy 0.27080 on pgd_linf attack with eps = 0.03137254901960784
Standard accuracy 0.28870 on pgd_linf attack with eps = 0.03137254901960784





In [13]:
# Evaluate on L2 attack with different models with eps = 0.75
# model.load_state_dict(torch.load('models/pretr_Linf.pth'))
# # Evaluate on Linf attack with model 1 with eps = 0.75
# test_model_on_single_attack(model, 'pgd_l2', eps=0.75)

# model.load_state_dict(torch.load('models/pretr_L2.pth'))
# # Evaluate on Linf attack with model 2 with eps = 0.75
# test_model_on_single_attack(model, 'pgd_l2', eps=0.75)

model.load_state_dict(torch.load('models/pretr_RAMP.pth'))
# Evaluate on Linf attack with model 3 with eps = 0.75
test_model_on_single_attack(model, 'pgd_l2', eps=0.75)


model.load_state_dict(torch.load("adversarial_model_eps4.pth"))
test_model_on_single_attack(model, 'pgd_l2', eps=0.75)
model.load_state_dict(torch.load("adversarial_model_eps8.pth"))
test_model_on_single_attack(model, 'pgd_l2', eps=0.75)
model.load_state_dict(torch.load("adversarial_model_eps16.pth"))
test_model_on_single_attack(model, 'pgd_l2', eps=0.75)

# model.load_state_dict(torch.load('models/pretr_RAMP.pth'))
# # Evaluate on Linf attack with model 3 with eps = 0.75
# test_model_on_single_attack(model, 'pgd_l2', eps=0.75)

  model.load_state_dict(torch.load('models/pretr_RAMP.pth'))
Evaluating: 100%|██████████| 157/157 [00:04<00:00, 36.80it/s]
  model.load_state_dict(torch.load("adversarial_model_eps4.pth"))


Robust accuracy 0.31560 on pgd_l2 attack with eps = 0.75
Standard accuracy 0.37920 on pgd_l2 attack with eps = 0.75


Evaluating: 100%|██████████| 157/157 [00:04<00:00, 37.73it/s]
  model.load_state_dict(torch.load("adversarial_model_eps8.pth"))


Robust accuracy 0.39050 on pgd_l2 attack with eps = 0.75
Standard accuracy 0.41860 on pgd_l2 attack with eps = 0.75


Evaluating: 100%|██████████| 157/157 [00:04<00:00, 37.75it/s]
  model.load_state_dict(torch.load("adversarial_model_eps16.pth"))


Robust accuracy 0.34190 on pgd_l2 attack with eps = 0.75
Standard accuracy 0.35790 on pgd_l2 attack with eps = 0.75


Evaluating: 100%|██████████| 157/157 [00:04<00:00, 37.45it/s]

Robust accuracy 0.28110 on pgd_l2 attack with eps = 0.75
Standard accuracy 0.28870 on pgd_l2 attack with eps = 0.75





In [None]:
# Adversarial training function
def adversarial_train(model, train_loader, optimizer, epoch, eps, eps_step, k=10):
    """
    Perform one epoch of adversarial training
    
    Args:
        model: neural network model
        train_loader: training data loader
        optimizer: optimizer
        epoch: current epoch number
        eps: perturbation budget for PGD
        eps_step: step size for PGD
        k: number of PGD steps
    """
    model.train()
    ce_loss = torch.nn.CrossEntropyLoss()
    train_loss = 0
    correct = 0
    total = 0
    
    pbar = tqdm(train_loader, desc=f'Epoch {epoch}')
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        
        # Generate adversarial examples
        adv_data = pgd_linf_untargeted(model, data, target, k, eps, eps_step)
        
        # Train on adversarial examples
        model.train()
        optimizer.zero_grad()
        output = model(adv_data)
        loss = ce_loss(output, target)
        loss.backward()
        optimizer.step()
        
        # Track statistics
        train_loss += loss.item()
        pred = output.argmax(dim=1)
        correct += pred.eq(target).sum().item()
        total += target.size(0)
        
        pbar.set_postfix({
            'loss': train_loss / (batch_idx + 1),
            'acc': 100. * correct / total
        })
    
    return train_loss / len(train_loader), 100. * correct / total


# Evaluation function
def evaluate(model, test_loader, eps=0.0, attack_type='clean', k=10):
    """
    Evaluate model on clean or adversarial examples
    
    Args:
        model: neural network model
        test_loader: test data loader
        eps: perturbation budget (0 for clean evaluation)
        attack_type: 'clean' or 'pgd_linf'
        k: number of PGD steps for attack
    """
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad() if attack_type == 'clean' else torch.enable_grad():
        for data, target in tqdm(test_loader, desc=f'Evaluating {attack_type}'):
            data, target = data.to(device), target.to(device)
            
            if attack_type == 'pgd_linf' and eps > 0:
                data = pgd_linf_untargeted(model, data, target, k, eps, eps/4)
            
            output = model(data)
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            total += target.size(0)
    
    accuracy = 100. * correct / total
    return accuracy


# Main training and evaluation
def train_and_evaluate(eps_train, num_epochs=50):
    """
    Train model with adversarial training and evaluate
    
    Args:
        eps_train: epsilon value for adversarial training
        num_epochs: number of training epochs
    """
    print(f"\n{'='*60}")
    print(f"Training with epsilon = {eps_train} ({eps_train*255:.2f}/255)")
    print(f"{'='*60}\n")
    
    # Initialize model
    model = PreActResNet18(10, cuda=True, activation='softplus1', normal='cifar10').to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[40, 45], gamma=0.1)
    
    # Training
    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = adversarial_train(
            model, train_loader, optimizer, epoch, 
            eps=eps_train, eps_step=eps_train/4, k=10
        )
        scheduler.step()
        
        if epoch % 10 == 0:
            print(f"\nEpoch {epoch}: Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.2f}%")
    
    # Evaluation
    print(f"\n{'='*60}")
    print(f"Evaluation Results (trained with eps = {eps_train})")
    print(f"{'='*60}")
    
    # Standard accuracy (clean examples)
    clean_acc = evaluate(model, test_loader, eps=0.0, attack_type='clean')
    print(f"Standard Accuracy (clean): {clean_acc:.2f}%")
    
    # Robust accuracy at different epsilon values
    test_epsilons = [4/255, 8/255, 16/255]
    for eps_test in test_epsilons:
        robust_acc = evaluate(model, test_loader, eps=eps_test, attack_type='pgd_linf', k=20)
        print(f"Robust Accuracy (PGD eps={eps_test*255:.0f}/255): {robust_acc:.2f}%")
    
    # Save model
    torch.save(model.state_dict(), f'adversarial_model_eps{int(eps_train*255)}.pth')
    print(f"\nModel saved as adversarial_model_eps{int(eps_train*255)}.pth")
    
    return model, clean_acc


# Run experiments with different epsilon values

# Training epsilon values to test
epsilon_values = [4/255, 8/255, 16/255]

results = {}
for eps in epsilon_values:
    model, clean_acc = train_and_evaluate(eps, num_epochs=10)
    results[eps] = clean_acc

# Summary
print(f"\n{'='*60}")
print("SUMMARY OF RESULTS")
print(f"{'='*60}")
for eps, acc in results.items():
    print(f"Training eps = {eps*255:.0f}/255: Standard Accuracy = {acc:.2f}%")


Training with epsilon = 0.01568627450980392 (4.00/255)



Epoch 1: 100%|██████████| 782/782 [01:43<00:00,  7.53it/s, loss=1.93, acc=28.4]
Epoch 2: 100%|██████████| 782/782 [01:42<00:00,  7.63it/s, loss=1.74, acc=34.6]
Epoch 3: 100%|██████████| 782/782 [01:42<00:00,  7.64it/s, loss=1.66, acc=38]  
Epoch 4: 100%|██████████| 782/782 [01:42<00:00,  7.63it/s, loss=1.6, acc=40.6] 
Epoch 5: 100%|██████████| 782/782 [01:41<00:00,  7.67it/s, loss=1.55, acc=42.3]
Epoch 6: 100%|██████████| 782/782 [01:42<00:00,  7.64it/s, loss=1.51, acc=43.8]
Epoch 7: 100%|██████████| 782/782 [01:42<00:00,  7.65it/s, loss=1.48, acc=45.3]
Epoch 8: 100%|██████████| 782/782 [01:41<00:00,  7.68it/s, loss=1.45, acc=46.2]
Epoch 9: 100%|██████████| 782/782 [01:45<00:00,  7.41it/s, loss=1.43, acc=47.3]
Epoch 10: 100%|██████████| 782/782 [01:49<00:00,  7.11it/s, loss=1.4, acc=48.6] 



Epoch 10: Train Loss = 1.3960, Train Acc = 48.64%

Evaluation Results (trained with eps = 0.01568627450980392)


Evaluating clean: 100%|██████████| 157/157 [00:01<00:00, 83.91it/s]


Standard Accuracy (clean): 41.86%


Evaluating pgd_linf: 100%|██████████| 157/157 [00:39<00:00,  3.99it/s]


Robust Accuracy (PGD eps=4/255): 30.02%


Evaluating pgd_linf: 100%|██████████| 157/157 [00:38<00:00,  4.06it/s]


Robust Accuracy (PGD eps=8/255): 19.05%


Evaluating pgd_linf: 100%|██████████| 157/157 [00:38<00:00,  4.07it/s]


Robust Accuracy (PGD eps=16/255): 3.80%

Model saved as adversarial_model_eps4.pth

Training with epsilon = 0.03137254901960784 (8.00/255)



Epoch 1: 100%|██████████| 782/782 [01:51<00:00,  7.02it/s, loss=2.1, acc=22]   
Epoch 2: 100%|██████████| 782/782 [01:44<00:00,  7.46it/s, loss=1.96, acc=26.8]
Epoch 3: 100%|██████████| 782/782 [01:35<00:00,  8.16it/s, loss=1.91, acc=28.7]
Epoch 4: 100%|██████████| 782/782 [01:35<00:00,  8.17it/s, loss=1.88, acc=30.1]
Epoch 5: 100%|██████████| 782/782 [01:35<00:00,  8.19it/s, loss=1.87, acc=30.6]
Epoch 6: 100%|██████████| 782/782 [01:35<00:00,  8.20it/s, loss=1.84, acc=31.4]
Epoch 7: 100%|██████████| 782/782 [01:35<00:00,  8.21it/s, loss=1.82, acc=32.1]
Epoch 8: 100%|██████████| 782/782 [01:41<00:00,  7.71it/s, loss=1.8, acc=33]   
Epoch 9: 100%|██████████| 782/782 [01:42<00:00,  7.62it/s, loss=1.78, acc=33.6]
Epoch 10: 100%|██████████| 782/782 [01:42<00:00,  7.64it/s, loss=1.77, acc=34.1]



Epoch 10: Train Loss = 1.7728, Train Acc = 34.14%

Evaluation Results (trained with eps = 0.03137254901960784)


Evaluating clean: 100%|██████████| 157/157 [00:01<00:00, 89.62it/s]


Standard Accuracy (clean): 35.79%


Evaluating pgd_linf: 100%|██████████| 157/157 [00:36<00:00,  4.28it/s]


Robust Accuracy (PGD eps=4/255): 27.98%


Evaluating pgd_linf: 100%|██████████| 157/157 [00:35<00:00,  4.43it/s]


Robust Accuracy (PGD eps=8/255): 20.58%


Evaluating pgd_linf: 100%|██████████| 157/157 [00:35<00:00,  4.41it/s]


Robust Accuracy (PGD eps=16/255): 8.78%

Model saved as adversarial_model_eps8.pth

Training with epsilon = 0.06274509803921569 (16.00/255)



Epoch 1: 100%|██████████| 782/782 [01:42<00:00,  7.66it/s, loss=2.25, acc=16.8]
Epoch 2: 100%|██████████| 782/782 [01:42<00:00,  7.65it/s, loss=2.15, acc=20.6]
Epoch 3: 100%|██████████| 782/782 [01:42<00:00,  7.64it/s, loss=2.14, acc=20.9]
Epoch 4: 100%|██████████| 782/782 [01:42<00:00,  7.64it/s, loss=2.14, acc=21]  
Epoch 5: 100%|██████████| 782/782 [01:41<00:00,  7.67it/s, loss=2.14, acc=21.1]
Epoch 6: 100%|██████████| 782/782 [01:42<00:00,  7.66it/s, loss=2.13, acc=21.3]
Epoch 7: 100%|██████████| 782/782 [01:42<00:00,  7.64it/s, loss=2.13, acc=21.1]
Epoch 8: 100%|██████████| 782/782 [01:42<00:00,  7.61it/s, loss=2.13, acc=21.1]
Epoch 9: 100%|██████████| 782/782 [01:42<00:00,  7.62it/s, loss=2.13, acc=21.6]
Epoch 10: 100%|██████████| 782/782 [01:42<00:00,  7.64it/s, loss=2.13, acc=21.2]



Epoch 10: Train Loss = 2.1259, Train Acc = 21.23%

Evaluation Results (trained with eps = 0.06274509803921569)


Evaluating clean: 100%|██████████| 157/157 [00:01<00:00, 90.87it/s]


Standard Accuracy (clean): 28.87%


Evaluating pgd_linf: 100%|██████████| 157/157 [00:35<00:00,  4.43it/s]


Robust Accuracy (PGD eps=4/255): 25.39%


Evaluating pgd_linf: 100%|██████████| 157/157 [00:35<00:00,  4.42it/s]


Robust Accuracy (PGD eps=8/255): 22.42%


Evaluating pgd_linf: 100%|██████████| 157/157 [00:35<00:00,  4.41it/s]


Robust Accuracy (PGD eps=16/255): 16.60%

Model saved as adversarial_model_eps16.pth

SUMMARY OF RESULTS
Training eps = 4/255: Standard Accuracy = 41.86%
Training eps = 8/255: Standard Accuracy = 35.79%
Training eps = 16/255: Standard Accuracy = 28.87%

Observations:
1. Larger epsilon values during training typically lead to:
   - Lower standard accuracy on clean examples
   - Higher robust accuracy against adversarial attacks
2. There's a tradeoff between robustness and accuracy
3. The model should be most robust at the epsilon it was trained on


## Multi-Norm Robust Accuracy

In [10]:
def test_model_on_multi_attacks(model, eps_linf=8./255., eps_l2=0.75):
    model.eval()
    tot_test, tot_acc = 0.0, 0.0
    ground_acc = 0.0
    for batch_idx, (x_batch, y_batch) in tqdm(enumerate(test_loader), total=len(test_loader), desc="Evaluating"):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        # TODO: get x_adv_linf and x_adv_l2 untargeted pgd linf and l2 with eps, and eps_step=eps/4
        x_adv_linf = pgd_linf_untargeted(model, x_batch, y_batch, 10, eps_linf, eps_linf/4)
        x_adv_l2 = pgd_l2_untargeted(model, x_batch, y_batch, 10, eps_l2, eps_l2/4)
        
        ## calculate union accuracy: correct only if both attacks are correct
        
        out = model(x_adv_linf)
        pred_linf = torch.max(out, dim=1)[1]
        out = model(x_adv_l2)
        pred_l2 = torch.max(out, dim=1)[1]

        ground_out = model(x_batch)
        ground_pred = torch.max(ground_out, dim=1)[1]
        ground_acc += (ground_pred == y_batch).sum().item()
        
        # TODO: get the testing accuracy with multi-norm robustness and update tot_test and tot_acc
        tot_acc += ((pred_linf == y_batch) & (pred_l2 == y_batch)).sum().item()
        tot_test += y_batch.size(0)
            
    print('Robust accuracy %.5lf' % (tot_acc/tot_test), f'on multi attacks')
    print('Standard accuracy %.5lf' % (ground_acc/tot_test), f'on multi attacks')

In [11]:
# Evaluate on multi-norm attacks with different models with eps_linf = 8./255, eps_l2 = 0.75
model.load_state_dict(torch.load('models/pretr_Linf.pth'))
# Evaluate on multi attacks with model 1
test_model_on_multi_attacks(model, eps_linf=8./255., eps_l2=0.75)

model.load_state_dict(torch.load('models/pretr_L2.pth'))
# Evaluate on multi attacks with model 2
test_model_on_multi_attacks(model, eps_linf=8./255., eps_l2=0.75)

model.load_state_dict(torch.load('models/pretr_RAMP.pth'))
# Evaluate on multi attacks with model 3
test_model_on_multi_attacks(model, eps_linf=8./255., eps_l2=0.75)

  model.load_state_dict(torch.load('models/pretr_Linf.pth'))
Evaluating: 100%|██████████| 157/157 [00:28<00:00,  5.43it/s]
  model.load_state_dict(torch.load('models/pretr_L2.pth'))


Robust accuracy 0.51200 on multi attacks
Standard accuracy 0.82800 on multi attacks


Evaluating: 100%|██████████| 157/157 [00:29<00:00,  5.41it/s]
  model.load_state_dict(torch.load('models/pretr_RAMP.pth'))


Robust accuracy 0.30860 on multi attacks
Standard accuracy 0.88760 on multi attacks


Evaluating: 100%|██████████| 157/157 [00:29<00:00,  5.40it/s]

Robust accuracy 0.49740 on multi attacks
Standard accuracy 0.81190 on multi attacks



