# ME-NET

In [1]:
import numpy as np
import torch
import torch.utils.data as Data
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from defense import MENet

In [2]:
# Device configuration
# Use the GPU when one is present, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Pin the current CUDA device only when CUDA is actually usable; calling
# torch.cuda.set_device unconditionally raises on CPU-only machines.
if device.type == 'cuda':
    torch.cuda.set_device(0)

## Get data

In [3]:
# Images are only converted to tensors; no other preprocessing is applied.
transform = transforms.Compose([transforms.ToTensor()])

# CIFAR-10 test split (train=False), downloaded into ./data when missing.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Small, unshuffled batches for the clean-accuracy check below.
testloader = Data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

Files already downloaded and verified


## Load model

In [4]:
# NOTE(review): torch.load unpickles arbitrary Python objects (the checkpoint
# stores a whole model object, not just a state dict). Only load checkpoint
# files that come from a trusted source.
#
# map_location='cpu' makes the load independent of the device layout the
# checkpoint was saved with; the model is moved to `device` explicitly below,
# and the stored RNG state stays on the CPU as torch.set_rng_state expects.

# adversarially trained
checkpoint = torch.load('models/ckpt.t7_ResNet18_advtrain_concat_usvt_0.5_white',
                        map_location='cpu')

# standard
#checkpoint = torch.load('models/ckpt.t7_ResNet18_pure_concat_usvt_0.5_white',
#                        map_location='cpu')

model = checkpoint['model']
# Force single-GPU attributes on the loaded object.
# NOTE(review): presumably the checkpoint holds an nn.DataParallel wrapper
# saved on a multi-GPU machine -- confirm against the training script.
model.device_ids = [0]
model.output_device = 0
# Restore the CPU random-number-generator state saved with the checkpoint.
rng_state = checkpoint['rng_state']
torch.set_rng_state(rng_state)

model = model.to(device)
# Wrap the classifier with the ME-Net defense (implemented in defense.MENet).
menet_model = MENet(model)

# Inference mode for both the bare classifier and the defended wrapper.
model.eval()
menet_model.eval()

# check model accuracy
def test_generalization(model, loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(loader):
            inputs, targets = inputs.to(device), targets.to(device)
            with torch.no_grad():
                outputs = model(inputs)

            _, pred_idx = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += pred_idx.eq(targets.data).cpu().sum().float()

    return 100. * correct / total
# Accuracy of the ME-Net-wrapped model on the unperturbed test loader.
clean_accuracy = test_generalization(menet_model, testloader)
print('Acc: %.3f%%' % clean_accuracy)

Acc: 85.240%


## Attack 1: BPDA

In [5]:
# Iterative signed-gradient attack under an L-infinity budget, with gradients
# taken through `attack_model` (the ME-Net wrapper). How the gradient passes
# through the defense is decided inside defense.MENet, which is not shown here.
epsilon = 8.0/255                      # L-inf perturbation budget
num_steps = 200                        # gradient steps per batch
step_size = (5 * epsilon) / num_steps  # all steps together can travel 5 * epsilon

attack_model = menet_model
attack_model.eval()

batch_size = 200

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)
testloader = iter(testloader)

# Only the first 1000 test samples are attacked.
num_batches = 1000 // batch_size

x_adv = []

for i in range(num_batches):
    (x_test, y_test) = next(testloader)
    x_test = x_test.to(device)
    y_test = y_test.to(device)

    # Random start inside the epsilon ball, clipped to the [0, 1] range.
    x = x_test.detach()
    x = x + torch.zeros_like(x).uniform_(-epsilon, epsilon)
    x = torch.clamp(x, 0, 1)

    for j in range(num_steps):
        x.requires_grad_()
        with torch.enable_grad():
            logits = attack_model(x)
            # Only the batch mean is ever used, so take the (default)
            # mean-reduced loss directly instead of the deprecated
            # `reduce=False` followed by torch.mean.
            loss = F.cross_entropy(logits, y_test)
        grad = torch.autograd.grad(loss, [x])[0]

        if j % 20 == 0:
            # Diagnostics are only needed on the iterations that print them.
            _, pred_idx = torch.max(logits.detach(), 1)
            # Builtin `bool` replaces `np.bool`, which was removed in NumPy 1.24.
            success = ~(pred_idx.eq(y_test).cpu().numpy().astype(bool))
            print(i, j, loss.detach().cpu().numpy(), np.mean(success))

        # Ascend the loss, then project back onto the epsilon ball around the
        # clean input and onto the [0, 1] range.
        x = x.detach() + step_size * torch.sign(grad.detach())
        x = torch.min(torch.max(x, x_test - epsilon), x_test + epsilon)
        x = torch.clamp(x, 0, 1)

    x_adv.append(x.detach().cpu().numpy())

x_adv = np.concatenate(x_adv, axis=0)

0 0 0.555986 0.145
0 20 0.89678466 0.24
0 40 1.2432835 0.29
0 60 1.5040343 0.335
0 80 2.0296295 0.39
0 100 2.333188 0.42
0 120 2.643069 0.45
0 140 2.9956958 0.485
0 160 3.1431735 0.485
0 180 3.416809 0.515
1 0 0.6715832 0.125
1 20 1.0303277 0.23
1 40 1.3949406 0.285
1 60 1.8998712 0.33
1 80 2.1450403 0.39
1 100 2.454036 0.435
1 120 2.68185 0.44
1 140 2.965865 0.455
1 160 3.1352801 0.495
1 180 3.3462453 0.485
2 0 0.45458162 0.135
2 20 0.7306506 0.185
2 40 1.0998504 0.25
2 60 1.4138063 0.31
2 80 1.7645721 0.37
2 100 2.152749 0.405
2 120 2.4440153 0.45
2 140 2.8014889 0.475
2 160 2.8414094 0.475
2 180 3.1407578 0.485
3 0 0.7717588 0.16
3 20 1.0925505 0.255
3 40 1.4218915 0.28
3 60 1.8660396 0.355
3 80 2.238803 0.39
3 100 2.7191668 0.43
3 120 2.8068614 0.48
3 140 3.1176968 0.475
3 160 3.5125623 0.515
3 180 3.413438 0.515
4 0 0.46111125 0.125
4 20 0.8502369 0.21
4 40 1.0776811 0.265
4 60 1.4680864 0.31
4 80 1.8179624 0.365
4 100 2.2098296 0.38
4 120 2.3671854 0.395
4 140 2.5689921 0.425
4 1

In [6]:
# evaluate attack
# Re-create the loader so the labels line up with x_adv, which was built from
# the first `num_batches` unshuffled batches of the same test set.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)
testloader = iter(testloader)

correct = []
with torch.no_grad():
    for i in range(num_batches):
        (_, y_test) = next(testloader)
        y_test = y_test.to(device)
        x = x_adv[i*batch_size:(i+1)*batch_size]
        outputs_adv = menet_model(torch.from_numpy(x).to(device))
        _, pred_idx = torch.max(outputs_adv, 1)
        # Per-sample 0/1 correctness, stored as NumPy so np.concatenate below
        # does not depend on implicit tensor-to-array conversion.
        correct.append(pred_idx.eq(y_test).cpu().float().numpy())

# Accuracy of the defended model on the adversarial inputs.
print("acc: {:.1f}%".format(100 * np.mean(np.concatenate(correct))))

acc: 49.7%


## Attack 2: BPDA + EOT

In [7]:
# Same iterative signed-gradient attack as Attack 1, but every step averages
# the loss and gradient over 1 + num_r forward/backward passes of
# `attack_model`, and the highest-loss iterate seen per sample is kept.
# NOTE(review): the averaging only helps if defense.MENet is stochastic
# between calls -- confirm against its implementation.
epsilon = 8.0/255                      # L-inf perturbation budget
num_steps = 200                        # gradient steps per batch
step_size = (5 * epsilon) / num_steps  # all steps together can travel 5 * epsilon
num_r = 40                             # extra passes averaged at every step

attack_model = menet_model
attack_model.eval()

batch_size = 200

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)
testloader = iter(testloader)

# Only the first 1000 test samples are attacked.
num_batches = 1000 // batch_size

x_adv = []

for i in range(num_batches):
    (x_test, y_test) = next(testloader)
    x_test = x_test.to(device)
    y_test = y_test.to(device)

    # Per-sample bookkeeping: best iterate so far, its averaged loss, and
    # whether the sample was ever misclassified during the attack.
    x_best = x_test.cpu().numpy().copy()
    best_loss = -100*np.ones(len(x_best), dtype=np.float32)
    # Builtin `bool` replaces `np.bool`, which was removed in NumPy 1.24.
    all_success = np.zeros(len(x_best), dtype=bool)

    # Random start inside the epsilon ball, clipped to the [0, 1] range.
    x = x_test.detach()
    x = x + torch.zeros_like(x).uniform_(-epsilon, epsilon)
    x = torch.clamp(x, 0, 1)

    for j in range(num_steps):
        x.requires_grad_()
        with torch.enable_grad():
            logits = attack_model(x)
            loss_r = F.cross_entropy(logits, y_test, reduction='none')
        grad = torch.autograd.grad(torch.mean(loss_r), [x])[0]
        # Success is judged on this first pass only.
        _, pred_idx = torch.max(logits.detach(), 1)
        # Detached accumulator: the per-sample loss is only reported and
        # compared, so it must not keep autograd graphs alive.
        loss = loss_r.detach().clone()

        for r in range(num_r):
            with torch.enable_grad():
                logits = attack_model(x)
                loss_r = F.cross_entropy(logits, y_test, reduction='none')
            grad_r = torch.autograd.grad(torch.mean(loss_r), [x])[0]
            grad += grad_r
            loss += loss_r.detach()

        # Average the loss; the gradient is left as a sum because only its
        # sign is used in the update below.
        loss /= (1.0 + num_r)
        loss_np = loss.cpu().numpy()

        success = ~(pred_idx.eq(y_test).cpu().numpy().astype(bool))
        better = loss_np > best_loss
        if np.any(better):
            x_best[better] = x.detach().cpu().numpy()[better]
            best_loss[better] = loss_np[better]

        all_success |= success

        if j % 20 == 0:
            print(i, j, torch.mean(loss).cpu().numpy(), np.mean(best_loss), np.mean(success), np.mean(all_success))

        # Ascend the loss, then project back onto the epsilon ball around the
        # clean input and onto the [0, 1] range.
        x = x.detach() + step_size * torch.sign(grad.detach())
        x = torch.min(torch.max(x, x_test - epsilon), x_test + epsilon)
        x = torch.clamp(x, 0, 1)

    # Keep the highest-loss iterate per sample rather than the final one.
    x_adv.append(x_best)
x_adv = np.concatenate(x_adv, axis=0)

0 0 0.6040787 0.60407865 0.115 0.115
0 20 2.131285 2.156087 0.42 0.47
0 40 4.044399 4.094599 0.555 0.625
0 60 5.54005 5.631854 0.645 0.735
0 80 6.3823624 6.5319157 0.715 0.81
0 100 6.6823926 6.9006596 0.725 0.845
0 120 6.7796726 7.075388 0.73 0.855
0 140 6.8647156 7.1857057 0.73 0.865
0 160 6.9455533 7.2675524 0.725 0.87
0 180 6.968773 7.3123436 0.735 0.875
1 0 0.7038498 0.7038498 0.15 0.15
1 20 2.1949828 2.2188222 0.385 0.46
1 40 3.9998465 4.055197 0.55 0.63
1 60 5.512513 5.5912766 0.66 0.75
1 80 6.340132 6.477013 0.73 0.805
1 100 6.630049 6.848197 0.73 0.835
1 120 6.7330837 7.0133104 0.75 0.85
1 140 6.8331246 7.1317973 0.74 0.855
1 160 6.892745 7.213182 0.765 0.86
1 180 6.9034123 7.264055 0.76 0.86
2 0 0.4001552 0.40015516 0.11 0.11
2 20 1.883779 1.9113045 0.345 0.44
2 40 3.9070714 3.9436333 0.545 0.62
2 60 5.4797034 5.567206 0.635 0.74
2 80 6.32311 6.469389 0.71 0.79
2 100 6.6210656 6.827215 0.72 0.825
2 120 6.8080587 7.0396833 0.71 0.86
2 140 6.899159 7.171466 0.73 0.87
2 160 6.948

In [8]:
# evaluate attack
# Re-create the loader so the labels line up with x_adv, which was built from
# the first `num_batches` unshuffled batches of the same test set.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=0)
testloader = iter(testloader)

correct = []
with torch.no_grad():
    for i in range(num_batches):
        (_, y_test) = next(testloader)
        y_test = y_test.to(device)
        x = x_adv[i*batch_size:(i+1)*batch_size]
        outputs_adv = menet_model(torch.from_numpy(x).to(device))
        _, pred_idx = torch.max(outputs_adv, 1)
        # Per-sample 0/1 correctness, stored as NumPy so np.concatenate below
        # does not depend on implicit tensor-to-array conversion.
        correct.append(pred_idx.eq(y_test).cpu().float().numpy())

# Accuracy of the defended model on the adversarial inputs.
print("acc: {:.1f}%".format(100 * np.mean(np.concatenate(correct))))

acc: 26.0%
