In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow, imsave
from sklearn.metrics import confusion_matrix, recall_score, classification_report, accuracy_score
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.models as models
from semi.MCMC_loss import DBI, margin
model_path = './Model_pkl/advsemi_nocetrans_100.pkl'

from utils import *
# Select the compute device.  GPU index 1 is pinned as the current CUDA device
# only when CUDA is actually available, so that a CPU-only host reaches the
# "cpu" fallback below instead of raising inside set_device().
use_cuda = torch.cuda.is_available()
if use_cuda:
    torch.cuda.set_device(1)
device = torch.device("cuda:1" if use_cuda else "cpu")
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch


In [None]:
import argparse

# Hyper-parameters are declared as command-line style options, but they are
# parsed from the explicit list at the bottom of this cell rather than from
# sys.argv.
parser = argparse.ArgumentParser()

# --- data and optimisation ---------------------------------------------------
parser.add_argument('--batch-size', type=int, default=256)
parser.add_argument('--data-dir', type=str, default='../../cifar-data')
parser.add_argument('--epochs', type=int, default=80)
parser.add_argument('--lr-schedule', type=str, default='cyclic',
                    choices=['cyclic', 'multistep'])
parser.add_argument('--lr-min', type=float, default=0.)
parser.add_argument('--lr-max', type=float, default=0.01)
parser.add_argument('--weight-decay', type=float, default=5e-4)
parser.add_argument('--momentum', type=float, default=0.9)

# --- attack ------------------------------------------------------------------
parser.add_argument('--epsilon', type=int, default=8)
parser.add_argument('--attack-iters', type=int, default=10,
                    help='Attack iterations')
parser.add_argument('--restarts', type=int, default=1)
# Step size 2 is the PGD setting; the "fast" variant of this option used
# default=10 instead.
parser.add_argument('--alpha', type=int, default=2, help='Step size')
parser.add_argument('--delta-init', default='random',
                    choices=['zero', 'random'],
                    help='Perturbation initialization method')

# --- bookkeeping and mixed precision -----------------------------------------
parser.add_argument('--out-dir', type=str, default='train_pgd_output',
                    help='Output directory')
parser.add_argument('--seed', type=int, default=0, help='Random seed')
parser.add_argument('--opt-level', type=str, default='O1',
                    choices=['O0', 'O1', 'O2'],
                    help='O0 is FP32 training, O1 is Mixed Precision, and O2 is "Almost FP16" Mixed Precision')
parser.add_argument('--loss-scale', type=str, default='1.0',
                    choices=['1.0', 'dynamic'],
                    help='If loss_scale is "dynamic", adaptively adjust the loss scale over time')
parser.add_argument('--master-weights', action='store_true',
                    help='Maintain FP32 master weights to accompany any FP16 model weights, not applicable for O1 opt level')

args = parser.parse_args(args=['--batch-size', '256'])

In [None]:
# Per-channel normalisation statistics (named for CIFAR-10).
# NOTE(review): this notebook loads its data with read_SVHN(); confirm these
# values match the normalisation applied inside that loader.
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.2471, 0.2435, 0.2616)

# Reshaped to (3, 1, 1) so they broadcast over the channel axis of an image
# batch.  They are placed on `device` (instead of calling .cuda()) so they
# follow the device selected earlier and still work when CUDA is unavailable.
mu = torch.tensor(cifar10_mean).view(3,1,1).to(device)
std = torch.tensor(cifar10_std).view(3,1,1).to(device)

# Attack budget and step sizes are given in 1/255 pixel units and divided by
# std to express them in normalised-input units (one value per channel).
epsilon = (args.epsilon / 255.) / std
alpha = (args.alpha / 255.) / std
pgd_alpha = (2 / 255.) / std

# Normalised values that correspond to raw pixel intensities 1 and 0.
upper_limit = ((1 - mu)/ std)
lower_limit = ((0 - mu)/ std)
def clamp(X, lower_limit, upper_limit):
    """Clip ``X`` element-wise into ``[lower_limit, upper_limit]``.

    Both bounds are tensors that broadcast against ``X``.  The upper bound is
    applied first and the lower bound second.
    """
    capped = torch.min(X, upper_limit)
    return torch.max(capped, lower_limit)

In [None]:
# Data: SVHN is the active dataset; the CIFAR-10 loader is kept for reference.
#trainset, trainloader, testset, testloader, devset, devloader = read_cifar10()
trainset, trainloader, testset, testloader = read_SVHN()
from Resnet18 import *
model = ResNet18(pre_train=False).to(device)
# `modeling` wraps `model`; train_MCMC() unpacks its output as an
# (embedding, logits) pair.
modeling = model_1(model).to(device) #Pretrain = False
# Warm start from a saved checkpoint.  map_location remaps the stored tensors
# onto `device`, so a checkpoint written from a different GPU index (or loaded
# on a CPU-only host) still loads.
model.load_state_dict(torch.load('./Model_pkl/warm-fast.pkl', map_location=device))



# Adversarial training model

In [None]:
import apex.amp as amp

# SGD is the optimizer that is handed to amp, to the LR scheduler and to the
# training functions.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=args.lr_max,
    momentum=args.momentum,
    weight_decay=args.weight_decay,
)
# An Adam optimizer over the same parameters is also created under the name
# `opt`; nothing in this cell uses it.
opt = optim.Adam(model.parameters(), lr=1e-3)

# Mixed-precision configuration; master_weights is forwarded only for O2.
amp_args = {
    'opt_level': args.opt_level,
    'loss_scale': args.loss_scale,
    'verbosity': False,
}
if args.opt_level == 'O2':
    amp_args.update(master_weights=args.master_weights)
model, optimizer = amp.initialize(model, optimizer, **amp_args)

criterion = nn.CrossEntropyLoss().to(device)

# The learning rate is multiplied by 0.1 at epochs 55 and 70.
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[55, 70],
    gamma=0.1,
)

In [None]:
#def train_MCMC(modeling,inputs, labels,ul_inputs, optimizer,train_correct):
def train_MCMC(modeling, inputs, labels, ul_inputs, optimizer):
    """Take one optimisation step on the DBI and margin losses.

    ``modeling`` is called on ``inputs`` and then on ``ul_inputs``; each call
    is unpacked as an ``(embedding, logits)`` pair.  The first batch is scored
    against ``labels``; the second batch is scored against its own arg-max
    predictions.

    The back-propagated objective is
    ``label_DB_loss + 10 * total_DB_loss + margin_loss``.  The cross-entropy
    term is computed and returned, but it is not part of that objective.

    Relies on the module-level ``criterion``, ``device`` and ``amp``.

    Returns:
        tuple: ``(ce_loss, label_DB_loss, total_DB_loss, margin_loss)``.
    """
    has_cuda = torch.cuda.is_available()

    # Forward pass on the labelled batch.
    emb_l, logits_l = modeling(inputs)
    ce_loss = criterion(logits_l, labels)

    # Forward pass on the second batch (the caller passes adversarial inputs).
    emb_ul, logits_ul = modeling(ul_inputs)

    # Loss modules are rebuilt on every call.
    # NOTE(review): the constructor arguments look like (num classes, feature
    # size, use-cuda flag) -- confirm against semi.MCMC_loss.
    dbi_criterion = DBI(10, 512, has_cuda)
    margin_criterion = margin(10, 10, has_cuda)

    label_DB_loss = dbi_criterion(emb_l, labels).to(device)

    # Arg-max predictions of the second batch serve as its labels.
    pseudo_labels = torch.max(logits_ul, 1)[1]
    total_DB_loss = dbi_criterion(emb_ul, pseudo_labels).to(device)

    # The margin loss receives the embedding cast to float32.
    emb_ul = emb_ul.type(torch.float)
    margin_loss = margin_criterion(emb_ul, pseudo_labels).to(device)

    objective = label_DB_loss + 10*total_DB_loss + margin_loss

    optimizer.zero_grad()
    with amp.scale_loss(objective, optimizer) as scaled:
        scaled.backward()
    optimizer.step()

    return ce_loss, label_DB_loss, total_DB_loss, margin_loss

In [None]:
def adversarial_training(model, inputs, labels, optimizer, epsilon, lower_limit, upper_limit, train_correct):
    """Build a PGD perturbation for one batch and train on the perturbed batch.

    The perturbation starts from per-channel uniform noise in
    ``[-epsilon, epsilon]``, is refined for ``args.attack_iters`` signed
    gradient steps of size ``alpha`` (both module-level), and is kept inside
    the epsilon box and the ``[lower_limit, upper_limit]`` input range after
    every step.  One optimizer step is then taken on the cross-entropy of the
    perturbed batch.

    Relies on the module-level ``args``, ``alpha``, ``criterion``, ``device``,
    ``clamp`` and ``amp``.

    Returns:
        tuple: ``(adv_loss, train_correct, adv_inputs)`` where
        ``train_correct`` is the incoming count plus the number of perturbed
        samples classified correctly, and ``adv_inputs`` is
        ``inputs + delta``.
    """
    # Random start: channel c is drawn from U(-epsilon[c], epsilon[c]).
    delta = torch.zeros_like(inputs).to(device)
    for channel in range(len(epsilon)):
        bound = epsilon[channel][0][0].item()
        delta[:, channel, :, :].uniform_(-bound, bound)
    delta.data = clamp(delta, lower_limit - inputs, upper_limit - inputs)
    delta.requires_grad = True

    for _ in range(args.attack_iters):
        attack_outputs = model(inputs + delta)
        attack_loss = criterion(attack_outputs, labels)
        with amp.scale_loss(attack_loss, optimizer) as scaled:
            scaled.backward()
        step = alpha * torch.sign(delta.grad.detach())
        # Project onto the epsilon box, then onto the valid input range.
        delta.data = clamp(delta + step, -epsilon, epsilon)
        delta.data = clamp(delta, lower_limit - inputs, upper_limit - inputs)
        delta.grad.zero_()

    # Train on the final adversarial batch.
    delta = delta.detach()
    adv_inputs = inputs + delta
    outputs = model(adv_inputs)
    adv_loss = criterion(outputs, labels)
    loss = 1*adv_loss
    # Gradients accumulated in the parameters during the attack are cleared
    # before the training backward pass.
    optimizer.zero_grad()
    with amp.scale_loss(loss, optimizer) as scaled:
        scaled.backward()
    optimizer.step()

    predicted = torch.max(outputs, 1)[1]
    train_correct += (predicted == labels).sum().item()

    return adv_loss, train_correct, adv_inputs

In [None]:
#criterion = nn.CrossEntropyLoss().to(device)
# Bookkeeping for the training run.
tr_acc = 0
train_size = len(trainset)
test_size  = len(testset)
prev_robust_acc  = 0   # best PGD accuracy seen so far
pgd_all = []           # PGD accuracy on the test loader, one entry per epoch
test_all = []          # clean accuracy on the test loader, one entry per epoch
maxepoch = 0           # epoch index at which prev_robust_acc was reached
for epoch in tqdm(range(args.epochs)):
    # Re-assert training mode at the start of every epoch.
    # NOTE(review): evaluate_pgd2 / evaluate_standard come from utils (not
    # shown).  If they switch the model to eval mode, every epoch after the
    # first would otherwise train with BatchNorm/Dropout in inference
    # behaviour.  When the model is already in training mode these calls are
    # no-ops.
    model.train()
    modeling.train()

    train_loss = 0
    train_correct = 0
    test_correct  = 0
    # tqdm wraps the loader itself (not enumerate) so the bar knows the
    # number of batches.
    for j, data in enumerate(tqdm(trainloader)):
        inputs, labels = data[0].to(device), data[1].to(device)
        # Step 1: adversarial training; also returns the adversarial batch.
        adv_loss, train_correct, ul_inputs = adversarial_training(model, inputs, labels, optimizer, epsilon, lower_limit, upper_limit, train_correct)
        # Step 2: DBI / margin step, with the adversarial batch as second input.
        ce_loss, label_DB_loss, total_DB_loss, margin_loss = train_MCMC(modeling, inputs, labels, ul_inputs, optimizer)

        # Logging only: both optimizer steps already happened inside the two
        # calls above.
        loss = 1*label_DB_loss + 10*total_DB_loss + 1*margin_loss + 1*adv_loss
        train_loss += loss.item() * labels.size(0)
    scheduler.step()
    lr = scheduler.get_last_lr()[0]
    _, pgd_acc, _ = evaluate_pgd2(testloader, model, 20, 1)
    _, test_acc = evaluate_standard(testloader, model)
    pgd_all.append(pgd_acc)
    test_all.append(test_acc)
    # Keep a separate checkpoint for the best PGD accuracy so far.
    if pgd_acc > prev_robust_acc:
        prev_robust_acc = pgd_acc
        torch.save(model.state_dict(), './Model_pkl/advsemi_nocetrans_100_best.pkl')
        maxepoch = epoch
    torch.save(model.state_dict(), model_path)
    train_loss = train_loss/train_size
    tr_acc = train_correct*100.0/train_size
    # The individual losses printed here are those of the last batch.
    print("Epoch :", epoch+1, ", CE Loss :", ce_loss.item(), ", LDB Loss :", label_DB_loss.item(),
                      ", TDB Loss :", total_DB_loss.item(),", MM Loss :", margin_loss.item(),", adv Loss :", adv_loss.item())
    print("Epoch [%d/%d], accuracy=[%.2f], test accuracy=[%.4f], robust = [%.4f], Loss: %.4f, lr : %.4f" 
          %(epoch+1, args.epochs, tr_acc, test_acc,pgd_acc, train_loss, lr))
torch.save(model.state_dict(), model_path)

# PGD-20 robustness

In [None]:
# Clean evaluation on both loaders first, then the PGD evaluation
# (arguments 20, 1 -- per the section title, a 20-step attack; the trailing 1
# is presumably the number of restarts, confirm against utils.evaluate_pgd2).
train_loss, train_acc = evaluate_standard(trainloader, model)
test_loss, test_acc = evaluate_standard(testloader, model)
train_adv_loss, train_adv_acc, train_dist = evaluate_pgd2(trainloader, model, 20, 1)
pgd_loss, pgd_acc, pgd_dist = evaluate_pgd2(testloader, model, 20, 1)

# One formatted line per evaluation.
report_rows = (
    ('train_loss = %.4f, train_acc =%.4f', (train_loss, train_acc)),
    ('test_loss = %.4f, test_acc =%.4f', (test_loss, test_acc)),
    ('train_adv_loss = %.4f, train_adv_acc =%.4f, train_dis = %.4f', (train_adv_loss, train_adv_acc, train_dist)),
    ('pgd_loss = %.4f, pgd_acc = %.4f, pgd_dist = %.4f', (pgd_loss, pgd_acc, pgd_dist)),
)
for row_format, row_values in report_rows:
    print(row_format % row_values)

# Pipe-separated summary row.
print(train_acc, "|", test_acc, "|", train_adv_acc, "|", pgd_acc, "|", pgd_loss, "|", pgd_dist, "|")

In [None]:
# Reload the checkpoint that the training loop saved at its best PGD accuracy
# (swap in the commented line to load the last-epoch checkpoint instead).
# map_location remaps the stored tensors onto `device`, so the file loads
# regardless of the device it was written from.
#model.load_state_dict(torch.load(model_path))
model.load_state_dict(torch.load('./Model_pkl/advsemi_nocetrans_100_best.pkl', map_location=device))

In [None]:
# Same report as before, now for the weights currently held by `model`:
# clean evaluation on both loaders, then the PGD evaluation (arguments 20, 1).
train_loss, train_acc = evaluate_standard(trainloader, model)
test_loss, test_acc = evaluate_standard(testloader, model)
train_adv_loss, train_adv_acc, train_dist = evaluate_pgd2(trainloader, model, 20, 1)
pgd_loss, pgd_acc, pgd_dist = evaluate_pgd2(testloader, model, 20, 1)

# One formatted line per evaluation.
report_rows = (
    ('train_loss = %.4f, train_acc =%.4f', (train_loss, train_acc)),
    ('test_loss = %.4f, test_acc =%.4f', (test_loss, test_acc)),
    ('train_adv_loss = %.4f, train_adv_acc =%.4f, train_dis = %.4f', (train_adv_loss, train_adv_acc, train_dist)),
    ('pgd_loss = %.4f, pgd_acc = %.4f, pgd_dist = %.4f', (pgd_loss, pgd_acc, pgd_dist)),
)
for row_format, row_values in report_rows:
    print(row_format % row_values)

# Pipe-separated summary row.
print(train_acc, "|", test_acc, "|", train_adv_acc, "|", pgd_acc, "|", pgd_loss, "|", pgd_dist, "|")

# FGSM white-box

In [None]:
# FGSM robustness on the test loader.
# NOTE(review): the trailing arguments (1, 1) are presumably one attack
# iteration and one restart, i.e. a single-step attack -- confirm against
# utils.evaluate_pgd1.
evaluate_pgd1(testloader, model, 1, 1) 

In [None]:
# Plot the accuracy curves recorded by the training loop: pgd_all and test_all
# each receive one entry per epoch, so the x-axis counts epochs.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title("ResNet Model")
x = np.arange(1, len(pgd_all) + 1)
ax.plot(x, pgd_all, label='PGD')
ax.plot(x, test_all, label='Test')
ax.set_xticks(x)

ax.set_xlabel("epoch")
ax.set_ylabel("acc %")
ax.legend()
# Create the output directory if needed so savefig does not fail on a fresh
# checkout.
os.makedirs('./image', exist_ok=True)
fig.savefig('./image/advsemi_nocetrans_100.png')
plt.show()

# CW White-Box

In [None]:
# Reference values for the CW attack.  The functions in this cell do not read
# these names; they receive the same values through their keyword defaults.
num_steps = 20
# Deliberately not named `epsilon`: that name holds the per-channel tensor
# which the training cell passes to adversarial_training() (it calls
# len(epsilon) and indexes it).  Overwriting it with a float here would break
# any re-run of the training cell after this one.
cw_epsilon = 0.031
step_size = 0.003

from torch.autograd import Variable
def eval_adv_test_whitebox_cw(model, device, testloader):
    """
    Print the white-box robust accuracy of ``model`` under the CW-loss attack.

    The model is put in eval mode, every batch of ``testloader`` is moved to
    ``device`` and attacked with ``_cw_whitebox`` (default settings), and
    ``1 - misclassified / len(testloader.dataset)`` is printed.  Nothing is
    returned.
    """
    model.eval()
    misclassified = 0
    for batch_x, batch_y in testloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        X = Variable(batch_x, requires_grad=True)
        y = Variable(batch_y)
        misclassified += _cw_whitebox(model, X, y)
    n_samples = len(testloader.dataset)
    print('cw robust_acc: ', 1 - misclassified / n_samples)


def _cw_whitebox(model,
                 X,
                 y,
                 epsilon=0.031,
                 num_steps=20,
                 step_size=0.003):
    """Attack ``model`` on one batch with a CW-style margin loss.

    Starting from ``X`` plus uniform noise, ``num_steps`` signed-gradient
    steps are taken.  After each step the perturbation is clipped to
    ``epsilon / std`` per channel and to the module-level
    ``[lower_limit, upper_limit]`` input range.

    The loss is ``-relu(correct_logit - wrong_logit)``: ``correct_logit`` is
    the true-class logit and ``wrong_logit`` the highest other-class logit,
    each summed over the batch.

    NOTE(review): the margin is summed over the batch *before* the ReLU, so
    the hinge acts on the batch total rather than per sample.  This is kept
    unchanged so reported numbers stay comparable.
    NOTE(review): the random start is drawn with the raw ``epsilon``, before
    it is divided by ``std`` -- confirm this is intended.

    Args:
        model: classifier returning logits.
        X: input batch (already on the target device).
        y: integer class labels for ``X``.
        epsilon: perturbation budget before division by ``std``.
        num_steps: number of attack iterations.
        step_size: per-step size before division by ``std``.

    Returns:
        0-dim float tensor: number of samples in the batch that are
        misclassified after the attack.
    """
    x_nat = X.detach()

    # The noise is created on the CPU and then moved to the input's device.
    random_noise = torch.FloatTensor(*x_nat.shape).uniform_(-epsilon, epsilon).to(x_nat.device)
    X_pgd = (x_nat + random_noise).requires_grad_(True)
    epsilon = epsilon / std
    step_size = step_size / std
    for _ in range(num_steps):
        with torch.enable_grad():
            output = model(X_pgd)
            correct_logit = torch.sum(torch.gather(output, 1, (y.unsqueeze(1)).long()).squeeze())
            # Top-2 classes per sample; take the runner-up when the top-1 is
            # the true class.
            tmp1 = torch.argsort(output, dim=1)[:, -2:]
            new_y = torch.where(tmp1[:, -1] == y, tmp1[:, -2], tmp1[:, -1])
            wrong_logit = torch.sum(torch.gather(output, 1, (new_y.unsqueeze(1)).long()).squeeze())
            loss = - F.relu(correct_logit-wrong_logit)
        # Gradient w.r.t. the input only; model parameter .grad buffers are
        # left untouched.
        grad, = torch.autograd.grad(loss, X_pgd)
        stepped = X_pgd.detach() + step_size * grad.sign()
        # Project onto the epsilon box, then onto the valid input range.
        eta = clamp(stepped - x_nat, -epsilon, epsilon)
        eta = clamp(eta, lower_limit - x_nat, upper_limit - x_nat)
        X_pgd = (x_nat + eta).requires_grad_(True)

    # Final scoring needs no autograd graph.
    with torch.no_grad():
        output = model(X_pgd)
    err_pgd = (output.max(1)[1] != y).float().sum()
    return err_pgd

# Run the CW white-box evaluation on the test loader with the current `model`.
# eval mode is set here and again inside eval_adv_test_whitebox_cw.
model.eval()
eval_adv_test_whitebox_cw(model, device, testloader)

# CW BLACK

In [None]:
from torch.autograd import Variable

def eval_adv_test_blackbox(model_target, model_source, device, testloader):
    """
    Print black-box (transfer) error statistics for ``model_target``.

    Both models are put in eval mode.  Each batch of ``testloader`` is moved
    to ``device`` and passed to ``_pgd_blackbox``, which returns the clean and
    the post-attack error counts of ``model_target``.  The totals, the robust
    accuracy, and a ``natural%/robust%/robust-acc%`` summary line are printed.
    Nothing is returned.
    """
    model_target.eval()
    model_source.eval()
    natural_err_total = 0
    robust_err_total = 0

    for batch_x, batch_y in testloader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        X = Variable(batch_x, requires_grad=True)
        y = Variable(batch_y)
        batch_natural, batch_robust = _pgd_blackbox(model_target, model_source, X, y)
        robust_err_total += batch_robust
        natural_err_total += batch_natural

    n_samples = len(testloader.dataset)
    print('natural_err_total: ', natural_err_total)
    print('robust_err_total: ', robust_err_total)
    print('black-box robust_acc: ', 1 - robust_err_total / n_samples)
    print('%.2f/%.2f/%.2f'%(natural_err_total*100/ n_samples,
                            robust_err_total*100/ n_samples,
                            (1 - robust_err_total / n_samples)*100))
          
def _pgd_blackbox(model_target,
                  model_source,
                  X,
                  y,
                  epsilon=8/255,
                  num_steps=20,
                  step_size=1/255):
    """Transfer attack with a CW-style margin loss (despite the ``pgd`` name).

    Adversarial examples are crafted against ``model_source`` and then scored
    on ``model_target``.  Starting from ``X`` plus uniform noise,
    ``num_steps`` signed-gradient steps are taken.  After each step the
    perturbation is clipped to ``epsilon / std`` per channel and to the
    module-level ``[lower_limit, upper_limit]`` input range.

    The loss is ``-relu(correct_logit - wrong_logit)`` on the source model's
    logits, with both logits summed over the batch.

    NOTE(review): the margin is summed over the batch *before* the ReLU, so
    the hinge acts on the batch total rather than per sample.  This is kept
    unchanged so reported numbers stay comparable.
    NOTE(review): the random start is drawn with the raw ``epsilon``, before
    it is divided by ``std`` -- confirm this is intended.

    Returns:
        tuple: ``(err, err_pgd)`` -- 0-dim float tensors holding the number of
        samples ``model_target`` misclassifies on the clean batch and on the
        adversarial batch.
    """
    x_nat = X.detach()

    # Clean error of the target model; no autograd graph is needed.
    with torch.no_grad():
        out = model_target(x_nat)
    err = (out.max(1)[1] != y).float().sum()

    # The noise is created on the CPU and then moved to the input's device.
    random_noise = torch.FloatTensor(*x_nat.shape).uniform_(-epsilon, epsilon).to(x_nat.device)
    X_pgd = (x_nat + random_noise).requires_grad_(True)
    epsilon = epsilon / std
    step_size = step_size / std
    for _ in range(num_steps):
        with torch.enable_grad():
            output = model_source(X_pgd)
            correct_logit = torch.sum(torch.gather(output, 1, (y.unsqueeze(1)).long()).squeeze())
            # Top-2 classes per sample; take the runner-up when the top-1 is
            # the true class.
            tmp1 = torch.argsort(output, dim=1)[:, -2:]
            new_y = torch.where(tmp1[:, -1] == y, tmp1[:, -2], tmp1[:, -1])
            wrong_logit = torch.sum(torch.gather(output, 1, (new_y.unsqueeze(1)).long()).squeeze())
            loss = - F.relu(correct_logit-wrong_logit)
        # Gradient w.r.t. the input only; model parameter .grad buffers are
        # left untouched.
        grad, = torch.autograd.grad(loss, X_pgd)
        stepped = X_pgd.detach() + step_size * grad.sign()
        # Project onto the epsilon box, then onto the valid input range.
        eta = clamp(stepped - x_nat, -epsilon, epsilon)
        eta = clamp(eta, lower_limit - x_nat, upper_limit - x_nat)
        X_pgd = (x_nat + eta).requires_grad_(True)

    with torch.no_grad():
        err_pgd = (model_target(X_pgd).max(1)[1] != y).float().sum()
    return err, err_pgd



# Black-box evaluation with the CW-loss attack: examples are crafted on a
# ResNet50 source model and scored on a ResNet18 target, both loaded from
# saved checkpoints.  map_location remaps the stored tensors onto `device`, so
# the files load regardless of the device they were written from.
target_model_path = './Model_pkl/SVHN-100-3.pkl'
source_model_path = './Model_pkl/resnet50-svhn.pkl'
model_target = ResNet18(pre_train=False).to(device)
model_target.load_state_dict(torch.load(target_model_path, map_location=device))
model_source = ResNet50().to(device)
model_source.load_state_dict(torch.load(source_model_path, map_location=device))

eval_adv_test_blackbox(model_target, model_source, device, testloader)

# PGD BLACK


In [None]:
from torch.autograd import Variable

def eval_adv_test_blackbox(model_target, model_source, device, testloader):
    """
    Print black-box (transfer) error statistics for ``model_target``.

    Both models are put in eval mode.  Each batch of ``testloader`` is moved
    to ``device`` and passed to ``_pgd_blackbox``, which returns the clean and
    the post-attack error counts of ``model_target``.  The totals, the robust
    accuracy, and a ``natural%/robust%/robust-acc%`` summary line are printed.
    Nothing is returned.

    This has the same body as the same-named function in the preceding
    (CW black-box) cell and replaces it once this cell has run.
    """
    model_target.eval()
    model_source.eval()
    natural_err_total = 0
    robust_err_total = 0

    for batch_x, batch_y in testloader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        X = Variable(batch_x, requires_grad=True)
        y = Variable(batch_y)
        batch_natural, batch_robust = _pgd_blackbox(model_target, model_source, X, y)
        robust_err_total += batch_robust
        natural_err_total += batch_natural

    n_samples = len(testloader.dataset)
    print('natural_err_total: ', natural_err_total)
    print('robust_err_total: ', robust_err_total)
    print('black-box robust_acc: ', 1 - robust_err_total / n_samples)
    print('%.2f/%.2f/%.2f'%(natural_err_total*100/ n_samples,
                            robust_err_total*100/ n_samples,
                            (1 - robust_err_total / n_samples)*100))
          
def _pgd_blackbox(model_target,
                  model_source,
                  X,
                  y,
                  epsilon=8/255,
                  num_steps=20,
                  step_size=1/255):
    """Transfer attack with PGD on the cross-entropy loss.

    Adversarial examples are crafted against ``model_source`` and then scored
    on ``model_target``.  Starting from ``X`` plus uniform noise,
    ``num_steps`` signed-gradient steps on the cross-entropy loss are taken.
    After each step the perturbation is clipped to ``epsilon / std`` per
    channel and to the module-level ``[lower_limit, upper_limit]`` input
    range.

    This definition replaces the same-named CW-loss variant from the
    preceding cell once this cell has run.

    NOTE(review): the random start is drawn with the raw ``epsilon``, before
    it is divided by ``std`` -- confirm this is intended.

    Returns:
        tuple: ``(err, err_pgd)`` -- 0-dim float tensors holding the number of
        samples ``model_target`` misclassifies on the clean batch and on the
        adversarial batch.
    """
    x_nat = X.detach()

    # Clean error of the target model; no autograd graph is needed.
    with torch.no_grad():
        out = model_target(x_nat)
    err = (out.max(1)[1] != y).float().sum()

    # The noise is created on the CPU and then moved to the input's device.
    random_noise = torch.FloatTensor(*x_nat.shape).uniform_(-epsilon, epsilon).to(x_nat.device)
    X_pgd = (x_nat + random_noise).requires_grad_(True)
    epsilon = epsilon / std
    step_size = step_size / std
    for _ in range(num_steps):
        with torch.enable_grad():
            loss = nn.CrossEntropyLoss()(model_source(X_pgd), y)
        # Gradient w.r.t. the input only; model parameter .grad buffers are
        # left untouched.
        grad, = torch.autograd.grad(loss, X_pgd)
        stepped = X_pgd.detach() + step_size * grad.sign()
        # Project onto the epsilon box, then onto the valid input range.
        eta = clamp(stepped - x_nat, -epsilon, epsilon)
        eta = clamp(eta, lower_limit - x_nat, upper_limit - x_nat)
        X_pgd = (x_nat + eta).requires_grad_(True)

    with torch.no_grad():
        err_pgd = (model_target(X_pgd).max(1)[1] != y).float().sum()
    return err, err_pgd


# Black-box evaluation with the cross-entropy PGD attack: examples are crafted
# on a ResNet50 source model and scored on a ResNet18 target, both loaded from
# saved checkpoints.  map_location remaps the stored tensors onto `device`, so
# the files load regardless of the device they were written from.
target_model_path = './Model_pkl/SVHN-100-3.pkl'
source_model_path = './Model_pkl/resnet50-svhn.pkl'
model_target = ResNet18(pre_train=False).to(device)
model_target.load_state_dict(torch.load(target_model_path, map_location=device))
model_source = ResNet50().to(device)
model_source.load_state_dict(torch.load(source_model_path, map_location=device))

eval_adv_test_blackbox(model_target, model_source, device, testloader)

In [None]:
#model_path = './Model_pkl/resnet50.pkl'
#target_model_path = './Model_pkl/pgd-200.pkl'
#target_model_path = './Model_pkl/advsemi_nocetrans_100-3-2-3.pkl'
#target_model_path = './Model_pkl/TRADES-ori.pkl'
#target_model_path = './Model_pkl/TRADES-noce-2.pkl'
#target_model_path = './Model_pkl/MART-ori.pkl'
#target_model_path = './Model_pkl/MART-noce-3.pkl'

#model_path = './Model_pkl/resnet50-svhn.pkl'
#target_model_path = './Model_pkl/SVHN-100-3_best.pkl'
#target_model_path = './Model_pkl/SVHN-MART-1_best.pkl'
#

# Result Testing in confusion matrix

In [None]:
#Eval test , and plot CM
def eval_model(model,dev_loader):
    """Collect predictions and ground-truth labels over ``dev_loader``.

    The model is put in eval mode and run without gradient tracking.  Each
    batch is indexed as ``(inputs, labels)`` and moved to the module-level
    ``device``; the arg-max class is taken as the prediction.

    Returns:
        tuple: ``(y_pred, y_true)`` -- two flat lists with one entry per
        sample, in loader order.
    """
    model.eval()
    predictions, targets = [], []
    with torch.no_grad():
        for batch in tqdm(dev_loader):
            x = batch[0].to(device)
            y = batch[1].to(device)
            predicted = torch.max(model(x), 1)[1]
            predictions.extend(predicted.cpu().numpy())
            targets.extend(y.cpu().numpy())
    return predictions, targets

In [None]:
# Confusion matrix on the training split.
y_pred, y_true = eval_model(model.to(device), trainloader)
cm = confusion_matrix(y_true, y_pred)

# Rows are true classes, columns are predicted classes.
plt.figure(figsize=(10, 6))
g = sns.heatmap(cm, annot=True, fmt='d')
g.set(xlabel='pred', ylabel='true')
plt.show()

In [None]:
# Overall accuracy for the labels collected by the most recent eval_model()
# call (the training split when the cells are run top to bottom).
accuracy_score(y_true, y_pred)

In [None]:
# Confusion matrix on the test split.
y_pred, y_true = eval_model(model.to(device), testloader)
cm = confusion_matrix(y_true, y_pred)

# Rows are true classes, columns are predicted classes.
plt.figure(figsize=(10, 6))
g = sns.heatmap(cm, annot=True, fmt='d')
g.set(
    title='Confusion matrix : Semi-Supervised',
    xlabel='pred',
    ylabel='true',
)
plt.show()

In [None]:
"""
0 : airplane (飛機)
1 : automobile (汽車)
2 : bird (鳥)
3 : cat (貓)
4 : deer (鹿)
5 : dog (狗)
6 : frog (青蛙)
7 : horse (馬)
8 : ship (船)
9 : truck (卡車)
"""

In [None]:
# Overall accuracy for the labels collected by the most recent eval_model()
# call (the test split when the cells are run top to bottom).
accuracy_score(y_true, y_pred)