In [None]:
import torch
import torchvision
import numpy as np
import logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s")

from benchmark import ImageBenchmark
# Build the benchmark and materialise its model list once; later cells index
# into `models` by position and into `models_dict` by name.
bench = ImageBenchmark()
models = list(bench.list_models())
# Maps model.__str__() -> model, restricted to models for which
# torch_model_exists() is truthy.
models_dict = {}
for i, model in enumerate(models):
    if not model.torch_model_exists():
        continue
    # `i` is the position in the unfiltered `models` list, so printed indices
    # can be used directly as `models[i]`.
    print(f'{i}\t {model.__str__()}')
    models_dict[model.__str__()] = model

In [None]:
from modeldiff import ModelDiff

def compare_with_seed(model1, model2, truth=-1):
    """Print the ModelDiff similarity of two models on non-random seed inputs.

    Args:
        model1, model2: benchmark models handed to ``ModelDiff``.
        truth: expected label to print next to the similarity. When left at
            ``-1`` it is derived from the names: 1 if the text before the
            first ``'-'`` in both ``__str__()`` values is equal, else 0.
    """
    print(f'comparing:\n  model1={model1}\n  model2={model2}')
    differ = ModelDiff(model1, model2)
    sim = differ.compute_similarity_with_inputs(differ.get_seed_inputs(rand=False))
    if truth == -1:
        root1 = model1.__str__().split('-')[0]
        root2 = model2.__str__().split('-')[0]
        truth = int(root1 == root2)
    print(f' similarity is {sim}, truth is {truth}')

def test(compare):
    # compare(models[23], models[38], 1) # should be similar
    compare(models[1], models[0], 0)   # should be different
#     compare(models[13], models[25], 0) # should be different
#     compare(models[13], models[22], 0) # should be different
    compare(models[1], models[17], 1)  # should be similar
    compare(models[16], models[13], 1) # should be similar
    compare(models[13], models[12], 1) # should be similar
    
# test(compare_with_seed)

In [None]:
DEVICE = 'cuda'

def gen_adv_inputs(model, inputs):
    """Perturb ``inputs`` with an L-inf PGD attack and return them as numpy.

    The attack is configured with eps=0.1, 40 iterations of step 0.01, a
    random start, and values clipped to [-2.2, 2.2]. The attack target ``y``
    is a tensor shaped like the model output with column 0 set to 1000 and
    every other column 0.

    Args:
        model: torch module; moved to ``DEVICE`` and switched to eval mode.
        inputs: numpy array accepted by ``torch.from_numpy``.

    Returns:
        numpy array of adversarial inputs, moved back to the CPU.
    """
    from advertorch.attacks import LinfPGDAttack

    def myloss(yhat, y):
        # Negated weighted squared error: full weight on column 0, 0.1 on the
        # mean over the remaining columns.
        return -((yhat[:,0]-y[:,0])**2 + 0.1*((yhat[:,1:]-y[:,1:])**2).mean(1)).mean()

    model = model.to(DEVICE)
    model.eval()
    inputs = torch.from_numpy(inputs).to(DEVICE)
    # The clean forward pass is only needed to learn the output shape.
    with torch.no_grad():
        clean_outputs = model(inputs)

    y = torch.zeros(size=clean_outputs.shape).to(DEVICE)
    y[:, 0] = 1000

    adversary = LinfPGDAttack(
        model, loss_fn=myloss, eps=0.1,
        nb_iter=40, eps_iter=0.01,
        rand_init=True, clip_min=-2.2, clip_max=2.2,
        targeted=False
    )
    adv_inputs = adversary.perturb(inputs, y)

    # The original also ran the model on adv_inputs and discarded the result;
    # that forward pass was dead work and has been removed.
    torch.cuda.empty_cache()
    return adv_inputs.detach().to('cpu').numpy()


# Pick one transferred + pruned MobileNetV2 model by its registry name.
model = models_dict['pretrain(mbnetv2,ImageNet)-transfer(Flower102,0.1)-prune(0.2)-']
# 100 non-random seed inputs and the model's predictions on them.
seed_inputs = model.get_seed_inputs(100, rand=False)
seed_outputs = model.batch_forward(seed_inputs)
_, seed_preds = seed_outputs.to('cpu').data.max(1)

# Perturb the seeds with PGD and re-predict, to compare against seed_preds.
adv_inputs = gen_adv_inputs(model.torch_model, seed_inputs)
adv_outputs = model.batch_forward(adv_inputs)
_, adv_preds = adv_outputs.to('cpu').data.max(1)

print(f"seed_preds={seed_preds}, adv_preds={adv_preds}")

In [None]:
DEVICE = 'cuda'
image_size = 224

def expand_vector(x, size):
    """Place flat vectors into the top-left corner of a zero image batch.

    ``x`` is viewed as ``(-1, 3, size, size)`` and written into the
    ``[:size, :size]`` corner of a zero tensor of shape
    ``(x.size(0), 3, image_size, image_size)`` created on ``DEVICE``
    (``image_size`` and ``DEVICE`` are module globals).
    """
    n_rows = x.size(0)
    canvas = torch.zeros(n_rows, 3, image_size, image_size).to(DEVICE)
    canvas[:, :, :size, :size] = x.view(-1, 3, size, size)
    return canvas

def normalize(x):
    """Return ``utils.apply_normalization(x, 'imagenet')``.

    NOTE(review): no cell visible here imports a module named ``utils``
    (only ``Utils``/``lazy_property`` are imported from it later), so calling
    this may raise NameError; confirm where ``utils`` is meant to come from.
    """
    normalized = utils.apply_normalization(x, 'imagenet')
    return normalized

def get_probs(model, x, y):
    """Return, per sample, the softmax probability the model gives to ``y``.

    Args:
        model: callable mapping ``x`` to a ``(batch, classes)`` logit tensor.
        x: input batch.
        y: 1-D integer tensor with one class index per sample, on the same
            device as the model output.

    Returns:
        1-D CPU tensor of length ``batch`` with ``softmax(model(x))[i, y[i]]``.
    """
    output = model(x)
    probs = torch.nn.functional.softmax(output, -1).detach()
    # gather reads exactly one probability per row; the previous
    # index_select + diag built a (batch, batch) matrix to get the same values.
    picked = probs.gather(1, y.long().view(-1, 1)).squeeze(1)
    return picked.cpu()

def get_preds(model, x):
    """Return the index of the largest output along dim 1 for each sample."""
    logits = model(x)
    return logits.data.max(1)[1]

# runs simba on a batch of images <images_batch> with true labels (for untargeted attack) or target labels
# (for targeted attack) <labels_batch>
def dct_attack_batch(model, images_batch, labels_batch, max_iters, freq_dims, stride, epsilon, order='rand', targeted=False, pixel_attack=False, log_every=1):
    """Coordinate-wise black-box attack on a batch (the comment above calls it simba).

    For each of ``max_iters`` coordinates, every image that is still
    "remaining" tries a step of ``-epsilon`` and then ``+epsilon`` on that
    coordinate and keeps whichever changes the probability of its label in the
    desired direction (lower when untargeted, higher when targeted).

    Args:
        model: callable returning logits for an image batch.
        images_batch: image tensor; ``size(0)`` is the batch size and
            ``size(2)`` is used as the image size.
        labels_batch: per-image labels (true labels when untargeted, target
            labels when targeted).
        max_iters: number of coordinates to try.
        freq_dims: side length of the perturbation space when ``order == 'rand'``.
        stride: forwarded to ``utils.block_order`` when ``order == 'strided'``.
        epsilon: step size applied to one coordinate per iteration.
        order: 'rand', 'diag', 'strided', or anything else for the default
            block order.
        targeted: selects targeted or untargeted success criteria.
        pixel_attack: when True the perturbation is applied directly;
            otherwise it is passed through ``utils.block_idct``.
        log_every: print a progress line every this many iterations.

    Returns:
        ``(expanded, probs, succs, queries, l2_norms, linf_norms)`` where
        ``expanded`` is the perturbed batch clamped to [-2.6, 2.6] and the
        other five are ``(batch_size, max_iters)`` logging tensors.

    NOTE(review): ``utils`` and ``args`` are not defined in the cells visible
    here, so every ``order`` other than 'rand' and ``pixel_attack=False``
    presumably raise NameError -- confirm before using those paths.
    NOTE(review): with ``order == 'rand'``, ``indices`` has at most
    ``3 * freq_dims**2`` entries; a larger ``max_iters`` would index past it.
    """
    batch_size = images_batch.size(0)
    image_size = images_batch.size(2)
    # sample one coordinate ordering, shared by every image in the batch
    if order == 'rand':
        indices = torch.randperm(3 * freq_dims * freq_dims)[:max_iters]
    elif order == 'diag':
        indices = utils.diagonal_order(image_size, 3)[:max_iters]
    elif order == 'strided':
        indices = utils.block_order(image_size, 3, initial_size=freq_dims, stride=stride)[:max_iters]
    else:
        indices = utils.block_order(image_size, 3)[:max_iters]
    if order == 'rand':
        expand_dims = freq_dims
    else:
        expand_dims = image_size
    n_dims = 3 * expand_dims * expand_dims
    # flat perturbation per image; created without an explicit device
    x = torch.zeros(batch_size, n_dims)
    # logging tensors
    probs = torch.zeros(batch_size, max_iters)
    succs = torch.zeros(batch_size, max_iters)
    queries = torch.zeros(batch_size, max_iters)
    l2_norms = torch.zeros(batch_size, max_iters)
    linf_norms = torch.zeros(batch_size, max_iters)
    prev_probs = get_probs(model, images_batch, labels_batch)
    preds = get_preds(model, images_batch)
    if pixel_attack:
        trans = lambda z: z
    else:
        trans = lambda z: utils.block_idct(z, block_size=image_size, linf_bound=args.linf_bound)
    remaining_indices = torch.arange(0, batch_size).long()
    for k in range(max_iters):
        dim = indices[k]
        # re-predict only the images that were still remaining last iteration
        expanded = (images_batch[remaining_indices] + trans(expand_vector(x[remaining_indices], expand_dims))).clamp(-2.6, 2.6)
        perturbation = trans(expand_vector(x, expand_dims))
        l2_norms[:, k] = perturbation.view(batch_size, -1).norm(2, 1)
        linf_norms[:, k] = perturbation.view(batch_size, -1).abs().max(1)[0]
        preds_next = get_preds(model, expanded)
        preds[remaining_indices] = preds_next
        if targeted:
            remaining = preds.ne(labels_batch)
        else:
            remaining = preds.eq(labels_batch)
        # check if all images are misclassified and stop early
        if remaining.sum() == 0:
            # NOTE(review): this clamps to [0, 1] while every other clamp in
            # this function uses [-2.6, 2.6] -- confirm which range is intended.
            adv = (images_batch + trans(expand_vector(x, expand_dims))).clamp(0, 1)
            probs_k = get_probs(model, adv, labels_batch)
            probs[:, k:] = probs_k.unsqueeze(1).repeat(1, max_iters - k)
            succs[:, k:] = torch.ones(batch_size, max_iters - k)
            queries[:, k:] = torch.zeros(batch_size, max_iters - k)
            break
        # NOTE(review): `remaining` lives wherever get_preds put `preds`; it is
        # used here to index a tensor created without a device -- verify this
        # works when the model runs on CUDA.
        remaining_indices = torch.arange(0, batch_size)[remaining].long()
        if k > 0:
            succs[:, k-1] = ~remaining
        # one-hot step of size epsilon on the current coordinate
        diff = torch.zeros(remaining.sum(), n_dims)
        diff[:, dim] = epsilon
        left_vec = x[remaining_indices] - diff
        right_vec = x[remaining_indices] + diff
        # trying negative direction
        adv = (images_batch[remaining_indices] + trans(expand_vector(left_vec, expand_dims))).clamp(-2.6, 2.6)
        left_probs = get_probs(model, adv, labels_batch[remaining_indices])
        queries_k = torch.zeros(batch_size)
        # increase query count for all remaining images
        queries_k[remaining_indices] += 1
        if targeted:
            improved = left_probs.gt(prev_probs[remaining_indices])
        else:
            improved = left_probs.lt(prev_probs[remaining_indices])
        # only increase query count further by 1 for images that did not improve in adversarial loss
        if improved.sum() < remaining_indices.size(0):
            queries_k[remaining_indices[~improved]] += 1
        # try positive directions
        adv = (images_batch[remaining_indices] + trans(expand_vector(right_vec, expand_dims))).clamp(-2.6, 2.6)
        right_probs = get_probs(model, adv, labels_batch[remaining_indices])
        # the positive step must beat both the previous value and the negative step
        if targeted:
            right_improved = right_probs.gt(torch.max(prev_probs[remaining_indices], left_probs))
        else:
            right_improved = right_probs.lt(torch.min(prev_probs[remaining_indices], left_probs))
        probs_k = prev_probs.clone()
        # update x depending on which direction improved
        # (the right update runs second, so it overrides the left one where both improved)
        if improved.sum() > 0:
            left_indices = remaining_indices[improved]
            left_mask_remaining = improved.unsqueeze(1).repeat(1, n_dims)
            x[left_indices] = left_vec[left_mask_remaining].view(-1, n_dims)
            probs_k[left_indices] = left_probs[improved]
        if right_improved.sum() > 0:
            right_indices = remaining_indices[right_improved]
            right_mask_remaining = right_improved.unsqueeze(1).repeat(1, n_dims)
            x[right_indices] = right_vec[right_mask_remaining].view(-1, n_dims)
            probs_k[right_indices] = right_probs[right_improved]
        probs[:, k] = probs_k
        queries[:, k] = queries_k
        prev_probs = probs[:, k]
        if (k + 1) % log_every == 0 or k == max_iters - 1:
            print('Iteration %d: queries = %.4f, prob = %.4f, remaining = %.4f' % (
                    k + 1, queries.sum(1).mean(), probs[:, k].mean(), remaining.float().mean()))
    # final prediction with the accumulated perturbation
    expanded = (images_batch + trans(expand_vector(x, expand_dims))).clamp(-2.6, 2.6)
    preds = get_preds(model, expanded)
    if targeted:
        remaining = preds.ne(labels_batch)
    else:
        remaining = preds.eq(labels_batch)
    succs[:, max_iters-1] = ~remaining
    return expanded, probs, succs, queries, l2_norms, linf_norms


def search_adv_inputs(model, inputs, labels):
    """Run ``dct_attack_batch`` in pixel mode and return the perturbed batch.

    Args:
        model: callable passed straight to ``dct_attack_batch``.
        inputs: numpy array of images; converted to a tensor on ``DEVICE``.
        labels: tensor of labels; cast to long and moved to ``DEVICE``.

    Returns:
        The first element of the attack result (the perturbed images).
    """
    attack_kwargs = dict(
        images_batch=torch.from_numpy(inputs).to(DEVICE),
        labels_batch=labels.long().to(DEVICE),
        max_iters=5000,
        freq_dims=image_size,
        stride=7,
        epsilon=0.2,
        order='rand',
        targeted=False,
        pixel_attack=True,
        log_every=10,
    )
    with torch.no_grad():
        adv, _probs, _succs, _queries, _l2, _linf = dct_attack_batch(model, **attack_kwargs)
    return adv


# Same transferred + pruned MobileNetV2 model as the PGD experiment.
model = models_dict['pretrain(mbnetv2,ImageNet)-transfer(Flower102,0.1)-prune(0.2)-']
seed_inputs = model.get_seed_inputs(100, rand=False)
seed_outputs = model.batch_forward(seed_inputs)
_, seed_preds = seed_outputs.to('cpu').data.max(1)

# The model's own seed predictions are used as the labels to move away from.
adv_inputs = search_adv_inputs(model.torch_model, seed_inputs, seed_preds)
adv_outputs = model.batch_forward(adv_inputs)
_, adv_preds = adv_outputs.to('cpu').data.max(1)

print(f"seed_preds={seed_preds}, adv_preds={adv_preds}")


In [None]:
import torch
import torchvision
import torch.nn.functional as F
import numpy as np
import logging
import copy
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s")


from benchmark import ImageBenchmark
# Rebuild the benchmark registry so this cell can run on its own.
bench = ImageBenchmark()
models = list(bench.list_models())
# Maps model.__str__() -> model for models whose torch_model_exists() is truthy.
models_dict = {}
for i, model in enumerate(models):
    if not model.torch_model_exists():
        continue
#     print(f'{i}\t {model.__str__()}')
    models_dict[model.__str__()] = model


DEVICE = 'cuda'
image_size = 224

def expand_vector(x, image_size):
    """Reshape flat vectors to ``(batch, 3, image_size, image_size)`` on ``DEVICE``.

    The ``image_size`` parameter shadows the module-level constant, so the
    zero canvas and the written region always have the same size.
    """
    n_rows = x.size(0)
    canvas = torch.zeros(n_rows, 3, image_size, image_size).to(DEVICE)
    canvas[:, :, :image_size, :image_size] = x.view(-1, 3, image_size, image_size)
    return canvas

def evaluate_inputs(model, inputs, seed_outputs, seed_preds, lambda1=0.5):
    """Score candidate inputs by divergence from the seeds plus output diversity.

    Both the model outputs and ``seed_outputs`` are passed through softmax.
    ``divergence`` is the mean per-sample L2 distance between the two;
    ``diversity`` is the mean pairwise L2 distance between the outputs; the
    score is ``divergence + lambda1 * diversity``.

    A sample is "remaining" (still to be mutated) when its prediction equals
    its seed prediction AND either its divergence is below the batch mean or
    it takes part in an off-diagonal pair whose distance is below the 0.011
    quantile of the pairwise-distance matrix.

    Args:
        model: callable returning ``(batch, classes)`` logits.
        inputs: candidate input batch.
        seed_outputs: logits the model produced on the seed inputs.
        seed_preds: predicted class per seed input.
        lambda1: weight of the diversity term.

    Returns:
        dict with keys 'outputs' (softmaxed), 'preds', 'score', 'divergence',
        'diversity', 'succ' (prediction changed), 'remaining' (sorted list of
        int indices) and 'eval_line' (summary string).
    """
    outputs = model(inputs)
    _, preds = outputs.data.max(1)
    changed = preds.ne(seed_preds)
    outputs = torch.nn.functional.softmax(outputs, -1)
    seed_outputs = torch.nn.functional.softmax(seed_outputs, -1)
    reduce_dims = tuple(range(outputs.dim())[1:])
    divergence_arr = torch.sum((outputs - seed_outputs) ** 2, dim=reduce_dims) ** 0.5
    divergence = torch.mean(divergence_arr)

    diversity_matrix = torch.cdist(outputs, outputs, p=2.0)
    diversity = torch.mean(diversity_matrix)
    flat_distances = diversity_matrix.reshape(-1)
    kth = 1 + round(0.011 * (flat_distances.numel() - 1))
    diversity_quantile = flat_distances.kthvalue(kth).values.item()
    score = divergence + lambda1 * diversity
    succ = changed

    # Vectorised selection: the previous per-index Python loops indexed the
    # tensor once per element, which forces a device sync each time on CUDA.
    low_divergence = divergence_arr.lt(divergence)
    off_diagonal = ~torch.eye(
        diversity_matrix.size(0), dtype=torch.bool, device=diversity_matrix.device)
    close_pairs = diversity_matrix.lt(diversity_quantile) & off_diagonal
    in_close_pair = close_pairs.any(dim=0) | close_pairs.any(dim=1)
    remaining_mask = (low_divergence | in_close_pair) & ~changed
    # nonzero returns ascending indices, so the list is already sorted.
    remaining_indices = torch.nonzero(remaining_mask).squeeze(-1).cpu().tolist()

    eval_line = f'score={score:.4f}, divergence={divergence:.4f}, diversity={diversity:.4f}, num_succ={succ.sum()}, num_remain={len(remaining_indices)}'
    return {
        'outputs': outputs,
        'preds': preds,
        'score': score,
        'divergence': divergence,
        'diversity': diversity,
        'succ': succ,
        'remaining': remaining_indices,
        'eval_line': eval_line
    }
    
    
def optimize_towards_goal(
    model, seed_inputs, seed_outputs, seed_preds, 
    max_iters=10000, mutation_size=1, epsilon=0.5, lambda1=0.0, log_every=100, save_every=1000):
    """Hill-climb the seed inputs to raise the ``evaluate_inputs`` score.

    Each iteration picks ``mutation_size`` random coordinates, adds and
    subtracts ``epsilon`` there for every sample listed in the current
    evaluation's 'remaining', and keeps the better direction if it beats the
    current score. Values are clamped to [-2.6, 2.6].

    Args:
        model: callable returning logits.
        seed_inputs: tensor batch of seed inputs (left unmodified).
        seed_outputs: model logits on the seed inputs.
        seed_preds: predicted class per seed input.
        max_iters: maximum number of mutation rounds.
        mutation_size: number of coordinates changed per round.
        epsilon: step applied to each chosen coordinate.
        lambda1: diversity weight, used for EVERY evaluation.
        log_every: print the evaluation summary every this many rounds.
        save_every: snapshot the inputs every this many rounds.

    Returns:
        ``(inputs, saved_inputs)``: the final tensor batch and a dict mapping
        round index to a numpy snapshot of the inputs.
    """
    input_shape = seed_inputs[0].shape
    ndims = int(np.prod(input_shape))

    with torch.no_grad():
        inputs = seed_inputs.clone()
        saved_inputs = {}
        evaluation = evaluate_inputs(model, inputs, seed_outputs, seed_preds, lambda1)
        print(f'initial_evaluation: {evaluation["eval_line"]}')

        for i in range(max_iters):
            mutation_pos = np.random.choice(ndims, size=mutation_size, replace=False)
            mutation = np.zeros(ndims, dtype=np.float32)
            mutation[mutation_pos] = epsilon
            mutation = np.reshape(mutation, tuple(input_shape))

            mutation_batch = np.zeros(shape=tuple(inputs.shape), dtype=np.float32)
            mutation_indices = evaluation['remaining']
            if len(mutation_indices) == 0:
                print(f'{i:4d}-th - no remaining indice: {evaluation["eval_line"]}')
                break
            mutation_batch[mutation_indices] = mutation
            # Follow the inputs' device rather than a module-level constant.
            mutation_batch = torch.from_numpy(mutation_batch).to(inputs.device)

            prev_score = evaluation["score"]
            # lambda1 must be forwarded here: previously only the initial
            # evaluation received it and these two fell back to the
            # evaluator's default, so scores were compared on different scales.
            mutate_right_inputs = (inputs + mutation_batch).clamp(-2.6, 2.6)
            mutate_right_eval = evaluate_inputs(model, mutate_right_inputs, seed_outputs, seed_preds, lambda1)
            mutate_right_score = mutate_right_eval['score']
            mutate_left_inputs = (inputs - mutation_batch).clamp(-2.6, 2.6)
            mutate_left_eval = evaluate_inputs(model, mutate_left_inputs, seed_outputs, seed_preds, lambda1)
            mutate_left_score = mutate_left_eval['score']

            if mutate_right_score <= prev_score and mutate_left_score <= prev_score:
                pass  # neither direction helps; keep the current inputs
            elif mutate_right_score > mutate_left_score:
                inputs = mutate_right_inputs
                evaluation = mutate_right_eval
            else:
                inputs = mutate_left_inputs
                evaluation = mutate_left_eval
            if i % log_every == 0:
                print(f'{i:4d}-th evaluation: {evaluation["eval_line"]}')
            if i % save_every == 0:
                # explicit copy: on CPU, .numpy() shares memory with the tensor
                saved_inputs[i] = inputs.cpu().numpy().copy()
        return inputs, saved_inputs

# Transferred + pruned ResNet18 model, selected by registry name.
model = models_dict['pretrain(resnet18,ImageNet)-transfer(Flower102,0.1)-prune(0.2)-']
seed_inputs = model.get_seed_inputs(100, rand=False)
# The optimiser works on device tensors, so convert before the forward pass.
seed_inputs = torch.from_numpy(seed_inputs).to(DEVICE)
seed_outputs = model.batch_forward(seed_inputs)
_, seed_preds = seed_outputs.data.max(1)

# epsilon, lambda1 and max_iters are left at optimize_towards_goal's defaults;
# the snapshots (second return value) are discarded.
adv_inputs, _ = optimize_towards_goal(model.torch_model_on_device, seed_inputs, seed_outputs, seed_preds, log_every=100)
adv_outputs = model.batch_forward(adv_inputs).cpu()
_, adv_preds = adv_outputs.data.max(1)

print(f"seed_preds={seed_preds}, adv_preds={adv_preds}")

In [None]:
from scipy import spatial
from utils import lazy_property, Utils
import os


def compute_ddv(model, normal_outputs, adv_outputs):
    """Build the decision distance vector (DDV) from two sets of outputs.

    Entry ``i`` is the cosine distance between ``normal_outputs[i]`` and
    ``adv_outputs[i]``; the vector is then passed through ``Utils.normalize``.
    ``model`` is accepted but not used.
    """
    distances = [
        spatial.distance.cosine(normal_row, adv_row)
        for normal_row, adv_row in zip(normal_outputs, adv_outputs)
    ]
    return Utils.normalize(np.array(distances))


# For every eligible model: optimise its seed inputs, compute the DDV between
# seed and optimised outputs, and store everything next to the model.
for i, model in enumerate(models):
    if not model.torch_model_exists():
        continue
    model_name = model.__str__()
    model_path = model.torch_model_path
    if i < 6: # skip pretrained models
        continue
    if 'quantize' in model_name: # skip quantized models
        continue
    print(f'{i}\t generating inputs for {model.__str__()}')
    seed_inputs = model.get_seed_inputs(100, rand=False)
    seed_inputs = torch.from_numpy(seed_inputs).to(DEVICE)
    seed_outputs = model.batch_forward(seed_inputs)
    _, seed_preds = seed_outputs.data.max(1)

    adv_inputs, saved_inputs = optimize_towards_goal(
        model.torch_model_on_device, seed_inputs, seed_outputs, seed_preds,
        epsilon=0.2, lambda1=0.5, max_iters=10000, log_every=100, save_every=1000)
    adv_outputs = model.batch_forward(adv_inputs).cpu()
    _, adv_preds = adv_outputs.data.max(1)
    
    ddv = compute_ddv(model, seed_outputs.cpu().numpy(), adv_outputs.cpu().numpy())
    print(ddv)
    
    out_path = os.path.join(model_path, 'inputs.npz')
    # NOTE(review): saved_inputs is a dict, so numpy stores it as an object
    # array; loading it back presumably needs allow_pickle=True -- confirm.
    np.savez(out_path, seed_inputs=seed_inputs.cpu().numpy(), adv_inputs=adv_inputs.cpu().numpy(), ddv=ddv, saved_inputs=saved_inputs)
    # break

In [None]:
import torch
import torchvision
import torch.nn.functional as F
import numpy as np
import logging
import os
import random
import sys
import copy
from scipy import spatial
from utils import lazy_property, Utils
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)-12s %(levelname)-8s %(message)s")


from benchmark import ImageBenchmark
# Rebuild the benchmark registry so this cell can run on its own.
bench = ImageBenchmark()
models = list(bench.list_models())
# Maps model.__str__() -> model for models whose torch_model_exists() is truthy.
models_dict = {}
for i, model in enumerate(models):
    if not model.torch_model_exists():
        continue
#     print(f'{i}\t {model.__str__()}')
    models_dict[model.__str__()] = model

    
def get_comparable_models(target_model):
    """Find the parent of ``target_model`` and the models it should differ from.

    The parent name is the target name with its last '-'-separated step
    removed (names end with '-', hence the last TWO split segments are
    dropped); it is looked up in ``models_dict`` and a missing entry raises
    KeyError. Reference models are all entries of ``models`` whose name does
    not start with the target's first segment.

    Returns:
        ``(parent_model, reference_models)``.
    """
    segments = target_model.__str__().split('-')
    parent_key = '-'.join(segments[:-2]) + '-'
    parent_model = models_dict[parent_key]
    root_segment = segments[0]
    reference_models = [
        candidate for candidate in models
        if not candidate.__str__().startswith(root_segment)
    ]
    return parent_model, reference_models


def compute_ddv(model, normal_inputs, adv_inputs):
    """Compute the decision distance vector (DDV) of ``model`` on two input sets.

    Both input sets are run through ``model.batch_forward``; entry ``i`` is
    the cosine distance between the two outputs for sample ``i``. The vector
    is then passed through ``Utils.normalize``.
    """
    normal_outputs = model.batch_forward(normal_inputs).cpu().numpy()
    adv_outputs = model.batch_forward(adv_inputs).cpu().numpy()
    distances = [
        spatial.distance.cosine(normal_row, adv_row)
        for normal_row, adv_row in zip(normal_outputs, adv_outputs)
    ]
    return Utils.normalize(np.array(distances))


class bcolors:
    """ANSI escape sequences for styling terminal output.

    Wrap text as ``f'{bcolors.WARNING}...{bcolors.ENDC}'``; ENDC resets the
    styling.
    """
    # Foreground colours / text attributes:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    # Background colors:
    GREYBG = '\033[100m'
    REDBG = '\033[101m'
    GREENBG = '\033[102m'
    YELLOWBG = '\033[103m'
    BLUEBG = '\033[104m'
    PINKBG = '\033[105m'
    CYANBG = '\033[106m'

    
DEVICE = 'cuda'
image_size = 224


def expand_vector(x, image_size):
    """Reshape flat vectors to ``(batch, 3, image_size, image_size)`` on ``DEVICE``.

    The ``image_size`` parameter shadows the module-level constant, so the
    zero canvas and the written region always have the same size.
    """
    n_rows = x.size(0)
    reshaped = x.view(-1, 3, image_size, image_size)
    canvas = torch.zeros(n_rows, 3, image_size, image_size).to(DEVICE)
    canvas[:, :, :image_size, :image_size] = reshaped
    return canvas


def evaluate_inputs_1(model, inputs, seed_outputs, seed_preds, prev_score_arr, lambda1=0.5):
    """Score each input by the L2 distance of its raw output from its seed output.

    A sample counts as improved when its new distance is strictly greater
    than its entry in ``prev_score_arr``; only improved samples get their
    score replaced, so ``score_arr`` is a per-sample running maximum.

    Args:
        model: callable returning ``(batch, classes)`` outputs.
        inputs: candidate input batch.
        seed_outputs: model outputs on the seed inputs (no softmax applied).
        seed_preds: unused; kept for signature parity with the other evaluators.
        prev_score_arr: per-sample scores to beat, or None to start from
            -1000 for every sample. Not modified.
        lambda1: unused; kept for signature parity.

    Returns:
        dict with keys 'outputs', 'preds', 'score_arr', 'score' (mean of
        ``score_arr``), 'remaining' (every index), 'improved_indices' and
        'eval_line'.
    """
    outputs = model(inputs)
    _, preds = outputs.data.max(1)
    reduce_dims = tuple(range(outputs.dim())[1:])
    divergence_arr = torch.sum((outputs - seed_outputs) ** 2, dim=reduce_dims) ** 0.5

    if prev_score_arr is None:
        # Same device/dtype as the scores instead of relying on a global DEVICE.
        prev_score_arr = torch.full_like(divergence_arr, -1000.0)

    improved = divergence_arr.gt(prev_score_arr)
    improved_indices = torch.nonzero(improved).squeeze(-1).cpu().tolist()
    remaining_indices = list(range(len(inputs)))

    # clone(), not copy.copy(): a shallow tensor copy shares storage, so the
    # in-place update below used to overwrite the caller's prev_score_arr and
    # leak one candidate's scores into the evaluation of the next.
    score_arr = prev_score_arr.clone()
    score_arr[improved] = divergence_arr[improved]

    score = score_arr.mean()
    eval_line = f'score={score:.4f}, num_remain={len(remaining_indices)}, num_improved={len(improved_indices)}'
    return {
        'outputs': outputs,
        'preds': preds,
        'score_arr': score_arr,
        'score': score,
        'remaining': remaining_indices,
        'improved_indices': improved_indices,
        'eval_line': eval_line
    }


def evaluate_inputs_2(model, inputs, seed_outputs, seed_preds, prev_score_arr, lambda1=0.5):
    """Score candidate inputs by divergence from the seeds plus output diversity.

    Both the model outputs and ``seed_outputs`` are passed through softmax.
    ``divergence`` is the mean per-sample L2 distance between the two;
    ``diversity`` is the mean pairwise L2 distance between the outputs; the
    score is ``divergence + lambda1 * diversity``.

    A sample is "remaining" when its prediction equals its seed prediction
    AND either its divergence is below the batch mean or it takes part in an
    off-diagonal pair whose distance is below the 0.011 quantile of the
    pairwise-distance matrix.

    Args:
        model: callable returning ``(batch, classes)`` logits.
        inputs: candidate input batch.
        seed_outputs: logits the model produced on the seed inputs.
        seed_preds: predicted class per seed input.
        prev_score_arr: unused; kept for signature parity with
            ``evaluate_inputs_1``.
        lambda1: weight of the diversity term.

    Returns:
        dict with keys 'outputs' (softmaxed), 'preds', 'score_arr'
        (per-sample divergence), 'score', 'divergence', 'diversity', 'succ',
        'remaining' (sorted int indices), 'improved_indices' (every index)
        and 'eval_line'.
    """
    outputs = model(inputs)
    _, preds = outputs.data.max(1)
    changed = preds.ne(seed_preds)
    outputs = torch.nn.functional.softmax(outputs, -1)
    seed_outputs = torch.nn.functional.softmax(seed_outputs, -1)
    reduce_dims = tuple(range(outputs.dim())[1:])
    divergence_arr = torch.sum((outputs - seed_outputs) ** 2, dim=reduce_dims) ** 0.5
    divergence = torch.mean(divergence_arr)

    diversity_matrix = torch.cdist(outputs, outputs, p=2.0)
    diversity = torch.mean(diversity_matrix)
    flat_distances = diversity_matrix.reshape(-1)
    kth = 1 + round(0.011 * (flat_distances.numel() - 1))
    diversity_quantile = flat_distances.kthvalue(kth).values.item()
    score = divergence + lambda1 * diversity
    succ = changed

    # Vectorised selection: the previous per-index Python loops indexed the
    # tensor once per element, which forces a device sync each time on CUDA.
    low_divergence = divergence_arr.lt(divergence)
    off_diagonal = ~torch.eye(
        diversity_matrix.size(0), dtype=torch.bool, device=diversity_matrix.device)
    close_pairs = diversity_matrix.lt(diversity_quantile) & off_diagonal
    in_close_pair = close_pairs.any(dim=0) | close_pairs.any(dim=1)
    remaining_mask = (low_divergence | in_close_pair) & ~changed
    # nonzero returns ascending indices, so the list is already sorted.
    remaining_indices = torch.nonzero(remaining_mask).squeeze(-1).cpu().tolist()
    improved_indices = list(range(len(inputs)))

    eval_line = f'score={score:.4f}, divergence={divergence:.4f}, diversity={diversity:.4f}, num_succ={succ.sum()}, num_remain={len(remaining_indices)}'
    return {
        'outputs': outputs,
        'preds': preds,
        'score_arr': divergence_arr,
        'score': score,
        'divergence': divergence,
        'diversity': diversity,
        'succ': succ,
        'remaining': remaining_indices,
        'improved_indices': improved_indices,
        'eval_line': eval_line
    }
    
    
def optimize_towards_goal(
    model, seed_inputs, seed_outputs, seed_preds, evaluate_inputs,
    max_iters=10000, mutation_size=1, epsilon=0.5, lambda1=0.0,
    log_every=100, save_every=1000):
    """Hill-climb the seed inputs to raise the score given by ``evaluate_inputs``.

    Each round picks ``mutation_size`` not-yet-used coordinates, adds and
    subtracts ``epsilon`` there for every sample in the current evaluation's
    'remaining', and accepts the better direction if it beats the current
    score. On acceptance only the rows in the candidate's 'improved_indices'
    are copied over and the used coordinates are retired. Values are clamped
    to [-2.6, 2.6].

    Args:
        model: callable returning logits.
        seed_inputs: tensor batch of seed inputs (left unmodified).
        seed_outputs: model outputs on the seed inputs.
        seed_preds: predicted class per seed input.
        evaluate_inputs: callable
            ``(model, inputs, seed_outputs, seed_preds, prev_score_arr, lambda1)``
            returning a dict with 'score', 'score_arr', 'remaining',
            'improved_indices' and 'eval_line'.
        max_iters: maximum number of mutation rounds.
        mutation_size: number of coordinates changed per round.
        epsilon: step applied to each chosen coordinate.
        lambda1: forwarded to EVERY ``evaluate_inputs`` call.
        log_every: print the evaluation summary every this many rounds.
        save_every: snapshot the inputs every this many rounds.

    Returns:
        ``(inputs, saved_inputs)``: the final tensor batch and a dict mapping
        round index to a numpy snapshot of the inputs.
    """
    input_shape = tuple(seed_inputs[0].shape)
    ndims = int(np.prod(input_shape))

    with torch.no_grad():
        inputs = seed_inputs.clone()
        saved_inputs = {}
        evaluation = evaluate_inputs(model, inputs, seed_outputs, seed_preds, None, lambda1)
        print(f'initial_evaluation: {evaluation["eval_line"]}')

        # Coordinates that have not yet been part of an accepted mutation.
        remaining_pos = np.arange(ndims)

        for i in range(max_iters):
            if len(remaining_pos) < mutation_size:
                print(f'not enough remaining pos')
                break
            mutation_pos = np.random.choice(remaining_pos, size=mutation_size, replace=False)
            mutation = np.zeros(ndims, dtype=np.float32)
            mutation[mutation_pos] = epsilon
            mutation = mutation.reshape(input_shape)

            mutation_batch = np.zeros(shape=tuple(inputs.shape), dtype=np.float32)
            mutation_indices = evaluation['remaining']
            if len(mutation_indices) == 0:
                print(f'no remaining indice')
                break
            mutation_batch[mutation_indices] = mutation
            # Follow the inputs' device rather than a module-level constant.
            mutation_batch = torch.from_numpy(mutation_batch).to(inputs.device)

            prev_score = evaluation["score"]
            prev_score_arr = evaluation["score_arr"]
            # lambda1 must be forwarded here: previously only the initial
            # evaluation received it and these two fell back to the
            # evaluator's default, so scores were compared on different scales.
            mutate_right_inputs = (inputs + mutation_batch).clamp(-2.6, 2.6)
            mutate_right_eval = evaluate_inputs(
                model, mutate_right_inputs, seed_outputs, seed_preds, prev_score_arr, lambda1)
            mutate_right_score = mutate_right_eval['score']
            mutate_left_inputs = (inputs - mutation_batch).clamp(-2.6, 2.6)
            mutate_left_eval = evaluate_inputs(
                model, mutate_left_inputs, seed_outputs, seed_preds, prev_score_arr, lambda1)
            mutate_left_score = mutate_left_eval['score']

            if mutate_right_score <= prev_score and mutate_left_score <= prev_score:
                accepted = None  # neither direction helps
            elif mutate_right_score > mutate_left_score:
                accepted = (mutate_right_inputs, mutate_right_eval)
            else:
                accepted = (mutate_left_inputs, mutate_left_eval)

            if accepted is not None:
                candidate_inputs, candidate_eval = accepted
                improved_indices = candidate_eval['improved_indices']
                inputs[improved_indices] = candidate_inputs[improved_indices]
                evaluation = candidate_eval
                # setdiff1d keeps the array sorted and unique, like the
                # sorted(set - set) it replaces, without rebuilding a set.
                remaining_pos = np.setdiff1d(remaining_pos, mutation_pos)

            if i % log_every == 0:
                print(f'{i:4d}-th evaluation: {evaluation["eval_line"]}')
            if i % save_every == 0:
                # explicit copy: on CPU, .numpy() shares memory with `inputs`,
                # which is updated in place on later rounds
                saved_inputs[i] = inputs.cpu().numpy().copy()
        return inputs, saved_inputs

    
def compute_ddv_with_outputs(model, normal_outputs, adv_outputs):
    """Build the decision distance vector (DDV) from precomputed outputs.

    Entry ``i`` is the cosine distance between ``normal_outputs[i]`` and
    ``adv_outputs[i]``; the vector is then passed through ``Utils.normalize``.
    ``model`` is accepted but not used.
    """
    paired = zip(normal_outputs, adv_outputs)
    distances = list(spatial.distance.cosine(ya, yb) for ya, yb in paired)
    normalized = Utils.normalize(np.array(distances))
    return normalized


# For every eligible model: optimise its seed inputs, compute its DDV, and
# check that the DDV is closer to the parent model's DDV than to the DDVs of
# unrelated reference models. Results are saved next to the model.
for i, model in enumerate(models):
    if not model.torch_model_exists():
        continue
    model_name = model.__str__()
    model_path = model.torch_model_path
    if i < 6: # skip pretrained models
        continue
    if 'quantize' in model_name: # skip quantized models
        continue

#     if i != 8:
#         continue
    out_path = os.path.join(model_path, 'blackbox_inputs_diversity0.5_m1e0.2.npz')
#     if os.path.exists(out_path):
#         continue

    print(f'{i}\t generating inputs for {model.__str__()}')
    seed_inputs = model.get_seed_inputs(100, rand=False)
    seed_inputs = torch.from_numpy(seed_inputs).to(DEVICE)
    model.torch_model.to(DEVICE)
    seed_outputs = model.batch_forward(seed_inputs)
    _, seed_preds = seed_outputs.data.max(1)

    adv_inputs, saved_inputs = optimize_towards_goal(
        model.torch_model_on_device, seed_inputs, seed_outputs, seed_preds, evaluate_inputs=evaluate_inputs_2,
        mutation_size=1, epsilon=0.1, lambda1=0.5, max_iters=1000, log_every=100, save_every=100)
    adv_outputs = model.batch_forward(adv_inputs).cpu()
    _, adv_preds = adv_outputs.data.max(1)

    # From here on the inputs are numpy arrays (compute_ddv and np.savez use them).
    seed_inputs=seed_inputs.cpu().numpy()
    adv_inputs=adv_inputs.cpu().numpy()

    ddv = compute_ddv(model, seed_inputs, adv_inputs)

    print(f'evaluating inputs')
    parent_model, ref_models = get_comparable_models(model)
    parent_sim = 0
    # The parent is evaluated first (index 0) so parent_sim is set before any
    # reference model is compared against it.
    # NOTE: this inner loop rebinds `i`; the outer loop does not read `i`
    # again in this iteration, so the shadowing is harmless here.
    for i, ref_model in enumerate([parent_model] + ref_models):
        if 'quantize' in ref_model.__str__(): # quantized models are equivalent to its teacher model
            continue
        try:
            ref_model.torch_model.to(DEVICE)
            ref_ddv = compute_ddv(ref_model, seed_inputs, adv_inputs)
            # spatial.distance.cosine is a DISTANCE: smaller means more alike.
            ref_sim = spatial.distance.cosine(ddv, ref_ddv)
            ref_model.torch_model.cpu()
            if i == 0:
                parent_sim = ref_sim
                gap = 1
                print(f'parent_sim: {ref_sim:.4f} {ref_model}')
            else:
                # A reference model should be farther away than the parent.
                gap = ref_sim - parent_sim
                if gap > 0:
                    print(f'ref_sim: {ref_sim:.4f} gap={gap:.4f} {ref_model}')
                else:
                    print(f'{bcolors.WARNING}[ERROR] ref_sim: {ref_sim:.4f} gap={gap:.4f} {ref_model}{bcolors.ENDC}')
        except Exception as e:
            # Best effort: a model that cannot be compared is reported and skipped.
            print(f'failed to compare: {ref_model}')
            print(f'exception: {e}')

    # NOTE(review): saved_inputs is a dict, so numpy stores it as an object
    # array; loading it back presumably needs allow_pickle=True -- confirm.
    np.savez(out_path, seed_inputs=seed_inputs, adv_inputs=adv_inputs, ddv=ddv, saved_inputs=saved_inputs)
    # break

In [None]:
from utils import Utils
from matplotlib import pyplot as plt

def show_images(images, labels, title='examples'):
    """Plot up to 25 images in a 5x5 grid, each titled with its label.

    Args:
        images: array of shape ``(N, C, H, W)``; transposed to channels-last
            before plotting.
        labels: sequence with one label per image.
        title: figure-level title.

    Batches smaller than 25 are plotted in full instead of raising IndexError.
    """
    images = np.transpose(images, (0, 2, 3, 1))
    n_shown = min(25, len(images), len(labels))
    plt.figure(figsize=(10,10))
    plt.subplots_adjust(hspace=0.2)
    for n in range(n_shown):
        plt.subplot(5,5,n+1)
        # squeeze drops a singleton channel axis so grayscale images plot too
        img = images[n].squeeze()
        plt.imshow(img)
        plt.title(f'{labels[n]}')
        plt.axis('off')
    _ = plt.suptitle(title)
    plt.show()

# show_images(seed_inputs, list(range(seed_inputs.shape[0])))
# print(seed_inputs[0])
# batch_outputs1 = model1.batch_forward(seed_inputs)
# batch_preds1 = batch_outputs1.to('cpu').data.max(1)
# print(batch_preds1)

# Pick the model at position 2 of the benchmark list and build the test-split
# dataloader for its dataset.
m = models[2]
mm = m.torch_model
test_loader = m.benchmark.get_dataloader(m.dataset_id, split='test')

# with torch.no_grad():
#     mm.eval()
#     total = 0
#     top1 = 0
#     for i, (batch, label) in enumerate(test_loader):
#         batch, label = batch.to(DEVICE), label.to(DEVICE)
#         total += batch.size(0)
#         out = mm(batch)
#         _, pred = out.max(dim=1)
#         top1 += int(pred.eq(label).sum().item())

# acc = float(top1) / total * 100
# print(top1, total, acc)

# Visualise a single test batch (the one at index 10) with "label-prediction"
# titles. Earlier batches are skipped and the loop stops after plotting.
with torch.no_grad():
    for i, (images, labels) in enumerate(test_loader):
        if i != 10:
            continue
        images, labels = images.cuda(), labels.cuda()
        # NOTE(review): `mm` is used as-is; nothing in this cell moves it to
        # CUDA or calls eval() -- confirm it is already on the GPU.
        outputs = mm(images)
        _, preds = outputs.max(dim=1)
        print(outputs[0])
        # `labels` is rebound from a tensor to a list of display strings.
        labels = [f'{label}-{preds[i]}' for i,label in enumerate(list(labels))]
        show_images(images.to('cpu').numpy(), labels)
        break

In [None]:
from model.fe_mobilenet import mbnetv2_dropout
import os

# Build an untrained 67-class mbnetv2_dropout model and load weights from a
# checkpoint file under models/.
torch_model = mbnetv2_dropout(
            pretrained=False,
            num_classes=67
        )
# NOTE(review): no map_location is given, so tensors are restored to the
# device they were saved from -- confirm that is available on this machine.
ckpt = torch.load(os.path.join('models', 'train(mbnetv2,MIT67)-', 'ckpt.pth'))
torch_model.load_state_dict(ckpt['state_dict'])
# Rebinds the `mm` name used by the visualisation cell.
mm = torch_model