# The notebook contains
### Code for _Bulyan_ aggregation algorithm
### Evaluation of all of the attacks (Fang, LIE, and our SOTA AGR-tailored and AGR-agnstic) on Bulyan

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [1]:
from __future__ import print_function
import argparse, os, sys, csv, shutil, time, random, operator, pickle, ast, math
import numpy as np
import pandas as pd
from torch.optim import Optimizer
import torch.nn.functional as F
import torch
import pickle
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data as data
import torch.multiprocessing as mp

sys.path.insert(0,'./../utils/')
from logger import *
from eval import *
from misc import *

from cifar10_normal_train import *
from cifar10_util import *
from adam import Adam
from sgd import SGD

## Get cifar10 data and split it in IID fashion

In [4]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
data_loc='/mnt/nfs/work1/amir/vshejwalkar/cifar10_data/'
# load the train dataset

train_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

cifar10_train = datasets.CIFAR10(root=data_loc, train=True, download=True, transform=train_transform)

cifar10_test = datasets.CIFAR10(root=data_loc, train=False, download=True, transform=train_transform)

X=[]
Y=[]
for i in range(len(cifar10_train)):
    X.append(cifar10_train[i][0].numpy())
    Y.append(cifar10_train[i][1])

for i in range(len(cifar10_test)):
    X.append(cifar10_test[i][0].numpy())
    Y.append(cifar10_test[i][1])

X=np.array(X)
Y=np.array(Y)

print('total data len: ',len(X))

if not os.path.isfile('./cifar10_shuffle.pkl'):
    all_indices = np.arange(len(X))
    np.random.shuffle(all_indices)
    pickle.dump(all_indices,open('./cifar10_shuffle.pkl','wb'))
else:
    all_indices=pickle.load(open('./cifar10_shuffle.pkl','rb'))

X=X[all_indices]
Y=Y[all_indices]

Files already downloaded and verified
Files already downloaded and verified


## Divide cifar10 data among 50 clients in IID fashion

In [6]:
# data loading

nusers=50
user_tr_len=1000

total_tr_len=user_tr_len*nusers
val_len=5000
te_len=5000

print('total data len: ',len(X))

if not os.path.isfile('./cifar10_shuffle.pkl'):
    all_indices = np.arange(len(X))
    np.random.shuffle(all_indices)
    pickle.dump(all_indices,open('./cifar10_shuffle.pkl','wb'))
else:
    all_indices=pickle.load(open('./cifar10_shuffle.pkl','rb'))

total_tr_data=X[:total_tr_len]
total_tr_label=Y[:total_tr_len]

val_data=X[total_tr_len:(total_tr_len+val_len)]
val_label=Y[total_tr_len:(total_tr_len+val_len)]

te_data=X[(total_tr_len+val_len):(total_tr_len+val_len+te_len)]
te_label=Y[(total_tr_len+val_len):(total_tr_len+val_len+te_len)]

total_tr_data_tensor=torch.from_numpy(total_tr_data).type(torch.FloatTensor)
total_tr_label_tensor=torch.from_numpy(total_tr_label).type(torch.LongTensor)

val_data_tensor=torch.from_numpy(val_data).type(torch.FloatTensor)
val_label_tensor=torch.from_numpy(val_label).type(torch.LongTensor)

te_data_tensor=torch.from_numpy(te_data).type(torch.FloatTensor)
te_label_tensor=torch.from_numpy(te_label).type(torch.LongTensor)

print('total tr len %d | val len %d | test len %d'%(len(total_tr_data_tensor),len(val_data_tensor),len(te_data_tensor)))

#==============================================================================================================

user_tr_data_tensors=[]
user_tr_label_tensors=[]

for i in range(nusers):
    
    user_tr_data_tensor=torch.from_numpy(total_tr_data[user_tr_len*i:user_tr_len*(i+1)]).type(torch.FloatTensor)
    user_tr_label_tensor=torch.from_numpy(total_tr_label[user_tr_len*i:user_tr_len*(i+1)]).type(torch.LongTensor)

    user_tr_data_tensors.append(user_tr_data_tensor)
    user_tr_label_tensors.append(user_tr_label_tensor)
    print('user %d tr len %d'%(i,len(user_tr_data_tensor)))

total data len:  60000
total tr len 50000 | val len 5000 | test len 5000
user 0 tr len 1000
user 1 tr len 1000
user 2 tr len 1000
user 3 tr len 1000
user 4 tr len 1000
user 5 tr len 1000
user 6 tr len 1000
user 7 tr len 1000
user 8 tr len 1000
user 9 tr len 1000
user 10 tr len 1000
user 11 tr len 1000
user 12 tr len 1000
user 13 tr len 1000
user 14 tr len 1000
user 15 tr len 1000
user 16 tr len 1000
user 17 tr len 1000
user 18 tr len 1000
user 19 tr len 1000
user 20 tr len 1000
user 21 tr len 1000
user 22 tr len 1000
user 23 tr len 1000
user 24 tr len 1000
user 25 tr len 1000
user 26 tr len 1000
user 27 tr len 1000
user 28 tr len 1000
user 29 tr len 1000
user 30 tr len 1000
user 31 tr len 1000
user 32 tr len 1000
user 33 tr len 1000
user 34 tr len 1000
user 35 tr len 1000
user 36 tr len 1000
user 37 tr len 1000
user 38 tr len 1000
user 39 tr len 1000
user 40 tr len 1000
user 41 tr len 1000
user 42 tr len 1000
user 43 tr len 1000
user 44 tr len 1000
user 45 tr len 1000
user 46 tr len 10

## Code for Bulyan aggregation algorithm

In [None]:
def bulyan(all_updates, n_attackers):
    nusers = all_updates.shape[0]
    bulyan_cluster = []
    candidate_indices = []
    remaining_updates = all_updates
    all_indices = np.arange(len(all_updates))

    while len(bulyan_cluster) < (nusers - 2 * n_attackers):
        torch.cuda.empty_cache()
        distances = []
        for update in remaining_updates:
            distance = []
            for update_ in remaining_updates:
                distance.append(torch.norm((update - update_)) ** 2)
            distance = torch.Tensor(distance).float()
            distances = distance[None, :] if not len(distances) else torch.cat((distances, distance[None, :]), 0)
        # print(distances)

        distances = torch.sort(distances, dim=1)[0]

        scores = torch.sum(distances[:, :len(remaining_updates) - 2 - n_attackers], dim=1)
        indices = torch.argsort(scores)[:len(remaining_updates) - 2 - n_attackers]
        if not len(indices):
            break
        candidate_indices.append(all_indices[indices[0].cpu().numpy()])
        all_indices = np.delete(all_indices, indices[0].cpu().numpy())
        bulyan_cluster = remaining_updates[indices[0]][None, :] if not len(bulyan_cluster) else torch.cat((bulyan_cluster, remaining_updates[indices[0]][None, :]), 0)
        remaining_updates = torch.cat((remaining_updates[:indices[0]], remaining_updates[indices[0] + 1:]), 0)

    # print('dim of bulyan cluster ', bulyan_cluster.shape)

    n, d = bulyan_cluster.shape
    param_med = torch.median(bulyan_cluster, dim=0)[0]
    sort_idx = torch.argsort(torch.abs(bulyan_cluster - param_med), dim=0)
    sorted_params = bulyan_cluster[sort_idx, torch.arange(d)[None, :]]

    return torch.mean(sorted_params[:n - 2 * n_attackers], dim=0), np.array(candidate_indices)

## Code for fang attack on Bulyan
### Note that Fang attacks on Multi-krum and Bulyan are the same

In [7]:
def compute_lambda_fang(all_updates, model_re, n_attackers):

    distances = []
    n_benign, d = all_updates.shape
    for update in all_updates:
        distance = torch.norm((all_updates - update), dim=1)
        distances = distance[None, :] if not len(distances) else torch.cat((distances, distance[None, :]), 0)

    distances[distances == 0] = 10000
    distances = torch.sort(distances, dim=1)[0]
    scores = torch.sum(distances[:, :n_benign - 2 - n_attackers], dim=1)
    min_score = torch.min(scores)
    term_1 = min_score / ((n_benign - n_attackers - 1) * torch.sqrt(torch.Tensor([d]))[0])
    max_wre_dist = torch.max(torch.norm((all_updates - model_re), dim=1)) / (torch.sqrt(torch.Tensor([d]))[0])

    return (term_1 + max_wre_dist)


def get_malicious_updates_fang(all_updates, model_re, deviation, n_attackers):

    lamda = compute_lambda_fang(all_updates, model_re, n_attackers)
    threshold = 1e-5

    mal_updates = []
    while lamda > threshold:
        mal_update = (- lamda * deviation)

        mal_updates = torch.stack([mal_update] * n_attackers)
        mal_updates = torch.cat((mal_updates, all_updates), 0)

        agg_grads, krum_candidate = multi_krum(mal_updates, n_attackers, multi_k=False)
        
        if krum_candidate < n_attackers:
            return mal_updates
        
        lamda *= 0.5

    if not len(mal_updates):
        print(lamda, threshold)
        mal_update = (model_re - lamda * deviation)
        
        mal_updates = torch.stack([mal_update] * n_attackers)
        mal_updates = torch.cat((mal_updates, all_updates), 0)

    return mal_updates

## Execute Fang attack on Bulyan

In [8]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='bulyan'
multi_k = False
candidates = []

at_type='fang'
z=0
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation

for n_attacker in n_attackers:
    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    fed_file='%s_checkpoint_%s_%d.pth.tar'%(arch, at_type, n_attacker)
    fed_best_file='%s_best_%s_%d.pth.tar'%(arch, at_type, n_attacker)
    
    if resume:
        fed_checkpoint = chkpt+'/'+fed_file
        assert os.path.isfile(fed_checkpoint), 'Error: no user checkpoint at %s'%(fed_checkpoint)
        checkpoint = torch.load(fed_checkpoint, map_location='cuda:%d'%torch.cuda.current_device())
        fed_model.load_state_dict(checkpoint['state_dict'])
        optimizer_fed.load_state_dict(checkpoint['optimizer'])
        resume = 0
        best_global_acc=checkpoint['best_acc']
        best_global_te_acc=checkpoint['best_te_acc']
        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        epoch_num += checkpoint['epoch']
        print('resuming from epoch %d | val acc %.4f | best acc %.3f | best te acc %.3f'%(epoch_num, val_acc, best_global_acc, best_global_te_acc))

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'paf':
                malicious_grads=get_malicious_predictions_poison_all_far_sign(malicious_grads,nusers,n_attacker)
            elif at_type == 'lie':
                malicious_grads = get_malicious_updates_lie(malicious_grads, n_attacker, z, epoch_num)
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang(malicious_grads, agg_grads, deviation, n_attacker)
            elif at_type == 'our':
                agg_grads = torch.mean(malicious_grads, 0)
                malicious_grads = our_attack_krum(malicious_grads, agg_grads, n_attacker, compression, q_level, norm)

        if not epoch_num:
            print(malicious_grads.shape)
            
        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='krum' or aggregation=='mkrum':

            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0:
                print('multi krum is ', multi_k)

            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads, krum_candidate=bulyan(malicious_grads, n_attacker)


        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%1==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d val loss %.4f val acc %.4f best val_acc %f te_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc,best_global_te_acc))

        epoch_num+=1

torch.Size([50, 2472266])


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1603729138878/work/torch/csrc/utils/python_arg_parser.cpp:882.)
  p.data.add_(-group['lr'], d_p)


bulyan: at fang n_at 10 n_mal_sel 10 e 0 val loss 2.3032 val acc 10.0041 best val_acc 10.004058 te_acc 9.760552
bulyan: at fang n_at 10 n_mal_sel 10 e 1 val loss 2.3029 val acc 9.9635 best val_acc 10.004058 te_acc 9.760552
bulyan: at fang n_at 10 n_mal_sel 10 e 2 val loss 2.3027 val acc 9.9635 best val_acc 10.004058 te_acc 9.760552
bulyan: at fang n_at 10 n_mal_sel 9 e 3 val loss 2.3023 val acc 10.0041 best val_acc 10.004058 te_acc 9.760552
bulyan: at fang n_at 10 n_mal_sel 10 e 4 val loss 2.3021 val acc 10.0244 best val_acc 10.024351 te_acc 9.882305
bulyan: at fang n_at 10 n_mal_sel 10 e 5 val loss 2.3018 val acc 10.0041 best val_acc 10.024351 te_acc 9.882305
bulyan: at fang n_at 10 n_mal_sel 10 e 6 val loss 2.3015 val acc 10.0446 best val_acc 10.044643 te_acc 9.902597
bulyan: at fang n_at 10 n_mal_sel 9 e 7 val loss 2.3012 val acc 10.2273 best val_acc 10.227273 te_acc 10.004058
bulyan: at fang n_at 10 n_mal_sel 10 e 8 val loss 2.3009 val acc 10.3084 best val_acc 10.308442 te_acc 10.0

bulyan: at fang n_at 10 n_mal_sel 10 e 73 val loss 2.1570 val acc 20.1502 best val_acc 20.616883 te_acc 20.921266
bulyan: at fang n_at 10 n_mal_sel 10 e 74 val loss 2.1550 val acc 20.2516 best val_acc 20.616883 te_acc 20.921266
bulyan: at fang n_at 10 n_mal_sel 10 e 75 val loss 2.1530 val acc 20.5763 best val_acc 20.616883 te_acc 20.921266
bulyan: at fang n_at 10 n_mal_sel 10 e 76 val loss 2.1507 val acc 20.6169 best val_acc 20.616883 te_acc 20.921266
bulyan: at fang n_at 10 n_mal_sel 10 e 77 val loss 2.1470 val acc 20.8198 best val_acc 20.819805 te_acc 19.602273
bulyan: at fang n_at 10 n_mal_sel 10 e 78 val loss 2.1446 val acc 20.9213 best val_acc 20.921266 te_acc 19.521104
bulyan: at fang n_at 10 n_mal_sel 10 e 79 val loss 2.1418 val acc 21.3880 best val_acc 21.387987 te_acc 19.987825
bulyan: at fang n_at 10 n_mal_sel 10 e 80 val loss 2.1385 val acc 21.3068 best val_acc 21.387987 te_acc 19.987825
bulyan: at fang n_at 10 n_mal_sel 10 e 81 val loss 2.1336 val acc 21.6721 best val_acc 2

bulyan: at fang n_at 10 n_mal_sel 8 e 145 val loss 2.3150 val acc 10.8563 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 8 e 146 val loss 2.3128 val acc 10.8360 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 7 e 147 val loss 2.3105 val acc 10.7549 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 8 e 148 val loss 2.3079 val acc 10.6128 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 7 e 149 val loss 2.3054 val acc 12.5203 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 8 e 150 val loss 2.3018 val acc 12.4188 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 7 e 151 val loss 2.2987 val acc 11.7492 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 8 e 152 val loss 2.2946 val acc 12.8044 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 7 e 153 val loss 2.2920 val acc 10.3490 best val_acc 2

bulyan: at fang n_at 10 n_mal_sel 10 e 217 val loss 2.2857 val acc 13.3523 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 218 val loss 2.2845 val acc 13.5146 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 219 val loss 2.2831 val acc 13.9205 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 9 e 220 val loss 2.2817 val acc 14.3060 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 221 val loss 2.2803 val acc 14.4278 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 222 val loss 2.2787 val acc 14.4683 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 9 e 223 val loss 2.2767 val acc 14.4278 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 224 val loss 2.2748 val acc 13.9610 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 225 val loss 2.2727 val acc 14.2451 best va

bulyan: at fang n_at 10 n_mal_sel 10 e 289 val loss 2.3079 val acc 9.8011 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 9 e 290 val loss 2.3071 val acc 9.8011 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 9 e 291 val loss 2.3062 val acc 9.8011 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 8 e 292 val loss 2.3049 val acc 9.8011 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 8 e 293 val loss 2.3028 val acc 9.9229 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 8 e 294 val loss 2.3000 val acc 10.7752 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 7 e 295 val loss 2.2957 val acc 12.7232 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 7 e 296 val loss 2.2888 val acc 14.7727 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 6 e 297 val loss 2.2774 val acc 16.4773 best val_acc 23.09

bulyan: at fang n_at 10 n_mal_sel 10 e 361 val loss 2.1382 val acc 19.0544 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 362 val loss 2.1379 val acc 19.3182 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 363 val loss 2.1382 val acc 20.1299 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 364 val loss 2.1345 val acc 19.6023 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 365 val loss 2.1334 val acc 19.9675 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 366 val loss 2.1332 val acc 20.4343 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 367 val loss 2.1336 val acc 21.2662 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 368 val loss 2.1295 val acc 19.9878 best val_acc 23.092532 te_acc 21.976461
bulyan: at fang n_at 10 n_mal_sel 10 e 369 val loss 2.1285 val acc 20.5154 best 

bulyan: at fang n_at 10 n_mal_sel 10 e 439 val loss 2.1260 val acc 19.0747 best val_acc 23.883929 te_acc 22.909903
bulyan: at fang n_at 10 n_mal_sel 10 e 440 val loss 2.1227 val acc 19.1153 best val_acc 23.883929 te_acc 22.909903
bulyan: at fang n_at 10 n_mal_sel 10 e 441 val loss 2.1214 val acc 18.9732 best val_acc 23.883929 te_acc 22.909903
bulyan: at fang n_at 10 n_mal_sel 10 e 442 val loss 2.1218 val acc 18.9326 best val_acc 23.883929 te_acc 22.909903
bulyan: at fang n_at 10 n_mal_sel 10 e 443 val loss 2.1209 val acc 19.5211 best val_acc 23.883929 te_acc 22.909903
bulyan: at fang n_at 10 n_mal_sel 10 e 444 val loss 2.1174 val acc 19.7037 best val_acc 23.883929 te_acc 22.909903
bulyan: at fang n_at 10 n_mal_sel 10 e 445 val loss 2.1161 val acc 19.4399 best val_acc 23.883929 te_acc 22.909903
bulyan: at fang n_at 10 n_mal_sel 10 e 446 val loss 2.1165 val acc 19.1964 best val_acc 23.883929 te_acc 22.909903
bulyan: at fang n_at 10 n_mal_sel 10 e 447 val loss 2.1159 val acc 20.0081 best 

bulyan: at fang n_at 10 n_mal_sel 8 e 511 val loss 2.3326 val acc 10.2679 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 9 e 512 val loss 2.3301 val acc 10.1055 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 8 e 513 val loss 2.3276 val acc 10.3896 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 9 e 514 val loss 2.3252 val acc 10.2476 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 9 e 515 val loss 2.3230 val acc 10.4708 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 516 val loss 2.3216 val acc 10.5317 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 517 val loss 2.3200 val acc 10.3287 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 518 val loss 2.3184 val acc 10.6128 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 519 val loss 2.3168 val acc 10.5114 best val_a

bulyan: at fang n_at 10 n_mal_sel 8 e 583 val loss 2.2473 val acc 17.6948 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 8 e 584 val loss 2.2329 val acc 16.3555 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 7 e 585 val loss 2.2076 val acc 17.5528 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 8 e 586 val loss 2.1862 val acc 17.2484 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 587 val loss 2.1704 val acc 18.9935 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 9 e 588 val loss 2.1569 val acc 18.2224 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 589 val loss 2.1399 val acc 19.2776 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 590 val loss 2.1313 val acc 18.5674 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 591 val loss 2.4054 val acc 17.3295 best val_a

bulyan: at fang n_at 10 n_mal_sel 8 e 655 val loss 2.2633 val acc 14.6104 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 8 e 656 val loss 2.2511 val acc 15.2394 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 8 e 657 val loss 2.2385 val acc 15.7062 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 9 e 658 val loss 2.2264 val acc 15.4627 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 9 e 659 val loss 2.2151 val acc 17.2281 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 660 val loss 2.2060 val acc 18.0804 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 661 val loss 2.1975 val acc 18.1412 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 662 val loss 2.1911 val acc 18.2427 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 663 val loss 2.1847 val acc 18.3847 best val_a

bulyan: at fang n_at 10 n_mal_sel 10 e 727 val loss 2.1348 val acc 20.2110 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 728 val loss 2.1298 val acc 20.5357 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 729 val loss 2.1269 val acc 20.5357 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 9 e 730 val loss 2.1210 val acc 20.5154 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 731 val loss 2.1171 val acc 20.4545 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 732 val loss 2.1144 val acc 20.6778 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 733 val loss 2.1100 val acc 20.6575 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 734 val loss 2.1072 val acc 20.6575 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 735 val loss 2.1008 val acc 20.9416 best v

bulyan: at fang n_at 10 n_mal_sel 10 e 799 val loss 2.0451 val acc 21.3271 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 800 val loss 2.0456 val acc 21.6518 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 801 val loss 2.0555 val acc 19.5617 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 802 val loss 2.0484 val acc 21.5706 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 803 val loss 2.0671 val acc 18.8920 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 9 e 804 val loss 2.1010 val acc 20.8198 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 6 e 805 val loss 2.3047 val acc 15.6047 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 3 e 806 val loss 2.2025 val acc 15.3612 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 7 e 807 val loss 2.1859 val acc 15.8076 best val_

bulyan: at fang n_at 10 n_mal_sel 10 e 871 val loss 2.0017 val acc 23.1940 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 872 val loss 2.0030 val acc 22.6664 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 873 val loss 2.0013 val acc 22.8490 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 874 val loss 2.0038 val acc 22.6664 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 9 e 875 val loss 1.9994 val acc 23.4578 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 876 val loss 1.9970 val acc 23.4578 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 877 val loss 2.0118 val acc 21.2459 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 878 val loss 2.0094 val acc 23.1534 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 879 val loss 2.0276 val acc 20.2922 best v

bulyan: at fang n_at 10 n_mal_sel 10 e 943 val loss 1.9805 val acc 22.1591 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 944 val loss 1.9788 val acc 22.0170 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 945 val loss 1.9798 val acc 22.0373 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 946 val loss 1.9773 val acc 22.2200 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 947 val loss 1.9779 val acc 22.4026 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 948 val loss 1.9762 val acc 22.3620 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 949 val loss 1.9764 val acc 22.3620 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 950 val loss 1.9743 val acc 22.4229 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 951 val loss 1.9749 val acc 22.7273 best 

bulyan: at fang n_at 10 n_mal_sel 10 e 1015 val loss 2.0466 val acc 22.8084 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 1016 val loss 2.0463 val acc 22.9099 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 1017 val loss 2.0460 val acc 23.2346 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 1018 val loss 2.0448 val acc 23.2752 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 1019 val loss 2.0439 val acc 22.9911 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 1020 val loss 2.0442 val acc 23.0519 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 1021 val loss 2.0436 val acc 23.0519 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 1022 val loss 2.0432 val acc 22.8693 best val_acc 25.182630 te_acc 24.715909
bulyan: at fang n_at 10 n_mal_sel 10 e 1023 val loss 2.0426 val acc 22.8

RuntimeError: CUDA out of memory. Tried to allocate 566.00 MiB (GPU 0; 10.92 GiB total capacity; 2.14 GiB already allocated; 290.38 MiB free; 2.70 GiB reserved in total by PyTorch)

## Code for LIE attack, followed by its execution on Bulyan

In [8]:
def lie_attack(all_updates, z):
    avg = torch.mean(all_updates, dim=0)
    std = torch.std(all_updates, dim=0)
    return avg + z * std

In [9]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='bulyan'
multi_k = False
candidates = []

at_type='LIE'
z_values={3:0.69847, 5:0.7054, 8:0.71904, 10:0.72575, 12:0.73891}
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation

for n_attacker in n_attackers:
    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'lie':
                mal_update = lie_attack(malicious_grads, z_values[n_attacker])
                malicious_grads = torch.cat((torch.stack([mal_update]*n_attacker), malicious_grads))
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang_trmean(malicious_grads, deviation, n_attacker, epoch_num)
            elif at_type == 'our-agr':
                agg_grads = torch.mean(malicious_grads, 0)
                malicious_grads = our_attack_krum(malicious_grads, agg_grads, n_attacker, compression=compression, q_level=q_level, norm=norm)

        if not epoch_num : 
            print(malicious_grads.shape)

        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='trmean':
            agg_grads=tr_mean(malicious_grads, n_attacker)

        elif aggregation=='krum' or aggregation=='mkrum':
            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0: print('multi krum is ', multi_k)
            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads, krum_candidate=bulyan(malicious_grads, n_attacker)

        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%10==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d fed_model val loss %.4f val acc %.4f best val_acc %f te_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc,best_global_te_acc))

        if val_loss > 10:
            print('val loss %f too high'%val_loss)
            break

        epoch_num+=1

torch.Size([40, 2472266])


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1603729138878/work/torch/csrc/utils/python_arg_parser.cpp:882.)
  p.data.add_(-group['lr'], d_p)


bulyan: at LIE n_at 10 n_mal_sel 5 e 0 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 10 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 20 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 30 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 40 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 50 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 60 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 70 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 80 fed_model val loss nan val acc 9.

bulyan: at LIE n_at 10 n_mal_sel 0 e 710 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 720 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 730 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 740 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 750 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 760 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 770 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 780 fed_model val loss nan val acc 9.6388 best val_acc 9.638799 te_acc 9.659091
bulyan: at LIE n_at 10 n_mal_sel 0 e 790 fed_model val loss nan 

## Code for our AGR-tailored attack on Bulyan
* Note that our attacks on multi-krum and Bulyan aggregations are the same

In [11]:
def our_attack_mkrum(all_updates, model_re, n_attackers, dev_type='unit_vec'):

    if dev_type == 'unit_vec':
        deviation = model_re / torch.norm(model_re)
    elif dev_type == 'sign':
        deviation = torch.sign(model_re)
    elif dev_type == 'std':
        deviation = torch.std(all_updates, 0)

    lamda = torch.Tensor([3.0]).cuda()

    threshold_diff = 1e-5
    lamda_fail = lamda
    lamda_succ = 0

    while torch.abs(lamda_succ - lamda) > threshold_diff:
        mal_update = (model_re - lamda * deviation)
        mal_updates = torch.stack([mal_update] * n_attackers)
        mal_updates = torch.cat((mal_updates, all_updates), 0)

        agg_grads, krum_candidate = multi_krum(mal_updates, n_attackers, multi_k=True)
        if np.sum(krum_candidate < n_attackers) == n_attackers:
            # print('successful lamda is ', lamda)
            lamda_succ = lamda
            lamda = lamda + lamda_fail / 2
        else:
            lamda = lamda - lamda_fail / 2

        lamda_fail = lamda_fail / 2

    mal_update = (model_re - lamda_succ * deviation)
    mal_updates = torch.stack([mal_update] * n_attackers)
    mal_updates = torch.cat((mal_updates, all_updates), 0)

    return mal_updates

## Execute our AGR-tailored attack on Bulyan

In [13]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='bulyan'
multi_k = False
candidates = []

at_type='our-agr'
dev_type ='std'
z=0
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation


for n_attacker in n_attackers:
    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'lie':
                malicious_grads = get_malicious_updates_lie(malicious_grads, n_attacker, z, epoch_num)
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang(malicious_grads, agg_grads, deviation, n_attacker)
            elif at_type == 'our-agr':
                agg_grads = torch.mean(malicious_grads, 0)
                malicious_grads = our_attack_mkrum(malicious_grads, agg_grads, n_attacker, dev_type=dev_type)

        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='krum' or aggregation=='mkrum':

            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0: print('multi krum is ', multi_k)
            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads, krum_candidate=bulyan(malicious_grads, n_attacker)

        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%1==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d | val loss %.4f val acc %.4f best val_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc))

        if val_loss > 1000:
            print('val loss %f too high'%val_loss)
            break
            
        epoch_num+=1

bulyan: at our-agr n_at 10 n_mal_sel 10 e 0 | val loss 2.3018 val acc 10.0446 best val_acc 10.044643
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1 | val loss 2.3010 val acc 10.0446 best val_acc 10.044643
bulyan: at our-agr n_at 10 n_mal_sel 10 e 2 | val loss 2.3001 val acc 10.0446 best val_acc 10.044643
bulyan: at our-agr n_at 10 n_mal_sel 10 e 3 | val loss 2.2990 val acc 10.0446 best val_acc 10.044643
bulyan: at our-agr n_at 10 n_mal_sel 10 e 4 | val loss 2.2975 val acc 10.0446 best val_acc 10.044643
bulyan: at our-agr n_at 10 n_mal_sel 10 e 5 | val loss 2.2956 val acc 10.0446 best val_acc 10.044643
bulyan: at our-agr n_at 10 n_mal_sel 10 e 6 | val loss 2.2930 val acc 12.0333 best val_acc 12.033279
bulyan: at our-agr n_at 10 n_mal_sel 10 e 7 | val loss 2.2894 val acc 13.0682 best val_acc 13.068182
bulyan: at our-agr n_at 10 n_mal_sel 10 e 8 | val loss 2.2845 val acc 13.2711 best val_acc 13.271104
bulyan: at our-agr n_at 10 n_mal_sel 10 e 9 | val loss 2.2779 val acc 13.7378 best val_acc 

bulyan: at our-agr n_at 10 n_mal_sel 10 e 81 | val loss 2.3044 val acc 10.3490 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 82 | val loss 2.3029 val acc 11.0795 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 83 | val loss 2.3018 val acc 12.5203 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 84 | val loss 2.3009 val acc 12.5406 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 85 | val loss 2.3006 val acc 12.6420 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 86 | val loss 2.2972 val acc 12.8653 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 87 | val loss 2.2953 val acc 13.5349 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 88 | val loss 2.2923 val acc 13.5552 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 89 | val loss 2.2918 val acc 13.0885 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 90 | val loss 2.2851 val acc 16.1932 bes

bulyan: at our-agr n_at 10 n_mal_sel 10 e 161 | val loss 2.2382 val acc 17.8369 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 162 | val loss 2.2381 val acc 17.8571 best val_acc 19.460227
bulyan: at our-agr n_at 10 n_mal_sel 10 e 163 | val loss 2.1876 val acc 19.7646 best val_acc 19.764610
bulyan: at our-agr n_at 10 n_mal_sel 10 e 164 | val loss 2.3244 val acc 17.5325 best val_acc 19.764610
bulyan: at our-agr n_at 10 n_mal_sel 10 e 165 | val loss 2.3867 val acc 11.9927 best val_acc 19.764610
bulyan: at our-agr n_at 10 n_mal_sel 10 e 166 | val loss 2.3332 val acc 9.9635 best val_acc 19.764610
bulyan: at our-agr n_at 10 n_mal_sel 10 e 167 | val loss 2.3257 val acc 9.9635 best val_acc 19.764610
bulyan: at our-agr n_at 10 n_mal_sel 10 e 168 | val loss 2.3195 val acc 10.0649 best val_acc 19.764610
bulyan: at our-agr n_at 10 n_mal_sel 10 e 169 | val loss 2.3136 val acc 10.8766 best val_acc 19.764610
bulyan: at our-agr n_at 10 n_mal_sel 10 e 170 | val loss 2.3085 val acc 12.

bulyan: at our-agr n_at 10 n_mal_sel 10 e 241 | val loss 2.2496 val acc 15.5641 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 242 | val loss 2.3012 val acc 12.9261 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 243 | val loss 2.2317 val acc 16.5584 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 244 | val loss 2.1682 val acc 16.3352 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 245 | val loss 2.1677 val acc 16.3555 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 246 | val loss 2.1650 val acc 17.3295 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 247 | val loss 2.3450 val acc 19.6226 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 248 | val loss 2.3675 val acc 9.9432 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 249 | val loss 2.3537 val acc 9.9432 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 250 | val loss 2.3426 val acc 9.9

bulyan: at our-agr n_at 10 n_mal_sel 10 e 321 | val loss 2.3047 val acc 10.8563 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 322 | val loss 2.2975 val acc 11.9115 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 323 | val loss 2.2906 val acc 14.2045 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 324 | val loss 2.2784 val acc 17.0657 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 325 | val loss 2.2569 val acc 13.6567 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 326 | val loss 2.2212 val acc 16.8425 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 327 | val loss 2.2709 val acc 11.1201 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 328 | val loss 2.3210 val acc 9.1315 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 329 | val loss 2.3046 val acc 10.2070 best val_acc 21.651786
bulyan: at our-agr n_at 10 n_mal_sel 10 e 330 | val loss 2.2810 val acc 11

bulyan: at our-agr n_at 10 n_mal_sel 10 e 401 | val loss 2.2439 val acc 17.4107 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 402 | val loss 2.2310 val acc 17.1266 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 403 | val loss 2.2379 val acc 16.5584 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 404 | val loss 2.2968 val acc 15.6047 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 405 | val loss 2.3310 val acc 11.0998 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 406 | val loss 2.3120 val acc 14.1031 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 407 | val loss 2.2924 val acc 12.1144 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 408 | val loss 2.2787 val acc 11.7898 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 409 | val loss 2.2712 val acc 13.1494 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 410 | val loss 2.2534 val acc 1

bulyan: at our-agr n_at 10 n_mal_sel 10 e 481 | val loss 2.2036 val acc 18.8312 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 482 | val loss 2.3441 val acc 15.2394 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 483 | val loss 2.3679 val acc 12.7841 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 484 | val loss 2.3236 val acc 16.9237 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 485 | val loss 2.2943 val acc 13.3117 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 486 | val loss 2.2400 val acc 17.9586 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 487 | val loss 2.2007 val acc 16.1729 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 488 | val loss 2.2851 val acc 17.0252 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 490 | val loss 2.2940 val acc 12.7232 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 491 | val loss 2.2235 val acc 1

bulyan: at our-agr n_at 10 n_mal_sel 10 e 562 | val loss 2.2152 val acc 16.2338 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 563 | val loss 2.2483 val acc 12.4188 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 564 | val loss 2.3213 val acc 9.9432 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 565 | val loss 2.2180 val acc 17.1672 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 566 | val loss 2.2663 val acc 15.0365 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 567 | val loss 2.2962 val acc 11.0390 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 568 | val loss 2.2227 val acc 15.9497 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 569 | val loss 2.1538 val acc 17.0657 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 570 | val loss 2.1767 val acc 16.4773 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 571 | val loss 2.1867 val acc 17

bulyan: at our-agr n_at 10 n_mal_sel 10 e 642 | val loss 2.2287 val acc 16.9846 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 643 | val loss 2.3527 val acc 12.7841 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 644 | val loss 2.2296 val acc 17.7557 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 645 | val loss 2.2263 val acc 17.6542 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 646 | val loss 2.3501 val acc 15.6859 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 647 | val loss 2.4208 val acc 9.8214 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 648 | val loss 2.3609 val acc 9.7606 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 649 | val loss 2.3151 val acc 13.1494 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 650 | val loss 2.2606 val acc 15.7670 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 651 | val loss 2.2055 val acc 15.

bulyan: at our-agr n_at 10 n_mal_sel 10 e 722 | val loss 2.1933 val acc 18.3036 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 723 | val loss 2.2493 val acc 16.3758 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 724 | val loss 2.3871 val acc 10.6331 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 725 | val loss 2.3062 val acc 18.5268 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 726 | val loss 2.2410 val acc 19.6631 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 727 | val loss 2.1747 val acc 21.0430 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 728 | val loss 2.1403 val acc 18.6485 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 729 | val loss 2.1186 val acc 20.3531 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 730 | val loss 2.4432 val acc 17.1469 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 731 | val loss 2.7074 val acc 1

bulyan: at our-agr n_at 10 n_mal_sel 10 e 802 | val loss 2.2137 val acc 17.0049 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 803 | val loss 2.2081 val acc 17.5933 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 804 | val loss 2.1869 val acc 17.2687 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 805 | val loss 2.2495 val acc 16.5179 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 806 | val loss 2.3480 val acc 12.0130 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 807 | val loss 2.2679 val acc 15.1583 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 808 | val loss 2.1723 val acc 17.3093 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 809 | val loss 2.2115 val acc 17.6542 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 810 | val loss 2.2864 val acc 14.5089 best val_acc 22.179383
bulyan: at our-agr n_at 10 n_mal_sel 10 e 811 | val loss 2.1561 val acc 1

bulyan: at our-agr n_at 10 n_mal_sel 10 e 883 | val loss 2.3585 val acc 11.1201 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 884 | val loss 2.2830 val acc 12.9058 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 885 | val loss 2.1877 val acc 17.7354 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 886 | val loss 2.1491 val acc 20.4140 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 887 | val loss 2.1214 val acc 21.7127 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 888 | val loss 2.1049 val acc 21.2459 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 889 | val loss 2.0846 val acc 22.1388 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 890 | val loss 2.1571 val acc 17.1063 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 891 | val loss 2.4295 val acc 16.9846 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 892 | val loss 2.3062 val acc 1

bulyan: at our-agr n_at 10 n_mal_sel 10 e 963 | val loss 2.2493 val acc 19.7037 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 964 | val loss 2.1423 val acc 18.8515 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 965 | val loss 2.1312 val acc 17.1672 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 966 | val loss 2.5965 val acc 15.2192 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 967 | val loss 2.7800 val acc 9.9432 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 968 | val loss 2.6951 val acc 9.9432 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 969 | val loss 2.6364 val acc 9.9432 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 970 | val loss 2.5895 val acc 9.9432 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 971 | val loss 2.5468 val acc 9.9432 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 972 | val loss 2.5001 val acc 9.9432

bulyan: at our-agr n_at 10 n_mal_sel 10 e 1043 | val loss 2.1856 val acc 14.8742 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1044 | val loss 2.1672 val acc 17.5933 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1045 | val loss 2.1654 val acc 16.0308 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1046 | val loss 2.1710 val acc 17.9383 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1047 | val loss 2.3119 val acc 15.8482 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1048 | val loss 2.3744 val acc 11.5666 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1049 | val loss 2.3051 val acc 17.1875 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1050 | val loss 2.2418 val acc 17.9180 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1051 | val loss 2.1935 val acc 17.0049 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1052 | val loss 2.1605

bulyan: at our-agr n_at 10 n_mal_sel 10 e 1122 | val loss 2.2164 val acc 17.2281 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1123 | val loss 2.1568 val acc 20.2110 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1124 | val loss 2.2304 val acc 17.9383 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1125 | val loss 2.1765 val acc 18.2021 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1126 | val loss 2.2456 val acc 16.4570 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1127 | val loss 2.2007 val acc 19.2979 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1128 | val loss 2.2908 val acc 15.3206 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1129 | val loss 2.1821 val acc 17.6745 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1130 | val loss 2.1700 val acc 19.5008 best val_acc 22.605519
bulyan: at our-agr n_at 10 n_mal_sel 10 e 1131 | val loss 2.1768

## Code for our first AGR-agnostic attack called Min-Max

In [7]:
'''
MIN-MAX attack
'''
def our_attack_dist(all_updates, model_re, n_attackers, dev_type='unit_vec'):

    if dev_type == 'unit_vec':
        deviation = model_re / torch.norm(model_re)  # unit vector, dir opp to good dir
    elif dev_type == 'sign':
        deviation = torch.sign(model_re)
    elif dev_type == 'std':
        deviation = torch.std(all_updates, 0)

    lamda = torch.Tensor([10.0]).float().cuda()
    # print(lamda)
    threshold_diff = 1e-5
    lamda_fail = lamda
    lamda_succ = 0
    
    distances = []
    for update in all_updates:
        distance = torch.norm((all_updates - update), dim=1) ** 2
        distances = distance[None, :] if not len(distances) else torch.cat((distances, distance[None, :]), 0)
    
    max_distance = torch.max(distances)
    del distances

    while torch.abs(lamda_succ - lamda) > threshold_diff:
        mal_update = (model_re - lamda * deviation)
        distance = torch.norm((all_updates - mal_update), dim=1) ** 2
        max_d = torch.max(distance)
        
        if max_d <= max_distance:
            # print('successful lamda is ', lamda)
            lamda_succ = lamda
            lamda = lamda + lamda_fail / 2
        else:
            lamda = lamda - lamda_fail / 2

        lamda_fail = lamda_fail / 2

    mal_update = (model_re - lamda_succ * deviation)
    
    return mal_update

## Execute Min-max attack on Bulyan

In [8]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='bulyan'
multi_k = False
candidates = []

at_type='min-max'
dev_type ='std'
z=0
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation

for n_attacker in n_attackers:
    candidates = []

    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'lie':
                malicious_grads = get_malicious_updates_lie(malicious_grads, n_attacker, z, epoch_num)
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang(malicious_grads, agg_grads, deviation, n_attacker)
            elif at_type == 'our-agr':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_median(malicious_grads, agg_grads, n_attacker, dev_type)
            elif at_type == 'min-max':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_dist(malicious_grads, agg_grads, n_attacker, dev_type)
            elif at_type == 'min-sum':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_score(malicious_grads, agg_grads, n_attacker, dev_type)

            mal_updates = torch.stack([mal_update] * n_attacker)
            malicious_grads = torch.cat((mal_updates, user_grads), 0)

        if epoch_num==0: print('malicious_grads shape ', malicious_grads.shape)

        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='krum' or aggregation=='mkrum':
            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0: print('multi krum is ', multi_k)
            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads, krum_candidate=bulyan(malicious_grads, n_attacker)

        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%1==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d | val loss %.4f val acc %.4f best val_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc))

        if val_loss > 1000:
            print('val loss %f too high'%val_loss)
            break
            
        epoch_num+=1

malicious_grads shape  torch.Size([50, 2472266])


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /opt/conda/conda-bld/pytorch_1603729138878/work/torch/csrc/utils/python_arg_parser.cpp:882.)
  p.data.add_(-group['lr'], d_p)


bulyan: at min-max n_at 10 n_mal_sel 5 e 0 | val loss 2.3036 val acc 10.5519 best val_acc 10.551948
bulyan: at min-max n_at 10 n_mal_sel 5 e 1 | val loss 2.3030 val acc 10.8563 best val_acc 10.856331
bulyan: at min-max n_at 10 n_mal_sel 6 e 2 | val loss 2.3024 val acc 10.9375 best val_acc 10.937500
bulyan: at min-max n_at 10 n_mal_sel 5 e 3 | val loss 2.3019 val acc 10.4302 best val_acc 10.937500
bulyan: at min-max n_at 10 n_mal_sel 5 e 4 | val loss 2.3014 val acc 10.9172 best val_acc 10.937500
bulyan: at min-max n_at 10 n_mal_sel 5 e 5 | val loss 2.3007 val acc 11.1404 best val_acc 11.140422
bulyan: at min-max n_at 10 n_mal_sel 6 e 6 | val loss 2.2999 val acc 11.4042 best val_acc 11.404221
bulyan: at min-max n_at 10 n_mal_sel 5 e 7 | val loss 2.2991 val acc 11.8506 best val_acc 11.850649
bulyan: at min-max n_at 10 n_mal_sel 5 e 8 | val loss 2.2982 val acc 12.0333 best val_acc 12.033279
bulyan: at min-max n_at 10 n_mal_sel 5 e 9 | val loss 2.2968 val acc 12.1550 best val_acc 12.155032


bulyan: at min-max n_at 10 n_mal_sel 5 e 82 | val loss 2.2978 val acc 9.4968 best val_acc 22.991071
bulyan: at min-max n_at 10 n_mal_sel 5 e 83 | val loss 2.2871 val acc 10.5519 best val_acc 22.991071
bulyan: at min-max n_at 10 n_mal_sel 5 e 84 | val loss 2.2809 val acc 14.9148 best val_acc 22.991071
bulyan: at min-max n_at 10 n_mal_sel 6 e 85 | val loss 2.2729 val acc 10.5519 best val_acc 22.991071
bulyan: at min-max n_at 10 n_mal_sel 6 e 86 | val loss 2.2823 val acc 11.8912 best val_acc 22.991071
bulyan: at min-max n_at 10 n_mal_sel 5 e 87 | val loss 2.2910 val acc 9.6185 best val_acc 22.991071
bulyan: at min-max n_at 10 n_mal_sel 5 e 88 | val loss 2.2671 val acc 9.8620 best val_acc 22.991071
bulyan: at min-max n_at 10 n_mal_sel 5 e 89 | val loss 2.2504 val acc 9.5779 best val_acc 22.991071
bulyan: at min-max n_at 10 n_mal_sel 5 e 90 | val loss 2.2301 val acc 11.7898 best val_acc 22.991071
bulyan: at min-max n_at 10 n_mal_sel 4 e 91 | val loss 2.2070 val acc 15.1989 best val_acc 22.9

bulyan: at min-max n_at 10 n_mal_sel 6 e 163 | val loss 2.0696 val acc 23.0317 best val_acc 23.457792
bulyan: at min-max n_at 10 n_mal_sel 6 e 164 | val loss 2.0501 val acc 22.6664 best val_acc 23.457792
bulyan: at min-max n_at 10 n_mal_sel 7 e 165 | val loss 2.1062 val acc 21.7532 best val_acc 23.457792
bulyan: at min-max n_at 10 n_mal_sel 4 e 166 | val loss 2.1259 val acc 22.8490 best val_acc 23.457792
bulyan: at min-max n_at 10 n_mal_sel 6 e 167 | val loss 2.2368 val acc 18.0195 best val_acc 23.457792
bulyan: at min-max n_at 10 n_mal_sel 8 e 168 | val loss 2.3687 val acc 9.7606 best val_acc 23.457792
bulyan: at min-max n_at 10 n_mal_sel 6 e 169 | val loss 2.2868 val acc 13.3320 best val_acc 23.457792
bulyan: at min-max n_at 10 n_mal_sel 5 e 170 | val loss 2.2410 val acc 14.6307 best val_acc 23.457792
bulyan: at min-max n_at 10 n_mal_sel 5 e 171 | val loss 2.2029 val acc 14.8336 best val_acc 23.457792
bulyan: at min-max n_at 10 n_mal_sel 4 e 172 | val loss 2.1726 val acc 16.8425 best

bulyan: at min-max n_at 10 n_mal_sel 6 e 244 | val loss 2.0343 val acc 22.1997 best val_acc 27.577110
bulyan: at min-max n_at 10 n_mal_sel 7 e 245 | val loss 2.0062 val acc 25.9537 best val_acc 27.577110
bulyan: at min-max n_at 10 n_mal_sel 7 e 246 | val loss 2.1880 val acc 17.1672 best val_acc 27.577110
bulyan: at min-max n_at 10 n_mal_sel 7 e 247 | val loss 2.1239 val acc 22.2808 best val_acc 27.577110
bulyan: at min-max n_at 10 n_mal_sel 7 e 248 | val loss 2.1163 val acc 19.3791 best val_acc 27.577110
bulyan: at min-max n_at 10 n_mal_sel 7 e 249 | val loss 2.0355 val acc 24.1071 best val_acc 27.577110
bulyan: at min-max n_at 10 n_mal_sel 6 e 250 | val loss 2.0217 val acc 23.5998 best val_acc 27.577110
bulyan: at min-max n_at 10 n_mal_sel 7 e 251 | val loss 1.9440 val acc 28.2265 best val_acc 28.226461
bulyan: at min-max n_at 10 n_mal_sel 7 e 252 | val loss 1.9875 val acc 23.7216 best val_acc 28.226461
bulyan: at min-max n_at 10 n_mal_sel 8 e 253 | val loss 2.1134 val acc 26.1364 bes

bulyan: at min-max n_at 10 n_mal_sel 5 e 325 | val loss 2.2262 val acc 17.4716 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 5 e 326 | val loss 2.1633 val acc 18.4253 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 4 e 327 | val loss 2.0930 val acc 19.0138 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 5 e 328 | val loss 2.0470 val acc 19.5211 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 329 | val loss 2.0299 val acc 21.5097 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 330 | val loss 2.4978 val acc 15.2394 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 5 e 331 | val loss 5.0982 val acc 10.0446 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 332 | val loss 2.3002 val acc 11.0390 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 8 e 333 | val loss 2.2642 val acc 13.6161 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 334 | val loss 2.2391 val acc 15.1989 bes

bulyan: at min-max n_at 10 n_mal_sel 5 e 406 | val loss 2.2797 val acc 15.0365 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 4 e 407 | val loss 2.2613 val acc 15.9903 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 7 e 408 | val loss 2.1958 val acc 17.3093 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 5 e 409 | val loss 2.1655 val acc 17.1672 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 7 e 410 | val loss 2.1481 val acc 18.5471 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 7 e 411 | val loss 2.0981 val acc 22.0170 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 7 e 412 | val loss 2.1460 val acc 18.4659 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 5 e 413 | val loss 2.0181 val acc 22.8896 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 7 e 414 | val loss 2.0018 val acc 25.7305 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 415 | val loss 2.0731 val acc 20.0284 bes

bulyan: at min-max n_at 10 n_mal_sel 8 e 487 | val loss 2.2992 val acc 10.6331 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 488 | val loss 2.2321 val acc 14.4481 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 489 | val loss 2.1928 val acc 20.5357 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 7 e 490 | val loss 2.2611 val acc 13.1494 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 491 | val loss 2.1797 val acc 16.8628 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 7 e 492 | val loss 2.1314 val acc 18.8515 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 7 e 493 | val loss 2.1352 val acc 21.4894 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 494 | val loss 2.2853 val acc 11.9724 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 5 e 495 | val loss 2.1521 val acc 17.2890 best val_acc 31.737013
bulyan: at min-max n_at 10 n_mal_sel 6 e 496 | val loss 2.1313 val acc 21.0430 bes

## Code for our second AGR-agnostic attack called Min-Sum

In [9]:
'''
MIN-SUM attack
'''

def our_attack_score(all_updates, model_re, n_attackers, dev_type='unit_vec'):

    if dev_type == 'unit_vec':
        deviation = model_re / torch.norm(model_re)  # unit vector, dir opp to good dir
    elif dev_type == 'sign':
        deviation = torch.sign(model_re)
    elif dev_type == 'std':
        deviation = torch.std(all_updates, 0)
    
    lamda = torch.Tensor([10.0]).float().cuda()
    # print(lamda)
    threshold_diff = 1e-5
    lamda_fail = lamda
    lamda_succ = 0
    
    distances = []
    for update in all_updates:
        distance = torch.norm((all_updates - update), dim=1) ** 2
        distances = distance[None, :] if not len(distances) else torch.cat((distances, distance[None, :]), 0)
    
    scores = torch.sum(distances, dim=1)
    min_score = torch.min(scores)
    del distances

    while torch.abs(lamda_succ - lamda) > threshold_diff:
        mal_update = (model_re - lamda * deviation)
        distance = torch.norm((all_updates - mal_update), dim=1) ** 2
        score = torch.sum(distance)
        
        if score <= min_score:
            # print('successful lamda is ', lamda)
            lamda_succ = lamda
            lamda = lamda + lamda_fail / 2
        else:
            lamda = lamda - lamda_fail / 2

        lamda_fail = lamda_fail / 2

    # print(lamda_succ)
    mal_update = (model_re - lamda_succ * deviation)
    
    return mal_update
    

## Execute Min-Sum attack on Bulyan

In [10]:
batch_size=250
resume=0
nepochs=1200
schedule=[1000]
nbatches = user_tr_len//batch_size

gamma=.5
opt = 'sgd'
fed_lr=0.5
criterion=nn.CrossEntropyLoss()
use_cuda = torch.cuda.is_available()

aggregation='bulyan'
multi_k = False
candidates = []

at_type='min-sum'
dev_type ='std'
z=0
n_attackers=[10]

arch='alexnet'
chkpt='./'+aggregation

for n_attacker in n_attackers:
    candidates = []

    epoch_num = 0
    best_global_acc = 0
    best_global_te_acc = 0

    fed_model, _ = return_model(arch, 0.1, 0.9, parallel=False)
    optimizer_fed = SGD(fed_model.parameters(), lr=fed_lr)

    torch.cuda.empty_cache()
    r=np.arange(user_tr_len)

    while epoch_num <= nepochs:
        user_grads=[]
        if not epoch_num and epoch_num%nbatches == 0:
            np.random.shuffle(r)
            for i in range(nusers):
                user_tr_data_tensors[i]=user_tr_data_tensors[i][r]
                user_tr_label_tensors[i]=user_tr_label_tensors[i][r]

        for i in range(n_attacker, nusers):

            inputs = user_tr_data_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]
            targets = user_tr_label_tensors[i][(epoch_num%nbatches)*batch_size:((epoch_num%nbatches) + 1) * batch_size]

            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

            outputs = fed_model(inputs)
            loss = criterion(outputs, targets)
            fed_model.zero_grad()
            loss.backward(retain_graph=True)

            param_grad=[]
            for param in fed_model.parameters():
                param_grad=param.grad.data.view(-1) if not len(param_grad) else torch.cat((param_grad,param.grad.view(-1)))

            user_grads=param_grad[None, :] if len(user_grads)==0 else torch.cat((user_grads,param_grad[None,:]), 0)

        malicious_grads = user_grads

        if epoch_num in schedule:
            for param_group in optimizer_fed.param_groups:
                param_group['lr'] *= gamma
                print('New learnin rate ', param_group['lr'])

        if n_attacker > 0:
            if at_type == 'lie':
                malicious_grads = get_malicious_updates_lie(malicious_grads, n_attacker, z, epoch_num)
            elif at_type == 'fang':
                agg_grads = torch.mean(malicious_grads, 0)
                deviation = torch.sign(agg_grads)
                malicious_grads = get_malicious_updates_fang(malicious_grads, agg_grads, deviation, n_attacker)
            elif at_type == 'our-agr':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_median(malicious_grads, agg_grads, n_attacker, dev_type)
            elif at_type == 'min-max':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_dist(malicious_grads, agg_grads, n_attacker, dev_type)
            elif at_type == 'min-sum':
                agg_grads = torch.mean(malicious_grads, 0)
                mal_update = our_attack_score(malicious_grads, agg_grads, n_attacker, dev_type)

            mal_updates = torch.stack([mal_update] * n_attacker)
            malicious_grads = torch.cat((mal_updates, user_grads), 0)

        if epoch_num==0: print('malicious_grads shape ', malicious_grads.shape)

        if aggregation=='median':
            agg_grads=torch.median(malicious_grads,dim=0)[0]

        elif aggregation=='average':
            agg_grads=torch.mean(malicious_grads,dim=0)

        elif aggregation=='krum' or aggregation=='mkrum':
            multi_k = True if aggregation == 'mkrum' else False
            if epoch_num == 0: print('multi krum is ', multi_k)
            agg_grads, krum_candidate = multi_krum(malicious_grads, n_attacker, multi_k=multi_k)

        elif aggregation=='bulyan':
            agg_grads, krum_candidate=bulyan(malicious_grads, n_attacker)

        del user_grads

        start_idx=0

        optimizer_fed.zero_grad()

        model_grads=[]

        for i, param in enumerate(fed_model.parameters()):
            param_=agg_grads[start_idx:start_idx+len(param.data.view(-1))].reshape(param.data.shape)
            start_idx=start_idx+len(param.data.view(-1))
            param_=param_.cuda()
            model_grads.append(param_)

        optimizer_fed.step(model_grads)

        val_loss, val_acc = test(val_data_tensor,val_label_tensor,fed_model,criterion,use_cuda)
        te_loss, te_acc = test(te_data_tensor,te_label_tensor, fed_model, criterion, use_cuda)

        is_best = best_global_acc < val_acc

        best_global_acc = max(best_global_acc, val_acc)

        if is_best:
            best_global_te_acc = te_acc

        if epoch_num%1==0 or epoch_num==nepochs-1:
            print('%s: at %s n_at %d n_mal_sel %d e %d | val loss %.4f val acc %.4f best val_acc %f'%(aggregation, at_type, n_attacker, np.sum(krum_candidate < n_attacker), epoch_num, val_loss, val_acc, best_global_acc))

        if val_loss > 1000:
            print('val loss %f too high'%val_loss)
            break
            
        epoch_num+=1

malicious_grads shape  torch.Size([50, 2472266])
bulyan: at min-sum n_at 10 n_mal_sel 10 e 0 | val loss 2.3034 val acc 10.0244 best val_acc 10.024351
bulyan: at min-sum n_at 10 n_mal_sel 10 e 1 | val loss 2.3027 val acc 10.1055 best val_acc 10.105519
bulyan: at min-sum n_at 10 n_mal_sel 10 e 2 | val loss 2.3020 val acc 10.1664 best val_acc 10.166396
bulyan: at min-sum n_at 10 n_mal_sel 10 e 3 | val loss 2.3011 val acc 11.7898 best val_acc 11.789773
bulyan: at min-sum n_at 10 n_mal_sel 10 e 4 | val loss 2.3002 val acc 10.1664 best val_acc 11.789773
bulyan: at min-sum n_at 10 n_mal_sel 10 e 5 | val loss 2.2991 val acc 10.1664 best val_acc 11.789773
bulyan: at min-sum n_at 10 n_mal_sel 10 e 6 | val loss 2.2978 val acc 10.2679 best val_acc 11.789773
bulyan: at min-sum n_at 10 n_mal_sel 10 e 7 | val loss 2.2962 val acc 11.9521 best val_acc 11.952110
bulyan: at min-sum n_at 10 n_mal_sel 10 e 8 | val loss 2.2943 val acc 10.2476 best val_acc 11.952110
bulyan: at min-sum n_at 10 n_mal_sel 10 e 

bulyan: at min-sum n_at 10 n_mal_sel 10 e 81 | val loss 2.2642 val acc 14.6104 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 82 | val loss 2.2994 val acc 11.6274 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 83 | val loss 2.2864 val acc 14.1843 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 84 | val loss 2.2662 val acc 16.2946 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 85 | val loss 2.2419 val acc 15.5844 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 86 | val loss 2.2235 val acc 20.0893 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 87 | val loss 2.2696 val acc 17.1469 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 88 | val loss 2.2983 val acc 14.1031 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 89 | val loss 2.2818 val acc 14.4683 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 90 | val loss 2.2712 val acc 13.3929 bes

bulyan: at min-sum n_at 10 n_mal_sel 10 e 161 | val loss 2.1934 val acc 16.1729 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 162 | val loss 2.1832 val acc 17.8369 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 163 | val loss 2.1847 val acc 16.0511 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 164 | val loss 2.2270 val acc 14.8945 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 165 | val loss 2.3142 val acc 14.0219 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 166 | val loss 2.3125 val acc 10.0244 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 167 | val loss 2.2592 val acc 18.4862 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 168 | val loss 2.2640 val acc 13.1696 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 169 | val loss 2.2276 val acc 15.6859 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 170 | val loss 2.2164 val acc 1

bulyan: at min-sum n_at 10 n_mal_sel 10 e 241 | val loss 2.3756 val acc 8.3401 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 242 | val loss 2.3168 val acc 12.0130 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 243 | val loss 2.2945 val acc 12.3782 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 244 | val loss 2.2591 val acc 13.1088 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 245 | val loss 2.2186 val acc 17.5528 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 246 | val loss 2.3047 val acc 12.4188 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 247 | val loss 2.2576 val acc 13.7581 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 248 | val loss 2.2325 val acc 16.1323 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 249 | val loss 2.2601 val acc 12.1956 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 250 | val loss 2.1952 val acc 16

bulyan: at min-sum n_at 10 n_mal_sel 10 e 321 | val loss 2.2389 val acc 15.1583 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 322 | val loss 2.2492 val acc 13.6567 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 323 | val loss 2.2259 val acc 14.8539 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 324 | val loss 2.2413 val acc 12.5406 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 325 | val loss 2.2332 val acc 14.6104 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 326 | val loss 2.2634 val acc 15.6047 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 327 | val loss 2.2484 val acc 11.3231 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 328 | val loss 2.2197 val acc 16.1729 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 329 | val loss 2.2200 val acc 16.2541 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 10 e 330 | val loss 2.2157 val acc 1

bulyan: at min-sum n_at 10 n_mal_sel 3 e 402 | val loss 2.3034 val acc 9.9432 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 403 | val loss 2.3034 val acc 9.9432 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 404 | val loss 2.3034 val acc 9.9635 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 405 | val loss 2.3034 val acc 9.9432 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 406 | val loss 2.3034 val acc 9.9432 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 407 | val loss 2.3034 val acc 9.9432 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 408 | val loss 2.3034 val acc 9.9838 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 409 | val loss 2.3034 val acc 9.9432 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 410 | val loss 2.3034 val acc 9.9432 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 411 | val loss 2.3034 val acc 9.9432 best val_acc 

bulyan: at min-sum n_at 10 n_mal_sel 2 e 484 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 485 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 486 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 487 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 488 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 489 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 490 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 491 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 492 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 493 | val loss 2.3033 val acc 9.6794 best val_acc 

bulyan: at min-sum n_at 10 n_mal_sel 3 e 566 | val loss 2.3033 val acc 9.6388 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 567 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 568 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 569 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 570 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 571 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 572 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 573 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 574 | val loss 2.3033 val acc 9.6591 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 575 | val loss 2.3033 val acc 9.6794 best val_acc 

bulyan: at min-sum n_at 10 n_mal_sel 2 e 648 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 649 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 650 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 651 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 652 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 653 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 654 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 655 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 656 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 657 | val loss 2.3033 val acc 9.6794 best val_acc 

bulyan: at min-sum n_at 10 n_mal_sel 3 e 730 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 731 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 732 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 733 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 734 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 735 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 736 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 737 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 738 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 739 | val loss 2.3033 val acc 9.6794 best val_acc 

bulyan: at min-sum n_at 10 n_mal_sel 2 e 812 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 813 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 814 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 815 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 816 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 817 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 818 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 819 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 820 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 821 | val loss 2.3033 val acc 9.6794 best val_acc 

bulyan: at min-sum n_at 10 n_mal_sel 3 e 894 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 895 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 896 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 897 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 898 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 899 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 900 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 901 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 902 | val loss 2.3033 val acc 9.6388 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 903 | val loss 2.3033 val acc 9.6388 best val_acc 

bulyan: at min-sum n_at 10 n_mal_sel 2 e 976 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 977 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 978 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 979 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 980 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 981 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 982 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 983 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 984 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 985 | val loss 2.3033 val acc 9.6794 best val_acc 

bulyan: at min-sum n_at 10 n_mal_sel 3 e 1057 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1058 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1059 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 1060 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1061 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1062 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1063 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 1064 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1065 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1066 | val loss 2.3033 val acc 9.6794 bes

bulyan: at min-sum n_at 10 n_mal_sel 3 e 1138 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1139 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 1140 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1141 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1142 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1143 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 2 e 1144 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1145 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1146 | val loss 2.3033 val acc 9.6794 best val_acc 22.118506
bulyan: at min-sum n_at 10 n_mal_sel 3 e 1147 | val loss 2.3033 val acc 9.6794 bes