In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]= "6"
import torch
from torch import nn, autograd
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import TensorDataset, DataLoader, Dataset
from sklearn.model_selection import train_test_split
import numpy as np
import pickle
import pandas as pd
import torchvision
import random
from sklearn import metrics
import copy
import argparse
import time
import tqdm
from collections import Counter
from utils import data_iid, data_noniid, GRU, test_img,pesudo_label
from utils import  global_train, FedAvg
from sklearn.cluster import KMeans

In [2]:
def args_parser():
    """Build the experiment configuration namespace.

    Options are declared in a single table and registered in order.
    ``parse_known_args`` is used so that unrelated command-line arguments
    (for example the ones a notebook kernel is launched with) are ignored
    instead of causing an error.

    Returns:
        argparse.Namespace: one attribute per option declared below.
    """
    parser = argparse.ArgumentParser()

    option_table = (
        # federated arguments
        ('--epochs', dict(type=int, default=100, help="rounds of training")),
        ('--num_users', dict(type=int, default=20, help="number of users: K")),
        ('--frac', dict(type=float, default=0.5, help="the fraction of clients: C")),
        ('--local_ep', dict(type=int, default=5, help="the number of local epochs: E")),
        ('--local_bs', dict(type=int, default=30, help="local batch size: B")),
        ('--bs', dict(type=int, default=256, help="test batch size")),
        ('--lr', dict(type=float, default=0.0005, help="learning rate")),
        ('--local_label', dict(type=int, default=2, help="each client's label")),
        ('--glob_ep', dict(type=int, default=100, help="number of global epochs")),
        ('--split', dict(type=str, default='user', help="train-test split type, user or sample")),
        # other arguments
        ('--iid', dict(action='store_true', help='whether i.i.d or not')),
        ('--num_classes', dict(type=int, default=5, help="number of classes")),
        ('--gpu', dict(type=int, default=0, help="GPU ID, -1 for CPU")),
        ('--stopping_rounds', dict(type=int, default=10, help='rounds of early stopping')),
        ('--verbose', dict(action='store_true', help='verbose print')),
        ('--seed', dict(type=int, default=1, help='random seed (default: 1)')),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)

    # Keep only the recognised options; the list of unknown ones is dropped.
    known, _unrecognised = parser.parse_known_args()
    return known
args = args_parser()
# Use the requested CUDA device when one is available, otherwise fall back to CPU.
args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
# This notebook always runs the non-i.i.d. partition, whatever --iid says.
args.iid=False

# Apply --seed. The option was parsed but never used, so the shard
# assignment, client sampling and weight initialisation ran unseeded.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

In [6]:
# Load the pre-processed (X, Y) arrays.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only point `filename` at a file from a trusted source.
filename = 'ssfl_data.pickle'
with open(filename, 'rb') as f:
    X,Y= pickle.load(f)

# First split: hold out 15% as the test set.
# Second split: carve the x_unlabel / y_unlabel pool out of what remains.
x_train, x_test, y_train, y_test = train_test_split(X, Y,test_size=0.15, random_state=3)
# x_train, x_unlabel, y_train, y_unlabel = train_test_split(x_train, y_train,test_size=0.5749, random_state=3)
x_train, x_unlabel, y_train, y_unlabel = train_test_split(x_train, y_train,test_size=0.28746, random_state=3)

# x_train, _, y_train, _ = train_test_split(x_train, y_train,test_size=0.5, random_state=3)

# Set sizes recorded for other second-split ratios (kept verbatim from earlier runs):
#0.4312   label 7914  unlabel 6000   test 2456 labeled 0.4834%
#0.5749   label 5914  unlabel 8000   test 2456 labeled 0.425%
#0.8624   label 1914  unlabel 12000   test 2456 labeled 0.137%
# NOTE(review): the active ratio 0.28746 presumably gives about 4000 unlabeled
# samples, which is what the shard helpers (40 shards x 100 samples) consume - confirm.
assert len(x_train)==len(y_train)
assert len(x_unlabel)==len(y_unlabel)

# Augmentation: append a mirrored copy of every labeled sample with the same
# label. Reversing the last axis of the batch is exactly what np.fliplr does
# to each individual sample x_train[i], without the per-sample Python loop.
x_new = x_train[:, :, ::-1]
y_new = y_train.copy()
x_train=np.append(x_train,x_new,axis=0)
y_train=np.append(y_train,y_new,axis=0)
assert len(x_train)==len(y_train)


x_train = torch.from_numpy(x_train).float()
y_train = torch.from_numpy(y_train)

x_test = torch.from_numpy(x_test).float()
y_test = torch.from_numpy(y_test)

x_unlabel = torch.from_numpy(x_unlabel).float()
y_unlabel = torch.from_numpy(y_unlabel)

traindata =TensorDataset(x_train.clone().detach(),y_train.clone().detach().long())
testdata = TensorDataset(x_test.clone().detach(),y_test.clone().detach().long())

# NOTE(review): shuffle is off for the training loader and on for the test
# loader, the reverse of the usual convention. Neither loader is used in the
# cells visible here, so the flags are left as they were - confirm intent.
train_loader = DataLoader(traindata,batch_size = args.bs,shuffle=False)
test_loader = DataLoader(testdata,batch_size=args.bs,shuffle=True)


# (inputs, labels) tuples consumed by the partition / train / test helpers.
dataset_unlabel=(x_unlabel,y_unlabel)
dataset_test=(x_test,y_test)
dataset_server=(x_train,y_train)

In [7]:
def data_iid(dataset, num_users, num_shards=40, num_imgs=100):
    """Assign two random, non-overlapping shards of sample indices to each user.

    Samples are taken in their stored order (no sorting by label), cut into
    ``num_shards`` consecutive shards of ``num_imgs`` samples, and every user
    draws two shards without replacement via ``np.random``. Only the first
    ``num_shards * num_imgs`` samples can ever be assigned.

    Args:
        dataset: ``(inputs, labels)`` tuple; only ``len(dataset[0])`` is used.
        num_users: number of users to create entries for.
        num_shards: number of shards to cut (default 40, the former constant).
        num_imgs: samples per shard (default 100, the former constant).

    Returns:
        dict: user id -> int64 array of sample indices.

    Raises:
        ValueError: if there are not enough shards to give every user two.
    """
    shards_per_user = 2
    if num_users * shards_per_user > num_shards:
        raise ValueError(
            "data_iid needs {} shards for {} users but only {} are available".format(
                num_users * shards_per_user, num_users, num_shards))

    idx_shard = list(range(num_shards))
    dict_users = {i: np.array([], dtype='int64') for i in range(num_users)}
    idxs = np.arange(len(dataset[0]))

    for i in range(num_users):
        rand_set = set(np.random.choice(idx_shard, shards_per_user, replace=False))
        # Drop the shards just handed out so later users cannot reuse them.
        idx_shard = list(set(idx_shard) - rand_set)
        for rand in rand_set:
            dict_users[i] = np.concatenate((dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0)
    return dict_users

def data_noniid(dataset, num_users,args, num_shards=40, num_imgs=100):
    """Assign label-sorted shards to users so each user sees few classes.

    Sample indices are sorted by label and cut into ``num_shards`` consecutive
    shards of ``num_imgs`` samples. Each user receives ``args.local_label``
    shards chosen with ``random.choice``; a shard whose first sample repeats a
    label the user already holds is rejected, until the user has made more
    than 10 draws, after which any shard is accepted.

    Only the first ``num_shards * num_imgs`` label-sorted samples can be
    assigned, so with the defaults a larger dataset contributes just its
    lowest labels - pass matching ``num_shards`` / ``num_imgs`` in that case.

    Args:
        dataset: ``(inputs, labels)`` tuple; ``labels`` must support
            ``.int().numpy()``.
        num_users: number of users to create entries for.
        args: namespace providing ``local_label`` (shards per user).
        num_shards: number of shards to cut (default 40, the former constant).
        num_imgs: samples per shard (default 100, the former constant).

    Returns:
        dict: user id -> int64 array of sample indices.

    Raises:
        ValueError: if there are not enough shards for every user (previously
            this surfaced as an IndexError from ``random.choice``).
    """
    if num_users * args.local_label > num_shards:
        raise ValueError(
            "data_noniid needs {} shards for {} users but only {} are available".format(
                num_users * args.local_label, num_users, num_shards))

    idx_shard = list(range(num_shards))
    dict_users = {i: np.array([], dtype='int64') for i in range(num_users)}

    # Row 0: sample indices, row 1: labels; columns reordered by label.
    idxs = np.arange(len(dataset[0]))
    labels = dataset[1].int().numpy()
    idxs_labels = np.vstack((idxs, labels))
    idxs_labels = idxs_labels[:, idxs_labels[1, :].argsort()]
    idxs = idxs_labels[0, :]
    sorted_labels = idxs_labels[1, :]

    for i in range(num_users):
        rand_set = []
        label_set = set()
        count = 0
        while len(rand_set) < args.local_label:
            rs = random.choice(idx_shard)
            # A shard is represented by the label of its first sample.
            label = int(sorted_labels[rs * num_imgs])
            count += 1
            if label not in label_set or count > 10:
                label_set.add(label)
                rand_set.append(rs)
                idx_shard = list(set(idx_shard) - set(rand_set))

        for rand in rand_set:
            dict_users[i] = np.concatenate((dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0)
    return dict_users

In [8]:
# Partition the x_unlabel / y_unlabel pool across the clients.
args.iid=False
if args.iid:
    dict_users = data_iid(dataset_unlabel, args.num_users)
else:
    dict_users = data_noniid(dataset_unlabel, args.num_users,args)

# Sanity check: print the label histogram of every client's share.
for i in range(args.num_users):
    client_labels = np.array(y_unlabel[list(dict_users[i])])
    # Build a real Series first; calling the unbound Series.value_counts on a
    # bare ndarray only works by accident of pandas internals.
    print(pd.Series(client_labels).value_counts())

2    100
0    100
dtype: int64
4    100
1    100
dtype: int64
2    100
0    100
dtype: int64
4    100
0    100
dtype: int64
3    100
1    100
dtype: int64
3    100
0     94
1      6
dtype: int64
3    100
1     77
2     23
dtype: int64
1    100
0    100
dtype: int64
4    100
1    100
dtype: int64
2    100
1    100
dtype: int64
4    100
1    100
dtype: int64
4    100
0    100
dtype: int64
3    100
0    100
dtype: int64
4    100
2    100
dtype: int64
2    100
0    100
dtype: int64
3    100
2    100
dtype: int64
3    100
2    100
dtype: int64
4    100
3    100
dtype: int64
4    100
2     68
3     32
dtype: int64
1    100
3     78
4     22
dtype: int64


In [9]:
# Build the global model on the selected device and leave it in training mode.
# args.lr is the learning rate read by the optimizer in global_update.
args.lr = 0.0005
net_glob = GRU()
net_glob = net_glob.to(args.device)
net_glob.train()
# for epoch in range(1, 80 + 1):
#     start=time.time()
#     global_train(args, net_glob, epoch, dataset=dataset_server)
#     print("time {:.2f} sec:".format(time.time()-start))
#     acc_global, loss_gloabl = test_img(net_glob, dataset_test, args)
#     print("Epoch: {} Testing accuracy in global: {:.2f}".format(epoch, acc_global))
# args.lr=0.0001
# for epoch in range(1, 50 + 1):
#     start=time.time()
#     global_train(args, net_glob, epoch, dataset=dataset_server)
#     print("time {:.2f} sec:".format(time.time()-start))
#     acc_global, loss_gloabl = test_img(net_glob, dataset_test, args)
#     print("Epoch: {} Testing accuracy in global: {:.2f}".format(epoch, acc_global))

In [10]:
# def pesudo_label(args, net_g=None, dataset=None, idxs=None, tao=0.99):
#     net_glob.eval()
#     idxs = list(idxs)

#     image = dataset_unlabel[0][idxs]
#     label = dataset_unlabel[1][idxs]
#     image = image.numpy()
#     label = label.numpy()
#     x_pusedo = []
#     y_pusedo = []
#     y_true=[]
#     for i in range(len(image)):
#         x_temp = image[i][np.newaxis, :, :]
#         x_temp = torch.tensor(torch.from_numpy(x_temp).float())
#         x_temp = x_temp.to(args.device)
#         with torch.no_grad():
#             p_out, output = net_glob(x_temp)
#             pseudo = torch.softmax(p_out.detach_(), dim=-1)
#             max_probs, targets_u = torch.max(pseudo, dim=-1)
#             if max_probs > tao:
#                 x_pusedo.append(x_temp.cpu().numpy())
#                 y_pusedo.append(int(targets_u.cpu().numpy()))
#                 y_true.append(label[i])
#     x_pusedo = np.array(x_pusedo)
#     x_pusedo = np.squeeze(x_pusedo)
#     y_pusedo = np.array(y_pusedo)
#     y_true = np.array(y_true)
#     x_pusedo = torch.from_numpy(x_pusedo).float()
#     y_pusedo = torch.from_numpy(y_pusedo)
#     y_true = torch.from_numpy(y_true)
#     return (x_pusedo, y_pusedo,y_true)
# def local_update(args, net, data, target,tao=0.99):

#     traindata =TensorDataset(data.clone().detach(),target.clone().detach().long())
#     ldr_train = DataLoader(traindata, batch_size = args.local_bs, shuffle=True)
    
#     net.train()
#     # train and update
#     optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
#     epoch_loss = []
#     for iter in range(args.local_ep):
#         batch_loss = []
#         for batch_idx, (images, labels) in enumerate(ldr_train):
#             images, labels = images.to(args.device), labels.to(args.device)
#             net.zero_grad()
#             p_out,log_probs = net(images)
#             pseudo_label = torch.softmax(p_out.detach_(), dim=-1)
#             max_probs, targets_u = torch.max(pseudo_label, dim=-1)
#             mask = max_probs.ge(tao).float()
#             labels = targets_u
            
#             loss =-(F.nll_loss(p_out, labels,reduction='none')*mask).mean()
            
#             loss.requires_grad=True
#             loss.backward()
#             optimizer.step()
#             if args.verbose and batch_idx % 10 == 0:
#                 print('Update Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
#                     iter, batch_idx * len(images), len(ldr_train.dataset),
#                            100. * batch_idx / len(ldr_train), loss.item()))
#             batch_loss.append(loss.item())
#         epoch_loss.append(sum(batch_loss)/len(batch_loss))
#     return net.state_dict(), sum(epoch_loss) / len(epoch_loss)

In [11]:
def global_update(args, net, data, target, epochs=30, batch_size=256):
    """Train ``net`` in place on ``(data, target)`` and return its weights.

    Used for both the server step and the per-client step of the training
    loop. The model's forward pass must return a pair whose second element is
    what ``F.nll_loss`` is applied to.

    Args:
        args: namespace providing ``device``, ``lr`` and ``verbose``.
        net: model to train; it is modified in place.
        data: input tensor, first dimension indexes samples.
        target: label tensor aligned with ``data`` (cast to long).
        epochs: number of passes over the data (default 30, the former constant).
        batch_size: mini-batch size (default 256, the former constant).

    Returns:
        tuple: ``(net.state_dict(), mean over epochs of the mean batch loss)``.

    Raises:
        ValueError: if ``epochs`` is smaller than 1.
    """
    if epochs < 1:
        raise ValueError("epochs must be >= 1, got {}".format(epochs))

    traindata = TensorDataset(data.clone().detach(), target.clone().detach().long())
    ldr_train = DataLoader(traindata, batch_size=batch_size, shuffle=True)

    net.train()
    # train and update
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    epoch_loss = []
    for epoch in range(epochs):  # named `epoch` so the builtin iter() is not shadowed
        batch_loss = []
        for batch_idx, (images, labels) in enumerate(ldr_train):
            images, labels = images.to(args.device), labels.to(args.device)
            optimizer.zero_grad()
            # Only the second output feeds the loss; the first is unused here.
            _p_out, output = net(images)
            loss = F.nll_loss(output, labels)
            loss.backward()
            optimizer.step()
            if args.verbose and batch_idx % 10 == 0:
                print('Update Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(images), len(ldr_train.dataset),
                           100. * batch_idx / len(ldr_train), loss.item()))
            batch_loss.append(loss.item())
        epoch_loss.append(sum(batch_loss)/len(batch_loss))
    return net.state_dict(), sum(epoch_loss) / len(epoch_loss)
def random_group_users(num_users, num_groups):
    """Randomly partition user ids ``0 .. num_users-1`` into ``num_groups`` groups.

    The first ``num_groups - 1`` groups each get ``num_users // num_groups``
    distinct users drawn with ``np.random``; the last group takes whoever is
    left, so it absorbs the remainder.

    Args:
        num_users: total number of users.
        num_groups: number of groups to form.

    Returns:
        list: ``num_groups`` NumPy arrays of user ids; every user appears in
        exactly one of them.
    """
    groups = []
    g = int(num_users / num_groups)
    users = list(range(num_users))
    for i in range(num_groups):
        if i == num_groups - 1:
            # Last group: everything not yet assigned.
            groups.append(np.array(users, dtype=int))
            break
        # replace=False is essential: np.random.choice samples WITH
        # replacement by default, which put duplicate users in a group and
        # left the remaining groups with the wrong number of users.
        idxs = np.random.choice(users, g, replace=False)
        users = list(set(users) - set(idxs))
        groups.append(idxs)
    return groups

def calc_non_iid(args,net_g=None,dataset=None,idxs=None):
    """Estimate a client's label distribution from the model's predictions.

    Runs ``net_g`` (switched to eval mode) over the samples selected by
    ``idxs`` and returns, for each class, the fraction of those samples the
    model assigns to it. The true labels are not used.

    Args:
        args: namespace providing ``bs`` (batch size) and ``device``;
            ``num_classes`` is used when present, otherwise 5.
        net_g: model whose forward pass returns a pair; the class is taken as
            the arg-max over dim 1 of the second element.
        dataset: ``(inputs, labels)`` tuple. If ``None`` the notebook-level
            ``dataset_unlabel`` is used, which is what the function read
            unconditionally before.
        idxs: iterable of sample indices belonging to the client.

    Returns:
        list: ``num_classes`` floats; all zeros when ``idxs`` is empty.
    """
    net_g.eval()
    if dataset is None:
        dataset = dataset_unlabel  # backward-compatible fallback to the global
    num_classes = getattr(args, 'num_classes', 5)

    idxs = list(idxs)
    if not idxs:
        # Nothing to classify; avoid dividing by zero below.
        return [0.0] * num_classes

    image = dataset[0][idxs]
    label = dataset[1][idxs]
    data_loader = DataLoader(
        TensorDataset(image.clone().detach(), label.clone().detach()),
        batch_size=args.bs)

    re_y = []
    with torch.no_grad():  # inference only: no autograd graph needed
        for data, _target in data_loader:
            # Follow args.device instead of calling .cuda() whenever
            # args.gpu != -1, which failed on machines without CUDA.
            data = data.to(args.device)
            _p_out, log_probs = net_g(data)
            y_pred = log_probs.max(1)[1]
            # Flatten explicitly: squeeze() on a one-sample batch produced a
            # 0-d array that list.extend could not iterate.
            re_y.extend(y_pred.view(-1).cpu().tolist())

    r_count = Counter(re_y)
    return [r_count[c] / len(image) for c in range(num_classes)]

def group_users(idxs_users, num_groups, cluster_labels):
    """Bucket user ids by their cluster label.

    Args:
        idxs_users: sequence of user ids.
        num_groups: number of buckets to create.
        cluster_labels: sequence aligned with ``idxs_users`` giving, for each
            position, the index of the bucket that user goes into.

    Returns:
        list: ``num_groups`` lists of user ids (a bucket may be empty),
        each preserving the order of ``idxs_users``.
    """
    buckets = [[] for _ in range(num_groups)]
    for position, user in enumerate(idxs_users):
        buckets[cluster_labels[position]].append(user)
    return buckets

In [12]:
# Slice the x_unlabel / y_unlabel pool into one (inputs, labels) pair per
# client, using the index sets in dict_users.
user_data, user_target = [], []
for i in range(args.num_users):
    image, label = dataset_unlabel[0][dict_users[i]], dataset_unlabel[1][dict_users[i]]
    user_data += [image]
    user_target += [label]

# Current global weights, plus an independent copy of this starting point.
w_glob = net_glob.state_dict()
w_save = copy.deepcopy(w_glob)

# training: bookkeeping containers and counters
loss_train, cv_loss, cv_acc = [], [], []
val_acc_list, net_list = [], []
val_loss_pre = counter = 0
net_best = best_loss = None

In [13]:
args.frac = 0.5
num_groups = 3
# The round counter is `round_idx` rather than `iter`, so the builtin iter()
# is not shadowed for the rest of the notebook.
for round_idx in range(args.epochs):
    start = time.time()
    w_locals, loss_locals = [], []

    # Server step: train a copy of the global model on the labeled server data.
    w_gs,loss=global_update(args, net=copy.deepcopy(net_glob).to(args.device),data=dataset_server[0],target=dataset_server[1])

    # Sample a fraction of the clients for this round.
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    # Describe each sampled client by the class distribution the current
    # global model predicts on its data.
    client_label_distribution=[]
    for i in idxs_users:
        r_l = calc_non_iid(args,net_g=net_glob, dataset=dataset_unlabel, idxs=dict_users[i])
        client_label_distribution.append(r_l)

    # Cluster the clients on those distributions. n_init is pinned because
    # scikit-learn changed its default (10 -> 'auto' in 1.4); leaving it
    # implicit makes the grouping depend on the installed version.
    cluster = KMeans(n_clusters=num_groups, n_init=10, random_state=0).fit(client_label_distribution)
    groups  =group_users(idxs_users, num_groups, cluster.labels_)

    print("sample user num:{:d}, Groups: {:d}".format(len(idxs_users), num_groups))
    for g in groups:
        # Each group model averages the server weights with its members'
        # weights. NOTE(review): an empty group contributes the server weights
        # alone to the global average - confirm that is intended.
        w_group=[]
        w_group.append(copy.deepcopy(w_gs))
        for idx in g:
            w, loss = global_update(args,net=copy.deepcopy(net_glob).to(args.device),data=user_data[idx], target=user_target[idx])
            w_group.append(copy.deepcopy(w))
            loss_locals.append(copy.deepcopy(loss))
        w_gp = FedAvg(w_group)
        w_locals.append(copy.deepcopy(w_gp))

    # New global model = average of the group models.
    w_glob = FedAvg(w_locals)


    net_glob.load_state_dict(w_glob)


    net_glob.eval()
    # "Training accuracy" below is measured on dataset_unlabel.
    acc_train, loss_train = test_img(net_glob, dataset_unlabel, args)
    acc_test, loss_test = test_img(net_glob, dataset_test, args)
    val_acc_list.append(acc_test)
    print("time {:.2f} sec:".format(time.time()-start))
    print("Training accuracy: {:.2f}".format(acc_train))
    print("Testing accuracy: {:.2f}".format(acc_test))

  cluster = KMeans(n_clusters=num_groups,random_state=0).fit(client_label_distribution)


sample user num:10, Groups: 3




testing dataset length: 4000
testing dataset length: 2456
time 170.28 sec:
Training accuracy: 23.50
Testing accuracy: 24.14
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 171.35 sec:
Training accuracy: 78.20
Testing accuracy: 79.52
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 169.10 sec:
Training accuracy: 79.75
Testing accuracy: 80.09
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 166.94 sec:
Training accuracy: 87.53
Testing accuracy: 87.62
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 167.79 sec:
Training accuracy: 86.40
Testing accuracy: 87.58
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 168.53 sec:
Training accuracy: 85.80
Testing accuracy: 86.64
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 169.41 sec:
T

time 210.60 sec:
Training accuracy: 90.07
Testing accuracy: 90.02
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 160.67 sec:
Training accuracy: 90.18
Testing accuracy: 89.58
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 162.19 sec:
Training accuracy: 90.18
Testing accuracy: 89.45
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 160.46 sec:
Training accuracy: 90.75
Testing accuracy: 90.43
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 164.90 sec:
Training accuracy: 90.12
Testing accuracy: 89.66
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 160.56 sec:
Training accuracy: 89.93
Testing accuracy: 89.94
sample user num:10, Groups: 3
testing dataset length: 4000
testing dataset length: 2456
time 167.66 sec:
Training accuracy: 90.30
Testing accuracy: 90.15
sample use

In [14]:
# Gather the per-round test accuracies (each converted with .numpy()) into
# one NumPy array and display it.
acc = np.array([x.numpy() for x in val_acc_list])
acc

array([24.14495 , 79.51955 , 80.08958 , 87.62215 , 87.581436, 86.64495 ,
       86.0342  , 86.0342  , 87.29642 , 88.39577 , 89.16938 , 88.92508 ,
       87.662865, 88.2329  , 88.9658  , 90.06515 , 90.06515 , 89.6987  ,
       89.41368 , 90.14658 , 89.53583 , 89.94299 , 89.006516, 89.53583 ,
       88.88436 , 89.94299 , 90.26873 , 89.82085 , 90.14658 , 89.73941 ,
       89.98371 , 89.82085 , 89.576546, 89.73941 , 89.41368 , 89.6987  ,
       89.53583 , 89.41368 , 89.90228 , 89.08795 , 89.73941 , 89.94299 ,
       90.39088 , 89.73941 , 89.41368 , 89.94299 , 90.22801 , 89.576546,
       89.90228 , 90.30945 , 89.94299 , 89.41368 , 90.06515 , 90.02443 ,
       89.576546, 89.4544  , 90.431595, 89.65798 , 89.94299 , 90.14658 ,
       90.187294, 90.187294, 89.861565, 89.78013 , 90.14658 , 90.06515 ,
       90.105865, 89.98371 , 90.02443 , 90.39088 , 90.187294, 90.187294,
       89.82085 , 89.6987  , 90.431595, 90.39088 , 90.30945 , 89.53583 ,
       89.82085 , 90.35017 , 90.14658 , 90.75733 , 

In [10]:
# Same accuracy-collection step as the earlier cell, kept with the output of
# a different run: convert every entry of val_acc_list and show the array.
acc = np.array([x.numpy() for x in val_acc_list])
acc

array([30.456026, 58.346905, 77.72801 , 83.754074, 81.02606 , 87.62215 ,
       86.807816, 86.97068 , 86.92997 , 88.02932 , 88.80293 , 87.78502 ,
       89.4544  , 87.7443  , 88.35505 , 89.332245, 89.576546, 88.55863 ,
       89.94299 , 88.59935 , 89.90228 , 89.861565, 89.82085 , 89.65798 ,
       90.06515 , 88.517914, 88.31433 , 90.02443 , 90.14658 , 89.78013 ,
       88.68078 , 88.80293 , 89.82085 , 90.35017 , 88.80293 , 89.861565,
       89.4544  , 89.94299 , 89.25082 , 89.94299 , 89.94299 , 88.84365 ,
       89.006516, 89.16938 , 90.55375 , 90.14658 , 89.78013 , 89.08795 ,
       89.006516, 89.94299 , 89.73941 , 89.61726 , 89.65798 , 90.14658 ,
       89.37296 , 89.94299 , 89.16938 , 89.73941 , 88.59935 , 89.82085 ,
       88.59935 , 89.78013 , 88.477196, 89.53583 , 88.88436 , 89.73941 ,
       88.64007 , 89.41368 , 89.6987  , 89.2101  , 89.861565, 89.61726 ,
       89.291534, 89.78013 , 89.25082 , 89.49512 , 89.78013 , 88.64007 ,
       88.59935 , 89.82085 , 89.37296 , 89.12866 , 

In [10]:
# Plain FedAvg baseline: one server update plus one update per sampled client,
# all averaged together with the previous global weights.
args.frac=0.5
# `round_idx` instead of `iter`, so the builtin iter() is not shadowed.
for round_idx in range(args.epochs):
    start=time.time()
    w_locals, loss_locals = [], []
    # Was `globe_update`, a name defined nowhere in this notebook;
    # `global_update` has exactly this call signature and return shape.
    w,loss=global_update(args ,net=copy.deepcopy(net_glob).to(args.device),data=dataset_server[0],target=dataset_server[1])
    w_locals.append(copy.deepcopy(w))
    loss_locals.append(copy.deepcopy(loss))

    # The previous global weights also take part in the average.
    w_locals.append(copy.deepcopy(w_glob))
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    print("sample user num:",len(idxs_users))
    for idx in idxs_users:
        # NOTE(review): `local_update` exists only as commented-out code in
        # this notebook; restore or import it before running this cell.
        w, loss = local_update(args,net=copy.deepcopy(net_glob).to(args.device),data=user_data[idx],target=user_target[idx])

        #         w, loss = local_update_label(args,net=copy.deepcopy(net_glob).to(args.device),data=user_data[idx],target=user_target[idx])
        w_locals.append(copy.deepcopy(w))
        loss_locals.append(copy.deepcopy(loss))
    # update global weights

    w_glob = FedAvg(w_locals)

    # copy weight to net_glob
    net_glob.load_state_dict(w_glob)

    # print loss
    loss_avg = sum(loss_locals) / len(loss_locals)
    print('Round {:3d}, Average loss {:.3f}'.format(round_idx, loss_avg))
#     loss_train.append(loss_avg)

    # testing ("Training accuracy" is measured on dataset_unlabel)
    net_glob.eval()
    print()
    acc_train, loss_train = test_img(net_glob, dataset_unlabel, args)
    acc_test, loss_test = test_img(net_glob, dataset_test, args)
    val_acc_list.append(acc_test)
    print("time {:.2f} sec:".format(time.time()-start))
    print("Training accuracy: {:.2f}".format(acc_train))
    print("Testing accuracy: {:.2f}".format(acc_test))

  traindata =TensorDataset(torch.tensor(data),torch.tensor(target,dtype=torch.long))


sample user num: 10


  traindata =TensorDataset(torch.tensor(data),torch.tensor(target,dtype=torch.long))


Round   0, Average loss 0.073

testing dataset length: 12000


  data =TensorDataset(torch.tensor(image),torch.tensor(label,dtype=torch.long))


testing dataset length: 2456
time 30.51 sec:
Training accuracy: 31.39
Testing accuracy: 30.25
sample user num: 10
Round   1, Average loss 0.062

testing dataset length: 12000
testing dataset length: 2456
time 30.56 sec:
Training accuracy: 31.03
Testing accuracy: 29.68
sample user num: 10
Round   2, Average loss 0.056

testing dataset length: 12000
testing dataset length: 2456
time 30.45 sec:
Training accuracy: 31.63
Testing accuracy: 30.01
sample user num: 10
Round   3, Average loss 0.054

testing dataset length: 12000
testing dataset length: 2456
time 30.15 sec:
Training accuracy: 31.27
Testing accuracy: 30.01
sample user num: 10
Round   4, Average loss 0.049

testing dataset length: 12000
testing dataset length: 2456
time 30.00 sec:
Training accuracy: 30.98
Testing accuracy: 29.72
sample user num: 10
Round   5, Average loss 0.047

testing dataset length: 12000
testing dataset length: 2456
time 30.52 sec:
Training accuracy: 31.09
Testing accuracy: 29.85
sample user num: 10
Round   6, 

KeyboardInterrupt: 