In [1]:
import os
import time
import importlib
import numpy as np
import tensorflow as tf
import tensorflow.contrib.eager as tfe
from client import Client
from server import Server
from utils.data import read_data
from utils.logger import Logger
from utils.config import Config

# TF_CPP_MIN_LOG_LEVEL controls the verbosity of TensorFlow's native logging
# (presumably '2' hides INFO and WARNING messages -- confirm against TF docs).
# NOTE(review): this is assigned after the `import tensorflow` line above;
# confirm the setting still takes effect, or move it ahead of that import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Constructor arguments keyed by '<dataset>.<model>'. init() unpacks the tuple
# positionally into the ClientModel class of the matching `model.<key>` module.
# NOTE(review): the meaning of each tuple entry is defined by the individual
# ClientModel signatures, which are not visible in this notebook.
MODEL_PARAMS = {'femnist.cnn': (0.01, 0.05, 62),
                'sent140.stacked_lstm': (0.0003, 0.05, 25, 2, 100),
                'shakespeare.stacked_lstm': (0.0003, 0.05, 80, 80, 256)}

# Enable eager execution, then restrict TensorFlow's Python-side logging to
# ERROR level.
tfe.enable_eager_execution()
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [2]:
# Initialise the model, the clients and the server.
def init(dataset, model, alg):
    """Build the logger, configuration and server for one experiment.

    Args:
        dataset: Dataset name. It selects the ``data/<dataset>/data/train`` and
            ``data/<dataset>/data/test`` directories and, together with
            ``model``, the ``model.<dataset>.<model>`` module to import.
        model: Model name; ``'<dataset>.<model>'`` must be a key of
            ``MODEL_PARAMS``.
        alg: Algorithm identifier. It is logged and passed through to
            ``Server`` unchanged.

    Returns:
        The tuple ``(logger, cfg, server)``.
    """
    logger = Logger('out').get_logger()
    cfg = Config('default.cfg', logger)
    logger.info('Algorithm: %s ' % alg)

    # Model: import the module for this dataset/model pair and instantiate its
    # ClientModel with the positional arguments registered in MODEL_PARAMS.
    model_path = '%s.%s' % (dataset, model)
    logger.info('Model: %s ' % model_path)
    model_module = importlib.import_module('model.'+model_path)
    client_model = model_module.ClientModel(*MODEL_PARAMS[model_path])

    # Clients: one per user returned by read_data, numbered from 1 in that order.
    data_root = os.path.join('data', dataset, 'data')
    users, train_data, test_data = read_data(os.path.join(data_root, 'train'),
                                             os.path.join(data_root, 'test'))
    clients = []
    for user in users:
        clients.append(Client(logger, user, train_data[user], test_data[user], cfg))
    for client_id, client in enumerate(clients, start=1):
        client.id = client_id
    logger.info('Total Client num: %d' % len(clients))

    # Keep the first cfg.num_clients clients (no random sampling).
    selected = clients[:cfg.num_clients]
    logger.info('Clients in Used: {}'.format([c.id for c in selected]))

    # Server: owns the model and the selected clients.
    server = Server(logger, client_model, selected, alg)

    return logger, cfg, server


# MA(Model Average)
# 一次 Round 为完成一轮训练
def MA_train(logger, cfg, server):
    now = 0
    for i in range(cfg.num_rounds):
        logger.info('========================= Round {} of {} ========================='.format(i+1, cfg.num_rounds))
        logger.info('--------------------- select deadline ---------------------')
        server.MA_set_deadline(cfg.round_ddl)
        logger.info('-------------------------- train --------------------------')
        now += server.MA_train_model(cfg.num_epochs, cfg.batch_size)
        logger.info('Time: %.2f' % now)

        logger.info('-------------------------- update -------------------------')
        server.MA_update_model(cfg.update_frac)

        if i % cfg.eval_every == 0:
            logger.info('--------------------------- test --------------------------')
            acc, loss = server.test_model(set_to_use='train')
            logger.info('Train_acc: %.3f  Train_loss: %.3f' % (acc, loss))
            acc, loss = server.test_model(set_to_use='test')
            logger.info('Test_acc: %.3f  Test_loss: %.3f' % (acc, loss))

# ASGD(Asynchronous Stochastic Gradient Descent)
# DC_ASGD(Asynchronous Stochastic Gradient Descent with Delay Compensation)
# 一次 Round 为某个客户端完成一次训练
def ASGD_train(logger, cfg, server):
    for i in range(cfg.num_rounds):
        logger.info('========================= Round {} of {} ========================='.format(i+1, cfg.num_rounds))
        logger.info('-------------------------- train --------------------------')
        now = server.ASGD_train_model()
        logger.info('Time: %.2f' % now)

        logger.info('-------------------------- update -------------------------')
        server.ASGD_update_model()

        if i % cfg.eval_every == 0:
            logger.info('--------------------------- test --------------------------')
            acc, loss = server.test_model(set_to_use='train')
            logger.info('Train_acc: %.3f  Train_loss: %.3f' % (acc, loss))
            acc, loss = server.test_model(set_to_use='test')
            logger.info('Test_acc: %.3f  Test_loss: %.3f' % (acc, loss))

In [None]:
# Algorithm to run; must be one of 'MA', 'ASGD' or 'DC_ASGD'.
alg = 'ASGD'

# Fail fast on a misspelled name instead of letting it fall through to the
# asynchronous loop below, and do so before the expensive init() call.
if alg not in ('MA', 'ASGD', 'DC_ASGD'):
    raise ValueError("Unknown algorithm %r; expected 'MA', 'ASGD' or 'DC_ASGD'" % (alg,))

logger, cfg, server = init('femnist', 'cnn', alg)
if alg == 'MA':
    MA_train(logger, cfg, server)
else:
    # 'ASGD' and 'DC_ASGD' use the same driver loop; `alg` was already handed
    # to the server by init().
    ASGD_train(logger, cfg, server)

2019-12-15 03:39:49,070 - INFO - Log File: out.log
2019-12-15 03:39:49,072 - INFO - Config File: default.cfg
2019-12-15 03:39:49,075 - INFO - 	logger = <Logger FL-type (INFO)>
2019-12-15 03:39:49,075 - INFO - 	num_clients = 10
2019-12-15 03:39:49,076 - INFO - 	num_rounds = 1000000
2019-12-15 03:39:49,077 - INFO - 	num_epochs = 1
2019-12-15 03:39:49,078 - INFO - 	batch_size = 10
2019-12-15 03:39:49,079 - INFO - 	eval_every = 1
2019-12-15 03:39:49,080 - INFO - 	round_ddl = [20.0, 5.0]
2019-12-15 03:39:49,081 - INFO - 	update_frac = 0.5
2019-12-15 03:39:49,082 - INFO - 	big_upload_time = [5.0, 1.0]
2019-12-15 03:39:49,083 - INFO - 	mid_upload_time = [10.0, 1.0]
2019-12-15 03:39:49,084 - INFO - 	small_upload_time = [15.0, 1.0]
2019-12-15 03:39:49,085 - INFO - 	big_speed = [150.0, 1.0]
2019-12-15 03:39:49,089 - INFO - 	mid_speed = [100.0, 1.0]
2019-12-15 03:39:49,090 - INFO - 	small_speed = [50.0, 1.0]
2019-12-15 03:39:49,092 - INFO - Algorithm: MA 
2019-12-15 03:39:49,093 - INFO - Model: f

2019-12-15 03:42:12,209 - INFO - client 1, use time 12.93, upload successfully!
2019-12-15 03:42:12,212 - INFO - client 2, use time 23.42, failed: timeout!
2019-12-15 03:42:12,215 - INFO - client 3, use time 22.98, failed: timeout!
2019-12-15 03:42:12,829 - INFO - client 4, use time 14.34, upload successfully!
2019-12-15 03:42:13,326 - INFO - client 5, use time 13.88, upload successfully!
2019-12-15 03:42:13,856 - INFO - client 6, use time 19.11, upload successfully!
2019-12-15 03:42:14,443 - INFO - client 7, use time 21.81, upload successfully!
2019-12-15 03:42:15,022 - INFO - client 8, use time 20.56, upload successfully!
2019-12-15 03:42:15,671 - INFO - client 9, use time 21.55, upload successfully!
2019-12-15 03:42:15,675 - INFO - client 10, use time 22.20, failed: timeout!
2019-12-15 03:42:15,676 - INFO - Time: 107.64
2019-12-15 03:42:15,677 - INFO - -------------------------- update -------------------------
2019-12-15 03:42:15,678 - INFO - 7 of 10 clients upload successfully!
20

2019-12-15 03:42:35,751 - INFO - client 1, use time 13.35, upload successfully!
2019-12-15 03:42:36,350 - INFO - client 2, use time 21.35, upload successfully!
2019-12-15 03:42:37,003 - INFO - client 3, use time 20.95, upload successfully!
2019-12-15 03:42:37,589 - INFO - client 4, use time 14.72, upload successfully!
2019-12-15 03:42:38,092 - INFO - client 5, use time 12.60, upload successfully!
2019-12-15 03:42:38,629 - INFO - client 6, use time 20.97, upload successfully!
2019-12-15 03:42:39,188 - INFO - client 7, use time 22.38, upload successfully!
2019-12-15 03:42:39,755 - INFO - client 8, use time 21.78, upload successfully!
2019-12-15 03:42:40,382 - INFO - client 9, use time 22.78, upload successfully!
2019-12-15 03:42:40,937 - INFO - client 10, use time 22.24, upload successfully!
2019-12-15 03:42:40,938 - INFO - Time: 220.14
2019-12-15 03:42:40,939 - INFO - -------------------------- update -------------------------
2019-12-15 03:42:40,940 - INFO - 10 of 10 clients upload suc

2019-12-15 03:42:47,988 - INFO - client 3, use time 21.08, failed: timeout!
2019-12-15 03:42:48,610 - INFO - client 4, use time 14.27, upload successfully!
2019-12-15 03:42:49,086 - INFO - client 5, use time 12.61, upload successfully!
2019-12-15 03:42:49,089 - INFO - client 6, use time 20.54, failed: timeout!
2019-12-15 03:42:49,091 - INFO - client 7, use time 20.73, failed: timeout!
2019-12-15 03:42:49,094 - INFO - client 8, use time 22.01, failed: timeout!
2019-12-15 03:42:49,097 - INFO - client 9, use time 23.11, failed: timeout!
2019-12-15 03:42:49,099 - INFO - client 10, use time 22.99, failed: timeout!
2019-12-15 03:42:49,100 - INFO - Time: 295.29
2019-12-15 03:42:49,101 - INFO - -------------------------- update -------------------------
2019-12-15 03:42:49,102 - INFO - 3 of 10 clients upload successfully!
2019-12-15 03:42:49,103 - INFO - round failed.
2019-12-15 03:42:49,105 - INFO - --------------------------- test --------------------------
2019-12-15 03:42:49,386 - INFO - T

2019-12-15 03:43:08,027 - INFO - client 4, use time 15.20, upload successfully!
2019-12-15 03:43:08,517 - INFO - client 5, use time 12.95, upload successfully!
2019-12-15 03:43:09,059 - INFO - client 6, use time 22.01, upload successfully!
2019-12-15 03:43:09,635 - INFO - client 7, use time 20.75, upload successfully!
2019-12-15 03:43:10,206 - INFO - client 8, use time 20.43, upload successfully!
2019-12-15 03:43:10,209 - INFO - client 9, use time 23.51, failed: timeout!
2019-12-15 03:43:10,789 - INFO - client 10, use time 21.77, upload successfully!
2019-12-15 03:43:10,791 - INFO - Time: 406.27
2019-12-15 03:43:10,792 - INFO - -------------------------- update -------------------------
2019-12-15 03:43:10,793 - INFO - 7 of 10 clients upload successfully!
2019-12-15 03:43:10,794 - INFO - round succeed, updating global model...
2019-12-15 03:43:10,814 - INFO - --------------------------- test --------------------------
2019-12-15 03:43:11,110 - INFO - Train_acc: 0.067  Train_loss: 4.040

2019-12-15 03:43:22,864 - INFO - client 6, use time 19.67, upload successfully!
2019-12-15 03:43:23,419 - INFO - client 7, use time 22.60, upload successfully!
2019-12-15 03:43:23,960 - INFO - client 8, use time 22.00, upload successfully!
2019-12-15 03:43:24,588 - INFO - client 9, use time 23.25, upload successfully!
2019-12-15 03:43:25,135 - INFO - client 10, use time 20.16, upload successfully!
2019-12-15 03:43:25,136 - INFO - Time: 500.74
2019-12-15 03:43:25,137 - INFO - -------------------------- update -------------------------
2019-12-15 03:43:25,138 - INFO - 10 of 10 clients upload successfully!
2019-12-15 03:43:25,139 - INFO - round succeed, updating global model...
2019-12-15 03:43:25,165 - INFO - --------------------------- test --------------------------
2019-12-15 03:43:25,452 - INFO - Train_acc: 0.067  Train_loss: 4.034
2019-12-15 03:43:25,562 - INFO - Test_acc: 0.072  Test_loss: 4.023
2019-12-15 03:43:25,564 - INFO - --------------------- select deadline ----------------

2019-12-15 03:43:39,375 - INFO - client 7, use time 21.83, failed: timeout!
2019-12-15 03:43:39,379 - INFO - client 8, use time 21.79, failed: timeout!
2019-12-15 03:43:39,381 - INFO - client 9, use time 21.53, failed: timeout!
2019-12-15 03:43:39,384 - INFO - client 10, use time 22.55, failed: timeout!
2019-12-15 03:43:39,385 - INFO - Time: 587.25
2019-12-15 03:43:39,386 - INFO - -------------------------- update -------------------------
2019-12-15 03:43:39,387 - INFO - 2 of 10 clients upload successfully!
2019-12-15 03:43:39,388 - INFO - round failed.
2019-12-15 03:43:39,391 - INFO - --------------------------- test --------------------------
2019-12-15 03:43:39,738 - INFO - Train_acc: 0.067  Train_loss: 4.024
2019-12-15 03:43:39,859 - INFO - Test_acc: 0.072  Test_loss: 4.012
2019-12-15 03:43:39,861 - INFO - --------------------- select deadline ---------------------
2019-12-15 03:43:39,863 - INFO - selected deadline: 25.47
2019-12-15 03:43:39,864 - INFO - --------------------------

2019-12-15 03:43:57,795 - INFO - client 8, use time 22.99, failed: timeout!
2019-12-15 03:43:57,798 - INFO - client 9, use time 23.31, failed: timeout!
2019-12-15 03:43:57,800 - INFO - client 10, use time 23.80, failed: timeout!
2019-12-15 03:43:57,801 - INFO - Time: 693.66
2019-12-15 03:43:57,802 - INFO - -------------------------- update -------------------------
2019-12-15 03:43:57,803 - INFO - 3 of 10 clients upload successfully!
2019-12-15 03:43:57,804 - INFO - round failed.
2019-12-15 03:43:57,806 - INFO - --------------------------- test --------------------------
2019-12-15 03:43:58,071 - INFO - Train_acc: 0.067  Train_loss: 4.001
2019-12-15 03:43:58,167 - INFO - Test_acc: 0.072  Test_loss: 3.985
2019-12-15 03:43:58,169 - INFO - --------------------- select deadline ---------------------
2019-12-15 03:43:58,170 - INFO - selected deadline: 18.10
2019-12-15 03:43:58,171 - INFO - -------------------------- train --------------------------
2019-12-15 03:43:58,749 - INFO - client 1,

2019-12-15 03:44:16,541 - INFO - client 9, use time 20.33, upload successfully!
2019-12-15 03:44:16,544 - INFO - client 10, use time 22.49, failed: timeout!
2019-12-15 03:44:16,545 - INFO - Time: 792.02
2019-12-15 03:44:16,547 - INFO - -------------------------- update -------------------------
2019-12-15 03:44:16,548 - INFO - 6 of 10 clients upload successfully!
2019-12-15 03:44:16,549 - INFO - round succeed, updating global model...
2019-12-15 03:44:16,570 - INFO - --------------------------- test --------------------------
2019-12-15 03:44:16,940 - INFO - Train_acc: 0.067  Train_loss: 3.972
2019-12-15 03:44:17,057 - INFO - Test_acc: 0.072  Test_loss: 3.953
2019-12-15 03:44:17,060 - INFO - --------------------- select deadline ---------------------
2019-12-15 03:44:17,061 - INFO - selected deadline: 16.33
2019-12-15 03:44:17,062 - INFO - -------------------------- train --------------------------
2019-12-15 03:44:17,709 - INFO - client 1, use time 13.67, upload successfully!
2019-12-