In [None]:
%matplotlib inline  

import os
import re
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import json
from result_parser import get_exp_result


# Notebook-wide plotting defaults, applied once so every later figure shares
# the same size and font.
fig_width = 10  # figure width passed to figsize; later cells reuse this name
plt.rcParams.update({
    "figure.figsize": (fig_width, 6),
    'font.size': 22,
})

In [None]:
# Summary bar charts: one figure per metric, comparing the algorithms returned
# by get_exp_result across every (shard_per_user, frac) configuration.
data_distr_file = 'clsimb_dict_users.pkl'
result_folder = 'cossim_cls_imb'
# Percentage of the best accuracy used as the time-to-accuracy target. It is
# the single source of truth for BOTH the get_exp_result call and the axis
# labels below (the call used to hard-code 92 while the labels said 80%).
acc_threshold = 80
dataset = 'cifar10'
x_labels = []
# metric name -> list with one entry per configuration (each entry holds one
# value per algorithm, as returned by get_exp_result).
metrics = {'ACC':[], 'TTA_epoch':[], 'TTA_time':[]}
# Same order as the keys of `metrics`.
y_labels = ['Final Accuracy(%)',
            'Epochs to Reach {}% Best Acc'.format(acc_threshold),
            'Time to Reach {}% Best Acc'.format(acc_threshold)]

for shard_per_user in [10, 2]:
    for frac in [0.1, 0.05]:
        x_labels.append('s{}\nc{}'.format(shard_per_user, frac))
        algos, color_algos, bacc_algos, required_time = get_exp_result(
            dataset, data_distr_file, result_folder,
            shard_per_user, frac, acc_threshold=acc_threshold)

        metrics['ACC'].append(bacc_algos)
        metrics['TTA_epoch'].append(required_time['epoch'])
        metrics['TTA_time'].append(required_time['time_simu'])


# summary
x = np.arange(len(x_labels))
for i, (k, v) in enumerate(metrics.items()):
    plt.figure(figsize=(fig_width, 6))
    plt.title('{} - {}'.format(dataset, result_folder))
    cur_algos = algos
    # Transpose so that each row holds one algorithm's values across configs.
    # assumes every configuration reported the same number of algorithms — TODO confirm
    cur_v_T = v_T = np.array(v).T
    if 'TTA' in k:
        # Time-to-accuracy plots skip the first algorithm.
        # NOTE(review): presumably the first entry is the baseline — confirm.
        cur_algos = algos[1:]
        cur_v_T = v_T[1:]

    # The bars of one configuration share a 0.9-wide slot centred on its tick.
    w = 0.9 / max(len(cur_algos), 1)
    for aidx, algo in enumerate(cur_algos):
        bar_kwargs = {'label': algo, 'width': w}
        if k != 'ACC':
            # NOTE(review): color_algos is indexed from 0 although cur_algos
            # starts at algos[1] here — verify the colours line up.
            bar_kwargs['color'] = color_algos[aidx]
        plt.bar(x + aidx*w - 0.45 + w/2, cur_v_T[aidx], **bar_kwargs)

    plt.ylabel(y_labels[i])
    plt.xticks(x, x_labels)
    # Legend is anchored outside the axes, to the right of the plot.
    plt.legend(fancybox=True, shadow=True, fontsize=16,
               loc='lower left', bbox_to_anchor=(1, 0))

In [None]:
# Adaptive-gamma diagnostic: plots the averaged loss, its reciprocal, its
# moving average, and the ratio between two moving-window loss sums that lie
# `win_offset` epochs apart (raised to the power `deg`).
# NOTE(review): `df_utility`, `run`, `shard_per_user`, `num_users` and `frac`
# are not defined in this cell; they must already exist in the kernel.
deg = 1
win_size = 100    # number of epochs summed/averaged in one moving window
win_offset = 100  # distance (in epochs) between the two windows being compared
loss = df_utility['algo3_deg{}_e0.5_wof100'.format(deg)]['loss_avg']
n_epochs = len(loss)  # replaces the former hard-coded 1000
epochs = range(n_epochs)

plt.figure()
plt.title('{} - shard{} - select{}*{}'.format(run, shard_per_user, num_users, frac))
plt.plot(epochs, 1/loss, label='1/loss_avg')
plt.plot(epochs, loss, label='loss_avg')
MA = loss.rolling(win_size).mean()
plt.plot(epochs, MA, label='loss_avg {}MA'.format(win_size))

# mov_sum[i] = sum(loss_avg[i:i+win_size]); as before, the window that would
# end exactly at the last epoch is not included.
loss_values = loss.to_numpy()
mov_sum = np.array([loss_values[start:start + win_size].sum()
                    for start in range(max(n_epochs - win_size, 0))])

# mov_ratio[m] = mov_sum[m + win_offset] / mov_sum[m]
n_ratio = max(len(mov_sum) - win_offset, 0)
mov_ratio = mov_sum[win_offset:win_offset + n_ratio] / mov_sum[:n_ratio]

# The first ratio is only available once both windows have been filled.
first_ratio_epoch = win_size + win_offset
plt.plot(range(first_ratio_epoch, first_ratio_epoch + n_ratio),
         (mov_ratio+0.01)**deg, label='mov_ratio^{}'.format(deg))
plt.ylabel('adaptive gamma')
plt.xlabel('epoch')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15),
           fancybox=True, shadow=True, ncol=4)

### 5/100 (client fraction `frac` = 0.05)

In [None]:
# Per-run comparison of random selection (FedAvg) against the utility-based
# selection algorithms at frac = 0.05: loads each run's result CSVs, plots the
# loss/accuracy curves against epoch and simulated time, and plots how often
# each client was selected.
# NOTE(review): `runs`, `base_dir`, `global_ep`, `shard_per_user`, `num_users`
# and `acc_threshold` are not defined in this cell; they must already exist in
# the kernel.
frac = 0.05

acc_fed = np.zeros(len(runs))
acc_utility = np.zeros(len(runs))
acc_local_localtest = np.zeros(len(runs))
acc_local_newtest_avg = np.zeros(len(runs))
acc_local_newtest_ens = np.zeros(len(runs))
lg_metrics = {}
# Final best accuracy of every loaded result. It is never reset, so it keeps
# growing across runs and `top_acc` below is the best over all runs so far.
# NOTE(review): confirm that a cross-run maximum is intended.
bacc_algos = []

# Loop-invariant configuration. gamma_opt is read again by the gamma-impact
# cell further down.
gamma_opt = [1.6]
gamma_path = ['gamma_3090/{}/algo1'.format(r) for r in gamma_opt]
# Result sub-directories (one per selection algorithm) looked up for each run.
utility_algos = ['utility', 'algo1_r1.0', 'algo5_deg5']

plt.rcParams["figure.figsize"] = (fig_width,6)
# (results.csv column to plot, y-axis limits)
# NOTE: this rebinds `metrics`, which the summary cell uses as a dict.
metrics = [('loss_avg', [0, 2.5]),
           ('acc_test', [0, 100]),
           ('best_acc', [0, 70])]

for idx, run in enumerate(runs):
    print('=== ', run, ' ===')

    # --- FedAvg with random client selection ---
    base_dir_fed = os.path.join(base_dir, "{}/fedavg_c{}".format(run, frac))
    has_fed = os.path.exists(base_dir_fed)
    if has_fed:
        df_fed = pd.read_csv(os.path.join(base_dir_fed, "results.csv"))
        df_fed['cossim_glob_uni'] = np.genfromtxt(
            os.path.join(base_dir_fed, "cossim_glob_uni.csv"), delimiter=',')
        df_fed = df_fed[:global_ep]
        acc_fed[idx] = df_fed['best_acc'].iloc[-1]
        print('fedavg, best_acc', acc_fed[idx], '===================')
        print('')
        bacc_algos.append(acc_fed[idx])
    else:
        print('No random selection training result.')

    # --- FedAvg with utility-based client selection ---
    df_utility = {}  # algo -> results DataFrame, truncated to global_ep rows
    slctcnt = {}     # algo -> contents of selection_cnt.csv

    for algo in utility_algos:
        base_dir_utility = os.path.join(base_dir, '{}/{}'.format(run, algo))
        if not os.path.exists(base_dir_utility):
            print('No {} utility selection training result.'.format(algo))
            continue

        df_utility[algo] = pd.read_csv(
            os.path.join(base_dir_utility, "results.csv"))[:global_ep]
        slctcnt[algo] = np.genfromtxt(
            os.path.join(base_dir_utility, "selection_cnt.csv"), delimiter=',')

        # acc_utility[idx] is overwritten for every algorithm, so after the
        # loop it holds the value of the last algorithm that was found.
        acc_utility[idx] = df_utility[algo]['best_acc'].iloc[-1]
        print(algo,', best_acc', acc_utility[idx], '===================')
        print('')
        bacc_algos.append(acc_utility[idx])

    print('bacc summary:', bacc_algos)
    if bacc_algos:
        top_acc = max(bacc_algos)
        # acc_threshold is a percentage (the summary cell labels it "{}% Best
        # Acc"), so scale it to a fraction before applying it.
        target_acc = top_acc * acc_threshold / 100
    else:
        # Nothing was loaded at all: there is no target line to draw.
        target_acc = None

    # --- training curves: one figure per (column, x-axis) pair ---
    for col, yl in metrics:
        is_acc_col = col in ('acc_test', 'best_acc')
        # NOTE: `x` rebinds the tick-position array of the summary cell.
        for x in ['epoch', 'time_simu']:
            plt.figure()
            # Horizontal target-accuracy line. This used to be
            # plt.axvline(y=...), which fails because axvline has no `y`
            # argument. It is only meaningful on accuracy axes.
            if is_acc_col and target_acc is not None:
                plt.axhline(y=target_acc, color='grey')

            if has_fed:
                plt.plot(df_fed[x], df_fed[col], label='random')

            # df_utility only contains the algorithms whose results exist.
            for algo, df_algo in df_utility.items():
                plt.plot(df_algo[x], df_algo[col], label=algo)
                if is_acc_col:
                    # Vertical marker where this algorithm first reaches its
                    # maximum best_acc.
                    bepc = df_algo['best_acc'].idxmax()
                    plt.axvline(x=df_algo[x][bepc], color='grey')

            plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15),
                       fancybox=True, shadow=True, ncol=1+len(utility_algos))
            plt.ylabel(col)
            plt.xlabel(x)
            plt.ylim(yl)
            plt.title('{} - shard{} - select{}*{}'.format(run, shard_per_user, num_users, frac))

    # --- selection count per client, grouped by algorithm ---
    plt.figure()
    # Split a 0.8-wide slot around each client id between the algorithms so
    # that bars of neighbouring clients never overlap (a fixed 0.5 step with
    # width 0.4 collided as soon as three algorithms were plotted).
    n_plotted = max(len(slctcnt), 1)
    bar_w = 0.8 / n_plotted
    for i, (algo, counts) in enumerate(slctcnt.items()):
        client_ids = np.arange(len(counts))
        plt.bar(client_ids + (i - (n_plotted - 1) / 2) * bar_w, counts,
                width=bar_w, label=algo + ' utility')
    plt.ylabel('selection cnt')
    plt.xlabel('client id')
    plt.legend()

### impact of gamma (only valid for c0.1)

In [None]:
# Final best accuracy against the gamma option; a leading 0 entry is prepended
# to the gamma options used by the comparison cell.
# NOTE(review): `acc_algo`, `gamma_opt`, `run`, `shard_per_user`, `num_users`
# and `frac` are not defined in this cell; they must already exist in the
# kernel.
eq_gamma_opt = [0, *gamma_opt]
print(acc_algo)

tick_positions = np.arange(len(eq_gamma_opt))
plt.figure()
plt.plot(tick_positions, acc_algo, marker='o')
# Gamma options are spaced evenly and labelled with their actual values.
plt.xticks(tick_positions, eq_gamma_opt)
plt.xlabel('gamma option')
plt.ylabel('final best acc')
plt.title('{} - shard{} - select{}*{}'.format(run, shard_per_user, num_users, frac))


In [None]:
# Plot the `mov_sum` column over epochs for the selected algorithms.
# NOTE(review): `df_utility` comes from the per-run comparison cell; the
# algorithms listed here must have been loaded there, otherwise they are
# skipped with a message instead of raising KeyError.
plt.figure()
algo_list = ['algo1_r1.6', 'algo2_r1.6']
for algo in algo_list:
    if algo not in df_utility or 'mov_sum' not in df_utility[algo]:
        print('No mov_sum result for {}.'.format(algo))
        continue
    plt.plot(df_utility[algo]['epoch'], df_utility[algo]['mov_sum'], label=algo+ ' utility')
plt.xlabel('epoch')
plt.ylabel('mov_sum')
# The curves are labelled, so draw the legend that makes the labels visible.
plt.legend()


In [None]:
# plot t_i
# Loads the t_i table from the user-config CSV and plots it two ways: across
# clients for the first rounds, and across rounds for the first clients.
# The indexing below treats rows as clients and columns as rounds.
# NOTE(review): `dataset`, `num_users` and `global_ep` are not defined in this
# cell; they must already exist in the kernel.
ti_path = './save/user_config/var_time/{}_{}.csv'.format(dataset, num_users)
ti_all = np.genfromtxt(ti_path, delimiter=',')

# t_i of every client, one curve per round (at most the first 10 rounds).
plt.figure()
for ep in range(min(10, ti_all.shape[1])):
    plt.plot(range(num_users), ti_all[:, ep], label='round'+str(ep))
plt.xlabel('client id')
plt.ylabel('t_i')
plt.legend()  # built once after all curves exist instead of once per curve

print(ti_all[:,0])

# t_i over the rounds, one curve per client (at most the first 2 clients).
plt.figure()
for user in range(min(2, ti_all.shape[0])):
    plt.plot(range(global_ep), ti_all[user], label='client'+str(user))
plt.xlabel('round')
plt.ylabel('t_i')
plt.legend()
