In [1]:
import os
import pandas as pd
from box import Box
import json
from typing import Callable

In [10]:
def read_summaryfile(output_dir:str, condition:Callable = None):
    path_summary = os.path.join(output_dir, "experiment_summary.csv")
    usr_home = path_summary.replace('/',' ').split()
    #usr_homedir = f"{usr_home[0]}/{usr_home[1]}"
    usr_homedir = f"{usr_home[0]}/{usr_home[1]}/Documents/grenoble-code/fl-flower"
    summary = pd.read_csv(
        path_summary, 
        parse_dates=[
            "timestamps.end_experiment_after_sleep", 
            "timestamps.end_experiment", 
            "timestamps.start_experiment", 
            "timestamps.start_experiment_before_sleep"
            ],
        date_format='%Y-%m-%d_%H:%M:%S_%f')
    summary = match_folder_csv(summary, output_dir)
    # Filter by parameters
    summary["result_folder"] = summary["result_folder"].apply(lambda x: x.replace("root",usr_homedir))
    if condition is not None: 
        summary = summary.loc[condition(summary)]
    # Filter by subfolder
    folder_path = summary["result_folder"].values.tolist()
    for path in folder_path:
        if os.path.isdir(path):
            nb_subfolder = len(os.listdir(path))
            if nb_subfolder == 11:
                continue
            else:
                print(f"Not enough result, remove {path} from summary")
                summary = summary[summary["result_folder"] != path]
    return summary

def match_folder_csv(summaryfile, output_dir):
    correct_file = os.listdir(output_dir)
    summaryfile = summaryfile[summaryfile["result_folder"].apply(lambda x: x.split("/")[-1] in correct_file)]
    return summaryfile

def filter_epochs(summaryfile, epochs_list):
    place_holder = Box()
    exp_summary = []
    for e in epochs_list:
        summary_e = summaryfile[summaryfile["client.local_epochs"] == e]
        #print(f"Epoch {e} has {len(summary_e)} experiments")
        if summary_e.shape[0] > 5:
            summary_e = summary_e.iloc[:-1]
        epochs_path = summary_e["result_folder"].values.tolist()
        place_holder[f'epoch_{e}'] = Box(summary=summary_e, path=epochs_path)
        exp_summary.append(summary_e)
    summary_df = pd.concat(exp_summary).reset_index(drop=True)
    return place_holder, summary_df

In [11]:
import re
def create_epochs_dict(by_epochs):
    cols_to_keep_summary = ["result_folder",
                            "server", 
                            "timestamps.start_experiment_before_sleep",
                            "timestamps.start_experiment",
                            "timestamps.end_experiment",
                            "timestamps.end_experiment_after_sleep",
                            "client.local_epochs"]
    epochs_dict = {}
    for epoch in by_epochs.keys():
        byhost = {}
        for i, path in enumerate(by_epochs.__getattr__(epoch).path):
            byhost.setdefault(f'exp_{i}', {})
            params = by_epochs.__getattr__(epoch).summary[by_epochs.__getattr__(epoch).summary["result_folder"] == path]
            params = params[cols_to_keep_summary]
            byhost[f'exp_{i}']['summary'] = params.to_dict(orient='records')[0]        
            subfolder = [(subfold.split('/')[-1], os.path.join(path, f'{subfold}')) for subfold in os.listdir(path)]
            for k in range(len(subfolder)):
                client_name = subfolder[k][0]
                client_name = client_name.replace('client_host','client')
                client_path = subfolder[k][1]
                files = os.listdir(client_path)
                for e,file in enumerate(files):
                    if file == 'client.log' :
                        files[e] = 'client_log'
                    elif file == 'server.log':
                        files[e] = 'server_log'
                    elif file == 'client_pids.csv':
                        files[e] = 'client_pid'
                    elif file == 'client_sparse.log':
                        files[e] = 'client_sparse'
                    else:
                        files[e] = re.split('[._]', file)[0]
                result_files = [(name, os.path.join(client_path,file)) for name,file in zip(files,os.listdir(client_path))]
                for file_name, file_path in result_files:
                    byhost[f'exp_{i}'].setdefault(client_name, {}).setdefault(file_name,file_path)
                #byhost[f'exp_{i}'][client_name] = subfolder[k][1]
        epochs_dict[epoch] = byhost
    return epochs_dict

In [22]:
parent_path = "/Users/Slaton/Documents/grenoble-code/fl-flower/energyfl/outputcifar10/10clients/comm"

def mycondition1(summary):
    return (
        ((summary["client.local_epochs"] == 1) & (summary["params.num_rounds"] == 300) & (summary["client.lr"]==0.0316)) |
        ((summary["client.local_epochs"].isin([3, 5])) & (summary["params.num_rounds"] == 100) & (summary["client.lr"]==0.0316))
    )

def mycondition2(summary):
    return (
        (summary["client.local_epochs"].isin([1,3,5])) & 
        ((summary["client.lr"]==0.0316) | (summary["client.lr"]==0.01))
    )
    
def create_json_file(strategies, parent_path,condition,split='labelskew'):
    strategy_dict = {}
    epoch_list = [1,3,5]
    for strategy in strategies:
        path = os.path.join(parent_path, strategy, split)
        #summary_path = os.path.join(path, "experiment_summary.csv")
        summary = read_summaryfile(path, condition=condition)
        by_epochs, summary_epochs = filter_epochs(summary, epoch_list)
        strategy_dict.setdefault(strategy, {}).setdefault('exp_summary', summary_epochs.to_dict(orient='records'))
        strategy_dict[strategy]['split_epoch'] = create_epochs_dict(by_epochs)
        #strategy_dict[strategy] = create_epochs_dict(by_epochs)
    return strategy_dict

In [23]:
strategies = ['fedavg','fedadam','fedadagrad','fedyogi','fedsfw']
strategy_dict = create_json_file(strategies, parent_path, condition=mycondition2)

Not enough result, remove /Users/Slaton/Documents/grenoble-code/fl-flower/energyfl/outputcifar10/10clients/comm/fedavg/labelskew/2024-04-26_01-39-25 from summary
Not enough result, remove /Users/Slaton/Documents/grenoble-code/fl-flower/energyfl/outputcifar10/10clients/comm/fedavg/labelskew/2024-04-26_04-43-45 from summary


In [24]:
strategy_dict

{'fedavg': {'exp_summary': [],
  'split_epoch': {'epoch_1': {}, 'epoch_3': {}, 'epoch_5': {}}},
 'fedadam': {'exp_summary': [{'result_folder': '/Users/Slaton/Documents/grenoble-code/fl-flower/energyfl/outputcifar10/10clients/comm/fedadam/labelskew/2024-04-29_20-03-41',
    'energy_file': 'energy.csv',
    'server': 'estats-10.toulouse.grid5000.fr',
    'sleep_duration': 30,
    'neuralnet': 'ResNet18',
    'strategy': 'fedadam',
    'optimizer': 'SGD',
    'timestamps.start_experiment_before_sleep': '2024-04-29_20-03-52_120072',
    'timestamps.start_experiment': '2024-04-29_20-04-22_150575',
    'timestamps.end_experiment': '2024-04-29_21-12-35_840554',
    'timestamps.end_experiment_after_sleep': '2024-04-29_21-13-05_869404',
    'params.num_rounds': 100,
    'params.num_clients_per_round_fit': 2,
    'params.num_clients_per_round_eval': 2,
    'params.min_available_clients': 2,
    'params.fraction_fit': 1,
    'params.fraction_evaluate': 1,
    'params.wait_round': 100,
    'params

In [20]:
strategy_dict['fedsfw']['split_epoch']['epoch_1']

{'exp_0': {'summary': {'result_folder': '/Users/Slaton/Documents/grenoble-code/fl-flower/energyfl/outputcifar10/10clients/comm/fedsfw/labelskew/2024-05-02_20-50-34',
   'server': 'estats-10.toulouse.grid5000.fr',
   'timestamps.start_experiment_before_sleep': '2024-05-02_20-50-44_015705',
   'timestamps.start_experiment': '2024-05-02_20-51-14_035603',
   'timestamps.end_experiment': '2024-05-02_23-46-21_843098',
   'timestamps.end_experiment_after_sleep': '2024-05-02_23-46-51_844096',
   'client.local_epochs': 1},
  'client_9': {'processes': '/Users/Slaton/Documents/grenoble-code/fl-flower/energyfl/outputcifar10/10clients/comm/fedsfw/labelskew/2024-05-02_20-50-34/client_host_9/processes.csv',
   'fitresult': '/Users/Slaton/Documents/grenoble-code/fl-flower/energyfl/outputcifar10/10clients/comm/fedsfw/labelskew/2024-05-02_20-50-34/client_host_9/fitresult_client_9.csv',
   'network': '/Users/Slaton/Documents/grenoble-code/fl-flower/energyfl/outputcifar10/10clients/comm/fedsfw/labelskew/2

In [6]:
#Check if match between two keys of each strategies
strategy = 'fedyogi'
for i in range(15):
    e = 1 if i < 5 else 3 if 5 <= i < 10 else 5
    val_1 = strategy_dict[strategy]['split_epoch'][f'epoch_{e}'][f'exp_{i%5}']['summary']['result_folder']
    val_2 = pd.DataFrame(strategy_dict[strategy]['exp_summary'])['result_folder'].iloc[i]
    assert val_1 == val_2

In [148]:
# path_10clients = "/home/tunguyen/energyfl/outputcifar10/10clients"
# with open(os.path.join(path_10clients,'experiments.json'), 'w') as f:
#     json.dump(strategy_dict, f)

In [7]:
with open('./experiments.json', 'w') as f:
    json.dump(strategy_dict, f)

In [152]:
# import os

# file_path = os.path.join(path_10clients, 'experiments.json')
# print(os.path.exists(file_path))

True
