In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later dropped the old aliases; prefer the new name and fall back to the
# legacy one so the notebook runs on both old and new matplotlib releases.
_whitegrid_style = 'seaborn-v0_8-whitegrid'
plt.style.use(_whitegrid_style if _whitegrid_style in plt.style.available else 'seaborn-whitegrid')
import pandas as pd
from plot import *
from analyze import *
from analyze_budget import *

  plt.style.use('seaborn-whitegrid')


In [22]:
def postprocess_df(series, exclude_keys=None):
    """
    Fold the extra MultiIndex levels of an aggregated Series into its 'algo' label.

    Every index level that is not listed in `exclude_keys` is appended to the
    'algo' value as ``_<level name>:<level value>``, so that each distinct
    configuration ends up with its own algorithm label.

    Parameters:
    - series: pandas.Series with a MultiIndex that contains at least the
      'algo' and 'dataset' levels. Its name is used as the value column.
    - exclude_keys: iterable of column names that must not be folded into the
      label. Defaults to ['algo', 'dataset', 'final_acc']. The argument is
      never modified.

    Returns:
    - A DataFrame with the columns 'algo' (the folded label), 'dataset' and
      'value' (the Series values).
    """
    if exclude_keys is None:
        exclude_keys = ['algo', 'dataset', 'final_acc']

    # Work on a private set: the caller's list must not grow on every call.
    # The value column (series.name) and the scratch column are always
    # skipped, otherwise the metric value itself would leak into the label
    # whenever the metric is not listed in `exclude_keys`.
    skipped = set(exclude_keys)
    skipped.update(['modified_algo', series.name])

    # Turn the index levels into ordinary columns.
    df = series.reset_index()

    # Scratch column that accumulates the folded label.
    df['modified_algo'] = df['algo']

    for key in df.columns:
        if key not in skipped:
            # Append the key name and its value to the 'modified_algo' entries
            df['modified_algo'] = df['modified_algo'] + '_' + key + ':' + df[key].astype(str)

    # Keep only the label, the dataset and the value, under their final names.
    final_df = df[['modified_algo', 'dataset', series.name]].rename(
        columns={'modified_algo': 'algo', series.name: 'value'}
    )

    return final_df

def aggregate_results(df, keys=None, metric=None, post_process=True,
                      error_type="sem"):
    """
    Compute the per-group mean and spread of `metric`.

    Parameters:
    - df: DataFrame holding one row per run, with the grouping columns, a
      'dataset' column and the `metric` column.
    - keys: columns to group by in addition to 'dataset'. Defaults to
      ["algo", "use_contrastive"]. The argument is never modified.
    - metric: name of the column to aggregate. Defaults to "final_acc".
    - post_process: when True, both results are passed through
      `postprocess_df` so the extra grouping keys are folded into 'algo'.
    - error_type: "sem" for the standard error of the mean; any other value
      selects the standard deviation.

    Returns:
    - (mean, spread): two Series indexed by the grouping keys, or two
      DataFrames with 'algo', 'dataset' and 'value' columns when
      `post_process` is True.
    """
    if metric is None:
        metric = "final_acc"

    # Copy before extending: appending to the caller's list in place would
    # add another "dataset" entry every time the same list is reused.
    group_keys = list(keys) if keys is not None else ["algo", "use_contrastive"]
    if "dataset" not in group_keys:
        group_keys.append("dataset")

    # Build the grouping once and reuse it for both statistics.
    grouped = df.groupby(group_keys)[metric]
    m = grouped.mean()
    stderr = grouped.sem() if error_type == "sem" else grouped.std()

    if post_process:
        exclude_keys = ["algo", "dataset", metric]
        m = postprocess_df(m, exclude_keys)
        stderr = postprocess_df(stderr, exclude_keys)
    return m, stderr

In [23]:
# Notebook-wide aggregation settings read by every load_* function below.
# None lets aggregate_results fall back to its own default grouping keys.
keys = None
# "sem" selects the standard error of the mean in aggregate_results.
error_type = "sem"

In [24]:
def format_df_for_table(m):
    """
    Reshape a long ('algo', 'dataset', 'value') frame into one row per method.

    The 'algo' label is split at its first underscore into 'base' (the part
    before it) and 'algorithm' (everything after it); labels without an
    underscore get the algorithm 'vanilla'. A literal
    '_use_contrastive:False' is stripped from the label before splitting.

    Parameters:
    - m: DataFrame with the columns 'algo', 'dataset' and 'value'. It is not
      modified.

    Returns:
    - DataFrame with the columns 'base' and 'algorithm' plus one column per
      dataset holding the first 'value' found for that combination.
    """
    res = m.copy()
    # Literal (non-regex) removal so the result does not depend on the
    # pandas-version-specific default of `regex`.
    res['algo_clean'] = res['algo'].str.replace('_use_contrastive:False', '', regex=False)

    # Now, extract 'base' and 'algorithm' accurately
    res['base'] = res['algo_clean'].str.extract(r'^([^_]+)')[0]
    res['algorithm'] = res['algo_clean'].str.extract(r'_(.+)$')[0]

    # Assign the filled column back: an in-place fillna on the selected
    # column operates on a temporary under pandas Copy-on-Write and would
    # leave the NaNs (and thereby drop the 'vanilla' rows in the pivot).
    res['algorithm'] = res['algorithm'].fillna('vanilla')

    # Drop the columns we don't need anymore
    res = res.drop(['algo', 'algo_clean'], axis=1)

    res = res.pivot_table(index=['base', 'algorithm'], columns='dataset', values='value', aggfunc='first').reset_index()
    return res

In [25]:
def pickbest(m, stderr, algo_part):
    """
    Pick, per dataset, the best-scoring configuration of one algorithm family.

    Parameters:
    - m: DataFrame of means with the columns 'algo', 'dataset' and 'value'.
    - stderr: DataFrame of errors with the same columns; it must share its
      row index with `m`, because the winning rows are looked up by label.
    - algo_part: pattern passed to `Series.str.contains` to select the rows
      whose 'algo' belongs to the family (interpreted as a regular
      expression, which is the pandas default).

    Returns:
    - (best_configs, best_errors): two DataFrames with a fresh 0..n-1 index,
      holding one row per dataset in order of first appearance - the row of
      `m` with the highest 'value' and the matching row of `stderr`. Both are
      empty, but keep their columns, when nothing matches `algo_part`.
    """
    # Filter both DataFrames to include only the rows where the 'algo' column contains the algorithm part
    filtered_m = m[m['algo'].str.contains(algo_part)]
    filtered_stderr = stderr[stderr['algo'].str.contains(algo_part)]

    # Collect the winning row label of every dataset first and select all of
    # them in one go: DataFrame.append was removed in pandas 2.0 and growing
    # a frame row by row is quadratic anyway.
    best_indices = []
    for dataset in filtered_m['dataset'].unique():
        dataset_m = filtered_m[filtered_m['dataset'] == dataset]
        best_indices.append(dataset_m['value'].idxmax())

    # Reset index of the resulting DataFrames for cleanliness
    best_configs = filtered_m.loc[best_indices].reset_index(drop=True)
    best_errors = filtered_stderr.loc[best_indices].reset_index(drop=True)

    return best_configs, best_errors


In [26]:
# Flags of the experiment configuration whose rows are kept by the
# combine-mode loaders; they are spliced into the raw algorithm labels below.
no_sparse = True
add_data_backward = True
make_new_opt = False

# Hyperparameter suffix shared by every raw label of that configuration.
_config_suffix = (
    f'_no_sparse_{no_sparse}_recv_mod_add_data_backward_{add_data_backward}'
    f'_make_new_opt_{make_new_opt}'
)

# Raw 'algorithm' label -> display name. Labels missing from this mapping
# become NaN under Series.map and are filtered out by the loaders.
remap = {
    # Currently disabled mappings (each key is followed by _config_suffix):
    #   'modmod' -> 'modmod'
    #   'recv_data' -> 'data'
    #   'grad_sharing' -> 'fed'
    #   'modmod+recv_data' -> 'modmod+data'
    #   'modmod+grad_sharing' -> 'modmod+fed'
    #   'recv_data+grad_sharing' -> 'data + fed'
    #   'modmod+heuristic_data' -> 'modmod+data'
    #   'heuristic_data+grad_sharing' -> 'data+fed'
    # Disabled mappings without the suffix:
    #   'heuristic_data' -> 'data'
    #   'heuristic_data+grad_sharing' -> 'data + fed'
    f'recv_data+modmod+grad_sharing{_config_suffix}': 'modmod + data + fed',
    f'heuristic_data+modmod+grad_sharing{_config_suffix}': 'modmod + data + fed',
}

In [27]:
def load_modular_combine(metric, only_all=False):
    """
    Load the combined-mode results and return the 'modular' rows as tables.

    Reads 'combine_modes_results.csv' and 'cifar_combine_modes_results.csv',
    aggregates `metric` with the notebook-level `keys` / `error_type`
    settings, renames the algorithms through `remap` and keeps only rows
    whose base is 'modular' and whose algorithm has a `remap` entry.

    Parameters:
    - metric: name of the result column to aggregate.
    - only_all: when True, keep only the 'modmod + data + fed' rows.

    Returns:
    - (m, stderr): mean table and error table, one row per
      ('base', 'algorithm') pair; rows sharing a display name are averaged.
    """
    main_runs = pd.read_csv('combine_modes_results.csv')
    cifar_runs = pd.read_csv('cifar_combine_modes_results.csv')
    all_runs = pd.concat([cifar_runs, main_runs])

    mean_long, err_long = aggregate_results(all_runs, keys=keys, metric=metric, error_type=error_type)

    def _select_modular(table):
        # Translate raw labels to display names; unmapped labels become NaN.
        table['algorithm'] = table['algorithm'].map(remap)
        table = table[table['base'] == 'modular']
        # hack: optionally restrict to the full combination only
        if only_all:
            table = table[table['algorithm'] == 'modmod + data + fed']
        table = table[table['algorithm'].notna()]
        # Several raw labels can map to one display name; average them.
        return table.groupby(['base', 'algorithm']).mean().reset_index()

    m = _select_modular(format_df_for_table(mean_long))
    stderr = _select_modular(format_df_for_table(err_long))
    return m, stderr


In [28]:
def load_monolithic_combine(metric):
    """
    Load the monolithic combined-mode results as mean and error tables.

    Reads 'monolithic_combine_modes_results.csv' and
    'cifar100_monolithic_combine_modes_results.csv', aggregates `metric`
    with the notebook-level `keys` / `error_type` settings, renames the
    algorithms through `remap` and keeps only rows whose base is
    'monolithic' and whose algorithm has a `remap` entry.

    Parameters:
    - metric: name of the result column to aggregate.

    Returns:
    - (m, stderr): mean table and error table, one row per
      ('base', 'algorithm') pair; rows sharing a display name are averaged.
    """
    main_runs = pd.read_csv('monolithic_combine_modes_results.csv')
    cifar_runs = pd.read_csv('cifar100_monolithic_combine_modes_results.csv')
    all_runs = pd.concat([cifar_runs, main_runs])

    mean_long, err_long = aggregate_results(all_runs, keys=keys, metric=metric, error_type=error_type)

    def _select_monolithic(table):
        # Translate raw labels to display names; unmapped labels become NaN.
        table['algorithm'] = table['algorithm'].map(remap)
        table = table[table['base'] == 'monolithic']
        table = table[table['algorithm'].notna()]
        # Several raw labels can map to one display name; average them.
        return table.groupby(['base', 'algorithm']).mean().reset_index()

    m = _select_monolithic(format_df_for_table(mean_long))
    stderr = _select_monolithic(format_df_for_table(err_long))
    return m, stderr

In [29]:
def load_baseline(metric):
    """
    Load the baseline runs and return their mean and error tables.

    Reads 'experiment_results/vanilla_jorge_setting_basis_no_sparse.csv',
    keeps the rows where 'use_contrastive' is False and aggregates `metric`
    with the notebook-level `keys` / `error_type` settings.

    Parameters:
    - metric: name of the result column to aggregate.

    Returns:
    - (m, stderr): both reshaped by `format_df_for_table`.
    """
    runs = pd.read_csv('experiment_results/vanilla_jorge_setting_basis_no_sparse.csv')
    non_contrastive = runs.loc[runs['use_contrastive'] == False]
    mean_long, err_long = aggregate_results(
        non_contrastive, keys=keys, metric=metric, error_type=error_type
    )
    return format_df_for_table(mean_long), format_df_for_table(err_long)

In [30]:
def load_recv(metric):
    """
    Load the 'jorge_setting_recv' runs and label them as the 'data' algorithm.

    Reads 'experiment_results/jorge_setting_recv.csv', keeps the rows where
    'use_contrastive' is False and aggregates `metric` with the
    notebook-level `keys` / `error_type` settings.

    Parameters:
    - metric: name of the result column to aggregate.

    Returns:
    - (m, stderr): tables from `format_df_for_table` whose 'algorithm'
      column is overwritten with 'data'.
    """
    # analyze('experiment_results/jorge_setting_recv')
    runs = pd.read_csv('experiment_results/jorge_setting_recv.csv')
    non_contrastive = runs.loc[runs['use_contrastive'] == False]
    mean_long, err_long = aggregate_results(
        non_contrastive, keys=keys, metric=metric, error_type=error_type
    )

    m = format_df_for_table(mean_long)
    stderr = format_df_for_table(err_long)
    # Every row of this file is reported under the same display name.
    for table in (m, stderr):
        table['algorithm'] = 'data'
    return m, stderr

In [31]:
def load_modmod(metric):
    """
    Load the modmod runs and label them as the 'modmod' algorithm.

    Reads the modmod result file and its 'leep_' counterpart, keeps the rows
    where 'use_contrastive' is False (and, for the leep file, only the
    'combined' dataset), concatenates both and aggregates `metric` with the
    notebook-level `keys` / `error_type` settings.

    Parameters:
    - metric: name of the result column to aggregate.

    Returns:
    - (m, stderr): tables from `format_df_for_table` whose 'algorithm'
      column is overwritten with 'modmod'.
    """
    main_runs = pd.read_csv('experiment_results/jorge_setting_lowest_task_id_wins_modmod_test_sync_base_True_opt_with_random_False_frozen_False_transfer_decoder_True_transfer_structure_True_no_sparse_basis_True.csv')
    main_runs = main_runs.loc[main_runs['use_contrastive'] == False]

    leep_runs = pd.read_csv('experiment_results/leep_jorge_setting_lowest_task_id_wins_modmod_test_sync_base_True_opt_with_random_False_frozen_False_transfer_decoder_True_transfer_structure_True_no_sparse_basis_True.csv')
    leep_runs = leep_runs.loc[leep_runs['use_contrastive'] == False]
    # Only the 'combined' dataset is taken from the leep file.
    leep_runs = leep_runs.loc[leep_runs['dataset'] == 'combined']

    all_runs = pd.concat([main_runs, leep_runs])
    mean_long, err_long = aggregate_results(all_runs, keys=keys, metric=metric, error_type=error_type)

    m = format_df_for_table(mean_long)
    stderr = format_df_for_table(err_long)
    # Every row of these files is reported under the same display name.
    for table in (m, stderr):
        table['algorithm'] = 'modmod'
    return m, stderr


In [32]:
def load_fedavg(metric):
    """
    Load the 'jorge_setting_fedavg' runs and label them as 'fedavg'.

    Reads 'experiment_results/jorge_setting_fedavg.csv', keeps the rows
    where 'use_contrastive' is False and aggregates `metric` with the
    notebook-level `keys` / `error_type` settings.

    Parameters:
    - metric: name of the result column to aggregate.

    Returns:
    - (m, stderr): tables from `format_df_for_table` whose 'algorithm'
      column is overwritten with 'fedavg'.
    """
    runs = pd.read_csv('experiment_results/jorge_setting_fedavg.csv')
    non_contrastive = runs.loc[runs['use_contrastive'] == False]
    mean_long, err_long = aggregate_results(
        non_contrastive, keys=keys, metric=metric, error_type=error_type
    )

    m = format_df_for_table(mean_long)
    stderr = format_df_for_table(err_long)
    # Every row of this file is reported under the same display name.
    for table in (m, stderr):
        table['algorithm'] = 'fedavg'
    return m, stderr

In [33]:
def load_fl(metric):
    """
    Load the federated-learning runs and keep the best configuration of each.

    Reads 'best_fl_results.csv', keeps the rows where 'use_contrastive' is
    False and aggregates `metric` with the notebook-level `keys` /
    `error_type` settings. For every combination of base ('modular',
    'monolithic') and algorithm ('fedcurv', 'fedprox'), `pickbest` selects
    the best row per dataset, and its label is reset to '<base>_<algorithm>'.

    Parameters:
    - metric: name of the result column to aggregate.

    Returns:
    - (best_fl, best_err): mean and error tables reshaped by
      `format_df_for_table`.
    """
    runs = pd.read_csv('best_fl_results.csv')
    runs = runs.loc[runs['use_contrastive'] == False]
    m, stderr = aggregate_results(runs, keys=keys, metric=metric, error_type=error_type)

    # Same visiting order as nested loops: base outermost, algorithm innermost.
    families = [
        f"{base}_{fed_algo}"
        for base in ['modular', 'monolithic']
        for fed_algo in ['fedcurv', 'fedprox']
    ]

    picked_means = []
    picked_errs = []
    for family in families:
        family_best, family_err = pickbest(m, stderr, family)
        # Replace the hyperparameter-laden label with the plain family name.
        family_best['algo'] = family
        family_err['algo'] = family
        picked_means.append(family_best)
        picked_errs.append(family_err)

    best_fl = format_df_for_table(pd.concat(picked_means))
    best_err = format_df_for_table(pd.concat(picked_errs))
    return best_fl, best_err


In [34]:
# Collect the AUC tables of every method into one mean frame and one error frame.
metric = 'auc'
funcs = [load_baseline, load_recv, load_fedavg, load_fl, load_modmod, load_modular_combine, load_monolithic_combine]
dfs, df_errs = [], []
for func in funcs:
    m, err = func(metric=metric)
    dfs.append(m)
    df_errs.append(err)

# Each loader returns its own 0..n index; renumber the stacked rows so the
# index is unique.
df_auc = pd.concat(dfs).reset_index(drop=True)
df_auc_err = pd.concat(df_errs).reset_index(drop=True)

# Only the final-accuracy metric is scaled by 100 here.
if metric == 'final_acc':
    for table in (df_auc, df_auc_err):
        table.loc[:, ~table.columns.isin(['base', 'algorithm'])] *= 100

  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = bes

In [35]:
# Show the AUC means with their errors; make_table_v3 is not defined in this
# notebook (presumably it comes from one of the star imports - verify).
make_table_v3(df_auc, df_auc_err)

Base,Algorithm,cifar100,combined,fashionmnist,kmnist,mnist
modular,vanilla,71.83095 +/- 0.13,88.04524 +/- 0.28,92.03035 +/- 0.32,80.62320 +/- 0.31,92.67858 +/- 0.22
monolithic,vanilla,65.59769 +/- 0.32,87.69488 +/- 0.28,93.27110 +/- 0.36,79.41837 +/- 0.31,93.10231 +/- 0.22
modular,data,73.42025 +/- 0.14,89.48271 +/- 0.25,92.96718 +/- 0.30,82.03789 +/- 0.29,93.89448 +/- 0.14
monolithic,data,67.23490 +/- 0.21,88.59379 +/- 0.28,94.71689 +/- 0.38,81.00830 +/- 0.29,94.54293 +/- 0.21
modular,fedavg,73.95755 +/- 0.14,87.99336 +/- 0.31,91.95554 +/- 0.45,80.09533 +/- 0.34,92.81523 +/- 0.23
monolithic,fedavg,69.10859 +/- 0.18,86.76124 +/- 0.31,93.59634 +/- 0.39,80.49006 +/- 0.35,93.75833 +/- 0.26
modular,fedcurv,73.94089 +/- 0.14,87.98533 +/- 0.29,92.32104 +/- 0.44,80.14174 +/- 0.38,92.75850 +/- 0.26
modular,fedprox,73.84597 +/- 0.13,88.02116 +/- 0.29,92.06747 +/- 0.46,80.20258 +/- 0.35,93.05266 +/- 0.21
monolithic,fedcurv,68.92815 +/- 0.17,86.86572 +/- 0.31,93.85225 +/- 0.40,80.39766 +/- 0.35,93.36926 +/- 0.31
monolithic,fedprox,69.09095 +/- 0.16,87.16168 +/- 0.31,93.99292 +/- 0.39,80.46526 +/- 0.35,93.64959 +/- 0.30


In [36]:
# Inspect the modular combined-mode AUC table on its own; the bare `m` on the
# last line makes the frame the cell output. Note that this rebinds `m`.
m, e = load_modular_combine(metric='auc')
m

dataset,base,algorithm,cifar100,combined,fashionmnist,kmnist,mnist
0,modular,modmod + data + fed,77.074436,90.408967,94.426507,82.895064,94.745248


In [37]:
# Collect the final-accuracy tables of every method into one mean frame and
# one error frame.
metric = 'final_acc'
funcs = [load_baseline, load_recv, load_fedavg, load_fl, load_modmod, load_modular_combine, load_monolithic_combine]
dfs, df_errs = [], []
for func in funcs:
    m, err = func(metric=metric)
    dfs.append(m)
    df_errs.append(err)

# Each loader returns its own 0..n index; renumber the stacked rows so the
# index is unique.
df_final = pd.concat(dfs).reset_index(drop=True)
df_final_err = pd.concat(df_errs).reset_index(drop=True)

# Scale every dataset column (everything except the two label columns) by 100.
if metric == 'final_acc':
    for table in (df_final, df_final_err):
        table.loc[:, ~table.columns.isin(['base', 'algorithm'])] *= 100

  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = best_errors.append(dataset_stderr.loc[best_index])
  best_configs = best_configs.append(dataset_m.loc[best_index])
  best_errors = bes

In [38]:
# Show the final-accuracy means with their errors; make_table_v3 is not
# defined in this notebook (presumably it comes from a star import - verify).
make_table_v3(df_final, df_final_err)

Base,Algorithm,cifar100,combined,fashionmnist,kmnist,mnist
modular,vanilla,70.03531 +/- 0.14,88.58490 +/- 0.36,92.80133 +/- 0.31,80.63172 +/- 0.31,93.46685 +/- 0.20
monolithic,vanilla,63.10844 +/- 0.32,86.58945 +/- 0.37,92.46734 +/- 0.33,78.22727 +/- 0.31,91.95880 +/- 0.27
modular,data,71.86266 +/- 0.16,89.03221 +/- 0.33,93.55734 +/- 0.28,82.59461 +/- 0.26,94.59515 +/- 0.12
monolithic,data,65.18141 +/- 0.17,87.81542 +/- 0.36,94.01612 +/- 0.35,80.70161 +/- 0.32,94.20244 +/- 0.17
modular,fedavg,70.98984 +/- 0.14,88.45254 +/- 0.37,92.45719 +/- 0.45,80.03750 +/- 0.35,93.56893 +/- 0.18
monolithic,fedavg,67.04531 +/- 0.16,86.56614 +/- 0.40,93.67609 +/- 0.39,80.55297 +/- 0.33,93.86382 +/- 0.20
modular,fedcurv,71.10688 +/- 0.13,88.45416 +/- 0.37,93.05695 +/- 0.40,80.33977 +/- 0.36,93.78444 +/- 0.19
modular,fedprox,71.81547 +/- 0.14,88.46861 +/- 0.37,92.92711 +/- 0.41,80.48461 +/- 0.35,93.92683 +/- 0.17
monolithic,fedcurv,66.79344 +/- 0.15,86.61881 +/- 0.41,93.74039 +/- 0.37,80.39203 +/- 0.34,93.66086 +/- 0.24
monolithic,fedprox,67.17703 +/- 0.17,86.85734 +/- 0.40,93.89984 +/- 0.35,80.25969 +/- 0.33,93.73559 +/- 0.23


In [43]:
# import pandas as pd

# def make_latex_table(df_final, df_final_err, df_auc, df_auc_err, remap_name=None):
#     if not remap_name:
#         remap_name = {}

#     # Define columns for the table display
#     columns = ['base', 'algorithm'] + [col for col in df_final.columns if col not in ('base', 'algorithm')]
#     max_final = df_final.max(numeric_only=True)  # Calculate max values for final accuracy columns to highlight best results
#     min_final = df_final.min(numeric_only=True)  # Calculate min values for final accuracy columns
#     max_auc = df_auc.max(numeric_only=True)  # Calculate max values for AUC columns to highlight best results
#     min_auc = df_auc.min(numeric_only=True)  # Calculate min values for AUC columns

#     # Start building the LaTeX table
#     latex = "\\begin{table}[ht]\n\\centering\n\\caption{Performance Metrics across Datasets and Algorithms}\n"
#     latex += "\\label{tab:performance_metrics}\n\\begin{adjustbox}{width=1.25\\textwidth}\n\\begin{tabular}{ll" + "c" * (len(columns) - 2) + "}\n\\toprule\n"
#     latex += " & ".join(["\\textbf{" + col.capitalize() + "}" for col in columns]) + " \\\\\n\\midrule\n"
    
#     # Organize data by 'base' first, then iterate
#     df_grouped = df_final.groupby('base')
#     for base, group in df_grouped:
#         first = True
#         for index, row in group.iterrows():
#             algorithm = remap_name.get(row['algorithm'], row['algorithm'])
#             row_items = [base if first else "", algorithm]  # Only show the base for the first entry
            
#             for dataset in columns[2:]:  # Start from 2 to skip 'base' and 'algorithm'
#                 final_value = df_final.loc[index, dataset]
#                 final_error = df_final_err.loc[index, dataset]
#                 auc_value = df_auc.loc[index, dataset]
#                 auc_error = df_auc_err.loc[index, dataset]

#                 # Determine if the values should be bold and colored
#                 final_str = f"{final_value:.2f} $\\pm$ {final_error:.2f}"
#                 auc_str = f"{auc_value:.2f} $\\pm$ {auc_error:.2f}"

#                 if final_value == max_final[dataset]:
#                     final_str = f"\\textbf{{\\textcolor{{Green}}{{{final_str}}}}}"
#                 elif final_value == min_final[dataset]:
#                     final_str = f"\\textbf{{\\textcolor{{red}}{{{final_str}}}}}"

#                 if auc_value == max_auc[dataset]:
#                     auc_str = f"\\textbf{{\\textcolor{{Green}}{{{auc_str}}}}}"
#                 elif auc_value == min_auc[dataset]:
#                     auc_str = f"\\textbf{{\\textcolor{{red}}{{{auc_str}}}}}"
                
#                 row_items.append(f"{final_str}/{auc_str}")  # Append the formatted string for this dataset
            
#             latex += " & ".join(row_items) + " \\\\\n"
#             first = False  # Subsequent rows won't show the 'base' again
    
#     latex += "\\bottomrule\n\\end{tabular}\n\\end{adjustbox}\n\\end{table}\n"
#     return latex


In [47]:
def make_latex_table(df_final, df_final_err, df_auc, df_auc_err, remap_name=None):
    """
    Render final-accuracy and AUC results as one LaTeX table.

    Each dataset cell reads '<final> $\\pm$ <err>/<auc> $\\pm$ <err>' with two
    decimals; the per-dataset maximum of each metric is set in bold.

    Parameters:
    - df_final, df_final_err: final-accuracy means and errors, with the
      columns 'base' and 'algorithm' plus one column per dataset.
    - df_auc, df_auc_err: AUC means and errors in the same layout. All four
      frames are addressed with the row labels of `df_final`, so they must
      share their index.
    - remap_name: optional mapping from algorithm name to display name.

    Returns:
    - The LaTeX source of the table as a string. Rows are grouped by 'base',
      which is printed on the first row of each group only.
    """
    display_names = remap_name if remap_name else {}

    # Column layout: the two label columns followed by every dataset column.
    dataset_cols = [col for col in df_final.columns if col not in ('base', 'algorithm')]
    columns = ['base', 'algorithm'] + dataset_cols
    # Per-dataset maxima decide which entries are highlighted.
    max_final = df_final.max(numeric_only=True)
    max_auc = df_auc.max(numeric_only=True)

    def _cell(value, error, best):
        # 'value +/- error', wrapped in \textbf when it is the column maximum.
        text = f"{value:.2f} $\\pm$ {error:.2f}"
        return f"\\textbf{{{text}}}" if value == best else text

    # Preamble and header row.
    latex = "\\begin{table}[ht]\n\\centering\n\\caption{Performance Metrics across Datasets and Algorithms}\n"
    latex += "\\label{tab:performance_metrics}\n\\begin{adjustbox}{width=1.25\\textwidth}\n\\begin{tabular}{ll" + "c" * (len(columns) - 2) + "}\n\\toprule\n"
    latex += " & ".join(["\\textbf{" + col.capitalize() + "}" for col in columns]) + " \\\\\n\\midrule\n"

    body_rows = []
    for base, group in df_final.groupby('base'):
        for position, (index, row) in enumerate(group.iterrows()):
            algorithm = display_names.get(row['algorithm'], row['algorithm'])
            # The base label is printed on the first row of its group only.
            row_items = [base if position == 0 else "", algorithm]

            for dataset in dataset_cols:
                final_value = df_final.loc[index, dataset]
                final_error = df_final_err.loc[index, dataset]
                auc_value = df_auc.loc[index, dataset]
                auc_error = df_auc_err.loc[index, dataset]

                final_str = _cell(final_value, final_error, max_final[dataset])
                auc_str = _cell(auc_value, auc_error, max_auc[dataset])
                row_items.append(f"{final_str}/{auc_str}")

            body_rows.append(" & ".join(row_items) + " \\\\\n")

    latex += "".join(body_rows)
    latex += "\\bottomrule\n\\end{tabular}\n\\end{adjustbox}\n\\end{table}\n"
    return latex


In [48]:
# Build the combined final-accuracy / AUC table and print its LaTeX source.
latex = make_latex_table(df_final, df_final_err, df_auc, df_auc_err)
print(latex)

\begin{table}[ht]
\centering
\caption{Performance Metrics across Datasets and Algorithms}
\label{tab:performance_metrics}
\begin{adjustbox}{width=1.25\textwidth}
\begin{tabular}{llccccc}
\toprule
\textbf{Base} & \textbf{Algorithm} & \textbf{Cifar100} & \textbf{Combined} & \textbf{Fashionmnist} & \textbf{Kmnist} & \textbf{Mnist} \\
\midrule
modular & vanilla & 70.04 $\pm$ 0.14/71.83 $\pm$ 0.13 & 88.58 $\pm$ 0.36/88.05 $\pm$ 0.28 & 92.80 $\pm$ 0.31/92.03 $\pm$ 0.32 & 80.63 $\pm$ 0.31/80.62 $\pm$ 0.31 & 93.47 $\pm$ 0.20/92.68 $\pm$ 0.22 \\
 & data & 71.86 $\pm$ 0.16/73.42 $\pm$ 0.14 & 89.03 $\pm$ 0.33/89.48 $\pm$ 0.25 & 93.56 $\pm$ 0.28/92.97 $\pm$ 0.30 & 82.59 $\pm$ 0.26/82.04 $\pm$ 0.29 & \textbf{94.60 $\pm$ 0.12}/93.89 $\pm$ 0.14 \\
 & fedavg & 70.99 $\pm$ 0.14/73.96 $\pm$ 0.14 & 88.45 $\pm$ 0.37/87.99 $\pm$ 0.31 & 92.46 $\pm$ 0.45/91.96 $\pm$ 0.45 & 80.04 $\pm$ 0.35/80.10 $\pm$ 0.34 & 93.57 $\pm$ 0.18/92.82 $\pm$ 0.23 \\
 & fedcurv & 71.11 $\pm$ 0.13/73.94 $\pm$ 0.14 & 88.45 $\pm$ 0.3

In [62]:
def make_auc_latex_table(df_auc, df_auc_err, remap_name=None):
    """
    Render the AUC results as a LaTeX table.

    Each dataset cell reads '<auc> $\\pm$ <err>' with two decimals; the
    per-dataset maximum is set in bold.

    Parameters:
    - df_auc, df_auc_err: AUC means and errors, with the columns 'base' and
      'algorithm' plus one column per dataset. Both frames are addressed
      with the row labels of `df_auc`, so they must share their index.
    - remap_name: optional mapping from algorithm name to display name.

    Returns:
    - The LaTeX source of the table as a string. Rows are grouped by 'base',
      which is printed on the first row of each group only.
    """
    display_names = remap_name if remap_name else {}

    # Column layout: the two label columns followed by every dataset column.
    dataset_cols = [col for col in df_auc.columns if col not in ('base', 'algorithm')]
    columns = ['base', 'algorithm'] + dataset_cols
    # Per-dataset maxima decide which entries are highlighted.
    max_auc = df_auc.max(numeric_only=True)

    # Preamble and header row.
    latex = "\\begin{table}[ht]\n\\centering\n\\caption{AUC Metrics across Datasets and Algorithms}\n"
    latex += "\\label{tab:auc_metrics}\n\\begin{adjustbox}{width=1.25\\textwidth}\n\\begin{tabular}{ll" + "c" * (len(columns) - 2) + "}\n\\toprule\n"
    latex += " & ".join(["\\textbf{" + col.capitalize() + "}" for col in columns]) + " \\\\\n\\midrule\n"

    body_rows = []
    for base, group in df_auc.groupby('base'):
        for position, (index, row) in enumerate(group.iterrows()):
            algorithm = display_names.get(row['algorithm'], row['algorithm'])
            # The base label is printed on the first row of its group only.
            row_items = [base if position == 0 else "", algorithm]

            for dataset in dataset_cols:
                auc_value = df_auc.loc[index, dataset]
                auc_error = df_auc_err.loc[index, dataset]

                auc_str = f"{auc_value:.2f} $\\pm$ {auc_error:.2f}"
                if auc_value == max_auc[dataset]:
                    auc_str = f"\\textbf{{{auc_str}}}"
                row_items.append(auc_str)

            body_rows.append(" & ".join(row_items) + " \\\\\n")

    latex += "".join(body_rows)
    latex += "\\bottomrule\n\\end{tabular}\n\\end{adjustbox}\n\\end{table}\n"
    return latex

In [63]:
# Build the AUC-only table and print its LaTeX source.
latex = make_auc_latex_table(df_auc, df_auc_err)
print(latex)

\begin{table}[ht]
\centering
\caption{AUC Metrics across Datasets and Algorithms}
\label{tab:auc_metrics}
\begin{adjustbox}{width=1.25\textwidth}
\begin{tabular}{llccccc}
\toprule
\textbf{Base} & \textbf{Algorithm} & \textbf{Cifar100} & \textbf{Combined} & \textbf{Fashionmnist} & \textbf{Kmnist} & \textbf{Mnist} \\
\midrule
modular & vanilla & 71.83 $\pm$ 0.13 & 88.05 $\pm$ 0.28 & 92.03 $\pm$ 0.32 & 80.62 $\pm$ 0.31 & 92.68 $\pm$ 0.22 \\
 & data & 73.42 $\pm$ 0.14 & 89.48 $\pm$ 0.25 & 92.97 $\pm$ 0.30 & 82.04 $\pm$ 0.29 & 93.89 $\pm$ 0.14 \\
 & fedavg & 73.96 $\pm$ 0.14 & 87.99 $\pm$ 0.31 & 91.96 $\pm$ 0.45 & 80.10 $\pm$ 0.34 & 92.82 $\pm$ 0.23 \\
 & fedcurv & 73.94 $\pm$ 0.14 & 87.99 $\pm$ 0.29 & 92.32 $\pm$ 0.44 & 80.14 $\pm$ 0.38 & 92.76 $\pm$ 0.26 \\
 & fedprox & 73.85 $\pm$ 0.13 & 88.02 $\pm$ 0.29 & 92.07 $\pm$ 0.46 & 80.20 $\pm$ 0.35 & 93.05 $\pm$ 0.21 \\
 & modmod & 76.80 $\pm$ 0.13 & 89.62 $\pm$ 0.26 & 93.26 $\pm$ 0.41 & 81.78 $\pm$ 0.29 & 93.94 $\pm$ 0.19 \\
 & modmod + data +

In [60]:
import pandas as pd

def make_gap_table(df_final, df_final_err, df_auc, df_auc_err):
    """
    Render the per-dataset relative gap between best and worst method as LaTeX.

    For every dataset column the gap is ``(max - min) / min * 100``, computed
    separately for the final-accuracy and the AUC frame. A minimum of zero
    yields the text 'Infinity' instead of a percentage.

    Parameters:
    - df_final: final-accuracy means, with the columns 'base' and 'algorithm'
      plus one column per dataset; its dataset columns define the table rows.
    - df_final_err: unused; kept so the signature matches the other table
      builders.
    - df_auc: AUC means containing the same dataset columns.
    - df_auc_err: unused; kept for the same reason.

    Returns:
    - The LaTeX source of the table as a string, one row per dataset.
    """
    # Define columns for the gap computation
    columns = [col for col in df_final.columns if col not in ('base', 'algorithm')]

    def _relative_gap(values):
        # Relative gap in percent; undefined (reported as inf) for a zero minimum.
        lowest = values.min()
        highest = values.max()
        return ((highest - lowest) / lowest * 100) if lowest != 0 else float('inf')

    def _format_gap(gap):
        # "\\%" is an escaped backslash followed by '%': the former "\%" was an
        # invalid escape sequence that newer Python versions warn about.
        return f"{gap:.2f}\\%" if gap != float('inf') else "Infinity"

    # Calculate the gaps for each dataset
    gaps_final = {col: _relative_gap(df_final[col]) for col in columns}
    gaps_auc = {col: _relative_gap(df_auc[col]) for col in columns}

    # Start building the LaTeX table
    latex = "\\begin{table}[ht]\n\\centering\n\\caption{Relative Gaps in Percentage for Final and AUC Scores across Datasets}\n"
    latex += "\\label{tab:relative_gaps_percentage}\n\\begin{tabular}{lcc}\n\\toprule\n"
    latex += "\\textbf{Dataset} & \\textbf{Final Gap (\\%) } & \\textbf{AUC Gap (\\%) } \\\\\n\\midrule\n"

    # Add data rows to the LaTeX table
    for col in columns:
        latex += f"{col} & {_format_gap(gaps_final[col])} & {_format_gap(gaps_auc[col])} \\\\\n"

    latex += "\\bottomrule\n\\end{tabular}\n\\end{table}\n"
    return latex


In [61]:
# Build the relative-gap table and print its LaTeX source.
latex = make_gap_table(df_final, df_final_err, df_auc, df_auc_err)
print(latex)

\begin{table}[ht]
\centering
\caption{Relative Gaps in Percentage for Final and AUC Scores across Datasets}
\label{tab:relative_gaps_percentage}
\begin{tabular}{lcc}
\toprule
\textbf{Dataset} & \textbf{Final Gap (\%) } & \textbf{AUC Gap (\%) } \\
\midrule
cifar100 & 22.09\% & 17.50\% \\
combined & 4.33\% & 4.20\% \\
fashionmnist & 1.78\% & 3.00\% \\
kmnist & 6.02\% & 4.38\% \\
mnist & 2.87\% & 2.23\% \\
\bottomrule
\end{tabular}
\end{table}

