In [1]:
import os
import glob
from pathlib import Path
import pandas as pd
import json
import numpy as np
from tensorboard.backend.event_processing import event_accumulator
import seaborn as sns
import matplotlib.pyplot as plt

def get_best_downstream_loss(tb_dir, tag='best_downstream_loss__gen_9'):
    """Return the last value logged under ``tag`` in a TensorBoard event log.

    Parameters
    ----------
    tb_dir : str
        Path passed straight to ``event_accumulator.EventAccumulator``.
    tag : str, optional
        Name of the scalar series to read. Defaults to
        ``'best_downstream_loss__gen_9'``, the tag this notebook reports.

    Returns
    -------
    The ``.value`` attribute of the last scalar event stored under ``tag``,
    or ``None`` when the tag is missing or holds no events.
    """
    ea = event_accumulator.EventAccumulator(tb_dir)
    ea.Reload()
    try:
        return ea.Scalars(tag)[-1].value
    except (KeyError, IndexError):
        # KeyError: tag was never logged; IndexError: tag exists but is empty.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return None
    

# Experiment ids; each is the name of a directory directly under an "essl2" folder.
exps = ["exp6", "exp7", "exp8", "exp9"]

# Root of the tree that is searched recursively for ".../essl2/<exp>" directories.
p = Path('/home/noah/ESSL/cc_experiments/')
# Sorted so that the row order of the resulting frame (and therefore which
# duplicate a later keep='last' de-duplication retains) does not depend on
# filesystem enumeration order.
dirs = sorted(str(p_i) for p_i in p.glob('**/essl2/*'))


def _parents_of(exp_dir, leaf):
    """Return the set of directories matched by ``<exp_dir>/**/<leaf>``, minus the leaf.

    ``glob.glob`` is called without ``recursive=True``, so ``**`` matches a
    single path component here.
    """
    return {os.path.dirname(hit) for hit in glob.glob(os.path.join(exp_dir, "**/" + leaf))}


csv_map = []
for e in exps:
    # Experiment directories whose final path component is exactly this id.
    e_dirs = [d for d in dirs if os.path.basename(d) == e]
    csv_map_i = []
    for e_dir in e_dirs:
        model_dirs = _parents_of(e_dir, "models")
        plot_dirs = _parents_of(e_dir, "plots")
        tbs = _parents_of(e_dir, "tensorboard")

        # A seed directory counts as finished when it has both "models" and "plots".
        finished_exps = model_dirs & plot_dirs
        # Skip the whole experiment directory if any finished seed lacks a
        # "tensorboard" folder.
        if not finished_exps <= tbs:
            continue
        for f_e in sorted(finished_exps):
            csv_map_i.append([e, os.path.basename(f_e), e_dir, f_e])

    # Order rows by seed directory name (compared as strings) within each experiment.
    csv_map += sorted(csv_map_i, key=lambda x: str(x[1]))

columns = ["exp", "seed", "exp_dir", "seed_dir"]
df = pd.DataFrame(csv_map, columns=columns)

# Keep one row per (exp, seed); if a seed was found in several experiment
# directories, the last one listed wins.
df = df.drop_duplicates(["exp", "seed"], keep='last')

# One record per entry of outcomes["pop_vals"], across all experiments and seeds.
pop_vals_df = []
for exp in df["exp"].unique():
    exp_df = df[df["exp"] == exp]
    for _, row in exp_df.iterrows():
        outcomes_path = os.path.join(row["seed_dir"], "outcomes.json")

        # Reading the tensorboard loss is best-effort: a missing or unreadable
        # event file yields None instead of aborting the whole scan.
        tb_files = glob.glob(os.path.join(row["seed_dir"], "tensorboard/*/*.tfevents*"))
        best_loss = None
        if tb_files:
            try:
                best_loss = get_best_downstream_loss(tb_files[0])
            except Exception:
                best_loss = None

        with open(outcomes_path, "r") as f:
            outcomes = json.load(f)

        # o[0] and o[1] are stored under "generation" and "test acc"; the algorithm
        # label is read from outcomes['chromos'][i][1][0]. best_loss is the same
        # for every record of a seed.
        pop_vals_df += [
            [exp, outcomes['chromos'][i][1][0], row["seed"], o[0], o[1], best_loss]
            for i, o in enumerate(outcomes["pop_vals"])
        ]

df = pd.DataFrame(pop_vals_df, columns=["exp", "algo", "seed", "generation", "test acc", "test loss"])
# dropna() removes every row with a missing field, including all rows of a seed
# whose tensorboard loss could not be read.
df = df.dropna()
df

2022-11-22 10:46:27.688414: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-22 10:46:27.774257: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/torch/install/lib:/usr/local/cuda/lib64
2022-11-22 10:46:27.774273: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-11-22 10:46:28.246604: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: 

Unnamed: 0,exp,algo,seed,generation,test acc,test loss
0,exp6,NNCLR,0,0,82.430000,0.512516
1,exp6,SwaV,0,0,82.620000,0.512516
2,exp6,NNCLR,0,0,82.490000,0.512516
3,exp6,BYOL,0,0,83.440000,0.512516
4,exp6,BYOL,0,0,82.870000,0.512516
...,...,...,...,...,...,...
2695,exp9,NNCLR,1,9,92.286417,0.308152
2696,exp9,BYOL,1,9,92.271051,0.308152
2697,exp9,BYOL,1,9,92.301782,0.308152
2698,exp9,BYOL,1,9,91.971420,0.308152


In [2]:

# Human-readable labels for the experiment ids.
enc = {
    'exp6': 'bsize=32, data=cifar10',
    'exp7': 'bsize=256, data=cifar10',
    'exp8': 'bsize=32, data=svhn',
    'exp9': 'bsize=256, data=svhn',
}

# enc.get(x, x) leaves already-translated labels untouched and errors="ignore"
# tolerates already-dropped columns, so re-running this cell no longer raises
# KeyError.
df = (
    df.assign(exp=df['exp'].map(lambda x: enc.get(x, x)))
      .drop(columns=["generation", "seed"], errors="ignore")
)

# For each experiment (in order of first appearance) keep the single row with
# the highest "test acc".
best_idx = df.groupby("exp", sort=False)["test acc"].idxmax()
df_table = df.loc[best_idx.tolist()]
df_table.to_csv("/home/noah/ESSL/cc_experiments/results/mo_best.csv")
print(df_table.to_latex(caption="best test acc, Multi-Obj.", label="bta_mo"))


\begin{table}
\centering
\caption{best test acc, Multi-Obj.}
\label{bta_mo}
\begin{tabular}{lllrr}
\toprule
{} &                      exp &     algo &   test acc &  test loss \\
\midrule
732  &   bsize=32, data=cifar10 &     SwaV &  84.180000 &   0.522088 \\
1426 &  bsize=256, data=cifar10 &    NNCLR &  84.240000 &   0.526651 \\
2523 &      bsize=32, data=svhn &  SimSiam &  93.066226 &   0.304809 \\
2578 &     bsize=256, data=svhn &  SimSiam &  92.943301 &   0.308152 \\
\bottomrule
\end{tabular}
\end{table}



Get the best values for each SSL algorithm in each experiment.

In [33]:
# One row per (exp, algo): the row with the highest "test acc" together with the
# "test loss" stored on that same row. idxmax returns the first row that reaches
# the maximum, so a single lookup replaces a full-frame scan per group.
best_rows = df.groupby(["exp", "algo"])["test acc"].idxmax()
best_outcomes = (
    df.loc[best_rows.tolist(), ["exp", "algo", "test acc", "test loss"]]
      .reset_index(drop=True)
)
best_outcomes.to_csv("/home/noah/ESSL/cc_experiments/results/best_results_mo.csv")

In [3]:
# NOTE(review): disabled plotting cell, kept for reference only. It cannot run
# as written: it reads a "fitness" column, but the frame built in the first cell
# has "test acc" instead, and the preceding cell drops "generation". The
# set_xticks line assigns to the method instead of calling it and uses
# outcomes['avg'], which is not tied to the plotted group.
# sns.set_theme()
# for group, data in df.groupby(["exp", "algo"]):
#     fig, ax = plt.subplots(1,2, figsize=(15, 5))
#     sns.boxplot(data=data, x="generation", y="fitness", color='white', ax=ax[0])
#     plt.suptitle(f"{group[0]} {group[1]}")
#     ax[0].set_ylabel("Test Accuracy")
#     for i, row in data.iterrows():
#         ax[0].scatter(np.random.normal(row["generation"], 0.04), row["fitness"], alpha=0.7, color='skyblue')
#     avg_i = data.groupby("generation")['fitness'].median()
#     max_i = data.groupby("generation")['fitness'].max()
#     min_i = data.groupby("generation")['fitness'].min()
#     ax[1].plot(range(len(avg_i)), avg_i, 'b-')
#     ax[1].plot(range(len(max_i)), max_i, 'b-')
#     ax[1].plot(range(len(min_i)), min_i, 'b-')
#     ax[1].fill_between(range(len(avg_i)), min_i, max_i, color='b',
#                      alpha=0.2)
#     ax[1].set_xlabel("Generation")
#     ax[1].set_ylabel("Test Accuracy")
#     ax[1].set_xticks = (range(len(outcomes['avg'])))
#     plt.show()
#     plt.clf()

Compare the final generation of all seeds for each method. Most of them yield similar results.

In [4]:
# NOTE(review): disabled cell, kept for reference only. It depends on
# `df_best_gen`, which is not defined anywhere in the visible notebook, and it
# would write to the same mo_best.csv path that the active "In [2]" cell writes.
# from IPython.core.display import display, HTML
# enc= {k:v for k, v in zip(['exp6', 'exp7', 'exp8', 'exp9'], 
#                           ['bsize=32, data=cifar10', 'bsize=256, data=cifar10', 
#                              'bsize=32, data=svhn', 'bsize=256, data=svhn'])}

# df_table = df_best_gen[['exp', 'algo','fitness', 'best_loss']]
# df_table.columns = ['exp', 'algo', 'test acc', 'test loss']
# df_table['exp'] = df_table['exp'].apply(lambda x: enc[x.split('_')[0]])
# for exp in df_table["exp"].unique():
#     display(df_table[df_table["exp"]== exp].max())
# df_table.to_csv("/home/noah/ESSL/cc_experiments/results/mo_best.csv")

In [5]:
# NOTE(review): disabled cell, kept for reference only. It groups by "seed" and
# selects "fitness" / "best_loss", none of which are columns of `df` after the
# active cells have run, and it needs display/HTML, which are only imported in a
# commented-out line.
# df_best_gen_seed = df.groupby(["exp", "algo", "seed"]).max().reset_index()
# df_best_gen_seed = df_best_gen_seed.groupby(["exp", "algo"]).mean().reset_index()

# df_table = df_best_gen_seed[['exp', 'algo','fitness', 'best_loss']]
# df_table.columns = ['exp', 'algo', 'avg test acc', 'avg test loss']
# df_table['exp'] = df_table['exp'].apply(lambda x: enc[x.split('_')[0]])
# for exp in df_table["exp"].unique():
#     display(HTML(df_table[df_table["exp"]== exp].to_html()))
# df_table.to_csv("/home/noah/ESSL/cc_experiments/results/mo_avg_best.csv")