In [1]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Global display configuration, applied once for the whole notebook.
# seaborn: use the "ticks" style.
sns.set(style="ticks")
# NumPy: print 1 decimal place, suppress scientific notation for small
# values, and summarise arrays that have more than 5 elements.
np.set_printoptions(precision=1, suppress=True, threshold=5)
# pandas: render floats with 3 decimal places and never truncate the rows
# of a displayed frame.
pd.set_option('display.precision', 3)
pd.set_option('display.max_rows', None)  # Show all rows

In [2]:
def _parse_eval_filename(fname):
    """Decode the run settings encoded in an ``eval_*.json`` file name.

    The base name is expected to look like
    ``eval_<seed>_<runs>_<noise>_<method>_<ph>-<ah>[_<extra>...].json``.
    The method decides how the trailing fields are read:

    * ``coherence`` / ``cma`` / ``cwarm``: ``<nsample>_<decay>``
    * ``contrast`` / ``positive`` / ``negative``: ``<nsample>_<nmode>``
    * ``bid``: ``<nsample>_<nmode>_<decay>``
    * ``ema``: ``<decay>``
    * any other method: no extra fields, the defaults are kept.

    Only the base name is inspected, so a parent directory whose name starts
    with ``eval_`` is never mistaken for the settings.

    Args:
        fname: Path to an evaluation result file.

    Returns:
        dict with the keys ``seed``, ``runs``, ``noise``, ``method``, ``ph``,
        ``ah``, ``nsample``, ``nmode`` and ``decay``. ``nsample`` and
        ``nmode`` default to 1 and ``decay`` to NaN when the method does not
        provide them.

    Raises:
        ValueError: If the base name does not start with ``eval_`` or a field
            cannot be converted to a number.
        IndexError: If the name has fewer fields than the method requires.
    """
    name = os.path.basename(fname)
    if not name.startswith('eval_'):
        raise ValueError(f"expected a file named 'eval_*', got {fname!r}")
    name = name[len('eval_'):]
    # Strip only a trailing extension: numeric fields such as "0.5" contain dots.
    if name.endswith('.json'):
        name = name[:-len('.json')]
    setting = name.split('_')

    method = setting[3]
    horizon = setting[4].split('-')
    parsed = {
        'seed': int(setting[0]),
        'runs': int(setting[1]),
        'noise': float(setting[2]),
        'method': method,
        'ph': int(horizon[0]),
        'ah': int(horizon[1]),
        # Defaults for methods that do not encode these settings.
        'nsample': 1,
        'nmode': 1,
        'decay': np.nan,
    }

    if method in ('coherence', 'cma', 'cwarm'):
        parsed['nsample'] = int(setting[5])
        parsed['decay'] = float(setting[6])
    elif method in ('contrast', 'positive', 'negative'):
        parsed['nsample'] = int(setting[5])
        parsed['nmode'] = int(setting[6])
    elif method == 'bid':
        parsed['nsample'] = int(setting[5])
        parsed['nmode'] = int(setting[6])
        parsed['decay'] = float(setting[7])
    elif method == 'ema':
        parsed['decay'] = float(setting[5])

    return parsed


def load_data(patterns, metric):
    """Collect evaluation results matching ``patterns`` into one sorted table.

    Every matched JSON file is read with ``pd.read_json(orient='index')`` and
    transposed, then annotated with the settings decoded from its file name
    (see ``_parse_eval_filename``).

    Args:
        patterns: Iterable of glob patterns; each pattern's matches are
            loaded in sorted order.
        metric: Name of the JSON entry to keep as the score column.

    Returns:
        DataFrame with the columns ``noise``, ``ah``, ``method``, ``nsample``,
        ``decay``, ``nmode``, ``seed`` and ``metric``. Rows are sorted by
        noise, ah, method (``random``, ``warmstart``, ``ema``, ``bid`` first,
        every other method after them), then the remaining columns.

    Raises:
        ValueError: If no file matches any pattern, or a file name cannot be
            parsed.
        KeyError: If ``metric`` is absent from the loaded files.
    """
    dfs = []
    for pattern in patterns:
        for fname in sorted(glob.glob(pattern)):
            settings = _parse_eval_filename(fname)
            df = pd.read_json(fname, orient='index').transpose()
            for column, value in settings.items():
                df[column] = value
            dfs.append(df)

    if not dfs:
        # pd.concat would fail with an opaque "No objects to concatenate".
        raise ValueError(f'no evaluation files matched any of the patterns: {patterns}')

    dff = pd.concat(dfs, ignore_index=True)
    dff = dff.astype({'decay': float, 'nsample': int, 'nmode': int, 'runs': int})
    # JSON values can arrive as object dtype after the transpose.
    dff[metric] = dff[metric].astype(float)

    # Fixed presentation order for the known methods; unknown ones sort last
    # and are then ordered alphabetically by the 'method' column itself.
    method_order = {'random': 0, 'warmstart': 0.8, 'ema': 1, 'bid': 2}
    dff['sort_key'] = dff['method'].apply(
        lambda name: method_order.get(name.lower(), len(method_order))
    )

    sort_columns = ['noise', 'ah', 'sort_key', 'method', 'nsample', 'decay', 'nmode', 'seed', metric]
    dff = dff.sort_values(sort_columns).drop('sort_key', axis=1)

    cols = ['noise', 'ah', 'method', 'nsample', 'decay', 'nmode', 'seed', metric]
    return dff[cols]

In [3]:
# Experiment selection: the task, the metric to tabulate, and the folder
# (under `..`) that holds the evaluation outputs.
task = 'pusht'
metric = 'test/mean_score'
foldername = 'outputs'

# One glob per method to compare; result files sit three directory levels
# below the task folder.
patterns = [
    f'../{foldername}/{task}/*/*/*/eval_*_{method}_*.json'
    for method in ('random', 'warmstart', 'ema', 'contrast', 'bid')
]
df = load_data(patterns, metric)

display(df)


Unnamed: 0,noise,ah,method,nsample,decay,nmode,seed,test/mean_score
0,0.0,1,random,1,,1,0,0.846
1,0.0,8,random,1,,1,0,0.884
4,0.0,8,warmstart,1,,1,0,0.887
6,0.0,8,ema,1,0.5,1,0,0.866
8,0.0,8,bid,15,0.5,3,0,0.928
2,1.0,1,random,1,,1,0,0.805
5,1.0,1,warmstart,1,,1,0,0.852
7,1.0,1,ema,1,0.5,1,0,0.823
9,1.0,1,bid,15,0.5,3,0,0.889
3,1.0,8,random,1,,1,0,0.582
