In [815]:
# `display` and `HTML` belong to the public IPython.display API; importing them
# from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Stretch the notebook container to the full page width and load the Roboto web
# font, which the chart configuration further down requests via font='Roboto'.
display(HTML('<style>.container { width:100% !important; }</style><link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap" rel="stylesheet">'))

In [811]:
import pickle
import json
import copy
import numpy as np
import yaml
import pandas as pd
import altair as alt
from functools import partial
import ipywidgets as widgets
from altair_saver import save
# Select Altair's 'default' renderer for displaying charts.
alt.renderers.enable('default')
# Remove Altair's cap on the number of rows a chart's DataFrame may contain.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Note on early stopping

I think the way they do early stopping makes the online code not actually a bound on the MDL (or at least, it requires the assumption that you already have many samples of the data on both sides, in which case this would be a very poor bound on the MDL).

Directory with your experiments:

In [54]:
# Root directory holding one sub-folder per experiment (alternative location kept below).
# dir_with_experiments = '../../../description-length-probing-copy/control_tasks'
dir_with_experiments = '../..'
# Sub-folder of dir_with_experiments whose test report is read in the walkthrough cells.
experiment_name = 'online_l0_control'

In [55]:
# Number of label classes; the uniform code charges log2(num_classes) bits per target.
num_classes = 45  

## Reading logs

For example, let's look at the experiment with linguistic task, layer 1. 

### Test accuracy

In the experiment folder you will find a file with the test scores for the models trained on each portion. The last one is for the model trained on all data: this is the accuracy of the standard probe.

In [56]:
# Per-portion test scores. The context manager closes the file handle as soon as
# the JSON has been parsed instead of leaving it open for the kernel's lifetime.
with open(f'{dir_with_experiments}/{experiment_name}/online_test_report.json') as report_file:
    test_report = json.load(report_file)

In [57]:
# The last report entry is the model trained on all data; its accuracy is scaled by 100 and rounded to two decimals.
print("Test accuracy (standard probe): ", round(test_report[-1]['label_acc_test'] * 100, 2))

Test accuracy (standard probe):  96.22


### Codelength

In the experiment folder you will find a file with the information needed to evaluate codelength:

In [533]:
# online_report = pickle.load(open(f'{dir_with_experiments}/{experiment_name}/online_coding.pkl', 'rb'))
# NOTE(review): unpickling can execute arbitrary code -- only load reports you produced yourself.
# The context manager closes the file handle once the report has been read.
with open(f'{dir_with_experiments}/evaltrain_online_l2_n40000_corruptedFalse_seed2/online_coding.pkl', 'rb') as report_file:
    online_report = pickle.load(report_file)

These are the results for the model trained on a single portion (shown here: the second-to-last entry of the report):

In [534]:
# Display the second-to-last entry of the report.
online_report[-2]

{'loss_train': 37517.151065826416,
 'num_targets_train': 475954,
 'num_targets_real_train': 475954,
 'num_examples_train': 19916,
 'loss_dev': 4458.958408355713,
 'num_targets_dev': 40117,
 'num_targets_real_dev': 40117,
 'num_examples_dev': 1700,
 'loss_test': 5566.437459945679,
 'num_targets_test': 56684,
 'num_targets_real_test': 56684,
 'num_examples_test': 2416,
 'loss_online_portion': 46969.09385108948,
 'num_targets_online_portion': 474074,
 'num_targets_real_online_portion': 474074,
 'num_examples_online_portion': 19916}

Here number of examples is the number of sentences, number of targets is the number of actual targets (tokens).

#### Uniform codelength

In [102]:
# Reports name the training-target count either 'num_targets_train' or
# 'train_targets'; pick whichever key this report uses (same rule as get_mdls)
# so the cell does not fail with a KeyError on the other layout.
train_n_label = 'num_targets_train' if 'num_targets_train' in online_report[-1] else 'train_targets'
# The last entry belongs to the model trained on all data.
train_size = online_report[-1][train_n_label]
# Uniform code: log2(num_classes) bits for every training target.
uniform_codelength = train_size * np.log2(num_classes)
print('Uniform codelength: {:.2f}'.format(uniform_codelength / 1024))

Uniform codelength: 5095.13


#### Online codelength

Note that for MDL you __do not need__ the last portion - this is the model trained on all data.

In [103]:
# Reports name the training-target count either 'num_targets_train' or
# 'train_targets'; pick whichever key this report uses (same rule as get_mdls).
train_n_label = 'num_targets_train' if 'num_targets_train' in online_report[0] else 'train_targets'
# The targets of the first entry are charged at the uniform rate of
# log2(num_classes) bits each; on top of that comes loss_online_portion of every
# entry except the last one (the model trained on all data).
online_codelength = (
    online_report[0][train_n_label] * np.log2(num_classes)
    + sum(elem['loss_online_portion'] for elem in online_report[:-1])
)

In [104]:
# Print the online codelength divided by 1024 (labelled kbits) and the ratio of
# the uniform codelength to the online codelength.
print("Online codelength: {} kbits".format(round(online_codelength / 1024, 2)))
print("Compression: {} ".format(round(uniform_codelength / online_codelength, 2)))

Online codelength: 301.68 kbits
Compression: 16.89 


## All together now

In [784]:
def pd_reduce(dataframe, output_column, fn):
    """Fill ``output_column`` row by row with a fold over the rows of ``dataframe``.

    Rows are visited in their current order. For each row, ``fn(prev, curr,
    output_column)`` is called, where ``prev`` is the previous row (with its
    already computed ``output_column`` value) or ``None`` for the first row, and
    ``curr`` is the current row. The return value is stored in ``output_column``
    of the current row.

    Args:
        dataframe: input frame; it is copied, never modified.
        output_column: name of the column to create (or overwrite).
        fn: callable ``(prev, curr, output_column) -> value``.

    Returns:
        A copy of ``dataframe`` with ``output_column`` filled in.
    """
    result = dataframe.copy()
    result[output_column] = np.nan
    out_pos = result.columns.get_loc(output_column)
    # Address rows purely by position. Label-based access (`.loc[index]`) returns
    # several rows at once when the index contains duplicate labels, e.g. after
    # pd.concat without a reset_index, which would corrupt the fold.
    for row_pos in range(len(result)):
        prev = None if row_pos == 0 else result.iloc[row_pos - 1]
        curr = result.iloc[row_pos]
        result.iloc[row_pos, out_pos] = fn(prev, curr, output_column)
    return result

def auc_segment(prev, curr, output_column, epsilon=0):
    """Area contributed by one row to the loss-vs-data curve above ``epsilon``.

    The segment is a rectangle: its width is the number of training targets
    added since the previous row (all of ``curr.train_targets`` for the first
    row) and its height is the current mean test loss minus ``epsilon``,
    clipped at zero.

    Args:
        prev: previous row, or ``None`` for the first row.
        curr: current row; must provide ``train_targets`` and ``mean_loss_test``.
        output_column: unused; present so the function fits the ``pd_reduce``
            callback signature.
        epsilon: loss level below which nothing is counted.

    Returns:
        The non-negative area of the segment.
    """
    last_samp = 0 if prev is None else prev.train_targets
    excess_loss = max(curr.mean_loss_test - epsilon, 0)
    return (curr.train_targets - last_samp) * excess_loss

# def auc_segment(prev, curr, output_column, epsilon=0):
#     if prev is None: return 0
#     return max(prev.loss_online_portion - epsilon * prev.num_targets_online_portion, 0)



def sum_reduction(prev, curr, output_column, input_column='auc_segment'):
    """Running-sum step for ``pd_reduce``.

    Adds the current row's ``input_column`` value to the total already stored in
    ``output_column`` of the previous row; the total starts at 0 when there is
    no previous row.
    """
    if prev is None:
        running_total = 0
    else:
        running_total = prev[output_column]
    return running_total + curr[input_column]

def auc(dataframe, carry_column='auc_segment', output_column='auc_agg', epsilon=0):
    """Add per-row segment areas and their running total to ``dataframe``.

    Two passes of ``pd_reduce`` are made over the rows in their current order:
    the first writes ``auc_segment(..., epsilon=epsilon)`` into ``carry_column``,
    the second accumulates ``carry_column`` into ``output_column``.

    Returns:
        A new frame with both columns; the input frame is not modified.
    """
    segment_step = partial(auc_segment, epsilon=epsilon)
    accumulate_step = partial(sum_reduction, input_column=carry_column)
    with_segments = pd_reduce(dataframe, carry_column, segment_step)
    return pd_reduce(with_segments, output_column, accumulate_step)

def auc_per_data(df, epsilons):
    """Compute the cumulative AUC columns for every representation and epsilon.

    For each epsilon, every representation's rows are sorted by ``n`` and run
    through ``auc``; the cumulative result lands in ``auc_agg@<epsilon>`` (dots in
    the name replaced by underscores). A string twin ``str_auc_agg@<epsilon>``
    holds the value rounded to two decimals, prefixed with "> " on rows whose
    mean test loss is still above epsilon.

    ``df`` is rebound on every pass so the columns of all epsilons pile up in a
    single frame; that is intentional.

    Returns:
        The frame carrying the columns for all epsilons (``df`` itself when
        ``epsilons`` is empty).
    """
    for epsilon in epsilons:
        colname = f'auc_agg@{epsilon}'.replace('.', '_')
        str_colname = f'str_{colname}'
        per_representation = [
            auc(df[df.representation == representation].sort_values('n'),
                output_column=colname, epsilon=epsilon)
            for representation in df.representation.unique()
        ]
        df = pd.concat(per_representation)
        df[str_colname] = df[colname].round(2).astype(str)
        still_above = df['mean_loss_test'] > epsilon
        df.loc[still_above, str_colname] = "> " + df.loc[still_above, str_colname]
    return df

#     for epsilon in epsilons:
#         eps_underscore = str(epsilon).replace('.', '_')
#         old_title = f'auc_agg@{eps_underscore}'
#         new_title = f'SDL, $\\varepsilon$={epsilon}'
#         output_df = output_df.rename(columns={old_title: new_title})
#         output_df[new_title] = output_df[new_title].astype(str)
#         output_df.loc[(output_df['Val loss'] > epsilon), new_title] = "> " + output_df.loc[(output_df['Val loss'] > epsilon), new_title]


def sc_segment(prev, curr, output_column, epsilon=0):
    """Running-minimum step for ``pd_reduce`` used by ``sc``.

    Returns the smaller of the value stored in ``output_column`` of the previous
    row and the current row's ``n`` if its mean test loss is at most ``epsilon``.
    The sentinel 1e20 stands in for "no value" on both sides (no previous row,
    or loss still above ``epsilon``).
    """
    not_reached = 1e20
    best_so_far = not_reached if prev is None else prev[output_column]
    candidate = curr.n if curr.mean_loss_test <= epsilon else not_reached
    return min(best_so_far, candidate)

def sc(dataframe, carry_column='sc_segment', output_column='sc', epsilon=0):
    """Add the running ``sc_segment`` value to ``dataframe`` as ``output_column``.

    A single ``pd_reduce`` pass over the rows in their current order.
    ``carry_column`` is accepted but not used.

    Returns:
        A new frame with ``output_column`` filled in.
    """
    step = partial(sc_segment, epsilon=epsilon)
    return pd_reduce(dataframe, output_column, step)

def sc_per_data(df, epsilons):
    """Compute the ``sc@<epsilon>`` columns for every representation and epsilon.

    For each epsilon, every representation's rows are sorted by ``n`` and run
    through ``sc``; the result lands in ``sc@<epsilon>`` (dots in the name
    replaced by underscores). A string twin ``str_sc@<epsilon>`` holds the value
    as an integer string, or "> <n>" on rows where the 1e20 sentinel shows that
    the loss has not dropped to epsilon yet.

    ``df`` is rebound on every pass so the columns of all epsilons pile up in a
    single frame; that is intentional.

    Returns:
        The frame carrying the columns for all epsilons (``df`` itself when
        ``epsilons`` is empty).
    """
    for epsilon in epsilons:
        colname = f'sc@{epsilon}'.replace('.', '_')
        results = []
        for representation in df.representation.unique():
            subset = df[(df.representation == representation)]
            # sc_segment carries a running minimum forward from the previous row,
            # so the rows must be visited in order of increasing n (auc_per_data
            # sorts the same way); otherwise the result depends on row order.
            results.append(sc(subset.sort_values('n'), output_column=colname, epsilon=epsilon))
        df = pd.concat(results)
        df[f'str_{colname}'] = df[colname].astype(int).astype(str)
        not_reached = df[colname] > 1e10
        df.loc[not_reached, f'str_{colname}'] = "> " + df.loc[not_reached, 'n'].astype(str)
    return df



In [794]:
# Root folder of the experiment directories; read by get_mdls and load_params.
dir_with_experiments = '../..'
# Number of label classes; get_mdls uses log2(num_classes) as the per-target uniform cost.
num_classes = 45  
    
def get_mdls(experiment_name):
    """Load an experiment's online-coding report and accumulate its codelength.

    Reads ``<dir_with_experiments>/<experiment_name>/online_coding.pkl``, a list
    with one dict per trained portion.

    Args:
        experiment_name: folder name under ``dir_with_experiments``.

    Returns:
        A tuple ``(online_codelengths, online_ns, online_report)``:

        * ``online_codelengths`` -- numpy array with one value per report entry:
          the cumulative sum of the uniform cost of the first entry's targets
          (``log2(num_classes)`` each) followed by ``loss_online_portion`` of
          every entry except the last.
        * ``online_ns`` -- list with the training-target count of every entry.
        * ``online_report`` -- the loaded list; every entry additionally gets a
          ``'train_targets'`` key so downstream code can rely on that name.
    """
    report_path = f'{dir_with_experiments}/{experiment_name}/online_coding.pkl'
    # NOTE(review): unpickling can execute arbitrary code -- only load reports
    # you produced yourself.
    with open(report_path, 'rb') as report_file:
        online_report = pickle.load(report_file)

    # The target count is stored under one of two key names depending on the run.
    train_n_label = 'num_targets_train' if 'num_targets_train' in online_report[0] else 'train_targets'
    for rep in online_report:
        rep['train_targets'] = rep[train_n_label]

    first_portion_cost = online_report[0][train_n_label] * np.log2(num_classes)
    online_costs = [first_portion_cost] + [elem['loss_online_portion'] for elem in online_report[:-1]]
    online_codelengths = np.cumsum(online_costs)
    online_ns = [rep[train_n_label] for rep in online_report]
    return online_codelengths, online_ns, online_report

def load_params(experiment_name):
    """Load the YAML configuration stored in an experiment folder.

    The folder ``<dir_with_experiments>/<experiment_name>`` is searched for
    ``online_l0.yml``, then ``online_l0_control.yml``, then
    ``<experiment_name>.yml``; the first file that exists is parsed.

    Args:
        experiment_name: folder name under ``dir_with_experiments``.

    Returns:
        The parsed configuration.

    Raises:
        FileNotFoundError: if none of the three candidate files exists.
    """
    experiment_dir = f'{dir_with_experiments}/{experiment_name}'
    candidates = ['online_l0.yml', 'online_l0_control.yml', f'{experiment_name}.yml']
    for position, filename in enumerate(candidates):
        try:
            with open(f'{experiment_dir}/{filename}') as config_file:
                # safe_load: yaml.load without an explicit Loader is rejected by
                # current PyYAML releases and can construct arbitrary objects.
                return yaml.safe_load(config_file)
        except FileNotFoundError:
            # Fall through to the next candidate; re-raise for the last one.
            if position == len(candidates) - 1:
                raise


def load_exps(experiments):
    """Collect one DataFrame row per (experiment, trained portion).

    For every experiment name, the configuration (``load_params``) supplies
    ``model_layer`` and the ``corrupted`` flag (corrupted token percentage above
    0.99), and ``get_mdls`` supplies the per-portion codelengths, sizes and
    report entries. Each row holds the experiment name, ``n`` (the portion's
    training-target count), ``mdl``, every key of the portion's report entry,
    and ``mean_loss_<split>`` for the dev/test/online_portion splits that have a
    ``loss_<split>`` entry.

    Args:
        experiments: iterable of experiment folder names.

    Returns:
        A DataFrame with the rows above plus a ``representation`` label column
        built from ``model_layer``.
    """
    rows = []
    for exp_name in experiments:
        params = load_params(exp_name)
        base_record = {
            'name': exp_name,
            'n': params['dataset']['dataset_size'],
            'model_layer': params['model']['model_layer'],
            'corrupted': params['probe']['misc']['corrupted_token_percent'] > 0.99,
        }

        partial_mdls, partial_dataset_sizes, online_report = get_mdls(exp_name)
        for position, (mdl, size) in enumerate(zip(partial_mdls, partial_dataset_sizes)):
            record = copy.deepcopy(base_record)
            # The configured dataset size is replaced by the portion's own size.
            record['n'] = size
            record['mdl'] = mdl
            record.update(online_report[position])
            for split in ('dev', 'test', 'online_portion'):
                loss_key = f'loss_{split}'
                if loss_key in record:
                    record[f'mean_loss_{split}'] = record[loss_key] / record[f'num_targets_{split}']
            rows.append(record)

    df = pd.DataFrame(rows)
    df['representation'] = "ELMo layer " + df.model_layer.astype(str)
    return df

def merge_seeds(df, on='representation'):
    """Put the runs of each group onto a shared ``n`` grid.

    Rows are grouped by the ``on`` column, and within a group by ``name`` (one
    run each). The first run of a group, sorted by ``n``, is the reference and
    is kept unchanged. Every further run is sorted by ``n`` and passed through
    ``pd.merge_asof`` against the reference (the reference's columns arrive with
    a ``_new`` suffix and are dropped again). Its ``n`` column is then
    overwritten, position by position, with the reference's ``n`` values, so
    every run must have as many rows as the reference.

    Returns:
        The concatenation of all reference and aligned runs.
    """
    pieces = []
    for group_value in df[on].unique():
        group_df = df[df[on] == group_value]
        runs = [group_df[group_df.name == run_name].copy() for run_name in group_df.name.unique()]

        reference = runs[0].sort_values('n')
        reference_ns = reference['n'].values
        pieces.append(reference)

        for run in runs[1:]:
            ordered = run.sort_values('n')
            aligned = pd.merge_asof(ordered, reference, on='n', direction='nearest', suffixes=('', '_new'))
            aligned['n'] = reference_ns
            reference_only = [col for col in aligned.columns if col.endswith('_new')]
            pieces.append(aligned.drop(columns=reference_only))
    return pd.concat(pieces, sort=False)

In [827]:
def loss_data_chart(df, title='', xdomain=[40, 1000000], ydomain=[0.05, 3], xrules=[], yrules=[], 
                    color_title='Representation', final=False):
    """Build a log-log Altair chart of mean test loss against dataset size.

    Rows with ``n`` below 10 are left out. Each representation gets a
    semi-transparent line plus point markers, both showing the mean of
    ``mean_loss_test`` per ``n``. Dashed grey rules are drawn at the given
    positions.

    Args:
        df: frame with ``n``, ``mean_loss_test`` and ``representation`` columns.
        title: chart title.
        xdomain: ``[min, max]`` of the log-scaled x axis.
        ydomain: ``[min, max]`` of the log-scaled y axis.
        xrules: x positions of the dashed rules.
        yrules: y positions of the dashed rules.
        color_title: legend title.
        final: when true, use the larger font sizes and switch the grid off.

    Returns:
        The configured layered Altair chart.
    """
    # Both modes draw with the same line width; only the font sizes differ.
    line_width = 5
    label_size, title_size = (24, 30) if final else (14, 20)
    colorscheme = 'set1'
    stroke_color = '333'

    # A single frame carries both rule kinds in separate columns.
    rules_df = pd.concat([pd.DataFrame({'x': xrules}), pd.DataFrame({'y': yrules})], sort=False)

    def x_encoding():
        return alt.X('n', scale=alt.Scale(type='log', domain=xdomain, nice=False),  title='Dataset size')

    def y_encoding():
        return alt.Y('mean(mean_loss_test)', scale=alt.Scale(type='log', domain=ydomain, nice=False), title='Test loss')

    def color_encoding(**extra):
        return alt.Color('representation:N', title=color_title, scale=alt.Scale(scheme=colorscheme,), **extra)

    plotted = df[df.n >= 10]

    # The line layer hides its legend; the point layer supplies the visible one.
    line = alt.Chart(plotted, title=title).mark_line(size=line_width, opacity=0.4).encode(
        x=x_encoding(),
        y=y_encoding(),
        color=color_encoding(legend=None),
    )
    point = alt.Chart(plotted, title=title).mark_point(size=80, opacity=1).encode(
        x=x_encoding(),
        y=y_encoding(),
        color=color_encoding(),
        shape=alt.Shape('representation:N', title=color_title), 
        tooltip=['n', 'representation']
    )

    rule_x = alt.Chart(rules_df).mark_rule(size=3, color='999', strokeDash=[4, 4]).encode(x='x')
    rule_y = alt.Chart(rules_df).mark_rule(size=3, color='999', strokeDash=[4, 4]).encode(y='y')

    title_config = alt.TitleConfig(fontSize=title_size, fontWeight='normal')
    axis_config = alt.AxisConfig(titleFontSize=title_size, labelFontSize=label_size, grid=(not final), 
                                 domainWidth=5, domainColor=stroke_color, 
                                 tickWidth=3, tickSize=9, tickCount=4, tickColor=stroke_color, tickOffset=0)
    legend_config = alt.LegendConfig(titleFontSize=title_size, labelFontSize=label_size, labelLimit=0, titleLimit=0,
                                     orient='top-right', padding=10, 
                                     titlePadding=10, rowPadding=5,
                                     fillColor='white', strokeColor='black', cornerRadius=0)
    view_config = alt.ViewConfig(strokeWidth=0, stroke=stroke_color)

    chart = alt.layer(rule_x, rule_y, line, point).resolve_scale(
        color='independent',
        shape='independent'
    )
    chart = chart.properties(width=600, height=500, background='white')
    chart = chart.configure_legend(labelLimit=0)
    return chart.configure(
        title=title_config,
        axis=axis_config,
        legend=legend_config,
        view=view_config,
        font='Roboto',
    )

In [828]:
def make_latex(df, ns, stack=False, group_n=True, epsilons=[0.5, 0.1]):
    """Render a summary table for selected dataset sizes as LaTeX source.

    The AUC and sc columns are computed for ``epsilons`` (``auc_per_data`` and
    ``sc_per_data``), rows whose ``n`` is in ``ns`` are kept and averaged per
    (layer, n, formatted AUC/sc strings), and the columns are renamed to their
    table headings.

    Args:
        df: frame as produced by ``load_exps`` / ``merge_seeds``.
        ns: dataset sizes to include in the table.
        stack: when false, index by (n, layer) and transpose. When true, index by
            (layer, n) and, if ``group_n`` is also true, transpose, display the
            intermediate table, stack it and sort by ``n``.
        group_n: with ``stack=True`` selects the stacked layout; when false,
            ``n`` is formatted as a ``$n=...$`` string instead.
        epsilons: loss thresholds to include as columns.

    Returns:
        An ``ipywidgets.Output`` whose stdout holds the LaTeX table source.
    """
    df = auc_per_data(df, epsilons).reset_index(drop=True)
    df = sc_per_data(df, epsilons).reset_index(drop=True)
    auc_cols = {f'str_auc_agg@{eps}'.replace('.', '_'):  f'SDL, $\\varepsilon$={eps}' for eps in epsilons}
    sc_cols = {f'str_sc@{eps}'.replace('.', '_'):  f'$\\varepsilon$SC, $\\varepsilon$={eps}' for eps in epsilons}
    group_keys = ['model_layer', 'n', *auc_cols.keys(), *sc_cols.keys()]
    # numeric_only=True: the frame also holds string columns (e.g. 'representation')
    # that are not group keys; pandas 2.x raises when asked to average them
    # instead of silently dropping them.
    output_df = df[df.n.isin(ns)].groupby(group_keys).mean(numeric_only=True).reset_index()
    output_df = output_df[['n', 'model_layer', 'mean_loss_test', 'mdl', *auc_cols.keys(), *sc_cols.keys()]]
    output_df = output_df.sort_values('n')
    output_df['model_layer'] = output_df['model_layer'].astype(int)
    output_df = output_df.rename(columns={'model_layer': 'ELMo layer', 'mean_loss_test': 'Val loss', 'mdl': 'MDL', **auc_cols, **sc_cols})
    if stack:
        if not group_n:
            output_df['n'] = '$n=' + output_df['n'].astype(str) + '$'
        output_df = output_df.set_index(['ELMo layer', 'n'])
        if group_n:
            output_df = output_df.transpose()
            # Show the wide table in the notebook before it is stacked for LaTeX.
            display(output_df)
            output_df = output_df.stack()
            output_df = output_df.swaplevel().sort_values('n', ascending=True)
    else:
        output_df = output_df.set_index(['n', 'ELMo layer'])
        output_df = output_df.transpose()
    out = widgets.Output(layout={'border': '1px solid black'})
    latex_str = output_df.to_latex(multicolumn_format='c', float_format="{:0.2f}".format, escape=False, column_format='llrrr')
    out.append_stdout(latex_str)
    return out

In [829]:
experiments = [
    'online_l0_n40000_corruptedFalse',
    'online_l1_n40000_corruptedFalse',
    'online_l2_n40000_corruptedFalse',
    'online_l0_n40000_corruptedTrue',
    'online_l1_n40000_corruptedTrue',
    'online_l2_n40000_corruptedTrue',
#     'online_l0_n40000_corruptedFalse_seed1',
#     'online_l1_n40000_corruptedFalse_seed1',
#     'online_l2_n40000_corruptedFalse_seed1',
#     'online_l0_n40000_corruptedFalse_seed0',
#     'online_l1_n40000_corruptedFalse_seed0',
#     'online_l2_n40000_corruptedFalse_seed0',
#     'online_l0_n40000_corruptedFalse_seed2',
#     'online_l1_n40000_corruptedFalse_seed2',
#     'online_l2_n40000_corruptedFalse_seed2',
]
df = load_exps(experiments)
# Prefix the corrupted flag so clean and corrupted runs of one layer become separate series.
df['representation'] = df.corrupted.astype(str) + ' ' + df.representation
df = merge_seeds(df).sort_values('n')
# numeric_only=True: the frame still holds string columns (e.g. 'name') that
# pandas 2.x refuses to average instead of silently dropping them.
df = df.groupby(['representation', 'n']).mean(numeric_only=True).reset_index()
epsilons = [0.1, 0.5]
ns = [461, 474838]
display(loss_data_chart(df, final=False, xrules=ns, yrules=epsilons))
make_latex(df, ns=ns, stack=True)



ELMo layer,0,1,2,0,1,2
n,461,461,461,474838,474838,474838
Val loss,0.653028,0.613618,0.748647,0.173402,0.074234,0.0895781
MDL,849.696,995.901,1029.37,91856.5,52471.9,66536.0
"SDL, $\varepsilon$=0.5",> 240.12,> 289.11,> 314.91,240.12,289.11,349.98
"SDL, $\varepsilon$=0.1",> 424.52,> 473.51,> 499.31,> 40786.23,2632.57,7587.72
"$\varepsilon$SC, $\varepsilon$=0.5",> 461,> 461,> 461,934,934.0,1930.0
"$\varepsilon$SC, $\varepsilon$=0.1",> 461,> 461,> 461,> 474838,117704.0,474838.0


Output(layout=Layout(border='1px solid black'), outputs=({'output_type': 'stream', 'name': 'stdout', 'text': '…

In [830]:
df = load_exps([
    'online_l0_n40000_corruptedFalse',
    'online_l1_n40000_corruptedFalse',
    'online_l2_n40000_corruptedFalse',
    'online_l0_n40000_corruptedFalse_seed1',
    'online_l1_n40000_corruptedFalse_seed1',
    'online_l2_n40000_corruptedFalse_seed1',
    'online_l0_n40000_corruptedFalse_seed0',
    'online_l1_n40000_corruptedFalse_seed0',
    'online_l2_n40000_corruptedFalse_seed0',
    'online_l0_n40000_corruptedFalse_seed2',
    'online_l1_n40000_corruptedFalse_seed2',
    'online_l2_n40000_corruptedFalse_seed2',
])
df = merge_seeds(df)

# df_add = load_exps([
#     '1k_online_l0_n40000_corruptedFalse_seed0',
#     '1k_online_l1_n40000_corruptedFalse_seed0',
#     '1k_online_l2_n40000_corruptedFalse_seed0',
# ])
# df_add = merge_seeds(df_add)
# df = pd.concat([df, df_add], sort=False)
# df_add = load_exps([
#     '100k_online_l0_n40000_corruptedFalse_seed0',
#     '100k_online_l1_n40000_corruptedFalse_seed0',
#     '100k_online_l2_n40000_corruptedFalse_seed0',

# ])
# df_add = merge_seeds(df_add)
# df = pd.concat([df, df_add], sort=False)

# Average the runs of each representation at every n. numeric_only=True: the
# frame still holds string columns (e.g. 'name') that pandas 2.x refuses to
# average instead of silently dropping them.
df = df.groupby(['representation', 'n']).mean(numeric_only=True).reset_index()
epsilons = [0.1, 0.5]
ns = [461, 474838]
chart = loss_data_chart(df, final=True, xrules=ns, yrules=epsilons)
display(chart)
save(chart, 'elmo_layers.pdf')
make_latex(df, ns=ns, stack=True)



ELMo layer,0,1,2,0,1,2
n,461,461,461,474838,474838,474838
Val loss,0.747102,0.73853,0.869146,0.174077,0.0787667,0.0947686
MDL,884.537,1009.26,1017.72,92403.4,52648.5,65468.5
"SDL, $\varepsilon$=0.5",> 289.27,> 339.11,> 372.3,310.22,339.11,425.12
"SDL, $\varepsilon$=0.1",> 478.67,> 528.51,> 561.7,> 40882.72,2765.11,7069.56
"$\varepsilon$SC, $\varepsilon$=0.5",> 461,> 461,> 461,1930,934.0,1930.0
"$\varepsilon$SC, $\varepsilon$=0.1",> 461,> 461,> 461,> 474838,237967.0,474838.0


Output(layout=Layout(border='1px solid black'), outputs=({'output_type': 'stream', 'name': 'stdout', 'text': '…

In [793]:
# Inspect the per-row AUC bookkeeping next to the inputs it is computed from.
auc_per_data(df, epsilons)[['representation', 'n', 'mean_loss_test', 'auc_segment', 'auc_agg@0_5', 'train_targets']]

Unnamed: 0,representation,n,mean_loss_test,auc_segment,auc_agg@0_5,train_targets
0,ELMo layer 0,48,2.399160,89.260526,89.260526,47.00
1,ELMo layer 0,108,1.663145,60.192779,149.453306,98.75
2,ELMo layer 0,250,1.094247,80.817594,230.270900,234.75
3,ELMo layer 0,461,0.726352,54.041595,284.312495,473.50
4,ELMo layer 0,546,0.684400,,,
5,ELMo layer 0,659,0.662892,,,
6,ELMo layer 0,756,0.641821,,,
7,ELMo layer 0,854,0.542274,,,
8,ELMo layer 0,934,0.543616,,,954.00
9,ELMo layer 0,1011,0.504176,,,


In [726]:
# List the mean test loss per representation in order of increasing n.
df[['representation', 'n', 'mean_loss_test']].sort_values(['representation', 'n'])

Unnamed: 0,representation,n,mean_loss_test
30,ELMo layer 0,48,2.399160
31,ELMo layer 0,108,1.663145
32,ELMo layer 0,250,1.094247
0,ELMo layer 0,461,0.643353
33,ELMo layer 0,461,0.747102
1,ELMo layer 0,546,0.684400
2,ELMo layer 0,659,0.662892
3,ELMo layer 0,756,0.641821
4,ELMo layer 0,854,0.542274
34,ELMo layer 0,934,0.543616


In [265]:
# Add the cumulative AUC columns for several epsilons, then plot the epsilon=0
# one against dataset size for the uncorrupted runs with n above 100.
df = auc_per_data(df, [0, 0.02, 0.05, 0.1])
line = alt.Chart(df[(df.corrupted == False) & (df.n > 100)], title="").mark_line(size=5).encode(
    x=alt.X('n', scale=alt.Scale(type='log', domain=[80, 1100000], nice=False), title="Dataset size"),
    # The plotted field is the cumulative auc_agg@0 column (reported as SDL in the
    # LaTeX table), not the raw test loss, so the axis is titled accordingly.
    y=alt.Y('auc_agg@0', scale=alt.Scale(type='log', domain=[80, 200000], nice=False), title="SDL, ε=0"),
    color=alt.Color('representation:N', title="Representation"),
    detail='corrupted:N',
)

point = line.mark_circle(size=60, opacity=1).encode(tooltip=['name', 'mdl', 'model_layer'])
chart = point + line

chart = chart.configure(
    title=alt.TitleConfig(fontSize=35, fontWeight='normal'),
    axis=alt.AxisConfig(titleFontSize=20, labelFontSize=14),
    legend=alt.LegendConfig(titleFontSize=20, labelFontSize=14, labelLimit=0,
                            orient='bottom-right', padding=10, 
                            fillColor='white', strokeColor='black')
)
chart.properties(width=600, height=400, background='white').interactive()

In [169]:
# Display the working frame, including the AUC columns added by auc_per_data.
df

Unnamed: 0,corrupted,loss_dev,loss_online_portion,loss_test,loss_train,mdl,mean_loss_dev,mean_loss_online_portion,mean_loss_test,model_layer,...,num_targets_real_train,num_targets_test,num_targets_train,train_targets,representation,auc_segment,auc_agg@0,auc_agg@0_02,auc_agg@0_05,auc_agg@0_1
0,False,78187.547913,104.721359,114056.578918,,263.608949,1.948988,1.745356,2.012148,0,...,,56684,,48,ELMo layer 0,0.000000,0.000000,0.000000,0.000000,0.000000
1,False,58227.002106,252.513977,86204.616882,,368.330308,1.451430,1.778267,1.520793,0,...,,56684,,108,ELMo layer 0,98.721359,104.721359,103.521359,101.721359,98.721359
2,False,39897.620056,228.852112,57883.565277,,620.844285,0.994531,1.084607,1.021162,0,...,,56684,,250,ELMo layer 0,238.313977,357.235336,353.195336,347.135336,337.035336
3,False,25343.419601,352.816925,37016.223267,,849.696397,0.631738,0.745913,0.653028,0,...,,56684,,461,ELMo layer 0,207.752112,586.087448,577.827448,565.437448,544.787448
4,False,20289.969872,494.675110,28339.773643,,1202.513322,0.505770,0.496662,0.499961,0,...,,56684,,934,ELMo layer 0,305.516925,938.904373,921.184373,894.604373,850.304373
5,False,15260.074669,768.501755,21807.683533,,1697.188432,0.380389,0.383675,0.384724,0,...,,56684,,1930,ELMo layer 0,395.075110,1433.579483,1395.939483,1339.479483,1245.379483
6,False,12303.664551,1130.035721,18422.490257,,2465.690186,0.306695,0.316006,0.325003,0,...,,56684,,3933,ELMo layer 0,568.201755,2202.081238,2124.381238,2007.831238,1813.581238
7,False,10756.478020,2068.216187,15499.283154,,3595.725907,0.268128,0.266351,0.273433,0,...,,56684,,7509,ELMo layer 0,772.435721,3332.116959,3182.896959,2959.066959,2586.016959
8,False,9228.108723,3678.637794,13420.827732,,5663.942094,0.230030,0.245014,0.236766,0,...,,56684,,15274,ELMo layer 0,1291.716187,5400.333145,5095.813145,4639.033145,3877.733145
9,False,8701.364555,6098.516998,12695.911018,,9342.579888,0.216900,0.218209,0.223977,0,...,,56684,,30288,ELMo layer 0,2177.237794,9078.970940,8474.170940,7566.970940,6054.970940


In [168]:
# Rebuild the legend label from the layer number alone.
df['representation'] = "ELMo layer " + df.model_layer.astype(str)

# Test loss against dataset size on log-log axes, uncorrupted runs only.
line = (
    alt.Chart(df[df.corrupted == False], title="")
    .mark_line(size=5)
    .encode(
        x=alt.X('n', scale=alt.Scale(type='log', domain=[40, 1000000], nice=False), title="Dataset size"),
        y=alt.Y('mean_loss_test', scale=alt.Scale(type='log', domain=[0.05, 3], nice=False), title="Test loss"),
        color=alt.Color('representation:N', title="Representation"),
        detail='corrupted:N',
    )
)

# Same encoding drawn as circles, with a tooltip identifying each run.
point = line.mark_circle(size=60, opacity=1).encode(tooltip=['name', 'mdl', 'model_layer'])

chart = alt.layer(point, line).configure(
    title=alt.TitleConfig(fontSize=35, fontWeight='normal'),
    axis=alt.AxisConfig(titleFontSize=20, labelFontSize=14),
    legend=alt.LegendConfig(titleFontSize=20, labelFontSize=14, labelLimit=0,
                            orient='top-right', padding=10, 
                            fillColor='white', strokeColor='black')
)
chart.properties(width=600, height=400, background='white').interactive()