In [3]:
import jsonlines
import numpy as np
import os
import pandas as pd
import re
from scipy import stats
import yaml

# read the yaml files

In [2]:
# Root of the coreference experiment outputs. DATA_DIR must be set in the environment;
# os.environ[...] raises KeyError('DATA_DIR') when it is missing, instead of the opaque
# TypeError that os.path.join(None, ...) produces with os.getenv.
data_dir = os.path.join(os.environ["DATA_DIR"], "mica_text_coref/movie_coref/results/coreference")
# The two result sub-directories scanned by this notebook.
data_dirs = [os.path.join(data_dir, "Nov23-25"), os.path.join(data_dir, "Nov26-27")]

In [33]:
# Build one table row per run directory that contains a result.yaml file.
# Column notes: "dir" holds only the run sub-directory name, "schedule" is read from the
# "warmup" entry of result.yaml, and "cr_hidden_size" from
# character_recognition.gru_hidden_size.
header = [
    "dir", "preprocess", "bert_lr", "coref_lr", "character_lr", "schedule", "weight_decay",
    "dropout", "genre", "add_cr_to_coarse", "train_document_len", "cr_seq_len", "cr_hidden_size",
    "dev_score", "train_score", "epoch",
]
rows = []
# The loop variable is `results_dir`, not `dir`, so the builtin dir() is not shadowed
# for the rest of the kernel session.
for results_dir in data_dirs:
    for subdir in os.listdir(results_dir):
        result_file = os.path.join(results_dir, subdir, "result.yaml")
        # isfile (rather than exists) so that a directory named result.yaml is skipped
        # instead of failing in open().
        if os.path.isfile(result_file):
            with open(result_file, "r") as fr:
                # NOTE(review): FullLoader can construct more than plain scalars/maps/lists;
                # if these files are ever taken from an untrusted source, switch to
                # yaml.safe_load after confirming they contain no custom tags.
                result = yaml.load(fr, Loader=yaml.FullLoader)
            rows.append([
                subdir,
                result["preprocess"],
                result["bert_lr"],
                result["coref_lr"],
                result["character_lr"],
                result["warmup"],
                result["weight_decay"],
                result["dropout"],
                result["genre"],
                result["add_cr_to_coarse"],
                result["train_document_len"],
                result["cr_seq_len"],
                result["character_recognition"]["gru_hidden_size"],
                result["dev_score"],
                result["train_score"],
                result["epoch"],
            ])
df = pd.DataFrame(rows, columns=header)
print(df.shape)
print(df.dtypes)
display(df.head(5))

(303, 16)
dir                    object
preprocess             object
bert_lr               float64
coref_lr              float64
character_lr          float64
schedule              float64
weight_decay          float64
dropout               float64
genre                  object
add_cr_to_coarse         bool
train_document_len      int64
cr_seq_len              int64
cr_hidden_size          int64
dev_score             float64
train_score           float64
epoch                   int64
dtype: object


Unnamed: 0,dir,preprocess,bert_lr,coref_lr,character_lr,schedule,weight_decay,dropout,genre,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,dev_score,train_score,epoch
0,Nov24_04:54:25PM,addsays,5e-05,0.0001,0.0001,-1.0,0.0,0.3,wb,True,5120,256,256,64.817,72.647,2
1,Nov24_07:07:40PM,addsays,5e-05,0.0001,0.0001,0.0,0.0,0.3,wb,True,5120,256,256,69.277,75.1,4
2,Nov24_08:27:36AM,addsays,2e-05,0.0001,0.0001,1.0,0.0,0.0,wb,True,5120,256,256,69.687,91.127,8
3,Nov25_05:14:57AM,regular,5e-05,0.0002,0.0002,1.0,0.0,0.3,wb,True,5120,256,256,65.587,54.767,3
4,Nov24_05:19:06AM,nocharacters,2e-05,0.0001,0.0001,1.0,0.0,0.3,wb,True,5120,256,256,62.613,61.683,2


# find the best dev score hyperparams

In [34]:
# Every run whose dev_score equals the maximum dev_score in the table.
df.loc[df["dev_score"].eq(df["dev_score"].max())]

Unnamed: 0,dir,preprocess,bert_lr,coref_lr,character_lr,schedule,weight_decay,dropout,genre,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,dev_score,train_score,epoch
53,Nov24_11:50:00AM,regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,74.057,84.647,11
274,Nov27_03:58:46AM,regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,74.057,84.647,11
294,Nov26_09:26:33PM,regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,74.057,84.647,11
299,Nov27_01:13:03AM,regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,74.057,84.647,11


In [35]:
# Hyperparameters of the first run that reaches the maximum dev_score; the result
# columns and the run directory name are left out so only hyperparameters remain.
best_args = {
    name: value
    for name, value in df[df["dev_score"] == df["dev_score"].max()].iloc[0].to_dict().items()
    if name not in ("dev_score", "train_score", "epoch", "dir")
}
print(best_args)

{'preprocess': 'regular', 'bert_lr': 2e-05, 'coref_lr': 0.0002, 'character_lr': 0.0002, 'schedule': -1.0, 'weight_decay': 0.001, 'dropout': 0.0, 'genre': 'wb', 'add_cr_to_coarse': True, 'train_document_len': 5120, 'cr_seq_len': 256, 'cr_hidden_size': 256}


# variation

In [36]:
def vary(df, best_args, *vars, group=True):
    """Select the runs that match ``best_args`` on every key except those in ``vars``.

    Args:
        df: table of runs; must contain a column for every key of ``best_args`` and,
            when ``group`` is true, the columns "dev_score", "train_score" and "epoch".
        best_args: mapping from hyperparameter column name to the reference value.
        *vars: names of the hyperparameters that are allowed to differ from
            ``best_args``. Names that are not keys of ``best_args`` have no effect.
        group: if true, group the selected runs by all ``best_args`` keys (held-fixed
            keys first, varied keys last) and return the mean of "dev_score",
            "train_score" and "epoch" per group; otherwise return the selected rows.

    Returns:
        The grouped means (``group=True``) or the filtered rows of ``df``
        (``group=False``).
    """
    # Build the mask positionally, one column comparison at a time. The earlier
    # row loop wrote ``index[i]`` using the iterrows() label ``i``, which is only a
    # valid position when df has a default 0..n-1 index.
    index = np.ones(len(df), dtype=bool)
    for var in best_args.keys():
        if var not in vars:
            index &= np.asarray(df[var] == best_args[var])
    subdf = df[index]
    if group:
        # Stable sort on a boolean key: fixed hyperparameters keep their order and come
        # first, the varied ones are moved to the end of the group keys.
        _vars = sorted(best_args.keys(), key=lambda x: x in vars)
        return subdf.groupby(_vars)[["dev_score", "train_score", "epoch"]].mean()
    else:
        return subdf

# variation w/ bert_lr

In [37]:
# Keep every hyperparameter at its best_args value except bert_lr, then average
# dev_score, train_score and epoch per bert_lr value.
vary(df, best_args, "bert_lr")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,dev_score,train_score,epoch
preprocess,coref_lr,character_lr,schedule,weight_decay,dropout,genre,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,bert_lr,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
regular,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,1e-05,70.287,72.453,4.0
regular,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,2e-05,73.5922,84.2562,10.0
regular,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,3e-05,70.15,75.317,4.0
regular,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,4e-05,72.65,84.42,7.0
regular,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,5e-05,69.72,79.233,5.0


# variation w/ coref_lr and character_lr

In [22]:
# Let coref_lr and character_lr differ from best_args (all other hyperparameters fixed)
# and average the scores per (coref_lr, character_lr) pair.
vary(df, best_args, "coref_lr", "character_lr")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,dev_score,train_score,epoch
preprocess,bert_lr,schedule,weight_decay,dropout,genre,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,coref_lr,character_lr,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
regular,2e-05,-1.0,0.001,0.0,wb,True,5120,256,256,0.0001,0.0001,69.567,75.157,5.0
regular,2e-05,-1.0,0.001,0.0,wb,True,5120,256,256,0.0002,0.0002,73.5922,84.2562,10.0


# variation w/ schedule

In [23]:
# Let only the schedule column differ from best_args and average the scores per value.
vary(df, best_args, "schedule")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,dev_score,train_score,epoch
preprocess,bert_lr,coref_lr,character_lr,weight_decay,dropout,genre,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,schedule,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
regular,2e-05,0.0002,0.0002,0.001,0.0,wb,True,5120,256,256,-1.0,73.5922,84.2562,10.0
regular,2e-05,0.0002,0.0002,0.001,0.0,wb,True,5120,256,256,0.0,72.25,82.84,7.0
regular,2e-05,0.0002,0.0002,0.001,0.0,wb,True,5120,256,256,1.0,71.853,84.863,10.0


# variation w/ weight_decay

In [24]:
# Let only weight_decay differ from best_args and average the scores per value.
vary(df, best_args, "weight_decay")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,dev_score,train_score,epoch
preprocess,bert_lr,coref_lr,character_lr,schedule,dropout,genre,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,weight_decay,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
regular,2e-05,0.0002,0.0002,-1.0,0.0,wb,True,5120,256,256,0.0,70.353,71.34,3.0
regular,2e-05,0.0002,0.0002,-1.0,0.0,wb,True,5120,256,256,0.0001,70.353,71.34,3.0
regular,2e-05,0.0002,0.0002,-1.0,0.0,wb,True,5120,256,256,0.001,73.5922,84.2562,10.0
regular,2e-05,0.0002,0.0002,-1.0,0.0,wb,True,5120,256,256,0.01,71.68,80.18,6.0
regular,2e-05,0.0002,0.0002,-1.0,0.0,wb,True,5120,256,256,0.1,73.043,84.713,10.0


# variation w/ genre

In [25]:
# Let only genre differ from best_args and average the scores per genre.
vary(df, best_args, "genre")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,dev_score,train_score,epoch
preprocess,bert_lr,coref_lr,character_lr,schedule,weight_decay,dropout,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,genre,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,True,5120,256,256,bc,71.643,75.34,4.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,True,5120,256,256,bn,71.49,85.43,12.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,True,5120,256,256,mz,73.86,85.137,9.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,True,5120,256,256,nw,72.997,80.92,6.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,True,5120,256,256,pt,72.853,82.55,7.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,True,5120,256,256,tc,72.213,76.52,6.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,True,5120,256,256,wb,73.5922,84.2562,10.0


In [30]:
# No hyperparameter is varied: mean scores over all runs that match best_args exactly.
vary(df, best_args)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,dev_score,train_score,epoch
preprocess,bert_lr,coref_lr,character_lr,schedule,weight_decay,dropout,genre,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,73.5922,84.2562,10.0


In [38]:
# Same selection as the grouped call with no varied hyperparameter, but group=False
# returns the individual matching runs instead of their mean.
vary(df, best_args, group=False)

Unnamed: 0,dir,preprocess,bert_lr,coref_lr,character_lr,schedule,weight_decay,dropout,genre,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,dev_score,train_score,epoch
53,Nov24_11:50:00AM,regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,74.057,84.647,11
274,Nov27_03:58:46AM,regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,74.057,84.647,11
283,Nov27_02:05:55AM,regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,71.733,82.693,6
294,Nov26_09:26:33PM,regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,74.057,84.647,11
299,Nov27_01:13:03AM,regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,74.057,84.647,11


In [44]:
# Let cr_seq_len and cr_hidden_size differ from best_args (all other hyperparameters
# fixed) and average the scores per (cr_seq_len, cr_hidden_size) pair.
vary(df, best_args, "cr_seq_len", "cr_hidden_size")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,dev_score,train_score,epoch
preprocess,bert_lr,coref_lr,character_lr,schedule,weight_decay,dropout,genre,add_cr_to_coarse,train_document_len,cr_seq_len,cr_hidden_size,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,128,70.905,78.4515,5.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,256,73.5922,84.2562,10.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,512,71.66,77.4335,7.5
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,256,1024,71.2915,72.1635,3.5
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,512,128,70.9115,80.4415,5.5
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,512,256,71.2035,78.585,5.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,512,512,72.898,82.4535,9.0
regular,2e-05,0.0002,0.0002,-1.0,0.001,0.0,wb,True,5120,512,1024,72.6865,80.487,6.5


In [45]:
# Scratch check unrelated to the analysis above: round() with an ndigits argument
# returns -inf unchanged for a negative-infinity input.
round(-np.inf, 4)

-inf