In [3]:
import itertools
import os
import pandas as pd
import re
import yaml

In [2]:
# Directory containing the cross-validation run sub-directories scanned by the cells below.
# DATA_DIR must be set in the environment; check it explicitly so that a missing variable
# produces a clear message instead of the opaque TypeError os.path.join raises on None.
data_dir = os.getenv("DATA_DIR")
if data_dir is None:
    raise RuntimeError("DATA_DIR environment variable is not set")
results_dir = os.path.join(data_dir,
                           "mica_text_coref/movie_coref/results/coreference/cross_val_excerpts_Dec19-21")

In [25]:
# One log entry per evaluated epoch: the epoch number, then a "dev::" line and a "train::"
# line. Only the loss and the "Span" metric of each line are captured (groups 2-5).
epoch_pattern = re.compile(r"Epoch = (\d+)\n"
                           r"dev:: loss=([0-9\.]+), metric:Word=[0-9\.]+, Span=([0-9\.]+), Character=[0-9\.]+\n"
                           r"train:: loss=([0-9\.]+), metric:Word=[0-9\.]+, Span=([0-9\.]+), Character=[0-9\.]+")


def _find_hyperparameter(name, content, log_file):
    """Return the text that follows ``<name> = `` in the log.

    Raises:
        ValueError: if the hyperparameter line is absent, naming the offending log file
            (instead of an AttributeError on ``None.group``).
    """
    match = re.search(name + r"\s+= (.+)", content)
    if match is None:
        raise ValueError(f"hyperparameter '{name}' not found in {log_file}")
    return match.group(1)


def _parse_epochs(content):
    """Collect the per-epoch dev/train losses and span scores found in the log text.

    Returns:
        dict with the four per-epoch lists, the maximum dev and train scores, and
        ``best_epoch`` (``None`` when the log contains no complete epoch entry).
    """
    dev_losses, dev_scores, train_losses, train_scores = [], [], [], []
    max_dev_score, max_train_score, best_epoch = -1, -1, None
    for match in epoch_pattern.finditer(content):
        epoch = int(match.group(1))
        dev_loss, dev_score = float(match.group(2)), float(match.group(3))
        train_loss, train_score = float(match.group(4)), float(match.group(5))
        # Strict ">" keeps the earliest epoch when several epochs tie for the best dev score.
        if dev_score > max_dev_score:
            best_epoch = epoch
        max_dev_score = max(max_dev_score, dev_score)
        max_train_score = max(max_train_score, train_score)
        dev_losses.append(dev_loss)
        dev_scores.append(dev_score)
        train_losses.append(train_loss)
        train_scores.append(train_score)
    return dict(dev_losses=dev_losses, dev_scores=dev_scores,
                train_losses=train_losses, train_scores=train_scores,
                max_dev_score=max_dev_score, max_train_score=max_train_score,
                best_epoch=best_epoch)


def _build_result(content, log_file):
    """Rebuild the result dictionary of a run from the text of its training log.

    Args:
        content: full text of the training log.
        log_file: path of the log, used only in error messages.

    Returns:
        The result dict to be written as YAML, or ``None`` when the log holds no complete
        epoch entry (so no placeholder score of -1 is recorded as a real result).

    Raises:
        ValueError: if a required hyperparameter line is missing from the log.
    """
    epochs = _parse_epochs(content)
    if epochs["best_epoch"] is None:
        return None
    preprocess = _find_hyperparameter("preprocess", content, log_file)
    test_movie = _find_hyperparameter("test_movie", content, log_file)
    model_lr = float(_find_hyperparameter("coref_lr", content, log_file))
    bert_lr = float(_find_hyperparameter("bert_lr", content, log_file))
    warmup_steps = float(_find_hyperparameter("warmup_steps", content, log_file))
    # NOTE(review): every value not parsed from the log is hard-coded here; presumably they
    # mirror the configuration these runs were trained with -- confirm.
    return dict(character_recognition=dict(tag_embedding_size=16,
                                           gru_nlayers=1,
                                           gru_hidden_size=256,
                                           gru_bidirectional=True),
                preprocess=preprocess,
                test_movie=test_movie,
                train_excerpts=True,
                topk=50,
                dropout=0,
                freeze_bert=False,
                genre="wb",
                bce_weight=0.5,
                bert_lr=bert_lr,
                # The log's coref_lr is reused for both the character and coref learning rates.
                character_lr=model_lr,
                coref_lr=model_lr,
                warmup_steps=warmup_steps,
                weight_decay=1e-3,
                train_document_len=5120,
                train_overlap_len=0,
                dev_document_len=5120,
                dev_overlap_len=512,
                dev_merge_strategy="avg",
                test_document_lens=[5120],
                test_overlap_lens=[512],
                test_merge_strategies=["avg"],
                subword_batch_size=64,
                cr_seq_len=256,
                cr_batch_size=64,
                fn_batch_size=64,
                sp_batch_size=64,
                add_cr_to_coarse=True,
                filter_mentions_by_cr=False,
                remove_singleton_cr=True,
                best_epoch=epochs["best_epoch"],
                max_epochs=20,
                n_epochs_no_eval=0,
                dev_losses=epochs["dev_losses"],
                train_losses=epochs["train_losses"],
                dev_scores=epochs["dev_scores"],
                train_scores=epochs["train_scores"],
                dev_metric=dict(span=dict(lea=dict(f1=epochs["max_dev_score"]))),
                # NOTE(review): the nesting keys look like document_len -> overlap_len ->
                # merge strategy -- confirm against the format of result.yaml.
                train_metric={5120:{512:{"avg":dict(span=dict(lea=dict(f1=epochs["max_train_score"])))}}},
                # NOTE(review): test_metric is filled with the best dev score, not a separate
                # test score -- confirm this is intended.
                test_metric=dict(span=dict(lea=dict(f1=epochs["max_dev_score"])))
                )


# For every "Dec*" run directory that has no result.yaml, reconstruct the result from
# train.log and store it next to the log as result2.yaml.
for dir_ in os.listdir(results_dir):
    if not dir_.startswith("Dec"):
        continue
    log_file = os.path.join(results_dir, dir_, "train.log")
    result_file = os.path.join(results_dir, dir_, "result.yaml")
    if os.path.exists(result_file):
        continue
    # A run directory without a log cannot be reconstructed; skip it instead of crashing.
    if not os.path.exists(log_file):
        print(f"skipping {dir_}: train.log not found")
        continue
    with open(log_file) as f:
        content = f.read()
    result = _build_result(content, log_file)
    if result is None:
        print(f"skipping {dir_}: no completed epoch found in train.log")
        continue
    result_file2 = os.path.join(results_dir, dir_, "result2.yaml")
    with open(result_file2, "w") as fw:
        yaml.dump(result, fw)

In [28]:
# Full hyperparameter grid: preprocess x bert_lr x model_lr x warmup_steps x test_movie.
# Every setting for which a result file is found is removed; what remains was never run.
settings = set(itertools.product(
                    ["nocharacters", "addsays", "regular"], [1e-5, 2e-5, 5e-5], [1e-4, 2e-4, 5e-4], [-1, 0, 50, 100],
                    ["avengers_endgame", "dead_poets_society", "john_wick", "prestige", "quiet_place", "zootopia"]))
dirs_with_no_results = []

# Sorted so that the list of result-less directories is reported in a stable order.
for dir_ in sorted(os.listdir(results_dir)):
    if not dir_.startswith("Dec"):
        continue
    result_file1 = os.path.join(results_dir, dir_, "result.yaml")
    result_file2 = os.path.join(results_dir, dir_, "result2.yaml")
    # Prefer result.yaml; fall back to result2.yaml.
    result_file = next((path for path in (result_file1, result_file2) if os.path.exists(path)), None)
    if result_file is None:
        dirs_with_no_results.append(dir_)
        continue
    with open(result_file, "r") as f:
        # NOTE(review): FullLoader can construct more than plain data types; acceptable for
        # result files produced by this project, do not point it at untrusted YAML.
        result = yaml.load(f, Loader=yaml.FullLoader)
    setting = (result["preprocess"], result["bert_lr"], result["coref_lr"], result["warmup_steps"],
               result["test_movie"])
    # Numeric fields compare by value, so a warmup stored as -1.0 still matches the grid's -1.
    settings.discard(setting)

print(f"{len(dirs_with_no_results)} directories with empty results:")
print(dirs_with_no_results)
print()

# Set iteration order changes between kernel sessions; sort once so both listings below
# are reproducible and appear in the same order.
missing_settings = sorted(settings)

print(f"{len(settings)} settings not experimented")
for preprocess, bert_lr, model_lr, warmup, movie in missing_settings:
    print(f"preprocess = {preprocess:20s}, bert_lr = {bert_lr:.0e}, model_lr = {model_lr:.0e}, "
          f"warmup_steps = {warmup:4d}, test_movie = {movie}")

print()
print("bash script arr")
for preprocess, bert_lr, model_lr, warmup, movie in missing_settings:
    print(f"{preprocess} {bert_lr} {model_lr} {warmup} {movie}")

0 directories with empty results:
[]

14 settings not experimented
preprocess = nocharacters        , bert_lr = 5e-05, model_lr = 1e-04, warmup_steps =    0, test_movie = zootopia
preprocess = addsays             , bert_lr = 2e-05, model_lr = 1e-04, warmup_steps =   -1, test_movie = avengers_endgame
preprocess = nocharacters        , bert_lr = 1e-05, model_lr = 1e-04, warmup_steps =   -1, test_movie = dead_poets_society
preprocess = regular             , bert_lr = 5e-05, model_lr = 1e-04, warmup_steps =   -1, test_movie = avengers_endgame
preprocess = regular             , bert_lr = 2e-05, model_lr = 1e-04, warmup_steps =   -1, test_movie = avengers_endgame
preprocess = addsays             , bert_lr = 1e-05, model_lr = 1e-04, warmup_steps =   -1, test_movie = dead_poets_society
preprocess = regular             , bert_lr = 1e-05, model_lr = 1e-04, warmup_steps =   -1, test_movie = dead_poets_society
preprocess = nocharacters        , bert_lr = 5e-05, model_lr = 1e-04, warmup_steps =   -

In [5]:
# One row per run: the hyperparameter setting, the held-out movie and the dev span LEA F1.
header = ["preprocess", "bert_lr", "model_lr", "warmup", "movie", "lea"]
rows = []

# Sorted for a reproducible row order.
for dir_ in sorted(os.listdir(results_dir)):
    # Only "Dec*" entries are run directories; the other cells that scan results_dir apply
    # the same filter, so the table and the coverage check look at the same runs.
    if not dir_.startswith("Dec"):
        continue
    result_file1 = os.path.join(results_dir, dir_, "result.yaml")
    result_file2 = os.path.join(results_dir, dir_, "result2.yaml")
    # Prefer result.yaml; fall back to result2.yaml; skip runs that have neither.
    result_file = next((path for path in (result_file1, result_file2) if os.path.exists(path)), None)
    if result_file is None:
        continue
    with open(result_file) as f:
        # NOTE(review): FullLoader can construct more than plain data types; acceptable for
        # result files produced by this project, do not point it at untrusted YAML.
        result = yaml.load(f, Loader=yaml.FullLoader)
    rows.append([result["preprocess"], result["bert_lr"], result["coref_lr"], result["warmup_steps"],
                 result["test_movie"], result["dev_metric"]["span"]["lea"]["f1"]])

df = pd.DataFrame(rows, columns=header)

In [6]:
# Sanity check on the results table: overall shape, column dtypes, and every hyperparameter
# setting that does not have exactly 6 rows (the grid has six test movies per setting).
setting_columns = ["preprocess", "bert_lr", "model_lr", "warmup"]

print(df.shape)
print(df.dtypes)

for setting_key, setting_rows in df.groupby(setting_columns):
    if len(setting_rows) == 6:
        continue
    # setting_key is the (preprocess, bert_lr, model_lr, warmup) tuple of the group.
    print(*setting_key)

(634, 6)
preprocess     object
bert_lr       float64
model_lr      float64
warmup        float64
movie          object
lea           float64
dtype: object
addsays 1e-05 0.0001 -1.0
addsays 2e-05 0.0001 -1.0
addsays 5e-05 0.0001 -1.0
nocharacters 1e-05 0.0001 -1.0
nocharacters 2e-05 0.0001 -1.0
nocharacters 5e-05 0.0001 -1.0
nocharacters 5e-05 0.0001 0.0
regular 1e-05 0.0001 -1.0
regular 2e-05 0.0001 -1.0
regular 5e-05 0.0001 -1.0


In [14]:
# Average the "lea" column over the rows of each hyperparameter setting and show the
# settings ranked from highest to lowest mean.
hyperparameter_columns = ["preprocess", "bert_lr", "model_lr", "warmup"]
mean_lea = df.groupby(hyperparameter_columns).agg({"lea": "mean"})
mean_lea.sort_values(by="lea", ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,lea
preprocess,bert_lr,model_lr,warmup,Unnamed: 4_level_1
regular,0.00002,0.0001,-1.0,64.747500
addsays,0.00002,0.0001,-1.0,63.957000
addsays,0.00005,0.0001,-1.0,62.494400
addsays,0.00001,0.0001,-1.0,62.179000
regular,0.00002,0.0002,50.0,61.600000
...,...,...,...,...
nocharacters,0.00001,0.0001,100.0,50.719000
nocharacters,0.00001,0.0001,0.0,50.707000
nocharacters,0.00005,0.0005,0.0,50.126167
nocharacters,0.00005,0.0002,-1.0,49.846167
