In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import altair as alt
import json

In [2]:
def get_df_from_history(filepath, method_name):
    """Load a JSON history file into a DataFrame tagged with epoch and method.

    Parameters
    ----------
    filepath : str or path-like
        Location of the JSON file. Its parsed content is handed directly to
        ``pd.DataFrame``.
    method_name : str
        Label written to every row of the ``method`` column.

    Returns
    -------
    pandas.DataFrame
        The parsed data plus two trailing columns: ``epoch`` (the frame's
        index values plus one) and ``method`` (constant ``method_name``).
    """
    with open(filepath, "r") as history_file:
        history = json.load(history_file)

    # NOTE(review): presumably a mapping of metric name -> per-epoch values; confirm.
    history_df = pd.DataFrame(history)
    return history_df.assign(
        epoch=history_df.index.values + 1,
        method=method_name,
    )

In [3]:
# Display label used in the `method` column for each history file stem.
method_corr = {
    "kpm_nto": "KPM",
    "kpm": "KPM*",
    "yake": "YAKE*",
    "positionrank": "PositionRank*",
    "dev": "Manual"
}

# One frame per history file; the load order is spelled out explicitly and is
# not the same as the dict's key order.
result_dfs = [
    get_df_from_history(f"results/spe18_20e_history/{method}.json", method_corr[method])
    for method in ("kpm", "yake", "positionrank", "kpm_nto", "dev")
]

# Stack all runs into one long frame. Each frame keeps its own index, so index
# values repeat across methods; use the `epoch` column to identify rows.
results_df = pd.concat(result_dfs)

In [4]:
# Show the column names of the combined frame as this cell's output.
results_df.columns.values

array(['val_loss', 'val_crf_ner_loss', 'val_crf_term_loss',
       'val_cls_rel_loss', 'loss', 'crf_ner_loss', 'crf_term_loss',
       'cls_rel_loss', 'f1_ner', 'f1_term', 'f1_rel', 'epoch', 'method'],
      dtype=object)

In [33]:
# Keep the two KPM runs and relabel them with the legend text used below.
training_set_labels = {"KPM": "All sentences", "KPM*": "With entity only"}
te_results_df = results_df.loc[
    results_df["method"].isin(["KPM", "KPM*"])
].assign(method=lambda frame: frame["method"].map(training_set_labels))

# (encoded field, y-axis title, y-axis domain) for each validation-loss panel.
loss_panels = [
    ("val_crf_ner_loss:Q", "NER Loss", [9, 10.4]),
    ("val_crf_term_loss:Q", "EE Loss", [4, 5]),
    ("val_cls_rel_loss:Q", "RE Loss", [0, 0.6]),
]

# One line chart per panel: epoch on x, one coloured line per training set.
ner_loss, term_loss, rel_loss = [
    alt.Chart(te_results_df).mark_line().encode(
        x="epoch:Q",
        y=alt.Y(field, title=axis_title, scale=alt.Scale(domain=axis_domain)),
        color=alt.Color("method:N", title="Training Set"),
    )
    for field, axis_title, axis_domain in loss_panels
]

# Lay the three panels out side by side.
all_charts = ner_loss | term_loss | rel_loss

# Axis and legend share the typeface; only the legend title is larger.
axis_fonts = {
    "titleFont": "Times New Roman",
    "titleFontSize": 18,
    "labelFont": "Times New Roman",
    "labelFontSize": 18,
}
legend_fonts = {**axis_fonts, "titleFontSize": 20}

# Last expression of the cell: the configured chart is the displayed output.
all_charts.configure_axis(**axis_fonts).configure_legend(**legend_fonts)

In [35]:
# Keep the two KPM runs and relabel them with the legend text used below.
training_set_labels = {"KPM": "All sentences", "KPM*": "With entity only"}
te_results_df = results_df.loc[
    results_df["method"].isin(["KPM", "KPM*"])
].assign(method=lambda frame: frame["method"].map(training_set_labels))

# All three F-score panels share the same y-axis range.
f1_domain = [0, 0.6]

# (encoded field, y-axis title) for each F-score panel.
f1_panels = [
    ("f1_ner:Q", "NER F-score"),
    ("f1_term:Q", "EE F-score"),
    ("f1_rel:Q", "RE F-score"),
]

# One line chart per panel: epoch on x, one coloured line per training set.
ner_f1, term_f1, rel_f1 = [
    alt.Chart(te_results_df).mark_line().encode(
        x="epoch:Q",
        y=alt.Y(field, title=axis_title, scale=alt.Scale(domain=f1_domain)),
        color=alt.Color("method:N", title="Training Set"),
    )
    for field, axis_title in f1_panels
]

# Lay the three panels out side by side.
all_charts = ner_f1 | term_f1 | rel_f1

# Axis and legend share the typeface; only the legend title is larger.
axis_fonts = {
    "titleFont": "Times New Roman",
    "titleFontSize": 18,
    "labelFont": "Times New Roman",
    "labelFontSize": 18,
}
legend_fonts = {**axis_fonts, "titleFontSize": 20}

# Last expression of the cell: the configured chart is the displayed output.
all_charts.configure_axis(**axis_fonts).configure_legend(**legend_fonts)

In [20]:
# Compare the KPM*, KPM and Manual runs; labels are used as stored in `method`.
compared_methods = ["KPM*", "KPM", "Manual"]
te_results_df = results_df.loc[results_df["method"].isin(compared_methods)]

# (encoded field, y-axis title, y-axis domain) for each validation-loss panel.
loss_panels = [
    ("val_crf_ner_loss:Q", "NER Loss", [9, 10.4]),
    ("val_crf_term_loss:Q", "TE Loss", [4, 5]),
    ("val_cls_rel_loss:Q", "RE Loss", [0, 0.6]),
]

# One line chart per panel: epoch on x, one coloured line per method.
ner_loss, term_loss, rel_loss = [
    alt.Chart(te_results_df).mark_line().encode(
        x="epoch:Q",
        y=alt.Y(field, title=axis_title, scale=alt.Scale(domain=axis_domain)),
        color=alt.Color("method:N", title="TE Method"),
    )
    for field, axis_title, axis_domain in loss_panels
]

# Lay the three panels out side by side.
all_charts = ner_loss | term_loss | rel_loss

# Axis and legend share the typeface; only the legend title is larger.
axis_fonts = {
    "titleFont": "Times New Roman",
    "titleFontSize": 18,
    "labelFont": "Times New Roman",
    "labelFontSize": 18,
}
legend_fonts = {**axis_fonts, "titleFontSize": 20}

# Last expression of the cell: the configured chart is the displayed output.
all_charts.configure_axis(**axis_fonts).configure_legend(**legend_fonts)

In [11]:
# Compare the KPM, KPM* and Manual runs; labels are used as stored in `method`.
compared_methods = ["KPM", "KPM*", "Manual"]
te_results_df = results_df.loc[results_df["method"].isin(compared_methods)]

# All three F1 panels share the same y-axis range.
f1_domain = [0, 0.65]

# (encoded field, y-axis title) for each F1 panel.
f1_panels = [
    ("f1_ner:Q", "NER F1"),
    ("f1_term:Q", "Terms F1"),
    ("f1_rel:Q", "Rel F1"),
]

# One line chart per panel: epoch on x, one coloured line per method.
ner_f1, term_f1, rel_f1 = [
    alt.Chart(te_results_df).mark_line().encode(
        x="epoch:Q",
        y=alt.Y(field, title=axis_title, scale=alt.Scale(domain=f1_domain)),
        color=alt.Color("method:N", title="TE Method"),
    )
    for field, axis_title in f1_panels
]

# Lay the three panels out side by side.
all_charts = ner_f1 | term_f1 | rel_f1

# Font sizes only (no typeface override in this figure). The configured chart
# is the last expression, so it becomes the cell's displayed output.
all_charts.configure_title(
    fontSize=24
).configure_header(
    titleFontSize=22, labelFontSize=20
).configure_axis(
    titleFontSize=18, labelFontSize=18
).configure_legend(
    titleFontSize=20, labelFontSize=18
)

In [24]:
# Maximum value of each F1 column over all rows of the KPM* and Manual runs.
f1_columns = ["f1_ner", "f1_term", "f1_rel"]
is_compared_method = results_df["method"].isin(["KPM*", "Manual"])
results_df.loc[is_compared_method].groupby("method")[f1_columns].max()

Unnamed: 0_level_0,f1_ner,f1_term,f1_rel
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KPM*,0.585797,0.480303,0.536034
Manual,0.631644,0.605183,0.605


In [None]:
# Validation loss per epoch, one line per method.
# Pivot on the `epoch` column itself: it has to stay in the frame, because
# reset_index() would only expose the unnamed index as a column called "index"
# and pivot_table(index="epoch") would then raise KeyError.
results_df.pivot_table(index="epoch", columns="method", values="val_loss").plot()

In [None]:
# `f1_term` per epoch, one line per method.
# Pivot on the `epoch` column itself: it has to stay in the frame, because
# reset_index() would only expose the unnamed index as a column called "index"
# and pivot_table(index="epoch") would then raise KeyError.
results_df.pivot_table(index="epoch", columns="method", values="f1_term").plot()

In [None]:
# Training vs. validation loss for the run loaded from "kpm.json".
# The `method` column stores display labels (method_corr maps "kpm" -> "KPM*"),
# so comparing against the raw stem "kpm" selects no rows. Filter on the label
# and plot against the `epoch` column rather than the positional index.
results_df[results_df["method"] == "KPM*"].plot(x="epoch", y=["loss", "val_loss"])

In [None]:
# Training vs. validation loss for the run loaded from "positionrank.json".
# The `method` column stores display labels (method_corr maps "positionrank" ->
# "PositionRank*"), so comparing against the raw stem selects no rows. Filter on
# the label and plot against the `epoch` column rather than the positional index.
results_df[results_df["method"] == "PositionRank*"].plot(x="epoch", y=["loss", "val_loss"])

In [None]:
# Rows of the KPM* run only; both panels below are drawn from them.
kpm_star_rows = results_df.loc[results_df["method"] == "KPM*"]

# (training column, validation column, y-axis title, y-axis domain) per panel.
# The validation (`val_*`) column is the series labelled "Test" in the legend.
loss_specs = [
    ("crf_ner_loss", "val_crf_ner_loss", "NER Loss", [7, 11]),
    ("crf_term_loss", "val_crf_term_loss", "Terms Loss", [3, 5]),
]

panels = []
for train_column, val_column, axis_title, axis_domain in loss_specs:
    # Wide -> long: one row per (epoch, Set) with the value in "Loss".
    kpm_results_df = (
        kpm_star_rows[["epoch", train_column, val_column]]
        .rename(columns={train_column: "Train", val_column: "Test"})
        .melt(
            id_vars=["epoch"],
            value_vars=["Train", "Test"],
            var_name="Set",
            value_name="Loss",
        )
    )
    panels.append(
        alt.Chart(kpm_results_df).mark_line().encode(
            x="epoch:Q",
            y=alt.Y("Loss:Q", title=axis_title, scale=alt.Scale(domain=axis_domain)),
            color="Set:N",
        )
    )

ner_chart, term_chart = panels

# Lay the two panels out side by side.
all_charts = ner_chart | term_chart

# Font sizes only. The configured chart is the last expression, so it becomes
# the cell's displayed output.
all_charts.configure_title(
    fontSize=24
).configure_header(
    titleFontSize=22, labelFontSize=20
).configure_axis(
    titleFontSize=18, labelFontSize=18
).configure_legend(
    titleFontSize=20, labelFontSize=18
)