In [1]:
import numpy as np
import pandas as pd
import itertools

In [2]:
# Parse the validation ranking file into `top_3_validation`.
# As implied by the parsing below, each non-blank line has the shape
#     <model>,<True|False>,<type>,[<seed>, <seed>, ...]
# where seeds may be wrapped in single quotes. Every parsed entry is the list
#     [model, no_context (bool), type, top_3 (list of int seeds)]
top_3_validation = []
with open("../6_validation/top_3.txt") as top_file:
    # Iterate the file lazily instead of materialising it with readlines().
    for line in top_file:
        line = line.strip()
        if not line:
            # Blank (e.g. trailing) lines carry no experiment; without this
            # guard the "[" split below would raise IndexError on them.
            continue
        split_line = line.split(",")
        # Everything after the first "[" is the bracketed, comma-separated seed list.
        seed_tokens = line.split("[")[1].rstrip("]").split(",")
        top_3 = [int(seed.lstrip(" ").strip("'")) for seed in seed_tokens]
        # The second field is the literal text "True"/"False"; convert it to a bool.
        top_3_validation.append([split_line[0], split_line[1] == "True", split_line[2], top_3])

In [3]:
# Load the three per-seed statistics tables (entity, relation, jer), in that order.
entity_stats, relation_stats, jer_stats = (
    pd.read_csv(csv_path)
    for csv_path in ("entity_stats.csv", "relation_stats.csv", "jer_stats.csv")
)

In [4]:
# Show the column index of each statistics table so the schemas can be compared.
for stats_frame in (entity_stats, relation_stats, jer_stats):
    print(stats_frame.columns)

Index(['model', 'no_context', 'type', 'seed', 'label_name', 'prec', 'recall',
       'f1', 'support'],
      dtype='object')
Index(['model', 'no_context', 'type', 'seed', 'sentence_id', 'mcc', 'acc',
       'prec', 'recall', 'f1'],
      dtype='object')
Index(['model', 'no_context', 'type', 'seed', 'sentence_id',
       'jaccard_similarity', 'intersect', 'union'],
      dtype='object')


In [5]:
# pandas' std already applies Bessel's correction (ddof=1 by default)

In [6]:
# Build `final_entity_results`: per experiment in `top_3_validation`, the mean and
# standard deviation of prec/recall/f1 per entity label across the selected seeds.
# The result is indexed by `label_name`, with columns `<metric>_<mean|std>` plus the
# identifying columns model / no_context / type / id.

# NOTE(review): not referenced by the aggregation below; kept defined in case
# another cell relies on it.
entity_labels = ["OUTCOME_VAR", "EXPL_VAR", "BASELINE", "RR", "HR", "OR", "micro avg"]
entity_metrics = ['prec', 'recall', 'f1']

entity_frames = []
for experiment in top_3_validation:
    # experiment = [model, no_context, type, top_3 seeds]
    experiment_type = experiment[2]
    if experiment_type == "r":
        # Experiments of type "r" are looked up under type "e" in the entity table.
        experiment_type = "e"
    experiment_ent = entity_stats[(entity_stats["model"] == experiment[0])
                                  & (entity_stats["no_context"] == experiment[1])
                                  & (entity_stats["type"] == experiment_type)]
    filtered_ent = experiment_ent[experiment_ent["seed"].isin(experiment[3])]

    # Select the metric columns BEFORE aggregating: aggregating every column would
    # also try to average non-numeric columns such as `model`/`type`, which raises
    # TypeError on pandas >= 2.0.
    results = filtered_ent.groupby("label_name")[entity_metrics].agg(['mean', 'std'])
    # Flatten the (metric, stat) MultiIndex into "metric_stat". The strip character
    # is the literal '_' rather than IPython's last-output variable `_`.
    results.columns = ['_'.join(col).rstrip('_') for col in results.columns.values]
    results['model'] = experiment[0]
    results["no_context"] = experiment[1]
    results["type"] = experiment_type
    results["id"] = f"{experiment[0]} {experiment[1]} {experiment_type}"
    entity_frames.append(results)

# Concatenate once instead of re-copying the accumulated frame on every iteration.
# Stays None when there are no experiments, as before.
final_entity_results = pd.concat(entity_frames, axis=0) if entity_frames else None

In [7]:
# Build `final_relation_results`: one row per experiment in `top_3_validation`
# holding the mean and standard deviation, across the selected seeds, of the
# relation metrics taken from the "AGGREGATE" rows of `relation_stats`.
# Columns are `<metric>_<mean|std>` plus model / no_context / type / id.
relation_metrics = ['mcc', 'acc', 'prec', 'recall', 'f1']

relation_frames = []
for experiment in top_3_validation:
    # experiment = [model, no_context, type, top_3 seeds]
    experiment_type = experiment[2]
    experiment_rel = relation_stats[(relation_stats["model"] == experiment[0])
                                    & (relation_stats["no_context"] == experiment[1])
                                    & (relation_stats["sentence_id"] == "AGGREGATE")
                                    & (relation_stats["type"] == experiment_type)]
    filtered_rel = experiment_rel[experiment_rel["seed"].isin(experiment[3])]

    # Select the metric columns BEFORE aggregating: aggregating every column would
    # also try to average non-numeric columns such as `model`/`sentence_id`, which
    # raises TypeError on pandas >= 2.0.
    results = filtered_rel[relation_metrics].agg(['mean', 'std'])
    # Reshape the 2 x n_metrics table (rows: mean/std) into a single row whose
    # columns are (stat, metric) pairs.
    results = results.stack().to_frame().T
    results.columns = results.columns.to_flat_index()

    # Reverse each (stat, metric) pair to get "metric_stat". The strip character is
    # the literal '_' rather than IPython's last-output variable `_`.
    results.columns = ['_'.join(col[::-1]).rstrip('_') for col in results.columns.values]
    results['model'] = experiment[0]
    results["no_context"] = experiment[1]
    results["type"] = experiment_type
    results["id"] = f"{experiment[0]} {experiment[1]} {experiment_type}"
    relation_frames.append(results)

# Concatenate once instead of re-copying the accumulated frame on every iteration.
# Stays None when there are no experiments, as before.
final_relation_results = pd.concat(relation_frames, axis=0) if relation_frames else None


In [8]:
# Build `final_jer_results`: one row per experiment in `top_3_validation` holding
# the mean and standard deviation, across the selected seeds, of
# `jaccard_similarity` taken from the "AGGREGATE" rows of `jer_stats`.
# Columns are `jaccard_similarity_<mean|std>` plus model / no_context / type / id.
jer_metrics = ['jaccard_similarity']

jer_frames = []
for experiment in top_3_validation:
    # experiment = [model, no_context, type, top_3 seeds]
    experiment_type = experiment[2]
    experiment_jer = jer_stats[(jer_stats["model"] == experiment[0])
                               & (jer_stats["no_context"] == experiment[1])
                               & (jer_stats["sentence_id"] == "AGGREGATE")
                               & (jer_stats["type"] == experiment_type)]
    filtered_jer = experiment_jer[experiment_jer["seed"].isin(experiment[3])]

    # Select the metric column BEFORE aggregating: aggregating every column would
    # also try to average non-numeric columns such as `model`/`sentence_id`, which
    # raises TypeError on pandas >= 2.0.
    results = filtered_jer[jer_metrics].agg(['mean', 'std'])
    # Reshape the 2 x 1 table (rows: mean/std) into a single row whose columns
    # are (stat, metric) pairs.
    results = results.stack().to_frame().T
    results.columns = results.columns.to_flat_index()

    # Reverse each (stat, metric) pair to get "metric_stat". The strip character is
    # the literal '_' rather than IPython's last-output variable `_`.
    results.columns = ['_'.join(col[::-1]).rstrip('_') for col in results.columns.values]
    results['model'] = experiment[0]
    results["no_context"] = experiment[1]
    results["type"] = experiment_type
    results["id"] = f"{experiment[0]} {experiment[1]} {experiment_type}"
    jer_frames.append(results)

# Concatenate once instead of re-copying the accumulated frame on every iteration.
# Stays None when there are no experiments, as before.
final_jer_results = pd.concat(jer_frames, axis=0) if jer_frames else None


In [9]:
# Write the three summary tables to CSV. Only the entity summary is written with
# its index; the relation and jer summaries are written without one.
summary_outputs = (
    (final_entity_results, 'ent_summary.csv', True),
    (final_relation_results, 'rel_summary.csv', False),
    (final_jer_results, 'jer_summary.csv', False),
)
for summary_frame, csv_path, keep_index in summary_outputs:
    summary_frame.to_csv(csv_path, index=keep_index)