In [56]:
import numpy as np
from joblib import load
from sklearn.metrics import f1_score

In [57]:
# --- Experiment configuration -------------------------------------------------
# Everything below is derived from DATA_SOURCE and N_FOLDS; edit those two first.
DATA_SOURCE = "/home/welton/data"
N_FOLDS = 10
DATASETS = ["acm"]

# Locations of the artifacts read by the loader functions.
STACKING_DIR = f"{DATA_SOURCE}/stacking/stacking_output"
EST_DIR = f"{DATA_SOURCE}/oracle/error_detection"
LABELS_DIR = f"{DATA_SOURCE}/datasets/labels/split_{N_FOLDS}"

# One [classifier, probability-type] pair per base classifier.
CLF_SET = [
    [clf_name, "normal_probas"]
    for clf_name in ("bert", "xlnet", "ktmk", "ktr", "lstmk", "lstr", "ltr")
]

# Directory suffix identifying this classifier combination: the
# "<clf>_<proba_type>" tags in sorted order, joined with "/".
SUFIX = "/".join(sorted(f"{clf_name}_{proba_type}" for clf_name, proba_type in CLF_SET))
SUFIX

'bert_normal_probas/ktmk_normal_probas/ktr_normal_probas/lstmk_normal_probas/lstr_normal_probas/ltr_normal_probas/xlnet_normal_probas'

In [58]:
# Identifier of the feature set whose error-estimation outputs are evaluated
# (presumably "mf" = meta-features -- confirm). It is used verbatim as a
# directory name by load_est_error_preds, so the spelling (including
# "neigborhood") has to match the directory on disk and must not be "corrected".
mf_set = "centroids-ratios_dist_neigborhood_probas_probas-based"

In [59]:
def load_upper_bound(source_dir: str,
                     dataset: str,
                     clfs: list,
                     fold: int,
                     n_folds: int = 10):
    """Load the per-classifier ``y`` arrays of one fold (train and test splits).

    For every ``(clf, proba_type)`` pair the files
    ``{source_dir}/{proba_type}/{dataset}/{n_folds}_folds/{clf}/{fold}/train.npz``
    and ``.../test.npz`` are opened and the array stored under key ``'y'`` is read.

    Parameters
    ----------
    source_dir : str
        Root directory of the stored outputs.
    dataset : str
        Dataset name used as a path component.
    clfs : list
        Iterable of ``(classifier_name, proba_type)`` pairs.
    fold : int
        Fold identifier used as the last path component.
    n_folds : int, default 10
        Number used in the ``"{n_folds}_folds"`` directory name. The default
        reproduces the previously hard-coded ``10_folds`` layout.

    Returns
    -------
    dict
        ``{clf: {"train": ndarray, "test": ndarray}}``.
    """
    uppers = {}
    for clf, proba_type in clfs:
        upper_dir = f"{source_dir}/{proba_type}/{dataset}/{n_folds}_folds/{clf}/{fold}"
        uppers[clf] = {}
        for split in ("train", "test"):
            # Close the .npz archive explicitly instead of relying on garbage
            # collection; the array is fully read before the file is closed.
            with np.load(f"{upper_dir}/{split}.npz") as archive:
                uppers[clf][split] = archive["y"]

    return uppers


def load_est_error_preds(source_dir: str,
                         dataset: str,
                         clfs: list,
                         sufix: str,
                         mf_set: str,
                         fold: int,
                         n_folds: int = 10):
    """Load the error-estimation outputs of every classifier for one fold.

    For every ``(clf, proba_type)`` pair the files ``train.npz`` and
    ``test.npz`` under
    ``{source_dir}/{proba_type}/{dataset}/{n_folds}_folds/{clf}/{sufix}/{mf_set}/{fold}``
    are opened and the array stored under key ``'y'`` is read.

    Parameters
    ----------
    source_dir : str
        Root directory of the error-estimation outputs.
    dataset : str
        Dataset name used as a path component.
    clfs : list
        Iterable of ``(classifier_name, proba_type)`` pairs.
    sufix : str
        Path fragment identifying the classifier combination (may contain "/").
    mf_set : str
        Feature-set identifier used as a directory name.
    fold : int
        Fold identifier used as the last path component.
    n_folds : int, default 10
        Number used in the ``"{n_folds}_folds"`` directory name. The default
        reproduces the previously hard-coded ``10_folds`` layout.

    Returns
    -------
    dict
        ``{clf: {"train": ndarray, "test": ndarray}}``.
    """
    est_error = {}
    for clf, proba_type in clfs:
        est_dir = f"{source_dir}/{proba_type}/{dataset}/{n_folds}_folds/{clf}/{sufix}/{mf_set}/{fold}"
        est_error[clf] = {}
        for split in ("train", "test"):
            # Close the .npz archive explicitly instead of relying on garbage
            # collection; the array is fully read before the file is closed.
            with np.load(f"{est_dir}/{split}.npz") as archive:
                est_error[clf][split] = archive["y"]

    return est_error


def load_stacking_probas(source_dir: str,
                         dataset: str,
                         sufix: str,
                         fold: int,
                         n_folds: int = 10):
    """Open the stacking output archive (``probas.npz``) of one fold.

    The file is read from
    ``{source_dir}/{dataset}/{n_folds}_folds/logistic_regression/normal_probas/{sufix}/fold_{fold}/probas.npz``.

    Parameters
    ----------
    source_dir : str
        Root directory of the stacking outputs.
    dataset : str
        Dataset name used as a path component.
    sufix : str
        Path fragment identifying the classifier combination (may contain "/").
    fold : int
        Fold identifier; appears in the path as ``fold_{fold}``.
    n_folds : int, default 10
        Number used in the ``"{n_folds}_folds"`` directory name. The default
        reproduces the previously hard-coded ``10_folds`` layout.

    Returns
    -------
    The object returned by ``np.load`` for the archive. It is returned still
    open so that callers can index it by key (e.g. ``["X_test"]``); callers
    holding on to it should close it when done.
    """
    probas_path = (
        f"{source_dir}/{dataset}/{n_folds}_folds/logistic_regression/"
        f"normal_probas/{sufix}/fold_{fold}/probas.npz"
    )
    return np.load(probas_path)

def load_y(source_dir: str, dataset: str, fold: int):
    """Return the array saved as ``test.npy`` for the given dataset and fold.

    The file is looked up at ``{source_dir}/{dataset}/{fold}/test.npy`` and
    handed to ``np.load`` unchanged.
    """
    labels_path = f"{source_dir}/{dataset}/{fold}/test.npy"
    y = np.load(labels_path)
    return y

def load_probas(source_dir: str,
                dataset: str,
                clf_set: list,
                fold: int,
                n_folds: int):
    """Load the test-split probability matrix of every classifier for one fold.

    For every ``(clf, proba_type)`` pair the file
    ``{source_dir}/{proba_type}/split_{n_folds}/{dataset}/{n_folds}_folds/{clf}/{fold}/test.npz``
    is opened and the array stored under key ``'X_test'`` is read.

    Parameters
    ----------
    source_dir : str
        Root directory of the stored probabilities.
    dataset : str
        Dataset name used as a path component.
    clf_set : list
        Iterable of ``(classifier_name, proba_type)`` pairs.
    fold : int
        Fold identifier used as a path component.
    n_folds : int
        Number of folds; appears in both ``split_{n_folds}`` and
        ``{n_folds}_folds``.

    Returns
    -------
    dict
        ``{clf: ndarray}`` with the ``X_test`` array of each classifier.
    """
    d_probas = {}
    for clf, proba_type in clf_set:
        probs_path = f"{source_dir}/{proba_type}/split_{n_folds}/{dataset}/{n_folds}_folds/{clf}/{fold}/test.npz"
        # Close the .npz archive explicitly instead of relying on garbage
        # collection; the array is fully read before the file is closed.
        with np.load(probs_path) as archive:
            d_probas[clf] = archive["X_test"]

    return d_probas

def get_est_err_best_est(y_test: np.ndarray, est_err: dict, clf_probas: dict):
    """Predict each test document with the classifier that has the highest estimate.

    For every document index the classifier whose ``est_err[clf]["test"]``
    value is largest is selected, and the predicted class is the argmax of that
    classifier's row in ``clf_probas``. Ties go to the classifier that appears
    first in ``est_err``.

    The candidate classifiers are taken from ``est_err`` itself rather than
    from a notebook-level global, so the result depends only on the arguments.
    The running maximum no longer starts at 0, so a document whose estimates
    are all zero or negative still gets a classifier instead of raising
    ``KeyError('')``.

    Parameters
    ----------
    y_test : np.ndarray
        Test labels; only ``y_test.shape[0]`` (the number of documents) is used.
    est_err : dict
        ``{clf: {"test": array}}`` with one value per test document. Higher
        values are preferred (presumably a confidence that the classifier is
        right -- confirm against the producer of these files).
    clf_probas : dict
        ``{clf: array}`` where row ``i`` holds the class scores of document ``i``.

    Returns
    -------
    list
        One predicted class index per test document.

    Raises
    ------
    ValueError
        If there are documents to predict but ``est_err`` has no classifiers.
    """
    n_docs = y_test.shape[0]
    # Look up each classifier's test-split estimates once, not once per document.
    test_estimates = {clf: splits["test"] for clf, splits in est_err.items()}
    if n_docs > 0 and not test_estimates:
        raise ValueError("est_err must contain at least one classifier")

    best_est_err_preds = []
    for doc_idx in range(n_docs):
        chose_clf = None
        max_conf = None
        for clf, estimates in test_estimates.items():
            # Strict '<' keeps the first classifier on ties; the first candidate
            # is always accepted so chose_clf is never left unset.
            if chose_clf is None or max_conf < estimates[doc_idx]:
                max_conf = estimates[doc_idx]
                chose_clf = clf
        best_est_err_preds.append(clf_probas[chose_clf][doc_idx].argmax())
    return best_est_err_preds


In [60]:
# Per-fold macro-F1 of two strategies evaluated on the same test labels:
#   est_scores -- per document, use the base classifier with the highest error-estimation value
#   ml_scores  -- argmax of the stacking (logistic_regression) probabilities
est_scores = []
ml_scores = []
# NOTE(review): the dataset is hard-coded as "acm" below although DATASETS is
# defined in the configuration cell -- confirm whether a loop over DATASETS was intended.
for fold in np.arange(N_FOLDS):

    # Error-estimation outputs for every classifier in CLF_SET.
    est_err = load_est_error_preds(EST_DIR, "acm", CLF_SET, SUFIX, mf_set, fold)
    # Stacking probabilities for the test split and the labels to score against.
    meta_probas = load_stacking_probas(STACKING_DIR, "acm", SUFIX, fold)["X_test"]
    y_test = load_y(LABELS_DIR, "acm", fold)
    y_pred = meta_probas.argmax(axis=1)
    # Base-classifier probabilities, needed to turn the chosen classifier into a class.
    clf_probas = load_probas(DATA_SOURCE, "acm", CLF_SET, fold, N_FOLDS)
    best_est = get_est_err_best_est(y_test, est_err, clf_probas)

    est_scores.append(f1_score(y_test, best_est, average="macro"))
    ml_scores.append(f1_score(y_test, y_pred, average="macro"))


In [61]:
# Mean macro-F1 over the folds, displayed as
# (estimate-driven classifier selection, stacking meta-classifier).
np.mean(est_scores), np.mean(ml_scores)

(0.7344573676800095, 0.7395255498713335)