In [1349]:
%matplotlib inline
import xgboost as xgb
import numpy as np
from xgboost import plot_importance
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn import gaussian_process
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel
from scipy.stats import norm
from functools import partial

In [1350]:
def read_group(file):
    """Read query-group sizes from a text file containing one integer per line.

    Parameters
    ----------
    file : str
        Path of the group-size file.

    Returns
    -------
    numpy.ndarray
        The group sizes in file order.
    """
    # A context manager guarantees the handle is closed; the original leaked it.
    with open(file, "r") as handle:
        # Skip blank lines (e.g. a trailing newline) instead of crashing on int('').
        group_sizes = [int(line.strip()) for line in handle if line.strip()]
    return np.array(group_sizes)

In [1351]:
def read_data():
    """Load the ranking train/test matrices and attach their query-group sizes.

    Returns
    -------
    tuple
        ``(dtrain, dtest)`` as ``xgb.DMatrix`` objects with groups set.
    """
    dtrain = xgb.DMatrix('test-data/rank.train.txt')
    dtest = xgb.DMatrix('test-data/rank.test.txt')
    # Group sizes live in side-car files, one per split.
    dtrain.set_group(read_group("test-data/rank.train.qgsize.txt"))
    dtest.set_group(read_group("test-data/rank.test.qgsize.txt"))
    return dtrain, dtest

In [1352]:
def train_ranker():
    """Train an NDCG ranking booster on the notebook-level ``dtrain``/``dtest``.

    Reads the module-level names ``dtrain`` and ``dtest``; they must exist
    before this function is called.

    Returns
    -------
    The booster returned by ``xgb.train``.
    """
    booster_params = {
        'max_depth': 6,
        'eta': 1,
        'silent': 1,
        'objective': 'rank:ndcg',
        'nthread': 4,
        'eval_metric': 'ndcg',
    }
    boosting_rounds = 10
    watchlist = [(dtrain, 'train'), (dtest, 'eval')]
    return xgb.train(booster_params, dtrain, boosting_rounds, watchlist)

def ranker_pred():
    """Score the test set with the trained ranker and plot feature importance.

    Reads the module-level names ``bst`` and ``dtest``; they must exist
    before this function is called.

    Returns
    -------
    list
        Test-row positions ordered from highest to lowest predicted score.
        (The original computed this ranking and then discarded it.)
    """
    ypred = bst.predict(dtest)
    # Negating the scores makes argsort produce a descending ranking.
    ind = list(np.argsort(-ypred))
    plot_importance(bst)
    return ind

In [None]:
def train_regressor(train_feats, train_labels, regressor="xgboost", quantile=0.95):
    """Build the requested regressor, fit it on the training data and return it.

    Parameters
    ----------
    train_feats, train_labels
        Training features and targets, forwarded to ``fit_regressor``.
    regressor : str
        One of ``"xgboost"``, ``"gp"``, ``"lower_xgbq"`` or ``"upper_xgbq"``.
    quantile : float
        Target quantile; only used by the two ``*_xgbq`` variants.

    Returns
    -------
    The fitted regressor, or ``None`` (after printing a message) when
    ``regressor`` is not one of the supported names.
    """
    # Guard clause: reject unknown names before building anything.
    if regressor not in ("xgboost", "gp", "lower_xgbq", "upper_xgbq"):
        print("Please specify a valid regressor!")
        return

    if regressor == "xgboost":
        reg = xgb.XGBRegressor(objective ='reg:squarederror', learning_rate=0.2,
                               max_depth=5, n_estimators=200)
    elif regressor == "gp":
        kernel = 1**2 + Matern(length_scale=2, nu=1.5) + WhiteKernel(noise_level=1)
        reg = gaussian_process.GaussianProcessRegressor(
            alpha=1e-10, copy_X_train=True, kernel=kernel,
            n_restarts_optimizer=0, normalize_y=False,
            optimizer='fmin_l_bfgs_b', random_state=None)
    else:
        # The two quantile variants differ only in their tuning constants.
        is_lower = regressor == "lower_xgbq"
        reg = XGBQuantile(n_estimators=200, max_depth=5)
        reg.set_params(quant_alpha=(1. - quantile) if is_lower else quantile,
                       quant_delta=1.0,
                       quant_thres=5.0 if is_lower else 6.0,
                       quant_var=3.2 if is_lower else 4.2)

    fit_regressor(reg, train_feats, train_labels)
    return reg

def fit_regressor(reg, train_feats, train_labels):
    """Fit ``reg`` on the given features/labels and hand the same object back."""
    features, targets = train_feats, train_labels
    reg.fit(features, targets)
    # Return the regressor itself, not whatever ``fit`` returned.
    return reg

def calculate_rmse(preds, labels):
    """Return the root-mean-squared error between predictions and labels."""
    mse = mean_squared_error(labels, preds)
    return np.sqrt(mse)

# modify the function to get confidence band
def test_regressor(reg, test_feats, test_labels=None, get_rmse=True, 
                   get_ci=False, quantile=0.95, lower_reg=None, upper_reg=None):
    preds = None; lower_preds = None; upper_preds = None; rmse = None
    if get_ci:
        if isinstance(reg, xgb.XGBRegressor):
            preds = reg.predict(test_feats)
            lower_preds = lower_reg.predict(test_feats)
            upper_preds = upper_reg.predict(test_feats)
        elif isinstance(reg, gaussian_process.GaussianProcessRegressor):
            preds, std = reg.predict(test_feats, return_std=True)
            i = norm.ppf((1 - quantile) / 2)
            lower_preds, upper_preds = preds + i*std, preds - i*std
        else:
            preds = reg.predict(test_feats)
            print("Confidence band not supported for {}.".format(type(reg)))
    else:
        preds = reg.predict(test_feats)
    if get_rmse:
        valid_index = np.intersect1d(np.argwhere(~np.isnan(test_labels)), np.argwhere(~np.isnan(preds)))
        rmse = calculate_rmse(test_labels[valid_index], preds[valid_index])
    return preds, lower_preds, upper_preds, rmse

def get_xgb_quantile(reg_lower, reg_higher, train_feats, train_labels, test_feats, quantile=0.95):
    """Predict the lower and upper quantile bounds for ``test_feats``.

    ``train_feats``, ``train_labels`` and ``quantile`` are accepted for
    interface compatibility but are not used here; both regressors are
    expected to be fitted already.

    Returns
    -------
    tuple
        ``(reg_lower, reg_higher, lower_preds, upper_preds)``.
    """
    # try reg_alpha=5.0, gamma=0.5, reg_lambda=1.0
    lower_preds = reg_lower.predict(test_feats)
    # The original referenced the undefined names `reg` / `reg_upper` here.
    upper_preds = reg_higher.predict(test_feats)
    return reg_lower, reg_higher, lower_preds, upper_preds

In [1426]:
# Copied from https://towardsdatascience.com/regression-prediction-intervals-with-xgboost-428e0a018b
# quantile regression for xgboost
class XGBQuantile(xgb.XGBRegressor):
    """XGBoost regressor trained with a custom quantile-style objective.

    ``fit`` replaces the booster objective with ``quantile_loss`` bound to the
    ``quant_*`` hyper-parameters stored on the instance; everything else is
    delegated to ``xgb.XGBRegressor``.
    """
    # parameters from XGBRegressor
    # NOTE(review): the keyword list below (e.g. `silent`, `missing=None`,
    # `objective='reg:linear'`) is hard-coded; verify it against the installed
    # xgboost version.
    def __init__(self, quant_alpha=0.95, quant_delta = 1.0, quant_thres=1.0, quant_var =1.0,
               base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
               max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
               n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
               silent=True, subsample=1):
        """Store the four ``quant_*`` settings and forward the rest to the parent.

        quant_alpha is passed to ``quantile_loss`` as ``alpha``, quant_delta as
        ``delta``, quant_thres as ``threshold`` and quant_var as ``var``.
        """
        self.quant_alpha = quant_alpha
        self.quant_delta = quant_delta
        self.quant_thres = quant_thres
        self.quant_var = quant_var
    
        super().__init__(base_score=base_score, booster=booster, colsample_bylevel=colsample_bylevel,
                colsample_bytree=colsample_bytree, gamma=gamma, learning_rate=learning_rate, max_delta_step=max_delta_step,
                max_depth=max_depth, min_child_weight=min_child_weight, missing=missing, n_estimators=n_estimators,
                n_jobs= n_jobs, nthread=nthread, objective=objective, random_state=random_state,
                reg_alpha=reg_alpha, reg_lambda=reg_lambda, scale_pos_weight=scale_pos_weight, seed=seed,
                silent=silent, subsample=subsample)
        # Not read anywhere in the visible code.
        self.test = None
  
    def fit(self, X, y):
        """Fit on ``(X, y)`` using ``quantile_loss`` as the objective; returns ``self``."""
        # The objective is rebuilt on every fit so that it picks up the current
        # quant_* values (e.g. after set_params).
        super().set_params(objective=partial(XGBQuantile.quantile_loss,alpha = self.quant_alpha,delta = self.quant_delta,threshold = self.quant_thres,var = self.quant_var) )
        super().fit(X,y)
        return self
  
    def predict(self,X):
        """Return the parent class predictions for ``X``."""
        return super().predict(X)
  
    def score(self, X, y):
        """Return the reciprocal of the summed quantile cost on ``(X, y)``.

        NOTE(review): a total cost of exactly zero makes the reciprocal
        undefined; no guard is present.
        """
        y_pred = super().predict(X)
        score = XGBQuantile.quantile_score(y, y_pred, self.quant_alpha)
        score = 1./score
        return score
      
    @staticmethod
    def quantile_loss(y_true,y_pred,alpha,delta,threshold,var):
        """Return ``(grad, hess)`` of the objective for residual ``y_true - y_pred``.

        Where ``|residual| < threshold`` the values match
        ``original_quantile_loss``. Elsewhere the gradient is replaced by
        ``var`` with a random sign and the hessian by 1. The random signs come
        from the global NumPy random state, so results vary between runs
        unless that state is seeded.
        """
        x = y_true - y_pred
        # Piecewise gradient/hessian: constant outside
        # [(alpha-1)*delta, alpha*delta), linear in x (hessian 1/delta) inside.
        grad = (x<(alpha-1.0)*delta)*(1.0-alpha)-  ((x>=(alpha-1.0)*delta)& (x<alpha*delta) )*x/delta-alpha*(x>alpha*delta)
        hess = ((x>=(alpha-1.0)*delta)& (x<alpha*delta) )/delta 
 
        # For large residuals substitute +/-var (random sign) and a unit hessian.
        grad = (np.abs(x)<threshold )*grad - (np.abs(x)>=threshold )*(2*np.random.randint(2, size=len(y_true)) -1.0)*var
        hess = (np.abs(x)<threshold )*hess + (np.abs(x)>=threshold )
        return grad, hess
  
    @staticmethod
    def original_quantile_loss(y_true, y_pred, alpha, delta):
        """Return ``(grad, hess)`` of the piecewise loss without the random-sign step."""
        x = y_true - y_pred
        grad = (x < (alpha - 1.0) * delta) * (1.0 - alpha) - \
               ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) * \
               x / delta - alpha * (x > alpha * delta)
        hess = ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) / delta 
        return grad, hess

    @staticmethod
    def quantile_score(y_true, y_pred, alpha):
        """Return the sum of ``quantile_cost`` over the residuals ``y_true - y_pred``."""
        score = XGBQuantile.quantile_cost(x=y_true-y_pred,alpha=alpha)
        score = np.sum(score)
        return score
  
    @staticmethod
    def quantile_cost(x, alpha):
        """Element-wise cost: ``(alpha - 1) * x`` for ``x < 0``, ``alpha * x`` otherwise."""
        return (alpha - 1.0) * x * (x < 0) + alpha * x * (x >= 0)
  
    @staticmethod
    def get_split_gain(gradient,hessian,l=1):
        """Return one gain value per split position ``i``.

        For each ``i`` the left part is ``[:i]`` and the right part ``[i:]``;
        the value is ``sum(g_left)/(sum(h_left)+l) + sum(g_right)/(sum(h_right)+l)
        - sum(g)/(sum(h)+l)``.
        """
        split_gain = list()
        for i in range(gradient.shape[0]):
            split_gain.append(np.sum(gradient[:i])/(np.sum(hessian[:i])+l)+np.sum(gradient[i:])/(np.sum(hessian[i:])+l)-np.sum(gradient)/(np.sum(hessian)+l))
        return np.array(split_gain)

In [1444]:
def p2f(x):
    """Convert a percentage string such as ``"12.5%"`` into a fraction (0.125)."""
    number_text = x.strip('%')
    return float(number_text) / 100

def fix_sf(sf_data):
    """Clean the raw "sf" table and move its six score columns to the end.

    Steps, in order:
    1. drop columns that are entirely NaN;
    2. re-add every column whose name contains a carriage return under a name
       with ``"\\r"`` replaced by a space (the column moves to the end);
    3. convert columns whose first value is a string ending in ``%`` with ``p2f``;
    4. append ``Keywords_*`` / ``NN_*`` copies of the score columns and drop
       the originals together with ``Family``.

    The column order is preserved exactly because downstream code slices
    features and labels by position.

    Parameters
    ----------
    sf_data : pandas.DataFrame

    Returns
    -------
    pandas.DataFrame
        The cleaned frame.
    """
    sf_data = sf_data.dropna(axis=1, how="all")
    for c in sf_data.columns:
        if "\r" in c:
            sf_data[c.replace("\r", " ")] = sf_data[c]
            sf_data = sf_data.drop(axis=1, labels=[c])
    if len(sf_data) > 0:
        for c in sf_data.columns:
            # Positional access: label 0 may not exist (e.g. after shuffling
            # or filtering), which made the original `sf_data[c][0]` raise.
            first_value = sf_data[c].iloc[0]
            if isinstance(first_value, str) and first_value.endswith("%"):
                # Leave non-string cells (NaN) untouched instead of crashing in p2f.
                sf_data[c] = sf_data[c].apply(lambda v: p2f(v) if isinstance(v, str) else v)
    # (new name, source column) in the order the label columns must appear.
    label_columns = [
        ("Keywords_Precision", "Precision"),
        ("Keywords_Recall", "Recall"),
        ("Keywords_F1", "F1-score"),
        ("NN_Precision", "Precision.1"),
        ("NN_Recall", "Recall.1"),
        ("NN_F1", "F1-score.1"),
    ]
    for new_name, source_name in label_columns:
        sf_data[new_name] = sf_data[source_name]
    sf_data = sf_data.drop(axis=1, labels=["Family", "Precision", "Recall", "F1-score", "Precision.1", "Recall.1", "F1-score.1"])
    return sf_data

def fix_bli(bli_data):
    """Give the three BLI score columns short names and place them last.

    The short-named copies are added to the frame that was passed in; the
    returned frame is that frame without the original "(Performance)" columns.
    """
    short_to_original = {
        "MUSE": "MUSE (Performance)",
        "Artetxe17": "Artetxe17 (Performance)",
        "Artetxe16": "Artetxe16 (Performance)",
    }
    for short_name, original_name in short_to_original.items():
        bli_data[short_name] = bli_data[original_name]
    return bli_data.drop(axis=1, labels=list(short_to_original.values()))
    
def convert_to_one_hot(data, pref, column_index):
    """Append one 0/1 indicator column per distinct value of a column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to extend; it is modified in place and also returned.
    pref : str
        Prefix of the new column names (``"<pref>_<value>"``).
    column_index : int
        Position of the source column.

    Returns
    -------
    pandas.DataFrame
        ``data`` with the indicator columns appended in first-seen order.
    """
    # Capture the source column once: appending columns shifts what a negative
    # position refers to, and re-reading it on every iteration is wasted work.
    source = data.iloc[:, column_index]
    for value in source.drop_duplicates():
        # A missing value has no category of its own (and broke the original
        # string concatenation), so it gets no indicator column.
        if pd.isna(value):
            continue
        data["{}_{}".format(pref, value)] = (source == value).astype(int)
    return data

# data file related manipulation
def get_data_langs(task, shuffle=False):
    """Load ``data/data_<task>.csv`` and apply the task-specific preparation.

    Parameters
    ----------
    task : str
        ``"sf"``, ``"monomt"``, ``"bli"``, ``"mi"`` or any name starting with
        ``"tsf"``. Any other name returns the raw frame unchanged.
    shuffle : bool
        Randomly permute the rows before returning.

    Returns
    -------
    tuple
        ``(data, langs, lang_pairs)``; ``langs`` and ``lang_pairs`` are
        ``None`` for tasks that do not define them.
    """
    data = pd.read_csv("data/data_{}.csv".format(task), thousands=',')
    langs = lang_pairs = None

    if task == "sf":
        data = fix_sf(convert_to_one_hot(data, "fam", 3))
        langs = np.array(data.iloc[:, 1])
    elif task.startswith("tsf"):
        data = convert_to_one_hot(data, "src", 0)
        data = convert_to_one_hot(data, "tsf", 1)
        langs = list(np.array(data.iloc[:, 0].drop_duplicates()))
        lang_pairs = data.iloc[:, 0:2]
    elif task == "monomt":
        # Languages are read before the empty rows/columns are removed.
        langs = list(np.array(data.iloc[:, 1]))
        data = data.dropna(axis=1, how="all").dropna(axis=0, how="all")
    elif task == "bli":
        data = data.dropna(axis=1, how="all").dropna(axis=0, how="all")
        data = convert_to_one_hot(data, "src", 1)
        data = convert_to_one_hot(data, "tsf", 2)
        data = fix_bli(data)
        lang_pairs = data.iloc[:, 1:3]
    elif task == "mi":
        data = convert_to_one_hot(data, "src", 1)
        data = convert_to_one_hot(data, "tsf", 3)
        # Copy the score column to the end under a short name, then drop the original.
        data["Accuracy"] = data["BEST SCORE (Accuracy) from SIGMORPHON"]
        data = data.drop(axis=1, labels=["BEST SCORE (Accuracy) from SIGMORPHON"])
        source_col, target_col = data.iloc[:, 1], data.iloc[:, 3]
        lang_pairs = pd.concat([source_col, target_col], axis=1)

    if shuffle:
        data = data.sample(frac=1)
    return data, langs, lang_pairs

def remove_na(df):
    """Return ``df`` without the rows that contain at least one missing value."""
    complete_rows = df.dropna(axis=0)
    return complete_rows

def convert_label(df):
    """Return the values of ``df`` as a flat array with ``len(df)`` entries."""
    n_rows = len(df)
    raw_values = df.values
    return raw_values.reshape(n_rows)
    

def get_transfer_data_by_group(data, lang, lang_pairs):
    """Split ``data`` into a held-out block for ``lang`` and the remaining rows.

    The held-out block spans from the smallest to the largest index label of
    the rows whose first ``lang_pairs`` column equals ``lang``; those labels
    are used as row positions. Rows with missing values are removed from the
    training part only.

    Returns
    -------
    tuple
        ``(train_feats, train_labels, test_feats, test_labels)`` where features
        are columns 5 onward and labels are column 2 (kept as a one-column frame).
    """
    belongs_to_lang = lang_pairs.iloc[:, 0] == lang
    group_labels = lang_pairs[belongs_to_lang].index
    first, stop = min(group_labels), max(group_labels) + 1

    held_out = data.iloc[first:stop]
    remaining = remove_na(pd.concat([data.iloc[:first], data.iloc[stop:]]))

    return (remaining.iloc[:, 5:], remaining.iloc[:, 2:3],
            held_out.iloc[:, 5:], held_out.iloc[:, 2:3])

def _k_fold_layout(task):
    """Return ``(id_key, id_cols, feat_cols, label_cols)`` for ``task``.

    ``id_cols`` is an ``iloc`` column indexer, or a tuple of column positions
    that are concatenated side by side. Raises ``ValueError`` for unknown tasks.
    """
    if task.startswith("tsf"):
        return "test_lang_pairs", slice(0, 2), slice(5, None), slice(2, 3)
    layouts = {
        "sf": ("test_langs", 1, slice(4, -6), slice(-6, None)),
        "monomt": ("test_langs", 1, slice(3, -1), -1),
        "bli": ("test_lang_pairs", slice(1, 3), slice(3, -3), slice(-3, None)),
        "mi": ("test_lang_pairs", (1, 3), slice(4, -1), -1),
    }
    if task not in layouts:
        raise ValueError("Unknown task for k-fold split: {!r}".format(task))
    return layouts[task]


def get_k_fold_data(data, k=10, task="tsfmt"):
    """Cut ``data`` into ``k`` consecutive folds of train/test features and labels.

    Each fold holds out ``ceil(len(data) / k)`` consecutive rows as the test
    part; the remaining rows, minus any row with a missing value, form the
    training part. Trailing folds may be empty when there are few rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Prepared task table; columns are selected by position.
    k : int
        Number of folds.
    task : str
        ``"sf"``, ``"monomt"``, ``"bli"``, ``"mi"`` or a name starting with ``"tsf"``.

    Returns
    -------
    dict
        Lists with one entry per fold under ``train_feats``, ``train_labels``,
        ``test_feats``, ``test_labels`` and either ``test_lang_pairs`` or
        ``test_langs`` (the other identifier list stays empty).

    Raises
    ------
    ValueError
        If ``task`` is not recognised (the original failed later with an
        unbound-variable error).
    """
    # Resolve the column layout first so an unknown task fails immediately.
    id_key, id_cols, feat_cols, label_cols = _k_fold_layout(task)

    lens = len(data)
    ex_per_fold = int(np.ceil(lens / k))
    k_fold_data = {"train_feats": [], "train_labels": [], "test_feats": [], "test_labels": [], "test_lang_pairs": [], "test_langs": []}
    for i in range(k):
        start = ex_per_fold * i
        end = ex_per_fold * (i + 1)
        train_data = remove_na(pd.concat([data.iloc[0:start], data.iloc[end:]]))

        if isinstance(id_cols, tuple):
            # Non-adjacent identifier columns are glued together side by side.
            test_ids = pd.concat([data.iloc[start:end, c] for c in id_cols], axis=1)
        else:
            test_ids = data.iloc[start:end, id_cols]

        k_fold_data[id_key].append(test_ids)
        k_fold_data["train_feats"].append(train_data.iloc[:, feat_cols])
        k_fold_data["train_labels"].append(train_data.iloc[:, label_cols])
        k_fold_data["test_feats"].append(data.iloc[start:end, feat_cols])
        k_fold_data["test_labels"].append(data.iloc[start:end, label_cols])
    return k_fold_data

In [1462]:
# TODO: needs refactoring
def _evaluate_split(train_feats, fit_labels, train_eval_labels, test_feats, test_eval_labels,
                    regressor, get_rmse, get_ci, quantile):
    """Train one regressor (plus quantile regressors if needed) and score both splits."""
    reg = train_regressor(train_feats, fit_labels, regressor=regressor)
    lower_reg = None
    upper_reg = None
    # Only XGBoost models need separately trained quantile regressors for a band.
    if get_ci and isinstance(reg, xgb.XGBRegressor):
        lower_reg = train_regressor(train_feats, fit_labels, regressor="lower_xgbq", quantile=quantile)
        upper_reg = train_regressor(train_feats, fit_labels, regressor="upper_xgbq", quantile=quantile)
    _, _, _, train_rmse = test_regressor(reg, train_feats, train_eval_labels)
    test_preds, test_lower_preds, test_upper_preds, test_rmse = test_regressor(
        reg, test_feats, test_eval_labels, get_rmse=get_rmse,
        get_ci=get_ci, quantile=quantile, lower_reg=lower_reg, upper_reg=upper_reg)
    return {
        "reg": reg,
        "train_rmse": train_rmse,
        "test_preds": test_preds,
        "test_rmse": test_rmse,
        "test_labels": test_eval_labels,
        "test_lower_preds": test_lower_preds,
        "test_upper_preds": test_upper_preds,
    }


def _record_split(re, outcome, split_key, metric=None):
    """Store one ``_evaluate_split`` outcome in ``re`` under ``split_key`` (per metric if given)."""
    for name, value in outcome.items():
        slot = re[name] if metric is None else re[name][metric]
        slot[split_key] = value


def get_re(task, k_fold_eval=False, regressor="xgboost", get_rmse=True, get_ci=False, quantile=0.95):
    """Train and evaluate regressors for ``task`` and collect the results.

    Parameters
    ----------
    task : str
        Task name understood by ``get_data_langs``.
    k_fold_eval : bool
        ``True``: shuffle the data and run a random 10-fold evaluation, then
        call ``sort_pred`` on the results. ``False``: hold out one language
        group at a time (needs both ``langs`` and ``lang_pairs`` from
        ``get_data_langs``).
    regressor : str
        Regressor name passed to ``train_regressor``.
    get_rmse, get_ci, quantile
        Forwarded to ``test_regressor`` for the test split.

    Returns
    -------
    dict
        Keys ``reg``, ``train_rmse``, ``test_rmse``, ``test_preds``,
        ``test_labels``, ``test_lower_preds`` and ``test_upper_preds``, each
        mapping a fold index (k-fold) or language (leave-one-group-out) to its
        value; for ``"sf"`` and ``"bli"`` there is one extra level keyed by
        the evaluation-metric column. In k-fold mode ``test_lang_pairs`` or
        ``test_langs`` is added as well.
    """
    # when doing random k_fold_eval, we need to shuffle the data
    data, langs, lang_pairs = get_data_langs(task, shuffle=k_fold_eval)
    result_keys = ("reg", "train_rmse", "test_rmse", "test_preds", "test_labels",
                   "test_lower_preds", "test_upper_preds")
    re = {key: {} for key in result_keys}

    if not k_fold_eval:
        for lang in langs:
            train_feats, train_labels, test_feats, test_labels = get_transfer_data_by_group(data, lang, lang_pairs)
            # The original unpacked test_regressor's 4-tuple into two names
            # (ValueError) and ignored `regressor`; both are fixed by going
            # through the shared helper.
            outcome = _evaluate_split(train_feats, train_labels, convert_label(train_labels),
                                      test_feats, convert_label(test_labels),
                                      regressor, get_rmse, get_ci, quantile)
            _record_split(re, outcome, lang)
        return re

    k = 10
    k_fold_data = get_k_fold_data(data, k, task)
    uses_lang_pairs = task.startswith("tsf") or task in ("bli", "mi")
    id_key = "test_lang_pairs" if uses_lang_pairs else "test_langs"
    re[id_key] = {}
    multi_metric = task in ("sf", "bli")
    if multi_metric:
        # One result dict per evaluation-metric column.
        for c in k_fold_data["test_labels"][0].columns:
            for key in result_keys:
                re[key][c] = {}

    for i in range(k):
        test_feats = k_fold_data["test_feats"][i]
        # in the case that there is less than 10 folds with limited data
        if len(test_feats) == 0:
            continue
        test_labels = k_fold_data["test_labels"][i]
        train_feats = k_fold_data["train_feats"][i]
        train_labels = k_fold_data["train_labels"][i]

        if task.startswith("tsf") or task in ("monomt", "mi"):
            if task == "monomt":
                test_langs = k_fold_data["test_langs"][i]
                re["test_langs"][i] = pd.DataFrame(test_langs.values, columns=["test_langs"])
            else:
                re["test_lang_pairs"][i] = k_fold_data["test_lang_pairs"][i]
            outcome = _evaluate_split(train_feats, train_labels, convert_label(train_labels),
                                      test_feats, convert_label(test_labels),
                                      regressor, get_rmse, get_ci, quantile)
            _record_split(re, outcome, i)
        elif task == "bli":
            re["test_lang_pairs"][i] = k_fold_data["test_lang_pairs"][i]
            # Features and labels are re-joined so that rows missing one metric
            # can be dropped for that metric only.
            train_tmp = pd.concat([train_feats, train_labels], axis=1)
            n_metrics = len(train_labels.columns)
            for eval_metric in train_labels.columns:
                # remove nan values
                train_tmp_ = train_tmp.dropna(subset=[eval_metric], axis=0)
                metric_feats = train_tmp_.iloc[:, :-n_metrics]
                train_labels_ = convert_label(train_tmp_[eval_metric])
                test_labels_ = convert_label(test_labels[eval_metric])
                outcome = _evaluate_split(metric_feats, train_labels_, train_labels_,
                                          test_feats, test_labels_,
                                          regressor, get_rmse, get_ci, quantile)
                _record_split(re, outcome, i, metric=eval_metric)
        elif task == "sf":
            test_langs = k_fold_data["test_langs"][i]  # Series
            re["test_langs"][i] = pd.DataFrame(test_langs.values, columns=["test_langs"])
            for eval_metric in train_labels.columns:
                train_labels_ = convert_label(train_labels[eval_metric])
                test_labels_ = convert_label(test_labels[eval_metric])
                outcome = _evaluate_split(train_feats, train_labels_, train_labels_,
                                          test_feats, test_labels_,
                                          regressor, get_rmse, get_ci, quantile)
                _record_split(re, outcome, i, metric=eval_metric)
        else:
            break
    sort_pred({task: re}, task, langs, lang_pairs, k_fold_eval, get_ci=get_ci)
    return re

In [1463]:
def _nan_safe_rmse(preds, labels):
    """RMSE over the positions where neither value is NaN (NaN if none remain)."""
    preds = np.asarray(preds, dtype=float)
    labels = np.asarray(labels, dtype=float)
    keep = ~(np.isnan(preds) | np.isnan(labels))
    if not keep.any():
        return float("nan")
    return float(np.sqrt(np.mean((preds[keep] - labels[keep]) ** 2)))


def get_metric(re, metric="test_rmse"):
    """Print RMSE summaries for a dictionary of per-task results.

    Parameters
    ----------
    re : dict
        Maps task name to that task's result dictionary.
    metric : str
        ``"mean_test_rmse"`` prints the mean per-fold test RMSE for a fixed
        list of tasks (all of them must be present in ``re``).
        ``"test_rmse"`` prints, for every task, the RMSE between each sorted
        result array (``"result"`` or ``"result_<name>"``) and its labels
        (``"labels"`` or ``"<name>_labels"``); result arrays without matching
        labels are skipped. Any other value prints nothing.
    """
    if metric == "mean_test_rmse":
        single_target = [("TSF_MT", "tsfmt"), ("TSF_EL", "tsfel"), ("TSF_POS", "tsfpos"),
                         ("TSF_PARSING", "tsfparsing"), ("MONO_MT", "monomt"), ("MI", "mi")]
        for title, task in single_target:
            print(title, np.mean(list(re[task]["test_rmse"].values())))
        for name in ("Keywords_F1", "Keywords_Precision", "Keywords_Recall",
                     "NN_F1", "NN_Precision", "NN_Recall"):
            print("SF_" + name, np.mean(list(re["sf"]["test_rmse"][name].values())))
        for name in ("MUSE", "Artetxe17", "Artetxe16"):
            print("BLI_" + name, np.mean(list(re["bli"]["test_rmse"][name].values())))
    elif metric == "test_rmse":
        prefix = "result_"
        for task in re:
            try:
                results = re[task]
                for key in list(results.keys()):
                    if key == "result":
                        labels_key = "labels"
                        title = "{}".format(task.capitalize())
                    elif key.startswith(prefix):
                        # The original sliced key[3:], which never matched the
                        # "<name>_labels" entry it was looking for.
                        name = key[len(prefix):]
                        labels_key = "{}_labels".format(name)
                        title = "{}_{}".format(task.capitalize(), name.capitalize())
                    else:
                        continue
                    # e.g. confidence-bound arrays have no labels of their own
                    if labels_key not in results:
                        continue
                    print(title, _nan_safe_rmse(results[key], results[labels_key]))
            except Exception as err:
                # Keep going with the other tasks, but show what went wrong
                # instead of hiding it behind a bare except.
                print("Task {} has problem...".format(task), repr(err))

In [1470]:
# TODO: needs refactoring
def sort_pred(re_dict, task, langs, lang_pairs, k_fold_eval=False, get_ci=False):
    if not k_fold_eval:
        for lang in langs:
            preds = re_dict[task]["test_preds"][lang]
            for p in preds:
                print(p)
    else:
        if task.startswith("tsf"):
            test_preds = []
            k = len(re_dict[task]["test_lang_pairs"])
            for i in range(k):
                test_lang_pairs = re_dict[task]["test_lang_pairs"][i]
                preds = re_dict[task]["test_preds"][i]
                test_lang_pairs["preds"] = preds
                test_lang_pairs["test_labels"] = re_dict[task]["test_labels"][i]
                if get_ci:
                    test_lang_pairs["test_upper_preds"] = re_dict[task]["test_upper_preds"][i]
                    test_lang_pairs["test_lower_preds"] = re_dict[task]["test_lower_preds"][i]
                test_preds.append(test_lang_pairs)
            test_preds = pd.concat(test_preds)
            result = []
            labels = []
            if get_ci:
                lower_preds = []
                upper_preds = []
            for l1, l2 in lang_pairs.values:
                se = test_preds[(test_preds.iloc[:, 0] == l1) & (test_preds.iloc[:, 1] == l2)]
                result.append(se["preds"].values[0])
                labels.append(se["test_labels"].values[0])
                if get_ci:
                    lower_preds.append(se["test_lower_preds"].values[0])
                    upper_preds.append(se["test_upper_preds"].values[0])
            re_dict[task]["result"] = np.array(result)
            re_dict[task]["labels"] = np.array(labels)
            if get_ci:
                re_dict[task]["result_lower_preds"] = np.array(lower_preds)
                re_dict[task]["result_upper_preds"] = np.array(upper_preds)
        elif task == "sf":
            k = len(re_dict[task]["test_langs"])
            test_preds_ = re_dict[task]["test_preds"]
            test_labels_ = re_dict[task]["test_labels"]
            test_lower_preds_ = re_dict[task]["test_lower_preds"]
            test_upper_preds_ = re_dict[task]["test_upper_preds"]
            test_preds = []
            for i in range(k):
                test_langs = re_dict[task]["test_langs"][i]
                for eval_metric in test_preds_:
                    test_langs[eval_metric] = test_preds_[eval_metric][i]
                    test_langs[eval_metric + "_labels"] = test_labels_[eval_metric][i]
                    if get_ci:
                        test_langs[eval_metric + "_lower_preds"] = test_lower_preds_[eval_metric][i]
                        test_langs[eval_metric + "_upper_preds"] = test_upper_preds_[eval_metric][i]
                test_preds.append(test_langs)
            test_preds = pd.concat(test_preds)
            for c in test_preds_:
                result = []
                labels = []
                if get_ci:
                    lower_preds = []
                    upper_preds = []
                for lang in langs:
                    se = test_preds[(test_preds.iloc[:, 0] == lang)]
                    result.append(se[c].values[0])
                    labels.append(se[c + "_labels"].values[0])
                    if get_ci:
                        lower_preds.append(se[c + "_lower_preds"].values[0])
                        upper_preds.append(se[c + "_upper_preds"].values[0])
                re_dict[task]["result_{}".format(c)] = np.array(result)
                re_dict[task]["{}_labels".format(c)] = np.array(labels)
                if get_ci:
                    re_dict[task]["{}_upper_preds".format(c)] = np.array(upper_preds)
                    re_dict[task]["{}_lower_preds".format(c)] = np.array(lower_preds)
        elif task == "monomt":
            k = len(re_dict[task]["test_langs"])
            test_preds = []
            for i in range(k):
                test_langs = re_dict[task]["test_langs"][i]
                preds = re_dict[task]["test_preds"][i]
                test_langs["preds"] = preds
                test_langs["test_labels"] = re_dict[task]["test_labels"][i]
                if get_ci:
                    test_langs["test_lower_preds"] = re_dict[task]["test_lower_preds"][i]
                    test_langs["test_upper_preds"] = re_dict[task]["test_upper_preds"][i]
                test_preds.append(test_langs)
            test_preds = pd.concat(test_preds)
            result = []
            labels = []
            if get_ci:
                lower_preds = []
                upper_preds = []
            for lang in langs:
                se = test_preds[(test_preds.iloc[:, 0] == lang)]
                result.append(se["preds"].values[0])
                labels.append(se["test_labels"].values[0])
                if get_ci:
                    lower_preds.append(se["test_lower_preds"].values[0])
                    upper_preds.append(se["test_upper_preds"].values[0])
            re_dict[task]["result"] = np.array(result)
            re_dict[task]["labels"] = np.array(labels)
            if get_ci:
                re_dict[task]["result_lower_preds"] = np.array(lower_preds)
                re_dict[task]["result_upper_preds"] = np.array(upper_preds)
        elif task == "bli":
            k = len(re_dict[task]["test_lang_pairs"])
            test_preds = []
            test_preds_ = re_dict[task]["test_preds"]
            test_labels_ = re_dict[task]["test_labels"]
            test_lower_preds_ = re_dict[task]["test_lower_preds"]
            test_upper_preds_ = re_dict[task]["test_upper_preds"]
            for i in range(k):
                test_lang_pairs = re_dict[task]["test_lang_pairs"][i]
                for eval_metric in test_preds_:
                    test_lang_pairs[eval_metric] = test_preds_[eval_metric][i]
                    test_lang_pairs[eval_metric + "_labels"] = test_labels_[eval_metric][i]
                    if get_ci:
                        test_lang_pairs[eval_metric + "_lower_preds"] = test_lower_preds_[eval_metric][i]
                        test_lang_pairs[eval_metric + "_upper_preds"] = test_upper_preds_[eval_metric][i]
                test_preds.append(test_lang_pairs)
            test_preds = pd.concat(test_preds)
            for c in test_preds_:
                result = []
                labels = []
                if get_ci:
                    lower_preds = []
                    upper_preds = []
                for l1, l2 in lang_pairs.values:
                    se = test_preds[(test_preds.iloc[:, 0] == l1) & (test_preds.iloc[:, 1] == l2)]
                    result.append(se[c].values[0])
                    labels.append(se[c + "_labels"].values[0])
                    if get_ci:
                        lower_preds.append(se[c + "_lower_preds"].values[0])
                        upper_preds.append(se[c + "_upper_preds"].values[0])
                re_dict[task]["result_{}".format(c)] = np.array(result)
                re_dict[task]["{}_labels".format(c)] = np.array(labels)
                if get_ci:
                    re_dict[task]["{}_lower_preds".format(c)] = np.array(lower_preds)
                    re_dict[task]["{}_upper_preds".format(c)] = np.array(upper_preds)
        elif task == "mi":
            k = len(re_dict[task]["test_lang_pairs"])
            test_preds = []
            for i in range(k):
                test_lang_pairs = re_dict[task]["test_lang_pairs"][i]
                preds = re_dict[task]["test_preds"][i]
                test_lang_pairs["preds"] = preds
                test_lang_pairs["test_labels"] = re_dict[task]["test_labels"][i]
                if get_ci:
                    test_lang_pairs["test_lower_preds"] = re_dict[task]["test_lower_preds"][i]
                    test_lang_pairs["test_upper_preds"] = re_dict[task]["test_upper_preds"][i]
                test_preds.append(test_lang_pairs)
            test_preds = pd.concat(test_preds)
            result = []
            labels = []
            if get_ci:
                lower_preds = []
                upper_preds = []
            for l1, l2 in lang_pairs.values:
                se = test_preds[(test_preds.iloc[:, 0] == l1) & (test_preds.iloc[:, 1] == l2)]
                result.append(se["preds"].values[0])
                labels.append(se["test_labels"].values[0])
                if get_ci:
                    lower_preds.append(se["test_lower_preds"].values[0])
                    upper_preds.append(se["test_upper_preds"].values[0])
            re_dict[task]["result"] = np.array(result)
            re_dict[task]["labels"] = np.array(labels)
            if get_ci:
                re_dict[task]["result_lower_preds"] = np.array(lower_preds)
                re_dict[task]["result_upper_preds"] = np.array(upper_preds)

In [1471]:
def get_result(regressor="xgboost", tasks="all", k_fold_eval=True, get_rmse=True, get_ci=False, quantile=0.95):
    """Run ``get_re`` once per requested task and collect the outputs.

    Parameters
    ----------
    regressor : str
        Regressor name forwarded to ``get_re`` (the notebook uses
        ``"xgboost"`` and ``"gp"``).
    tasks : str or iterable of str
        ``"all"`` expands to the built-in task list below. Any other single
        string is treated as one task name. Otherwise an iterable of task
        names, processed in order.
    k_fold_eval, get_rmse, get_ci, quantile
        Forwarded unchanged to ``get_re``.

    Returns
    -------
    dict
        Maps each task name to the value ``get_re`` returned for it.
    """
    if isinstance(tasks, str):
        if tasks == "all":
            tasks = ["monomt", "sf", "bli", "mi", "tsfel", "tsfmt", "tsfpos", "tsfparsing"]
        else:
            # A bare task name would otherwise be iterated character by character.
            tasks = [tasks]

    re_dict = {}
    for task in tasks:
        # Forward the caller's get_rmse / quantile instead of fixed values so
        # the arguments of this function actually take effect.
        re_dict[task] = get_re(task=task, regressor=regressor, k_fold_eval=k_fold_eval,
                               get_rmse=get_rmse, get_ci=get_ci, quantile=quantile)
        print("{} is done!".format(task))
    return re_dict

In [1348]:
def plot(type="weight", lang="ara", task="mt"):
    """Print the stored RMSE for one language and draw its feature importances.

    Parameters
    ----------
    type : str
        Passed to ``plot_importance`` as ``importance_type``.
    lang : str
        Key used to look up both the RMSE and the fitted regressor.
    task : str
        Key into the notebook-level ``ree`` results dictionary.

    NOTE(review): ``ree`` is a global that is not defined in the visible
    cells — confirm it exists on a fresh kernel before relying on this.
    """
    results = ree[task]
    print(results["rmse"][lang])
    axes = plot_importance(results["reg"][lang], importance_type=type)
    # Tall, narrow canvas (5 x 30 inches) for the importance chart.
    axes.figure.set_size_inches(5, 30)

In [None]:
# Gaussian-process run restricted to the "tsfmt" task, with lower/upper
# prediction bounds requested (get_ci=True) at quantile 0.95.
re_gp = get_result(
    regressor="gp",
    tasks=["tsfmt"],
    get_ci=True,
    quantile=0.95,
)

here
here
here
here
here
here
here


In [1474]:
# Display the contents of `re_xgb`.
# NOTE(review): `re_xgb` is not assigned in the visible cells — presumably the
# output of an earlier get_result run; confirm it is defined on a fresh kernel.
re_xgb

{'bli': {'Artetxe16_labels': array([30.07, 27.67, 47.05, 36.93, 53.27, 43.4 , 22.59, 19.87, 31.6 ,
         28.4 , 65.27, 60.87, 58.67, 51.6 , 61.07, 51.33, 69.13, 72.13,
         55.27, 42.73, 78.27, 80.07, 39.67, 34.6 , 36.37, 34.13, 54.  ,
         46.4 , 77.73, 79.2 , 50.4 , 42.4 , 38.65, 33.27, 46.5 , 37.27,
         57.33, 52.2 , 60.4 , 61.2 , 73.6 , 74.47, 31.5 , 48.12, 24.02,
         39.86, 28.93, 24.73, 35.8 , 26.47, 50.4 , 41.2 , 39.03, 44.33,
         68.33, 69.6 , 61.6 , 57.4 , 63.67, 55.73, 74.87, 74.8 , 62.67,
         54.93, 60.13, 52.47, 51.27, 36.8 , 45.4 , 35.33, 38.2 , 25.27,
         54.07, 55.93, 20.56, 18.8 ,  8.91, 28.07, 15.93, 20.73, 52.84,
         45.2 , 44.2 , 33.73, 45.8 , 33.6 , 33.07, 44.87]),
  'Artetxe16_lower_preds': array([ 6.6649265 , -1.3472272 ,  4.5251684 , -8.786209  , -2.1518104 ,
         -3.7334957 , -0.11095673,  2.169149  ,  2.9673777 , -0.54113376,
          1.299465  ,  1.9471204 ,  2.2079365 , -2.2036405 , -1.2287318 ,
          0.698156

In [None]:
# XGBoost run restricted to the "tsfmt" task, with lower/upper prediction
# bounds requested (get_ci=True) at quantile 0.95.
# NOTE(review): the result is bound to `re_gp`, replacing the Gaussian-process
# results stored under that name earlier — confirm whether a separate
# variable was intended.
re_gp = get_result(
    regressor="xgboost",
    tasks=["tsfmt"],
    get_ci=True,
    quantile=0.95,
)