# Model Evaluation

## Feature Options

- Engagement
  - Clicks on popular activities
  - Number of active days
- Demographics
  - IMD band
  - Region
  - Highest education
- Academic
  - Grades on weighted assessments
  - Days early that assignments were submitted
  - Course difficulty

In [None]:
from itertools import cycle

import baycomp as bc
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import rcParams
from matplotlib.pyplot import subplots
from numpy import linspace

from alexlib.df import get_distinct_col_vals, rm_df_col_pattern
from alexlib.files.utils import figsave

from analysis.results import Results
from etl.db_helpers import DbHelper
from model.constants import MODEL_TYPES
from model.engine import ModelEngine
from model.features import Features
from model.params import Params


In [None]:
# Shared notebook state: database helper, outcome index constants, the
# demographic split columns, and the seaborn palettes reused by later cells.
dbh = DbHelper("LOCAL")

# Index constants in the same (left, rope, right) order that the comparison
# cells unpack `probs` into.
LEFT, ROPE, RIGHT = 0, 1, 2

split_cols = [
    "is_stem",
    "is_female",
    "has_disability",
]

# Colour palettes (lists of colours, not colormaps).
flare = sns.color_palette("flare")
liteseq = sns.color_palette(
    "light:#5A9",
    n_colors=9,
    as_cmap=False,
)
cube = sns.cubehelix_palette(
    n_colors=9,
    start=0.5,
    rot=-0.5,
    as_cmap=False,
)

In [None]:
# Ask the database helper for a select query over ("first30", "all_features").
# NOTE(review): presumably these are a schema and a table/view name, and the
# call is made only to show the generated query as cell output; confirm
# against DbHelper.generate_select_query.
dbh.generate_select_query("first30", "all_features")

In [None]:
# Set matplotlib's default figure resolution to 1200 DPI; it applies to every
# figure created afterwards that does not pass its own `dpi`.
rcParams["figure.dpi"] = 1200

In [None]:
# Build the ABROCA analysis dataframe from a SQL file via the database helper.
# NOTE(review): presumably df_from_file runs the query stored in that file and
# returns a pandas DataFrame; confirm against DbHelper.
abroca_file = "abroca_analysis.sql"
abroca_df = dbh.df_from_file(abroca_file)
# NOTE(review): this bare expression is not the last statement of the cell, so
# its result is discarded; use print()/display() if the preview is wanted.
abroca_df.head()
print(abroca_df.columns)

In [None]:
# Count the distinct hyperparameter combinations stored for each model type
# and accumulate the grand total across all types.
total_models = 0
for mtype in MODEL_TYPES:
    # One parameter table per model type, named "params_<model type>".
    tab = dbh.get_table("eval", f"params_{mtype}")
    # NOTE(review): presumably rm_df_col_pattern drops the columns matching
    # "_id" so that rows differing only by identifier collapse below; confirm
    # against alexlib.df.
    nonids = rm_df_col_pattern("_id", tab)
    df = nonids.drop_duplicates()
    n = len(df)
    print(mtype, n)
    total_models += n
# Spelling of "hyperparameter" corrected in the printed summary.
print("Total distinct model hyperparameter combinations:", total_models)

In [None]:
# Model types found in the "model_type" column of the ABROCA dataframe.
model_types = get_distinct_col_vals(abroca_df, "model_type")
model_types

# Short plot labels, keyed by model type.
model_annot_dict = dict(
    etree="ET",
    hxg_boost="HGB",
    mlp="MLP",
    rforest="RF",
    logreg="LOG",
)

# Labels in the same order as `model_types`; an unmapped type raises KeyError.
model_annots = [model_annot_dict[type_name] for type_name in model_types]
model_annots

In [None]:
# Print the column names, then leave the summary statistics as the cell's
# final expression.
print(abroca_df.columns)
abroca_df.describe()

In [None]:
# Columns selected from `abroca_df` by the pairplot call that is kept, disabled,
# in the string literal that follows this list.
df_cols = [
    "is_female_abroca",
    "has_disability_abroca",
    "mean_test_roc_auc",
    "female_ratio",
    "disabled_ratio",
    "model_type",
]
"""
sns.pairplot(
    abroca_df.loc[:, df_cols],
    #x="disabled_ratio",
    #y="has_disability_abroca",
    hue="model_type",
    #kind="hist",
    #joint_kws={"order":2}
    )
"""


In [None]:
# Fetch the fitted regression lines and order them by panel position ("i",
# "j") and then model type, with a fresh 0..n-1 index.
lines_df = (
    dbh.get_table("eval", "reg_lines")
    .sort_values(["i", "j", "model_type"], axis=0)
    .reset_index(drop=True)
)
lines_df

In [None]:
# Draw every fitted regression line from `lines_df` on a 2x2 grid of axes.
# Each row selects its panel through its "i"/"j" columns and supplies the
# coefficients of y = x2_term * x**2 + x_term * x + intercept.
fig, axs = subplots(2, 2)
print(list(lines_df.index))

# The x grid is identical for every line, so it is built once outside the loop.
xspace = linspace(0, 1, 100)

for k, row in lines_df.iterrows():
    # Coerce to plain ints so the values are valid indices into the axes grid
    # whatever numeric type the row carries.
    i, j = int(row["i"]), int(row["j"])
    ax = axs[i][j]
    # Evaluate the polynomial on the whole array at once; this also avoids
    # binding a lambda that closes over the loop variable `row`.
    yspace = (xspace**2) * row["x2_term"] + xspace * row["x_term"] + row["intercept"]
    ax.plot(xspace, yspace)


In [None]:
# Header tokens that are acronyms and are rendered fully upper-cased.
all_caps = ["roc", "auc", "abroca"]


def header_to_title(header: str, all_caps: list = all_caps) -> str:
    """Convert a snake_case column header into a space-separated title.

    The header is split on underscores. Tokens listed in ``all_caps`` are
    upper-cased entirely; every other token only has its first character
    upper-cased, leaving the rest untouched. Empty tokens (from an empty
    header or from leading, trailing or doubled underscores) are skipped
    rather than raising ``IndexError``.

    Args:
        header: Column name such as ``"mean_test_roc_auc"``.
        all_caps: Tokens to render in full upper case. The list is only read,
            never modified.

    Returns:
        The title string, e.g. ``"Mean Test ROC AUC"``; ``""`` when the
        header holds no non-empty token.
    """
    words = [
        part.upper() if part in all_caps else part[:1].upper() + part[1:]
        for part in header.split("_")
        if part  # skip empty segments instead of indexing into ""
    ]
    return " ".join(words)


header_to_title("model_type_abroca_abcdef_roc")


In [None]:
# Regression records for the combined ('all') model type, ordered by index.
rsq_sql = """
select
 rr.index
,rr.poly
,rr.x
,rr.y
,rr.rsquared
,rr.rsquared_adj
from eval.reg_records rr
where rr.model_type = 'all'
order by rr.index
"""
rsq = dbh.run_pd_query(rsq_sql)

# R-squared values rounded to four decimals, in query order.
rsq_list = [round(value, 4) for value in rsq.loc[:, "rsquared"]]

# Endless iterators over the model labels and the R-squared values, for
# consumers that pull one value at a time with next().
mtype_gen = cycle(model_annots)
rsq_gen = cycle(rsq_list)
rsq

In [None]:
"""

axtitles = [
    "Fitted Quadratic Regression",
    "Fitted Quadratic Regression",
    "Performance & Unfairness",
    "Performance & Unfairness",
]
titlegen = list_gen(axtitles)
from model.analysis import BLUE, ORANGE

"""
# abroca regplots
"""
hue="model_type"
hue="course_is_stem"
size="n_students"
ci=95
regcolor= "black"


labels = [
    ('is_female_abroca',  'female_ratio'),
    ('has_disability_abroca', 'disabled_ratio'),
    ]
ncols = len(labels)
nrows = 2
index = idx_list(nrows, ncols)
fig, axs = subplots(
    nrows=nrows, 
    ncols=ncols, 
    figsize=(12,10), 
    dpi=1200,
    )
for i, j in index:
    alabel, rlabel = labels[j]
    ax = axs[i][j]
    x = rlabel if i == 0 else "mean_test_roc_auc"
    y = alabel
    print(i, j, x, y)
    if (i == 1 and j == 1):
        ledg = sns.scatterplot(
        abroca_df,
        ax=ax,
        x=x,
        y=y,
        hue=hue,
        size=size,
        legend=True
    )
        hand, lab = ax.get_legend_handles_labels()
    else:
        sns.scatterplot(
        abroca_df,
        ax=ax,
        x=x,
        y=y,
        hue=hue,
        size=size,
        legend=False
    )
    ax.legend_=None
    for mtype in model_types:
        df = filter_df(abroca_df, "model_type", mtype)
        sns.regplot(
            df,
            ax=ax,
            x=x,
            y=y,
            scatter=False,
            fit_reg=True,
            label=mtype,
            ci=ci,
            color=regcolor,
            order = 2 if i == 0 else 1,
            line_kws={
                "alpha": 1,
                "linewidth": 0.5,
            },
            truncate=False,
            #label=None,
            )
        if (i == 1 and j == 1):
            ax.set_ybound(0, 1)
        else:
            ax.set_ybound(0)
        #ax.legend_=None
        plt.setp(ax.collections[-1], alpha=0.05)
    ax.set_ylabel("ABROCA")
    xlabel=ax.get_xlabel()
    ax.set_xlabel(header_to_title(xlabel))
    ax.set_title(next(titlegen) + " | Combined $R^{2}$ = " + str(next(rsq_gen)))


opatch = Patch(color=(253/255,105/255,15/255,1.), label="STEM")
bpatch = Patch(color=(27/255,98/255,165/255,1.), label="Social Studies")
legend1 = fig.legend(
    loc="outside lower left",
    #handler_map={tuple: HandlerTuple(ndivide=None)},
    ncols=2,
    fancybox=True,
    frameon=False,
    handles=[
        opatch,
        bpatch
        ],
    title="Curricular Area"
    )
legend2 = fig.legend(
    loc="outside lower right",
    ncols=4,
    fancybox=True,
    frameon=False,
    handles=hand[-4:],
    labels=lab[-4:],

    title="Class Size"
    )
fig.suptitle(
    "ABROCA by Demographic Characteristic Balance",
    fontsize=16,
    fontweight="semibold",
    )
xtext = [
    0.56, 0.62, 0.68, 0.74,
    0.56, 0.62, 0.68, 0.74,
    0.56, 0.62, 0.68, 0.74,
    0.56, 0.62, 0.68, 0.74,
]
ytext = [
    0.30, 0.36, 0.42, 0.48,
    0.30, 0.36, 0.42, 0.48,
    0.30, 0.36, 0.42, 0.48,
    0.30, 0.36, 0.42, 0.48,
]
xval = [
    0.55, 0.60, 0.65, 0.70,
    0.12, 0.13, 0.14, 0.15,
    0.7145, 0.7150, 0.7155, 0.7160,
    0.7145, 0.7150, 0.7155, 0.7160,
]

for m, k in enumerate(list(lines_df.index)):
    row = lines_df.loc[k, :]
    annot = model_annot_dict[row["model_type"]]
    i, j = row["i"], row["j"]
    ax = axs[i][j]
    func = lambda x: (x**2)*row["x2_term"] + x*row["x_term"] + row["intercept"]
    xlims = ax.get_xlim()
    ylims = ax.get_ylim()
    xv = xval[m]
    yv = func(xv)
    ax.annotate(
        annot,
        xy=(xv, func(xv)),
        xycoords="data",
        xytext=(xtext[m], ytext[m]),
        textcoords="axes fraction",
        bbox=dict(boxstyle="square", fc="1."),   
        arrowprops=dict(arrowstyle="->",
                        connectionstyle="arc,angleA=0,armA=0,angleB=0,armB=0,rad=9",
                        #connectionstyle="arc,angleA=0,angleB=45,rad=7",
                        ),
    )
figsave("abroca_by_gender_dis", format="png")
"""

In [None]:
# Load "v_features" and "v_all_runs_results" through the database helper.
# NOTE(review): presumably "eval" is the schema and both names are views
# returned as DataFrames; confirm against DbHelper.get_table.
features = dbh.get_table("eval", "v_features")
all_results = dbh.get_table("eval", "v_all_runs_results")
# Final expression of the cell: the column index of the results table.
all_results.columns

In [None]:
# with differentiation
"""

df = all_results
fig, axs = subplots(nrows=2, ncols=2, figsize=(16, 9), dpi=800)
index = idx_list(2, 2)
print(index)
hue=None
hue="model_type"
palette = cube
kde=True
stat="probability"
common_norm=True
color=None#(68/255,155/255,214/255,1.)
fill=True
ops = ["mean", "std"]
vals = ["fit_time", "test_roc_auc"]
vars = [[f"{op}_{val}" for val in vals] for op in ops]
print(vars)
bins = [200, 100, 200, 100]

maxes = [2, 0.9, 0.5, 0.03]
for i, j in enumerate(index):
    k = i
    i, j = j
    var = vars[i][j]
    _max = maxes[k]
    _bins = bins[k]
    print(var, "ijk",i, j, k ,"max", _max)
    sns.histplot(data=df[df.loc[:, var].apply(lambda x: x < _max)],
                 hue=hue,
                 x=var,
                 fill=False,
                 kde=True,
                 bins=_bins,
                 palette=palette,
                 stat=stat,
                 #color=color,
                 #common_norm=common_norm,
                 ax=axs[i][j]
                 )
title = "ROC AUC & Fit Time | Mean & Standard Deviation per Model Type"
fig.suptitle(title, y=1)
fig.tight_layout()
figsave(f"roc_fit_by_mtype", format="png")

"""

In [None]:
# without differentiation
"""
df = all_results
fig, axs = subplots(
    nrows=2, 
    ncols=2, 
    figsize=(16, 9),
    dpi=800,
    )
index = idx_list(2, 2)
print(index)
hue="model_type"
hue=None
palette = cube
kde=True
stat="probability"
common_norm=True
color=None#(68/255,155/255,214/255,1.)
fill=True
ops = ["mean", "std"]
vals = ["fit_time", "test_roc_auc"]
vars = [[f"{op}_{val}" for val in vals] for op in ops]
print(vars)
bins = [200, 100, 200, 100]

maxes = [5.5, 1.0, 1.25, 0.03]
for i, j in enumerate(index):
    k = i
    i, j = j
    var = vars[i][j]
    _max = maxes[k]
    _bins = bins[k]
    print(var, "ijk",i, j, k ,"max", _max)
    sns.histplot(data=df[df.loc[:, var].apply(lambda x: x < _max)],
                 hue=hue,
                 x=var,
                 fill=False,
                 kde=True,
                 bins=_bins,
                 palette=palette,
                 stat=stat,
                 #color=color,
                 #common_norm=common_norm,
                 ax=axs[i][j]
                 )
fig.suptitle("ROC AUC & Fit Time | Combined Mean & Standard Deviation", y=1)
fig.tight_layout()
figsave("roc_fit_all", format="png")
"""


In [None]:
# frequentist
# `w` and `h` are the width/height passed as `figsize` by the disabled plotting
# call kept in the string literal below.
w, h = 20, 16
# NOTE(review): presumably `lim` caps the Results object at the top-N models
# (the disabled title reads "Top {lim}"); confirm against analysis.results.
lim = 100
freq_res = Results(lim=lim)
"""
fig, ax = freq_res.plot_windowpane(bayes=False,
                         annot=None,#"values",
                         #labelsize=int(w/5),
                         figsize=(w, h),
                         dpi=1200,
                         labelsize=4,
                         )
fig.set_facecolor("white")
fig.suptitle(f"Frequentist Model Decisions | Top {lim}", y=1)
fig.tight_layout()
figsave(f"freq_window_{str(w)}_{str(h)}_top_{lim}", format="png")
"""

In [None]:
# Build the Bayesian results object and draw its "windowpane" figure.
# `w` and `h` are the figure width and height handed to `figsize`.
w, h = 20, 16
# NOTE(review): presumably `lim` restricts the comparison to the top-N models
# (the title below reads "Top {lim}") and `rope` is the width of the region of
# practical equivalence; confirm against analysis.results.Results.
lim = 50
bay_res = Results(
    lim=lim,
    rope=0.001,
)
# bayes=True requests the Bayesian variant of the plot; dpi=100 is passed
# explicitly for this figure.
fig, ax = bay_res.plot_windowpane(
    bayes=True,
    annot=None,  # "values",
    # labelsize=int(w/5),
    figsize=(w, h),
    dpi=100,
    labelsize=4,
)
fig.set_facecolor("white")
# y=1 places the suptitle at the top edge of the figure.
fig.suptitle(f"Bayesian Model Decisions | Top {lim}", y=1)
fig.tight_layout()
"""
figsave(f"bayes_window_{str(w)}_{str(h)}_top_{lim}", format="png")
"""

In [None]:
# get top rope band
# NOTE(review): presumably returns the group of top models that fall within
# the ROPE of one another; the result exposes a `.df` attribute used in a later
# cell. Confirm against Results.get_top_cluster.
top_model_res = bay_res.get_top_cluster()
"""
n = len(top_model_res.obj_list)
print(bay_res.grid[0][1:n], n)
for mod in top_model_res.obj_list:
    mod.set_params(dbh)
    print(mod.name)
"""

In [None]:
# get best model for each type
# all_types = [x.model_type for x in bay_res.obj_list]
# dist_types = list(set(all_types))
# dist_idx = [all_types.index(dtype) for dtype in dist_types]
# all_best_per_model = [bay_res.obj_list[i] for i in dist_idx]
# all_best_per_model

In [None]:
# Keep only the columns of the top-cluster table whose name does not contain
# "split", then preview the first ten rows.
nonsplit_cols = [
    column
    for column in top_model_res.df.columns
    if "split" not in column
]
best_df = top_model_res.df.loc[:, nonsplit_cols]
best_df.head(10)

In [None]:
# best_engines = [set_engine_from_result x(verbose=2) for x in all_best_per_model]
# mtypes = [engine.params.model_type for engine in best_engines]
# mtypes

In [None]:
# abroca results engine loop
"""
course_filters = [("course_id", i) for i in range(1, 23)] + [None]
for obj in tqdm(res.obj_list[26:]):
    abroca_recs = []
    for course_id in course_filters:
        engine = set_engine_from_result(obj,
                                        verbose=1,
                                        df_filter = course_id
                                        )
        if course_id is None:
            cid = -1
        else:
            cid = course_id[-1]
        rec = {
            "run_id": obj.run_id,
            "iter_id": obj.iter_id,
            "course_id": cid,
        }
        try:
            engine.fit()
            abrocas = engine.get_abrocas(split_cols, plot=False)
        except ValueError:
            abrocas = {
                "is_stem": -1,
                "is_female": -1,
                "has_disability": -1
            }
        rec.update(abrocas)
        abroca_recs.append(rec)
    abroca_df = DataFrame.from_records(abroca_recs)
    dbh.df_to_db(abroca_df, "eval", "abroca", if_exists="append")
    
"""

In [None]:
"""
    def set_engine_from_result(self,
                               dbh: DbHelper = dbh,
                               **kwargs
                               ):
        self.set_params(dbh)
        params = Params(
            model_type=self.model_type,
            params=self.params
            )
        inc_all = istrue(self.inc_all)
        inc_aca = istrue(self.inc_aca)
        inc_dem = istrue(self.inc_dem)
        inc_eng = istrue(self.inc_eng)
        if not (inc_aca or inc_dem or inc_eng):
            inc_all=True
        feat = Features(
            use_all=inc_all,
            use_academic=inc_aca,
            use_demographic=inc_dem,
            use_engagement=inc_eng,
            **kwargs,
        )
        return ModelEngine(
            feat=feat,
            params=params,
            **kwargs
        )
"""

In [None]:
"""
model_type = "logreg"
engint1 = [i for i in range(len(mtypes)) if mtypes[i] == model_type][0]
eng1 = best_engines[engint1]

eng1_name = all_best_per_model[engint1].name
eng1.fit(show=True, title=f"{eng1_name} ROC Curve | auc = {round(eng1.roc_curve.auc, 6)}")
"""

In [None]:
"""
try:
    model_type = "hxg_boost"
    engint2 = [i for i in range(len(mtypes)) if mtypes[i] == model_type][0]
except IndexError:
    model_type = "etree"
    engint2 = [i for i in range(len(mtypes)) if mtypes[i] == model_type][0]

eng2 = best_engines[engint2]

#eng2.gridsearch.__dict__["param_distributions"]["clf__n_jobs"] = [-1]
eng2_name = all_best_per_model[engint2].name
eng2.fit(show=True, title=f"{eng2_name} ROC Curve | auc = {round(eng2.roc_curve.auc, 6)}")
"""

In [None]:
# rope show compare 2
modres1, modres2 = bay_res.obj_list[2], bay_res.obj_list[3]
bay_res.rope = 0.01
probs, fig = bay_res.bayes_comp_2_objs(modres1, modres2, plot=True)
left, rope, right = probs
fig.set_dpi(1200)
fig.figsize = (4, 3)
fig.layout = "constrained"
axes = fig.axes
ax = axes[0]
plt.title(
    f"Region Of Practical Equivalence = {bay_res.rope}, P(ROPE) = {round(rope, 3)}"
)
# figsave(f"rope_{modres1.name}_{modres2.name}", format="png")

In [None]:
# Compare two result objects directly with baycomp and save the posterior plot.
# `rope` is rebound here to the ROPE width; an earlier cell used the same name
# for an unpacked probability.
rope = 0.01
modres1, modres2 = bay_res.obj_list[6], bay_res.obj_list[4]
# NOTE(review): presumably `splits_test_roc_auc` holds the per-split test ROC
# AUC scores of each model; confirm their shape against what
# baycomp.two_on_multiple expects.
probs, fig = bc.two_on_multiple(
    modres1.splits_test_roc_auc,
    modres2.splits_test_roc_auc,
    runs=1,
    rope=rope,
    names=(modres1.name, modres2.name),
    plot=True,
)
# fig.tight_layout()
# These pyplot calls act on the current figure/axes.
# NOTE(review): presumably that is the figure baycomp just created; confirm.
plt.suptitle("Posterior Plot from Bayesian Hierarchical Correlated t-test")
plt.title(f"ROPE = {rope}")
figsave(f"rope_on_two_{modres1.name}_{modres2.name}", format="png")

In [None]:
# compare abrocas
"""
for col in list(abrocas1.keys()):
    a1 = abrocas1[col]
    a2 = abrocas2[col]
    dif = a1 - a2
    if dif > 0:
        best = "model 2"
    else:
        best = "model 1"
    print(f"in the case of {col}, {best} is a fairer predictor by {abs(round(dif,4))}")
"""

In [None]:
"""
_split_cols = split_cols[1:]
engines = [eng1, eng2]
names = [eng1_name, eng2_name]
ncols = len(_split_cols)
nrows = len(engines)
square_idx_list = lambda n: idx_list(n, n)

fig, axs = subplots(ncols=ncols,
                    nrows=nrows,
                    figsize=(int(6*ncols),int(4.5*nrows)),
                    dpi=800,
                    sharex=True,
                    sharey=True
                    )
axidx = idx_list(nrows, ncols)

for i, j in axidx:
    eng = engines[j]
    col = _split_cols[i]
    ax = axs[i][j]
    abroca = eng.get_abroca(col, ax=ax)
    if i == 0:
        ax.set_title(names[j], fontsize="medium")
    if i == 1:
        ax.set_xlabel("False Positive Rate", fontsize="medium")
    if j == 0:
        ax.set_ylabel("True Positive Rate", fontsize="medium")
    ax.legend(fontsize="small")

fig.suptitle("Absolute Between-ROC Area (ABROCA)")
fig.tight_layout()
figname = f"abroca_{names[0]}{names[1]}"
figsave(figname)
#plt.subplots_adjust(right=1.2)
"""

In [None]:
# Build a demo engine: a "hxg_boost" model on the academic feature group, with
# "is_female" passed through `to_include`.
# NOTE(review): presumably `to_include` adds that column to the feature set so
# the ABROCA split on it is available later; confirm against model.features.
e = ModelEngine(
    feat=Features(use_academic=True, to_include=["is_female"]),
    params=Params(model_type="hxg_boost"),
)

In [None]:
# Fit the demo engine.
# NOTE(review): presumably roc=True makes it compute the ROC curve read from
# `e.roc_curve` in the next cell, and show=False suppresses plotting during the
# fit; confirm against ModelEngine.fit.
e.fit(roc=True, show=False)

In [None]:
# Plot the fitted engine's ROC curve, label it, and save it as a PNG.
e.roc_curve.plot()
# The pyplot calls below act on the current axes.
# NOTE(review): presumably those are the axes roc_curve.plot() drew on; confirm.
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
# The AUC in the title is rounded to six decimals.
plt.title(f"{e.params.model_type} ROC Curve | ROC AUC = {round(e.roc_curve.auc, 6)}")
figsave(f"{e.params.model_type}_roc_demo", format="png")

In [None]:
# Plot the ABROCA comparison for one split column on a dedicated figure and
# save it as a PNG.
fig, ax = subplots(nrows=1, ncols=1, figsize=(12, 9), dpi=400)
# Split column for which the ABROCA object is requested.
sc = "is_female"
abr = e.roc_curve.get_abroca(sc)
abr.plot(ax=ax)
# The pyplot calls below act on the current axes.
# NOTE(review): presumably still `ax`, provided abr.plot() does not create or
# select other axes; confirm.
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title(f"{e.params.model_type} ROC Curves | Gender")
ax.legend()
figsave(f"{e.params.model_type}_abroca_{sc}_demo", format="png")