In [1]:
import pandas as pd
from pathlib import Path
import lightgbm as lgbm

In [2]:

# Data root, given relative to the directory the notebook is run from.
BASE_PATH = Path("../../../../ilmart-data")
# Load the evaluation measurements stored in perf/eval.csv under the data root.
df = pd.read_csv(BASE_PATH.joinpath("perf/eval.csv"))

In [3]:
# Break `exp_name` at its first '-' into the `split` and `model_type` columns;
# n=1 keeps any later '-' inside `model_type` (e.g. "no-inter").
df[["split", "model_type"]] = df["exp_name"].str.split(pat="-", n=1, expand=True)

In [4]:
# `exp_name` has been split into `split` and `model_type`, so drop the original column.
df = df.drop(labels="exp_name", axis="columns")

In [5]:
# Mean and standard deviation of `time` for every (split, model_type, model) group.
df_grouped = (
    df
    .groupby(by=["split", "model_type", "model"])
    .agg({"time": ["mean", "std"]})
)

In [6]:
# Speedup of each model relative to the "lightgbm" row of the same
# (split, model_type) group: baseline mean time / model mean time.
df_grouped = df_grouped.reset_index()
mean_time_col = ("time", "mean")
for split_name in df["split"].unique():
    in_split = df_grouped["split"] == split_name
    for type_name in df["model_type"].unique():
        group_mask = in_split & (df_grouped["model_type"] == type_name)
        group_rows = df_grouped[group_mask]
        # Mean time of the baseline row inside this group.
        is_baseline = group_rows["model"] == "lightgbm"
        baseline_mean = group_rows[is_baseline][mean_time_col].values
        group_means = group_rows[mean_time_col].values
        df_grouped.loc[group_mask, "speedup"] = baseline_mean / group_means

In [7]:
# Custom row ordering: turn each key column into a categorical whose category
# order is the desired display order, then sort by the keys and index on them.
category_order = {
    "split": ["train", "test"],
    "model_type": ["no-inter", "inter"],
    "model": ["lightgbm", "quickscorer", "ilmart"],
}
for key_column, ordered_values in category_order.items():
    df_grouped[key_column] = pd.Categorical(df_grouped[key_column], categories=ordered_values)
index_columns = list(category_order)
df_grouped = df_grouped.sort_values(by=index_columns).set_index(index_columns)

In [8]:
# Render the table as LaTeX source: `speedup` with two decimals, every other
# number with no decimals and ',' as the thousands separator.
# print() is used so the raw LaTeX text appears, ready to be copied.
latex_styler = df_grouped.style.format(
    {("speedup", ''): '{:.2f}'},
    decimal='.',
    thousands=',',
    precision=0,
)
print(latex_styler.to_latex())

\begin{tabular}{lllrrr}
 &  &  & \multicolumn{2}{r}{time} & speedup \\
 &  &  & mean & std &  \\
split & model_type & model &  &  &  \\
\multirow[c]{6}{*}{train} & \multirow[c]{3}{*}{no-inter} & lightgbm & 301,504 & 7,509 & 1.00 \\
 &  & quickscorer & 44,790 & 159 & 6.73 \\
 &  & ilmart & 26,020 & 894 & 11.59 \\
 & \multirow[c]{3}{*}{inter} & lightgbm & 416,676 & 11,945 & 1.00 \\
 &  & quickscorer & 69,805 & 609 & 5.97 \\
 &  & ilmart & 33,652 & 856 & 12.38 \\
\multirow[c]{6}{*}{test} & \multirow[c]{3}{*}{no-inter} & lightgbm & 100,652 & 3,538 & 1.00 \\
 &  & quickscorer & 14,735 & 402 & 6.83 \\
 &  & ilmart & 8,691 & 172 & 11.58 \\
 & \multirow[c]{3}{*}{inter} & lightgbm & 137,876 & 1,063 & 1.00 \\
 &  & quickscorer & 23,515 & 542 & 5.86 \\
 &  & ilmart & 11,347 & 458 & 12.15 \\
\end{tabular}



In [9]:
# Load the LightGBM booster saved at ft/web30k/greedy/no_inter.lgbm under the data root.
no_inter_model_file = BASE_PATH / "ft/web30k/greedy/no_inter.lgbm"
model_without_inter = lgbm.Booster(model_file=no_inter_model_file)

In [10]:
# Display how many trees the booster loaded from no_inter.lgbm contains.
model_without_inter.num_trees()

1701

In [11]:
# Load the LightGBM booster saved at ft/web30k/greedy/inter_greedy.lgbm under the data root.
inter_model_file = BASE_PATH / "ft/web30k/greedy/inter_greedy.lgbm"
model_with_inter = lgbm.Booster(model_file=inter_model_file)

In [12]:
# Display how many trees the booster loaded from inter_greedy.lgbm contains.
model_with_inter.num_trees()

2423