In [1]:
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Print the installed pandas version so it is recorded next to the outputs.
print(pd.__version__)

2.2.1


In [5]:
# Result files matching the pattern. glob returns matches in arbitrary,
# filesystem-dependent order, so they are sorted to keep the row order of
# anything built from this list reproducible across machines and runs.
files = sorted(glob.glob('../data/optuna_results*.txt'))

# Column names for the result files, in order.
# NOTE(review): after split(',') the '"part id"' entry keeps its literal double
# quotes as part of the column name — confirm this is intended.
columns = 'Study,model name,metric to optimize,rank,embed. size,learning rate,optimizer,batch size,return_L2,coef vectors,coef biases,"part id",trial number,epoch number,train loss,train auc,validation loss,validation auc,test loss,test auc,epoch train time,validation time,criterion,dataset'.split(',')

In [6]:
# Read every result file into its own frame (announcing each one), then stack
# them row-wise into a single frame with a fresh 0..n-1 index.
dfs = []
for file in files:
  print(f'Loading {file}')
  dfs += [pd.read_csv(file, names=columns)]
df = pd.concat(dfs, ignore_index=True, axis=0)
df.head()

Loading ../data/optuna_results.txt


Unnamed: 0,Study,model name,metric to optimize,rank,embed. size,learning rate,optimizer,batch size,return_L2,coef vectors,...,trial number,epoch number,validation loss,validation auc,test loss,test auc,epoch train time,validation time,criterion,dataset
0,Study pruned_fwfm logloss 1 8,pruned_fwfm,logloss,1,8,0.001723,adagrad,1024,True,9.5e-05,...,0,0,0.483627,0.741341,0.483071,0.742091,126.303244,1.884501,bcelogitloss,avazu
1,Study pruned_fwfm logloss 4 8,pruned_fwfm,logloss,4,8,0.001723,adagrad,1024,True,9.5e-05,...,0,0,0.424057,0.744801,0.423299,0.745406,126.634219,1.978106,bcelogitloss,avazu
2,Study pruned_fwfm logloss 3 16,pruned_fwfm,logloss,3,16,0.001723,adagrad,1024,True,9.5e-05,...,0,0,0.440976,0.742753,0.440213,0.743586,128.26916,1.955984,bcelogitloss,avazu
3,Study pruned_fwfm logloss 3 8,pruned_fwfm,logloss,3,8,0.001723,adagrad,1024,True,9.5e-05,...,0,0,0.436348,0.743615,0.435627,0.744242,127.855864,1.788292,bcelogitloss,avazu
4,Study pruned_fwfm logloss 4 16,pruned_fwfm,logloss,4,16,0.001723,adagrad,1024,True,9.5e-05,...,0,0,0.426121,0.745695,0.42533,0.746411,130.938987,1.779484,bcelogitloss,avazu


In [7]:
# Distinct values of the 'dataset' column in the loaded results.
datasets = df['dataset'].unique()
datasets

array(['avazu', 'criteo', 'movielens'], dtype=object)

In [8]:
# Columns used as the groupby key below: each distinct combination of these
# values is treated as one experiment.
experiment_cols = ['dataset', 'metric to optimize', 'model name', 'rank', 'embed. size']

def best_config_test_metric(group):
  """Return the test metric of the row with the best validation metric.

  The metric is read from the first row's 'metric to optimize' value: for
  'auc' the row with the highest 'validation auc' is chosen and its
  'test auc' is reported; for any other value the row with the lowest
  'validation loss' is chosen and its 'test loss' is reported.

  Args:
    group: frame holding the rows of a single experiment.

  Returns:
    A one-row, one-column frame whose column is named 'test loss'
    (regardless of which metric was selected).
  """
  optimize_auc = group['metric to optimize'].iloc[0] == 'auc'
  if optimize_auc:
    validation_col, test_col = 'validation auc', 'test auc'
  else:
    validation_col, test_col = 'validation loss', 'test loss'

  # label of the row with the best validation score (higher AUC / lower loss)
  validation = group[validation_col]
  best_label = validation.idxmax() if optimize_auc else validation.idxmin()

  best_test = group.loc[best_label, test_col]
  return pd.Series([best_test], name='test loss').to_frame()

# For every experiment (one group per experiment_cols combination), apply
# best_config_test_metric and index the resulting rows by dataset.
#
# The columns are selected explicitly after groupby because
# best_config_test_metric reads the grouping column 'metric to optimize' from
# each group. Without an explicit selection pandas 2.2 emits a
# DeprecationWarning, and future versions exclude the grouping columns from the
# frames handed to apply.
test_loss_df = (
    df
    .groupby(experiment_cols)[df.columns.tolist()]
    .apply(best_config_test_metric)
    .reset_index()
    .set_index('dataset')
)

  .groupby(experiment_cols).apply(best_config_test_metric) \


In [9]:
def dataset_summary(df, name, zero_rank_models=('fm', 'fwfm')):
  """Build the test-metric comparison table for one dataset.

  Args:
    df: frame indexed by dataset name that has the columns
      'metric to optimize', 'model name', 'rank', 'embed. size' and
      'test loss'. It is not modified.
    name: index label of the dataset to summarise.
    zero_rank_models: names of the models whose rows are copied to every rank
      from 1 up to the largest rank found for the dataset.

  Returns:
    A frame with one row per (metric, rank) and one column per
    (embed. size, model). Within each embedding size the columns are ordered
    FM, FwFM, DPLR, Pruned, 'DPLR vs Pruned (%)'. The last one is the lift
    100 * (1 - DPLR / Pruned), with its sign flipped on 'AUC' rows, so a
    positive lift means DPLR has a lower loss or a higher AUC than Pruned.
  """
  display_names = {
      'fm': 'FM',
      'fwfm': 'FwFM',
      'lowrank_fwfm': 'DPLR',
      'pruned_fwfm': 'Pruned',
      'logloss': 'LogLoss',
      'auc': 'AUC',
      'mse': 'MSE'
  }
  lift_label = 'DPLR vs Pruned (%)'

  # extract only relevant dataset (kept under its own name so the argument
  # is not rebound)
  dataset_df = df.loc[name, :]

  # replicate FM / FwFM results across all ranks.
  max_rank = dataset_df['rank'].max()
  all_ranks = pd.DataFrame({'rank': np.arange(1, 1 + max_rank)})
  is_zero_rank = dataset_df['model name'].isin(list(zero_rank_models))
  replicated_ranks = (
      dataset_df[is_zero_rank]
      .drop('rank', axis=1)
      .merge(all_ranks, how='cross')
  )
  to_pivot = pd.concat([dataset_df, replicated_ranks], axis=0)
  # keep only rows with a positive rank
  to_pivot = to_pivot[to_pivot['rank'] > 0]

  # create pivoted frame with readable model names; replace() returns a new
  # frame, so nothing is modified in place
  pivoted = to_pivot.replace(display_names).pivot(
      index=['metric to optimize', 'rank'],
      columns=['embed. size', 'model name'],
      values='test loss'
  )

  # compute lifts and concatenate to the pivoted DF
  pruned = pivoted.xs('Pruned', axis=1, level=1)
  dplr = pivoted.xs('DPLR', axis=1, level=1)
  lift = 100 * (1 - dplr / pruned)
  lift = pd.concat([lift], keys=[lift_label], names=['model name'], axis=1) \
    .reorder_levels([1, 0], axis=1)
  summary = pd.concat([pivoted, lift], axis=1)

  # invert AUC lift sign
  if 'AUC' in summary.index.levels[0]:
    summary.loc[pd.IndexSlice['AUC', :], pd.IndexSlice[:, lift_label]] *= -1

  # make sure models are displayed in the correct order using a categorical variable
  model_name_cat = pd.CategoricalIndex(pd.Categorical(
      summary.columns.levels[1],
      categories=['FM', 'FwFM', 'DPLR', 'Pruned', lift_label],
      ordered=True
  ))
  summary.columns = summary.columns.set_levels(model_name_cat, level=1)
  return summary.sort_index(axis=1)

In [10]:
# Summary table for the rows of test_loss_df indexed by 'criteo'.
criteo_summary = dataset_summary(test_loss_df, 'criteo')
criteo_summary

Unnamed: 0_level_0,embed. size,8,8,8,8,8,16,16,16,16,16
Unnamed: 0_level_1,model name,FM,FwFM,DPLR,Pruned,DPLR vs Pruned (%),FM,FwFM,DPLR,Pruned,DPLR vs Pruned (%)
metric to optimize,rank,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
AUC,1,0.804394,0.808756,0.805015,0.800813,0.524822,0.806853,0.809968,0.80689,0.802036,0.60522
AUC,2,0.804394,0.808756,0.806559,0.804859,0.211297,0.806853,0.809968,0.808009,0.805707,0.285748
AUC,3,0.804394,0.808756,0.806686,0.80654,0.018039,0.806853,0.809968,0.808292,0.807591,0.086795
AUC,4,0.804394,0.808756,0.806922,0.807364,-0.054735,0.806853,0.809968,0.808504,0.808598,-0.011566
AUC,5,0.804394,0.808756,0.80705,0.807846,-0.098514,0.806853,0.809968,0.808544,0.809122,-0.071441
LogLoss,1,0.44704,0.442922,0.446662,0.451023,0.966807,0.444858,0.441737,0.44494,0.45078,1.295645
LogLoss,2,0.44704,0.442922,0.445366,0.446897,0.342733,0.444858,0.441737,0.443676,0.446357,0.600684
LogLoss,3,0.44704,0.442922,0.445023,0.445076,0.012057,0.444858,0.441737,0.443464,0.444398,0.209994
LogLoss,4,0.44704,0.442922,0.444287,0.44429,0.000632,0.444858,0.441737,0.443136,0.443128,-0.001889
LogLoss,5,0.44704,0.442922,0.444274,0.44385,-0.095355,0.444858,0.441737,0.442966,0.442633,-0.075349


In [11]:
# Summary table for the rows of test_loss_df indexed by 'avazu'.
avazu_summary = dataset_summary(test_loss_df, 'avazu')
avazu_summary

Unnamed: 0_level_0,embed. size,8,8,8,8,8,16,16,16,16,16
Unnamed: 0_level_1,model name,FM,FwFM,DPLR,Pruned,DPLR vs Pruned (%),FM,FwFM,DPLR,Pruned,DPLR vs Pruned (%)
metric to optimize,rank,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
AUC,1,0.776783,0.777658,0.776427,0.772667,0.486546,0.778747,0.779638,0.777647,0.773817,0.494951
AUC,2,0.776783,0.777658,0.776881,0.775609,0.164003,0.778747,0.779638,0.778369,0.777949,0.053916
AUC,3,0.776783,0.777658,0.777204,0.776399,0.103748,0.778747,0.779638,0.778922,0.778917,0.000559
AUC,4,0.776783,0.777658,0.777189,0.776989,0.025683,0.778747,0.779638,0.778941,0.779392,-0.057969
AUC,5,0.776783,0.777658,0.777406,0.777409,-0.000291,0.778747,0.779638,0.77879,0.779575,-0.100741
LogLoss,1,0.38106,0.380832,0.381731,0.384127,0.623622,0.379962,0.379543,0.38098,0.382962,0.517455
LogLoss,2,0.38106,0.380832,0.381227,0.381658,0.113164,0.379962,0.379543,0.38014,0.380221,0.021232
LogLoss,3,0.38106,0.380832,0.381152,0.381434,0.074136,0.379962,0.379543,0.380091,0.380006,-0.022293
LogLoss,4,0.38106,0.380832,0.38103,0.381054,0.006321,0.379962,0.379543,0.380039,0.379589,-0.118611
LogLoss,5,0.38106,0.380832,0.381128,0.380951,-0.04666,0.379962,0.379543,0.380012,0.379613,-0.105311


In [12]:
# Summary table for the rows of test_loss_df indexed by 'movielens'.
movielens_summary = dataset_summary(test_loss_df, 'movielens')
movielens_summary

Unnamed: 0_level_0,embed. size,8,8,8,8,8,16,16,16,16,16
Unnamed: 0_level_1,model name,FM,FwFM,DPLR,Pruned,DPLR vs Pruned (%),FM,FwFM,DPLR,Pruned,DPLR vs Pruned (%)
metric to optimize,rank,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
MSE,1,0.743063,0.740681,0.744942,0.75716,1.613638,0.737641,0.739397,0.744478,0.763012,2.429088
MSE,2,0.743063,0.740681,0.741755,0.746361,0.617136,0.737641,0.739397,0.739783,0.748384,1.149391


In [13]:
# Stack the three per-dataset tables under an outer 'Dataset' row level.
full_summary = pd.concat(
    [criteo_summary, avazu_summary, movielens_summary],
    axis=0,
    keys=['Criteo', 'Avazu', 'Movielens'],
    names=['Dataset'],
)

# Fold the dataset and metric levels into a single "<dataset> (<metric>)"
# label, leaving a two-level (label, rank) row index.
lst = [
    (f'{dataset} ({metric})', rank)
    for (dataset, metric, rank) in full_summary.index.to_list()
]
full_summary.index = pd.MultiIndex.from_tuples(lst)

In [14]:
def make_pretty(styler):
  """Apply the number formatting used for the summary tables.

  Every cell is formatted with a precision of 4, except the
  'DPLR vs Pruned (%)' columns, which are formatted with two decimals and a
  trailing percent sign. The lift columns are looked up in the styled frame's
  columns, so any set of embedding sizes is handled rather than only 8 and 16.

  Args:
    styler: Styler whose underlying frame (`styler.data`) has
      (embed. size, model name) column tuples.

  Returns:
    The same styler, so the function can be used with `Styler.pipe`.
  """
  lift_label = 'DPLR vs Pruned (%)'
  lift_columns = [
      column for column in styler.data.columns
      if isinstance(column, tuple) and len(column) > 1 and column[1] == lift_label
  ]

  styler.format(precision=4)
  # the later call overrides the default formatter for the lift columns only
  if lift_columns:
    styler.format('{:.2f}%', subset=lift_columns)
  return styler

# Display the combined table with the formatting applied by make_pretty.
full_summary.style.pipe(make_pretty)

Unnamed: 0_level_0,embed. size,8,8,8,8,8,16,16,16,16,16
Unnamed: 0_level_1,model name,FM,FwFM,DPLR,Pruned,DPLR vs Pruned (%),FM,FwFM,DPLR,Pruned,DPLR vs Pruned (%)
Criteo (AUC),1,0.8044,0.8088,0.805,0.8008,0.52%,0.8069,0.81,0.8069,0.802,0.61%
Criteo (AUC),2,0.8044,0.8088,0.8066,0.8049,0.21%,0.8069,0.81,0.808,0.8057,0.29%
Criteo (AUC),3,0.8044,0.8088,0.8067,0.8065,0.02%,0.8069,0.81,0.8083,0.8076,0.09%
Criteo (AUC),4,0.8044,0.8088,0.8069,0.8074,-0.05%,0.8069,0.81,0.8085,0.8086,-0.01%
Criteo (AUC),5,0.8044,0.8088,0.807,0.8078,-0.10%,0.8069,0.81,0.8085,0.8091,-0.07%
Criteo (LogLoss),1,0.447,0.4429,0.4467,0.451,0.97%,0.4449,0.4417,0.4449,0.4508,1.30%
Criteo (LogLoss),2,0.447,0.4429,0.4454,0.4469,0.34%,0.4449,0.4417,0.4437,0.4464,0.60%
Criteo (LogLoss),3,0.447,0.4429,0.445,0.4451,0.01%,0.4449,0.4417,0.4435,0.4444,0.21%
Criteo (LogLoss),4,0.447,0.4429,0.4443,0.4443,0.00%,0.4449,0.4417,0.4431,0.4431,-0.00%
Criteo (LogLoss),5,0.447,0.4429,0.4443,0.4439,-0.10%,0.4449,0.4417,0.443,0.4426,-0.08%


In [16]:
# Render the styled table as LaTeX and backslash-escape every literal percent
# sign in the generated text.
latex_code = (
    full_summary.style
    .pipe(make_pretty)
    .to_latex(hrules=True, multirow_align='t')
    .replace('%', '\\%')
)
print(latex_code)

\begin{tabular}{llrrrrrrrrrr}
\toprule
 & embed. size & \multicolumn{5}{r}{8} & \multicolumn{5}{r}{16} \\
 & model name & FM & FwFM & DPLR & Pruned & DPLR vs Pruned (\%) & FM & FwFM & DPLR & Pruned & DPLR vs Pruned (\%) \\
\midrule
\multirow[t]{5}{*}{Criteo (AUC)} & 1 & 0.8044 & 0.8088 & 0.8050 & 0.8008 & 0.52\% & 0.8069 & 0.8100 & 0.8069 & 0.8020 & 0.61\% \\
 & 2 & 0.8044 & 0.8088 & 0.8066 & 0.8049 & 0.21\% & 0.8069 & 0.8100 & 0.8080 & 0.8057 & 0.29\% \\
 & 3 & 0.8044 & 0.8088 & 0.8067 & 0.8065 & 0.02\% & 0.8069 & 0.8100 & 0.8083 & 0.8076 & 0.09\% \\
 & 4 & 0.8044 & 0.8088 & 0.8069 & 0.8074 & -0.05\% & 0.8069 & 0.8100 & 0.8085 & 0.8086 & -0.01\% \\
 & 5 & 0.8044 & 0.8088 & 0.8070 & 0.8078 & -0.10\% & 0.8069 & 0.8100 & 0.8085 & 0.8091 & -0.07\% \\
\multirow[t]{5}{*}{Criteo (LogLoss)} & 1 & 0.4470 & 0.4429 & 0.4467 & 0.4510 & 0.97\% & 0.4449 & 0.4417 & 0.4449 & 0.4508 & 1.30\% \\
 & 2 & 0.4470 & 0.4429 & 0.4454 & 0.4469 & 0.34\% & 0.4449 & 0.4417 & 0.4437 & 0.4464 & 0.60\% \\
 & 3 & 0.4