In [None]:
import pandas as pd
from pathlib import Path

import numpy as np

In [None]:
# Root directory that is searched recursively for experiment artefacts.
EXPERIMENTS_PATH = Path("../experiments")
# Every `output.txt` found below the root. The glob yields entries in whatever
# order the filesystem enumerates them, so the list is sorted to keep the row
# order of everything derived from it reproducible across machines and runs.
results = sorted(EXPERIMENTS_PATH.glob('**/output.txt'))

In [None]:
def _read_training_log(log_path):
    """Load one `output.txt` log into a DataFrame of per-iteration metrics.

    The first 19 lines of the file are skipped (presumably a start-up banner
    written by the training tool; confirm against a real log). The remaining
    lines are read as three tab-separated fields. For each field, the text after
    the first "=" is stripped and converted to float.

    Returns a frame with columns ``iter``, ``train``, ``test``, ``difference``
    (``test`` minus ``train``) and ``experiment`` (name of the directory that
    contains the log).
    """
    frame = pd.read_csv(log_path, skiprows=19, sep="\t", header=None,
                        names=['iter', 'train', 'test'])
    for field in ('iter', 'train', 'test'):
        # Keep only what follows "=" and turn it into a number.
        frame[field] = frame[field].str.split("=").str[1].str.strip().astype(float)
    frame['difference'] = frame['test'] - frame['train']
    frame['experiment'] = log_path.parent.name
    return frame


# One frame per log file, stacked into a single table and saved as CSV.
dfs = [_read_training_log(result) for result in results]
df_results = pd.concat(dfs)
df_results.to_csv("../results-mcmc.csv")

In [None]:
# Boolean mask selecting, for every experiment, the row(s) whose `test` value
# equals that experiment's minimum `test` value.
# The aggregation is given by name: passing the builtin `min` makes recent pandas
# emit a FutureWarning because the way callables are dispatched is changing.
idx = df_results.groupby('experiment')['test'].transform('min') == df_results['test']
df_results[idx]

Unnamed: 0,iter,train,test,difference,experiment
49,49.0,9.06689,9.53791,0.47102,metadata
49,49.0,9.01425,9.5632,0.54895,metadata_artist
49,49.0,8.0277,10.1613,2.1336,metadata_lyrics
49,49.0,8.47837,12.1404,3.66203,metadata_spotify
49,49.0,10.2307,10.2491,0.0184,triplets


In [None]:
# Show the rows recorded at iteration 5 for every experiment.
df_results[df_results['iter'].eq(5)]

Unnamed: 0,iter,train,test,difference,experiment
5,5.0,9.0144,9.63999,0.62559,metadata
5,5.0,9.02464,9.65549,0.63085,metadata_artist
5,5.0,8.88838,19.679,10.79062,metadata_lyrics
5,5.0,29.0892,53.6222,24.533,metadata_spotify
5,5.0,10.2675,10.2638,-0.0037,triplets


In [None]:
# Reshape to long form: one row per (iter, experiment, variable), where
# `variable` is either "train" or "test" and `value` holds the metric.
df_melted = df_results.melt(
    id_vars=['iter', 'experiment'],
    value_vars=['train', 'test'],
)
df_melted.head()

Unnamed: 0,iter,experiment,variable,value
0,0.0,metadata,train,9.77181
1,1.0,metadata,train,9.08557
2,2.0,metadata,train,9.01842
3,3.0,metadata,train,9.01413
4,4.0,metadata,train,9.01202


In [None]:
import plotly.express as px

# Line chart of the `train` column against `iter`, one line per experiment,
# limited to rows with iter <= 10.
early_rows = df_results.loc[df_results.iter <= 10]
fig = px.line(
    early_rows,
    x="iter",
    y="train",
    color="experiment",
    line_group="experiment",
    hover_name="experiment",
)
fig.show()

In [None]:
import plotly.express as px

# Line chart of the `test` column against `iter`, one line per experiment,
# limited to rows with iter <= 50.
rows_up_to_50 = df_results.loc[df_results.iter <= 50]
fig = px.line(
    rows_up_to_50,
    x="iter",
    y="test",
    color="experiment",
    line_group="experiment",
    hover_name="experiment",
)
fig.show()

In [None]:
# Prediction files and test sets found anywhere below the experiments root.
# The loop that consumes these lists pairs them by position, so both are sorted:
# two independent globs give no guarantee of returning directories in the same
# order, and an unsorted pairing could silently match predictions with the wrong
# test set.
# NOTE(review): positional pairing still assumes each experiment directory
# contributes exactly one file to each list — confirm.
preds = sorted(EXPERIMENTS_PATH.glob('**/output.libfm'))
tests = sorted(EXPERIMENTS_PATH.glob('**/test.csv'))

In [None]:
# Join every test set with its predictions, column-wise, one frame per experiment.
# The lists are paired by position, so refuse to continue when they cannot line up.
if len(preds) != len(tests):
    raise ValueError(
        f"found {len(preds)} prediction files but {len(tests)} test files; "
        "they are paired by position and must match one-to-one"
    )

# Frames keyed by the name of the directory holding the test file, so results for
# every experiment stay available instead of being overwritten on each pass.
pred_frames = {}
for pred_path, test_path in zip(preds, tests):
    print(test_path)
    test_df = pd.read_csv(test_path)
    pred_df = pd.read_csv(pred_path, header=None, names=['pred'])
    # A column-wise concat aligns on the row index and would pad the shorter
    # side with NaN; a row-count mismatch means predictions and test rows do
    # not correspond, so fail loudly instead.
    if len(test_df) != len(pred_df):
        raise ValueError(
            f"{pred_path} has {len(pred_df)} predictions but {test_path} "
            f"has {len(test_df)} rows"
        )
    # `df_pred` ends up bound to the last experiment's frame, which is what the
    # cells below operate on.
    df_pred = pd.concat([test_df, pred_df], axis=1)
    pred_frames[test_path.parent.name] = df_pred

..\experiments\metadata\test.csv
..\experiments\metadata_artist\test.csv
..\experiments\metadata_lyrics\test.csv
..\experiments\metadata_spotify\test.csv
..\experiments\triplets\test.csv


In [None]:
# Rank play counts and predictions within each user. With method="first", tied
# values receive distinct ranks in order of appearance.
per_user = df_pred.groupby('user_id')
actual_rank = per_user['play_count'].rank(method="first")
predicted_rank = per_user['pred'].rank(method="first")
df_pred['actual_ranked'] = actual_rank
df_pred['pred_ranked'] = predicted_rank


In [None]:
# Mean over users of the Spearman correlation between actual and predicted ranks.
# The two columns are selected with a list: selecting them with a bare tuple
# (`[...]['a', 'b']`) is deprecated and is rejected by pandas 2.x.
rank_columns = ['actual_ranked', 'pred_ranked']
spearman = (
    df_pred.groupby('user_id')[rank_columns]
    .corr(method='spearman')   # one 2x2 correlation matrix per user
    .unstack()                 # flatten each matrix into a single row
    .iloc[:, 1]                # off-diagonal entry: actual vs. predicted
)
np.mean(spearman)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



0.08075341483179907

In [None]:
# Mean over users of the Kendall correlation between actual and predicted ranks.
# The two columns are selected with a list: selecting them with a bare tuple
# (`[...]['a', 'b']`) is deprecated and is rejected by pandas 2.x.
rank_columns = ['actual_ranked', 'pred_ranked']
kendall = (
    df_pred.groupby('user_id')[rank_columns]
    .corr(method='kendall')    # one 2x2 correlation matrix per user
    .unstack()                 # flatten each matrix into a single row
    .iloc[:, 1]                # off-diagonal entry: actual vs. predicted
)
np.mean(kendall)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



0.06808914095561586