In [None]:
import os
import re
import numpy as np
import pandas as pd
import collections
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
%matplotlib inline

# helper functions 

In [None]:
class Experiment:
    """Plain container for the settings that identify one experiment run.

    The attributes are used by the folder-lookup helpers in this notebook to
    match log directory names.
    """

    def __init__(self, recommender="PMF", evaluator="Recall", user_per=0.0, keep_days=0, hist="2014_1"):
        # model name and metric name
        self.recommender, self.evaluator = recommender, evaluator
        # filtering setting: user fraction and number of days
        self.user_per, self.keep_days = user_per, keep_days
        # suffix that the matching log folder name must end with
        self.hist = hist

In [None]:
def flatten(x):
    """Recursively flatten nested iterables into a single flat list.

    Args:
        x: Any object. Iterables (lists, tuples, numpy arrays, ...) are
            expanded depth-first; everything else is treated as a leaf.

    Returns:
        list: The leaves of ``x`` in iteration order. A non-iterable ``x``
        yields ``[x]``.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally so the block does not rely on the
    # submodule having been loaded as a side effect of `import collections`.
    from collections.abc import Iterable

    # Strings are iterable, but a 1-character string iterates to itself, which
    # would recurse forever -- treat text/bytes as leaves.
    if isinstance(x, (str, bytes)) or not isinstance(x, Iterable):
        return [x]
    return [leaf for item in x for leaf in flatten(item)]

In [None]:
def get_valid_folders(in_dir, exp):
    """List the validation log folders in ``in_dir`` that belong to ``exp``.

    A folder name matches when it contains ``exp.recommender``, ends with
    ``exp.hist`` and contains the ``"<user_per>_<keep_days>"`` setting token.

    Args:
        in_dir: Directory that holds one sub-folder per training run.
        exp: Object exposing ``recommender``, ``user_per``, ``keep_days`` and
            ``hist`` attributes (e.g. an ``Experiment``).

    Returns:
        list[str]: Matching folder paths, sorted lexicographically.
    """
    # The setting token must not be followed by another digit: with a plain
    # substring test "0.25_1" would also match the "0.25_14" and "0.25_180"
    # runs and silently mix different keep_days settings.
    setting = re.compile(re.escape("{}_{}".format(exp.user_per, exp.keep_days)) + r"(?!\d)")
    folders = [
        os.path.join(in_dir, name)
        for name in os.listdir(in_dir)
        if exp.recommender in name and name.endswith(exp.hist) and setting.search(name)
    ]
    return sorted(folders)

In [None]:
def get_test_folders(exp, test_dir="../movielens_test_logs/"):
    """List the test log folders that belong to ``exp``.

    A folder name matches when it contains ``exp.recommender`` and
    ``exp.evaluator``, ends with ``exp.hist`` and contains the
    ``"<user_per>_<keep_days>"`` setting token.

    Args:
        exp: Object exposing ``recommender``, ``evaluator``, ``user_per``,
            ``keep_days`` and ``hist`` attributes (e.g. an ``Experiment``).
        test_dir: Directory holding the test log folders. Defaults to the
            location this notebook has always used.

    Returns:
        list[str]: Matching folder paths, sorted lexicographically.
    """
    # Reject a trailing digit after the setting token; otherwise keep_days=1
    # also matches the 14- and 180-day folders, and because callers take
    # element [0] of the sorted list they could read the wrong run.
    setting = re.compile(re.escape("{}_{}".format(exp.user_per, exp.keep_days)) + r"(?!\d)")
    folders = [
        os.path.join(test_dir, name)
        for name in os.listdir(test_dir)
        if exp.recommender in name
        and exp.evaluator in name
        and name.endswith(exp.hist)
        and setting.search(name)
    ]
    return sorted(folders)

In [None]:
def get_eva(folder, evaluator="Recall", eval_name=None):
    """Extract one metric value per matching log line for every run in ``folder``.

    Each entry of ``folder`` is either a log file, or a directory whose
    ``training.log`` is read. A line contributes when it contains the metric
    name and, if given, ``eval_name``. All ``<digits>.<digits>`` numbers are
    pulled from the line and the one at the cutoff position is kept.

    Args:
        folder: Directory containing the per-run logs.
        evaluator: Metric name, optionally with a cutoff as ``"Name@K"``.
            The cutoff selects position ``K // 10 - 1`` among the numbers on
            the line; without a cutoff the first number is used.
        eval_name: Extra substring the line must contain (the notebook passes
            ``"Val"`` or ``"Test"``). ``None`` disables this filter.

    Returns:
        numpy.ndarray: One row per log (entries sorted by name). Rows hold the
        extracted floats; if the logs yield different numbers of values the
        result is a 1-D object array of lists.

    Raises:
        IndexError: If a matching line has too few numbers for the cutoff.
    """
    number_pattern = re.compile('\\d+\\.\\d+')
    if "@" in evaluator:
        evaluator, at = evaluator.split("@")
        at = int(at)//10-1
    else:
        at = 0

    results = []
    for entry in sorted(os.listdir(folder)):
        log_path = os.path.join(folder, entry)
        if os.path.isdir(log_path):
            log_path = log_path + '/training.log'
        val_r = []
        with open(log_path, 'r') as in_file:
            for line in in_file:
                # `eval_name in line` raises TypeError for None, so the default
                # argument has to be handled explicitly.
                if evaluator in line and (eval_name is None or eval_name in line):
                    val_r.append(float(number_pattern.findall(line)[at]))
        if len(val_r) == 0:
            # Surface logs that produced no values so they can be inspected.
            print (log_path)
        results.append(val_r)

    try:
        return np.array(results)
    except ValueError:
        # NumPy >= 1.24 refuses to build an array from ragged rows; fall back
        # to the object array that older versions returned implicitly.
        return np.array(results, dtype=object)

<a id='config'></a>

In [None]:
def generate_test_config(val_dir, exp):
    """Derive the test configuration from the best validation run of ``exp``.

    The validation scores of every candidate folder are stacked, the folder
    with the highest peak score is selected, and the position of that peak is
    converted into a number of training iterations.

    Returns:
        list[str]: The winning folder name split on ``_``, with the iteration
        count inserted before the last two tokens, which are re-joined with
        ``_`` into a single trailing element.
    """
    # Results are logged every `display_itr` iterations; keep in sync with the
    # variable used in 'time_validation.py'.
    display_itr = 1000

    folders = get_valid_folders(val_dir, exp)
    scores = np.array([get_eva(folder, evaluator=exp.evaluator, eval_name="Val") for folder in folders])

    # Peak score and its position along the last axis, per (folder, log).
    peak_scores = np.max(scores, axis=2)
    peak_positions = np.argmax(scores, axis=2)

    # Index of the best folder for each log; only the first entry is used below.
    best_folder = np.argmax(peak_scores, axis=0)

    name_tokens = folders[flatten(best_folder)[0]].split('/')[-1].split('_')
    best_position = flatten(peak_positions[best_folder])[0]
    training_itr = (best_position + 1) * display_itr

    head, tail = name_tokens[:-2], name_tokens[-2:]
    return head + [str(training_itr), "_".join(tail)]

In [None]:
def save_test_config(recommender, evaluator):
    """Write one test configuration line per experiment setting to a text file.

    The output goes to ``../configs/<recommender>_<evaluator>_test_config.txt``
    and lists, in order: the unfiltered baseline, every (user_per, keep_days)
    filtering setting, and the unfiltered runs for each ``hist`` value from
    2010_1 to 2013_7. Reads the module-level ``val_dir``.
    """
    def _experiment(**settings):
        return Experiment(recommender=recommender, evaluator=evaluator, **settings)

    # baseline: no-filtering
    experiments = [_experiment(user_per=0.0, keep_days=0)]

    # with filter
    experiments.extend(
        _experiment(user_per=user_per, keep_days=keep_days)
        for user_per in [0.25, 0.5, 0.75, 1.0]
        for keep_days in [1,7,14,30,60,90,180]
    )

    # complete user records for time intervals of varying length in multiples of 6 months
    experiments.extend(
        _experiment(user_per=0.0, keep_days=0, hist="{}_{}".format(year, month))
        for year in range(2010, 2014)
        for month in [1,7]
    )

    out_path = "../configs/{}_{}_test_config.txt".format(recommender, evaluator)
    with open(out_path, 'w') as outfile:
        for exp in experiments:
            outfile.write(" ".join(generate_test_config(val_dir, exp)) + "\n")

# Model configuration

In [None]:
# Root directory of the validation logs -- change to your log files directory.
val_dir = "../movielens_validation_logs/"

# Write one test-config file for every (recommender, evaluator) combination.
for recommender, evaluator in [(r, e) for r in ['PMF', 'BPR', 'CML'] for e in ['Recall','NDCG']]:
    save_test_config(recommender, evaluator)

# Experiments

## 1. Population-level performance 

In [None]:
def get_test_results(exp):
    """Return all "Test" metric values of ``exp`` as one flat array.

    Only the first matching test folder (in sorted order) is read; the
    per-log rows returned by ``get_eva`` are concatenated.
    """
    first_folder = get_test_folders(exp)[0]
    per_log_values = get_eva(first_folder, evaluator=exp.evaluator, eval_name="Test")
    return np.concatenate(per_log_values)

In [None]:
def generate_df(recommender="PMF", evaluator="Recall"):
    """Tabulate the mean test result for the baseline and every filter setting.

    Returns:
        pandas.DataFrame: Columns ``user_per``, ``keep_days``, ``recommender``,
        ``evaluator`` and ``result``. Row 0 is the unfiltered baseline
        (``user_per=0.0``, ``keep_days=0``); the remaining rows cover each
        (user_per, keep_days) combination.
    """
    # Baseline first, then the full grid of filter settings.
    settings = [(0.0, 0)] + [
        (user_per, keep_days)
        for user_per in [0.25, 0.5, 0.75, 1.0]
        for keep_days in [1, 7, 14, 30, 60, 90, 180]
    ]

    df = pd.DataFrame(columns=["user_per", "keep_days", "recommender", "evaluator", "result"])
    for row, (user_per, keep_days) in enumerate(settings):
        exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=user_per, keep_days=keep_days)
        mean_result = np.mean(get_test_results(exp))
        df.loc[row] = [user_per, keep_days, recommender, evaluator, mean_result]
    return df

In [None]:
def plot_population():
    """Plot the relative change of the test metric against the unfiltered baseline.

    Draws a 2x3 grid: rows are the evaluators (Recall, NDCG), columns the
    recommenders (CML, BPR, PMF). Each panel shows, for every ``user_per``
    value, the percent change of the mean test result versus the baseline row
    as a function of ``keep_days``. The legend is shown on one panel only.
    """
    fig, axn = plt.subplots(2, 3, sharex=True, sharey=True)
    fig.set_size_inches(12,5)
    fig.tight_layout(rect=[0, 0.1, 1, 1])
    # Spacing is the same for every panel, so set it once instead of per axis.
    fig.subplots_adjust(hspace = .1, wspace=.1)

    recommenders = ["CML", "BPR", "PMF"]
    evaluators = ["Recall", "NDCG"]
    for i, ax in enumerate(axn.flat):
        recommender = recommenders[i%3]
        evaluator = evaluators[i//3]
        df = generate_df(recommender=recommender, evaluator=evaluator)
        baseline = df["result"][0]
        # Percent change relative to the baseline (row 0).
        df["result_mean"] = df["result"].apply(lambda x: (x-baseline)*100/baseline)
        # Copy before adding columns so the slice is not modified in place
        # (avoids pandas' SettingWithCopyWarning).
        plot_df = df.iloc[1:].copy()
        plot_df["user_per"] = plot_df["user_per"].apply(lambda x: "P={}".format(x))

        # Plot the percent change: the y-limits and the zero reference line
        # below are in percent, and the original code computed `result_mean`
        # but then plotted the raw `result` column.
        sns.pointplot(data=plot_df, ax=ax, x="keep_days", y="result_mean", hue="user_per", linestyles='--', palette="tab20")

        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_ylim(-45, 20)
        ax.axhline(y=0, color='k', linestyle="--", alpha=0.5)
        # A single shared legend: keep only the one attached to panel 4.
        ax.legend(bbox_to_anchor=(-0.05, -0.2, 1.1, -0.1), mode="expand", ncol=4).set_visible(i==4)

In [None]:
# Show population-level performance change under different settings (P, N):
# a 2x3 grid of panels, one row per evaluator and one column per recommender.
plot_population()

## 2. Compare recommenders on complete user records for time intervals of varying length

In [None]:
def truncate_history(recommender, evaluator):
    """Tabulate the mean unfiltered test result for each ``hist`` setting.

    Returns:
        pandas.DataFrame: Columns ``recommender``, ``evaluator``,
        ``history_length`` (the ``"<year>_<month>"`` label) and ``result``.
        Row 0 is ``2014_1``, followed by ``2010_1`` ... ``2013_7``.
    """
    hist_labels = ["{}_{}".format(2014, 1)]
    hist_labels.extend(
        "{}_{}".format(year, month)
        for year in range(2010, 2014)
        for month in [1,7]
    )

    df = pd.DataFrame(columns=["recommender", "evaluator", "history_length", "result"])
    for row, hist in enumerate(hist_labels):
        exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=0.0, keep_days=0, hist=hist)
        df.loc[row] = [recommender, evaluator, hist, np.mean(get_test_results(exp))]
    return df

In [None]:
# Mean test Recall of PMF for each history setting (2014_1, then 2010_1 ... 2013_7).
truncate_history("PMF", "Recall")

In [None]:
# Mean test NDCG of BPR for each history setting (2014_1, then 2010_1 ... 2013_7).
truncate_history("BPR", "NDCG")

In [None]:
# Mean test Recall of CML for each history setting (2014_1, then 2010_1 ... 2013_7).
truncate_history("CML", "Recall")

## 3. Group-level performance

In [None]:
from openrec.utils.evaluators import Recall, NDCG

In [None]:
def user_scores(p):
    """Average Recall@10 and NDCG@10 per user from a saved evaluation dump.

    For every user in ``p['users']`` the first ``p['num_negatives']`` entries
    of ``p['results'][user]`` are taken as negative scores and each remaining
    entry as one positive score. Every positive is scored by the number of
    negatives that rank above it, and the per-positive metric values are
    averaged per user.

    Returns:
        dict: ``{"Recall": {user: mean}, "NDCG": {user: mean}}``. Users with
        no positive entry do not appear.
    """
    recall_evaluator = Recall(recall_at=[10])
    ndcg_evaluator = NDCG(ndcg_at=[10])

    # user -> [recall sum, ndcg sum, number of positives], in first-seen order
    totals = dict()

    for user in p['users']:
        user_results = p['results'][user]
        neg_scores = user_results[:p['num_negatives']]
        num_positives = len(p['user_items'][user][p['num_negatives']:])
        for offset in range(num_positives):
            pos_score = user_results[p['num_negatives'] + offset]
            # number of negatives scored strictly higher than this positive
            rank_above = np.array([ float(np.sum(neg_scores > pos_score)) ])
            negative_num = float(p['num_negatives'])
            recall_value = recall_evaluator.compute(rank_above, negative_num)[0]
            ndcg_value = ndcg_evaluator.compute(rank_above, negative_num)[0]
            if user not in totals:
                totals[user] = [0.0, 0.0, 0.0]
            running = totals[user]
            running[0] += recall_value
            running[1] += ndcg_value
            running[2] += 1

    # calculate per-user scores
    per_user_recall = dict()
    per_user_ndcg = dict()
    for user, (recall_sum, ndcg_sum, count) in totals.items():
        per_user_recall[user] = recall_sum / count
        per_user_ndcg[user] = ndcg_sum / count

    return {"Recall": per_user_recall, "NDCG": per_user_ndcg}

In [None]:
def user_grouping(logdir):
    """Split the test users of one run into three groups.

    Loads ``filtered_data.npy``, ``train_data.npy`` and ``test_data.npy`` from
    ``logdir`` and compares their unique ``user_id`` values.

    Args:
        logdir: Run directory containing the three ``.npy`` files.

    Returns:
        list: ``[len(train_data), number of unique train users,
        len(test_data), group_filter, group_same, group_new]`` where the
        groups are lists of test user ids (in sorted unique order):

        * ``group_filter`` -- test users present in the filtered data
          (user profile changed),
        * ``group_same`` -- test users in the training data but not in the
          filtered data (user profile unchanged),
        * ``group_new`` -- test users in neither (cold start).
    """
    def _load(file_name):
        with open(logdir + "/" + file_name, 'rb') as handle:
            return np.load(handle)

    filter_data = _load("filtered_data.npy")
    train_data = _load("train_data.npy")
    test_data = _load("test_data.npy")

    filter_user = np.unique(filter_data["user_id"])
    train_user = np.unique(train_data["user_id"])
    test_user = np.unique(test_data["user_id"])

    # Build the lookup sets once. The original rebuilt them inside the
    # comprehensions (once per test user) and used linear `in` on an ndarray.
    filter_set = set(filter_user)
    train_set = set(train_user)
    unchanged_set = train_set - filter_set
    known_set = train_set | filter_set

    # decompose test users into three groups: (1) user profile changed
    # (2) user profile unchanged (3) new users (cold start)
    group_filter = [u for u in test_user if u in filter_set]
    group_same = [u for u in test_user if u in unchanged_set]
    group_new = [u for u in test_user if u not in known_set]
    return [len(train_data), len(train_user), len(test_data), group_filter, group_same, group_new]

In [None]:
def user_compare(recommender=None, evaluator=None):
    """Compare per-group test performance of every filter setting with the baseline.

    For each (user_per, keep_days) setting and each run directory of the
    matching test folder, the test users are split with ``user_grouping`` and
    the mean per-user score of each group is computed for the filtered run and
    for the baseline run at the same position. The per-run means are then
    averaged over runs.

    Args:
        recommender: Recommender name used to locate the log folders.
        evaluator: ``"Recall"`` or ``"NDCG"``; selects the per-user metric
            returned by ``user_scores``.

    Returns:
        pandas.DataFrame: Three rows per setting (groups ``"filtered"``,
        ``"no filtered"``, ``"cold start"``) with columns ``group``,
        ``user_per``, ``keep_days``, ``recommender``, ``evaluator``,
        ``result_mean``, ``baseline_result_mean`` and ``performance_change``
        (relative change versus the baseline mean).

    Raises:
        AssertionError: If the number of scored users differs from the total
            size of the three groups.
    """
    baseline = Experiment(recommender=recommender, evaluator=evaluator, user_per=0.0, keep_days=0)
    baseline_folder = get_test_folders(baseline)[0]
    baseline_files = [os.path.join(baseline_folder, f) for f in sorted(os.listdir(baseline_folder))]

    def _per_user_scores(run_dir):
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at evaluation dumps you produced yourself.
        with open(run_dir+"/_evaluate_partial.pickle", 'rb') as eva_file:
            p = pickle.load(eva_file)
        return user_scores(p)[evaluator]

    # The baseline scores do not depend on the filter setting, so each
    # baseline run is loaded and scored at most once instead of once per
    # (user_per, keep_days) combination.
    baseline_cache = {}

    def _baseline_scores(run_index):
        if run_index not in baseline_cache:
            baseline_cache[run_index] = _per_user_scores(baseline_files[run_index])
        return baseline_cache[run_index]

    group_labels = ["filtered", "no filtered", "cold start"]
    df = pd.DataFrame(columns=["group", "user_per", "keep_days", "recommender", "evaluator", "result_mean", "baseline_result_mean", "performance_change"])
    row = 0

    for user_per in [0.25, 0.5, 0.75, 1.0]:
        for keep_days in [1,7,14,30,60,90,180]:
            print (user_per, keep_days)
            exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=user_per, keep_days=keep_days)
            folder = get_test_folders(exp)[0]
            log_files = [os.path.join(folder, f) for f in sorted(os.listdir(folder))]

            # Per-run group means; index k follows `group_labels`.
            filtered_means = [[], [], []]
            baseline_means = [[], [], []]

            for i, log_file in enumerate(log_files):
                # with user filtering
                per_user_performance = _per_user_scores(log_file)
                groups = user_grouping(log_file)[-3:]
                assert len(per_user_performance) == sum(len(users) for users in groups)

                # baseline run at the same position, evaluated on the same user groups
                baseline_performance = _baseline_scores(i)

                for k, users in enumerate(groups):
                    filtered_means[k].append(np.mean([per_user_performance[u] for u in users]))
                    baseline_means[k].append(np.mean([baseline_performance[u] for u in users]))

            for label, after, before in zip(group_labels, filtered_means, baseline_means):
                result_mean = np.mean(after)
                baseline_result_mean = np.mean(before)
                df.loc[row] = [label, user_per, keep_days, recommender, evaluator,
                               result_mean, baseline_result_mean,
                               (result_mean - baseline_result_mean)/baseline_result_mean]
                row += 1

    return df

### User group distribution

In [None]:
# use any recommender and evaluator 
recommender = "PMF" 
evaluator = "NDCG"
df = pd.DataFrame(columns=["group", "user_per", "keep_days", "user_num", "percentage"])
row = 0
for user_per in [0.25, 0.5, 0.75, 1.0]:
    for keep_days in [1,7,14,30,60,90,180]:
        exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=user_per, keep_days=keep_days)
        folder = get_test_folders(exp)[0]
        log_files = [os.path.join(folder, f) for f in sorted(os.listdir(folder))]
        stats = []
        for i in range(len(log_files)):
            run_stats = user_grouping(log_files[i])
            # user_grouping returns three counts followed by three lists of
            # user ids. Reduce the lists to their sizes so every row is
            # numeric; averaging rows that mix ints and lists (and then
            # calling len() on the averages) cannot work.
            stats.append(list(run_stats[:-3]) + [len(group) for group in run_stats[-3:]])
        # mean over runs of [n_train, n_train_users, n_test, |group1|, |group2|, |group3|]
        groups = np.mean(stats, axis=0)
        # NOTE(review): the denominator keeps the original index -4, which is
        # len(test_data) as returned by user_grouping -- confirm this is the
        # intended base for "percentage" (vs. the number of unique test users).
        df.loc[row] = ["Group 1", user_per, keep_days, groups[-3], groups[-3]/groups[-4]]
        row += 1
        df.loc[row] = ["Group 2", user_per, keep_days, groups[-2], groups[-2]/groups[-4]]
        row +=1
        df.loc[row] = ["Group 3", user_per, keep_days, groups[-1], groups[-1]/groups[-4]]
        row += 1
        
# One stacked bar chart per user_per value; bars are ordered by keep_days.
fig, axn = plt.subplots(1, 4, sharex=True, sharey=True)
fig.set_size_inches(15,3)
fig.tight_layout(rect=[0, 0.2, 1, 1])
user_pers = [0.25, 0.5, 0.75, 1.0]
days = ["1","7","14","30","60","90","180"]
for i, ax in enumerate(axn.flat):
    user_per = user_pers[i%4]
    plot_df = df[df["user_per"] == user_per]
    group1 = plot_df[plot_df["group"] == "Group 1"]["percentage"]
    group2 = plot_df[plot_df["group"] == "Group 2"]["percentage"]
    group3 = plot_df[plot_df["group"] == "Group 3"]["percentage"]
    ax.bar(days, group1, label="Group 1",alpha=0.8)
    ax.bar(days, group2, bottom=group1, label="Group 2", alpha=0.8)
    ax.bar(days, group3, bottom=np.array(group2) + np.array(group1), label="Group 3", alpha=0.8)
    # a single shared legend: keep only the one attached to panel 2
    ax.legend(bbox_to_anchor=(-0.65, -0.2, 1.1, -0.1), mode="expand", ncol=3).set_visible(i==2)

### Group-level performance change after data filtering 

In [None]:
def plot_grouptrend(evaluator):
    """Plot the group-level performance change after data filtering.

    Draws a 3x4 grid: rows are the recommenders (CML, BPR, PMF), columns the
    ``user_per`` values. Each panel shows the percent performance change of
    the three user groups as a function of ``keep_days``.

    Args:
        evaluator: Metric name forwarded to ``user_compare``.
    """
    fig, axn = plt.subplots(3, 4, sharex=True, sharey=True)
    fig.set_size_inches(12,6)
    fig.tight_layout(rect=[0, 0.1, 1, 1])
    # Spacing is the same for every panel, so set it once instead of per axis.
    fig.subplots_adjust(hspace = .1, wspace=.1)
    recommenders = ["CML", "BPR", "PMF"]
    user_pers = [0.25, 0.5, 0.75, 1.0]
    legend_mapping = {"filtered": "Group 1", "no filtered":"Group 2", "cold start": "Group 3"}

    # user_compare covers all user_per values and is expensive (it reads every
    # evaluation dump), so run it once per recommender rather than once per panel.
    group_dfs = {}

    for i, ax in enumerate(axn.flat):
        recommender = recommenders[i//4]
        user_per = user_pers[i%4]
        if recommender not in group_dfs:
            group_dfs[recommender] = user_compare(recommender=recommender, evaluator=evaluator)
        group_df = group_dfs[recommender]

        # Copy the filtered rows before editing columns so the cached frame is
        # untouched and pandas does not warn about writing to a slice.
        plot_df = group_df[(group_df["user_per"] == user_per)].copy()
        plot_df["performance_change"] = plot_df["performance_change"].apply(lambda x: x*100)
        plot_df["group"] = plot_df["group"].apply(lambda x: legend_mapping[x])
        # sns.set(font_scale=1.1)
        sns.pointplot(data=plot_df, ax=ax, x="keep_days", y="performance_change", hue="group", linestyles='--', palette="tab10")
        ax.set_ylim(-45, 20)
        ax.axhline(y=0, color='k', linestyle="--", alpha=0.5)
        ax.set_xlabel("")
        ax.set_ylabel("")
        # a single shared legend: keep only the one attached to panel 10
        ax.legend(bbox_to_anchor=(-0.75, -0.15, 1.5, -0.2), mode="expand", ncol=4).set_visible(i==10)

In [None]:
# Group-level percent change in Recall: one row per recommender, one column per user_per.
plot_grouptrend("Recall")

In [None]:
# Group-level percent change in NDCG: one row per recommender, one column per user_per.
plot_grouptrend("NDCG")