In [24]:
import pickle
import pandas as pd
import tensorflow as tf
import numpy as np
from scipy.sparse import coo_matrix
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

import wals
import model

In [25]:
# Load the ratings CSV through the project helper and unpack the six values it
# returns. NOTE(review): user_map and item_map are indexed in parallel by the
# later cells (position i of user_map goes with item_map[i]).
(
    user_map,
    item_map,
    train_sparse,
    test_sparse,
    unique_items,
    unique_users,
) = model.clean_data("../data/ratings.csv")

In [26]:
# Hyper-parameters handed to model.train_model.
latent_factors, num_iters = 14, 20

# Train on the training matrix; train_model hands back two outputs that are
# passed straight on to wals.get_rmse and model.generate_recommendations.
output_row, output_col = model.train_model(train_sparse, latent_factors, num_iters)

# Score the same pair of outputs against the train split first, then the test split.
train_rmse, test_rmse = (
    wals.get_rmse(output_row, output_col, split)
    for split in (train_sparse, test_sparse)
)
print(f"Train: {train_rmse!s}, Test: {test_rmse!s}")

Train: 1.3752278367431066, Test: 1.9272221487569356


In [27]:
# Scratch cell: spot-check the recommender for one hand-picked user.
user = 18

# Every item_map entry whose position in user_map carries this user's id.
user_rated = [item_map[pos] for pos, uid in enumerate(user_map) if uid == user]

# Row count of output_row next to the number of entries collected for the user.
print(output_row.shape[0], len(user_rated), sep=", ")

# Last expression of the cell, so the notebook displays the 6 recommendations.
model.generate_recommendations(user, user_rated, output_row, output_col, 6)

610, 703


[1066, 973, 920, 701, 792, 510]

In [29]:
# Generate top-n...
# Build the set of users that occur in user_map once, instead of rescanning the
# whole of user_map for every user only to learn whether they have any entry.
# Assumes user_map is a flat sequence of hashable user ids — TODO confirm.
users_with_ratings = set(user_map)

topn_recommendations = []
for k, u in enumerate(unique_users):
    if k % 100 == 0:
        print(k)  # coarse progress indicator, one line per 100 users
    if u in users_with_ratings:
        # An empty list is passed as the second argument here, whereas the
        # single-user test cell passes the user's rated items in that position.
        # NOTE(review): presumably this keeps already-rated items in the top-n
        # lists — confirm against model.generate_recommendations.
        topn_recommendations.append(model.generate_recommendations(u, [], output_row, output_col, 6))
print("length: " + str(len(topn_recommendations)))

# Persist the lists so later sessions can skip the generation loop.
with open("topn_100k.pickle", "wb") as fp:
    pickle.dump(topn_recommendations, fp)

# ...or read them from a file...
# with open("topn_100k.pickle", "rb") as fp:
    # topn_recommendations = pickle.load(fp)

0
100
200
300
400
500
600
length: 609


In [30]:
# One-hot encode the per-user top-n lists: one row per list, one boolean column
# per distinct recommended item, with the item itself as the column label.
te = TransactionEncoder()
te_ary = te.fit(topn_recommendations).transform(topn_recommendations)
topn_df = pd.DataFrame(te_ary, columns=te.columns_)

# use_colnames=True makes apriori report itemsets in terms of the column labels
# (the recommended items). With the default (False) the itemsets contain
# positional column indices, which cannot be compared with item ids.
frequent_itemsets = apriori(topn_df, min_support=0.05, use_colnames=True)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

# Derive rules with the library defaults, then keep only those that clear all
# three thresholds below (strict inequalities).
rules = association_rules(frequent_itemsets)

rules = rules[(rules['support'] > 0.05) &
      (rules['confidence'] > 0.2) &
      (rules['lift'] > 3.0)]

In [32]:
# Build, for every user in unique_users, the list of rule-based recommendations
# together with the rated items that explain them, then pickle the result.

# Group the rated items by user in a single pass over user_map, rather than
# rescanning it for every user. Assumes user ids and items are hashable and
# that item_map can be indexed at every position of user_map — TODO confirm.
rated_by_user = {}
for i, x in enumerate(user_map):
    rated_by_user.setdefault(x, set()).add(item_map[i])

# The rules do not depend on the user, so convert them once up front. Each entry
# keeps the set form (for matching) and the tuple form (for the stored output).
rule_entries = [
    (frozenset(antecedents), frozenset(consequents), tuple(antecedents), tuple(consequents))
    for antecedents, consequents in zip(rules['antecedents'], rules['consequents'])
]

explainableRecommendations = []
for u in unique_users:
    recommendations = []
    user_rated = rated_by_user.get(u)
    if user_rated:
        for antecedents, consequents, explanation, recommendation in rule_entries:
            # A rule applies when the user rated every antecedent item and
            # none of the consequent items.
            if antecedents <= user_rated and consequents.isdisjoint(user_rated):
                recommendations.append({"explanation": explanation, "recommendation": recommendation})
    # Appended for every user (possibly empty), so the output list stays
    # aligned with the iteration order of unique_users.
    explainableRecommendations.append(recommendations)

with open("explainable_100k.pickle", "wb") as fp:
    pickle.dump(explainableRecommendations, fp)