# Imports

In [1]:
from utils.io import load_yaml
from utils.modelnames import critiquing_models
from utils.sampler import Negative_Sampler

import ast
import numpy as np
import pandas as pd

In [62]:
# --- Data files and column names -------------------------------------------
DATA_DIR = "data/beer/"
TRAIN_SET = "Train.csv"
TEST_SET = "Test.csv"
KEYPHRASE_SET = "KeyPhrases.csv"
USER_COL = "UserIndex"
ITEM_COL = "ItemIndex"
RATING_COL = "Binary"
KEYPHRASE_VECTOR_COL = "keyVector"

# --- Model selection and checkpoint -----------------------------------------
MODEL = "CE-VNCF"            # key looked up in critiquing_models
MODEL_SAVED_PATH = "beer"    # appended to the configured pretrained path

# --- Values handed to the model / negative sampler --------------------------
RANK = 200                   # passed to the model as embed_dim
NUM_LAYERS = 1
LAMB = 0.0001                # passed to the model as lamb
LEARNING_RATE = 0.0001
EPOCH = 100                  # passed to train_model as epoch
TRAIN_BATCH_SIZE = 128
NEGATIVE_SAMPLING_SIZE = 5

# --- How many results to display ---------------------------------------------
TOPK = 10                    # number of recommended items shown
TOPK_KEYPHRASE = 5           # number of keyphrases shown per item

# Load Dataset

In [3]:
# Each index file is named after its column ("<column>.csv"); the number of
# distinct values in that column gives the user / item count.
num_users, num_items = (
    pd.read_csv(DATA_DIR + index_col_name + '.csv')[index_col_name].nunique()
    for index_col_name in (USER_COL, ITEM_COL)
)

print("Number of users: {}.".format(num_users))
print("Number of items: {}.".format(num_items))

Number of users: 6370.
Number of items: 3668.


In [4]:
# Load the training interactions and keep only the rows whose rating equals 1.
df_train = pd.read_csv(DATA_DIR + TRAIN_SET)
# Take an explicit copy of the filtered rows: assigning a column on a
# boolean-filtered slice otherwise raises pandas' SettingWithCopyWarning and
# leaves it ambiguous whether the parent frame is modified.
df_train = df_train.loc[df_train[RATING_COL] == 1].copy()
# The keyphrase vectors are read from CSV as strings; parse each one into the
# Python literal it spells out.
df_train[KEYPHRASE_VECTOR_COL] = df_train[KEYPHRASE_VECTOR_COL].apply(ast.literal_eval)

In [5]:
# Held-out interactions, read unfiltered from the test split.
df_valid = pd.read_csv(filepath_or_buffer=DATA_DIR + TEST_SET)

In [103]:
# Read the keyphrase table as a 2-D array: one row per keyphrase.
# Later cells take element [0] of a row as the display name.
keyphrase_path = DATA_DIR + KEYPHRASE_SET
keyphrase_names = pd.read_csv(keyphrase_path, index_col=False).values

# The row count is the size of the keyphrase vocabulary.
num_keyphrases = keyphrase_names.shape[0]
print("Number of keyphrases: {}.".format(num_keyphrases))

Number of keyphrases: 75.


In [25]:
# Build an item-index -> product-name lookup for printing recommendations.
# The path and key column come from DATA_DIR / ITEM_COL (the same file the
# item count is read from) instead of being hard-coded a second time, so
# switching datasets in the config cell keeps this cell in sync.
df_items = (
    pd.read_csv(DATA_DIR + ITEM_COL + '.csv', index_col=False)
    .drop_duplicates()
    .sort_values(ITEM_COL)
    .reset_index(drop=True)
)
# Only the name column is needed, so convert that single Series to a dict
# rather than converting every column and discarding the rest.
# NOTE(review): 'beer/name' is specific to the beer dataset — confirm before
# pointing DATA_DIR at another dataset.
items_dict = df_items.set_index(ITEM_COL)['beer/name'].to_dict()

# Initialize Negative Sampler

In [8]:
# Only the user, item and keyphrase-vector columns of the training frame are
# handed to the sampler.
sampler_columns = [USER_COL, ITEM_COL, KEYPHRASE_VECTOR_COL]
negative_sampler = Negative_Sampler(
    df_train[sampler_columns],
    USER_COL,
    ITEM_COL,
    RATING_COL,
    KEYPHRASE_VECTOR_COL,
    num_items,
    batch_size=TRAIN_BATCH_SIZE,
    num_keyphrases=num_keyphrases,
    negative_sampling_size=NEGATIVE_SAMPLING_SIZE,
)

# Train

In [9]:
# Collect the constructor arguments first, then instantiate the class that is
# registered under MODEL in critiquing_models.
model_kwargs = dict(
    num_users=num_users,
    num_items=num_items,
    text_dim=num_keyphrases,
    embed_dim=RANK,
    num_layers=NUM_LAYERS,
    negative_sampler=negative_sampler,
    lamb=LAMB,
    learning_rate=LEARNING_RATE,
)
model_class = critiquing_models[MODEL]
model = model_class(**model_kwargs)

In [10]:
# Try to restore a pretrained checkpoint; fall back to training from scratch
# only when loading actually fails.
pretrained_path = load_yaml('config/global.yml', key='path')['pretrained']
try:
    model.load_model(pretrained_path + MODEL_SAVED_PATH, MODEL)
except Exception as load_error:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed and do not silently
    # start a full training run. Report the cause so that an unexpected
    # failure (e.g. a wrong path) is visible to the reader.
    print("Could not load pretrained model ({}: {}); training from scratch.".format(
        type(load_error).__name__, load_error))
    model.train_model(df_train,
                      user_col=USER_COL,
                      item_col=ITEM_COL,
                      rating_col=RATING_COL,
                      epoch=EPOCH)

INFO:tensorflow:Restoring parameters from pretrained/beer/CE-VNCF/model.ckpt
Model restored.


# Case Study 1

In [87]:
# Index of the user examined in this case study; it is paired with every item
# index when the prediction inputs are built.
user_index = 292

## Predict

In [88]:
# One [user, item] row per item index, so the chosen user is scored against
# the whole catalogue.
user_item_pairs = [[user_index, candidate_item] for candidate_item in range(num_items)]
inputs = np.array(user_item_pairs)

In [89]:
# Run the model on every (user, item) row of `inputs`. The two returned values
# are unpacked as the rating scores and the explanation scores; later cells
# flatten `rating` to rank items and index `explanation` as [item, keyphrase].
rating, explanation = model.predict(inputs)

## Initial Recommended Items

In [109]:
# Sort item indices by ascending score, reverse to get best-first, and keep
# the first TOPK.
ascending_by_score = rating.flatten().argsort()
top_items = ascending_by_score[::-1][:TOPK]

for position, item_id in enumerate(top_items, start=1):
    print("Top {} Recommended Item ID: {}. Product Name: {}".format(position, item_id, items_dict[item_id]))

Top 1 Recommended Item ID: 2927. Product Name: Sierra Nevada Torpedo Extra IPA
Top 2 Recommended Item ID: 976. Product Name: Double Jack
Top 3 Recommended Item ID: 1412. Product Name: Harpoon Leviathan - Imperial IPA
Top 4 Recommended Item ID: 3121. Product Name: Stone 15th Anniversary Escondidian Imperial Black IPA
Top 5 Recommended Item ID: 1552. Product Name: Hop Rod Rye
Top 6 Recommended Item ID: 42. Product Name: 30th Anniversary - Grand Cru
Top 7 Recommended Item ID: 2501. Product Name: Punkin Ale
Top 8 Recommended Item ID: 1589. Product Name: Hoptimum Double IPA - Beer Camp #19
Top 9 Recommended Item ID: 2516. Product Name: Racer 5 India Pale Ale
Top 10 Recommended Item ID: 2081. Product Name: Mokah


In [111]:
# Display the raw array of recommended item indices (best first).
top_items

array([2927,  976, 1412, 3121, 1552,   42, 2501, 1589, 2516, 2081])

## Initial Predicted Explanation

In [110]:
# Per row, order keyphrase indices from highest to lowest explanation score
# (argsort of the negated scores) and keep the first TOPK_KEYPHRASE of them.
keyphrases_best_first = (-explanation).argsort(axis=1)
explanation_rank_list = keyphrases_best_first[:, :TOPK_KEYPHRASE]
# Select the rows that belong to the recommended items, in recommendation order.
top_explanation = explanation_rank_list[top_items]
top_explanation

array([[16, 12, 31,  1, 34],
       [14, 16, 12, 39, 45],
       [16,  1, 12, 14,  2],
       [16,  1,  6, 12, 50],
       [25, 12, 16, 58,  1],
       [ 1, 14, 12, 16, 39],
       [ 1, 21, 55, 39, 14],
       [ 1, 16, 14, 12, 45],
       [12, 16, 31,  1, 34],
       [ 6, 50, 16,  1,  0]])

## Initial Prediction Summary

In [119]:
# For every recommended item, print its name followed by its top keyphrases.
for item_rank, (item_id, item_keyphrases) in enumerate(zip(top_items, top_explanation), start=1):
    print("Top {} Item ID: {}. Product Name: {}.".format(item_rank, item_id, items_dict[item_id]))
    for keyphrase_rank, keyphrase_id in enumerate(item_keyphrases, start=1):
        keyphrase_label = keyphrase_names[keyphrase_id][0]
        print("Top {} Keyphrase ID: {}. Keyphrase Name: {}".format(keyphrase_rank, keyphrase_id, keyphrase_label))
    print("\n")

Top 1 Item ID: 2927. Product Name: Sierra Nevada Torpedo Extra IPA.
Top 1 Keyphrase ID: 16. Keyphrase Name: bitter
Top 2 Keyphrase ID: 12. Keyphrase Name: citrus
Top 3 Keyphrase ID: 31. Keyphrase Name:  pine 
Top 4 Keyphrase ID: 1. Keyphrase Name: sweet
Top 5 Keyphrase ID: 34. Keyphrase Name: grapefruit


Top 2 Item ID: 976. Product Name: Double Jack.
Top 1 Keyphrase ID: 14. Keyphrase Name: fruit
Top 2 Keyphrase ID: 16. Keyphrase Name: bitter
Top 3 Keyphrase ID: 12. Keyphrase Name: citrus
Top 4 Keyphrase ID: 39. Keyphrase Name: orang
Top 5 Keyphrase ID: 45. Keyphrase Name: gold


Top 3 Item ID: 1412. Product Name: Harpoon Leviathan - Imperial IPA.
Top 1 Keyphrase ID: 16. Keyphrase Name: bitter
Top 2 Keyphrase ID: 1. Keyphrase Name: sweet
Top 3 Keyphrase ID: 12. Keyphrase Name: citrus
Top 4 Keyphrase ID: 14. Keyphrase Name: fruit
Top 5 Keyphrase ID: 2. Keyphrase Name: caramel


Top 4 Item ID: 3121. Product Name: Stone 15th Anniversary Escondidian Imperial Black IPA.
Top 1 Keyphrase ID: 

## Start Critiquing

In [131]:
# Keyphrase the user critiques in this case study.
keyphrase_index = 14
critiqued_keyphrase_name = keyphrase_names[keyphrase_index][0]
print("Critiqued Keyphrase ID: {}.\nCritiqued Keyphrase Name: {}.".format(keyphrase_index, critiqued_keyphrase_name))

Critiqued Keyphrase ID: 14.
Critiqued Keyphrase Name: fruit.


## Suppress the critiqued keyphrase: set its score to each item's minimum explanation score

In [124]:
# Overwrite the critiqued keyphrase's column with each row's minimum score, so
# it is no higher than any other keyphrase for that item.
# NOTE: this edits `explanation` in place.
explanation[:, keyphrase_index] = explanation.min(axis=1)

In [125]:
# Re-run the model on the same (user, item) inputs together with the edited
# explanation matrix; the two returned values are unpacked as the refined
# ratings and refined explanation scores.
# NOTE(review): presumably refine_predict conditions the new scores on the
# supplied explanation — confirm against the model implementation.
modified_rating, modified_explanation = model.refine_predict(inputs, explanation)

## Top Items Recommended after Critiquing

In [127]:
# Same ranking as before critiquing, now on the refined scores: ascending
# argsort, reversed to best-first, truncated to TOPK.
refined_ascending = modified_rating.flatten().argsort()
modified_top_items = refined_ascending[::-1][:TOPK]

for position, item_id in enumerate(modified_top_items, start=1):
    print("Top {} Recommended Item ID: {}. Product Name: {}".format(position, item_id, items_dict[item_id]))

Top 1 Recommended Item ID: 2927. Product Name: Sierra Nevada Torpedo Extra IPA
Top 2 Recommended Item ID: 3121. Product Name: Stone 15th Anniversary Escondidian Imperial Black IPA
Top 3 Recommended Item ID: 2516. Product Name: Racer 5 India Pale Ale
Top 4 Recommended Item ID: 1552. Product Name: Hop Rod Rye
Top 5 Recommended Item ID: 1412. Product Name: Harpoon Leviathan - Imperial IPA
Top 6 Recommended Item ID: 976. Product Name: Double Jack
Top 7 Recommended Item ID: 3240. Product Name: Ten FIDY
Top 8 Recommended Item ID: 2081. Product Name: Mokah
Top 9 Recommended Item ID: 42. Product Name: 30th Anniversary - Grand Cru
Top 10 Recommended Item ID: 1589. Product Name: Hoptimum Double IPA - Beer Camp #19


## Top Refined Explanation after Critiquing

In [128]:
# Per row, order keyphrase indices from highest to lowest refined score and
# keep the first TOPK_KEYPHRASE.
refined_keyphrases_best_first = (-modified_explanation).argsort(axis=1)
modified_explanation_rank_list = refined_keyphrases_best_first[:, :TOPK_KEYPHRASE]
# Select the rows for the items recommended after critiquing.
modified_top_explanation = modified_explanation_rank_list[modified_top_items]
modified_top_explanation

array([[16, 12, 31,  1, 14],
       [16,  1,  6, 50,  0],
       [12, 16, 31,  1, 34],
       [25, 16, 12,  1, 58],
       [ 1, 16, 12, 14,  2],
       [16, 12, 14,  1, 39],
       [ 6, 50,  0,  8, 16],
       [ 6, 50,  1,  0, 16],
       [ 1, 14, 12, 16, 42],
       [ 1, 16, 12, 14, 39]])

## Refined Prediction Summary

In [129]:
# For every item recommended after critiquing, print its name followed by its
# top refined keyphrases.
for item_rank, (item_id, item_keyphrases) in enumerate(zip(modified_top_items, modified_top_explanation), start=1):
    print("Top {} Item ID: {}. Product Name: {}.".format(item_rank, item_id, items_dict[item_id]))
    for keyphrase_rank, keyphrase_id in enumerate(item_keyphrases, start=1):
        keyphrase_label = keyphrase_names[keyphrase_id][0]
        print("Top {} Keyphrase ID: {}. Keyphrase Name: {}".format(keyphrase_rank, keyphrase_id, keyphrase_label))
    print("\n")

Top 1 Item ID: 2927. Product Name: Sierra Nevada Torpedo Extra IPA.
Top 1 Keyphrase ID: 16. Keyphrase Name: bitter
Top 2 Keyphrase ID: 12. Keyphrase Name: citrus
Top 3 Keyphrase ID: 31. Keyphrase Name:  pine 
Top 4 Keyphrase ID: 1. Keyphrase Name: sweet
Top 5 Keyphrase ID: 14. Keyphrase Name: fruit


Top 2 Item ID: 3121. Product Name: Stone 15th Anniversary Escondidian Imperial Black IPA.
Top 1 Keyphrase ID: 16. Keyphrase Name: bitter
Top 2 Keyphrase ID: 1. Keyphrase Name: sweet
Top 3 Keyphrase ID: 6. Keyphrase Name: chocolate
Top 4 Keyphrase ID: 50. Keyphrase Name: black
Top 5 Keyphrase ID: 0. Keyphrase Name: roast


Top 3 Item ID: 2516. Product Name: Racer 5 India Pale Ale.
Top 1 Keyphrase ID: 12. Keyphrase Name: citrus
Top 2 Keyphrase ID: 16. Keyphrase Name: bitter
Top 3 Keyphrase ID: 31. Keyphrase Name:  pine 
Top 4 Keyphrase ID: 1. Keyphrase Name: sweet
Top 5 Keyphrase ID: 34. Keyphrase Name: grapefruit


Top 4 Item ID: 1552. Product Name: Hop Rod Rye.
Top 1 Keyphrase ID: 25. Keyp