In [1]:
import warnings

import numpy as np
import pandas as pd

from gensim.models.doc2vec import Doc2Vec
from surprise import Dataset, Reader
from tqdm import tqdm

from src.models import cf, evaluate_model
from src.utilities import utilities

# register tqdm's progress-bar helpers (e.g. `progress_apply`) on pandas objects
tqdm.pandas()
# NOTE(review): this silences *every* warning for the whole notebook, including
# pandas deprecation warnings — consider narrowing the filter to specific categories
warnings.filterwarnings('ignore')



# Load Data and Models

In [2]:
# global variables: locations of the evaluation splits and the trained d2v model
DATA_PATH = "data/evaluation"
D2V_PATH = "models/d2v"
CATEGORY = "Grocery_and_Gourmet_Food"

# d2v training parameters (only used here to build the model file name)
EPOCHS = 10

# training parameters handed to the matrix-factorisation model further down
N_EPOCHS = 15
LR_ALL = 0.01
BETA = 0.1

# reproducibility: seed NumPy's global RNG from the single SEED constant so
# that changing SEED actually changes the seed in use
SEED = 42
np.random.seed(SEED)

# load the training split and the pre-trained Doc2Vec model for this category
train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")
d2v = Doc2Vec.load(f"{D2V_PATH}/{CATEGORY}_user_item_50_{EPOCHS}_d2v.model")

In [3]:
# checking train dataframe: first and last five rows
# (DataFrame.append was removed in pandas 2.0; pd.concat is the supported form)
pd.concat([train.head(), train.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A23RYWDS884TUL,5.0,This curry paste makes a delicious curry. I j...,2013-05-28,curry paste delicious curry fry chicken vegeta...
1,1,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A945RBQWGZXCK,5.0,I've purchased different curries in the grocer...,2012-09-17,purchase different curry grocery store complet...
2,3,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3AMNY44OP8AOU,4.0,I started a new diet restricting all added sug...,2014-01-23,start new diet restrict added sugar brand suga...
3,4,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3IB4CQ2QEJLJ8,5.0,So many flavors. I can't begin to tell you how...,2014-04-27,flavor begin tell love mae ploy curry ask reci...
4,5,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",AQA5DF3RWKETQ,5.0,I've used this a lot recently in some of my ch...,2012-11-27,use lot recently chicken dish use lot like spi...
47769,77420,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A192LQZWDYPR4U,5.0,Another quality Reese Peanut Butter Cup produc...,2014-02-27,quality reese peanut butter cup product great ...
47770,77421,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A2QKXW3LDQ66P5,5.0,I purchased these for my husband who has every...,2013-02-20,purchase husband love reeses valentine day pre...
47771,77430,B00ID9VSOM,"Viva Labs Organic Coconut Sugar: Non-GMO, Low-...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2P3TGJU301KXD,5.0,this stuff is INCREDIBILY yummy! SO much bette...,2014-07-15,stuff incredibily yummy good regular brown sug...
47772,77456,B00IRL93SY,Barrie House Kenya Estate - AA Single Cup Caps...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AEFE9VDHTQ199,5.0,"Very nice aroma, body and taste! Will buy this...",2014-05-24,nice aroma body taste buy coffee good coffee a...
47773,77508,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2AEZQ3DGBBLPR,2.0,This is a no go for diabetics according to my ...,2014-06-26,diabetic accord wife doctor order intention us...


In [4]:
# sanity check that the Doc2Vec model loaded: show the document vector
# stored at index 0 of `d2v.dv`
d2v.dv[0]

array([-0.27549782,  0.20413424, -0.08202688,  0.11671811, -0.12001666,
        0.14240262, -0.1531975 , -0.04004756,  0.22749828,  0.07618919,
       -0.02856197, -0.17817275, -0.20922764,  0.00729961,  0.23265481,
       -0.40277228,  0.14615832, -0.16966432, -0.05449987, -0.17599425,
        0.01856123, -0.07569042,  0.19056639,  0.04286703, -0.03356553,
        0.3742853 , -0.3664872 ,  0.02314829, -0.3146308 ,  0.09372662,
        0.0800645 ,  0.17360352, -0.12081917, -0.20905615,  0.01595591,
       -0.02186558, -0.31253535, -0.09657891,  0.15574145,  0.33629766,
       -0.2215663 , -0.23758855, -0.14656585, -0.05834552, -0.05299786,
       -0.20230757, -0.28766856,  0.07311316,  0.19890316, -0.0960729 ],
      dtype=float32)

# Generating User & Item Embeddings

In [5]:
# derive user and item embeddings, plus their index maps, from the training
# data and the loaded Doc2Vec model
# NOTE(review): the exact layout of the maps/vectors is defined in
# src.utilities and is not visible here — confirm before relying on it
user_idx_map, user_vecs, item_idx_map, item_vecs = utilities.generate_user_item_embeddings(train, d2v)

In [6]:
# extract the embedding tables as plain NumPy arrays
user_factors, item_factors = user_vecs.to_numpy(), item_vecs.to_numpy()

In [7]:
# inspect the first row of the user factor matrix
user_factors[0]

array([-0.02736311,  0.00325166, -0.00024077,  0.00998505, -0.01258574,
       -0.00547524,  0.00145343, -0.00015249,  0.00506267,  0.02065624,
        0.01541428,  0.0094595 , -0.03136451,  0.00920544,  0.01232455,
       -0.0171574 ,  0.01223645, -0.00086656, -0.0191491 , -0.02470782,
       -0.01328263, -0.01610228,  0.00892396,  0.00606314, -0.005087  ,
        0.03478223, -0.03029266, -0.01222032, -0.03508016, -0.00363975,
        0.02242427,  0.00488477,  0.00057464, -0.00215315, -0.00866075,
       -0.00311717, -0.01708894,  0.00399594,  0.03462548,  0.02504152,
        0.00528121, -0.00953821,  0.00041977, -0.01929475, -0.00617209,
       -0.01144104,  0.00639438, -0.01098371,  0.02299422, -0.00714812],
      dtype=float32)

In [8]:
# inspect the first row of the item factor matrix
item_factors[0]

array([-0.27549782,  0.20413424, -0.08202688,  0.11671811, -0.12001666,
        0.14240262, -0.1531975 , -0.04004756,  0.22749828,  0.07618919,
       -0.02856197, -0.17817275, -0.20922764,  0.00729961,  0.23265481,
       -0.40277228,  0.14615832, -0.16966432, -0.05449987, -0.17599425,
        0.01856123, -0.07569042,  0.19056639,  0.04286703, -0.03356553,
        0.3742853 , -0.3664872 ,  0.02314829, -0.3146308 ,  0.09372662,
        0.0800645 ,  0.17360352, -0.12081917, -0.20905615,  0.01595591,
       -0.02186558, -0.31253535, -0.09657891,  0.15574145,  0.33629766,
       -0.2215663 , -0.23758855, -0.14656585, -0.05834552, -0.05299786,
       -0.20230757, -0.28766856,  0.07311316,  0.19890316, -0.0960729 ],
      dtype=float32)

# Utility Functions

# Generate Top-N Recommendations for N = {10, 25, 30, 45}

## Load Test Data

In [9]:
# load the held-out test split for the same category
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")
# preview the first and last five rows
# (DataFrame.append was removed in pandas 2.0; pd.concat is the supported form)
pd.concat([test.head(), test.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,2,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1TCSC0YWT82Q0,5.0,I love ethnic foods and to cook them. I recent...,2013-08-03,love ethnic food cook recently purchase produc...
1,8,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1Z7Y2GMAP9SRY,5.0,I like to make my own curry but this is a tast...,2014-06-27,like curry tasty alternative use base kind dif...
2,23,B00004S1C5,"Ateco Food Coloring Kit, 6 colors","['Grocery & Gourmet Food', 'Cooking & Baking',...",A14YSMLYLJEMET,1.0,This product is no where near natural / organi...,2013-03-29,product near natural organic wish review purch...
3,31,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",A2F488C4PLWGEI,5.0,If my wife drinks a cup of this tea when she f...,2014-03-23,wife drink cup tea feel attack come help avoid...
4,32,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AO1HXV7DWZZIR,5.0,I don't know about the medicinal aspects of th...,2014-02-06,know medicinal aspect tea flavor downright scr...
28001,77519,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A1WT3TVHANP7ZF,3.0,Hmmm. I really wanted to love this sweetener. ...,2014-07-22,hmmm want love sweetener half sugar half stevi...
28002,77520,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A3NEAETOSXDBOM,5.0,"I confess I have a sweet tooth, and love the t...",2014-06-30,confess sweet tooth love taste sugar recognize...
28003,77521,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",AD1ZOPB0BBEHB,4.0,"It has a little of the stevia aftertaste, but ...",2014-07-17,little stevia aftertaste fair compromise able ...
28004,77522,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A18ECVX2RJ7HUE,5.0,i love marinade for grilled flank steak or lon...,2014-05-30,love marinade grilled flank steak london broil...
28005,77523,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2G04D4QZAXL15,3.0,I've been using Truvia (a form of stevia) on m...,2014-05-27,use truvia form stevia cereal greek yogurt yea...


In [10]:
# collect, for every reviewer in the test split, the list of items (asin)
# they reviewed, then expose it as a two-column frame
asin_by_reviewer = test.groupby(['reviewerID'])['asin'].apply(list)
test_user_history = pd.DataFrame(asin_by_reviewer.reset_index())

In [11]:
# show the per-user test history with the notebook's rich DataFrame display
# (print() falls back to a wrapped plain-text rendering that is hard to read)
test_user_history

                  reviewerID  \
0      A00177463W0XWB16A9O05   
1      A022899328A0QROR32DCT   
2      A068255029AHTHDXZURNU   
3      A06944662TFWOKKV4GJKX   
4             A1004703RC79J9   
...                      ...   
13274          AZWRZZAMX90VT   
13275          AZXKAH2DE6C8A   
13276          AZXON596A1VXC   
13277          AZYXC63SS008M   
13278          AZZ5ASC403N74   

                                                    asin  
0                               [B00474OR8G, B00BFM6OAW]  
1                                           [B00CMQDKES]  
2                                           [B001FA1K2G]  
3                                           [B000GFYRHG]  
4                                           [B003GTR8IO]  
...                                                  ...  
13274  [B0007R9L4M, B000CN7BMA, B001EQ5D1K, B002VT3GX...  
13275   [B000MAK41I, B004X8TJP2, B006H34CUS, B007W14RMM]  
13276                           [B001EO5S0I, B00271QQ7Q]  
13277                    

## Preparing Dataset for Surprise's Algorithm

In [12]:
# create reader: ratings are declared to lie on a 1-5 scale
reader = Reader(rating_scale=(1,5))
# generate data required for surprise from the (user, item, rating) columns
data = Dataset.load_from_df(train[["reviewerID", "asin", "overall"]], reader)
# generating trainset from the whole training frame (no further split is made
# here; the evaluation uses the separately loaded test data)
trainset = data.build_full_trainset()

## Instantiate Pre-Initialised Matrix Factorization (Paragraph Vector)

In [13]:
# instantiating mod-ecf: matrix factorisation whose user/item factors are
# pre-initialised from the Doc2Vec-derived embeddings
# num_factors is taken from the embedding width rather than hard-coded, so it
# cannot drift out of sync with the factor matrices passed in
mod_ecf = cf.PreInitialisedMF(user_map=user_idx_map,
                              item_map=item_idx_map,
                              user_factor=user_factors,
                              item_factor=item_factors,
                              learning_rate=LR_ALL,
                              beta=BETA,
                              num_epochs=N_EPOCHS,
                              num_factors=user_factors.shape[1])

In [14]:
%%time
# fitting to training data; with verbose=True a progress line is printed per
# epoch (see the saved output) — this cell took several minutes when last run
mod_ecf.fit(trainset, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
CPU times: user 5min 49s, sys: 756 ms, total: 5min 49s
Wall time: 5min 50s


In [15]:
%%time
# generate candidate items for user to predict rating: the (user, item)
# pairs that do NOT appear in the training set
testset = trainset.build_anti_testset()

CPU times: user 30.4 s, sys: 1.44 s, total: 31.8 s
Wall time: 32 s


In [16]:
%%time
# predict ratings for all pairs (u, i) that are NOT in the training set
# NOTE(review): this is the slowest cell in the saved run (minutes of wall time)
candidate_items = mod_ecf.test(testset, verbose=False)

CPU times: user 7min 25s, sys: 1min 35s, total: 9min
Wall time: 9min 43s


## Loop through N = {10, 25, 30, 45}

In [17]:
# generate item popularity from the training data; it is passed to the
# evaluation helper below
# NOTE(review): presumably feeds the novelty metric — confirm in src.models.evaluate_model
item_popularity = evaluate_model.generate_item_popularity(train)

In [18]:
# maps each cut-off n to (top-n lists, evaluated recommendations)
n_recommendations = {}
for n in (10, 25, 30, 45):
    # keep the top-n candidate items for every user
    top_ns = mod_ecf.get_top_n(candidate_items, n)
    # score the lists against the users' test history; the saved output shows
    # an average recall@n / novelty@n line reported for every n
    n_recommended_items = evaluate_model.evaluate_recommendations(
        model_name='MOD-ECF',
        top_ns=top_ns,
        user_rating_history=test_user_history,
        item_popularity=item_popularity,
        n=n,
        mf_based=True,
    )
    # remember both the raw lists and the evaluation result for this n
    n_recommendations[n] = (top_ns, n_recommended_items)

The MOD-ECF has an average recall@10: 0.00043, average novelty@10: 0.97630
The MOD-ECF has an average recall@25: 0.00135, average novelty@25: 0.96513
The MOD-ECF has an average recall@30: 0.00188, average novelty@30: 0.95590
The MOD-ECF has an average recall@45: 0.00236, average novelty@45: 0.95725


# Evaluate N-Recommendations

## N=10

In [19]:
# top-10 lists: first element of the tuple stored for n=10
top_ns_10 = n_recommendations[10][0]
# print one user's purchase history next to the items recommended to them
# NOTE(review): how the user is picked is decided inside utilities — confirm
utilities.retrieve_recommendations(train, top_ns_10, mf_based=True)

For user: A2H84V55USFJQX:
Purchase History:
             asin                                              title
13844  B000ZSZ5S4  Blue Diamond Almonds, Bold Salt &amp; Vinegar,...
41088  B005K4Q1T0  Grove Square Hot Cocoa Dark Chocolate, 24 Sing...

Recommending:

         asin                                              title
0  9742356831                  Mae Ploy Green Curry Paste, 14 oz
1  B00004S1C5                  Ateco Food Coloring Kit, 6 colors
2  B00005344V  Traditional Medicinals Organic Breathe Easy Se...
3  B0000CDEPD      Rodelle Organic Pure Vanilla Extract, 4-Ounce
4  B0000CFPI2                      BJ-6 3-Pack Jerky Spice Works
5  B0000CH39R            See's Candies 3 lb. Assorted Chocolates
6  B0000CNU1S                   Dynasty Five Spices Powder, 2 oz
7  B0000CNU1X  Hime Powdered Wasabi (Japanese Horseradish) - ...
8  B0000CNU15  Lee Kum Kee Chiu Chow Chili Oil net wt. 205g (...
9  B0000DBN1H  Tazo Awake English Breakfast Tea, 24 Tea Bags,...


## N=25

In [20]:
# top-25 lists: first element of the tuple stored for n=25
top_ns_25 = n_recommendations[25][0]
# print one user's purchase history next to the items recommended to them
utilities.retrieve_recommendations(train, top_ns_25, mf_based=True)

For user: A2W9B725TZBXOX:
Purchase History:
             asin                                              title
1645   B0005Z6LLW  Ghirardelli Hot Chocolate Mix , Chocolate Moch...
4683   B000ED9L6C  Bob's Red Mill Raw Shelled Sunflower Seeds (Ke...
8508   B000GATCRQ  Dream Foods International Volcano Lemon Burst,...
15333  B0014EOUAW  V8 V-Fusion Light Peach Mango Juice Drink, 46-...
18192  B001E52ZAS  Post Shredded Wheat Lightly Frosted Cereal, Sp...
21093  B001EPQV1W  Honey Bunches of Oats with Almonds, 14.5-Ounce...
22932  B001G8UC8K  Tootsie Roll Midgees Candy 5 Pound Value Bag 7...
23025  B001GVIRD4                     Grocery &amp; Gourmet Food" />
24945  B001NC8HQS  Idahoan Buttery homestyle flavored mashed pota...
25830  B001SAQ7WQ  Progresso Traditional Soup, Chickarina (Chicke...
26956  B0025UOMY8  Maruchan Yakisoba Spicy Vegetable, 3.79-Ounce ...
33467  B00421DMLM  Libby's Splenda Sliced Peaches, 23.5-Ounce Jar...
34141  B004727CL2  Snack Factory Pretzel Crisps Everything,

## N=30

In [21]:
# top-30 lists: first element of the tuple stored for n=30
top_ns_30 = n_recommendations[30][0]
# print one user's purchase history next to the items recommended to them
utilities.retrieve_recommendations(train, top_ns_30, mf_based=True)

For user: A1G8ECTOF35O2R:
Purchase History:
             asin                                              title
9115   B000H26J7E  Lindt Excellence Bar, 70% Cocoa Smooth Dark Ch...
47677  B00EKLPLU4              Healthworks Cacao Powder Organic, 1lb

Recommending:

          asin                                              title
0   9742356831                  Mae Ploy Green Curry Paste, 14 oz
1   B00004S1C5                  Ateco Food Coloring Kit, 6 colors
2   B00005344V  Traditional Medicinals Organic Breathe Easy Se...
3   B0000CDEPD      Rodelle Organic Pure Vanilla Extract, 4-Ounce
4   B0000CFPI2                      BJ-6 3-Pack Jerky Spice Works
5   B0000CH39R            See's Candies 3 lb. Assorted Chocolates
6   B0000CNU1S                   Dynasty Five Spices Powder, 2 oz
7   B0000CNU1X  Hime Powdered Wasabi (Japanese Horseradish) - ...
8   B0000CNU15  Lee Kum Kee Chiu Chow Chili Oil net wt. 205g (...
9   B0000DBN1H  Tazo Awake English Breakfast Tea, 24 Tea Bags,...
10  B00

## N=45

In [22]:
# top-45 lists: first element of the tuple stored for n=45
top_ns_45 = n_recommendations[45][0]
# print one user's purchase history next to the items recommended to them
# NOTE(review): the saved outputs for N=10, 30 and 45 list the same leading
# items for three different users — worth confirming whether the predicted
# ratings are tied so that the ranking falls back to item order
utilities.retrieve_recommendations(train, top_ns_45, mf_based=True)

For user: A1NEKFNO1E743Y:
Purchase History:
            asin                                              title
8695  B000GFYRHG  Bigelow Constant Comment Tea 20-Count Boxes (P...

Recommending:

          asin                                              title
0   9742356831                  Mae Ploy Green Curry Paste, 14 oz
1   B00004S1C5                  Ateco Food Coloring Kit, 6 colors
2   B00005344V  Traditional Medicinals Organic Breathe Easy Se...
3   B0000CDEPD      Rodelle Organic Pure Vanilla Extract, 4-Ounce
4   B0000CFPI2                      BJ-6 3-Pack Jerky Spice Works
5   B0000CH39R            See's Candies 3 lb. Assorted Chocolates
6   B0000CNU1S                   Dynasty Five Spices Powder, 2 oz
7   B0000CNU1X  Hime Powdered Wasabi (Japanese Horseradish) - ...
8   B0000CNU15  Lee Kum Kee Chiu Chow Chili Oil net wt. 205g (...
9   B0000DBN1H  Tazo Awake English Breakfast Tea, 24 Tea Bags,...
10  B0000DBN1L               Tazo Organic Chai, 24 Tea Bags 2.3oz
11  B0000DGF