In [1]:
import os
import sys
import warnings

# set environment path
sys.path.append("../..")

import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from tqdm import tqdm

from src.models import algorithms, evaluate_model
from src.utilities import utilities

# register tqdm's progress_* methods (e.g. progress_apply) on pandas objects
tqdm.pandas()
# NOTE(review): this silences every warning for the whole notebook, including
# pandas deprecation warnings — consider narrowing the filter to specific categories
warnings.filterwarnings('ignore')



# Load Data

In [2]:
# set path to where repo is located
REPO_PATH = os.path.join("..", "..")

# directory holding the evaluation train/test splits
DATA_PATH = os.path.join(REPO_PATH, "data", "evaluation")

# category to train
CATEGORY = "Grocery_and_Gourmet_Food"

# training parameters: training epochs, learning, and regularisation rate
PARAMS = {"n_epochs": 5,
          "lr_all": 0.005,
          "reg_all": 0.1}

# reproducibility: seed NumPy's global RNG from the single SEED constant
# so that changing SEED actually changes the seed in use
SEED = 42
np.random.seed(SEED)

# load train/test dataset (paths built with os.path.join, like the paths above)
train = pd.read_csv(os.path.join(DATA_PATH, f"{CATEGORY}_train.csv"))
test = pd.read_csv(os.path.join(DATA_PATH, f"{CATEGORY}_test.csv"))

In [3]:
# checking train dataframe: first and last five rows.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to stack the two slices.
pd.concat([train.head(), train.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A23RYWDS884TUL,5.0,This curry paste makes a delicious curry. I j...,2013-05-28,curry paste delicious curry fry chicken vegeta...
1,1,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A945RBQWGZXCK,5.0,I've purchased different curries in the grocer...,2012-09-17,purchase different curry grocery store complet...
2,3,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3AMNY44OP8AOU,4.0,I started a new diet restricting all added sug...,2014-01-23,start new diet restrict added sugar brand suga...
3,4,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3IB4CQ2QEJLJ8,5.0,So many flavors. I can't begin to tell you how...,2014-04-27,flavor begin tell love mae ploy curry ask reci...
4,5,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",AQA5DF3RWKETQ,5.0,I've used this a lot recently in some of my ch...,2012-11-27,use lot recently chicken dish use lot like spi...
47769,77420,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A192LQZWDYPR4U,5.0,Another quality Reese Peanut Butter Cup produc...,2014-02-27,quality reese peanut butter cup product great ...
47770,77421,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A2QKXW3LDQ66P5,5.0,I purchased these for my husband who has every...,2013-02-20,purchase husband love reeses valentine day pre...
47771,77430,B00ID9VSOM,"Viva Labs Organic Coconut Sugar: Non-GMO, Low-...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2P3TGJU301KXD,5.0,this stuff is INCREDIBILY yummy! SO much bette...,2014-07-15,stuff incredibily yummy good regular brown sug...
47772,77456,B00IRL93SY,Barrie House Kenya Estate - AA Single Cup Caps...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AEFE9VDHTQ199,5.0,"Very nice aroma, body and taste! Will buy this...",2014-05-24,nice aroma body taste buy coffee good coffee a...
47773,77508,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2AEZQ3DGBBLPR,2.0,This is a no go for diabetics according to my ...,2014-06-26,diabetic accord wife doctor order intention us...


In [4]:
# checking test dataframe: first and last five rows.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to stack the two slices.
pd.concat([test.head(), test.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,2,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1TCSC0YWT82Q0,5.0,I love ethnic foods and to cook them. I recent...,2013-08-03,love ethnic food cook recently purchase produc...
1,8,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1Z7Y2GMAP9SRY,5.0,I like to make my own curry but this is a tast...,2014-06-27,like curry tasty alternative use base kind dif...
2,23,B00004S1C5,"Ateco Food Coloring Kit, 6 colors","['Grocery & Gourmet Food', 'Cooking & Baking',...",A14YSMLYLJEMET,1.0,This product is no where near natural / organi...,2013-03-29,product near natural organic wish review purch...
3,31,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",A2F488C4PLWGEI,5.0,If my wife drinks a cup of this tea when she f...,2014-03-23,wife drink cup tea feel attack come help avoid...
4,32,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AO1HXV7DWZZIR,5.0,I don't know about the medicinal aspects of th...,2014-02-06,know medicinal aspect tea flavor downright scr...
28001,77519,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A1WT3TVHANP7ZF,3.0,Hmmm. I really wanted to love this sweetener. ...,2014-07-22,hmmm want love sweetener half sugar half stevi...
28002,77520,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A3NEAETOSXDBOM,5.0,"I confess I have a sweet tooth, and love the t...",2014-06-30,confess sweet tooth love taste sugar recognize...
28003,77521,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",AD1ZOPB0BBEHB,4.0,"It has a little of the stevia aftertaste, but ...",2014-07-17,little stevia aftertaste fair compromise able ...
28004,77522,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A18ECVX2RJ7HUE,5.0,i love marinade for grilled flank steak or lon...,2014-05-30,love marinade grilled flank steak london broil...
28005,77523,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2G04D4QZAXL15,3.0,I've been using Truvia (a form of stevia) on m...,2014-05-27,use truvia form stevia cereal greek yogurt yea...


In [5]:
# generating test history: one row per reviewerID, with the list of
# asin values that reviewer has in the test split
asins_by_reviewer = test.groupby(['reviewerID'])['asin'].apply(list)
test_user_history = pd.DataFrame(asins_by_reviewer.reset_index())
print(test_user_history)

                  reviewerID  \
0      A00177463W0XWB16A9O05   
1      A022899328A0QROR32DCT   
2      A068255029AHTHDXZURNU   
3      A06944662TFWOKKV4GJKX   
4             A1004703RC79J9   
...                      ...   
13274          AZWRZZAMX90VT   
13275          AZXKAH2DE6C8A   
13276          AZXON596A1VXC   
13277          AZYXC63SS008M   
13278          AZZ5ASC403N74   

                                                    asin  
0                               [B00474OR8G, B00BFM6OAW]  
1                                           [B00CMQDKES]  
2                                           [B001FA1K2G]  
3                                           [B000GFYRHG]  
4                                           [B003GTR8IO]  
...                                                  ...  
13274  [B0007R9L4M, B000CN7BMA, B001EQ5D1K, B002VT3GX...  
13275   [B000MAK41I, B004X8TJP2, B006H34CUS, B007W14RMM]  
13276                           [B001EO5S0I, B00271QQ7Q]  
13277                    

# Generate N-Recommendations = {5, 10, 15, 20}

## Instantiate FunkSVD (Matrix Factorization)

In [6]:
# instantiating funksvd; the entries of PARAMS (n_epochs, lr_all, reg_all)
# are forwarded as keyword arguments
funk_svd = algorithms.FunkMF(**PARAMS)

In [7]:
%%time
# fitting to the training data; the cell magic reports CPU and wall time
funk_svd.fit(train)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
CPU times: user 576 ms, sys: 17.1 ms, total: 593 ms
Wall time: 597 ms


In [8]:
%%time
# creating testset for prediction from the fitted model's trainset
# NOTE(review): presumably this enumerates the user-item pairs that are absent
# from training (as Surprise's Trainset.build_anti_testset does) — confirm.
# Took ~35 s in the recorded run.
testset = funk_svd.trainset.build_anti_testset()

CPU times: user 33.3 s, sys: 1.82 s, total: 35.2 s
Wall time: 35.6 s


In [9]:
%%time
# generate candidate items by running the fitted model over the testset
# (slowest step: ~10 min wall time in the recorded run)
# NOTE(review): `candidate_items` is not referenced again in the visible cells;
# presumably funk_svd.test() also stores the predictions that get_top_n() reads — confirm
candidate_items = funk_svd.test(testset)

CPU times: user 7min 2s, sys: 1min 56s, total: 8min 59s
Wall time: 9min 51s


## Loop through N = {5, 10, 15, 20}

For each top-N setting, we generate up to *N* candidate items per user and evaluate `Recall@N` and `Novelty@N` across all users.

In [10]:
# generate item popularity from the training set; it is passed to
# evaluate_recommendations in the evaluation loops
# (presumably it feeds the novelty metric — confirm)
item_popularity = evaluate_model.generate_item_popularity(train)

In [11]:
%%time
n_recommendations = {}
for n in [5, 10, 15, 20]:
    # retrieve the top-n items based on similarities
    top_ns = funk_svd.get_top_n(n)
    # evaluate how well the recommended items predicted the future purchases
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name="FUNK-SVD",
                                                    top_ns=top_ns,
                                                    user_rating_history=test_user_history, 
                                                    item_popularity=item_popularity, 
                                                    n=n,
                                                    mf_based=True))
    # saving the n-value and recommended items
    n_recommendations[n] = (top_ns, n_recommended_items)

The FUNK-SVD has an average recall@5: 0.00867, average novelty@5: 0.83229
The FUNK-SVD has an average recall@10: 0.01355, average novelty@10: 0.85758
The FUNK-SVD has an average recall@15: 0.01708, average novelty@15: 0.86792
The FUNK-SVD has an average recall@20: 0.02089, average novelty@20: 0.87420
CPU times: user 4min 11s, sys: 1min 55s, total: 6min 7s
Wall time: 7min 40s


# Evaluate N-Recommendations

For each top-N recommendation list, we randomly sample one user and inspect their *N* recommendations alongside their purchase history.

## N=5

In [12]:
# top-5 lists: first element of the (top_ns, evaluation result) tuple stored for N=5;
# `top_ns_05` is reused by the cold-start analysis further down
top_ns_05 = n_recommendations[5][0]
# print one user's purchase history next to their recommended items
utilities.retrieve_recommendations(train, top_ns_05, mf_based=True)

For user: A3R3SOV9T2GQUG:
Purchase History:
             asin                                              title
17110  B001CHFUDC  Coffee People Donut Shop Regular Medium Roast ...
20836  B001EO6ESS  Green Mountain Coffee Decaf Dark Magic, 24-Cou...

Recommending:

         asin                                              title
0  B000G82L62  Lundberg Family Farms Wild Blend Rice, 16 Ounc...
1  B00014JNI0  YS Organic Bee Farms CERTIFIED ORGANIC RAW HON...
2  B00DS842HS  Viva Naturals Organic Extra Virgin Coconut Oil...
3  B000GAT6NG  Nutiva Organic, Cold-Pressed, Unrefined, Virgi...
4  B000EDDS6Q  Bob's Red Mill Old Country Style Muesli Cereal...


## N=10

In [13]:
# top-10 lists: first element of the (top_ns, evaluation result) tuple stored for N=10;
# `top_ns_10` is reused by the cold-start analysis further down
top_ns_10 = n_recommendations[10][0]
# print one user's purchase history next to their recommended items
utilities.retrieve_recommendations(train, top_ns_10, mf_based=True)

For user: AT2IQUQOH1W4Q:
Purchase History:
             asin                                              title
21373  B001EQ4Z96  McCormick Culinary Imitation Banana Extract, 1...
29358  B002EY5TTW  Wild Planet, Wild Albacore Tuna, 5-Ounce Cans,...

Recommending:

         asin                                              title
0  B00014JNI0  YS Organic Bee Farms CERTIFIED ORGANIC RAW HON...
1  B00DS842HS  Viva Naturals Organic Extra Virgin Coconut Oil...
2  B001O1Q0NA  The Spice Lab Pink Himalayan Salt - 1 Pound X-...
3  B000EDG3UE  Bob's Red Mill Organic Grain Quinoa, 26 Ounce ...
4  B000S8593W           Nutiva Hempseeds, Shelled, Pouch 8.00 OZ
5  B0001M0Z6Q  Spicy World Peppercorn (Whole)-Black Tellicher...
6  B003OGKCDC  Nature's Way Organic Extra Virgin Coconut Oil-...
7  B000EDDS6Q  Bob's Red Mill Old Country Style Muesli Cereal...
8  B000HDJZWO  Enjoy Life Baking Chocolate, Soy free, Nut fre...
9  B000EDK5LM  Bob's Red Mill Vital Wheat Gluten Flour, 22-ou...


## N=15

In [14]:
# top-15 lists: first element of the (top_ns, evaluation result) tuple stored for N=15;
# `top_ns_15` is reused by the cold-start analysis further down
top_ns_15 = n_recommendations[15][0]
# print one user's purchase history next to their recommended items
utilities.retrieve_recommendations(train, top_ns_15, mf_based=True)

For user: A1F9Z42CFF9IAY:
Purchase History:
             asin                                              title
24784  B001M09AZS  Quaker Instant Oatmeal Cinnamon &amp; Spice, 1...
33996  B0045Z4JAI  Newman's Own Organics Newman's Special Decaf K...
35453  B004FELBH8  Newtons Fruit Thins Fig and Honey, 10.5-Ounce ...
37061  B004LA1LKI  Ritz Crackerful Multigrain Crackers, Sharp Che...
41144  B005K4Q1T0  Grove Square Hot Cocoa Dark Chocolate, 24 Sing...
42190  B005VOOQHS  Kool Aid Peach Mango Sugar Sweetened, 19-Ounce...
42541  B0061IUKDM  Higgins &amp; Burke, Green Tea, 20 Count (Pack...
43485  B006MONQMC  Vitamin Squeeze Energy Drink, Fruit Punch, 12-...
44735  B007JFXWRC  Lipton Herbal Pyramid Tea Bags, Blackberry Van...
45014  B007POT6VI         Quaker Instant Oats Variety Pack, 48-Count
45594  B008YUL4KI  Lipton Pyramid Tea Bags, Spiced Cinnamon Chai,...

Recommending:

          asin                                              title
0   B00014JNI0  YS Organic Bee Farms CERTIFIED

## N=20

In [15]:
# top-20 lists: first element of the (top_ns, evaluation result) tuple stored for N=20;
# `top_ns_20` is reused by the cold-start analysis further down
top_ns_20 = n_recommendations[20][0]
# print one user's purchase history next to their recommended items
utilities.retrieve_recommendations(train, top_ns_20, mf_based=True)

For user: A19XXJ5ZQCMA5B:
Purchase History:
             asin                                              title
13551  B000YPIL2K  Quaker Instant Grits Flavor Variety, 12-Count ...
19001  B001E5E24A   Medaglia D&#39;Oro Instant Espresso Coffee, 2...

Recommending:

          asin                                              title
0   B003OGKCDC  Nature's Way Organic Extra Virgin Coconut Oil-...
1   B00014JNI0  YS Organic Bee Farms CERTIFIED ORGANIC RAW HON...
2   B000Z93FQC               Y.S. Eco Bee Farms Raw Honey - 22 oz
3   B0001CXUHW                   Saf Instant Yeast, 1 Pound Pouch
4   B0025UCHRC   Barilla Plus Penne Pasta, 14.5 Ounce (Pack of 8)
5   B001E5E1WS  Rishi Tea Organic Silver Needle Jasmine Loose ...
6   B000HDJZWO  Enjoy Life Baking Chocolate, Soy free, Nut fre...
7   B000EDDS6Q  Bob's Red Mill Old Country Style Muesli Cereal...
8   B00DS842HS  Viva Naturals Organic Extra Virgin Coconut Oil...
9   B000F4D5GC  Let's Do Organic Shredded, Unsweetened Coconut...
10  B00

# Cross-Analysis for Cold-Start Users (<= 2 Purchased Items)

For each top-N setting, we take the top-*N* candidate items and evaluate `Recall@N` and `Novelty@N` on cold-start users only (i.e., users with two or fewer purchased items in the training set).

In [16]:
# users treated as cold-start, derived from the training set
# (per the section text: users with two or fewer purchased items)
cold_start_users = utilities.generate_cold_start_users(train)

In [17]:
# Re-run the evaluation restricted to cold-start users, once per N.
# Iterating over n_recommendations (filled by the top-N evaluation loop) keeps
# the N values in one place and yields the same top-n lists as top_ns_05..top_ns_20,
# without relying on the per-N inspection cells having been run.
for n in n_recommendations:
    top_ns_for_n = n_recommendations[n][0]
    # keep only the recommendation lists that belong to cold-start users
    cold_start_top_ns = {user: items
                         for user, items in top_ns_for_n.items()
                         if user in cold_start_users}
    # evaluate how well the recommended items predicted the future purchases
    # on cold start users (the call prints the average recall@n / novelty@n)
    n_recommended_items = evaluate_model.evaluate_recommendations(
        model_name="FUNK-SVD",
        top_ns=cold_start_top_ns,
        user_rating_history=test_user_history,
        item_popularity=item_popularity,
        n=n,
        mf_based=True,
    )

The FUNK-SVD has an average recall@5: 0.00923, average novelty@5: 0.83225
The FUNK-SVD has an average recall@10: 0.01385, average novelty@10: 0.85747
The FUNK-SVD has an average recall@15: 0.01739, average novelty@15: 0.86795
The FUNK-SVD has an average recall@20: 0.02174, average novelty@20: 0.87419


# Generating Recommended Items DataFrame

In [18]:
# build the recommended-items table for FUNK-SVD from the per-N results
# (max_recommended=20 presumably caps the number of items kept per user — confirm)
max_recommendations = utilities.generate_recommendations_df(
    train=train,
    n_recommendations=n_recommendations,
    algo_name="FUNK-SVD",
    mf_based=True,
    max_recommended=20,
)

In [19]:
# preview the recommendations table (one row per reviewerID / item_rank pair)
max_recommendations

Unnamed: 0,reviewerID,item_rank,asin,algorithm,title
0,A00177463W0XWB16A9O05,0,B000EDDS6Q,FUNK-SVD,Bob's Red Mill Old Country Style Muesli Cereal...
1,A00177463W0XWB16A9O05,1,B00014JNI0,FUNK-SVD,YS Organic Bee Farms CERTIFIED ORGANIC RAW HON...
2,A00177463W0XWB16A9O05,2,B00DS842HS,FUNK-SVD,Viva Naturals Organic Extra Virgin Coconut Oil...
3,A00177463W0XWB16A9O05,3,B000F4DKAI,FUNK-SVD,Twinings of London English Afternoon Black Tea...
4,A00177463W0XWB16A9O05,4,B00271OPVU,FUNK-SVD,"Coombs Family Farms Maple Syrup, Organic, Gra..."
...,...,...,...,...,...
267935,AZZ5ASC403N74,15,B0001M0Z6Q,FUNK-SVD,Spicy World Peppercorn (Whole)-Black Tellicher...
267936,AZZ5ASC403N74,16,B0025UCHS6,FUNK-SVD,"Barilla Plus Pasta, 14.5 Ounce"
267937,AZZ5ASC403N74,17,B0025UCI0I,FUNK-SVD,"Barilla Whole Grain Spaghetti, 13.25 oz(Pack o..."
267938,AZZ5ASC403N74,18,B000S8593W,FUNK-SVD,"Nutiva Hempseeds, Shelled, Pouch 8.00 OZ"


# Store in `SQLite` DB

In [20]:
# engine = create_engine("sqlite:///recommender.db", echo=True)

In [21]:
# NOTE(review): persistence is currently commented out (as is the engine creation
# in the previous cell). If re-enabled, if_exists="append" will add the same rows
# again on every re-run of the notebook.
# max_recommendations.to_sql(f"{CATEGORY}", con=engine, if_exists="append")