In [1]:
from pathlib import Path
import pickle
import warnings

import numpy as np
import pandas as pd

from surprise import Dataset, Reader, dump
from tqdm import tqdm

from src.models import cf, evaluate_model
from src.utilities import utilities

# Register tqdm's pandas integration (adds the progress_* methods to pandas
# objects). NOTE(review): no progress_apply call appears in the visible cells.
tqdm.pandas()
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides Deprecation/FutureWarnings from
# pandas and other libraries — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')



# Load Data

In [2]:
# global variables
# DATA_PATH / CATEGORY select which train/test CSVs are read;
# MODEL_PATH is where the fitted model would be dumped for this category.
DATA_PATH = "data/evaluation"
CATEGORY = "Grocery_and_Gourmet_Food"
MODEL_PATH = Path(f"models/funk_svd/funk_svd_{CATEGORY}.pkl")

# training parameters (passed to cf.FunkMF as n_epochs, lr_all and reg_all)
N_EPOCHS = 15
LR_ALL = 0.01
BETA = 0.1

# reproducibility: seed NumPy's global RNG from the single SEED constant so
# that changing SEED actually changes the seed (previously the literal 42 was
# duplicated and SEED itself was never used).
SEED = 42
np.random.seed(SEED)

# training split for the selected category
train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")

In [3]:
# checking train dataframe: first five and last five rows, original index kept.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and yields the same frame.
pd.concat([train.head(), train.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A23RYWDS884TUL,5.0,This curry paste makes a delicious curry. I j...,2013-05-28,curry paste delicious curry fry chicken vegeta...
1,1,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A945RBQWGZXCK,5.0,I've purchased different curries in the grocer...,2012-09-17,purchase different curry grocery store complet...
2,3,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3AMNY44OP8AOU,4.0,I started a new diet restricting all added sug...,2014-01-23,start new diet restrict added sugar brand suga...
3,4,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3IB4CQ2QEJLJ8,5.0,So many flavors. I can't begin to tell you how...,2014-04-27,flavor begin tell love mae ploy curry ask reci...
4,5,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",AQA5DF3RWKETQ,5.0,I've used this a lot recently in some of my ch...,2012-11-27,use lot recently chicken dish use lot like spi...
47769,77420,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A192LQZWDYPR4U,5.0,Another quality Reese Peanut Butter Cup produc...,2014-02-27,quality reese peanut butter cup product great ...
47770,77421,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A2QKXW3LDQ66P5,5.0,I purchased these for my husband who has every...,2013-02-20,purchase husband love reeses valentine day pre...
47771,77430,B00ID9VSOM,"Viva Labs Organic Coconut Sugar: Non-GMO, Low-...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2P3TGJU301KXD,5.0,this stuff is INCREDIBILY yummy! SO much bette...,2014-07-15,stuff incredibily yummy good regular brown sug...
47772,77456,B00IRL93SY,Barrie House Kenya Estate - AA Single Cup Caps...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AEFE9VDHTQ199,5.0,"Very nice aroma, body and taste! Will buy this...",2014-05-24,nice aroma body taste buy coffee good coffee a...
47773,77508,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2AEZQ3DGBBLPR,2.0,This is a no go for diabetics according to my ...,2014-06-26,diabetic accord wife doctor order intention us...


# Utility Functions

# Generate N-Recommendations = {10, 25, 30, 45}

## Load Test Data

In [4]:
# Load the test split CSV for CATEGORY from DATA_PATH (same naming scheme as
# the train split, with the `_test` suffix).
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")

In [5]:
# checking test dataframe: first five and last five rows, original index kept.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and yields the same frame.
pd.concat([test.head(), test.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,2,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1TCSC0YWT82Q0,5.0,I love ethnic foods and to cook them. I recent...,2013-08-03,love ethnic food cook recently purchase produc...
1,8,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1Z7Y2GMAP9SRY,5.0,I like to make my own curry but this is a tast...,2014-06-27,like curry tasty alternative use base kind dif...
2,23,B00004S1C5,"Ateco Food Coloring Kit, 6 colors","['Grocery & Gourmet Food', 'Cooking & Baking',...",A14YSMLYLJEMET,1.0,This product is no where near natural / organi...,2013-03-29,product near natural organic wish review purch...
3,31,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",A2F488C4PLWGEI,5.0,If my wife drinks a cup of this tea when she f...,2014-03-23,wife drink cup tea feel attack come help avoid...
4,32,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AO1HXV7DWZZIR,5.0,I don't know about the medicinal aspects of th...,2014-02-06,know medicinal aspect tea flavor downright scr...
28001,77519,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A1WT3TVHANP7ZF,3.0,Hmmm. I really wanted to love this sweetener. ...,2014-07-22,hmmm want love sweetener half sugar half stevi...
28002,77520,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A3NEAETOSXDBOM,5.0,"I confess I have a sweet tooth, and love the t...",2014-06-30,confess sweet tooth love taste sugar recognize...
28003,77521,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",AD1ZOPB0BBEHB,4.0,"It has a little of the stevia aftertaste, but ...",2014-07-17,little stevia aftertaste fair compromise able ...
28004,77522,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A18ECVX2RJ7HUE,5.0,i love marinade for grilled flank steak or lon...,2014-05-30,love marinade grilled flank steak london broil...
28005,77523,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2G04D4QZAXL15,3.0,I've been using Truvia (a form of stevia) on m...,2014-05-27,use truvia form stevia cereal greek yogurt yea...


In [6]:
# Test-period history: one row per reviewerID, with an `asin` column holding
# the list of items that reviewer has in the test split.
test_user_history = pd.DataFrame(
    test
    .groupby(['reviewerID'])['asin']
    .apply(list)
    .reset_index()
)

In [7]:
# Bare last expression: the notebook renders the frame with its rich repr
# (truncated by pandas' display options) instead of plain-text print().
test_user_history

                  reviewerID  \
0      A00177463W0XWB16A9O05   
1      A022899328A0QROR32DCT   
2      A068255029AHTHDXZURNU   
3      A06944662TFWOKKV4GJKX   
4             A1004703RC79J9   
...                      ...   
13274          AZWRZZAMX90VT   
13275          AZXKAH2DE6C8A   
13276          AZXON596A1VXC   
13277          AZYXC63SS008M   
13278          AZZ5ASC403N74   

                                                    asin  
0                               [B00474OR8G, B00BFM6OAW]  
1                                           [B00CMQDKES]  
2                                           [B001FA1K2G]  
3                                           [B000GFYRHG]  
4                                           [B003GTR8IO]  
...                                                  ...  
13274  [B0007R9L4M, B000CN7BMA, B001EQ5D1K, B002VT3GX...  
13275   [B000MAK41I, B004X8TJP2, B006H34CUS, B007W14RMM]  
13276                           [B001EO5S0I, B00271QQ7Q]  
13277                    

## Instantiate FunkSVD (Matrix Factorization)

In [8]:
# Build the FunkSVD matrix-factorisation model from the training constants
# defined in the configuration cell.
funk_svd = cf.FunkMF(
    n_epochs=N_EPOCHS,
    lr_all=LR_ALL,
    reg_all=BETA,
)

In [9]:
# fitting to the training data
# The recorded run logs one "Processing epoch <i>" line per epoch
# (0 .. N_EPOCHS - 1).
# NOTE(review): fit() is handed the raw train DataFrame; which columns it uses
# as user / item / rating is decided inside cf.FunkMF — confirm there.
funk_svd.fit(train)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14


In [10]:
%%time
# creating testset for prediction
# NOTE(review): presumably surprise's Trainset.build_anti_testset(), i.e. every
# (user, item) pair that is absent from the training data — confirm against
# cf.FunkMF. The recorded run took ~32 s wall time.
testset = funk_svd.trainset.build_anti_testset()

CPU times: user 30.8 s, sys: 1.25 s, total: 32.1 s
Wall time: 32.2 s


In [11]:
%%time
# generate candidate items: run the fitted model over the testset built above
# (recorded run: ~9 min wall time).
# NOTE(review): `candidate_items` is not referenced by any later visible cell —
# the evaluation loop calls funk_svd.get_top_n(n) instead. Confirm whether
# test() stores its predictions on the model or whether this result is unused.
candidate_items = funk_svd.test(testset)

CPU times: user 6min 52s, sys: 1min 46s, total: 8min 38s
Wall time: 9min 14s


## Save Model

* Saving the model is not advised: persisting it to disk takes a long time.

In [12]:
# MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
# dump.dump(MODEL_PATH, algo=funk_svd)

In [13]:
# load model
# _, funk_svd = dump.load(MODEL_PATH)

## Loop through N = {10, 25, 30, 45}

In [14]:
# generate item popularity from the training split; the result is passed to
# evaluate_model.evaluate_recommendations in the evaluation loop.
# NOTE(review): presumably this feeds the novelty@N metric — confirm in
# src.models.evaluate_model.
item_popularity = evaluate_model.generate_item_popularity(train)

In [15]:
%%time
n_recommendations = {}
for n in [10, 25, 30, 45]:
    # retrieve the top-n items based on similarities
    # top_ns = get_top_n(candidate_items, n)
    top_ns = funk_svd.get_top_n(n)
    # evaluate how well the recommended items predicted the future purchases
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name = 'FUNK-SVD',
                                                    top_ns = top_ns,
                                                    user_rating_history = test_user_history, 
                                                    item_popularity = item_popularity, 
                                                    n = n,
                                                    mf_based = True))
    # saving the n-value and recommended items
    n_recommendations[n] = (top_ns, n_recommended_items)

The FUNK-SVD has an average recall@10: 0.00349, average novelty@10: 0.94647
The FUNK-SVD has an average recall@25: 0.01046, average novelty@25: 0.94908
The FUNK-SVD has an average recall@30: 0.01257, average novelty@30: 0.94992
The FUNK-SVD has an average recall@45: 0.01875, average novelty@45: 0.95206
CPU times: user 4min 10s, sys: 1min 48s, total: 5min 59s
Wall time: 7min 19s


# Evaluate N-Recommendations

## N=10

In [16]:
# n_recommendations[10] is the (top_ns, n_recommended_items) tuple stored by
# the evaluation loop; index 0 selects the top-10 recommendations.
top_ns_10 = n_recommendations[10][0]
# Prints one user's purchase history (from train) and their recommended items.
# NOTE(review): which user is shown is decided inside retrieve_recommendations
# — presumably sampled; confirm in src.utilities.
utilities.retrieve_recommendations(train, top_ns_10, mf_based=True)

For user: A3R3SOV9T2GQUG:
Purchase History:
             asin                                              title
17110  B001CHFUDC  Coffee People Donut Shop Regular Medium Roast ...
20836  B001EO6ESS  Green Mountain Coffee Decaf Dark Magic, 24-Cou...

Recommending:

         asin                                              title
0  B000G82L62  Lundberg Family Farms Wild Blend Rice, 16 Ounc...
1  B000JMAVYO  Spicy World Almonds Whole (Natural and Raw), 4...
2  B001PEWJWC  Garbanzo Beans aka Chickpeas or Ceci Beans | N...
3  B0042PB70Q  La Tourangelle Organic Sunflower Oil, 16.9-Oun...
4  B000VK4F5A  Kinnikinnick Gluten Free Animal Cookies, 8 Oun...
5  B000EDM6BO  Bob's Red Mill Whole Grain Teff, 24-ounce (Pac...
6  B002EY5TTW  Wild Planet, Wild Albacore Tuna, 5-Ounce Cans,...
7  B0029JEJR0  Dove Dark Chocolate Almond Promises, 8.5-Ounce...
8  B001LQWO8I  Stakich RAW HONEY - 100% Pure, Unprocessed, Un...
9  B000216O16   Authentic Foods Superfine White Rice Flour - 3lb


## N=25

In [17]:
# n_recommendations[25] is the (top_ns, n_recommended_items) tuple stored by
# the evaluation loop; index 0 selects the top-25 recommendations.
top_ns_25 = n_recommendations[25][0]
# Prints one user's purchase history (from train) and their recommended items.
# NOTE(review): which user is shown is decided inside retrieve_recommendations
# — presumably sampled; confirm in src.utilities.
utilities.retrieve_recommendations(train, top_ns_25, mf_based=True)

For user: AT2IQUQOH1W4Q:
Purchase History:
             asin                                              title
21373  B001EQ4Z96  McCormick Culinary Imitation Banana Extract, 1...
29358  B002EY5TTW  Wild Planet, Wild Albacore Tuna, 5-Ounce Cans,...

Recommending:

          asin                                              title
0   B00271QQ7Q  Coombs Family Farms 100% Pure Organic Maple Sy...
1   B000EDK5LM  Bob's Red Mill Vital Wheat Gluten Flour, 22-ou...
2   B000216O16   Authentic Foods Superfine White Rice Flour - 3lb
3   B000S8593W           Nutiva Hempseeds, Shelled, Pouch 8.00 OZ
4   B003OGKCDC  Nature's Way Organic Extra Virgin Coconut Oil-...
5   B0029J6QLM         Snickers Dark Chocolate Candy (Pack of 24)
6   B001PEWJWC  Garbanzo Beans aka Chickpeas or Ceci Beans | N...
7   B000EDM6BO  Bob's Red Mill Whole Grain Teff, 24-ounce (Pac...
8   B004AFODLI  Kodiak Cakes All Natural Frontier Pancake, Fla...
9   B001LQWO8I  Stakich RAW HONEY - 100% Pure, Unprocessed, Un...
10  B000

## N=30

In [18]:
# n_recommendations[30] is the (top_ns, n_recommended_items) tuple stored by
# the evaluation loop; index 0 selects the top-30 recommendations.
top_ns_30 = n_recommendations[30][0]
# Prints one user's purchase history (from train) and their recommended items.
# NOTE(review): which user is shown is decided inside retrieve_recommendations
# — presumably sampled; confirm in src.utilities.
utilities.retrieve_recommendations(train, top_ns_30, mf_based=True)

For user: A1F9Z42CFF9IAY:
Purchase History:
             asin                                              title
24784  B001M09AZS  Quaker Instant Oatmeal Cinnamon &amp; Spice, 1...
33996  B0045Z4JAI  Newman's Own Organics Newman's Special Decaf K...
35453  B004FELBH8  Newtons Fruit Thins Fig and Honey, 10.5-Ounce ...
37061  B004LA1LKI  Ritz Crackerful Multigrain Crackers, Sharp Che...
41144  B005K4Q1T0  Grove Square Hot Cocoa Dark Chocolate, 24 Sing...
42190  B005VOOQHS  Kool Aid Peach Mango Sugar Sweetened, 19-Ounce...
42541  B0061IUKDM  Higgins &amp; Burke, Green Tea, 20 Count (Pack...
43485  B006MONQMC  Vitamin Squeeze Energy Drink, Fruit Punch, 12-...
44735  B007JFXWRC  Lipton Herbal Pyramid Tea Bags, Blackberry Van...
45014  B007POT6VI         Quaker Instant Oats Variety Pack, 48-Count
45594  B008YUL4KI  Lipton Pyramid Tea Bags, Spiced Cinnamon Chai,...

Recommending:

          asin                                              title
0   B000JMAVYO  Spicy World Almonds Whole (Nat

## N=45

In [19]:
# n_recommendations[45] is the (top_ns, n_recommended_items) tuple stored by
# the evaluation loop; index 0 selects the top-45 recommendations.
top_ns_45 = n_recommendations[45][0]
# Prints one user's purchase history (from train) and their recommended items.
# NOTE(review): which user is shown is decided inside retrieve_recommendations
# — presumably sampled; confirm in src.utilities.
utilities.retrieve_recommendations(train, top_ns_45, mf_based=True)

For user: A19XXJ5ZQCMA5B:
Purchase History:
             asin                                              title
13551  B000YPIL2K  Quaker Instant Grits Flavor Variety, 12-Count ...
19001  B001E5E24A   Medaglia D&#39;Oro Instant Espresso Coffee, 2...

Recommending:

          asin                                              title
0   B000216O16   Authentic Foods Superfine White Rice Flour - 3lb
1   B0029J6QLM         Snickers Dark Chocolate Candy (Pack of 24)
2   B0021491QM  Ortiz Bonito Del Norte - White Tuna in Olive O...
3   B000JMAVYO  Spicy World Almonds Whole (Natural and Raw), 4...
4   B001E50WDA  SPLENDA No Calorie Sweetener Granular, 9.7-Oun...
5   B000EICISA       Mae Ploy Thai Yellow Curry Paste - 14 oz jar
6   B000EVE3YE  Glutino Gluten Free Pantry Muffin Mix, 15-Ounc...
7   B0029JEJR0  Dove Dark Chocolate Almond Promises, 8.5-Ounce...
8   B001PF1846  Green Split Peas | Non-GMO Project Verified | ...
9   B003OGKCDC  Nature's Way Organic Extra Virgin Coconut Oil-...
10  B00