In [1]:
from collections import defaultdict

# data manipulation
import numpy as np
import pandas as pd
import warnings

# db, recommender algorithm, progress bar
from sqlalchemy import create_engine
from surprise import Dataset, Reader, NormalPredictor
from tqdm import tqdm

# custom functions
from src.models import evaluate_model
from src.utilities import utilities

tqdm.pandas()
warnings.filterwarnings('ignore')



# Load Data

In [2]:
# global variables
DATA_PATH = "data/evaluation"
CATEGORY = "Grocery_and_Gourmet_Food"

# reproducibility checks
SEED = 42
np.random.seed(42)

train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")

In [3]:
# checking train dataframe
train.head().append(train.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A23RYWDS884TUL,5.0,This curry paste makes a delicious curry. I j...,2013-05-28,curry paste delicious curry fry chicken vegeta...
1,1,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A945RBQWGZXCK,5.0,I've purchased different curries in the grocer...,2012-09-17,purchase different curry grocery store complet...
2,3,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3AMNY44OP8AOU,4.0,I started a new diet restricting all added sug...,2014-01-23,start new diet restrict added sugar brand suga...
3,4,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3IB4CQ2QEJLJ8,5.0,So many flavors. I can't begin to tell you how...,2014-04-27,flavor begin tell love mae ploy curry ask reci...
4,5,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",AQA5DF3RWKETQ,5.0,I've used this a lot recently in some of my ch...,2012-11-27,use lot recently chicken dish use lot like spi...
47769,77420,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A192LQZWDYPR4U,5.0,Another quality Reese Peanut Butter Cup produc...,2014-02-27,quality reese peanut butter cup product great ...
47770,77421,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A2QKXW3LDQ66P5,5.0,I purchased these for my husband who has every...,2013-02-20,purchase husband love reeses valentine day pre...
47771,77430,B00ID9VSOM,"Viva Labs Organic Coconut Sugar: Non-GMO, Low-...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2P3TGJU301KXD,5.0,this stuff is INCREDIBILY yummy! SO much bette...,2014-07-15,stuff incredibily yummy good regular brown sug...
47772,77456,B00IRL93SY,Barrie House Kenya Estate - AA Single Cup Caps...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AEFE9VDHTQ199,5.0,"Very nice aroma, body and taste! Will buy this...",2014-05-24,nice aroma body taste buy coffee good coffee a...
47773,77508,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2AEZQ3DGBBLPR,2.0,This is a no go for diabetics according to my ...,2014-06-26,diabetic accord wife doctor order intention us...


# Utility Function

In [4]:
def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

# Generate N-Recommendations = {10, 25, 30, 45}

## Load Test Data

In [5]:
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")

In [6]:
test.head().append(test.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,2,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1TCSC0YWT82Q0,5.0,I love ethnic foods and to cook them. I recent...,2013-08-03,love ethnic food cook recently purchase produc...
1,8,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1Z7Y2GMAP9SRY,5.0,I like to make my own curry but this is a tast...,2014-06-27,like curry tasty alternative use base kind dif...
2,23,B00004S1C5,"Ateco Food Coloring Kit, 6 colors","['Grocery & Gourmet Food', 'Cooking & Baking',...",A14YSMLYLJEMET,1.0,This product is no where near natural / organi...,2013-03-29,product near natural organic wish review purch...
3,31,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",A2F488C4PLWGEI,5.0,If my wife drinks a cup of this tea when she f...,2014-03-23,wife drink cup tea feel attack come help avoid...
4,32,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AO1HXV7DWZZIR,5.0,I don't know about the medicinal aspects of th...,2014-02-06,know medicinal aspect tea flavor downright scr...
28001,77519,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A1WT3TVHANP7ZF,3.0,Hmmm. I really wanted to love this sweetener. ...,2014-07-22,hmmm want love sweetener half sugar half stevi...
28002,77520,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A3NEAETOSXDBOM,5.0,"I confess I have a sweet tooth, and love the t...",2014-06-30,confess sweet tooth love taste sugar recognize...
28003,77521,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",AD1ZOPB0BBEHB,4.0,"It has a little of the stevia aftertaste, but ...",2014-07-17,little stevia aftertaste fair compromise able ...
28004,77522,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A18ECVX2RJ7HUE,5.0,i love marinade for grilled flank steak or lon...,2014-05-30,love marinade grilled flank steak london broil...
28005,77523,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2G04D4QZAXL15,3.0,I've been using Truvia (a form of stevia) on m...,2014-05-27,use truvia form stevia cereal greek yogurt yea...


In [7]:
# generating test history
test_user_history = (pd.DataFrame(test.groupby(['reviewerID'])['asin']
                                  .apply(list).reset_index()))

In [8]:
print(test_user_history)

                  reviewerID  \
0      A00177463W0XWB16A9O05   
1      A022899328A0QROR32DCT   
2      A068255029AHTHDXZURNU   
3      A06944662TFWOKKV4GJKX   
4             A1004703RC79J9   
...                      ...   
13274          AZWRZZAMX90VT   
13275          AZXKAH2DE6C8A   
13276          AZXON596A1VXC   
13277          AZYXC63SS008M   
13278          AZZ5ASC403N74   

                                                    asin  
0                               [B00474OR8G, B00BFM6OAW]  
1                                           [B00CMQDKES]  
2                                           [B001FA1K2G]  
3                                           [B000GFYRHG]  
4                                           [B003GTR8IO]  
...                                                  ...  
13274  [B0007R9L4M, B000CN7BMA, B001EQ5D1K, B002VT3GX...  
13275   [B000MAK41I, B004X8TJP2, B006H34CUS, B007W14RMM]  
13276                           [B001EO5S0I, B00271QQ7Q]  
13277                    

# Preparing Dataset for Surprise's Algorithm

In [9]:
# create reader
reader = Reader(rating_scale=(1,5))
# generate data required for surprise
data = Dataset.load_from_df(train[["reviewerID", "asin", "overall"]], reader)
# generating trainset
trainset = data.build_full_trainset()

# Instantiate Random Recommender (Topic Modelling)

In [10]:
random = NormalPredictor()

In [11]:
random.fit(trainset)

<surprise.prediction_algorithms.random_pred.NormalPredictor at 0x131b6ff10>

In [12]:
%%time
# generate candidate items for user to predict rating
testset = trainset.build_anti_testset()

CPU times: user 35.8 s, sys: 1.64 s, total: 37.5 s
Wall time: 37.9 s


In [13]:
%%time
# predict ratings for all pairs (u, i) that are NOT in the training set
candidate_items = random.test(testset, verbose=False)

CPU times: user 6min 45s, sys: 1min 54s, total: 8min 39s
Wall time: 9min 16s


## Loop through N = {10, 25, 30, 45}

In [14]:
# generate item popularity
item_popularity = evaluate_model.generate_item_popularity(train)

In [15]:
n_recommendations = {}
for n in [10, 25, 30, 45]:
    # retrieve the top-n items based on similarities
    top_ns = get_top_n(candidate_items, n)
    # evaluate how well the recommended items predicted the future purchases
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name = 'RANDOM',
                                                    top_ns = top_ns,
                                                    user_rating_history = test_user_history, 
                                                    item_popularity = item_popularity, 
                                                    n = n,
                                                    mf_based = True))
    # saving the n-value and recommended items
    n_recommendations[n] = (top_ns, n_recommended_items)

The RANDOM has an average recall@10: 0.00046, average novelty@10: 0.95775
The RANDOM has an average recall@25: 0.00166, average novelty@25: 0.95966
The RANDOM has an average recall@30: 0.00213, average novelty@30: 0.96197
The RANDOM has an average recall@45: 0.00376, average novelty@45: 0.96666


# Evaluate N-Recommendations

## N=10

In [16]:
top_ns_10 = n_recommendations[10][0]
utilities.retrieve_recommendations(train, top_ns_10, mf_based=True)

For user: A3BIR2418I0M3N:
Purchase History:
             asin                                              title
1117   B0001M0Z6Q  Spicy World Peppercorn (Whole)-Black Tellicher...
6127   B000EVMNMI        Haribo Gummi Candy, Happy-Cola, 5-Pound Bag
9746   B000HVX6NK  Sea's Gift Korean Seaweed Snack (Kim Nori), Ro...
10086  B000J4IDO2  Coffee People K-Cups, Black Tiger Extra Bold, ...
10249  B000JZEABG  Black Forest Gummy Bears Ferrara Candy, Natura...
17529  B001D0IZBM                     Grocery &amp; Gourmet Food" />
23972  B001KTA03C  Green Mountain Coffee Double Black Diamond, 24...

Recommending:

         asin                                              title
0  9742356831                  Mae Ploy Green Curry Paste, 14 oz
1  B00004S1C5                  Ateco Food Coloring Kit, 6 colors
2  B0000DID5R           Dave's Original Insanity Hot Sauce - 5oz
3  B0000GHNT0  El Yucateco Chile Habanero Hot Sauce Bottle, R...
4  B0000IJYK4  Maseca Instant Yellow Corn Masa Flour 4.84lb |..

## N=25

In [17]:
top_ns_25 = n_recommendations[25][0]
utilities.retrieve_recommendations(train, top_ns_25, mf_based=True)

For user: A310AONCWMN6MV:
Purchase History:
            asin                                              title
9516  B000HDJZWO  Enjoy Life Baking Chocolate, Soy free, Nut fre...

Recommending:

          asin                                              title
0   B00005344V  Traditional Medicinals Organic Breathe Easy Se...
1   B0000CNU15  Lee Kum Kee Chiu Chow Chili Oil net wt. 205g (...
2   B0000DBN1L               Tazo Organic Chai, 24 Tea Bags 2.3oz
3   B0000DID5R           Dave's Original Insanity Hot Sauce - 5oz
4   B0000GHNT0  El Yucateco Chile Habanero Hot Sauce Bottle, R...
5   B0000SXEN2      Sans Sucre Chocolate Mousse Mix - Gluten Free
6   B00012182G                                 Fresh Whole Rabbit
7   B0001590IC    Freshpak Rooibos Tea 80 Tagless Bags (2 X Pack)
8   B00015UC8O                          Barley Malt Powder, 1 lb.
9   B00015YTU6                              Quinoa Berries, 1 lb.
10  B00016AQRU  Traditional Medicinal Organic Ginger with Cham...
11  B00016JG

## N=30

In [18]:
top_ns_30 = n_recommendations[30][0]
utilities.retrieve_recommendations(train, top_ns_30, mf_based=True)

For user: A1U90MYVSLLFI1:
Purchase History:
             asin                                              title
23887  B001J9QBU4                     Grocery &amp; Gourmet Food" />
40633  B005HGAVD8  Crazy Cups Wolfgang Puck Sampler,  K-Cup Porti...
41039  B005K4Q1T0  Grove Square Hot Cocoa Dark Chocolate, 24 Sing...
42212  B005XB145Q         Tazo Zen Green Tea Keurig K-Cups, 16 Count
46460  B00AP7VGNI  GEVALIA Kaffe Signature Blend K-CUP Pods - 12 ...

Recommending:

          asin                                              title
0   B00005344V  Traditional Medicinals Organic Breathe Easy Se...
1   B0000CNU15  Lee Kum Kee Chiu Chow Chili Oil net wt. 205g (...
2   B0000DI085  Simply Organic Almond Extract, Certified Organ...
3   B0000DID5R           Dave's Original Insanity Hot Sauce - 5oz
4   B0000E5JIU      Cafe Du Monde Coffee Chicory, 15 Ounce Ground
5   B0000GHNT0  El Yucateco Chile Habanero Hot Sauce Bottle, R...
6   B00014JNI0  YS Organic Bee Farms CERTIFIED ORGANIC RAW HON..

## N=45

In [19]:
top_ns_45 = n_recommendations[45][0]
utilities.retrieve_recommendations(train, top_ns_45, mf_based=True)

For user: A3EAAFGS0DU8R6:
Purchase History:
             asin                                              title
16531  B001ACMCNA  Chebe Bread Pizza Crust Mix, Gluten Free, 7.5-...
16562  B001ACNWUC   Chebe Bread Focaccia Flat Bread Mix, Gluten F...
34472  B004AHCGI8  Pomi Tomatoes, Strained, 26.46-Ounce Carton (P...

Recommending:

          asin                                              title
0   B0000CDEPD      Rodelle Organic Pure Vanilla Extract, 4-Ounce
1   B0000CNU1S                   Dynasty Five Spices Powder, 2 oz
2   B0000CNU1X  Hime Powdered Wasabi (Japanese Horseradish) - ...
3   B0000DBN1L               Tazo Organic Chai, 24 Tea Bags 2.3oz
4   B0000DI085  Simply Organic Almond Extract, Certified Organ...
5   B0000DID5R           Dave's Original Insanity Hot Sauce - 5oz
6   B0000GL6RK         Taj&iacute;n Cl&aacute;sico Seasoning 5 oz
7   B0000V09L4  Igourmet Cheddars of the World Assortment, 2-P...
8   B00012182G                                 Fresh Whole Rabbit
9   

# Cross-Analysis for Cold-Start Users (<= 2 Purchased Items)

In [20]:
cold_start_users = utilities.generate_cold_start_users(train)

In [21]:
for n in tuple(zip([10, 25, 30, 45], [top_ns_10, top_ns_25, top_ns_30, top_ns_45])):
    cold_start_top_ns = dict(filter(lambda x: x[0] in cold_start_users, n[1].items()))
    # evaluate how well the recommended items predicted the future purchases
    # on cold start users
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name = 'RANDOM',
                                                    top_ns = cold_start_top_ns,
                                                    user_rating_history = test_user_history, 
                                                    item_popularity = item_popularity, 
                                                    n = n[0],
                                                    mf_based = True))

The RANDOM has an average recall@10: 0.00020, average novelty@10: 0.95770
The RANDOM has an average recall@25: 0.00094, average novelty@25: 0.95963
The RANDOM has an average recall@30: 0.00134, average novelty@30: 0.96195
The RANDOM has an average recall@45: 0.00321, average novelty@45: 0.96664


# Generating Recommended Items DataFrame

In [22]:
max_recommendations = (utilities
                       .generate_recommendations_df(
                           train, 
                           n_recommendations, 
                           "RANDOM", mf_based=True))

In [23]:
max_recommendations

Unnamed: 0,reviewerID,item_rank,asin,algorithm,title
0,A00177463W0XWB16A9O05,0,B0000CNU15,RANDOM,Lee Kum Kee Chiu Chow Chili Oil net wt. 205g (...
1,A00177463W0XWB16A9O05,1,B0000GHNT0,RANDOM,"El Yucateco Chile Habanero Hot Sauce Bottle, R..."
2,A00177463W0XWB16A9O05,2,B0000TA3SK,RANDOM,Wright's Natural Hickory Seasoning Liquid Smok...
3,A00177463W0XWB16A9O05,3,B0000W0GQQ,RANDOM,Nielsen-Massey Madagascar Bourbon Vanilla Bean...
4,A00177463W0XWB16A9O05,4,B00014JNI0,RANDOM,YS Organic Bee Farms CERTIFIED ORGANIC RAW HON...
...,...,...,...,...,...
602860,AZZ5ASC403N74,40,B000BIQLL8,RANDOM,Kikkoman Instant Tofu Miso Soup (Soybean Paste...
602861,AZZ5ASC403N74,41,B000BLNU6E,RANDOM,Grand Ghirardelli Chocolate Gift Basket
602862,AZZ5ASC403N74,42,B000BP1SDM,RANDOM,Ginger People Original Ginger Chews 3oz Bag
602863,AZZ5ASC403N74,43,B000CFJDWY,RANDOM,"Late July Organic Classic Rich Crackers, 6-Oun..."


# Store in `SQLite` DB

In [26]:
engine = create_engine("sqlite:///recommender.db", echo=True)

In [27]:
max_recommendations.to_sql(f"{CATEGORY}", con=engine, if_exists="append")

2021-09-29 17:32:34,523 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("Grocery_and_Gourmet_Food")
2021-09-29 17:32:34,527 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 17:32:34,531 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("Grocery_and_Gourmet_Food")
2021-09-29 17:32:34,531 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 17:32:34,535 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-09-29 17:32:34,537 INFO sqlalchemy.engine.Engine 
CREATE TABLE "Grocery_and_Gourmet_Food" (
	"index" BIGINT, 
	"reviewerID" TEXT, 
	item_rank BIGINT, 
	asin TEXT, 
	algorithm TEXT, 
	title TEXT
)


2021-09-29 17:32:34,537 INFO sqlalchemy.engine.Engine [no key 0.00076s] ()
2021-09-29 17:32:34,540 INFO sqlalchemy.engine.Engine CREATE INDEX "ix_Grocery_and_Gourmet_Food_index" ON "Grocery_and_Gourmet_Food" ("index")
2021-09-29 17:32:34,541 INFO sqlalchemy.engine.Engine [no key 0.00074s] ()
2021-09-29 17:32:34,543 INFO sqlalchemy.engine.Engine COMMIT
2021-09-29 17:32:34,783 INFO