In [1]:
import numpy as np
import pandas as pd
import warnings

from gensim.models.doc2vec import Doc2Vec
from pandarallel import pandarallel
from sqlalchemy import create_engine
from tqdm import tqdm

from src.models import cf, evaluate_model
from src.utilities import utilities

tqdm.pandas()
warnings.filterwarnings('ignore')



# Load Data and Models

In [2]:
# global variables
DATA_PATH = "data/evaluation"
D2V_PATH = "models/d2v"
CATEGORY = "Grocery_and_Gourmet_Food"

# d2v training parameters
EPOCHS=10

# reproducibility checks
SEED = 42
np.random.seed(42)

train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")
d2v = Doc2Vec.load(f"{D2V_PATH}/{CATEGORY}_item_50_{EPOCHS}_d2v.model")

In [3]:
# checking train dataframe
train.head().append(train.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A23RYWDS884TUL,5.0,This curry paste makes a delicious curry. I j...,2013-05-28,curry paste delicious curry fry chicken vegeta...
1,1,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A945RBQWGZXCK,5.0,I've purchased different curries in the grocer...,2012-09-17,purchase different curry grocery store complet...
2,3,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3AMNY44OP8AOU,4.0,I started a new diet restricting all added sug...,2014-01-23,start new diet restrict added sugar brand suga...
3,4,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3IB4CQ2QEJLJ8,5.0,So many flavors. I can't begin to tell you how...,2014-04-27,flavor begin tell love mae ploy curry ask reci...
4,5,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",AQA5DF3RWKETQ,5.0,I've used this a lot recently in some of my ch...,2012-11-27,use lot recently chicken dish use lot like spi...
47769,77420,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A192LQZWDYPR4U,5.0,Another quality Reese Peanut Butter Cup produc...,2014-02-27,quality reese peanut butter cup product great ...
47770,77421,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A2QKXW3LDQ66P5,5.0,I purchased these for my husband who has every...,2013-02-20,purchase husband love reeses valentine day pre...
47771,77430,B00ID9VSOM,"Viva Labs Organic Coconut Sugar: Non-GMO, Low-...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2P3TGJU301KXD,5.0,this stuff is INCREDIBILY yummy! SO much bette...,2014-07-15,stuff incredibily yummy good regular brown sug...
47772,77456,B00IRL93SY,Barrie House Kenya Estate - AA Single Cup Caps...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AEFE9VDHTQ199,5.0,"Very nice aroma, body and taste! Will buy this...",2014-05-24,nice aroma body taste buy coffee good coffee a...
47773,77508,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2AEZQ3DGBBLPR,2.0,This is a no go for diabetics according to my ...,2014-06-26,diabetic accord wife doctor order intention us...


In [4]:
# testing d2v models
d2v.dv[0]

array([-0.19320773,  0.23249052,  0.10087004,  0.11353737, -0.29637456,
        0.1206838 , -0.15905488, -0.03072077, -0.18343581,  0.09816217,
       -0.01866587, -0.02012681, -0.49991536,  0.05031789,  0.2729918 ,
       -0.14091071,  0.43504936, -0.36140546, -0.08976636, -0.06389496,
       -0.09563986, -0.0033619 ,  0.38012826, -0.10016942, -0.07466158,
        0.6029501 , -0.3629382 , -0.02172032, -0.37396008,  0.03635319,
        0.22457832,  0.36284095, -0.09430069,  0.5297421 , -0.04680703,
        0.17874746, -0.27136478, -0.16296121,  0.21850628,  0.12752077,
       -0.09792925, -0.17686078, -0.22154953, -0.29303235,  0.23883624,
       -0.09763848,  0.02103806, -0.09468511,  0.02304819,  0.1887425 ],
      dtype=float32)

# Generate User Embeddings

In [5]:
# get user rating history
train_user_rating_history = train.groupby(["reviewerID"])["asin"].progress_apply(list)
print(train_user_rating_history)

100%|███████████████████████████████████████████| 13397/13397 [00:00<00:00, 48768.73it/s]

reviewerID
A00177463W0XWB16A9O05                             [B0029XDZIK, B0094ISOMA]
A022899328A0QROR32DCT                             [B001ACMCNU, B003TO9RSU]
A068255029AHTHDXZURNU                             [B000K8WVYA, B0094ISOMA]
A06944662TFWOKKV4GJKX                                         [B000CQBZPG]
A1004703RC79J9                                                [B001E50THY]
                                               ...                        
AZWRZZAMX90VT            [B0007R9L5Q, B000CQ01GU, B000E123IC, B000E46LZ...
AZXKAH2DE6C8A            [B000EML7DS, B000ODF2ME, B001650XUK, B0018QLG9...
AZXON596A1VXC                         [B00113SKZW, B00113ZTVK, B001L4JH5I]
AZYXC63SS008M                                                 [B0040WCQKQ]
AZZ5ASC403N74                                                 [B004U49QU2]
Name: asin, Length: 13397, dtype: object





# Generate N-Recommendations = {10, 25, 30, 45}

## Load Test Data

In [7]:
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")

In [8]:
test.head().append(test.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,2,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1TCSC0YWT82Q0,5.0,I love ethnic foods and to cook them. I recent...,2013-08-03,love ethnic food cook recently purchase produc...
1,8,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1Z7Y2GMAP9SRY,5.0,I like to make my own curry but this is a tast...,2014-06-27,like curry tasty alternative use base kind dif...
2,23,B00004S1C5,"Ateco Food Coloring Kit, 6 colors","['Grocery & Gourmet Food', 'Cooking & Baking',...",A14YSMLYLJEMET,1.0,This product is no where near natural / organi...,2013-03-29,product near natural organic wish review purch...
3,31,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",A2F488C4PLWGEI,5.0,If my wife drinks a cup of this tea when she f...,2014-03-23,wife drink cup tea feel attack come help avoid...
4,32,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AO1HXV7DWZZIR,5.0,I don't know about the medicinal aspects of th...,2014-02-06,know medicinal aspect tea flavor downright scr...
28001,77519,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A1WT3TVHANP7ZF,3.0,Hmmm. I really wanted to love this sweetener. ...,2014-07-22,hmmm want love sweetener half sugar half stevi...
28002,77520,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A3NEAETOSXDBOM,5.0,"I confess I have a sweet tooth, and love the t...",2014-06-30,confess sweet tooth love taste sugar recognize...
28003,77521,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",AD1ZOPB0BBEHB,4.0,"It has a little of the stevia aftertaste, but ...",2014-07-17,little stevia aftertaste fair compromise able ...
28004,77522,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A18ECVX2RJ7HUE,5.0,i love marinade for grilled flank steak or lon...,2014-05-30,love marinade grilled flank steak london broil...
28005,77523,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2G04D4QZAXL15,3.0,I've been using Truvia (a form of stevia) on m...,2014-05-27,use truvia form stevia cereal greek yogurt yea...


In [9]:
# generating test history
test_user_history = (pd.DataFrame(test.groupby(['reviewerID'])['asin']
                                  .apply(list).reset_index()))

In [10]:
print(test_user_history)

                  reviewerID  \
0      A00177463W0XWB16A9O05   
1      A022899328A0QROR32DCT   
2      A068255029AHTHDXZURNU   
3      A06944662TFWOKKV4GJKX   
4             A1004703RC79J9   
...                      ...   
13274          AZWRZZAMX90VT   
13275          AZXKAH2DE6C8A   
13276          AZXON596A1VXC   
13277          AZYXC63SS008M   
13278          AZZ5ASC403N74   

                                                    asin  
0                               [B00474OR8G, B00BFM6OAW]  
1                                           [B00CMQDKES]  
2                                           [B001FA1K2G]  
3                                           [B000GFYRHG]  
4                                           [B003GTR8IO]  
...                                                  ...  
13274  [B0007R9L4M, B000CN7BMA, B001EQ5D1K, B002VT3GX...  
13275   [B000MAK41I, B004X8TJP2, B006H34CUS, B007W14RMM]  
13276                           [B001EO5S0I, B00271QQ7Q]  
13277                    

## Instantiate Embedded Review CBF (Item-based)

In [11]:
er_cbf = cf.EmbeddedReviewCBF(d2v)

In [12]:
%%time
# fit learning algorithm to training data
er_cbf.fit(train)

100%|███████████████████████████████████████████| 13397/13397 [00:00<00:00, 32088.88it/s]

CPU times: user 600 ms, sys: 34 ms, total: 634 ms
Wall time: 654 ms





In [13]:
%%time
# generate n-number of candidates items (200)
candidate_items = er_cbf.test()

100%|████████████████████████████████████████████| 13397/13397 [00:09<00:00, 1480.98it/s]

CPU times: user 22 s, sys: 6.65 s, total: 28.6 s
Wall time: 9.05 s





## Save Model

## Loop through N = {10, 25, 30, 45}

In [14]:
# generate item popularity
item_popularity = evaluate_model.generate_item_popularity(train)

In [15]:
n_recommendations = {}
for n in [10, 25, 30, 45]:
    # retrieve the top-n items based on similarities
    # top_ns = get_top_n(candidate_items, mem_ecf.user_rating_history, n)
    top_ns = er_cbf.get_top_n(n)
    # evaluate how well the recommended items predicted the future purchases
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name = 'ER-CBF',
                                                    top_ns = top_ns,
                                                    user_rating_history = test_user_history, 
                                                    item_popularity = item_popularity, 
                                                    n = n,
                                                    mf_based = False))
    # saving the n-value and recommended items
    n_recommendations[n] = (top_ns, n_recommended_items)

The ER-CBF has an average recall@10: 0.02177, average novelty@10: 0.94207
The ER-CBF has an average recall@25: 0.04086, average novelty@25: 0.95132
The ER-CBF has an average recall@30: 0.04460, average novelty@30: 0.95325
The ER-CBF has an average recall@45: 0.05688, average novelty@45: 0.95740


# Evaluate N-Recommendations

## N=10

In [16]:
top_ns_10 = n_recommendations[10][0]

In [17]:
utilities.retrieve_recommendations(train, top_ns_10)

For user: A2H84V55USFJQX:
Purchase History:
             asin                                              title
13844  B000ZSZ5S4  Blue Diamond Almonds, Bold Salt &amp; Vinegar,...
41088  B005K4Q1T0  Grove Square Hot Cocoa Dark Chocolate, 24 Sing...

Recommending:

         asin                                              title
0  B0033HPPIO  Donut House Collection Coffee Chocolate Glazed...
1  B001EO5Z6A   Rocamojo Organic Gourmet Coffee, Wild French ...
2  B002WENMX4  Suchard Hot Chocolate Syrup, 8-Count T-Discs f...
3  B002MV63EW  Gloria Jean's K-Cup, Holiday Traditions, 24-Co...
4  B005QKH1V0  Cameron's Coffee Roasted Ground Coffee, Variet...
5  B006ZI66J0  Yuban Colombian Coffee, Medium Roast, T-Discs ...
6  B005EF0I0I                 Coffee Sampler Gift Basket by ig4U
7  B001D0IZ4O  Timothy's World Coffee, White Hot Chocolate, 2...
8  B00BIEU44O  Community Coffee Ground Coffee, 5 Star Hotel B...
9  B005EF0HTK           Celebrate Good Times Gift Basket by ig4U


## N=25

In [18]:
top_ns_25 = n_recommendations[25][0]

In [19]:
utilities.retrieve_recommendations(train, top_ns_25)

For user: A2W9B725TZBXOX:
Purchase History:
             asin                                              title
1645   B0005Z6LLW  Ghirardelli Hot Chocolate Mix , Chocolate Moch...
4683   B000ED9L6C  Bob's Red Mill Raw Shelled Sunflower Seeds (Ke...
8508   B000GATCRQ  Dream Foods International Volcano Lemon Burst,...
15333  B0014EOUAW  V8 V-Fusion Light Peach Mango Juice Drink, 46-...
18192  B001E52ZAS  Post Shredded Wheat Lightly Frosted Cereal, Sp...
21093  B001EPQV1W  Honey Bunches of Oats with Almonds, 14.5-Ounce...
22932  B001G8UC8K  Tootsie Roll Midgees Candy 5 Pound Value Bag 7...
23025  B001GVIRD4                     Grocery &amp; Gourmet Food" />
24945  B001NC8HQS  Idahoan Buttery homestyle flavored mashed pota...
25830  B001SAQ7WQ  Progresso Traditional Soup, Chickarina (Chicke...
26956  B0025UOMY8  Maruchan Yakisoba Spicy Vegetable, 3.79-Ounce ...
33467  B00421DMLM  Libby's Splenda Sliced Peaches, 23.5-Ounce Jar...
34141  B004727CL2  Snack Factory Pretzel Crisps Everything,

## N=30

In [20]:
top_ns_30 = n_recommendations[30][0]

In [21]:
utilities.retrieve_recommendations(train, top_ns_30)

For user: A1G8ECTOF35O2R:
Purchase History:
             asin                                              title
9115   B000H26J7E  Lindt Excellence Bar, 70% Cocoa Smooth Dark Ch...
47677  B00EKLPLU4              Healthworks Cacao Powder Organic, 1lb

Recommending:

          asin                                              title
0   B0019ZHU2Q  Ghirardelli Chocolate Intense Dark Bar, Twilig...
1   B000KNB4WA  Ghirardelli Chocolate Baking Chips, Semi-Sweet...
2   B001G0MG2I   Ghirardelli Chocolate Intense Dark Bar, Midni...
3   B000OQ4A3S  Navitas Organics Cacao Nibs, 8 oz. Bag &mdash;...
4   B000Y0HCHK  Nevada Manna Sugar Free Semi Sweet Chocolate C...
5   B000H27PB8  Ritter Sport Bars, Dark Chocolate with Marzipa...
6   B0029J6QLM         Snickers Dark Chocolate Candy (Pack of 24)
7   B0029JASWA  Dove Dark Chocolate Promises, 9.5-Ounce Packag...
8   B000JSLYSU        Cadbury Flake Chocolate Bars 23.5g, 4-Count
9   B001H9N9RE  Ghirardelli Dark Chocolate Twilight Delight 72...
10  B00

## N=45

In [22]:
top_ns_45 = n_recommendations[45][0]

In [23]:
utilities.retrieve_recommendations(train, top_ns_45)

For user: A1NEKFNO1E743Y:
Purchase History:
            asin                                              title
8695  B000GFYRHG  Bigelow Constant Comment Tea 20-Count Boxes (P...

Recommending:

          asin                                              title
0   B000F4DKAI  Twinings of London English Afternoon Black Tea...
1   B007JFXXJY  Lipton Herbal Pyramid Tea Bags, Orange Blossom...
2   B001E5E1WS  Rishi Tea Organic Silver Needle Jasmine Loose ...
3   B005CT9OEW  Lipton Green Tea Bags, Decaffeinated Blackberr...
4   B001BM62T4  Red Espresso Ground Rooibos Tea, 8.8-Ounce Pou...
5   B008YUL4CQ  Lipton Decaffeinated Green Tea, Superfruit Jas...
6   B002AR158W   Numi Organic Tea Magnolia Puerh, Full Leaf Gr...
7   B0061IUKDM  Higgins &amp; Burke, Green Tea, 20 Count (Pack...
8   B001E5E20O  Rishi Tea Organic Black Tea, Earl Grey Loose T...
9   B004051BO0  Yogi Tea - Vanilla Spice Perfect Energy - Ener...
10  B002HQCWYM  Twinings of London English Breakfast Tea K-Cup...
11  B000CQBZ

# Cross-Analysis for Cold-Start Users (<= 2 Purchased Items)

In [24]:
cold_start_users = utilities.generate_cold_start_users(train)

In [25]:
for n in tuple(zip([10, 25, 30, 45], [top_ns_10, top_ns_25, top_ns_30, top_ns_45])):
    cold_start_top_ns = dict(filter(lambda x: x[0] in cold_start_users, n[1].items()))
    # evaluate how well the recommended items predicted the future purchases
    # on cold start users
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name = 'ER-CBF',
                                                    top_ns = cold_start_top_ns,
                                                    user_rating_history = test_user_history, 
                                                    item_popularity = item_popularity, 
                                                    n = n[0],
                                                    mf_based = False))

The ER-CBF has an average recall@10: 0.02408, average novelty@10: 0.94052
The ER-CBF has an average recall@25: 0.04482, average novelty@25: 0.95046
The ER-CBF has an average recall@30: 0.04829, average novelty@30: 0.95253
The ER-CBF has an average recall@45: 0.06120, average novelty@45: 0.95703


# Generating Recommended Items DataFrame

In [26]:
max_recommendations = utilities.generate_recommendations_df(train, n_recommendations, "ER-CBF")

In [27]:
max_recommendations

Unnamed: 0,reviewerID,item_rank,asin,algorithm,title
0,A00177463W0XWB16A9O05,0,B001D0IZBM,ER-CBF,"Grocery &amp; Gourmet Food"" />"
1,A00177463W0XWB16A9O05,1,B000J4IDO2,ER-CBF,"Coffee People K-Cups, Black Tiger Extra Bold, ..."
2,A00177463W0XWB16A9O05,2,B001ELL67A,ER-CBF,"Grocery &amp; Gourmet Food"" />"
3,A00177463W0XWB16A9O05,3,B005K4Q1YA,ER-CBF,"Grove Square Cappuccino, French Vanilla, 24 Co..."
4,A00177463W0XWB16A9O05,4,B0007IQQXA,ER-CBF,Van Houtte French Vanilla Coffee K Cups 18 Count
...,...,...,...,...,...
602860,AZZ5ASC403N74,40,B007JFMIWW,ER-CBF,Quaker Stila Crispy Oat Cookie Bar - Blueberry...
602861,AZZ5ASC403N74,41,B00DUQNFSU,ER-CBF,"Werther's Original Popcorn, Caramel, 8 Ounce"
602862,AZZ5ASC403N74,42,B001G0MFZQ,ER-CBF,"Ghirardelli Chocolate Squares, Milk Chocolate ..."
602863,AZZ5ASC403N74,43,B0007OVY3Q,ER-CBF,King Leo Soft Peppermint Candy 5lb


# Store in `SQLite` DB

In [30]:
engine = create_engine("sqlite:///recommender.db", echo=True)

In [31]:
max_recommendations.to_sql(f"{CATEGORY}", con=engine, if_exists="append")

2021-09-29 20:25:39,439 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("Grocery_and_Gourmet_Food")
2021-09-29 20:25:39,440 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-29 20:25:39,644 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-09-29 20:25:43,285 INFO sqlalchemy.engine.Engine INSERT INTO "Grocery_and_Gourmet_Food" ("index", "reviewerID", item_rank, asin, algorithm, title) VALUES (?, ?, ?, ?, ?, ?)
2021-09-29 20:25:43,286 INFO sqlalchemy.engine.Engine [generated in 3.07651s] ((0, 'A00177463W0XWB16A9O05', 0, 'B001D0IZBM', 'ER-CBF', ' Grocery &amp; Gourmet Food" />'), (1, 'A00177463W0XWB16A9O05', 1, 'B000J4IDO2', 'ER-CBF', 'Coffee People K-Cups, Black Tiger Extra Bold, 24-Count Box (Pack of 2)'), (2, 'A00177463W0XWB16A9O05', 2, 'B001ELL67A', 'ER-CBF', ' Grocery &amp; Gourmet Food" />'), (3, 'A00177463W0XWB16A9O05', 3, 'B005K4Q1YA', 'ER-CBF', 'Grove Square Cappuccino, French Vanilla, 24 Count Single Serve Cups'), (4, 'A00177463W0XWB16A9O05', 4, 'B0007IQQXA', 'ER-CBF', '