In [1]:
import os
import sys

# set environment path
sys.path.append("../..")

# data manipulation
import numpy as np
import pandas as pd
import warnings

from gensim.models.doc2vec import Doc2Vec
from sqlalchemy import create_engine
from tqdm import tqdm

# custom functions
from src.models import algorithms, evaluate_model
from src.utilities import utilities

# register tqdm's progress_* methods (e.g. progress_apply) on pandas objects
tqdm.pandas()
# NOTE(review): this silences every warning for the whole session, including
# library deprecation warnings — consider narrowing the filter
warnings.filterwarnings('ignore')



# Load Data and Models

In [2]:
# root of the repository, relative to this notebook's directory
REPO_PATH = os.path.join("..", "..")

# locations of the evaluation splits and of the trained doc2vec models
DATA_PATH = os.path.join(REPO_PATH, "data", "evaluation")
D2V_PATH = os.path.join(REPO_PATH, "models", "d2v")

# product category whose splits and model are loaded below
CATEGORY = "Pet_Supplies"

# reproducibility: seed NumPy's global RNG from the single SEED constant so
# the constant and the value actually used can never drift apart
SEED = 42
np.random.seed(SEED)

# load train/test dataset and doc2vec trained model; file paths are built with
# os.path.join, consistent with the directory constants above
train = pd.read_csv(os.path.join(DATA_PATH, f"{CATEGORY}_train.csv"))
test = pd.read_csv(os.path.join(DATA_PATH, f"{CATEGORY}_test.csv"))
d2v = Doc2Vec.load(os.path.join(D2V_PATH, f"{CATEGORY}_item_50_10_d2v.model"))

In [3]:
# checking train dataframe: first and last five rows in a single view
# (pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
# and removed in pandas 2.0)
pd.concat([train.head(), train.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A14CK12J7C7JRK,3.0,I purchased the Trilogy with hoping my two cat...,2011-01-12,purchase trilogy hop cat age interested yr old...
1,2,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2CR37UY3VR7BN,4.0,I bought the triliogy and have tested out all ...,2012-12-19,buy triliogy test dvd appear volume receive re...
2,3,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2A4COGL9VW2HY,4.0,My female kitty could care less about these vi...,2011-05-12,female kitty care video care little male dig a...
3,4,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2UBQA85NIGLHA,3.0,"If I had gotten just volume two, I would have ...",2012-03-05,volume star trilogy star read review know vol ...
4,5,B00005MF9U,LitterMaid LM900 Mega Self-Cleaning Litter Box,"['Pet Supplies', 'Cats', 'Litter &amp; Housebr...",A2BH04B9G9LOYA,1.0,"First off, it seems that someone is spamming t...",2006-12-31,spamming review glow reviewer review amazon ba...
68865,111581,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A11J1FHCK5U06J,4.0,Now I know exactly where the trouble spots are...,2014-05-23,know exactly trouble spot sniffing guess invis...
68866,111585,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A18JF0T0GOCORW,4.0,I use this light to help me find stains when I...,2014-05-24,use light help stain carpet clean pre treat ca...
68867,111595,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A3GRPCW9DG427Z,5.0,We are owned by the 3 pickiest pooches in the ...,2013-07-27,pickiest pooch world love fool reject doggie t...
68868,111598,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A2X6TLAX3JEO1A,5.0,My highly allergic white boxer loves these tre...,2014-05-09,highly allergic white boxer love treat meat co...
68869,111602,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A9PG9ODPPP31N,5.0,Works great on my medium sized dog. She has ve...,2014-07-09,work great medium size dog coarse hair work gr...


In [4]:
# checking test dataframe: first and last five rows in a single view
# (pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
# and removed in pandas 2.0)
pd.concat([test.head(), test.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,1,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A39QHP5WLON5HV,5.0,There are usually one or more of my cats watch...,2013-09-14,usually cat watch tv stay trouble dvd play lik...
1,104,B00005MF9V,LitterMaid Universal Cat Privacy Tent (LMT100),"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A366V0GCEPH5CX,5.0,My cats love it and so do I. I no longer have ...,2013-02-02,cat love longer cat litter fly floor litter fl...
2,133,B00005MF9T,LitterMaid LM500 Automated Litter Box,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",ALWWS8QBYN80B,1.0,I have one female cat that weighs under 10 pou...,2004-11-17,female cat weigh pound year old use everclean ...
3,153,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A3PVI3NE7OY1SP,5.0,I love these. They make the clean up so much e...,2013-09-26,love clean easy clean box manually use issue w...
4,154,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A2H83XMHUVDLJY,4.0,"I love this litter box. I do not use the lids,...",2014-06-26,love litter box use lid use receptacle tear cr...
41564,111601,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],AV34KNYW82YSS,4.0,Pulled lots of hair out of my Labs coat. Didn'...,2014-07-18,pulled lot hair labs coat think prove wrong co...
41565,111603,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1YMNTFLNDYQ1F,5.0,I have been trying to find a rubber bristle br...,2014-07-16,try rubber bristle brush persian year lose glo...
41566,111604,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1FQ3HRVXA4A5B,5.0,Great product to use on your pets knowing this...,2014-07-11,great product use pet know gentle rubber damag...
41567,111605,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A3OP6CI0XCRQXO,5.0,I bought a second one because I have two cats ...,2014-07-22,buy second cat american short hair buy brush m...
41568,111606,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A11LC938XF35XN,5.0,Our dogs love getting brushed with this. It m...,2014-07-17,dog love brush massage remove heavy undercoat ...


In [5]:
# build the test history: one row per reviewerID, with that reviewer's
# test-split asins collected into a list
test_user_history = pd.DataFrame(
    test
    .groupby(['reviewerID'])['asin']
    .apply(list)
    .reset_index()
)
print(test_user_history)

                  reviewerID                                  asin
0      A04173782GDZSQ91AJ7OD              [B0090Z9AYS, B00CPDWT2M]
1      A042274212BJJVOBS4Q85              [B005AZ4M3Q, B00771WQIY]
2       A0436342QLT4257JODYJ  [B0018CDR68, B003SJTM8Q, B00474A3DY]
3      A04795073FIBKY8GSLZYI              [B001PKT30M, B005DGI2RY]
4      A06658082A27F4VB5UG8E              [B000TZ1TTM, B0019VUHH0]
...                      ...                                   ...
18993          AZYJE40XW6MFG              [B00HVAKJZS, B00IDZT294]
18994          AZZ56WF4X19G2                          [B004A7X218]
18995          AZZNK89PXD006  [B0002DHV16, B005BP8MQ8, B009RTX4SU]
18996          AZZV9PDNMCOZW              [B007EQL390, B00ISBWVT6]
18997          AZZYW4YOE1B6E  [B0002AQPA2, B0002AQPA2, B0002ARQV4]

[18998 rows x 2 columns]


In [6]:
# testing d2v models: sanity-check the loaded model by displaying the
# document vector stored at index 0
d2v.dv[0]

array([-0.35133928, -0.17229962,  0.0257495 , -0.06583848, -0.04991433,
        0.02270661,  0.06725261,  0.06901126, -0.1656547 ,  0.13047186,
       -0.002418  ,  0.20924543,  0.04927928,  0.10133805, -0.02269881,
       -0.07407231,  0.02801738, -0.03638814, -0.13904478, -0.03911084,
       -0.02153517, -0.00893907, -0.1383793 , -0.06263901, -0.03441359,
        0.18414213,  0.03725764, -0.18159781, -0.14632137, -0.17885764,
        0.07679559,  0.12158407,  0.02334197,  0.10554555,  0.08597451,
        0.20490324, -0.1590936 , -0.24413228,  0.17860389,  0.10301679,
       -0.06252921,  0.12318915, -0.03536423, -0.01251181,  0.022075  ,
        0.1465826 ,  0.1827539 , -0.13209747, -0.06246685, -0.01958987],
      dtype=float32)

# Generate N-Recommendations = {5, 10, 15, 20}

## Instantiate and Train Embedded Review CBF (Item-based)

In [7]:
# wrap the loaded doc2vec model in the project's EmbeddedReviewCBF recommender
er_cbf = algorithms.EmbeddedReviewCBF(d2v)

In [8]:
%%time
# fit learning algorithm to training data
# (%%time reports the CPU and wall time this cell takes)
er_cbf.fit(train)

100%|████████████████████████████████████████████████████████████████████████████████████████████████| 19058/19058 [00:00<00:00, 41775.26it/s]

CPU times: user 699 ms, sys: 31.3 ms, total: 731 ms
Wall time: 730 ms





In [9]:
%%time
# generate n-number of candidates items (200)
# NOTE(review): the 200 figure is not visible in this call — presumably a
# default inside EmbeddedReviewCBF.test(); confirm in src.models.algorithms
candidate_items = er_cbf.test()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 19058/19058 [00:06<00:00, 2930.07it/s]

CPU times: user 22 s, sys: 3.5 s, total: 25.5 s
Wall time: 6.51 s





## Loop through N = {5, 10, 15, 20}

For each top-N setting, we generate up to *N* candidate items per user and evaluate `Recall@N` and `Novelty@N` over all users.

In [10]:
# generate item popularity from the training split; the result is passed to
# evaluate_model.evaluate_recommendations as `item_popularity`
# (presumably it feeds the Novelty@N metric — confirm in src.models.evaluate_model)
item_popularity = evaluate_model.generate_item_popularity(train)

In [11]:
%%time
n_recommendations = {}
for n in [5, 10, 15, 20]:
    # retrieve the top-n items based on similarities
    top_ns = er_cbf.get_top_n(n)
    # evaluate how well the recommended items predicted the future purchases
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name="ER-CBF",
                                                    top_ns=top_ns,
                                                    user_rating_history=test_user_history, 
                                                    item_popularity=item_popularity, 
                                                    n=n,
                                                    mf_based=False))
    # saving the n-value and recommended items
    n_recommendations[n] = (top_ns, n_recommended_items)

The ER-CBF has an average recall@5: 0.01616, average novelty@5: 0.94879
The ER-CBF has an average recall@10: 0.02474, average novelty@10: 0.95356
The ER-CBF has an average recall@15: 0.03334, average novelty@15: 0.95632
The ER-CBF has an average recall@20: 0.03927, average novelty@20: 0.95855
CPU times: user 1min 10s, sys: 1.06 s, total: 1min 11s
Wall time: 1min 12s


# Evaluate N-Recommendations

For each top-N recommendation list, we randomly sample one user and compare their *N* recommendations against their purchase history.

## N=5

In [12]:
# top-5 lists: element 0 of the (top_ns, n_recommended_items) pair stored under key 5
top_ns_05 = n_recommendations[5][0]
# print one user's purchase history next to the items recommended to them
utilities.retrieve_recommendations(train, top_ns_05)

For user: A3T87QAUUPTMZK:
Purchase History:
             asin                                              title
43366  B0010OSIHW  Zoo Med Eco Earth Compressed Coconut Fiber Sub...
45100  B00167VVP4   Zoo Med Eco Earth Loose Coconut Fiber Substra...
46998  B0019IJXD2       Zoo Med Reptile Fogger Terrarium Humidifier 

Recommending:

         asin                                            title
0  B00025664C              POLYBIO POLY FILTER PAD 4 X 8&quot;
1  B001F9CVRK     Loving Pets Acurel Water Clarifier, Aquarium
2  B00025YRJS               Boyd Enterprises Chemi-Clean - 2 g
3  B000HHLH0I                            Tetra Gel Fish Feeder
4  B0006IGZCI  Microbe Lift 1-Quart Pond Microbe-Lift PL 10PLQ


## N=10

In [13]:
# top-10 lists: element 0 of the (top_ns, n_recommended_items) pair stored under key 10
top_ns_10 = n_recommendations[10][0]
# print one user's purchase history next to the items recommended to them
utilities.retrieve_recommendations(train, top_ns_10)

For user: A1XEZIHQIUAOR1:
Purchase History:
             asin                                              title
1487   B000084E6V                      Nylabone Dental Dinosaur Chew
1727   B000084E6V                      Nylabone Dental Dinosaur Chew
28789  B0006VMN4O  Pioneer Pet SmartCat Peek-A-Prize Toy Box with...
37658  B000JZ1WSU                     SmartCat 3836 Tick Tock Teaser
42603  B000XZDV44  Hill'S Science Diet Kitten Savory Salmon Entre...
43867  B0012KCUOG  Whisker Lickin'S Soft &amp; Delicious Chicken ...

Recommending:

         asin                                              title
0  B0027J5RZA  Nylabone Dental Chew Bacon flavored Pro Action...
1  B0002DGMGG         Nylabone Dura Chew Double Action Chew Wolf
2  B0002DGM7K  Nylabone Just For Puppies  Key Ring Bone Puppy...
3  B000084EXU  Nylabone Dental Chew Petit Original Flavored B...
4  B0000BYDH7                    Nylabone FlexiChew Chicken Bone
5  B0002ASMT4                              Nylabone Dog Chew Toy
6 

## N=15

In [14]:
# top-15 lists: element 0 of the (top_ns, n_recommended_items) pair stored under key 15
top_ns_15 = n_recommendations[15][0]
# print one user's purchase history next to the items recommended to them
utilities.retrieve_recommendations(train, top_ns_15)

For user: A89LQAXW1IY6S:
Purchase History:
             asin                                              title
12819  B0002ARP2O  Marshall Ferret Deluxe Leisure Lounge, Pattern...
12825  B0002ARP2O  Marshall Ferret Deluxe Leisure Lounge, Pattern...
38837  B000MD3NLS  MidWest Homes for Pets Snap'y Fit Stainless St...
42683  B000Y8UNAU  Pro Select Fleece Cat Perch Covers - Comfortab...
46465  B0018CJZ32                         SmartCat Corner Litter Box

Recommending:

          asin                                              title
0   B0002602UK                        Ethical Stonewe Animal Dish
1   B000GEZCJ4  PetSafe Wall Entry Pet Door with Telescoping T...
2   B000MD3MIW     PetSafe Plastic Pet Door with Soft Tinted Flap
3   B0002AT3QK                  Four Paws Dog Rake Pooper Scooper
4   B0002ZS370                 Stainless Steel Bird Cup with Wire
5   B000CMHWZC  Kaytee Multi-Level Habitat w/Removable Casters...
6   B0002Y1PSA                   Cat Mate Lockable Cat Flap Brown

## N=20

In [15]:
# top-20 lists: element 0 of the (top_ns, n_recommended_items) pair stored under key 20
top_ns_20 = n_recommendations[20][0]
# print one user's purchase history next to the items recommended to them
utilities.retrieve_recommendations(train, top_ns_20)

For user: A3C2ECIXEQ0YFQ:
Purchase History:
             asin                                              title
30751  B0009YS4P0  Nutri-Vet Hip &amp; Joint Extra Strength Chewa...
37067  B000IBRI2Y              Dog Dazer II Ultrasonic Dog Deterrent
40175  B000OV4VAU                        Nutri-Vet Alaska Salmon Oil
53623  B0029NQTI8  Pedigree Choice Cuts Variety Pack Lamb/Vegetab...

Recommending:

          asin                                              title
0   B0006G5REC  Grannicks Bitter Apple Dog Chew Deterrent, 32-...
1   B000633Z3K  NaturVet GrassSaver for Dogs, 250 Chewable Tab...
2   B000O5K03I  ADAPTIL (D.A.P.) Dog Appeasing Pheromone Refil...
3   B005B0OKO6                            Virbac Anxitane Tablets
4   B000J3HZWE  Comfort Zone with DAP for Dogs Diffuser and Si...
5   B000HHSAA8                        Emt Spray for Pets, 1 Ounce
6   B00076KPGG            Cholodin Canine (500 tablets) CHEWABLES
7   B000G85UM4           Comfort Zone Refill with D.A.P. for Dogs
8

# Cross-Analysis for Cold-Start Users (<= 2 Purchased Items)

For each top-N setting, we restrict the top-*N* candidate items to cold-start users (i.e., users who purchased two or fewer items, based on items per user in the training set) and evaluate `Recall@N` and `Novelty@N` on those users only.

In [16]:
# collection of cold-start users derived from the training split
# NOTE(review): the threshold lives inside the helper; the section text says
# <= 2 purchased items — confirm in src.utilities
cold_start_users = utilities.generate_cold_start_users(train)

In [17]:
# Re-run the evaluation restricted to cold-start users, once per N already
# stored in n_recommendations (keys are in insertion order: 5, 10, 15, 20).
# Reading from n_recommendations directly avoids depending on the per-N
# top_ns_XX variables created in the inspection cells.
for n in n_recommendations:
    # element 0 of the stored pair is the per-user top-n mapping
    top_ns = n_recommendations[n][0]
    # keep only the entries whose user key is a cold-start user
    cold_start_top_ns = {
        user: items
        for user, items in top_ns.items()
        if user in cold_start_users
    }
    # evaluate how well the recommended items predicted the future purchases
    # on cold start users
    n_recommended_items = evaluate_model.evaluate_recommendations(
        model_name="ER-CBF",
        top_ns=cold_start_top_ns,
        user_rating_history=test_user_history,
        item_popularity=item_popularity,
        n=n,
        mf_based=False,
    )

The ER-CBF has an average recall@5: 0.01867, average novelty@5: 0.94987
The ER-CBF has an average recall@10: 0.02746, average novelty@10: 0.95508
The ER-CBF has an average recall@15: 0.03691, average novelty@15: 0.95790
The ER-CBF has an average recall@20: 0.04233, average novelty@20: 0.96017


# Generating Recommended Items DataFrame

In [18]:
# assemble the recommendations DataFrame for the "ER-CBF" algorithm from the
# per-N results gathered in n_recommendations
max_recommendations = utilities.generate_recommendations_df(
    train=train,
    n_recommendations=n_recommendations,
    algo_name="ER-CBF",
    mf_based=False,
    max_recommended=20,
)

In [19]:
# display the recommendations frame (pandas truncates the rendered rows)
max_recommendations

Unnamed: 0,reviewerID,item_rank,asin,algorithm,title
0,A04173782GDZSQ91AJ7OD,0,B0002AT450,ER-CBF,Rake Set for Pet Waste Removal
1,A04173782GDZSQ91AJ7OD,1,B0002AQQ56,ER-CBF,"Omega Paw Paw-Cleaning Litter Mat, Tan"
2,A04173782GDZSQ91AJ7OD,2,B0002AR0KG,ER-CBF,Booda No Track Litter Mat
3,A04173782GDZSQ91AJ7OD,3,B0002DHA2Q,ER-CBF,Booda Scoop'N Hide Litter Scoop
4,A04173782GDZSQ91AJ7OD,4,B00023XCYG,ER-CBF,LitterMaid LMC100 Litter Box Carpet (LMC100)
...,...,...,...,...,...
381155,AZZYW4YOE1B6E,15,B0018707AW,ER-CBF,Safari Stainless Steel Double-Bladed Nail Trim...
381156,AZZYW4YOE1B6E,16,B0002DK09G,ER-CBF,JW Pet Company GripSoft Cat Nail Clipper
381157,AZZYW4YOE1B6E,17,B008FWOAXI,ER-CBF,Loving Pets Pure Buffalo 10-Inch Backstrap Ten...
381158,AZZYW4YOE1B6E,18,B0002ARUKQ,ER-CBF,Millers Forge Stainless Steel Dog Nail Clipper...


# Store in `SQLite` DB

In [20]:
# engine = create_engine("sqlite:///recommender.db", echo=True)

In [21]:
# max_recommendations.to_sql(f"{CATEGORY}", con=engine, if_exists="append")