In [1]:
from collections import defaultdict

import gensim
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from surprise import Dataset, Reader, NormalPredictor
from tqdm import tqdm

from src.models import evaluate_model
from src.utilities import utilities

tqdm.pandas()



# Load Data

In [2]:
# global variables
DATA_PATH = "data/evaluation"
CATEGORY = "Pet_Supplies"

# reproducibility checks
SEED = 42
np.random.seed(42)

train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")

In [3]:
# checking train dataframe
train.head().append(train.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A14CK12J7C7JRK,3.0,I purchased the Trilogy with hoping my two cat...,2011-01-12,purchase trilogy hop cat age interested yr old...
1,2,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2CR37UY3VR7BN,4.0,I bought the triliogy and have tested out all ...,2012-12-19,buy triliogy test dvd appear volume receive re...
2,3,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2A4COGL9VW2HY,4.0,My female kitty could care less about these vi...,2011-05-12,female kitty care video care little male dig a...
3,4,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2UBQA85NIGLHA,3.0,"If I had gotten just volume two, I would have ...",2012-03-05,volume star trilogy star read review know vol ...
4,5,B00005MF9U,LitterMaid LM900 Mega Self-Cleaning Litter Box,"['Pet Supplies', 'Cats', 'Litter &amp; Housebr...",A2BH04B9G9LOYA,1.0,"First off, it seems that someone is spamming t...",2006-12-31,spamming review glow reviewer review amazon ba...
68865,111581,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A11J1FHCK5U06J,4.0,Now I know exactly where the trouble spots are...,2014-05-23,know exactly trouble spot sniffing guess invis...
68866,111585,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A18JF0T0GOCORW,4.0,I use this light to help me find stains when I...,2014-05-24,use light help stain carpet clean pre treat ca...
68867,111595,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A3GRPCW9DG427Z,5.0,We are owned by the 3 pickiest pooches in the ...,2013-07-27,pickiest pooch world love fool reject doggie t...
68868,111598,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A2X6TLAX3JEO1A,5.0,My highly allergic white boxer loves these tre...,2014-05-09,highly allergic white boxer love treat meat co...
68869,111602,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A9PG9ODPPP31N,5.0,Works great on my medium sized dog. She has ve...,2014-07-09,work great medium size dog coarse hair work gr...


# Utility Function

In [4]:
def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

# Generate N-Recommendations = {10, 25, 30, 45}

## Load Test Data

In [5]:
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")

In [6]:
test.head().append(test.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,1,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A39QHP5WLON5HV,5.0,There are usually one or more of my cats watch...,2013-09-14,usually cat watch tv stay trouble dvd play lik...
1,104,B00005MF9V,LitterMaid Universal Cat Privacy Tent (LMT100),"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A366V0GCEPH5CX,5.0,My cats love it and so do I. I no longer have ...,2013-02-02,cat love longer cat litter fly floor litter fl...
2,133,B00005MF9T,LitterMaid LM500 Automated Litter Box,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",ALWWS8QBYN80B,1.0,I have one female cat that weighs under 10 pou...,2004-11-17,female cat weigh pound year old use everclean ...
3,153,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A3PVI3NE7OY1SP,5.0,I love these. They make the clean up so much e...,2013-09-26,love clean easy clean box manually use issue w...
4,154,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A2H83XMHUVDLJY,4.0,"I love this litter box. I do not use the lids,...",2014-06-26,love litter box use lid use receptacle tear cr...
41564,111601,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],AV34KNYW82YSS,4.0,Pulled lots of hair out of my Labs coat. Didn'...,2014-07-18,pulled lot hair labs coat think prove wrong co...
41565,111603,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1YMNTFLNDYQ1F,5.0,I have been trying to find a rubber bristle br...,2014-07-16,try rubber bristle brush persian year lose glo...
41566,111604,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1FQ3HRVXA4A5B,5.0,Great product to use on your pets knowing this...,2014-07-11,great product use pet know gentle rubber damag...
41567,111605,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A3OP6CI0XCRQXO,5.0,I bought a second one because I have two cats ...,2014-07-22,buy second cat american short hair buy brush m...
41568,111606,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A11LC938XF35XN,5.0,Our dogs love getting brushed with this. It m...,2014-07-17,dog love brush massage remove heavy undercoat ...


In [7]:
# generating test history
test_user_history = (pd.DataFrame(test.groupby(['reviewerID'])['asin']
                                  .apply(list).reset_index()))

In [8]:
print(test_user_history)

                  reviewerID                                  asin
0      A04173782GDZSQ91AJ7OD              [B0090Z9AYS, B00CPDWT2M]
1      A042274212BJJVOBS4Q85              [B005AZ4M3Q, B00771WQIY]
2       A0436342QLT4257JODYJ  [B0018CDR68, B003SJTM8Q, B00474A3DY]
3      A04795073FIBKY8GSLZYI              [B001PKT30M, B005DGI2RY]
4      A06658082A27F4VB5UG8E              [B000TZ1TTM, B0019VUHH0]
...                      ...                                   ...
18993          AZYJE40XW6MFG              [B00HVAKJZS, B00IDZT294]
18994          AZZ56WF4X19G2                          [B004A7X218]
18995          AZZNK89PXD006  [B0002DHV16, B005BP8MQ8, B009RTX4SU]
18996          AZZV9PDNMCOZW              [B007EQL390, B00ISBWVT6]
18997          AZZYW4YOE1B6E  [B0002AQPA2, B0002AQPA2, B0002ARQV4]

[18998 rows x 2 columns]


# Preparing Dataset for Surprise's Algorithm

In [9]:
# create reader
reader = Reader(rating_scale=(1,5))
# generate data required for surprise
data = Dataset.load_from_df(train[["reviewerID", "asin", "overall"]], reader)
# generating trainset
trainset = data.build_full_trainset()

# Instantiate Random Recommender (Topic Modelling)

In [10]:
random = NormalPredictor()

In [11]:
random.fit(trainset)

<surprise.prediction_algorithms.random_pred.NormalPredictor at 0x12e610a90>

In [12]:
%%time
# generate candidate items for user to predict rating
testset = trainset.build_anti_testset()

CPU times: user 48.9 s, sys: 2.19 s, total: 51.1 s
Wall time: 51.5 s


In [13]:
%%time
# predict ratings for all pairs (u, i) that are NOT in the training set
candidate_items = random.test(testset, verbose=False)

CPU times: user 10min 27s, sys: 5min 25s, total: 15min 53s
Wall time: 18min 7s


## Loop through N = {10, 25, 30, 45}

In [14]:
# generate item popularity
item_popularity = evaluate_model.generate_item_popularity(train)

In [15]:
n_recommendations = {}
for n in [10, 25, 30, 45]:
    # retrieve the top-n items based on similarities
    top_ns = get_top_n(candidate_items, n)
    # evaluate how well the recommended items predicted the future purchases
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name = 'RANDOM',
                                                    top_ns = top_ns,
                                                    user_rating_history = test_user_history, 
                                                    item_popularity = item_popularity, 
                                                    n = n,
                                                    mf_based = True))
    # saving the n-value and recommended items
    n_recommendations[n] = (top_ns, n_recommended_items)

The RANDOM has an average recall@10: 0.00085, average novelty@10: 0.91573
The RANDOM has an average recall@25: 0.00202, average novelty@25: 0.92857
The RANDOM has an average recall@30: 0.00256, average novelty@30: 0.93204
The RANDOM has an average recall@45: 0.00349, average novelty@45: 0.93951


# Evaluate N-Recommendations

## N=10

In [16]:
top_ns_10 = n_recommendations[10][0]
utilities.retrieve_recommendations(train, top_ns_10, mf_based=True)

For user: A1VDN9IJ05N2DM:
Purchase History:
             asin                                              title
13671  B0002ARQT6                      Marshall Small Animal PlayPen
13675  B0002ARQT6                      Marshall Small Animal PlayPen
27343  B0006ABVEI  IRIS Neat 'n Dry Premium Pet Training Pads, Sm...

Recommending:

         asin                                              title
0  B000068GQ3  Nutri-Vet Hip &amp; Joint Regular Strength Che...
1  B00006H36X  Advantage Once-A-Month Topical Flea Treatment ...
2  B00006HI46                    Mutt Mitt Dog Waste Pick Up Bag
3  B000084EEC    Nupro Nutri-Pet All Natural Supplement for Dogs
4  B000084ET2                World's Best Cat Litter (17-lb bag)
5  B000084ESL  KONG - Dental - Durable Rubber, Teeth and Gum ...
6  B000084F04  sWheat Scoop Fast-Clumping All-Natural Cat Lit...
7  B000084F2Q                   Aspen pet Cool Flow Pet Fountain
8  B000084EN5                      TetraPond Floating Koi Sticks
9  B00008DFGY 

## N=25

In [17]:
top_ns_25 = n_recommendations[25][0]
utilities.retrieve_recommendations(train, top_ns_25, mf_based=True)

For user: A2TE6RYAMG7MAA:
Purchase History:
             asin                                              title
45912  B0017SZSI8      Pawsitively Gourmet Doughnut Cookies For Dogs
48222  B001E52YO0   Nathan&#39;S Famous Hot Dog Treats, All Beef,...

Recommending:

          asin                                              title
0   1223000893    Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3
1   B00005MF9V     LitterMaid Universal Cat Privacy Tent (LMT100)
2   B00005MF9T              LitterMaid LM500 Automated Litter Box
3   B00005MF9W  LitterMaid Waste Receptacles Automatic Litter ...
4   B00006HI46                    Mutt Mitt Dog Waste Pick Up Bag
5   B00006IX5A               Canine Hardware Chuckit! Tennis Ball
6   B00006JHRE                    Lentek 6-Day Automatic Pet Dish
7   B000084EXU  Nylabone Dental Chew Petit Original Flavored B...
8   B000084EVX  Northeastern Products Cedarific Natural Cedar ...
9   B000084F1Z      Natural Balance Limited Ingredient Dog Treats
10  B00

## N=30

In [18]:
top_ns_30 = n_recommendations[30][0]
utilities.retrieve_recommendations(train, top_ns_30, mf_based=True)

For user: ATZAD00EIVTF4:
Purchase History:
             asin                                              title
26187  B0006345PW       Hill's Science Diet Adult Oral Care Cat Food
26473  B000634IBS           Purina Cat Chow Indoor Formula, 7-Pounds
29118  B0007A67SC  Purina Moist &amp; Meaty Burger With Cheddar C...
44025  B0012TYZ4K  Merrick Hungry Dog Value Pack, 2-Pound Bag, 1 Bag
51569  B001QCKS4O   Purina Pro Plan Savor Shredded Blend Chicken ...
54771  B002CJIPEK                     Purina Chef Michael'S Dog Food
66120  B007KLK9WQ  Joy Pet 6-1/2-Inch Double Headed Tarter Remove...

Recommending:

          asin                                              title
0   1223000893    Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3
1   B00005MF9U     LitterMaid LM900 Mega Self-Cleaning Litter Box
2   B00005MF9V     LitterMaid Universal Cat Privacy Tent (LMT100)
3   B00005OU62  LitterMaid LM700 Self-Cleaning Automatic Litte...
4   B000062WUT                             Multipet Plush D

## N=45

In [19]:
top_ns_45 = n_recommendations[45][0]
utilities.retrieve_recommendations(train, top_ns_45, mf_based=True)

For user: A2WR1ANKIO05ZF:
Purchase History:
             asin                                              title
38867  B000MD3NLS  MidWest Homes for Pets Snap'y Fit Stainless St...

Recommending:

          asin                                              title
0   B000065AAY              KONG Material Dog - Dr. Noys' Dog Toy
1   B00006H388  Novartis Program Once-a-Month Flea Prevention ...
2   B00006H38A  Program Oral Suspension For Cats 11-20 lbs, 6 ...
3   B00006HI46                    Mutt Mitt Dog Waste Pick Up Bag
4   B00006IV1S  BISSELL Pet Stain &amp; Odor Pretreat for Carp...
5   B000084F04  sWheat Scoop Fast-Clumping All-Natural Cat Lit...
6   B000084E66          Petmate 26124 Kitty Cap (Colors May Vary)
7   B000084F6X   Arm &amp; Hammer Super Scoop Litter, Fresh Scent
8   B00008DFGY  Frontline Plus for Dogs Small Dog (5-22 pounds...
9   B00008Q2XX  VetriScience Laboratories Canine Plus Suppleme...
10  B00009OLE2  Avoderm Natural Adult Dry Dog Food Chicken Mea...
11  B0000B

# Cross-Analysis for Cold-Start Users (<= 2 Purchased Items)

In [20]:
cold_start_users = utilities.generate_cold_start_users(train)

In [21]:
for n in tuple(zip([10, 25, 30, 45], [top_ns_10, top_ns_25, top_ns_30, top_ns_45])):
    cold_start_top_ns = dict(filter(lambda x: x[0] in cold_start_users, n[1].items()))
    # evaluate how well the recommended items predicted the future purchases
    # on cold start users
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name = 'RANDOM',
                                                    top_ns = cold_start_top_ns,
                                                    user_rating_history = test_user_history, 
                                                    item_popularity = item_popularity, 
                                                    n = n[0],
                                                    mf_based = True))

The RANDOM has an average recall@10: 0.00018, average novelty@10: 0.91557
The RANDOM has an average recall@25: 0.00054, average novelty@25: 0.92865
The RANDOM has an average recall@30: 0.00072, average novelty@30: 0.93209
The RANDOM has an average recall@45: 0.00120, average novelty@45: 0.93958


# Generating Recommended Items DataFrame

In [22]:
max_recommendations = (utilities
                       .generate_recommendations_df(
                           train, 
                           n_recommendations, 
                           "RANDOM", mf_based=True))

# Store in `SQLite` DB

In [25]:
engine = create_engine("sqlite:///recommender.db", echo=True)

In [26]:
max_recommendations.to_sql(f"{CATEGORY}", con=engine, if_exists="append")

2021-09-28 19:50:35,098 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("Pet_Supplies")
2021-09-28 19:50:35,099 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-28 19:50:35,414 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-09-28 19:50:39,586 INFO sqlalchemy.engine.Engine INSERT INTO "Pet_Supplies" ("index", "reviewerID", item_rank, asin, algorithm, title) VALUES (?, ?, ?, ?, ?, ?)
2021-09-28 19:50:39,587 INFO sqlalchemy.engine.Engine [generated in 3.17510s] ((0, 'A04173782GDZSQ91AJ7OD', 0, 'B000062WUT', 'RANDOM', 'Multipet Plush Dog Toy'), (1, 'A04173782GDZSQ91AJ7OD', 1, 'B000068GQ3', 'RANDOM', 'Nutri-Vet Hip &amp; Joint Regular Strength Chewables for Dogs'), (2, 'A04173782GDZSQ91AJ7OD', 2, 'B00006IX5A', 'RANDOM', 'Canine Hardware Chuckit! Tennis Ball'), (3, 'A04173782GDZSQ91AJ7OD', 3, 'B00006OALW', 'RANDOM', 'PetSTEP Original Folding Pet Ramp'), (4, 'A04173782GDZSQ91AJ7OD', 4, 'B000084ETV', 'RANDOM', 'Canidae Dry Dog Food For All Life Stages, Chicken, Turkey, Lamb, And Fi