In [1]:
from pathlib import Path
# import pickle
import warnings

import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from surprise import Dataset, Reader, dump
from tqdm import tqdm

from src.models import cf, evaluate_model
from src.utilities import utilities

tqdm.pandas()
warnings.filterwarnings('ignore')



# Load Data

In [2]:
# global variables
DATA_PATH = "data/evaluation"
CATEGORY = "Pet_Supplies"
# MODEL_PATH = Path(f"models/funk_svd/funk_svd_{CATEGORY}.pkl")

# training parameters
N_EPOCHS = 5
LR_ALL = 0.005
BETA = 0.1

# reproducibility checks
SEED = 42
np.random.seed(42)

train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")

In [3]:
# checking train dataframe
train.head().append(train.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A14CK12J7C7JRK,3.0,I purchased the Trilogy with hoping my two cat...,2011-01-12,purchase trilogy hop cat age interested yr old...
1,2,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2CR37UY3VR7BN,4.0,I bought the triliogy and have tested out all ...,2012-12-19,buy triliogy test dvd appear volume receive re...
2,3,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2A4COGL9VW2HY,4.0,My female kitty could care less about these vi...,2011-05-12,female kitty care video care little male dig a...
3,4,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2UBQA85NIGLHA,3.0,"If I had gotten just volume two, I would have ...",2012-03-05,volume star trilogy star read review know vol ...
4,5,B00005MF9U,LitterMaid LM900 Mega Self-Cleaning Litter Box,"['Pet Supplies', 'Cats', 'Litter &amp; Housebr...",A2BH04B9G9LOYA,1.0,"First off, it seems that someone is spamming t...",2006-12-31,spamming review glow reviewer review amazon ba...
68865,111581,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A11J1FHCK5U06J,4.0,Now I know exactly where the trouble spots are...,2014-05-23,know exactly trouble spot sniffing guess invis...
68866,111585,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A18JF0T0GOCORW,4.0,I use this light to help me find stains when I...,2014-05-24,use light help stain carpet clean pre treat ca...
68867,111595,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A3GRPCW9DG427Z,5.0,We are owned by the 3 pickiest pooches in the ...,2013-07-27,pickiest pooch world love fool reject doggie t...
68868,111598,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A2X6TLAX3JEO1A,5.0,My highly allergic white boxer loves these tre...,2014-05-09,highly allergic white boxer love treat meat co...
68869,111602,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A9PG9ODPPP31N,5.0,Works great on my medium sized dog. She has ve...,2014-07-09,work great medium size dog coarse hair work gr...


# Generate N-Recommendations = {10, 25, 30, 45}

## Load Test Data

In [4]:
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")

In [5]:
test.head().append(test.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,1,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A39QHP5WLON5HV,5.0,There are usually one or more of my cats watch...,2013-09-14,usually cat watch tv stay trouble dvd play lik...
1,104,B00005MF9V,LitterMaid Universal Cat Privacy Tent (LMT100),"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A366V0GCEPH5CX,5.0,My cats love it and so do I. I no longer have ...,2013-02-02,cat love longer cat litter fly floor litter fl...
2,133,B00005MF9T,LitterMaid LM500 Automated Litter Box,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",ALWWS8QBYN80B,1.0,I have one female cat that weighs under 10 pou...,2004-11-17,female cat weigh pound year old use everclean ...
3,153,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A3PVI3NE7OY1SP,5.0,I love these. They make the clean up so much e...,2013-09-26,love clean easy clean box manually use issue w...
4,154,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A2H83XMHUVDLJY,4.0,"I love this litter box. I do not use the lids,...",2014-06-26,love litter box use lid use receptacle tear cr...
41564,111601,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],AV34KNYW82YSS,4.0,Pulled lots of hair out of my Labs coat. Didn'...,2014-07-18,pulled lot hair labs coat think prove wrong co...
41565,111603,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1YMNTFLNDYQ1F,5.0,I have been trying to find a rubber bristle br...,2014-07-16,try rubber bristle brush persian year lose glo...
41566,111604,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1FQ3HRVXA4A5B,5.0,Great product to use on your pets knowing this...,2014-07-11,great product use pet know gentle rubber damag...
41567,111605,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A3OP6CI0XCRQXO,5.0,I bought a second one because I have two cats ...,2014-07-22,buy second cat american short hair buy brush m...
41568,111606,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A11LC938XF35XN,5.0,Our dogs love getting brushed with this. It m...,2014-07-17,dog love brush massage remove heavy undercoat ...


In [6]:
# generating test history
test_user_history = (pd.DataFrame(test.groupby(['reviewerID'])['asin']
                                  .apply(list).reset_index()))

In [7]:
print(test_user_history)

                  reviewerID                                  asin
0      A04173782GDZSQ91AJ7OD              [B0090Z9AYS, B00CPDWT2M]
1      A042274212BJJVOBS4Q85              [B005AZ4M3Q, B00771WQIY]
2       A0436342QLT4257JODYJ  [B0018CDR68, B003SJTM8Q, B00474A3DY]
3      A04795073FIBKY8GSLZYI              [B001PKT30M, B005DGI2RY]
4      A06658082A27F4VB5UG8E              [B000TZ1TTM, B0019VUHH0]
...                      ...                                   ...
18993          AZYJE40XW6MFG              [B00HVAKJZS, B00IDZT294]
18994          AZZ56WF4X19G2                          [B004A7X218]
18995          AZZNK89PXD006  [B0002DHV16, B005BP8MQ8, B009RTX4SU]
18996          AZZV9PDNMCOZW              [B007EQL390, B00ISBWVT6]
18997          AZZYW4YOE1B6E  [B0002AQPA2, B0002AQPA2, B0002ARQV4]

[18998 rows x 2 columns]


## Instantiate FunkSVD (Matrix Factorization)

In [8]:
# instantiating funksvd 
funk_svd = cf.FunkMF(n_epochs=N_EPOCHS, 
                     lr_all=LR_ALL, 
                     reg_all=BETA)

In [9]:
# fitting to the training data
funk_svd.fit(train)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4


In [10]:
%%time
# creating testset for prediction
testset = funk_svd.trainset.build_anti_testset()

CPU times: user 45.6 s, sys: 1.91 s, total: 47.6 s
Wall time: 47.7 s


In [11]:
%%time
# generate candidate times
candidate_items = funk_svd.test(testset)

CPU times: user 10min 39s, sys: 5min 36s, total: 16min 16s
Wall time: 18min 29s


## Save Model

* Not adviced to save model due to long persistence time required to save the model.

In [12]:
# MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
# dump.dump(MODEL_PATH, algo=funk_svd)

In [13]:
# load model
# _, funk_svd = dump.load(MODEL_PATH)

## Loop through N = {10, 25, 30, 45}

In [14]:
# generate item popularity
item_popularity = evaluate_model.generate_item_popularity(train)

In [15]:
%%time
n_recommendations = {}
for n in [10, 25, 30, 45]:
    # retrieve the top-n items based on similarities
    # top_ns = get_top_n(candidate_items, n)
    top_ns = funk_svd.get_top_n(n)
    # evaluate how well the recommended items predicted the future purchases
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name = 'FUNK-SVD',
                                                    top_ns = top_ns,
                                                    user_rating_history = test_user_history, 
                                                    item_popularity = item_popularity, 
                                                    n = n,
                                                    mf_based = True))
    # saving the n-value and recommended items
    n_recommendations[n] = (top_ns, n_recommended_items)

The FUNK-SVD has an average recall@10: 0.00893, average novelty@10: 0.79893
The FUNK-SVD has an average recall@25: 0.01945, average novelty@25: 0.82808
The FUNK-SVD has an average recall@30: 0.02230, average novelty@30: 0.83360
The FUNK-SVD has an average recall@45: 0.03075, average novelty@45: 0.84864
CPU times: user 6min 10s, sys: 3min 18s, total: 9min 28s
Wall time: 12min 6s


# Evaluate N-Recommendations

## N=10

In [16]:
top_ns_10 = n_recommendations[10][0]
utilities.retrieve_recommendations(train, top_ns_10, mf_based=True)

For user: AM7YDJ5OF3P5P:
Purchase History:
             asin                                              title
40436  B000PKSW5A  Precious Cat Dr. Elsey's Kitten Attract Scoopa...
50006  B001K5NTOS                   Our Pets SmartScoop Litter Mat-p
57972  B003R0LLIK  Purina Pro Plan Focus Chicken &amp; Rice Formu...
66811  B008MZILMY           Blue Tasty Chicken Kitty Yums Cat Treats

Recommending:

         asin                                              title
0  B001LNSSH2  ZYMOX Pet King Brand Otic Pet Ear Treatment wi...
1  B000F4AVPA                                Chuckit! Ultra Ball
2  B003JFRQQ4  Scaredy Cut Tiny Trim by Small Pet Grooming Sa...
3  B0002AS1CC                 Bergan Turbo Scratcher Accessories
4  B000255NCI                               API Master Test Kits
5  B0002DJONY                            Vittles Vault Stackable
6  B0002563O0                           JW Comfy Perch for Birds
7  B000K9JRH8  GoCat DaBird Feather Refill, Assorted Colors, ...
8  B0002AQK

## N=25

In [17]:
top_ns_25 = n_recommendations[25][0]
utilities.retrieve_recommendations(train, top_ns_25, mf_based=True)

For user: AUIQTTPIMCVQ4:
Purchase History:
             asin                                              title
40981  B000ROR8Z4  Sojos Good Dog Crunchy Natural Dog Treats, 8-O...
60885  B004ULYZII                         Bully Pizzle, 6 Inch 25 Pk
67348  B00AEJANCW  KONG Chase-It Wand Squeaking Dog Toy, Assorted...

Recommending:

          asin                                              title
0   B003JFRQQ4  Scaredy Cut Tiny Trim by Small Pet Grooming Sa...
1   B000K9JRH8  GoCat DaBird Feather Refill, Assorted Colors, ...
2   B00025Z6YI  TetraFin Balanced Diet Goldfish Flake Food for...
3   B0002DJONY                            Vittles Vault Stackable
4   B000255NCI                               API Master Test Kits
5   B000F4AVPA                                Chuckit! Ultra Ball
6   B0002AS1CC                 Bergan Turbo Scratcher Accessories
7   B000255OIG       Stewart Freeze Dried Treats 14 oz Beef Liver
8   B0002AQI9K                     Marina Airline Tubing, 20 Feet
9   B

## N=30

In [18]:
top_ns_30 = n_recommendations[30][0]
utilities.retrieve_recommendations(train, top_ns_30, mf_based=True)

For user: A2YORE61EX6ASQ:
Purchase History:
             asin                             title
24664  B0002XUIRA  Fashion Pet Suede Shearling Coat
24707  B0002XUIRA  Fashion Pet Suede Shearling Coat

Recommending:

          asin                                              title
0   B0002DJONY                            Vittles Vault Stackable
1   B000F4AVPA                                Chuckit! Ultra Ball
2   B0002AS1CC                 Bergan Turbo Scratcher Accessories
3   B001LNSSH2  ZYMOX Pet King Brand Otic Pet Ear Treatment wi...
4   B003JFRQQ4  Scaredy Cut Tiny Trim by Small Pet Grooming Sa...
5   B000K9JRH8  GoCat DaBird Feather Refill, Assorted Colors, ...
6   B000255NCI                               API Master Test Kits
7   B00025Z6YI  TetraFin Balanced Diet Goldfish Flake Food for...
8   B0010B6IFY  Purina Fancy Feast Grilled Collection Wet Cat ...
9   B003BYQ1C8                            Armarkat Cat Tree Model
10  B0002AQKIO        Fluval Carbon, 100-gram Nylon Bags -

## N=45

In [19]:
top_ns_45 = n_recommendations[45][0]
utilities.retrieve_recommendations(train, top_ns_45, mf_based=True)

For user: A3PDSVXNSBA9ZQ:
Purchase History:
             asin                                              title
20851  B0002DK2DU              Roll n Clean Self Cleaning Litter Box
21049  B0002DK2DU              Roll n Clean Self Cleaning Litter Box
31120  B0009YWLCM  K&amp;H Pet Products Pet Bed Warmer Tan - MET ...
35405  B000G136RM                 Hartz Home Protection Gel Dog Pads

Recommending:

          asin                                              title
0   B001LNSSH2  ZYMOX Pet King Brand Otic Pet Ear Treatment wi...
1   B000F4AVPA                                Chuckit! Ultra Ball
2   B000K9JRH8  GoCat DaBird Feather Refill, Assorted Colors, ...
3   B003JFRQQ4  Scaredy Cut Tiny Trim by Small Pet Grooming Sa...
4   B0002AS1CC                 Bergan Turbo Scratcher Accessories
5   B0002563O0                           JW Comfy Perch for Birds
6   B00008JOL0                       Zuke's Hip Action Dog Treats
7   B0002DJONY                            Vittles Vault Stackable
8

# Cross-Analysis for Cold-Start Users (<= 2 Purchased Items)

In [20]:
cold_start_users = utilities.generate_cold_start_users(train)

In [21]:
for n in tuple(zip([10, 25, 30, 45], [top_ns_10, top_ns_25, top_ns_30, top_ns_45])):
    cold_start_top_ns = dict(filter(lambda x: x[0] in cold_start_users, n[1].items()))
    # evaluate how well the recommended items predicted the future purchases
    # on cold start users
    n_recommended_items = (evaluate_model.
                           evaluate_recommendations(model_name = 'FUNK-SVD',
                                                    top_ns = cold_start_top_ns,
                                                    user_rating_history = test_user_history, 
                                                    item_popularity = item_popularity, 
                                                    n = n[0],
                                                    mf_based = True))

The FUNK-SVD has an average recall@10: 0.00909, average novelty@10: 0.79886
The FUNK-SVD has an average recall@25: 0.01867, average novelty@25: 0.82767
The FUNK-SVD has an average recall@30: 0.02168, average novelty@30: 0.83333
The FUNK-SVD has an average recall@45: 0.02944, average novelty@45: 0.84827


# Generating Recommended Items DataFrame

In [22]:
max_recommendations = (utilities
                       .generate_recommendations_df(
                           train, 
                           n_recommendations, 
                           "FUNK-SVD", mf_based=True))

In [23]:
max_recommendations

Unnamed: 0,reviewerID,item_rank,asin,algorithm,title
0,A04173782GDZSQ91AJ7OD,0,B001LNSSH2,FUNK-SVD,ZYMOX Pet King Brand Otic Pet Ear Treatment wi...
1,A04173782GDZSQ91AJ7OD,1,B0002DJONY,FUNK-SVD,Vittles Vault Stackable
2,A04173782GDZSQ91AJ7OD,2,B000F4AVPA,FUNK-SVD,Chuckit! Ultra Ball
3,A04173782GDZSQ91AJ7OD,3,B0002AQKIO,FUNK-SVD,"Fluval Carbon, 100-gram Nylon Bags - 3-Pack"
4,A04173782GDZSQ91AJ7OD,4,B000255NCI,FUNK-SVD,API Master Test Kits
...,...,...,...,...,...
857605,AZZYW4YOE1B6E,40,B0002ASCGM,FUNK-SVD,Van Ness Odor Control Extra Giant Enclosed Cat...
857606,AZZYW4YOE1B6E,41,B001B4TV2W,FUNK-SVD,"Chuckit! Max Glow Ball,"
857607,AZZYW4YOE1B6E,42,B000I6NF6W,FUNK-SVD,Mister Max Original Scent Anti Icky Poo Odor R...
857608,AZZYW4YOE1B6E,43,B000MPR2GI,FUNK-SVD,Good Pet Stuff Company Hidden Cat Litter Box


# Store in `SQLite` DB

In [26]:
engine = create_engine("sqlite:///recommender.db", echo=True)

In [27]:
max_recommendations.to_sql(f"{CATEGORY}", con=engine, if_exists="append")

2021-09-28 21:21:06,714 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("Pet_Supplies")
2021-09-28 21:21:06,715 INFO sqlalchemy.engine.Engine [raw sql] ()
2021-09-28 21:21:07,019 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-09-28 21:21:11,611 INFO sqlalchemy.engine.Engine INSERT INTO "Pet_Supplies" ("index", "reviewerID", item_rank, asin, algorithm, title) VALUES (?, ?, ?, ?, ?, ?)
2021-09-28 21:21:11,612 INFO sqlalchemy.engine.Engine [generated in 3.56559s] ((0, 'A04173782GDZSQ91AJ7OD', 0, 'B001LNSSH2', 'FUNK-SVD', 'ZYMOX Pet King Brand Otic Pet Ear Treatment with Hydrocortisone'), (1, 'A04173782GDZSQ91AJ7OD', 1, 'B0002DJONY', 'FUNK-SVD', 'Vittles Vault Stackable'), (2, 'A04173782GDZSQ91AJ7OD', 2, 'B000F4AVPA', 'FUNK-SVD', 'Chuckit! Ultra Ball'), (3, 'A04173782GDZSQ91AJ7OD', 3, 'B0002AQKIO', 'FUNK-SVD', 'Fluval Carbon, 100-gram Nylon Bags - 3-Pack'), (4, 'A04173782GDZSQ91AJ7OD', 4, 'B000255NCI', 'FUNK-SVD', 'API Master Test Kits'), (5, 'A04173782GDZSQ91AJ7OD', 5, 'B003JFRQ