In [1]:
from pathlib import Path
import pickle
import warnings

import gensim
import numpy as np
import pandas as pd
from surprise import Dataset, Reader
from tqdm import tqdm

from src.models import cf, evaluate_model, lda
from src.utilities import utilities

# register tqdm's pandas integration (adds `progress_apply` and friends to pandas objects)
tqdm.pandas()
# NOTE(review): this silences *every* warning for the whole session, including
# deprecation warnings raised by the libraries used below — consider narrowing the filter
warnings.filterwarnings('ignore')



# Load Data

In [2]:
# global variables
DATA_PATH = "data/evaluation"
CATEGORY = "Pet_Supplies"
MODEL_PATH = Path(f"models/ti_mf/ti_mf_{CATEGORY}.pkl")

# LDA parameters
EPOCHS = 10  # forwarded to lda.LDA as n_epochs

# training parameters (forwarded to cf.PreInitialisedMF)
N_EPOCHS = 10
LR_ALL = 0.01
BETA = 0.1

# reproducibility: seed numpy's global RNG from the single SEED constant so
# that changing SEED actually changes the seed in use
SEED = 42
np.random.seed(SEED)

# training split for the selected category
train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")

In [3]:
# checking train dataframe: first and last five rows in one table
# (DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent)
pd.concat([train.head(), train.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A14CK12J7C7JRK,3.0,I purchased the Trilogy with hoping my two cat...,2011-01-12,purchase trilogy hop cat age interested yr old...
1,2,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2CR37UY3VR7BN,4.0,I bought the triliogy and have tested out all ...,2012-12-19,buy triliogy test dvd appear volume receive re...
2,3,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2A4COGL9VW2HY,4.0,My female kitty could care less about these vi...,2011-05-12,female kitty care video care little male dig a...
3,4,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2UBQA85NIGLHA,3.0,"If I had gotten just volume two, I would have ...",2012-03-05,volume star trilogy star read review know vol ...
4,5,B00005MF9U,LitterMaid LM900 Mega Self-Cleaning Litter Box,"['Pet Supplies', 'Cats', 'Litter &amp; Housebr...",A2BH04B9G9LOYA,1.0,"First off, it seems that someone is spamming t...",2006-12-31,spamming review glow reviewer review amazon ba...
68865,111581,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A11J1FHCK5U06J,4.0,Now I know exactly where the trouble spots are...,2014-05-23,know exactly trouble spot sniffing guess invis...
68866,111585,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A18JF0T0GOCORW,4.0,I use this light to help me find stains when I...,2014-05-24,use light help stain carpet clean pre treat ca...
68867,111595,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A3GRPCW9DG427Z,5.0,We are owned by the 3 pickiest pooches in the ...,2013-07-27,pickiest pooch world love fool reject doggie t...
68868,111598,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A2X6TLAX3JEO1A,5.0,My highly allergic white boxer loves these tre...,2014-05-09,highly allergic white boxer love treat meat co...
68869,111602,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A9PG9ODPPP31N,5.0,Works great on my medium sized dog. She has ve...,2014-07-09,work great medium size dog coarse hair work gr...


# Preparing Topic Vectors

In [4]:
# split every pre-processed review on whitespace to obtain its token list
processed_reviews = train["processedReviewText"].map(lambda review: review.split())

In [5]:
# instantiate lda model on the tokenized reviews; EPOCHS is forwarded as n_epochs
lda_model = lda.LDA(reviews=processed_reviews, n_epochs=EPOCHS)

In [6]:
%%time
# training the LDA model (the recorded run took roughly 2 minutes of wall time)
lda_model.train()



CPU times: user 1min 21s, sys: 14 s, total: 1min 35s
Wall time: 1min 58s


# Generating User/Item Topic Vectors

In [7]:
# derive per-user and per-item topic vectors from the trained LDA model and `train`;
# presumably each *_idx_map maps a raw id to its row in the matching vector table
# — TODO confirm against utilities.generate_user_item_vectors
user_idx_map, user_vecs, item_idx_map, item_vecs = utilities.generate_user_item_vectors(lda_model, train)

100%|████████████████████████████████████████████| 19058/19058 [00:14<00:00, 1341.67it/s]
100%|███████████████████████████████████████████████| 4878/4878 [00:05<00:00, 913.74it/s]


In [8]:
# materialise both topic-vector tables as plain numpy arrays
user_factors, item_factors = (vecs.to_numpy() for vecs in (user_vecs, item_vecs))

In [9]:
# sanity check: topic vector of the first user (first row of the matrix)
user_factors[0]

array([0.00036379, 0.00036379, 0.00036379, 0.00036379, 0.26873264,
       0.00036379, 0.00036379, 0.00036379, 0.00036379, 0.00036379,
       0.00036379, 0.00036379, 0.00036379, 0.12225699, 0.00036379,
       0.00036379, 0.00036379, 0.17429906, 0.00036379, 0.23009104,
       0.00036379, 0.00036379, 0.00036379, 0.00036379, 0.00036379,
       0.00036379, 0.00036379, 0.00036379, 0.08897535, 0.00036379,
       0.00036379, 0.00036379, 0.00036379, 0.00036379, 0.00036379,
       0.00036379, 0.00036379, 0.00036379, 0.00036379, 0.00036379,
       0.00036379, 0.00036379, 0.09963837, 0.00036379, 0.00036379,
       0.00036379, 0.00036379, 0.00036379, 0.00036379, 0.00036379],
      dtype=float32)

In [10]:
# sanity check: topic vector of the first item (first row of the matrix)
item_factors[0]

array([1.1177247e-01, 1.4506711e-04, 5.6503665e-02, 1.4506711e-04,
       1.4506711e-04, 1.4506711e-04, 1.4506711e-04, 1.4506711e-04,
       1.4506711e-04, 2.2955917e-02, 1.4506711e-04, 5.0217144e-02,
       1.4506711e-04, 1.4506711e-04, 1.4506711e-04, 1.4506711e-04,
       2.4304722e-02, 1.4506711e-04, 1.4506711e-04, 1.4506711e-04,
       1.4506711e-04, 8.3538137e-02, 1.4506711e-04, 1.4506711e-04,
       1.4506711e-04, 1.4506711e-04, 1.4506711e-04, 2.7800582e-02,
       3.9269468e-01, 1.4506711e-04, 1.4506711e-04, 1.4506711e-04,
       1.4506711e-04, 1.4506711e-04, 1.4506711e-04, 7.0282929e-02,
       9.3918078e-02, 1.4506711e-04, 1.4506711e-04, 1.4506711e-04,
       1.4506711e-04, 1.4506711e-04, 1.4506711e-04, 1.4506711e-04,
       1.4506711e-04, 1.4506711e-04, 1.4506711e-04, 4.0322088e-02,
       1.4506711e-04, 2.0177044e-02], dtype=float32)

# Utility Functions

# Generate N-Recommendations = {10, 25, 30, 45}

## Load Test Data

In [11]:
# held-out split for the same category, read from the same directory as `train`
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")

In [12]:
# first and last five rows of the test split in one table
# (DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent)
pd.concat([test.head(), test.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,1,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A39QHP5WLON5HV,5.0,There are usually one or more of my cats watch...,2013-09-14,usually cat watch tv stay trouble dvd play lik...
1,104,B00005MF9V,LitterMaid Universal Cat Privacy Tent (LMT100),"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A366V0GCEPH5CX,5.0,My cats love it and so do I. I no longer have ...,2013-02-02,cat love longer cat litter fly floor litter fl...
2,133,B00005MF9T,LitterMaid LM500 Automated Litter Box,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",ALWWS8QBYN80B,1.0,I have one female cat that weighs under 10 pou...,2004-11-17,female cat weigh pound year old use everclean ...
3,153,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A3PVI3NE7OY1SP,5.0,I love these. They make the clean up so much e...,2013-09-26,love clean easy clean box manually use issue w...
4,154,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A2H83XMHUVDLJY,4.0,"I love this litter box. I do not use the lids,...",2014-06-26,love litter box use lid use receptacle tear cr...
41564,111601,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],AV34KNYW82YSS,4.0,Pulled lots of hair out of my Labs coat. Didn'...,2014-07-18,pulled lot hair labs coat think prove wrong co...
41565,111603,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1YMNTFLNDYQ1F,5.0,I have been trying to find a rubber bristle br...,2014-07-16,try rubber bristle brush persian year lose glo...
41566,111604,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1FQ3HRVXA4A5B,5.0,Great product to use on your pets knowing this...,2014-07-11,great product use pet know gentle rubber damag...
41567,111605,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A3OP6CI0XCRQXO,5.0,I bought a second one because I have two cats ...,2014-07-22,buy second cat american short hair buy brush m...
41568,111606,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A11LC938XF35XN,5.0,Our dogs love getting brushed with this. It m...,2014-07-17,dog love brush massage remove heavy undercoat ...


In [13]:
# collect, per reviewer, the list of items (asin) they reviewed in the test split
test_user_history = (
    test.groupby(['reviewerID'])['asin']
    .apply(list)
    .reset_index()
)

In [14]:
# plain-text dump of the per-user test history (one row per reviewerID with its asin list)
print(test_user_history)

                  reviewerID                                  asin
0      A04173782GDZSQ91AJ7OD              [B0090Z9AYS, B00CPDWT2M]
1      A042274212BJJVOBS4Q85              [B005AZ4M3Q, B00771WQIY]
2       A0436342QLT4257JODYJ  [B0018CDR68, B003SJTM8Q, B00474A3DY]
3      A04795073FIBKY8GSLZYI              [B001PKT30M, B005DGI2RY]
4      A06658082A27F4VB5UG8E              [B000TZ1TTM, B0019VUHH0]
...                      ...                                   ...
18993          AZYJE40XW6MFG              [B00HVAKJZS, B00IDZT294]
18994          AZZ56WF4X19G2                          [B004A7X218]
18995          AZZNK89PXD006  [B0002DHV16, B005BP8MQ8, B009RTX4SU]
18996          AZZV9PDNMCOZW              [B007EQL390, B00ISBWVT6]
18997          AZZYW4YOE1B6E  [B0002AQPA2, B0002AQPA2, B0002ARQV4]

[18998 rows x 2 columns]


## Preparing Dataset for Surprise's Algorithm

In [15]:
# surprise consumes (user, item, rating) triples together with the rating bounds
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(
    train.loc[:, ["reviewerID", "asin", "overall"]],
    reader,
)
# use every training rating for fitting (no internal hold-out)
trainset = data.build_full_trainset()

## Instantiate Pre-Initialised Matrix Factorization (Topic Modelling)

In [16]:
# instantiating ti_mf with the LDA-derived user/item factor matrices
# The latent dimension is read off the factor matrices instead of being
# hard-coded, so it cannot drift from the width of the supplied factors.
assert user_factors.shape[1] == item_factors.shape[1], "user/item factor widths differ"
ti_mf = cf.PreInitialisedMF(user_map=user_idx_map,
                            item_map=item_idx_map,
                            user_factor=user_factors,
                            item_factor=item_factors,
                            learning_rate=LR_ALL,
                            beta=BETA,
                            num_epochs=N_EPOCHS,
                            num_factors=user_factors.shape[1])

In [17]:
%%time
# fitting to training data; verbose=True logs one "Processing epoch k" line per epoch
# (the recorded run took roughly 5.5 minutes of wall time)
ti_mf.fit(trainset, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
CPU times: user 5min 36s, sys: 1.41 s, total: 5min 37s
Wall time: 5min 40s


In [18]:
%%time
# generate candidate items for user to predict rating:
# every (user, item) pair that has no rating in `trainset` becomes a candidate
testset = trainset.build_anti_testset()

CPU times: user 42.8 s, sys: 1.85 s, total: 44.7 s
Wall time: 44.8 s


In [19]:
%%time
# predict ratings for all pairs (u, i) that are NOT in the training set
# NOTE: slowest cell in the notebook — the recorded run took ~20 minutes of wall time
candidate_items = ti_mf.test(testset, verbose=False)

CPU times: user 12min 5s, sys: 5min 35s, total: 17min 40s
Wall time: 19min 54s


## Save Model
* Saving the model is not advised: persisting it to disk takes a long time.

In [20]:
# NOTE(review): `dump` is not imported anywhere in this notebook — presumably
# `from surprise import dump` is required before uncommenting; TODO confirm
# MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
# dump.dump(MODEL_PATH, algo=ti_mf)

In [21]:
# load model
# NOTE(review): `dump` is not imported anywhere in this notebook — presumably
# `from surprise import dump` is required before uncommenting; TODO confirm
# _, ti_mf = dump.load(MODEL_PATH)

## Loop through N = {10, 25, 30, 45}

In [22]:
# generate item popularity from the training interactions; it is handed to
# evaluate_recommendations in the evaluation loop
# (presumably the basis of the reported novelty metric — TODO confirm)
item_popularity = evaluate_model.generate_item_popularity(train)

In [23]:
n_recommendations = {}
for n in (10, 25, 30, 45):
    # top-n candidate items per user, taken from the predicted ratings
    top_ns = ti_mf.get_top_n(candidate_items, n)
    # score the recommendations against the users' test-split history
    n_recommended_items = evaluate_model.evaluate_recommendations(
        model_name='TI-MF',
        top_ns=top_ns,
        user_rating_history=test_user_history,
        item_popularity=item_popularity,
        n=n,
        mf_based=True,
    )
    # keep both the raw top-n lists and the evaluation result, keyed by n
    n_recommendations[n] = (top_ns, n_recommended_items)

The TI-MF has an average recall@10: 0.00195, average novelty@10: 0.96859
The TI-MF has an average recall@25: 0.00689, average novelty@25: 0.95925
The TI-MF has an average recall@30: 0.00829, average novelty@30: 0.95847
The TI-MF has an average recall@45: 0.01263, average novelty@45: 0.95831


# Evaluate N-Recommendations

## N=10

In [24]:
# element 0 of the stored tuple is the top-n mapping (element 1 is the evaluation result)
top_ns_10 = n_recommendations[10][0]
# prints one user's purchase history from `train` next to their recommended items;
# the user shown differs between calls — presumably sampled at random, TODO confirm
utilities.retrieve_recommendations(train, top_ns_10, mf_based=True)

For user: A4F6KTQ0A6REY:
Purchase History:
             asin                                              title
29467  B0009JOX2S  Purina Fancy Feast With Ocean Fish &amp; Salmo...
43750  B0012KB466  Purina Fancy Feast Flaked Feast Wet Cat Food -...
63942  B005MWF4BQ  Purina Fancy Feast Purely Natural Entrees in a...
64115  B005OB3E30  Purina Fancy Feast Gravy Lovers Poultry &amp; ...

Recommending:

         asin                                              title
0  B003BYQ100             Armarkat B5701 57-Inch Cat Tree, Ivory
1  B001IMN8GW  New Kitty Cat Fun Play Tunnel 4 Feet Long Pet ...
2  B00025Z6IO  HIKARI Tropical Algae Wafers for Plecostomus &...
3  B000K9JRH8  GoCat DaBird Feather Refill, Assorted Colors, ...
4  B0002Y1QOI                        Rattling Fur Mice - 12 pack
5  B0009YUE7Q         Our Pets Lift-N-Aid Large Mobility Harness
6  B0051BGOJ4    KONG Beaver Refillable Catnip Toy (Colors Vary)
7  B0029NIBE8            Whiskas Tender Bites Wet Cat Food Pouch
8  B001JQLK

## N=25

In [25]:
# element 0 of the stored tuple is the top-n mapping (element 1 is the evaluation result)
top_ns_25 = n_recommendations[25][0]
# prints one user's purchase history from `train` next to their recommended items;
# the user shown differs between calls — presumably sampled at random, TODO confirm
utilities.retrieve_recommendations(train, top_ns_25, mf_based=True)

For user: A162BBV0C0ZFNK:
Purchase History:
             asin                                              title
24594  B0002XJ15U  X-O Odor Neutralizer Concentrated ( 32oz, 1gal...
24598  B0002XJ15U  X-O Odor Neutralizer Concentrated ( 32oz, 1gal...
51429  B001Q9EGHC      Hartz Nodor Unscented Cat Litter Spray - 17oz

Recommending:

          asin                                              title
0   B00025Z6IO  HIKARI Tropical Algae Wafers for Plecostomus &...
1   B003B3S3TS  EZwhelp 27&quot; x 32&quot; Machine Washable, ...
2   B009XB1E5W  Allmax Puppy Training Pads, 27.5-Inch by 35.5-...
3   B000HHSAU8    Tetra Whisper Fish Net With Soft Cushion Handle
4   B0012KB4M0  Purina Busy Bone Large Dog Treats - 8 Count - ...
5   B000GA75RK  33 Pack GRIDLOCK 24&rdquo; x 24&rdquo; Puppy D...
6   B0017JFNNC                     Redbarn Naturals Bully Springs
7   B00028ZLV8      Sentry Petrodex Enzymatic Toothpaste for Dogs
8   B0029O0XGQ                     Temptations Classic Cat Treats
9   

## N=30

In [26]:
# element 0 of the stored tuple is the top-n mapping (element 1 is the evaluation result)
top_ns_30 = n_recommendations[30][0]
# prints one user's purchase history from `train` next to their recommended items;
# the user shown differs between calls — presumably sampled at random, TODO confirm
utilities.retrieve_recommendations(train, top_ns_30, mf_based=True)

For user: ABJVSQBJ4G438:
Purchase History:
             asin                                              title
68610  B00H7PY3JA  Petseer Pet Odor Eliminator and Stain Remover ...
68836  B00IRJUCL8  Pawbiotics pet probiotics and enzymes with pre...
68858  B00JRC5WC8  Cat Bed - Cozy Cat Thermal Cat Mat Cheetah Leo...

Recommending:

          asin                                              title
0   B00025Z6IO  HIKARI Tropical Algae Wafers for Plecostomus &...
1   B000F4O9S0  LuckyPet Pet ID Tag - Round - Custom engraved ...
2   B00028ZLV8      Sentry Petrodex Enzymatic Toothpaste for Dogs
3   B0002ARKVA           Kenic Kalaya Emu Oil Pet Spray, 17-Ounce
4   B0012KB4M0  Purina Busy Bone Large Dog Treats - 8 Count - ...
5   B000HHSAU8    Tetra Whisper Fish Net With Soft Cushion Handle
6   B0002DK4AG  Four Paws Magic Coat Gentle Slicker Wire Brush...
7   B0002563S6          Magic Coat Cat Tearless Shampoo, 12-Ounce
8   B0017JFNNC                     Redbarn Naturals Bully Springs
9   B

## N=45

In [27]:
# element 0 of the stored tuple is the top-n mapping (element 1 is the evaluation result)
top_ns_45 = n_recommendations[45][0]
# prints one user's purchase history from `train` next to their recommended items;
# the user shown differs between calls — presumably sampled at random, TODO confirm
utilities.retrieve_recommendations(train, top_ns_45, mf_based=True)

For user: A27DWPL2X5520Z:
Purchase History:
             asin                                              title
16881  B0002DGLPS  Nylabone FlexiChew Regular Bone Dog Chew Toy, ...
16895  B0002DGLPS  Nylabone FlexiChew Regular Bone Dog Chew Toy, ...
29822  B0009X0RA6  Chicken Soup For The Dog Lover'S Soul Dry Dog ...
48062  B001CXIUVK  Corner Dog Bed with Bolster XXL 44&quot; x 64&...

Recommending:

          asin                                              title
0   B00025Z6IO  HIKARI Tropical Algae Wafers for Plecostomus &...
1   B000HHSAU8    Tetra Whisper Fish Net With Soft Cushion Handle
2   B000F4O9S0  LuckyPet Pet ID Tag - Round - Custom engraved ...
3   B00028ZLV8      Sentry Petrodex Enzymatic Toothpaste for Dogs
4   B0012KB4M0  Purina Busy Bone Large Dog Treats - 8 Count - ...
5   B0017JFNNC                     Redbarn Naturals Bully Springs
6   B001LNUKE6                Purebites Cheddar Cheese Dog Treats
7   B0002563S6          Magic Coat Cat Tearless Shampoo, 12-Ounce
8