In [1]:
from collections import defaultdict
from itertools import chain
import logging

import numpy as np
import pandas as pd
import warnings
import yaml

from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from pandarallel import pandarallel
from surprise import Dataset, Reader
from tqdm import tqdm

from src.models import cf

pandarallel.initialize()
tqdm.pandas()
warnings.filterwarnings('ignore')



INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


# Load Data and Models

In [2]:
# global variables
DATA_PATH = "data/evaluation"
# D2V_PATH = "models/d2v"
CATEGORY = "Pet_Supplies"

# training parameters
N_EPOCHS = 10
LR_ALL = 0.005
BETA = 0.1

train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")

In [3]:
# checking train dataframe
train.head().append(train.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A14CK12J7C7JRK,3.0,I purchased the Trilogy with hoping my two cat...,2011-01-12,purchase trilogy hop cat age interested yr old...
1,2,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2CR37UY3VR7BN,4.0,I bought the triliogy and have tested out all ...,2012-12-19,buy triliogy test dvd appear volume receive re...
2,3,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2A4COGL9VW2HY,4.0,My female kitty could care less about these vi...,2011-05-12,female kitty care video care little male dig a...
3,4,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A2UBQA85NIGLHA,3.0,"If I had gotten just volume two, I would have ...",2012-03-05,volume star trilogy star read review know vol ...
4,5,B00005MF9U,LitterMaid LM900 Mega Self-Cleaning Litter Box,"['Pet Supplies', 'Cats', 'Litter &amp; Housebr...",A2BH04B9G9LOYA,1.0,"First off, it seems that someone is spamming t...",2006-12-31,spamming review glow reviewer review amazon ba...
68865,111581,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A11J1FHCK5U06J,4.0,Now I know exactly where the trouble spots are...,2014-05-23,know exactly trouble spot sniffing guess invis...
68866,111585,B00K3YPOO0,Brightest Black Light Flashlight on Amazon- UV...,[],A18JF0T0GOCORW,4.0,I use this light to help me find stains when I...,2014-05-24,use light help stain carpet clean pre treat ca...
68867,111595,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A3GRPCW9DG427Z,5.0,We are owned by the 3 pickiest pooches in the ...,2013-07-27,pickiest pooch world love fool reject doggie t...
68868,111598,B00K7EG97C,Nutro Crunchy Dog Treats with Real Mixed Berri...,"['Pet Supplies', 'Dogs', 'Treats', 'Cookies, B...",A2X6TLAX3JEO1A,5.0,My highly allergic white boxer loves these tre...,2014-05-09,highly allergic white boxer love treat meat co...
68869,111602,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A9PG9ODPPP31N,5.0,Works great on my medium sized dog. She has ve...,2014-07-09,work great medium size dog coarse hair work gr...


# Train Doc2Vec Model (User & Item)

In [4]:
logging.basicConfig(level=logging.DEBUG)

In [5]:
# read params
params = yaml.safe_load(open("params.yaml"))["generate_vectors"]
MODEL_PARAMS = params["d2v_params"]

print(MODEL_PARAMS)

{'dm': 1, 'vector_size': 50, 'min_count': 1, 'negative': 5, 'ns_exponent': 0.5, 'sample': 1e-05, 'workers': 8, 'epochs': 10}


In [6]:
train["processedReviewText"] = train["processedReviewText"].progress_apply(lambda x: x.split())

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 68870/68870 [00:00<00:00, 173894.29it/s]


In [7]:
item_corpus = [
    TaggedDocument(review, [asin])
    for asin, review in list(zip(train["asin"], train["processedReviewText"]))
]

user_corpus = [
    TaggedDocument(review, [reviewerID])
    for reviewerID, review in list(zip(train["reviewerID"], train["processedReviewText"]))
]

In [8]:
train_corpus = item_corpus + user_corpus

## Training Doc2Vec

In [9]:
d2v = Doc2Vec(**MODEL_PARAMS)
d2v.build_vocab(train_corpus)
d2v.train(train_corpus, total_examples=d2v.corpus_count, epochs=d2v.epochs)

DEBUG:gensim.utils:starting a new internal lifecycle event log for Doc2Vec
INFO:gensim.utils:Doc2Vec lifecycle event {'params': 'Doc2Vec(dm/m,d50,n5,w5,s1e-05,t8)', 'datetime': '2021-09-10T02:30:36.639826', 'gensim': '4.0.1', 'python': '3.9.6 (default, Jun 29 2021, 05:25:02) \n[Clang 12.0.5 (clang-1205.0.22.9)]', 'platform': 'macOS-11.4-x86_64-i386-64bit', 'event': 'created'}
INFO:gensim.models.doc2vec:collecting all words and their counts
INFO:gensim.models.doc2vec:PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
INFO:gensim.models.doc2vec:PROGRESS: at example #10000, processed 328019 words (4456260/s), 10456 word types, 296 tags
INFO:gensim.models.doc2vec:PROGRESS: at example #20000, processed 621905 words (4437470/s), 14219 word types, 588 tags
INFO:gensim.models.doc2vec:PROGRESS: at example #30000, processed 973691 words (4659207/s), 18951 word types, 1070 tags
INFO:gensim.models.doc2vec:PROGRESS: at example #40000, processed 1319005 words (1330613/s), 23818 w

DEBUG:gensim.models.word2vec:worker exiting, processed 60 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 1 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 62 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 0 more threads
INFO:gensim.models.word2vec:EPOCH - 2 : training on 4829848 raw words (1323651 effective words) took 9.5s, 139926 effective words/s
INFO:gensim.models.word2vec:EPOCH 3 - PROGRESS: at 10.21% examples, 123118 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 3 - PROGRESS: at 21.85% examples, 130019 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 3 - PROGRESS: at 33.12% examples, 134808 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 3 - PROGRESS: at 44.70% examples, 142333 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 3 - PROGRESS: at 55.86% examples, 144231 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2v

INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 7 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 59 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 6 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 60 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 5 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 62 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 4 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 61 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 3 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 60 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 2 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 61 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 1 more threads
DEBUG:gensim.models.

INFO:gensim.models.word2vec:EPOCH 10 - PROGRESS: at 33.99% examples, 138487 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 10 - PROGRESS: at 45.09% examples, 143375 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 10 - PROGRESS: at 56.43% examples, 145628 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 10 - PROGRESS: at 68.11% examples, 143816 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 10 - PROGRESS: at 79.65% examples, 143744 words/s, in_qsize 15, out_qsize 0
INFO:gensim.models.word2vec:EPOCH 10 - PROGRESS: at 91.03% examples, 145345 words/s, in_qsize 16, out_qsize 0
DEBUG:gensim.models.word2vec:job loop exiting, total 485 jobs
DEBUG:gensim.models.word2vec:worker exiting, processed 60 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 7 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 61 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finis

# Generating User & Item Embeddings

In [10]:
def generate_user_item_vectors(train: pd.DataFrame, d2v: Doc2Vec) -> tuple:
    """
    """
    # get unique users and items
    unique_users = train["reviewerID"].unique().tolist()
    unique_items = train["asin"].unique().tolist()
    
    # generating mapping
    user_idx_map = {j: unique_users[j] for j in range(len(unique_users))}
    item_idx_map = {k: unique_items[k] for k in range(len(unique_items))}
    user_vec_map = {j: d2v[j] for j in unique_users}
    item_vec_map = {k: d2v[k] for k in unique_items}
    
    # loading user d2v vectors into DF
    user_vecs = pd.DataFrame.from_dict(user_vec_map, orient='index')
    user_vecs.index.name = 'reviewerID'
    # loading item d2v vectors into DF
    item_vecs = pd.DataFrame.from_dict(item_vec_map, orient='index')
    item_vecs.index.name = 'asin'
    
    return user_idx_map, user_vecs, item_idx_map, item_vecs

In [11]:
user_idx_map, user_vecs, item_idx_map, item_vecs = generate_user_item_vectors(train, d2v)

In [12]:
# converting factors into numpy obj
user_factors = user_vecs.to_numpy()
item_factors = item_vecs.to_numpy()

In [13]:
# check user factors
user_factors[0,:]

array([-0.77922696, -0.04419504,  0.16107428, -0.25824603,  0.0884501 ,
       -0.02654092, -0.02497942,  0.16291593, -0.37637684,  0.30814305,
        0.11929291,  0.2186527 ,  0.18439731,  0.11816351,  0.08316469,
        0.12371369, -0.05493512,  0.11931185, -0.2154937 , -0.31036133,
        0.01955248,  0.26068544, -0.10980664, -0.10918962, -0.06849704,
        0.23774654, -0.25029686, -0.4825891 , -0.2773218 , -0.19737539,
        0.32247096, -0.10018329,  0.03507648,  0.11142119,  0.06777498,
        0.58456314, -0.11256726, -0.14702937,  0.5582978 ,  0.37698284,
       -0.06631536,  0.10135109, -0.05887937, -0.1210361 , -0.15826057,
        0.27591133,  0.21363966, -0.12117494,  0.00095104,  0.03555251],
      dtype=float32)

In [14]:
# check item factors
item_factors[0,:]

array([-0.26331735, -0.11056983, -0.03232048, -0.01091432, -0.038435  ,
        0.02057339,  0.05015131, -0.0178882 , -0.10059441,  0.05925798,
        0.05865845,  0.13821098,  0.04844033,  0.04657209, -0.06200987,
       -0.05522555, -0.03117626, -0.01920465, -0.0764898 ,  0.05243775,
       -0.00426404,  0.0231584 , -0.08311716, -0.10264607, -0.05266638,
        0.09226736, -0.0326032 , -0.15560375, -0.05665855, -0.13925622,
        0.07608208,  0.05124145, -0.03601347,  0.10328931,  0.00628529,
        0.13207851, -0.095514  , -0.15085706,  0.14196587,  0.00942813,
       -0.02785949,  0.07429376, -0.041956  , -0.01490974,  0.0295794 ,
        0.16168976,  0.11567512,  0.00170415, -0.05042413, -0.02102186],
      dtype=float32)

# Utility Functions

In [15]:
def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in tqdm(predictions):
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in tqdm(top_n.items()):
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

def recall_at_k(asins, predicted_asins, k=10):
    # number of relevant items
    set_actual = set(asins)
    set_preds = set(predicted_asins)
    num_relevant = len(set_actual.intersection(set_preds))
    
    # calculating recall@K - relevant / total relevant items
    recall_at_k = num_relevant / len(asins)
    
    return recall_at_k

def novelty_at_k(item_popularity, predicted_asins, k=10):
    """
    """
    # finding avg novelty
    popularity_sum = item_popularity.loc[predicted_asins].sum()
    novelty_at_k = ((k*1) - popularity_sum) / k
    
    return novelty_at_k

def generate_item_popularity(train: pd.DataFrame) -> pd.DataFrame:
    """
    """
    
    # create a mapping of item popularatity
    # based on sum(item's review / max reviews) / no items
    max_reviews = (train.groupby(['asin'])
                   .agg({'processedReviewText': 'count'})
                   .max()
                   .values[0])
    item_popularity = (train.groupby(['asin'])
                       .agg({'processedReviewText': 'count'})
                       .apply(lambda x: x/max_reviews))
    
    return item_popularity
    

def evaluate_recommendations(top_ns: dict, user_rating_history: pd.DataFrame, item_popularity: pd.DataFrame, k=10) -> pd.DataFrame:
    """
    
    Args:
        top_ns
        user_rating_history
    """
    
    test_recommendations = pd.DataFrame(top_ns.items(), columns=["reviewerID", "pred_asin"])
    test_recommendations['pred_asin'] = test_recommendations['pred_asin'].apply(lambda x: [i[0] for i in x])
    
    # combined test history and recommendations
    test_merged = pd.merge(user_rating_history, test_recommendations, on="reviewerID", how="inner")
    
    # generating recall@k metrics
    test_merged["recall@k"] = test_merged.apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)
    test_merged["novelty@k"] = test_merged.apply(lambda x: novelty_at_k(item_popularity, x.pred_asin, k=k), axis=1)
    average_recall_at_k = test_merged["recall@k"].mean()
    average_novelty_at_k = test_merged["novelty@k"].mean()
    
    print(f"The MOD-ECF has an average recall@{k}: {average_recall_at_k:.5f}, average novelty@{k}: {average_novelty_at_k:.5f}")
    
    return test_merged

# Generate N-Recommendations = {10, 25, 30, 45}

## Load Test Data

In [16]:
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")
test.head().append(test.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,1,1223000893,"Cat Sitter DVD Trilogy - Vol 1, Vol 2 and Vol 3",[],A39QHP5WLON5HV,5.0,There are usually one or more of my cats watch...,2013-09-14,usually cat watch tv stay trouble dvd play lik...
1,104,B00005MF9V,LitterMaid Universal Cat Privacy Tent (LMT100),"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A366V0GCEPH5CX,5.0,My cats love it and so do I. I no longer have ...,2013-02-02,cat love longer cat litter fly floor litter fl...
2,133,B00005MF9T,LitterMaid LM500 Automated Litter Box,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",ALWWS8QBYN80B,1.0,I have one female cat that weighs under 10 pou...,2004-11-17,female cat weigh pound year old use everclean ...
3,153,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A3PVI3NE7OY1SP,5.0,I love these. They make the clean up so much e...,2013-09-26,love clean easy clean box manually use issue w...
4,154,B00005MF9W,LitterMaid Waste Receptacles Automatic Litter ...,"['Pet Supplies', 'Cats', 'Litter & Housebreaki...",A2H83XMHUVDLJY,4.0,"I love this litter box. I do not use the lids,...",2014-06-26,love litter box use lid use receptacle tear cr...
41564,111601,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],AV34KNYW82YSS,4.0,Pulled lots of hair out of my Labs coat. Didn'...,2014-07-18,pulled lot hair labs coat think prove wrong co...
41565,111603,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1YMNTFLNDYQ1F,5.0,I have been trying to find a rubber bristle br...,2014-07-16,try rubber bristle brush persian year lose glo...
41566,111604,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A1FQ3HRVXA4A5B,5.0,Great product to use on your pets knowing this...,2014-07-11,great product use pet know gentle rubber damag...
41567,111605,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A3OP6CI0XCRQXO,5.0,I bought a second one because I have two cats ...,2014-07-22,buy second cat american short hair buy brush m...
41568,111606,B00KJGFGFO,Curry Brush with Coarse or Fine Bristles. High...,[],A11LC938XF35XN,5.0,Our dogs love getting brushed with this. It m...,2014-07-17,dog love brush massage remove heavy undercoat ...


In [17]:
# generating test history
test_user_history = (pd.DataFrame(test.groupby(['reviewerID'])['asin']
                                  .apply(list).reset_index()))

In [18]:
print(test_user_history)

                  reviewerID                                  asin
0      A04173782GDZSQ91AJ7OD              [B0090Z9AYS, B00CPDWT2M]
1      A042274212BJJVOBS4Q85              [B005AZ4M3Q, B00771WQIY]
2       A0436342QLT4257JODYJ  [B0018CDR68, B003SJTM8Q, B00474A3DY]
3      A04795073FIBKY8GSLZYI              [B001PKT30M, B005DGI2RY]
4      A06658082A27F4VB5UG8E              [B000TZ1TTM, B0019VUHH0]
...                      ...                                   ...
18993          AZYJE40XW6MFG              [B00HVAKJZS, B00IDZT294]
18994          AZZ56WF4X19G2                          [B004A7X218]
18995          AZZNK89PXD006  [B0002DHV16, B005BP8MQ8, B009RTX4SU]
18996          AZZV9PDNMCOZW              [B007EQL390, B00ISBWVT6]
18997          AZZYW4YOE1B6E  [B0002AQPA2, B0002AQPA2, B0002ARQV4]

[18998 rows x 2 columns]


# Preparing Dataset for Surprise's Algorithm

In [19]:
# create reader
reader = Reader(rating_scale=(1,5))
# generate data required for surprise
data = Dataset.load_from_df(train[["reviewerID", "asin", "overall"]], reader)
# generating trainset
trainset = data.build_full_trainset()

# Instantiate Pre-Initialised Matrix Factorization (Paragraph Vector)

In [20]:
# instantiating mod-ecf
mod_ecf = cf.PreInitialisedMF(user_map=user_idx_map,
                              item_map=item_idx_map,
                              user_factor=user_factors,
                              item_factor=item_factors,
                              learning_rate=LR_ALL,
                              beta=BETA,
                              num_epochs=N_EPOCHS,
                              num_factors=50)

In [21]:
%%time
# fitting to training data
mod_ecf.fit(trainset, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
CPU times: user 5min 38s, sys: 996 ms, total: 5min 39s
Wall time: 5min 40s


In [22]:
%%time
# generate candidate items for user to predict rating
testset = trainset.build_anti_testset()

CPU times: user 44.9 s, sys: 1.94 s, total: 46.8 s
Wall time: 47 s


In [23]:
%%time
# predict ratings for all pairs (u, i) that are NOT in the training set
candidate_items = mod_ecf.test(testset, verbose=False)

CPU times: user 12min 3s, sys: 5min 35s, total: 17min 38s
Wall time: 19min 49s


## Loop through N = {10, 25, 30, 45}

In [24]:
# generate item popularity
item_popularity = generate_item_popularity(train)

In [25]:
n_recommendations = {}
for n in [10, 25, 30, 45]:
    # retrieve the top-n items based on similarities
    top_ns = get_top_n(candidate_items, n)
    # evaluate how well the recommended items predicted the future purchases
    n_recommended_items = evaluate_recommendations(top_ns, test_user_history, item_popularity, n)
    # saving the n-value and recommended items
    n_recommendations[n] = (top_ns, n_recommended_items)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92907537/92907537 [01:38<00:00, 939765.87it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19058/19058 [02:03<00:00, 154.67it/s]


The MOD-ECF has an average recall@10: 0.00912, average novelty@10: 0.94959


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92907537/92907537 [01:57<00:00, 793229.43it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19058/19058 [01:05<00:00, 292.68it/s]


The MOD-ECF has an average recall@25: 0.01850, average novelty@25: 0.95902


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92907537/92907537 [02:05<00:00, 739274.68it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19058/19058 [01:13<00:00, 258.95it/s]


The MOD-ECF has an average recall@30: 0.02161, average novelty@30: 0.96069


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92907537/92907537 [01:56<00:00, 795803.94it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19058/19058 [01:13<00:00, 260.67it/s]


The MOD-ECF has an average recall@45: 0.02921, average novelty@45: 0.96408


# Evaluate N-Recommendations

In [26]:
def retrieve_recommendations(train: pd.DataFrame, top_ns: dict):
    """
    """
    # generating a random user
    random_user = np.random.choice(list(train['reviewerID'].unique()), 1)[0]
    print(f"For user: {random_user}:")
    print(f"Purchase History:\n{train[train['reviewerID'] == random_user][['asin', 'title']]}")

    # find the recommendations
    print(f"\nRecommending:\n")
    recommendations = (train[train['asin']
                             .isin([i[0] for i in top_ns[random_user]])][['asin', 'title']]
                       .drop_duplicates(subset='asin')
                       .set_index('asin'))
    print(f"{recommendations.loc[[i[0] for i in top_ns[random_user]]].reset_index()}")

## N=10

In [27]:
top_ns_10 = n_recommendations[10][0]
retrieve_recommendations(train, top_ns_10)

For user: A567QYRZ56S3N:
Purchase History:
             asin                                          title
27728  B0006JKCN0          KONG Frog Dog Toy, Extra Small, Green
62945  B0057XF9R4  Catit Design Senses Illuminated Ball - 2-Pack

Recommending:

         asin                                              title
0  B000F4AVPA                                Chuckit! Ultra Ball
1  B0017J8NDY  Mammoth Flossy Chews Cottonblend Color 5-Knot ...
2  B00168OD80                   Coleman Pterodactyl Dino Dog Toy
3  B000FWAP8A  GoCat Da Bird Rod and Feather Cat Toy, Handmad...
4  B0014AOC68                                         Smart Ramp
5  B000BART6M  Planet Dog Orbee Tuff Diamond Plate Dog Ball, ...
6  B000K9JRH8  GoCat DaBird Feather Refill, Assorted Colors, ...
7  B000OWXA4C  JW Pet Company Activitoy Disco Ball Small Bird...
8  B004IN9NAS                Pet Studio Pine Frame Dog RampSteps
9  B0002DGVY4  Herm Sprenger Pet Supply Imports Chrome Plated...


## N=25

In [28]:
top_ns_25 = n_recommendations[25][0]
retrieve_recommendations(train, top_ns_25)

For user: ASFQXOU7XHTDV:
Purchase History:
             asin                                              title
45197  B0016ZP4B8  PoochieBells Original Housetraining &amp; Pott...
64474  B005VEWAN0  Dental Teeth Cleaning Chew Toys for Small Dogs...

Recommending:

          asin                                              title
0   B0002DHV16           Cat Dancer - Cat Charmer Wand Teaser Toy
1   B0029NVJFQ  Whiskas Temptations Creamy Dairy Flavour Treat...
2   B0002DGVY4  Herm Sprenger Pet Supply Imports Chrome Plated...
3   B005799UUK                              Redbarn Bully Springs
4   B000084F4T  Purina Pro Plan Focus Weight Management Chicke...
5   B00025Z6Q6  Tetra TetraCichlid Balanced Diet Flakes Food f...
6   B00008434T                Ticked Off Pets Tick Remover, White
7   B00027466A                            Chuckit! Travel Dog Bed
8   B000AAIAJS                             Vitakraft Rabbit Slims
9   B0000AH3UK            Merrick Flossies Tendon Chews Pack of 5
10  B001

## N=30

In [29]:
top_ns_30 = n_recommendations[30][0]
retrieve_recommendations(train, top_ns_30)

For user: AYVLWK24S0E8D:
Purchase History:
             asin                                              title
35458  B000GA75RK  33 Pack GRIDLOCK 24&rdquo; x 24&rdquo; Puppy D...
35609  B000GDXHQ0  Handi-Drink Dog Water Bottle- 17 Oz - The Port...

Recommending:

          asin                                              title
0   B00008434T                Ticked Off Pets Tick Remover, White
1   B006SO16N2  ZippyPaws Dog Poop Waste Pick-Up Bags with Han...
2   B000NSGKYY                Litter Locker Refill Cartridge 5 pk
3   B0002DHV16           Cat Dancer - Cat Charmer Wand Teaser Toy
4   B006JRRRQI  IRIS Airtight Pet Food Container, 50-Pound, Cl...
5   B003B3S3TS  EZwhelp 27&quot; x 32&quot; Machine Washable, ...
6   B000PKSW5A  Precious Cat Dr. Elsey's Kitten Attract Scoopa...
7   B00H7PY3JA  Petseer Pet Odor Eliminator and Stain Remover ...
8   B000084F4T  Purina Pro Plan Focus Weight Management Chicke...
9   B00027466A                            Chuckit! Travel Dog Bed
10  B000

## N=45