In [1]:
from collections import defaultdict
from itertools import chain
import logging

import numpy as np
import pandas as pd
import warnings
import yaml

from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from pandarallel import pandarallel
from surprise import Dataset, Reader
from tqdm import tqdm

from src.models import cf

pandarallel.initialize()
tqdm.pandas()
warnings.filterwarnings('ignore')



INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


# Load Data and Models

In [2]:
# global variables
DATA_PATH = "data/evaluation"
# D2V_PATH = "models/d2v"
CATEGORY = "Grocery_and_Gourmet_Food"

# training parameters
N_EPOCHS = 10
LR_ALL = 0.005
BETA = 0.1

train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")

In [3]:
# checking train dataframe
train.head().append(train.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A23RYWDS884TUL,5.0,This curry paste makes a delicious curry. I j...,2013-05-28,curry paste delicious curry fry chicken vegeta...
1,1,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A945RBQWGZXCK,5.0,I've purchased different curries in the grocer...,2012-09-17,purchase different curry grocery store complet...
2,3,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3AMNY44OP8AOU,4.0,I started a new diet restricting all added sug...,2014-01-23,start new diet restrict added sugar brand suga...
3,4,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3IB4CQ2QEJLJ8,5.0,So many flavors. I can't begin to tell you how...,2014-04-27,flavor begin tell love mae ploy curry ask reci...
4,5,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",AQA5DF3RWKETQ,5.0,I've used this a lot recently in some of my ch...,2012-11-27,use lot recently chicken dish use lot like spi...
47769,77420,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A192LQZWDYPR4U,5.0,Another quality Reese Peanut Butter Cup produc...,2014-02-27,quality reese peanut butter cup product great ...
47770,77421,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A2QKXW3LDQ66P5,5.0,I purchased these for my husband who has every...,2013-02-20,purchase husband love reeses valentine day pre...
47771,77430,B00ID9VSOM,"Viva Labs Organic Coconut Sugar: Non-GMO, Low-...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2P3TGJU301KXD,5.0,this stuff is INCREDIBILY yummy! SO much bette...,2014-07-15,stuff incredibily yummy good regular brown sug...
47772,77456,B00IRL93SY,Barrie House Kenya Estate - AA Single Cup Caps...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AEFE9VDHTQ199,5.0,"Very nice aroma, body and taste! Will buy this...",2014-05-24,nice aroma body taste buy coffee good coffee a...
47773,77508,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2AEZQ3DGBBLPR,2.0,This is a no go for diabetics according to my ...,2014-06-26,diabetic accord wife doctor order intention us...


# Train Doc2Vec Model (User & Item)

In [4]:
logging.basicConfig(level=logging.DEBUG)

In [5]:
# read params
params = yaml.safe_load(open("params.yaml"))["generate_vectors"]
MODEL_PARAMS = params["d2v_params"]

print(MODEL_PARAMS)

{'dm': 1, 'vector_size': 50, 'min_count': 1, 'negative': 5, 'ns_exponent': 0.5, 'sample': 1e-05, 'workers': 8, 'epochs': 10}


In [6]:
train["processedReviewText"] = train["processedReviewText"].progress_apply(lambda x: x.split())

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 47774/47774 [00:00<00:00, 169357.90it/s]


In [7]:
item_corpus = [
    TaggedDocument(review, [asin])
    for asin, review in list(zip(train["asin"], train["processedReviewText"]))
]

user_corpus = [
    TaggedDocument(review, [reviewerID])
    for reviewerID, review in list(zip(train["reviewerID"], train["processedReviewText"]))
]

In [8]:
train_corpus = item_corpus + user_corpus

## Training Doc2Vec

In [9]:
d2v = Doc2Vec(**MODEL_PARAMS)
d2v.build_vocab(train_corpus)
d2v.train(train_corpus, total_examples=d2v.corpus_count, epochs=d2v.epochs)

DEBUG:gensim.utils:starting a new internal lifecycle event log for Doc2Vec
INFO:gensim.utils:Doc2Vec lifecycle event {'params': 'Doc2Vec(dm/m,d50,n5,w5,s1e-05,t8)', 'datetime': '2021-09-09T22:05:44.571714', 'gensim': '4.0.1', 'python': '3.9.6 (default, Jun 29 2021, 05:25:02) \n[Clang 12.0.5 (clang-1205.0.22.9)]', 'platform': 'macOS-11.4-x86_64-i386-64bit', 'event': 'created'}
INFO:gensim.models.doc2vec:collecting all words and their counts
INFO:gensim.models.doc2vec:PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
INFO:gensim.models.doc2vec:PROGRESS: at example #10000, processed 364241 words (4307179/s), 14898 word types, 904 tags
INFO:gensim.models.doc2vec:PROGRESS: at example #20000, processed 744636 words (4586721/s), 21785 word types, 1940 tags
INFO:gensim.models.doc2vec:PROGRESS: at example #30000, processed 1142910 words (4372544/s), 27143 word types, 2955 tags
INFO:gensim.models.doc2vec:PROGRESS: at example #40000, processed 1580118 words (4593124/s), 31844

INFO:gensim.models.word2vec:EPOCH 3 - PROGRESS: at 92.74% examples, 154512 words/s, in_qsize 15, out_qsize 0
DEBUG:gensim.models.word2vec:job loop exiting, total 391 jobs
DEBUG:gensim.models.word2vec:worker exiting, processed 49 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 7 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 47 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 6 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 48 jobs
DEBUG:gensim.models.word2vec:worker exiting, processed 51 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 5 more threads
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 4 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 48 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 3 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 49 jobs
INFO:gensim.models.wo

DEBUG:gensim.models.word2vec:job loop exiting, total 391 jobs
DEBUG:gensim.models.word2vec:worker exiting, processed 49 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 7 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 48 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 6 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 50 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 5 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 48 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 4 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 49 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 3 more threads
DEBUG:gensim.models.word2vec:worker exiting, processed 48 jobs
INFO:gensim.models.word2vec:worker thread finished; awaiting finish of 2 more threads
DEBUG:gensim.models.word2vec:worker exiting,

# Generating User & Item Embeddings

In [10]:
def generate_user_item_vectors(train: pd.DataFrame, d2v: Doc2Vec) -> tuple:
    """
    """
    # get unique users and items
    unique_users = train["reviewerID"].unique().tolist()
    unique_items = train["asin"].unique().tolist()
    
    # generating mapping
    user_idx_map = {j: unique_users[j] for j in range(len(unique_users))}
    item_idx_map = {k: unique_items[k] for k in range(len(unique_items))}
    user_vec_map = {j: d2v[j] for j in unique_users}
    item_vec_map = {k: d2v[k] for k in unique_items}
    
    # loading user d2v vectors into DF
    user_vecs = pd.DataFrame.from_dict(user_vec_map, orient='index')
    user_vecs.index.name = 'reviewerID'
    # loading item d2v vectors into DF
    item_vecs = pd.DataFrame.from_dict(item_vec_map, orient='index')
    item_vecs.index.name = 'asin'
    
    return user_idx_map, user_vecs, item_idx_map, item_vecs

In [11]:
user_idx_map, user_vecs, item_idx_map, item_vecs = generate_user_item_vectors(train, d2v)

In [12]:
# converting factors into numpy obj
user_factors = user_vecs.to_numpy()
item_factors = item_vecs.to_numpy()

In [13]:
# check user factors
user_factors[0,:]

array([-2.01698896e-02, -2.05447176e-03,  1.21297427e-02,  3.04938573e-03,
       -1.86814126e-02, -1.15144029e-02, -1.86024932e-03,  1.28131651e-05,
       -4.64737043e-03,  2.47492287e-02,  1.84277538e-02,  1.81788150e-02,
       -4.60735559e-02,  2.87961550e-02,  2.25295182e-02,  5.06839342e-03,
        4.18663323e-02, -5.84903080e-03, -1.63989756e-02, -1.16623342e-02,
       -2.44675148e-02, -1.59855280e-02,  2.32356042e-02, -7.04046618e-03,
       -9.83982906e-03,  4.68950458e-02, -2.27734465e-02, -6.77542342e-03,
       -4.08900753e-02,  7.80975679e-04,  2.66978592e-02,  4.06152243e-03,
       -1.05919398e-03,  2.53511015e-02, -1.13817658e-02,  4.11136402e-03,
       -1.51395975e-02,  5.51014207e-04,  3.13999839e-02,  8.56930390e-03,
        3.15239374e-03, -3.12041957e-03, -1.42149944e-02, -2.83877309e-02,
        6.17698813e-03, -3.86410858e-03,  2.08088532e-02, -2.22205985e-02,
        3.04410909e-03,  1.07085165e-02], dtype=float32)

In [14]:
# check item factors
item_factors[0,:]

array([-0.33036214,  0.09933216, -0.08279146,  0.20955645, -0.09724324,
        0.22373076, -0.04580458,  0.03147466,  0.25918925,  0.02754547,
        0.02591094, -0.00948335, -0.22754323, -0.06776297,  0.25926092,
       -0.2728187 ,  0.36369812, -0.13113324, -0.02920303, -0.22315627,
        0.10133823,  0.03130703,  0.13151968, -0.04523718, -0.18187846,
        0.3499963 , -0.3315193 ,  0.14636506, -0.369077  ,  0.06127454,
        0.0188331 ,  0.09504317, -0.11739492,  0.11775835, -0.00802265,
        0.00041661, -0.30812517, -0.11272635,  0.04930919,  0.3568229 ,
       -0.32372668, -0.25754696, -0.03834489, -0.09389088, -0.08689753,
       -0.21286209, -0.17648485, -0.0183565 ,  0.13319981, -0.15399422],
      dtype=float32)

# Utility Functions

In [15]:
def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in tqdm(predictions):
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in tqdm(top_n.items()):
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

def recall_at_k(asins, predicted_asins, k=10):
    # number of relevant items
    set_actual = set(asins)
    set_preds = set(predicted_asins)
    num_relevant = len(set_actual.intersection(set_preds))
    
    # calculating recall@K - relevant / total relevant items
    recall_at_k = num_relevant / len(asins)
    
    return recall_at_k

def novelty_at_k(item_popularity, predicted_asins, k=10):
    """
    """
    # finding avg novelty
    popularity_sum = item_popularity.loc[predicted_asins].sum()
    novelty_at_k = ((k*1) - popularity_sum) / k
    
    return novelty_at_k

def generate_item_popularity(train: pd.DataFrame) -> pd.DataFrame:
    """
    """
    
    # create a mapping of item popularatity
    # based on sum(item's review / max reviews) / no items
    max_reviews = (train.groupby(['asin'])
                   .agg({'processedReviewText': 'count'})
                   .max()
                   .values[0])
    item_popularity = (train.groupby(['asin'])
                       .agg({'processedReviewText': 'count'})
                       .apply(lambda x: x/max_reviews))
    
    return item_popularity
    

def evaluate_recommendations(top_ns: dict, user_rating_history: pd.DataFrame, item_popularity: pd.DataFrame, k=10) -> pd.DataFrame:
    """
    
    Args:
        top_ns
        user_rating_history
    """
    
    test_recommendations = pd.DataFrame(top_ns.items(), columns=["reviewerID", "pred_asin"])
    test_recommendations['pred_asin'] = test_recommendations['pred_asin'].apply(lambda x: [i[0] for i in x])
    
    # combined test history and recommendations
    test_merged = pd.merge(user_rating_history, test_recommendations, on="reviewerID", how="inner")
    
    # generating recall@k metrics
    test_merged["recall@k"] = test_merged.apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)
    test_merged["novelty@k"] = test_merged.apply(lambda x: novelty_at_k(item_popularity, x.pred_asin, k=k), axis=1)
    average_recall_at_k = test_merged["recall@k"].mean()
    average_novelty_at_k = test_merged["novelty@k"].mean()
    
    print(f"The MOD-ECF has an average recall@{k}: {average_recall_at_k:.5f}, average novelty@{k}: {average_novelty_at_k:.5f}")
    
    return test_merged

# Generate N-Recommendations = {10, 25, 30, 45}

## Load Test Data

In [16]:
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")
test.head().append(test.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,2,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1TCSC0YWT82Q0,5.0,I love ethnic foods and to cook them. I recent...,2013-08-03,love ethnic food cook recently purchase produc...
1,8,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A1Z7Y2GMAP9SRY,5.0,I like to make my own curry but this is a tast...,2014-06-27,like curry tasty alternative use base kind dif...
2,23,B00004S1C5,"Ateco Food Coloring Kit, 6 colors","['Grocery & Gourmet Food', 'Cooking & Baking',...",A14YSMLYLJEMET,1.0,This product is no where near natural / organi...,2013-03-29,product near natural organic wish review purch...
3,31,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",A2F488C4PLWGEI,5.0,If my wife drinks a cup of this tea when she f...,2014-03-23,wife drink cup tea feel attack come help avoid...
4,32,B00005344V,Traditional Medicinals Organic Breathe Easy Se...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AO1HXV7DWZZIR,5.0,I don't know about the medicinal aspects of th...,2014-02-06,know medicinal aspect tea flavor downright scr...
28001,77519,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A1WT3TVHANP7ZF,3.0,Hmmm. I really wanted to love this sweetener. ...,2014-07-22,hmmm want love sweetener half sugar half stevi...
28002,77520,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A3NEAETOSXDBOM,5.0,"I confess I have a sweet tooth, and love the t...",2014-06-30,confess sweet tooth love taste sugar recognize...
28003,77521,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",AD1ZOPB0BBEHB,4.0,"It has a little of the stevia aftertaste, but ...",2014-07-17,little stevia aftertaste fair compromise able ...
28004,77522,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A18ECVX2RJ7HUE,5.0,i love marinade for grilled flank steak or lon...,2014-05-30,love marinade grilled flank steak london broil...
28005,77523,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2G04D4QZAXL15,3.0,I've been using Truvia (a form of stevia) on m...,2014-05-27,use truvia form stevia cereal greek yogurt yea...


In [17]:
# generating test history
test_user_history = (pd.DataFrame(test.groupby(['reviewerID'])['asin']
                                  .apply(list).reset_index()))

In [18]:
print(test_user_history)

                  reviewerID  \
0      A00177463W0XWB16A9O05   
1      A022899328A0QROR32DCT   
2      A068255029AHTHDXZURNU   
3      A06944662TFWOKKV4GJKX   
4             A1004703RC79J9   
...                      ...   
13274          AZWRZZAMX90VT   
13275          AZXKAH2DE6C8A   
13276          AZXON596A1VXC   
13277          AZYXC63SS008M   
13278          AZZ5ASC403N74   

                                                    asin  
0                               [B00474OR8G, B00BFM6OAW]  
1                                           [B00CMQDKES]  
2                                           [B001FA1K2G]  
3                                           [B000GFYRHG]  
4                                           [B003GTR8IO]  
...                                                  ...  
13274  [B0007R9L4M, B000CN7BMA, B001EQ5D1K, B002VT3GX...  
13275   [B000MAK41I, B004X8TJP2, B006H34CUS, B007W14RMM]  
13276                           [B001EO5S0I, B00271QQ7Q]  
13277                    

# Preparing Dataset for Surprise's Algorithm

In [19]:
# create reader
reader = Reader(rating_scale=(1,5))
# generate data required for surprise
data = Dataset.load_from_df(train[["reviewerID", "asin", "overall"]], reader)
# generating trainset
trainset = data.build_full_trainset()

# Instantiate Pre-Initialised Matrix Factorization (Paragraph Vector)

In [20]:
# instantiating mod-ecf
mod_ecf = cf.PreInitialisedMF(user_map=user_idx_map,
                              item_map=item_idx_map,
                              user_factor=user_factors,
                              item_factor=item_factors,
                              learning_rate=LR_ALL,
                              beta=BETA,
                              num_epochs=N_EPOCHS,
                              num_factors=50)

In [21]:
%%time
# fitting to training data
mod_ecf.fit(trainset, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
CPU times: user 4min 7s, sys: 1.54 s, total: 4min 8s
Wall time: 4min 10s


In [22]:
%%time
# generate candidate items for user to predict rating
testset = trainset.build_anti_testset()

CPU times: user 30.5 s, sys: 1.46 s, total: 31.9 s
Wall time: 32.1 s


In [23]:
%%time
# predict ratings for all pairs (u, i) that are NOT in the training set
candidate_items = mod_ecf.test(testset, verbose=False)

CPU times: user 8min 1s, sys: 1min 50s, total: 9min 52s
Wall time: 10min 25s


## Loop through N = {10, 25, 30, 45}

In [24]:
# generate item popularity
item_popularity = generate_item_popularity(train)

In [25]:
n_recommendations = {}
for n in [10, 25, 30, 45]:
    # retrieve the top-n items based on similarities
    top_ns = get_top_n(candidate_items, n)
    # evaluate how well the recommended items predicted the future purchases
    n_recommended_items = evaluate_recommendations(top_ns, test_user_history, item_popularity, n)
    # saving the n-value and recommended items
    n_recommendations[n] = (top_ns, n_recommended_items)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63307346/63307346 [00:43<00:00, 1471206.50it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13397/13397 [00:47<00:00, 283.71it/s]


The MOD-ECF has an average recall@10: 0.00489, average novelty@10: 0.96466


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63307346/63307346 [01:07<00:00, 938558.24it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13397/13397 [00:47<00:00, 283.26it/s]


The MOD-ECF has an average recall@25: 0.01317, average novelty@25: 0.96496


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63307346/63307346 [01:11<00:00, 882757.13it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13397/13397 [00:47<00:00, 280.74it/s]


The MOD-ECF has an average recall@30: 0.01524, average novelty@30: 0.96514


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63307346/63307346 [01:12<00:00, 875510.25it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13397/13397 [00:51<00:00, 261.95it/s]


The MOD-ECF has an average recall@45: 0.02274, average novelty@45: 0.96588


# Evaluate N-Recommendations

In [26]:
def retrieve_recommendations(train: pd.DataFrame, top_ns: dict):
    """
    """
    # generating a random user
    random_user = np.random.choice(list(train['reviewerID'].unique()), 1)[0]
    print(f"For user: {random_user}:")
    print(f"Purchase History:\n{train[train['reviewerID'] == random_user][['asin', 'title']]}")

    # find the recommendations
    print(f"\nRecommending:\n")
    recommendations = (train[train['asin']
                             .isin([i[0] for i in top_ns[random_user]])][['asin', 'title']]
                       .drop_duplicates(subset='asin')
                       .set_index('asin'))
    print(f"{recommendations.loc[[i[0] for i in top_ns[random_user]]].reset_index()}")

## N=10

In [27]:
top_ns_10 = n_recommendations[10][0]
retrieve_recommendations(train, top_ns_10)

For user: A2IYX5W3A1N3VB:
Purchase History:
             asin                                              title
42065  B005V9YXTO  Eat Your Vegetables Sea Salt Veggie Chips, 4.5...
46597  B00B18PAWI  Nestle Skinny Cow Divine Filled Chocolates, Ca...

Recommending:

         asin                                              title
0  B004IX03MK  Wedderspoon 100% Raw Organic Dandelion Honey, ...
1  B004YTV5S4  Navitas Organics Hemp Seeds, 8 oz. Bags (Pack ...
2  B005EF0HTK           Celebrate Good Times Gift Basket by ig4U
3  B004DIR3TQ  SCHARFEEN BERGER Artisan Chocolate Bars, Bitte...
4  B00B8DSFXC  Kevala Organic Black Raw and Unhulled Sesame S...
5  B002RBRY0Y  Lindt LINDOR Dark Chocolate Truffles, Kosher, ...
6  B005EF0HTA         Celebrate with a Crowd Gift Basket by ig4U
7  B0029JASWA  Dove Dark Chocolate Promises, 9.5-Ounce Packag...
8  B00474OQWI  Caribou Coffee, Caribou Blend Decaf, K-Cup Por...
9  B006IOKA9S  San Francisco Bay OneCup Decaf French Roast (3...


## N=25

In [28]:
top_ns_25 = n_recommendations[25][0]
retrieve_recommendations(train, top_ns_25)

For user: AWBMGLP57SAGK:
Purchase History:
             asin                                              title
22830  B001FSK3S0  Triscuit Crackers (Hint Of Salt, 9-Ounce Box, ...
27871  B0029JHHO2  Ricochet Candies with Xylitol, Grape Escape, 1...

Recommending:

          asin                                              title
0   B004IX03MK  Wedderspoon 100% Raw Organic Dandelion Honey, ...
1   B004YTV5S4  Navitas Organics Hemp Seeds, 8 oz. Bags (Pack ...
2   B00B8DSFXC  Kevala Organic Black Raw and Unhulled Sesame S...
3   B00856TSCC  Manitoba Harvest Hemp Hearts Raw Shelled Hemp ...
4   B001EQ5G1C  Manitoba Harvest Organic Hemp Hearts Raw Shell...
5   B003BI2EUW     YS BEE FARMS Raw Tupelo Blossom Honey, 13.5 OZ
6   B004334FPG        Bob's Red Mill White Sesame Seeds, 16-ounce
7   B008XCSXHO  Vitacost Organic Apple Cider Vinegar with ''Mo...
8   B004TDTZEG  MW Polar Herring, Hot Tomato Sauce, 6-Ounce (P...
9   B00B8DU1QQ           Kevala Organic Toasted Sesame Seeds 2Lbs
10  B003

## N=30

In [29]:
top_ns_30 = n_recommendations[30][0]
retrieve_recommendations(train, top_ns_30)

For user: A1RTSVWEXMKAR1:
Purchase History:
             asin                                              title
46316  B00A16P8X2  DOVE PROMISES Valentine Milk Chocolate and Str...

Recommending:

          asin                                              title
0   B0029J6QLM         Snickers Dark Chocolate Candy (Pack of 24)
1   B0029JES6W  M&amp;M'S Almond Chocolate Candy 9.9-Ounce Bag...
2   B000HDJZWO  Enjoy Life Baking Chocolate, Soy free, Nut fre...
3   B004DIR3TQ  SCHARFEEN BERGER Artisan Chocolate Bars, Bitte...
4   B002RBRY0Y  Lindt LINDOR Dark Chocolate Truffles, Kosher, ...
5   B000JMAVYO  Spicy World Almonds Whole (Natural and Raw), 4...
6   B00B8DSFXC  Kevala Organic Black Raw and Unhulled Sesame S...
7   B004YTV5S4  Navitas Organics Hemp Seeds, 8 oz. Bags (Pack ...
8   B001PIH3MY  Annie's Organic Cheddar Bunnies, Baked Snack C...
9   B0016BS3BK  Ghirardelli Double Chocolate Brownie Mix, 18-O...
10  B00DUQNFSU       Werther's Original Popcorn, Caramel, 8 Ounce
11  B004VL

## N=45

In [30]:
top_ns_45 = n_recommendations[45][0]
retrieve_recommendations(train, top_ns_45)

For user: ASF0R1CMSF26F:
Purchase History:
             asin                                              title
3154   B000CQE3HS  Slim Jim Giant Smoked Snack Sticks, Tabasco, ....
29957  B002NKPCZI  Peter Pan Creamy Honey Roast Peanut Spread, 16...
36161  B004K00DGC  Jamba Juice Energy Drink, Crisp Apple, 8.4-Oun...
41820  B005SPQENY  Chantea Aloe Vera Green Tea, Passion Fruit, 11...
43121  B006BXV14E  Kellogg's Frosted Mini-Wheats Little Bites Fla...
44829  B007K5KAJY    Nawgan Mandarin Orange, 11.5-Ounce (Pack of 24)
44985  B007POT6VI         Quaker Instant Oats Variety Pack, 48-Count
45325  B008JA73RG  V8 +Energy, Juice Drink with Green Tea, Peach ...
46564  B00B18PAWI  Nestle Skinny Cow Divine Filled Chocolates, Ca...
46897  B00BIEU5MK  Community Coffee Ground Coffee, French Vanilla...
47331  B00DBSG2NC         Keebler Jumbo Dark Fudge Sticks, 6.6 Ounce
47353  B00DBSGJ4E  Kellogg's Harvest Acres Fruit Snacks, Mixed Fr...
47430  B00DBSG3K4  Keebler El Duende Cookies, Coconut, 11 Ou