In [168]:
from collections import Counter

import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import pairwise_distances

tqdm.pandas()

# Load Data

In [2]:
# global variable
DATA_PATH = "data/evaluation"
CATEGORY = "Grocery_and_Gourmet_Food"

In [3]:
train = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_train.csv")

In [4]:
# head dataframe
train.head().append(train.tail())

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A23RYWDS884TUL,5.0,This curry paste makes a delicious curry. I j...,2013-05-28,curry paste delicious curry fry chicken vegeta...
1,1,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A945RBQWGZXCK,5.0,I've purchased different curries in the grocer...,2012-09-17,purchase different curry grocery store complet...
2,3,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3AMNY44OP8AOU,4.0,I started a new diet restricting all added sug...,2014-01-23,start new diet restrict added sugar brand suga...
3,4,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3IB4CQ2QEJLJ8,5.0,So many flavors. I can't begin to tell you how...,2014-04-27,flavor begin tell love mae ploy curry ask reci...
4,5,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",AQA5DF3RWKETQ,5.0,I've used this a lot recently in some of my ch...,2012-11-27,use lot recently chicken dish use lot like spi...
47769,77420,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A192LQZWDYPR4U,5.0,Another quality Reese Peanut Butter Cup produc...,2014-02-27,quality reese peanut butter cup product great ...
47770,77421,B00I33696K,Reese's Miniature Peanut Butter Cups .31oz - 1...,"['Grocery & Gourmet Food', 'Candy & Chocolate'...",A2QKXW3LDQ66P5,5.0,I purchased these for my husband who has every...,2013-02-20,purchase husband love reeses valentine day pre...
47771,77430,B00ID9VSOM,"Viva Labs Organic Coconut Sugar: Non-GMO, Low-...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2P3TGJU301KXD,5.0,this stuff is INCREDIBILY yummy! SO much bette...,2014-07-15,stuff incredibily yummy good regular brown sug...
47772,77456,B00IRL93SY,Barrie House Kenya Estate - AA Single Cup Caps...,"['Grocery & Gourmet Food', 'Beverages', 'Coffe...",AEFE9VDHTQ199,5.0,"Very nice aroma, body and taste! Will buy this...",2014-05-24,nice aroma body taste buy coffee good coffee a...
47773,77508,B00ISVHJ3Y,"Wholesome Sweeteners, Organic Sweet and Lite S...","['Grocery & Gourmet Food', 'Cooking & Baking',...",A2AEZQ3DGBBLPR,2.0,This is a no go for diabetics according to my ...,2014-06-26,diabetic accord wife doctor order intention us...


# Preparing Sparse Matrix

In [5]:
# normalize the rating given based on a deviation of ratings from average rating given by a user
# https://medium.com/sfu-cspmp/recommendation-systems-user-based-collaborative-filtering-using-n-nearest-neighbors-bf7361dc24e0

mean_user_ratings = train.groupby(['reviewerID'], as_index=False)['overall'].mean()
mean_user_ratings.columns = ['reviewerID', 'mean_overall']
train_trans = pd.merge(train, mean_user_ratings, on='reviewerID')
train_trans['dev_overall'] = train_trans['overall'] - train_trans['mean_overall']

# check first 5 rows
train_trans.head()

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText,mean_overall,dev_overall
0,0,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A23RYWDS884TUL,5.0,This curry paste makes a delicious curry. I j...,2013-05-28,curry paste delicious curry fry chicken vegeta...,5.0,0.0
1,1,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A945RBQWGZXCK,5.0,I've purchased different curries in the grocer...,2012-09-17,purchase different curry grocery store complet...,5.0,0.0
2,3352,B000BD0SDU,REDMOND Real Sea Salt - Natural Unrefined Orga...,"['Grocery & Gourmet Food', 'Herbs, Spices & Se...",A945RBQWGZXCK,5.0,Great flavor - very healthy for you -- I think...,2012-06-08,great flavor healthy think flavor bring food f...,5.0,0.0
3,3,9742356831,"Mae Ploy Green Curry Paste, 14 oz","['Grocery & Gourmet Food', 'Sauces, Gravies & ...",A3AMNY44OP8AOU,4.0,I started a new diet restricting all added sug...,2014-01-23,start new diet restrict added sugar brand suga...,4.5,-0.5
4,64933,B006K3VO5Q,"Bergin Nut Company Organic Medjool Dates, 14 O...","['Grocery & Gourmet Food', 'Produce', 'Dried F...",A3AMNY44OP8AOU,5.0,These are the moistest dried dates I've ever h...,2014-01-23,moist dried date lovely snack handful raw almond,4.5,0.5


In [6]:
# creating pivot matrix
utility_matrix = train_trans.pivot_table(index='reviewerID', columns='asin', values='dev_overall')
utility_matrix

asin,9742356831,B00004S1C5,B00005344V,B0000CDEPD,B0000CFPI2,B0000CH39R,B0000CNU15,B0000CNU1S,B0000CNU1X,B0000DBN1H,...,B00HANG4VO,B00HDLZ072,B00HHFWJS2,B00HKGB9ZW,B00HQ3ZPJA,B00I08JNWU,B00I33696K,B00ID9VSOM,B00IRL93SY,B00ISVHJ3Y
reviewerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A00177463W0XWB16A9O05,,,,,,,,,,,...,,,,,,,,,,
A022899328A0QROR32DCT,,,,,,,,,,,...,,,,,,,,,,
A068255029AHTHDXZURNU,,,,,,,,,,,...,,,,,,,,,,
A06944662TFWOKKV4GJKX,,,,,,,,,,,...,,,,,,,,,,
A1004703RC79J9,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AZWRZZAMX90VT,,,,,,,,,,,...,,,,,,,,,,
AZXKAH2DE6C8A,,,,,,,,,,,...,,,,,,,,,,
AZXON596A1VXC,,,,,,,,,,,...,,,,,,,,,,
AZYXC63SS008M,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# replacing all NaN by item average
utility_matrix = utility_matrix.fillna(utility_matrix.mean(axis=0))

In [8]:
utility_matrix

asin,9742356831,B00004S1C5,B00005344V,B0000CDEPD,B0000CFPI2,B0000CH39R,B0000CNU15,B0000CNU1S,B0000CNU1X,B0000DBN1H,...,B00HANG4VO,B00HDLZ072,B00HHFWJS2,B00HKGB9ZW,B00HQ3ZPJA,B00I08JNWU,B00I33696K,B00ID9VSOM,B00IRL93SY,B00ISVHJ3Y
reviewerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A00177463W0XWB16A9O05,0.072527,-0.392857,-0.270833,0.0,0.0,0.069444,0.269841,-0.463675,-0.569697,-0.173188,...,-0.25,0.25,0.666667,0.25,0.0,0.5,0.152222,0.0,1.0,-1.5
A022899328A0QROR32DCT,0.072527,-0.392857,-0.270833,0.0,0.0,0.069444,0.269841,-0.463675,-0.569697,-0.173188,...,-0.25,0.25,0.666667,0.25,0.0,0.5,0.152222,0.0,1.0,-1.5
A068255029AHTHDXZURNU,0.072527,-0.392857,-0.270833,0.0,0.0,0.069444,0.269841,-0.463675,-0.569697,-0.173188,...,-0.25,0.25,0.666667,0.25,0.0,0.5,0.152222,0.0,1.0,-1.5
A06944662TFWOKKV4GJKX,0.072527,-0.392857,-0.270833,0.0,0.0,0.069444,0.269841,-0.463675,-0.569697,-0.173188,...,-0.25,0.25,0.666667,0.25,0.0,0.5,0.152222,0.0,1.0,-1.5
A1004703RC79J9,0.072527,-0.392857,-0.270833,0.0,0.0,0.069444,0.269841,-0.463675,-0.569697,-0.173188,...,-0.25,0.25,0.666667,0.25,0.0,0.5,0.152222,0.0,1.0,-1.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AZWRZZAMX90VT,0.072527,-0.392857,-0.270833,0.0,0.0,0.069444,0.269841,-0.463675,-0.569697,-0.173188,...,-0.25,0.25,0.666667,0.25,0.0,0.5,0.152222,0.0,1.0,-1.5
AZXKAH2DE6C8A,0.072527,-0.392857,-0.270833,0.0,0.0,0.069444,0.269841,-0.463675,-0.569697,-0.173188,...,-0.25,0.25,0.666667,0.25,0.0,0.5,0.152222,0.0,1.0,-1.5
AZXON596A1VXC,0.072527,-0.392857,-0.270833,0.0,0.0,0.069444,0.269841,-0.463675,-0.569697,-0.173188,...,-0.25,0.25,0.666667,0.25,0.0,0.5,0.152222,0.0,1.0,-1.5
AZYXC63SS008M,0.072527,-0.392857,-0.270833,0.0,0.0,0.069444,0.269841,-0.463675,-0.569697,-0.173188,...,-0.25,0.25,0.666667,0.25,0.0,0.5,0.152222,0.0,1.0,-1.5


# Computing Similarities With Users

In [10]:
# user similarity by replacing NaN with item average
cosine_similarities = cosine_similarity(utility_matrix)

In [14]:
user_similarities = pd.DataFrame(cosine_similarities, index=utility_matrix.index)
user_similarities.columns = utility_matrix.index

user_similarities.head()

reviewerID,A00177463W0XWB16A9O05,A022899328A0QROR32DCT,A068255029AHTHDXZURNU,A06944662TFWOKKV4GJKX,A1004703RC79J9,A1006HCQDMYC5W,A1008DPSP6KC9J,A100DXY4SLAMPM,A100I4UAHGQCF6,A100L918633LUO,...,AZV3KWJRFXLAX,AZVJHW8TARWV9,AZWIAYHWL2FWE,AZWIEXG6P4Y9W,AZWP97BZPJI1D,AZWRZZAMX90VT,AZXKAH2DE6C8A,AZXON596A1VXC,AZYXC63SS008M,AZZ5ASC403N74
reviewerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A00177463W0XWB16A9O05,1.0,0.996808,0.999672,0.999747,0.999743,0.999069,0.999745,0.999578,0.999695,0.99646,...,0.999733,0.999743,0.999662,0.999637,0.99869,0.997676,0.998558,0.999711,0.99956,0.999655
A022899328A0QROR32DCT,0.996808,1.0,0.996974,0.99706,0.997055,0.996383,0.997057,0.99689,0.997007,0.993781,...,0.997046,0.997055,0.996974,0.996949,0.996005,0.994994,0.995873,0.997023,0.996873,0.996967
A068255029AHTHDXZURNU,0.999672,0.996974,1.0,0.999915,0.99991,0.999236,0.999912,0.999745,0.999862,0.996627,...,0.999901,0.99991,0.999829,0.999804,0.998857,0.997843,0.998725,0.999878,0.999727,0.999822
A06944662TFWOKKV4GJKX,0.999747,0.99706,0.999915,1.0,0.999996,0.999321,0.999998,0.99983,0.999948,0.996712,...,0.999986,0.999996,0.999914,0.999889,0.998942,0.997928,0.99881,0.999964,0.999813,0.999908
A1004703RC79J9,0.999743,0.997055,0.99991,0.999996,1.0,0.999317,0.999993,0.999826,0.999943,0.996708,...,0.999982,0.999992,0.99991,0.999885,0.998938,0.997924,0.998806,0.999959,0.999808,0.999903


In [250]:
def get_k_neigbourhood(sim_matrix, n=50):
    """
    """
    order = np.argsort(sim_matrix.values, axis=1)[:, :n+1]
    neighbours = (sim_matrix
                  .progress_apply(lambda x: pd.Series(x.sort_values(ascending=False)
                  .iloc[1:n+1]
                  .index,
                  index=['top{}'.format(i) for i in range(1, n+1)]), 
                                  axis=1))
    
    return neighbours

In [25]:
get_k_neigbourhood(user_similarities, n=50)

100%|████████████████████████████████████████████████████████| 13397/13397 [00:20<00:00, 646.99it/s]


Unnamed: 0_level_0,top1,top2,top3,top4,top5,top6,top7,top8,top9,top10,...,top41,top42,top43,top44,top45,top46,top47,top48,top49,top50
reviewerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A00177463W0XWB16A9O05,A37R8YBCRLA87N,A298SSMYTNS2JY,AXF0UDD12DOV5,AA5D2Q1O4ZMG8,AU9DTRBFWCI1T,A2X1TN90B0QMT8,A1NRTPOMPK8O7R,A3RWWN5LET1271,A2N3PEE9Z1LON6,A1EZ02S81PUE87,...,A33CRB1T025T10,A21Y4TYJZ3BMOJ,A3JX5X77692322,A5CPBTIBTD2LD,A3V4TSMF59UFI6,A230J10CVDXZTH,A1J2KOMDJJB7F,A1J5OF3Z3CRXPG,A2UB20PCCYGZ57,A10SWOV1EF9WU5
A022899328A0QROR32DCT,A1P7X120WBZYEA,AWZC74S7VCC1R,A1R1DW5GGE3B5Q,A5E4IGZFCREMN,A5U5B6CGYSH17,AS783H6UPEKXX,A3F16J64W1OHUO,A1MWBGCGN6N7B9,A3UFLIIAMD28XC,ASX6UL80SKCE3,...,A39IXPMD2O6PRV,A1YYOWJBXFQY6V,A25UFYNXKB0J0Z,A1J2KOMDJJB7F,A10SWOV1EF9WU5,A2ZLZW4X0T9243,ACYMT1PL5FO9C,A37TVHX9EJWZ95,A2U3N8TS3Y98HG,A3NYTSAEPZDJ7Y
A068255029AHTHDXZURNU,A3FIIHT9LGM08M,A1OA8LXJY3VH0A,A1T361NP6X32CS,A3CNAQO03WKW1T,A1EMZLER8HYFXU,AVM6OR01CPJNQ,A28R87WLV78WR7,AU9DTRBFWCI1T,A1NRTPOMPK8O7R,A3RWWN5LET1271,...,A2HQYERFHL1BNM,A25UFYNXKB0J0Z,A3S8YN3SLC0QCT,A37TVHX9EJWZ95,A265B1IZE5RVG6,A3NYTSAEPZDJ7Y,A3SFQQB23IZVVL,A21Y4TYJZ3BMOJ,A783X2Z6HP2RQ,A3ENOBXC0LUDVD
A06944662TFWOKKV4GJKX,A06944662TFWOKKV4GJKX,A1XBUHT0JKF82F,A1QPJRSBQQF1L1,AENJWE4QD5R2Y,A33CRB1T025T10,A3S8YN3SLC0QCT,A230J10CVDXZTH,A3SFQQB23IZVVL,A2RJWYOFLVRBRM,A2HQYERFHL1BNM,...,A1ZPSBJ4Y5G8P4,AVQZ0JG8OP8P7,A1SHKOFJ3HUPMU,A2D2XKSXEG3O7F,A783X2Z6HP2RQ,A18B5QL328E02A,A3ASZKR4TLXV9O,A37TVHX9EJWZ95,A15MJB3USB0XTI,A1SLKZ69B3NOYF
A1004703RC79J9,A3FD9FWGETS6M7,A32Q726D4KHNHH,A2835FANQ9CSJY,A32U9FBU6IN5RF,A28ETU4J2PG09N,A2HS0HUHVLT9NG,A10SWOV1EF9WU5,A230J10CVDXZTH,A3SFQQB23IZVVL,A1JXULIHVB3YIR,...,A25UFYNXKB0J0Z,AB7Z2QB30DPZX,A3ENOBXC0LUDVD,A1RTSVWEXMKAR1,A265B1IZE5RVG6,A1SHKOFJ3HUPMU,A1SLKZ69B3NOYF,A3NYTSAEPZDJ7Y,AVQZ0JG8OP8P7,ACYMT1PL5FO9C
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AZWRZZAMX90VT,A2UME3JR31Z6PW,A22UH02B61LAL5,A3DN66RSL902HY,A1GGZZQZ8DDEEI,A2UR6DN0RMUWSI,A1Y1ODQM9H4EYC,A3I6IRC6PKRFZZ,A3BRCHCI1627ZW,A10ZK1IH0PDGJU,A1OD8O27WU6R6X,...,A3ASZKR4TLXV9O,A1RTSVWEXMKAR1,A2J6L8JTU438GF,A26TUZBFZTA9LO,A2ZLZW4X0T9243,A2JCTH2LDJ3K4C,A2JJGT40FU3I2X,A26ZK8QFWXN859,A2AKRD5PK5PQ14,A3NYTSAEPZDJ7Y
AZXKAH2DE6C8A,A3QYNBLW04575V,A2HLJO32XRZIG,A3RA3PWBYQMLT6,AJGDYHWBVBL58,A2VX6RJQ18KO6J,AHL4368A8Y91I,A1LV28UZ40BP79,A8P8KPVXCWV9R,A1WQ3UGXPT15VI,A3RGPBC7HZA8V4,...,A2FFDCTHCOQHXJ,A1RTSVWEXMKAR1,A25UFYNXKB0J0Z,A230J10CVDXZTH,A3SFQQB23IZVVL,AX80SWHDEKJCY,ACYMT1PL5FO9C,A3E9MXJ04VH915,A292KZ0OU2H0RP,A1JXULIHVB3YIR
AZXON596A1VXC,A2V27XVHZE5P38,A3GOAJ20MMKZTJ,A141SA7FZ2G9GZ,A1LRGXJ3AODW79,A277R1CHAIWT8C,A3E9MXJ04VH915,A2IHVX8D32TX55,A3V4TSMF59UFI6,A2013X1OLSB1O9,A1ACIMW2M0EH18,...,A2UB20PCCYGZ57,A3JX5X77692322,A3JXT9XJKR9YZ5,A16YMWR93A753M,A1SLKZ69B3NOYF,A2D2XKSXEG3O7F,A1SHKOFJ3HUPMU,A1RTSVWEXMKAR1,A15MJB3USB0XTI,AB7Z2QB30DPZX
AZYXC63SS008M,A1894JIDECYET,AWJXWD0EMKIUV,A2TA961ZTSSNZQ,A3VZCV0K650T0Q,A26ZK8QFWXN859,A18B5QL328E02A,A3ENOBXC0LUDVD,AOCLQ0X4UGCE,A2ZLZW4X0T9243,A2RJWYOFLVRBRM,...,A3JX5X77692322,AW6QMMJQ0Z9LD,AW3NY2DE6K0LB,A2JJGT40FU3I2X,AX80SWHDEKJCY,AVQZ0JG8OP8P7,A25UFYNXKB0J0Z,A2JCTH2LDJ3K4C,A2J6L8JTU438GF,A265B1IZE5RVG6


In [28]:
def get_user_similar_items(user1, user2):
    common_items = (train_trans[train_trans['reviewerID'] == user1]
                    .merge(train_trans[train_trans['reviewerID'] == user2],
                          on=['asin', 'title'],
                          how='inner')
                   )
    
    return common_items[['asin', 'title']]

In [29]:
get_user_similar_items('A00177463W0XWB16A9O05', 'A37R8YBCRLA87N')

Unnamed: 0,asin,title
0,B0029XDZIK,"Keurig, The Original Donut Shop, Medium Roast ..."
1,B0094ISOMA,"Marley Coffee Single Serve Coffee Capsules, Ma..."


In [30]:
get_user_similar_items('A022899328A0QROR32DCT', 'A1P7X120WBZYEA')

Unnamed: 0,asin,title
0,B003TO9RSU,Kinnikinnick Gluten Free S'moreables Graham St...


In [31]:
get_user_similar_items('A068255029AHTHDXZURNU', 'A3FIIHT9LGM08M')

Unnamed: 0,asin,title
0,B000K8WVYA,"Hint&nbsp; Water Pomegranate, (Pack of 12) 16 ..."


# Computing Score With Similar Users

In [33]:
sim_50_users = get_k_neigbourhood(user_similarities, n=50)

100%|████████████████████████████████████████████████████████| 13397/13397 [00:22<00:00, 605.17it/s]


In [73]:
def predict_rating(user, item, mean_rating, utility_matrix, sim_matrix, k_neighbourhood):
    """
    """
    sim_users = (k_neighbourhood[k_neighbourhood.index == 'A00177463W0XWB16A9O05']
                 .values
                 .squeeze()
                 .tolist())
    item_norm_ratings = utility_matrix.loc[:,'B0029XDZIK']
    sim_users_rating = item_norm_ratings[item_norm_ratings.index.isin(sim_users)]
    user_mean_rating = mean_rating.loc[mean_rating['reviewerID'] == user, 'mean_overall'].values[0]
    
    index = sim_users_rating.index.values.squeeze().tolist()
    corr = sim_matrix.loc[user, index]
    user_corr = pd.concat([sim_users_rating, corr], axis=1)
    user_corr.columns = ['dev_overall', 'correlation']
    user_corr['overall'] = user_corr.progress_apply(lambda x: x['dev_overall'] * x['correlation'], axis=1)
    
    numerator = user_corr['overall'].sum()
    denominator = user_corr['correlation'].sum()
    predict_rating = user_mean_rating + (numerator/denominator)
    
    print(predict_rating)

In [74]:
predict_rating('A00177463W0XWB16A9O05', 
               'B0029XDZIK', 
               mean_user_ratings,
               utility_matrix,  
               user_similarities, 
               sim_50_users)

100%|████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 25795.23it/s]

4.633586542685807





In [75]:
train_trans[train_trans['reviewerID'] == 'A00177463W0XWB16A9O05']

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText,mean_overall,dev_overall
42026,40181,B0029XDZIK,"Keurig, The Original Donut Shop, Medium Roast ...","['Grocery & Gourmet Food', 'Beverages', 'Coffe...",A00177463W0XWB16A9O05,5.0,It is a good stand by coffee you can count on....,2013-01-06,good stand coffee count reley daily use catagory,4.5,0.5
42027,70171,B0094ISOMA,"Marley Coffee Single Serve Coffee Capsules, Ma...","['Grocery & Gourmet Food', 'Beverages', 'Coffe...",A00177463W0XWB16A9O05,4.0,I received this coffee for the first time. Ha...,2013-02-27,receive coffee time good flavor pep raggie jam,4.5,-0.5


In [218]:
def predict(user):
    # retrieving user rating history
    rating_history = train_trans.groupby(['reviewerID'])['asin'].apply(list)
    user_rating_history = rating_history[user]
    # find a list of top 50 similar users
    sim_users = (sim_50_users[sim_50_users.index == user]
                 .values
                 .squeeze()
                 .tolist())
    # find user rating history of the top 50 similar users
    sim_user_rating_history = [j for i in rating_history[sim_users] for j in i]
    # find items rated by sim user but not by target user
    item_under_consideration = set(sim_user_rating_history) - set(user_rating_history)
    # target user average rating 
    user_mean_rating = mean_user_ratings.loc[mean_user_ratings['reviewerID'] == user, 'mean_overall'].values[0]
    
    candidate_items = {}
    for item in item_under_consideration:
        # retrieve item norm ratings
        item_norm_ratings = utility_matrix.loc[:, item]
        # retrieve norm ratings from sim user
        sim_norm_ratings = item_norm_ratings[item_norm_ratings.index.isin(sim_users)]
        # retrieving sim user and target user cosine similarities
        corr = user_similarities.loc[user, sim_users]
        # combine item rating and user cosine similarities
        user_corr = pd.concat([sim_norm_ratings, corr], axis=1)
        user_corr.columns = ['dev_overall', 'correlation']
        user_corr['overall'] = user_corr.apply(lambda x: x['dev_overall'] * x['correlation'], axis=1)
        
        numerator = user_corr['overall'].sum()
        denominator = user_corr['correlation'].sum()
        predict_rating = user_mean_rating + (numerator/denominator)
    
        candidate_items[item] = predict_rating
    
    # retrieving the counts of items appearing in item sim_user_rating_history
    item_counts = pd.DataFrame.from_dict(Counter(sim_user_rating_history), orient='index', columns=['count'])
    candidate_items = pd.DataFrame.from_dict(candidate_items, orient='index', columns=['pred_overall'])
    # merge ratings and counts
    candidate_items = candidate_items.merge(item_counts, left_index=True, right_index=True)
    
    return candidate_items.sort_values(by=['count', 'pred_overall'], ascending=False).index.tolist()

In [219]:
# unique users in trainset
unique_users = train_trans['reviewerID'].unique().tolist()

# generating candidate list for all users
predictions = {}
for user in tqdm(unique_users):
    predictions[user] = predict(user)

100%|███████████████████████████████████████████████████████| 13397/13397 [1:22:53<00:00,  2.69it/s]


In [220]:
predictions['A00177463W0XWB16A9O05'][:5]

['B000BXSRT2', 'B0029XDZKI', 'B001CHFUDC', 'B000NERTSE', 'B001TNW23U']

# Evaluation Top-N Recommendations

### Defining Evaluation Metrics

In [243]:
def precision_at_k(asins, predicted_asins, k=10):
    # number of relevant items
    set_actual = set(asins)
    set_preds = set(predicted_asins)
    num_relevant = len(set_actual.intersection(set_preds))
    
    # calculating precision@K - relevant / total recommended
    precision_at_k = num_relevant / k
    
    return precision_at_k

def recall_at_k(asins, predicted_asins, k=10):
    # number of relevant items
    set_actual = set(asins)
    set_preds = set(predicted_asins)
    num_relevant = len(set_actual.intersection(set_preds))
    
    # calculating recall@K - relevant / total relevant items
    recall_at_k = num_relevant / len(asins)
    
    return recall_at_k

### Loading Test Dataset

In [239]:
# loading test dataset
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")

# get user rating history
user_rating_history = train.groupby(['reviewerID'])['asin'].apply(list)
# generating test rating history
test_user_history = (pd.DataFrame(test.groupby(['reviewerID'])['asin']
                                  .apply(list).reset_index()))

In [230]:
def get_top_n(predictions, user_rating_history, n=10):
    """Return the top-N recommendations for each user based on cosine similarity.
    
    Args:
    
    Returns:
        ([dict]): A dictionary of top-N recommendations for each unique user, sorted by
            cosine similarties.
    """
    
    # retrieve a 200 items candidate list based on similarities
    top_ns = {}
    for user in tqdm(predictions):
        top_ns[user] = predictions[user][:n]
        
    return top_ns

In [231]:
top_ns = get_top_n(predictions, user_rating_history, n=10)

100%|█████████████████████████████████████████████████████| 13397/13397 [00:00<00:00, 590670.66it/s]


In [237]:
# generating a random user
random_user = np.random.choice(list(train['reviewerID'].unique()), 1)[0]
print(f"For user: {random_user}:")
print(f"Purchase History:\n{train[train['reviewerID'] == random_user][['asin', 'title']]}")

# find the recommendations
print(f"\nRecommending:\n")
print(f"{train[train['asin'].isin(top_ns[random_user])][['asin', 'title']].drop_duplicates(subset='asin')}")

For user: A25ETVRL1LE4LN:
Purchase History:
             asin                                              title
4703   B000EDBPQ6  Bob's Red Mill Gluten Free Pancake Mix, 22-oun...
6065   B000EVIDUY  The Gluten-Free Pantry Double Chocolate Browni...
7589   B000FFIIT0  Ener-G Foods Tapioca Loaf, 16-Ounce Packages (...
10694  B000LKTB90  Ancient Harvest Organic Gluten-Free Corn and Q...
20226  B001EO5S6M  Glutino Gluten Free Breakfast Bars, Blueberry ...
23732  B001HXLSUU   Maple Grove Farms, Pancake &amp; Waffle Mix, ...

Recommending:

             asin                                              title
1221   B0001WOLMU               Authentic Foods Vanilla Powder - 3oz
3435   B000DZFMEQ  Pamela's Products Gluten Free, Bread Mix, 19-O...
5990   B000EVE3YE  Glutino Gluten Free Pantry Muffin Mix, 15-Ounc...
6020   B000EVG8H4  Glutino Gluten Free Pantry Favorite Sandwich B...
7903   B000FG14QI  KIND Fruit &amp; Nut Bar, Macadamia &amp; Apri...
8381   B000G7X0OK  Lundberg Family Farms Cr

### N=10

In [244]:
top_ns = get_top_n(predictions, user_rating_history, n=10)

test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

k = 10
test_merged['precision@k'] = test_merged.progress_apply(lambda x: precision_at_k(x.asin, x.pred_asin, k=k), axis=1)
test_merged['recall@k'] = test_merged.progress_apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)

average_precision_at_k = test_merged["precision@k"].mean()
average_recall_at_k = test_merged["recall@k"].mean()

print(f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}.")

100%|█████████████████████████████████████████████████████| 13397/13397 [00:00<00:00, 771357.65it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 51756.32it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 52840.10it/s]

The MEM-ECF has a average precision@10: 0.00465, average recall@10: 0.02221.





### N=25

In [245]:
top_ns = get_top_n(predictions, user_rating_history, n=25)

test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

k = 25
test_merged['precision@k'] = test_merged.progress_apply(lambda x: precision_at_k(x.asin, x.pred_asin, k=k), axis=1)
test_merged['recall@k'] = test_merged.progress_apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)

average_precision_at_k = test_merged["precision@k"].mean()
average_recall_at_k = test_merged["recall@k"].mean()

print(f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}.")

100%|█████████████████████████████████████████████████████| 13397/13397 [00:00<00:00, 685901.28it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 48523.73it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 50242.17it/s]

The MEM-ECF has a average precision@25: 0.00310, average recall@25: 0.03387.





### N=30

In [247]:
top_ns = get_top_n(predictions, user_rating_history, n=30)

test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

k = 30
test_merged['precision@k'] = test_merged.progress_apply(lambda x: precision_at_k(x.asin, x.pred_asin, k=k), axis=1)
test_merged['recall@k'] = test_merged.progress_apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)

average_precision_at_k = test_merged["precision@k"].mean()
average_recall_at_k = test_merged["recall@k"].mean()

print(f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}.")

100%|█████████████████████████████████████████████████████| 13397/13397 [00:00<00:00, 734562.47it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 47689.73it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 48145.46it/s]

The MEM-ECF has a average precision@30: 0.00282, average recall@30: 0.03652.





### N=45

In [248]:
top_ns = get_top_n(predictions, user_rating_history, n=45)

test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

k = 45
test_merged['precision@k'] = test_merged.progress_apply(lambda x: precision_at_k(x.asin, x.pred_asin, k=k), axis=1)
test_merged['recall@k'] = test_merged.progress_apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)

average_precision_at_k = test_merged["precision@k"].mean()
average_recall_at_k = test_merged["recall@k"].mean()

print(f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}.")

100%|█████████████████████████████████████████████████████| 13397/13397 [00:00<00:00, 649307.73it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 46710.00it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 47335.40it/s]

The MEM-ECF has a average precision@45: 0.00242, average recall@45: 0.04593.





In [249]:
# looking at how many get correct
test_merged[test_merged['recall@k'] > 0]

Unnamed: 0,reviewerID,asin,pred_asin,precision@k,recall@k
0,A00177463W0XWB16A9O05,"[B00474OR8G, B00BFM6OAW]","[B000BXSRT2, B0029XDZKI, B001CHFUDC, B000NERTS...",0.022222,0.500000
11,A100WO06OQR8BQ,"[B001ELL4ZY, B0029XDZIK, B004EDZ83S, B005K4Q1T...","[B004K0630M, B003XV8XK2, B005IW4WEA, B0061IUKD...",0.022222,0.125000
26,A105S56ODHGJEK,"[B0025UCHRC, B005V9YXTO, B007JFXXJY, B00934WBRO]","[B001H3201Q, B00110FINM, B001EQ55TU, B000YVOHB...",0.022222,0.250000
56,A10FJGRMOTJ35Y,"[B0033HPPIO, B0046HG0SY, B007PA33MA]","[B007PA33MA, B000BXSRT2, B001EO6654, B001CHFUD...",0.022222,0.333333
59,A10G4BPT5MGBHY,"[B000F3V880, B001OCBT3U, B004MO6NI8]","[B001H3201Q, B000F3V880, B003GTR8IO, B003XV8XK...",0.044444,0.666667
...,...,...,...,...,...
13230,AZAKCIFE27RAA,"[B0026ZYZ7Q, B0047277QC, B004U43ZO0, B004U49QU...","[B001LG940E, B004JRMG98, B004LA1LKI, B004R8J8E...",0.066667,0.600000
13236,AZFHSPEZUPGD2,"[B002DM62BY, B00A64NLOM, B00BHIS6MI, B00FPNVTU...","[B004JGQ15E, B004K0630M, B007JFXWRC, B005A1LIN...",0.022222,0.200000
13247,AZM8N4NZCHYPM,[B000F3V880],"[B001H3201Q, B005IW4WEA, B001EQ55RW, B002NHSQ6...",0.022222,1.000000
13259,AZQA8ZIGS01FG,"[B001CHFUDC, B004538TME]","[B0042LH7TK, B001EPQRGG, B000EIE7GQ, B000H225T...",0.022222,0.500000


# Evaluating `UserBasedCF` class

In [260]:
class UserBasedCF:
    """
    """

    def __init__(self):
        self._rating_history = None
        self._mean_ratings = None
        self._k_neighbourhood = None
        self.utility_matrix = None
        self.sim_matrix = None

    def __get_utility_matrix(self, trainset: pd.DataFrame):
        """
        """
        self._mean_ratings = trainset.groupby(['reviewerID'], as_index=False)['overall'].mean()
        self._mean_ratings.columns = ['reviewerID', 'mean_overall']

        # creating utility matrix
        train = pd.merge(trainset, self._mean_ratings, on='reviewerID')
        # deviation from user's average rating
        train['dev_overall'] = train['overall'] - train['mean_overall']
        utility_matrix = train.pivot_table(index='reviewerID', columns='asin', values='dev_overall')

        return utility_matrix.fillna(utility_matrix.mean(axis=0))

    def __get_similarities_matrix(self):
        """
        """
        cosine_sim = cosine_similarity(self.utility_matrix)
        np.fill_diagonal(cosine_sim, 0)
        # generate user similarity matrix
        users_sim = pd.DataFrame(cosine_sim, index=self.utility_matrix.index)
        users_sim.columns = self.utility_matrix.index

        return users_sim

    def __get_k_neighbourhood(self, k_neighbours: float):
        """
        """
        # sim_order = np.argsort(self.sim_matrix.values, axis=1)[:, :k_neighbours]
        neighbours = (self.sim_matrix
                      .apply(lambda x: pd.Series(x.sort_values(ascending=False)
                                                 .iloc[:k_neighbours]
                                                 .index,
                                                 index=['top{}'.format(i) for i in range(1, k_neighbours+1)]),
                             axis=1))

        return neighbours

    def __predict_rating(self, user):
        """
        """
        # retrieve user rating history
        user_rating_history = self._rating_history[user]

        # list of K-neighbourhood of similar users
        sim_users = (self._k_neighbourhood[self._k_neighbourhood.index == user]
                     .values
                     .squeeze()
                     .tolist())
        # retrieve similar user rating history
        sim_users_rating_history = [j for i in self._rating_history[sim_users] for j in i]
        # find items rated by similar users by not by target user
        item_under_consideration = set(sim_users_rating_history) - set(user_rating_history)

        # retrieve target user mean rating
        user_mean_rating = self._mean_ratings.loc[self._mean_ratings['reviewerID'] == user, 'mean_overall'].values[0]

        candidate_items = {}
        for item in item_under_consideration:
            # retrieve item norm ratings
            item_norm_ratings = self.utility_matrix.loc[:, item]
            # retrieve norm ratings from similar users
            sim_norm_ratings = item_norm_ratings[item_norm_ratings.index.isin(sim_users)]
            # retrieve target user and similar user cosine similarities
            corrs = self.sim_matrix.loc[user, sim_users]

            # combined item norm ratings and user corrs - cosine similarities
            user_corrs = pd.concat([sim_norm_ratings, corrs], axis=1)
            user_corrs.columns = ['dev_overall', 'correlation']
            user_corrs['overall'] = user_corrs.apply(lambda x: x['dev_overall'] * x['correlation'], axis=1)

            # compute predicted ratings
            numerator = user_corrs['overall'].sum()
            denominator = user_corrs['correlation'].sum()
            predict_rating = user_mean_rating + (numerator/denominator)

            candidate_items[item] = predict_rating

        # retrieve counts of items appearing in similar user rating history
        item_counts = pd.DataFrame.from_dict(Counter(sim_users_rating_history), orient='index', columns=['count'])
        candidate_items = pd.DataFrame.from_dict(candidate_items, orient='index', columns=['pred_overall'])
        # merge predicted ratings and counts
        candidate_items = candidate_items.merge(item_counts, left_index=True, right_index=True)

        return candidate_items.sort_values(by=['count', 'pred_overall'], ascending=False).index.tolist()

    def fit(self, trainset: pd.DataFrame, k_neighbours: float=50):
        """

        Args:
            trainset ([pd.DataFrame]):
            k_neighbours ([int]):
        """
        # generate user rating history
        self._rating_history = trainset.groupby(['reviewerID'])['asin'].apply(list)
        self.utility_matrix = self.__get_utility_matrix(trainset)
        self.sim_matrix = self.__get_similarities_matrix()
        self._k_neighbourhood = self.__get_k_neighbourhood(k_neighbours)

    def predict(self):
        """
        """
        # retrieve unique users
        unique_users = self._rating_history.reset_index()['reviewerID'].tolist()

        predictions = {}
        for user in tqdm(unique_users):
            predictions[user] = self.__predict_rating(user)

        return predictions

In [261]:
# instantiate model
ub_cf = UserBasedCF()

In [262]:
# fit model to training data
ub_cf.fit(train, k_neighbours=50)

In [263]:
# generate candidate items
predictions = ub_cf.predict()

100%|█████████████████████████████████████████████████████████| 13397/13397 [41:14<00:00,  5.41it/s]


### N=10

In [264]:
top_ns = get_top_n(predictions, user_rating_history, n=10)

test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

k = 10
test_merged['precision@k'] = test_merged.progress_apply(lambda x: precision_at_k(x.asin, x.pred_asin, k=k), axis=1)
test_merged['recall@k'] = test_merged.progress_apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)

average_precision_at_k = test_merged["precision@k"].mean()
average_recall_at_k = test_merged["recall@k"].mean()

print(f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}.")

100%|█████████████████████████████████████████████████████| 13397/13397 [00:00<00:00, 617369.18it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 46517.55it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 49669.20it/s]

The MEM-ECF has a average precision@10: 0.00460, average recall@10: 0.02179.





### N=25

In [265]:
top_ns = get_top_n(predictions, user_rating_history, n=25)

test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

k = 25
test_merged['precision@k'] = test_merged.progress_apply(lambda x: precision_at_k(x.asin, x.pred_asin, k=k), axis=1)
test_merged['recall@k'] = test_merged.progress_apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)

average_precision_at_k = test_merged["precision@k"].mean()
average_recall_at_k = test_merged["recall@k"].mean()

print(f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}.")

100%|█████████████████████████████████████████████████████| 13397/13397 [00:00<00:00, 834711.23it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 46420.16it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 47152.35it/s]

The MEM-ECF has a average precision@25: 0.00313, average recall@25: 0.03403.





### N=30

In [266]:
top_ns = get_top_n(predictions, user_rating_history, n=30)

test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

k = 30
test_merged['precision@k'] = test_merged.progress_apply(lambda x: precision_at_k(x.asin, x.pred_asin, k=k), axis=1)
test_merged['recall@k'] = test_merged.progress_apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)

average_precision_at_k = test_merged["precision@k"].mean()
average_recall_at_k = test_merged["recall@k"].mean()

print(f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}.")

100%|█████████████████████████████████████████████████████| 13397/13397 [00:00<00:00, 733919.66it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 44621.65it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 48155.41it/s]

The MEM-ECF has a average precision@30: 0.00284, average recall@30: 0.03650.





### N=45

In [267]:
top_ns = get_top_n(predictions, user_rating_history, n=45)

test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

k = 45
test_merged['precision@k'] = test_merged.progress_apply(lambda x: precision_at_k(x.asin, x.pred_asin, k=k), axis=1)
test_merged['recall@k'] = test_merged.progress_apply(lambda x: recall_at_k(x.asin, x.pred_asin, k=k), axis=1)

average_precision_at_k = test_merged["precision@k"].mean()
average_recall_at_k = test_merged["recall@k"].mean()

print(f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}.")

100%|█████████████████████████████████████████████████████| 13397/13397 [00:00<00:00, 647795.65it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 46953.63it/s]
100%|██████████████████████████████████████████████████████| 13279/13279 [00:00<00:00, 47240.29it/s]

The MEM-ECF has a average precision@45: 0.00243, average recall@45: 0.04593.



