In [1]:
import numpy as np 
import pandas as pd
import warnings

from gensim.models.doc2vec import Doc2Vec
from pandarallel import pandarallel
from src.models import cf
from tqdm import tqdm 

pandarallel.initialize()
tqdm.pandas()
warnings.filterwarnings('ignore')



INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


# Load Data and Model

In [2]:
# notebook-wide settings: product category plus the data and model folders
CATEGORY = "Clothing_Shoes_and_Jewelry"
DATA_PATH = 'data/evaluation'
MODEL_PATH = 'models/d2v'

# read the `_train` CSV of the chosen category
train_csv = f"{DATA_PATH}/{CATEGORY}_train.csv"
train = pd.read_csv(train_csv)

# load the Doc2Vec model saved on disk for the same category
d2v_file = f"{MODEL_PATH}/{CATEGORY}_50_d2v.model"
model = Doc2Vec.load(d2v_file)

In [3]:
# preview the first and last five rows of the training data
# (pd.concat is used because DataFrame.append was removed in pandas 2.0)
pd.concat([train.head(), train.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,0,0000031887,Ballet Dress-Up Fairy Tutu,"[['Clothing, Shoes & Jewelry', 'Girls', 'Cloth...",A1KLRMWW2FWPL4,5.0,This is a great tutu and at a really great pri...,2011-02-12,great tutu great price look cheap glad look am...
1,1,0000031887,Ballet Dress-Up Fairy Tutu,"[['Clothing, Shoes & Jewelry', 'Girls', 'Cloth...",A2G5TCU2WDFZ65,5.0,I bought this for my 4 yr old daughter for dan...,2013-01-19,buy yr old daughter dance class wear today tim...
2,2,0000031887,Ballet Dress-Up Fairy Tutu,"[['Clothing, Shoes & Jewelry', 'Girls', 'Cloth...",A1RLQXYNCMWRWN,5.0,What can I say... my daughters have it in oran...,2013-01-04,daughter orange black white pink think buy fuc...
3,3,0000031887,Ballet Dress-Up Fairy Tutu,"[['Clothing, Shoes & Jewelry', 'Girls', 'Cloth...",A8U3FAMSJVHS5,5.0,"We bought several tutus at once, and they are ...",2014-04-27,buy tutu high review sturdy seemingly girl wea...
4,4,0000031887,Ballet Dress-Up Fairy Tutu,"[['Clothing, Shoes & Jewelry', 'Girls', 'Cloth...",A3GEOILWLK86XM,5.0,Thank you Halo Heaven great product for Little...,2014-03-15,thank halo heaven great product little girls g...
176500,278257,B00JULS24Q,Buyinhouse Feeling Adorable Cute Pink Bowknot ...,"[['Clothing, Shoes & Jewelry', 'Novelty, Costu...",A2IQT5AFFXA1OM,4.0,This item is exactly as described but for some...,2014-05-21,item exactly described reason like person like...
176501,278290,B00K035Y08,Sakkas 197 Oasis Gauzy Crepe Sleeveless Blouse...,"[['Clothing, Shoes & Jewelry', 'Women', 'Cloth...",A1XWMGHBAPKOV3,5.0,Put on and it fit great easy care also.Would t...,2014-06-15,fit great easy care tell friend buy thank hawa...
176502,278297,B00K0352PU,Sakkas Paradise Embroidered Relaxed Fit Blouse,"[['Clothing, Shoes & Jewelry', 'Women', 'Cloth...",A3VTQB69FYGQDU,3.0,Before I washed the top it was extremely large...,2014-06-06,wash extremely large fit perfectly wash embroi...
176503,278316,B00K8J06CK,TrendzArt Azules Poly Span Floral Print Full L...,"[['Clothing, Shoes & Jewelry', 'Women', 'Cloth...",A241BLSJL8AGY,4.0,This maxi skirt was very nice material it fit ...,2014-06-20,maxi skirt nice material fit nicely color pret...
176504,278345,B00KF9180W,[2 PACK] Multi-Purpose Sports Balaclava - For ...,"[['Clothing, Shoes & Jewelry', 'Men', 'Accesso...",A2YKWYC3WQJX5J,5.0,This is truly a year round product. Here in th...,2014-06-21,truly year round product midwest summer use ba...


In [4]:
# testing model
model.dv[0]

array([ 0.18483885,  0.13683623,  0.431223  ,  0.46865723, -0.6673366 ,
       -0.26540083,  0.67599654, -0.6965546 , -0.95767546,  0.2404644 ,
       -0.08230511, -0.07275622, -1.5187771 , -0.17228617, -1.4812549 ,
        0.33527163,  0.79808766, -0.38892385, -0.09236064, -0.3225281 ,
       -0.41883117,  0.2977529 ,  1.0714364 ,  1.6929846 ,  2.6893625 ,
        1.4981884 , -0.35175997, -0.7717815 ,  0.5517118 , -0.00497056,
        0.11664307,  0.9710174 , -0.16621333,  0.29962972, -0.52694905,
        1.5606344 ,  1.441607  ,  1.0353522 ,  0.37510684,  1.9985726 ,
        1.0185841 , -0.2807343 , -0.41933128, -0.34082264,  1.9616672 ,
       -0.6369609 , -0.09271178, -0.6820405 , -0.9024493 ,  0.6379632 ],
      dtype=float32)

# Generate User Embeddings

In [5]:
# map every reviewer to the list of item ids (asin) found in their train rows
reviews_by_user = train.groupby('reviewerID')
user_rating_history = reviews_by_user['asin'].apply(list)

print(user_rating_history)

reviewerID
A001114613O3F18Q5NVR6     [B0016JNS44, B001T54XA8, B004AZXO1I, B004QJWKLS]
A00146182PNM90WNNAZ5Q     [B000JJX7C0, B000MX3SH2, B003CO205E, B008JXDFCU]
A00165422B2GAUE3EL6Z0     [B007WADN4G, B007WAEBPQ, B007WAT3I6, B008G51WHQ]
A00338282E99B8OR2JYTZ                 [B002FA5B8O, B003F06XQW, B00768LFYY]
A00354001GE099Q1FL0TU                 [B00387EEYA, B003RYZY8E, B0058XN9ZC]
                                               ...                        
AZZMQ85DPFEG3            [B0007KPPAI, B005EYUQ7E, B005VEMVI4, B007LOTZ5...
AZZNK89PXD006                         [B000KGOHLM, B005C3DH00, B005PQPLLC]
AZZT1ERHBSNQ8            [B000UANLGU, B002ATSG8C, B002UTJVMM, B007P83XJ...
AZZTOUKVTUMVM             [B000196UJ0, B009GE3XQ4, B00C2DJ66C, B00C4NV6LS]
AZZYW4YOE1B6E                         [B003V4AKTS, B004G8GOW0, B007LTV82W]
Name: asin, Length: 39386, dtype: object


In [6]:
# getting unique users (the index of the per-user history holds the reviewer ids)
unique_users = user_rating_history.index.tolist()

# take the embedding width from the model instead of hard-coding 50
embedding_dim = model.dv.vector_size

# generating user embeddings for all unique users
user_embeddings = {}

# iterate (user, rated items) pairs directly so each user needs no label lookup
for user, rated_items in tqdm(user_rating_history.items(), total=len(user_rating_history)):
    user_embedding = np.zeros(embedding_dim)
    for item in rated_items:
        user_embedding += model.dv[item]

    # mean aggregation
    user_embedding /= len(rated_items)
    user_embeddings[user] = user_embedding

100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39386/39386 [00:01<00:00, 34101.26it/s]


# Generate Top-N Recommendations (N=10)

In [None]:
def get_top_n(d2v, user_embeddings, n=10, n_candidates=200):
    """Return the top-N recommendations for each user based on cosine similarity.

    Items the user already rated (looked up in the notebook-level
    ``user_rating_history``) are removed from the candidate list before the
    first ``n`` remaining items are kept.

    Args:
        d2v ([Doc2Vec]): Doc2Vec of item representations based on reviews.
        user_embeddings ([dict]): User representations based on mean aggregation of item
            representations within the d2v vector dimension space.
        n ([int]): Number of recommendations to keep per user.
        n_candidates ([int]): Size of the candidate list requested from
            ``d2v.dv.most_similar`` for each user. Defaults to 200.

    Returns:
        ([dict]): A dictionary of top-N recommendations for each unique user, sorted by
            cosine similarities. A user's list holds fewer than ``n`` items when the
            candidate list runs out of unrated items.
    """

    top_ns = {}
    for user, embedding in tqdm(user_embeddings.items()):
        # NOTE: relies on the notebook-level `user_rating_history` (not a parameter)
        rated_items = set(user_rating_history[user])

        user_top_n = []
        # candidates arrive ordered by similarity; walk them once and stop either
        # when n items are collected or when the candidates are exhausted
        # (the previous index-based loop raised IndexError in the latter case)
        for candidate in d2v.dv.most_similar([embedding], topn=n_candidates):
            if len(user_top_n) >= n:
                break
            item = candidate[0]
            if item not in rated_items:
                user_top_n.append(item)

        top_ns[user] = user_top_n

    return top_ns

In [None]:
top_ns = get_top_n(model, user_embeddings, 10)

In [None]:
# Spot check: draw one reviewer at random from the training data.
# NOTE(review): no random seed is set in the visible cells, so the chosen user
# presumably differs between runs — confirm whether that is intended.
random_user = np.random.choice(list(train['reviewerID'].unique()), 1)[0]
print(f"For user: {random_user}:")
# rows of `train` belonging to this reviewer, shown as (asin, title)
print(f"Purchase History:\n{train[train['reviewerID'] == random_user][['asin', 'title']]}")

# Look up the titles of the recommended asins through `train`. The filtered rows
# keep the order they have in `train`, not the ranking stored in `top_ns`.
print(f"\nRecommending:\n")
print(f"{train[train['asin'].isin(top_ns[random_user])][['asin', 'title']].drop_duplicates(subset='asin')}")

# Evaluate Top-N Recommendations (N=10)

### Defining Evaluation Metrics

In [21]:
def precision_at_k(asins, predicted_asins, k=10):
    """Compute precision@k for a single user.

    Args:
        asins: Items the user actually interacted with (the relevant items).
        predicted_asins: Recommended items, expected in ranked order.
        k: Cut-off; only the first ``k`` recommendations are scored.

    Returns:
        float: Number of relevant items among the first ``k`` recommendations,
            divided by ``k``.
    """
    # only the first k recommendations may count, otherwise a longer list
    # would inflate the score (and could push it above 1.0)
    top_k = set(list(predicted_asins)[:k])
    num_relevant = len(set(asins) & top_k)

    # precision@K - relevant / total recommended
    return num_relevant / k

def recall_at_k(asins, predicted_asins, k=10):
    """Compute recall@k for a single user.

    Args:
        asins: Items the user actually interacted with (the relevant items).
            Duplicates are counted once.
        predicted_asins: Recommended items, expected in ranked order.
        k: Cut-off; only the first ``k`` recommendations are scored.

    Returns:
        float: Number of relevant items among the first ``k`` recommendations,
            divided by the number of distinct relevant items. ``0.0`` when there
            are no relevant items.
    """
    set_actual = set(asins)
    if not set_actual:
        # nothing to retrieve; avoid dividing by zero
        return 0.0

    # only the first k recommendations may count towards recall@k
    top_k = set(list(predicted_asins)[:k])
    num_relevant = len(set_actual & top_k)

    # recall@K - relevant / total relevant items (distinct, so recall stays <= 1)
    return num_relevant / len(set_actual)

In [16]:
# loading test dataset
test = pd.read_csv(f"{DATA_PATH}/{CATEGORY}_test.csv")

In [17]:
# preview the first and last five rows of the test data
# (pd.concat is used because DataFrame.append was removed in pandas 2.0)
pd.concat([test.head(), test.tail()])

Unnamed: 0,index,asin,title,categories,reviewerID,overall,reviewText,reviewTime,processedReviewText
0,6,0000031887,Ballet Dress-Up Fairy Tutu,"[['Clothing, Shoes & Jewelry', 'Girls', 'Cloth...",A16GFPNVF4Y816,5.0,Bought this as a backup to the regular ballet ...,2014-05-03,bought backup regular ballet outfit daughter w...
1,17,0000031887,Ballet Dress-Up Fairy Tutu,"[['Clothing, Shoes & Jewelry', 'Girls', 'Cloth...",A2XJ13PIXVJFJH,1.0,Never GOT this item - but gave a 1 STAR becaus...,2014-05-12,item star reply supplier great try send item r...
2,23,0123456479,SHINING IMAGE HUGE PINK LEATHER JEWELRY BOX / ...,"[['Clothing, Shoes & Jewelry', 'Novelty, Costu...",A2WNN1DQVL4LH5,5.0,The minute I saw this my heart skipped a beat....,2013-11-07,minute saw heart skip beat nice case sort coll...
3,24,0123456479,SHINING IMAGE HUGE PINK LEATHER JEWELRY BOX / ...,"[['Clothing, Shoes & Jewelry', 'Novelty, Costu...",A1ZPOCG2ST2CY3,5.0,Love this Jewelry Box so well put together ho...,2014-01-19,love jewelry box hold plendy love pink look ni...
4,27,0123456479,SHINING IMAGE HUGE PINK LEATHER JEWELRY BOX / ...,"[['Clothing, Shoes & Jewelry', 'Novelty, Costu...",A1JC50F14SLAEV,3.0,I wanted to have the title summarize my though...,2014-05-12,want title summarize thought decide read entir...
99497,278341,B00KF9180W,[2 PACK] Multi-Purpose Sports Balaclava - For ...,"[['Clothing, Shoes & Jewelry', 'Men', 'Accesso...",A1EVV74UQYVKRY,4.0,I go walking a lot in all kinds of weather and...,2014-06-16,walk lot kind weather know lot like block cold...
99498,278342,B00KF9180W,[2 PACK] Multi-Purpose Sports Balaclava - For ...,"[['Clothing, Shoes & Jewelry', 'Men', 'Accesso...",ABUE0ALHKWKHC,5.0,This two pack of Balaclavas makes for a very n...,2014-06-09,pack balaclava nice purchase balaclava fit snu...
99499,278343,B00KF9180W,[2 PACK] Multi-Purpose Sports Balaclava - For ...,"[['Clothing, Shoes & Jewelry', 'Men', 'Accesso...",A1PI8VBCXXSGC7,5.0,"Well, the first thing I did was try the balacl...",2014-06-13,thing try balaclava hubby try fit average size...
99500,278344,B00KF9180W,[2 PACK] Multi-Purpose Sports Balaclava - For ...,"[['Clothing, Shoes & Jewelry', 'Men', 'Accesso...",A2XX2A4OJCDNLZ,5.0,While balaclavas can be used for a variety of ...,2014-06-13,balaclava use variety thing use mainly late fa...
99501,278346,B00KF9180W,[2 PACK] Multi-Purpose Sports Balaclava - For ...,"[['Clothing, Shoes & Jewelry', 'Men', 'Accesso...",A3UJRNI8UR4871,4.0,"Nice material, but not as nice as silk or mer...",2014-06-09,nice material nice silk merino wool course mat...


In [18]:
# generating test rating history
test_user_history = (pd.DataFrame(test.groupby(['reviewerID'])['asin']
                                  .apply(list).reset_index()))

In [None]:
test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

In [None]:
# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

In [None]:
test_merged.head()

In [None]:
# per-user precision with the metric helper's default cut-off
test_merged['precision@k'] = test_merged.progress_apply(
    lambda row: precision_at_k(row['asin'], row['pred_asin']),
    axis=1,
)
# per-user recall with the metric helper's default cut-off
test_merged['recall@k'] = test_merged.progress_apply(
    lambda row: recall_at_k(row['asin'], row['pred_asin']),
    axis=1,
)

In [None]:
test_merged.head()

In [None]:
# average the per-user scores over every merged test user and report them
k = 10
average_recall_at_k = test_merged["recall@k"].mean()
average_precision_at_k = test_merged["precision@k"].mean()

summary = f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}."
print(summary)

# Looking at Relevant Recommendations

In [None]:
test_merged[test_merged['recall@k'] > 0]

# Generating Top-N Recommendations (N=25)

In [None]:
top_ns = get_top_n(model, user_embeddings, 25)

In [None]:
test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

In [None]:
# cut-off shared by the metrics and the report below
k = 25

# per-user precision and recall at the cut-off
test_merged['precision@k'] = test_merged.progress_apply(
    lambda row: precision_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)
test_merged['recall@k'] = test_merged.progress_apply(
    lambda row: recall_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)

# averages over all merged test users
average_recall_at_k = test_merged["recall@k"].mean()
average_precision_at_k = test_merged["precision@k"].mean()

summary = f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}."
print(summary)

In [None]:
test_merged[test_merged['recall@k'] > 0]

# Generating Top-N Recommendations (N=30)

In [None]:
top_ns = get_top_n(model, user_embeddings, 30)

In [None]:
test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

In [None]:
# cut-off shared by the metrics and the report below
k = 30

# per-user precision and recall at the cut-off
test_merged['precision@k'] = test_merged.progress_apply(
    lambda row: precision_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)
test_merged['recall@k'] = test_merged.progress_apply(
    lambda row: recall_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)

# averages over all merged test users
average_recall_at_k = test_merged["recall@k"].mean()
average_precision_at_k = test_merged["precision@k"].mean()

summary = f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}."
print(summary)

In [None]:
test_merged[test_merged['recall@k'] > 0]

# Generating Top-N Recommendations (N=45)

In [None]:
top_ns = get_top_n(model, user_embeddings, 45)

In [None]:
test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# combined test dataset and recommendations
test_merged = pd.merge(test_user_history, test_recommendations, on='reviewerID', how='inner')

In [None]:
# cut-off shared by the metrics and the report below
k = 45

# per-user precision and recall at the cut-off
test_merged['precision@k'] = test_merged.progress_apply(
    lambda row: precision_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)
test_merged['recall@k'] = test_merged.progress_apply(
    lambda row: recall_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)

# averages over all merged test users
average_recall_at_k = test_merged["recall@k"].mean()
average_precision_at_k = test_merged["precision@k"].mean()

summary = f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}."
print(summary)

In [None]:
test_merged[test_merged['recall@k'] > 0]

# Evaluating `EmbeddedCF` class

In [7]:
class EmbeddedCF():
    """Collaborative filtering on top of Doc2Vec item vectors.

    A user is represented by the mean of the Doc2Vec vectors of the items they
    rated in the training data; candidate items for a user are the items whose
    vectors are most similar to that mean vector.

    Attributes:
        d2v: The Doc2Vec model passed to the constructor.
        user_rating_history: Per-user list of rated asins; ``None`` until ``fit``.
        user_embeddings: Dict of user -> mean item vector; ``None`` until ``fit``.
    """
    def __init__(self, d2v):
        self.d2v = d2v
        self.user_rating_history = None
        self.user_embeddings = None

    def fit(self, train, dimension=50):
        """Build the rating history and the mean-aggregated embedding of every user.

        Args:
            train: DataFrame with at least the columns ``reviewerID`` and ``asin``.
            dimension: Length of the user vectors; it has to match the length of
                the vectors returned by ``d2v.dv[item]``.
        """
        # get user rating history
        user_rating_history = train.groupby(['reviewerID'])['asin'].apply(list)

        # generating user embeddings for all unique users
        user_embeddings = {}

        # iterate (user, rated items) pairs directly so each user needs no label lookup
        for user, rated_items in tqdm(user_rating_history.items(),
                                      total=len(user_rating_history)):
            user_embedding = np.zeros(dimension)
            for item in rated_items:
                user_embedding += self.d2v.dv[item]

            # mean aggregation
            user_embedding /= len(rated_items)
            user_embeddings[user] = user_embedding

        self.user_rating_history = user_rating_history
        self.user_embeddings = user_embeddings

    def predict(self, n=200):
        """Generate a list of n-number of candidates items.

        This only generates a generic candidate list of items which do not factor
        in existing rated items and also top-N items required for recommendations.

        Args:
            n: Number of candidates requested from ``d2v.dv.most_similar`` per user.

        Returns:
            dict: user -> list of the entries returned by ``most_similar`` for the
                user's embedding.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.user_embeddings is None:
            # fail with a clear message instead of an AttributeError on None
            raise RuntimeError("EmbeddedCF.predict() called before fit().")

        candidate_items = {}
        for user, embedding in tqdm(self.user_embeddings.items()):
            candidate_items[user] = list(self.d2v.dv.most_similar([embedding], topn=n))

        return candidate_items


In [8]:
mem_ecf = EmbeddedCF(model)

In [9]:
mem_ecf.fit(train, dimension=50)

100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39386/39386 [00:01<00:00, 36545.57it/s]


In [10]:
mem_ecf.user_rating_history

reviewerID
A001114613O3F18Q5NVR6     [B0016JNS44, B001T54XA8, B004AZXO1I, B004QJWKLS]
A00146182PNM90WNNAZ5Q     [B000JJX7C0, B000MX3SH2, B003CO205E, B008JXDFCU]
A00165422B2GAUE3EL6Z0     [B007WADN4G, B007WAEBPQ, B007WAT3I6, B008G51WHQ]
A00338282E99B8OR2JYTZ                 [B002FA5B8O, B003F06XQW, B00768LFYY]
A00354001GE099Q1FL0TU                 [B00387EEYA, B003RYZY8E, B0058XN9ZC]
                                               ...                        
AZZMQ85DPFEG3            [B0007KPPAI, B005EYUQ7E, B005VEMVI4, B007LOTZ5...
AZZNK89PXD006                         [B000KGOHLM, B005C3DH00, B005PQPLLC]
AZZT1ERHBSNQ8            [B000UANLGU, B002ATSG8C, B002UTJVMM, B007P83XJ...
AZZTOUKVTUMVM             [B000196UJ0, B009GE3XQ4, B00C2DJ66C, B00C4NV6LS]
AZZYW4YOE1B6E                         [B003V4AKTS, B004G8GOW0, B007LTV82W]
Name: asin, Length: 39386, dtype: object

In [11]:
predictions = mem_ecf.predict(n=200)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 39386/39386 [00:28<00:00, 1368.28it/s]


In [13]:
# check predictions
predictions['A001114613O3F18Q5NVR6'][:5]

[('B0016JNS44', 0.8733696341514587),
 ('B004AZXO1I', 0.8602843284606934),
 ('B0022JV0ZG', 0.8170426487922668),
 ('B002RZQIDO', 0.8118180632591248),
 ('B004QJWKLS', 0.8106290102005005)]

In [14]:
def get_top_n(predictions, user_rating_history, n=10):
    """Return the top-N recommendations for each user based on cosine similarity.

    NOTE: this definition shares its name with the earlier
    ``get_top_n(d2v, user_embeddings, n)`` in this notebook and replaces it once
    this cell has run.

    Args:
        predictions ([dict]): Maps each user to their candidate list; every
            candidate is a sequence whose first element is the item id. The list
            is expected to be ordered from most to least similar.
        user_rating_history: Maps each user to the items they already rated
            (indexed as ``user_rating_history[user]``).
        n ([int]): Number of recommendations to keep per user.

    Returns:
        ([dict]): A dictionary of top-N recommendations for each unique user, sorted by
            cosine similarities. A user's list holds fewer than ``n`` items when the
            candidate list runs out of unrated items.
    """

    top_ns = {}
    for user in tqdm(predictions):
        rated_items = set(user_rating_history[user])

        user_top_n = []
        # walk the candidates once, in order, and stop either when n items are
        # collected or when the candidates are exhausted (the previous
        # index-based loop raised IndexError in the latter case)
        for candidate in predictions[user]:
            if len(user_top_n) >= n:
                break
            item = candidate[0]
            if item not in rated_items:
                user_top_n.append(item)

        top_ns[user] = user_top_n

    return top_ns

### N=10

In [22]:
# cut-off used for the recommendations, the metrics and the report
k = 10

# keep the k best unrated candidates per user and put them in a frame
top_ns = get_top_n(predictions, mem_ecf.user_rating_history, n=k)
test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# pair each test user's items with that user's recommendations
test_merged = test_user_history.merge(test_recommendations, on='reviewerID', how='inner')

# per-user precision and recall at the cut-off
test_merged['precision@k'] = test_merged.progress_apply(
    lambda row: precision_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)
test_merged['recall@k'] = test_merged.progress_apply(
    lambda row: recall_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)

# averages over all merged test users
average_recall_at_k = test_merged["recall@k"].mean()
average_precision_at_k = test_merged["precision@k"].mean()

summary = f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}."
print(summary)

100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39386/39386 [00:01<00:00, 30964.69it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39363/39363 [00:00<00:00, 50576.71it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39363/39363 [00:00<00:00, 52982.10it/s]

The MEM-ECF has a average precision@10: 0.00277, average recall@10: 0.01173.





### N=25

In [23]:
# cut-off used for the recommendations, the metrics and the report
k = 25

# keep the k best unrated candidates per user and put them in a frame
top_ns = get_top_n(predictions, mem_ecf.user_rating_history, n=k)
test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# pair each test user's items with that user's recommendations
test_merged = test_user_history.merge(test_recommendations, on='reviewerID', how='inner')

# per-user precision and recall at the cut-off
test_merged['precision@k'] = test_merged.progress_apply(
    lambda row: precision_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)
test_merged['recall@k'] = test_merged.progress_apply(
    lambda row: recall_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)

# averages over all merged test users
average_recall_at_k = test_merged["recall@k"].mean()
average_precision_at_k = test_merged["precision@k"].mean()

summary = f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}."
print(summary)

100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39386/39386 [00:01<00:00, 26816.42it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39363/39363 [00:00<00:00, 48438.72it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39363/39363 [00:00<00:00, 45829.21it/s]

The MEM-ECF has a average precision@25: 0.00224, average recall@25: 0.02344.





### N=30

In [24]:
# cut-off used for the recommendations, the metrics and the report
k = 30

# keep the k best unrated candidates per user and put them in a frame
top_ns = get_top_n(predictions, mem_ecf.user_rating_history, n=k)
test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# pair each test user's items with that user's recommendations
test_merged = test_user_history.merge(test_recommendations, on='reviewerID', how='inner')

# per-user precision and recall at the cut-off
test_merged['precision@k'] = test_merged.progress_apply(
    lambda row: precision_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)
test_merged['recall@k'] = test_merged.progress_apply(
    lambda row: recall_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)

# averages over all merged test users
average_recall_at_k = test_merged["recall@k"].mean()
average_precision_at_k = test_merged["precision@k"].mean()

summary = f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}."
print(summary)

100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39386/39386 [00:01<00:00, 22943.74it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39363/39363 [00:00<00:00, 47655.36it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39363/39363 [00:00<00:00, 48280.91it/s]

The MEM-ECF has a average precision@30: 0.00215, average recall@30: 0.02710.





### N=45

In [25]:
# cut-off used for the recommendations, the metrics and the report
k = 45

# keep the k best unrated candidates per user and put them in a frame
top_ns = get_top_n(predictions, mem_ecf.user_rating_history, n=k)
test_recommendations = pd.DataFrame(top_ns.items(), columns=['reviewerID', 'pred_asin'])

# pair each test user's items with that user's recommendations
test_merged = test_user_history.merge(test_recommendations, on='reviewerID', how='inner')

# per-user precision and recall at the cut-off
test_merged['precision@k'] = test_merged.progress_apply(
    lambda row: precision_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)
test_merged['recall@k'] = test_merged.progress_apply(
    lambda row: recall_at_k(row['asin'], row['pred_asin'], k=k),
    axis=1,
)

# averages over all merged test users
average_recall_at_k = test_merged["recall@k"].mean()
average_precision_at_k = test_merged["precision@k"].mean()

summary = f"The MEM-ECF has a average precision@{k}: {average_precision_at_k:.5f}, average recall@{k}: {average_recall_at_k:.5f}."
print(summary)

100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39386/39386 [00:01<00:00, 20836.49it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39363/39363 [00:00<00:00, 44207.96it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████| 39363/39363 [00:00<00:00, 46873.38it/s]

The MEM-ECF has a average precision@45: 0.00191, average recall@45: 0.03592.



