In [1]:
import os
import sys

# Put the local project checkout at the front of the import path
# (presumably where the `util` and `base` packages imported below live — confirm).
# expanduser("~") is used instead of os.environ.get("HOME") so that an unset HOME
# cannot produce the literal path "None/workspace/recommendation-study".
project_root = f'{os.path.expanduser("~")}/workspace/recommendation-study'
if project_root not in sys.path:  # re-running the cell must not stack duplicate entries
    sys.path.insert(0, project_root)

In [2]:
from collections import defaultdict
import numpy as np
from util.models import Dataset, RecommendResult
from base import BaseRecommender

# Seed NumPy's legacy global random generator so np.random draws repeat across runs.
SEED = 0
np.random.seed(SEED)

In [11]:
class PopularityRecommender(BaseRecommender):
    """Non-personalized baseline that ranks movies by their mean training rating.

    The mean rating of a movie in the training data is used as the predicted
    rating for every test row of that movie. The same rating-ordered candidate
    list, minus the movies a user already has in the training data, is offered
    to every user.
    """

    def recommend(self, dataset: Dataset, k: int, **kwargs) -> RecommendResult:
        """Predict test ratings and build a top-``k`` recommendation list per user.

        Args:
            dataset: Object exposing ``train`` and ``test`` frames. ``train`` is
                read for its ``user_id``, ``movie_id`` and ``rating`` columns;
                ``test`` is joined on ``movie_id`` and must also carry a
                ``rating`` column so the merged prediction is named ``rating_pred``.
            k: Maximum number of movies to recommend per user. A non-positive
                value yields an empty list for every user.
            **kwargs: ``min_rating_size`` (default 200) is the minimum number of
                training ratings a movie needs to be a recommendation candidate.

        Returns:
            A ``RecommendResult`` whose ``rating`` is the per-test-row predicted
            rating (0 for movies that never appear in the training data) and
            whose ``user2items`` maps each training user to their recommended
            movie ids, best-rated first.
        """
        min_rating_size = kwargs.get('min_rating_size', 200)
        train = dataset.train

        # Group once to get both the rating count and the mean per movie. String
        # aggregation names avoid handing NumPy callables to pandas.
        movie_stats = train.groupby('movie_id')['rating'].agg(['size', 'mean'])

        # Rating prediction: mean training rating of the movie, 0 when unseen.
        rating_average = movie_stats[['mean']].rename(columns={'mean': 'rating'})
        test_df = dataset.test.merge(
            rating_average, on='movie_id', how='left', suffixes=('_test', '_pred')
        )
        rating_pred = test_df['rating_pred'].fillna(0)

        # Candidates: movies with enough ratings, ordered by mean rating (descending).
        frequently_rated = movie_stats[movie_stats['size'] >= min_rating_size]
        movies_sortedby_rating = (
            frequently_rated.sort_values(by='mean', ascending=False).index.to_list()
        )

        user_watched_movies = (
            train.groupby('user_id').agg({'movie_id': list})['movie_id'].to_dict()
        )

        pred_user2items = defaultdict(list)
        for user_id in train.user_id.unique():
            # A set makes each "already rated?" lookup O(1) instead of a list scan.
            watched = set(user_watched_movies[user_id])
            # Touching the key guarantees every user has an entry, even an empty one.
            recommended = pred_user2items[user_id]
            if k <= 0:
                continue
            for movie_id in movies_sortedby_rating:
                if movie_id in watched:
                    continue
                recommended.append(movie_id)
                if len(recommended) >= k:
                    break

        return RecommendResult(rating=rating_pred, user2items=pred_user2items)


In [12]:
# Evaluate the popularity baseline via the run_sample() helper and print the metrics it returns.
recommender = PopularityRecommender()
metrics = recommender.run_sample()
print(metrics)

rmse: 1.082, precision@K: 0.012, recall@K: 0.039
