In [1]:
import os
import sys

# Make the project checkout importable from this notebook.
# expanduser('~') yields the same directory as $HOME when HOME is set, but still
# resolves a home directory when it is not (os.environ.get("HOME") would return None
# and silently produce the bogus path "None/workspace/...").
project_root = os.path.join(os.path.expanduser('~'), 'workspace', 'recommendation-study')
# Guard the insert so re-running this cell does not stack duplicate entries.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [2]:
from util.data import DataLoader

# Build the project's DataLoader, then keep whatever load() returns in `dataset`.
loader = DataLoader()
dataset = loader.load()

In [3]:
from util.models import RecommendResult, Metrics
from recommend.base import BaseRecommender
import numpy as np
import gensim

In [4]:
# Seed NumPy's legacy global random generator.
# NOTE(review): gensim's Word2Vec takes its own `seed` argument; confirm whether this
# global seed has any effect on the training done below.
SEED = 0
np.random.seed(SEED)

In [7]:

from util.models import Dataset, RecommendResult


class Word2VecCollaborativeFilteringRecommender(BaseRecommender):
    """Item2vec-style recommender built on gensim's Word2Vec.

    Each user's highly rated movies (rating >= 4), ordered by timestamp, form one
    "sentence". Word2Vec learns an embedding per movie from those sentences, and a
    user's recommendations are the movies most similar to the ones they rated highly.
    """

    def recommend(self, dataset: Dataset, k: int, **kwargs) -> RecommendResult:
        """Train Word2Vec on the training ratings and recommend up to ``k`` movies per user.

        Args:
            dataset: Object exposing ``train`` (a DataFrame with ``user_id``,
                ``movie_id``, ``rating`` and ``timestamp`` columns) and ``test``
                (with a ``rating`` column).
            k: Maximum number of movies to recommend per user.
            **kwargs: Optional hyperparameters passed to ``gensim.models.Word2Vec``:
                ``vector_size`` (default 128), ``n_epochs`` (50, passed as ``epochs``),
                ``window_size`` (30, passed as ``window``), ``skip_gram`` (1, passed as
                ``sg``), ``use_hierarchial_softmax`` (0, passed as ``hs``; the misspelt
                key is kept so existing callers keep working), ``min_count`` (5),
                ``seed`` (1) and ``workers`` (3).

        Returns:
            A ``RecommendResult`` built from ``dataset.test.rating`` and a dict that
            maps every user present in ``dataset.train`` to a list of at most ``k``
            recommended movie ids. The list is empty when the user has no highly
            rated movie that made it into the Word2Vec vocabulary. Movies the user
            already rated in ``dataset.train`` are never recommended.
        """
        vector_size = kwargs.get('vector_size', 128)
        n_epochs = kwargs.get('n_epochs', 50)
        window_size = kwargs.get('window_size', 30)
        skip_gram = kwargs.get('skip_gram', 1)
        use_hierarchial_softmax = kwargs.get('use_hierarchial_softmax', 0)
        min_count = kwargs.get('min_count', 5)
        # Defaults mirror gensim's own, so behaviour is unchanged unless a caller opts in.
        # NumPy's global seed does not control gensim; pass seed (and workers=1) here
        # when reproducible embeddings are needed.
        seed = kwargs.get('seed', 1)
        workers = kwargs.get('workers', 3)

        train = dataset.train
        high_rating = train[train.rating >= 4]

        # One chronologically ordered list of highly rated movies per user. Built once
        # and reused both as the training corpus and as each user's similarity query.
        liked_sequences = {
            user_id: ratings.sort_values('timestamp')['movie_id'].tolist()
            for user_id, ratings in high_rating.groupby('user_id')
        }

        # Every movie each user rated in training (any rating). Iterating this dict
        # also guarantees that users without a single high rating still get an entry.
        seen_items = {
            user_id: set(movie_ids.tolist())
            for user_id, movie_ids in train.groupby('user_id')['movie_id']
        }

        # Word2Vec discards items occurring fewer than min_count times and refuses to
        # train on an empty vocabulary, so detect that case up front and return empty
        # recommendations instead of crashing.
        item_counts = high_rating['movie_id'].value_counts()
        if not bool((item_counts >= min_count).any()):
            empty_predictions = {user_id: [] for user_id in seen_items}
            return RecommendResult(dataset.test.rating, empty_predictions)

        model = gensim.models.Word2Vec(
            sentences=list(liked_sequences.values()),
            vector_size=vector_size,
            epochs=n_epochs,
            window=window_size,
            sg=skip_gram,
            hs=use_hierarchial_softmax,
            min_count=min_count,
            seed=seed,
            workers=workers,
        )

        vocabs = model.wv.key_to_index

        pred_user2items = {}
        for user_id, seen in seen_items.items():
            # Only items that survived the min_count cut have an embedding to query with.
            query = [item_id for item_id in liked_sequences.get(user_id, []) if item_id in vocabs]
            if not query:
                pred_user2items[user_id] = []
                continue

            # most_similar leaves out the query items themselves, but other movies the
            # user already rated (e.g. with a low rating) can still come back. Ask for
            # enough extra neighbours that k remain after those are filtered out.
            n_filterable = len(seen - set(query))
            most_similars = model.wv.most_similar(query, topn=k + n_filterable)
            unseen = [item_id for item_id, _ in most_similars if item_id not in seen]
            pred_user2items[user_id] = unseen[:k]

        return RecommendResult(dataset.test.rating, pred_user2items)

In [8]:
# Instantiate the recommender defined above.
recommender = Word2VecCollaborativeFilteringRecommender()
# run_sample() is not defined in this notebook, so it is inherited from BaseRecommender.
# Presumably it calls recommend() on a sample dataset and returns the evaluation
# metrics — TODO confirm against recommend/base.py.
metrics = recommender.run_sample()
# print() shows the metrics in their str() form.
print(metrics)

rmse: 0.000, precision@K: 0.000, recall@K: 0.000
