In [44]:
import pickle
import numpy as np

In [45]:
# Assumes all prediction dicts hold results for the same fixed set of ~2k users; otherwise an explicit per-user ID check is needed.
class Ensemble():
    '''
    Picks, for every user, the best of three recommenders' candidate lists.

    "Best" means the highest AP@k (see ``apk``) of a candidate list against
    that user's actual purchases. Ties go to the earlier entry of
    ``_MODEL_NAMES``.
    '''

    # Keys of ``self.predictions``. The order is also the tie-breaking priority,
    # because np.argmax returns the index of the first maximum.
    _MODEL_NAMES = ('most popular', 'item based', 'user based')

    def __init__(self, most_popular_prediction, item_based_prediction, user_based_prediction, actual_purchases):
        '''
        :param most_popular_prediction: dictionary like {user_id: [predicted products]}
        :param item_based_prediction: dictionary like {user_id: [predicted products]}
        :param user_based_prediction: dictionary like {user_id: [predicted products]}
        :param actual_purchases: dictionary like {user_id: [actual products]}
        '''
        self.predictions = {'most popular': most_popular_prediction,
                            'item based': item_based_prediction,
                            'user based': user_based_prediction}
        self.actual = actual_purchases

    # AP@k
    def apk(self, actual, predicted, k=10):
        '''
        Average precision at k of one ranked list of predictions.

        :param actual: collection of products the user really bought
        :param predicted: ranked sequence of predicted products, best first
        :param k: how many top predictions are scored
        :return: float score; 0.0 when ``actual`` is empty/None or ``k`` < 1
        '''
        # Nothing to hit, or no positions to score. This also keeps the final
        # division away from min(len(actual), k) being zero or negative.
        if actual is None or len(actual) == 0 or k < 1:
            return 0.0

        top_k = predicted[:k]
        hits = 0
        score = 0.0
        for rank, product in enumerate(top_k, start=1):
            # A product repeated in the ranking is credited only the first time.
            if product in actual and product not in top_k[:rank - 1]:
                hits += 1
                score += hits / rank

        return score / min(len(actual), k)

    def fit(self, users_ids=False):
        '''
        Choose the best candidate list for each requested user and store the
        outcome in ``self.results``.

        :param users_ids: iterable with IDs of interesting users (2k); when it is
            omitted (False/None) or empty, every user present in the
            user-based predictions is used
        :return: dict {user_id: [products ids from best prediction]}
        '''
        # Materialise into a list before truth-testing: a bare ``not users_ids``
        # raises ValueError for a multi-element numpy array.
        if users_ids is None or users_ids is False:
            selected = []
        else:
            selected = list(users_ids)
        if not selected:
            selected = list(self.predictions['user based'])

        self.results = self.get_best_predictions(users_ids=selected)
        return self.results

    def get_best_predictions(self, users_ids):
        '''
        :param users_ids: iterable of user IDs to evaluate
        :return: dict {user_id: candidate list with the highest AP@k}; users
            missing from any of the three prediction dicts are left out
        '''
        results = {}

        for user_id in users_ids:
            # Only users covered by all three recommenders can be compared.
            if not all(user_id in self.predictions[name] for name in self._MODEL_NAMES):
                continue

            # A user without a purchase record is treated as having bought
            # nothing: every score is then 0.0 and the first model wins.
            purchased = self.actual.get(user_id, [])
            scores = [self.apk(purchased, self.predictions[name][user_id])
                      for name in self._MODEL_NAMES]

            best_model = self._MODEL_NAMES[int(np.argmax(scores))]
            results[user_id] = self.predictions[best_model][user_id]

        return results

    def predict(self, user_id=False):
        '''
        :param user_id: ID of a fitted user; omit (False/None) to get everything
        :return: list of products for ``user_id``, or the whole results dict
        :raises KeyError: if ``user_id`` has no stored result
        '''
        # Identity checks instead of truthiness, so that user id 0 is looked up
        # rather than mistaken for "no user given".
        if user_id is False or user_id is None:
            return self.results
        return self.results[user_id]

In [46]:
# Load previously saved user-based predictions from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load files that come from a trusted source.
# NOTE(review): presumably a {user_id: [products]} dict, to be confirmed.
# `user_based_prediction` is not referenced by the later cells shown here.
with open('data/user_based_first_2k.pickle', 'rb') as f:
    user_based_prediction = pickle.load(f)

In [47]:
# Load previously saved item-based predictions from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load files that come from a trusted source.
# NOTE(review): presumably a {user_id: [products]} dict, to be confirmed.
# `item_based_prediction` is not referenced by the later cells shown here.
with open('data/item_based_random_2k.pickle', 'rb') as f:
    item_based_prediction = pickle.load(f)

In [48]:
# Toy fixtures for a quick sanity check, keyed by user id (1, 2, 5):
# candidate lists from the most-popular (mp), item-based (ib) and
# user-based (ub) recommenders, plus the products actually bought.
mp = {
    1: [1, 2, 10, 4],
    2: [3, 4, 5, 6],
    5: [1, 2, 3, 4, 5],
}
ib = {
    1: [6, 2, 3, 12],
    2: [3, 5, 4, 6],
    5: [5, 22, 3, 4],
}
ub = {
    1: [1, 2, 3, 12],
    2: [6, 4, 5, 9],
    5: [1, 11, 4, 8],
}
# NOTE(review): user 5 lists product 5 twice. apk divides by len(actual), so
# the duplicate lowers that user's scores. Confirm whether this is intended.
actual = {
    1: [1, 2, 3, 11],
    2: [3, 4, 5, 9],
    5: [5, 11, 3, 5],
}

In [49]:
# Keyword arguments spell out which dict plays which role.
predictor = Ensemble(
    most_popular_prediction=mp,
    item_based_prediction=ib,
    user_based_prediction=ub,
    actual_purchases=actual,
)

In [50]:
# With no ids given, fit() evaluates every user found in the user-based
# predictions and keeps, per user, the candidate list with the highest AP@10.
# The returned dict is shown as the cell output.
predictor.fit()

[1, 2, 5]


{1: [1, 2, 3, 12], 2: [3, 4, 5, 6], 5: [5, 22, 3, 4]}