In [1]:
import sys
from pathlib import Path
# Make modules that live next to this notebook importable.
# sys.path entries must be plain strings: the import system ignores any other
# type, so the Path object has to be converted before it is appended.
sys.path.append(str(Path().absolute()))

In [2]:
from item_based_recommender import ItemBasedRecommender, split
from user_based_recommender import UserBasedRecommender
from most_popular_recommender import MostPopularRecommender
import pandas as pd
import numpy as np
import pickle
import random

In [9]:
# --- Load the raw tables ----------------------------------------------------
orders = pd.read_csv('orders.csv')
order_products_train = pd.read_csv('order_products__train.csv')
order_products_prior = pd.read_csv('order_products__prior.csv')

# Stack both order -> product tables, keep only the two key columns and persist
# the result (pipeline() re-reads 'order_products.csv' from disk).
order_products = pd.concat([order_products_train, order_products_prior])
order_products = order_products[['order_id', 'product_id']]
order_products.to_csv('order_products.csv', index=False)

# split() is given the full orders frame; the column trim happens afterwards.
train_indices, validation_indices, test_indices = split(orders)
orders = orders[['user_id', 'order_id']]

# Row subsets selected by index label.
validation_df = orders.loc[validation_indices]
test_df = orders.loc[test_indices]

# The saved training file holds the train rows followed by the validation rows.
train_df = pd.concat([orders.loc[train_indices], validation_df])
train_df.to_csv('train_valid.csv', index=False)

In [12]:
def _sample_predictions(recommender, recommender_class_name, user_id, predictions_num):
    """Return up to `predictions_num` randomly chosen predictions for one user.

    The way predictions are extracted depends on the recommender class name;
    an unrecognised class name yields an empty list.
    """
    if recommender_class_name == 'MostPopularRecommender':
        candidates = recommender.predict([user_id], 100)['predictions'][0]
    elif recommender_class_name == 'UserBasedRecommender':
        candidates = recommender.predict(user_id)[user_id]
    elif recommender_class_name == 'ItemBasedRecommender':
        candidates = recommender.predict(user_id)
    else:
        return []

    # random.sample needs a real sequence and raises ValueError when asked for
    # more items than are available, so materialise the candidates and clamp
    # the sample size instead of aborting the whole run on a short list.
    candidates = list(candidates)
    return random.sample(candidates, min(predictions_num, len(candidates)))


def pipeline(recommender, loop_num, user_num, predictions_num):
    """Fit `recommender` `loop_num` times and pickle sampled predictions per run.

    Every iteration re-reads 'orders.csv' and 'order_products.csv', re-splits
    the orders with `split`, rewrites 'train_valid.csv' with the train and
    validation rows, refreshes the recommender's data attributes (only for
    MostPopularRecommender and ItemBasedRecommender), calls `fit()` and stores
    a random sample of predictions for user ids 1..user_num in
    '<predictions_num>predBy<ClassName><i>.pickle'.

    Args:
        recommender: object exposing `fit()` and `predict(...)`; dispatch is
            done on its class name.
        loop_num: number of split/fit/predict iterations (one pickle each).
        user_num: predictions are produced for user ids 1..user_num inclusive.
        predictions_num: maximum number of predictions kept per user; fewer
            are kept when the recommender returns fewer candidates.

    Returns:
        None. Results are written to pickle files in the working directory.
    """
    recommender_class_name = type(recommender).__name__
    for i in range(loop_num):
        # Re-read on every iteration; presumably so each run starts from
        # unmodified frames -- TODO confirm before hoisting out of the loop.
        orders = pd.read_csv('orders.csv')
        order_products = pd.read_csv('order_products.csv')
        train_indices, validation_indices, test_indices = split(orders)

        orders = orders[['user_id', 'order_id']]
        # Training data = train rows followed by validation rows.
        train_df = orders.loc[np.concatenate([train_indices, validation_indices])]
        train_df.to_csv('train_valid.csv', index=False)
        validation_df = orders.loc[validation_indices]
        test_df = orders.loc[test_indices]

        # UserBasedRecommender gets no attributes here; presumably it reads
        # 'train_valid.csv' itself -- TODO confirm.
        if recommender_class_name == 'MostPopularRecommender':
            recommender.train = train_df
            recommender.validation = validation_df
            recommender.test = test_df
            recommender.order_products_df = order_products
        elif recommender_class_name == 'ItemBasedRecommender':
            recommender.train_data = pd.merge(train_df, order_products, on='order_id')
            recommender.validation_data = pd.merge(validation_df, order_products, on='order_id')
            recommender.test_data = pd.merge(test_df, order_products, on='order_id')

        recommender.fit()

        recommendations = {
            user_id: _sample_predictions(
                recommender, recommender_class_name, user_id, predictions_num
            )
            for user_id in range(1, user_num + 1)
        }

        # For now the predictions are only saved to disk.
        # TODO: later pass them to a function that adds everything to the main tables.
        with open(str(predictions_num) + 'predBy' + recommender_class_name + str(i) + '.pickle', 'wb') as out:
            pickle.dump(recommendations, out)

In [15]:
# Each recommender is built immediately before its own run: pipeline() rewrites
# 'train_valid.csv', so construction and execution stay interleaved in this order.

# Most-popular baseline
recommender1 = MostPopularRecommender(train_df, validation_df, test_df, orders, order_products)
pipeline(recommender1, loop_num=1, user_num=100, predictions_num=7)

# Item-based recommender
recommender2 = ItemBasedRecommender(train_df, validation_df, test_df)
pipeline(recommender2, loop_num=1, user_num=100, predictions_num=7)

# User-based recommender, constructed from the CSV files on disk
recommender3 = UserBasedRecommender('orders.csv', 'train_valid.csv', 10)
pipeline(recommender3, loop_num=1, user_num=100, predictions_num=7)

In [16]:
# pipeline() names its output '<predictions_num>predBy<ClassName><i>.pickle',
# where <i> is the iteration index, so the single UserBasedRecommender run is
# stored with the suffix 0.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open('7predByUserBasedRecommender0.pickle', 'rb') as inp:
    recommendations = pickle.load(inp)
recommendations

{1: [45051, 21572, 40939, 46562, 6729, 37600, 28371],
 2: [11399, 38618, 49451, 4071, 33000, 14958, 8197],
 3: [24062, 23427, 17789, 31805, 45177, 32139, 27104],
 4: [19057, 48335, 6740, 18209, 39794, 35469, 17769],
 5: [12779, 48647, 1117, 31717, 3362, 43693, 9436],
 6: [12315, 48679, 39013, 15579, 13085, 22935, 26940],
 7: [274, 22640, 22742, 24691, 23765, 29193, 1158],
 8: [4605, 9796, 29487, 47144, 43686, 49113, 9076],
 9: [41074, 38277, 39561, 5194, 2627, 28928, 5438],
 10: [22035, 15392, 35855, 44359, 5545, 41860, 19535],
 11: [34658, 28465, 19769, 33884, 5605, 28323, 5782],
 12: [30635, 33761, 17159, 4372, 16589, 34745, 29557],
 13: [5652, 30561, 3919, 14877, 32297, 15784, 33735],
 14: [33655, 19863, 12962, 37113, 38948, 7751, 37229],
 15: [45099, 5259, 48142, 34269, 21472, 10441, 5258],
 16: [26209, 18441, 47042, 22504, 44142, 15950, 3682],
 17: [24838, 32338, 45656, 18762, 37241, 5750, 2927],
 18: [15613, 37458, 1359, 13548, 16965, 49175, 38203],
 19: [46522, 19887, 3364, 1278