In [1]:
import os
import pickle

import numpy as np
import pandas as pd

In [2]:
class MostPopularRecommender():
    """Non-personalised baseline: recommend the most frequently ordered products.

    The ranking is computed once by ``fit`` and cached on the instance, so
    repeated ``predict`` calls reuse it instead of redoing the merge and
    count each time. Call ``fit`` again to refresh the ranking after the
    underlying data changes.
    """

    def fit(self, train_df=None, order_products_df=None, products_df=None):
        """Rank products by how often they occur in the training orders.

        Parameters
        ----------
        train_df : pandas.DataFrame, optional
            Training orders; must contain an ``order_id`` column.
        order_products_df : pandas.DataFrame, optional
            Order lines with ``order_id`` and ``product_id`` columns.
        products_df : pandas.DataFrame, optional
            Product catalogue with ``product_id`` and ``product_name`` columns.

        Any argument left as ``None`` falls back to the notebook-level
        ``train`` / ``order_products`` / ``products`` variable, so the
        original zero-argument call keeps working.

        Returns
        -------
        pandas.DataFrame
            Columns ``product_id`` and ``product_name``, most-ordered first.
            The same frame is cached on the instance for ``predict``.
        """
        if train_df is None:
            train_df = train
        if order_products_df is None:
            order_products_df = order_products
        if products_df is None:
            products_df = products

        merged = pd.merge(train_df, order_products_df, on='order_id')[['order_id', 'product_id']]

        # value_counts sorts by descending frequency. Naming the index and
        # the counts explicitly gives the same two columns regardless of how
        # the installed pandas version labels the value_counts result.
        product_counts = (
            merged['product_id']
            .value_counts()
            .rename_axis('product_id')
            .reset_index(name='count')
        )

        # The inner merge keeps the left-hand (popularity) row order.
        popular_products = pd.merge(
            product_counts, products_df, on='product_id')[['product_id', 'product_name']]

        self._popular_products = popular_products
        return popular_products

    def predict(self, user_id, k):
        """Return the ``k`` most popular products, labelled with ``user_id``.

        The recommender is fitted lazily from the notebook-level frames on
        the first call if ``fit`` has not been called yet.

        Parameters
        ----------
        user_id : scalar
            Value written into the ``user_id`` column of every row.
        k : int
            Number of rows to take from the top of the popularity ranking.

        Returns
        -------
        pandas.DataFrame
            Columns ``user_id``, ``product_id``, ``product_name``.
        """
        popularity_recommendations = getattr(self, '_popular_products', None)
        if popularity_recommendations is None:
            popularity_recommendations = self.fit()

        # Copy so that adding the user_id column never touches the cached ranking.
        user_recommendations = popularity_recommendations.head(k).copy()
        user_recommendations.insert(0, 'user_id', user_id)

        return user_recommendations

In [7]:
def _split_indices(grouped_ratings, retriever):
    """Collect the index labels chosen by ``retriever`` from every group.

    Parameters
    ----------
    grouped_ratings : pandas GroupBy
        Grouped frame; it is iterated as ``(key, group_frame)`` pairs.
    retriever : callable
        Receives one group's DataFrame and returns an array of index labels.

    Returns
    -------
    numpy.ndarray
        The per-group arrays concatenated in group iteration order, or an
        empty int64 array when there are no groups.
    """
    # Iterating the groups directly (rather than going through
    # ``GroupBy.apply``) avoids apply's result re-assembly, which is not
    # needed when all we want is a flat list of index arrays.
    selected = [retriever(group) for _, group in grouped_ratings]
    if not selected:
        # np.concatenate raises on an empty sequence.
        return np.array([], dtype=np.int64)
    return np.concatenate(selected)


def split(orders, train_frac=0.5, validation_frac=0.25):
    """Split each user's orders chronologically into train/validation/test.

    Orders are sorted by ``order_number`` and grouped by ``user_id``. Within
    each user, the earliest ``train_frac`` share goes to train, the next
    ``validation_frac`` share to validation, and the remainder to test.
    Boundaries are truncated to whole orders, so users with very few orders
    may contribute nothing to the earlier partitions.

    Parameters
    ----------
    orders : pandas.DataFrame
        Must contain ``user_id`` and ``order_number`` columns.
    train_frac : float, default 0.5
        Share of each user's orders assigned to train.
    validation_frac : float, default 0.25
        Share of each user's orders assigned to validation.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train_indices, validation_indices, test_indices)`` holding index
        labels of ``orders``.

    Raises
    ------
    ValueError
        If a fraction is negative or the two fractions sum to more than 1.
    """
    # Slack absorbs float round-off such as 0.7 + 0.2 == 0.8999999999999999,
    # which would otherwise truncate a boundary one order too early. It is
    # far too small to move a boundary for the default fractions.
    float_slack = 1e-9

    if train_frac < 0 or validation_frac < 0 or train_frac + validation_frac > 1 + float_slack:
        raise ValueError(
            'train_frac and validation_frac must be non-negative and sum to at most 1')

    validation_stop_frac = train_frac + validation_frac
    grouper = orders.sort_values('order_number').groupby('user_id')

    def _boundary(user_orders, fraction):
        # Positional cut-off for this user's orders, truncated to a whole order.
        return int(user_orders.shape[0] * fraction + float_slack)

    train_indices = _split_indices(
        grouper,
        lambda user_orders: user_orders.iloc[
            :_boundary(user_orders, train_frac)].index.values)

    validation_indices = _split_indices(
        grouper,
        lambda user_orders: user_orders.iloc[
            _boundary(user_orders, train_frac):
            _boundary(user_orders, validation_stop_frac)].index.values)

    test_indices = _split_indices(
        grouper,
        lambda user_orders: user_orders.iloc[
            _boundary(user_orders, validation_stop_frac):].index.values)

    return train_indices, validation_indices, test_indices

# Partition every user's order history, then persist each index array so that
# later cells can reload exactly the same split from disk.
train_indices, validation_indices, test_indices = split(orders)

_index_arrays_by_file = {
    'train_indices.pickle': train_indices,
    'validation_indices.pickle': validation_indices,
    'test_indices.pickle': test_indices,
}

# One pickle file per partition, written in train / validation / test order.
for _pickle_path, _index_array in _index_arrays_by_file.items():
    with open(_pickle_path, 'wb') as out:
        pickle.dump(_index_array, out)

In [3]:
def read_data(data_dir='.'):
    """Load the product, order and order-line tables from CSV files.

    Parameters
    ----------
    data_dir : str, default '.'
        Directory containing ``products.csv``, ``orders.csv``,
        ``order_products__prior.csv`` and ``order_products__train.csv``.
        The default reads from the current working directory.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(products_df, orders_df, order_products_df)``. The last frame is
        the prior order lines followed by the train order lines.
    """
    def _read(filename):
        # Resolve every file against the same configurable directory.
        return pd.read_csv(os.path.join(data_dir, filename))

    products_df = _read('products.csv')
    orders_df = _read('orders.csv')
    order_products_prior_df = _read('order_products__prior.csv')
    order_products_train_df = _read('order_products__train.csv')

    # The two tables are stacked without resetting the index, so row labels
    # from the prior and train files can repeat in the combined frame.
    order_products_df = pd.concat([order_products_prior_df, order_products_train_df])

    return products_df, orders_df, order_products_df

In [4]:
def get_split_df(orders, order_products):
    """Rebuild the train/validation/test order frames from pickled indices.

    Reads ``train_indices.pickle``, ``validation_indices.pickle`` and
    ``test_indices.pickle`` from the working directory and selects the
    matching rows of ``orders`` by index label.

    Parameters
    ----------
    orders : pandas.DataFrame
        Frame whose index labels the pickled arrays refer to.
    order_products : object
        Accepted but not used by this function.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, validation_df, test_df)``.
    """
    index_files = (
        'train_indices.pickle',
        'validation_indices.pickle',
        'test_indices.pickle',
    )

    # Load every index array before selecting any rows.
    loaded_indices = []
    for pickle_path in index_files:
        with open(pickle_path, 'rb') as handle:
            loaded_indices.append(pickle.load(handle))

    train_df, validation_df, test_df = [
        orders.loc[index_labels] for index_labels in loaded_indices
    ]
    return train_df, validation_df, test_df

In [5]:
# AP@k
def apk(actual, predicted, k=10):
    """Average precision at ``k`` for one ranked list of predictions.

    Parameters
    ----------
    actual : sequence
        The relevant items. Lists and NumPy arrays are supported.
    predicted : sequence
        Ranked predictions, best first; only the first ``k`` are scored.
    k : int, default 10
        Cut-off rank. Expected to be positive, since the score is divided by
        ``min(len(actual), k)``.

    Returns
    -------
    float
        Sum of precision-at-hit values over the first occurrence of each
        relevant prediction, divided by ``min(len(actual), k)``; ``0.0`` when
        ``actual`` is empty or ``None``.
    """
    # Test the length explicitly: ``not actual`` raises ValueError for a NumPy
    # array with more than one element. Doing it before the loop also skips
    # pointless work when nothing is relevant.
    if actual is None or len(actual) == 0:
        return 0.0

    if len(predicted) > k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        # Count a prediction only the first time it appears in the ranking.
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    return score / min(len(actual), k)

# MAP@K
def mapk(actual, predicted, k=10):
    """Mean average precision at ``k`` over several queries.

    ``actual`` and ``predicted`` are iterated in parallel; each pair is
    scored with ``apk`` and the scores are averaged with ``np.mean``.
    """
    per_query_scores = []
    for relevant_items, ranked_items in zip(actual, predicted):
        per_query_scores.append(apk(relevant_items, ranked_items, k))
    return np.mean(per_query_scores)

In [6]:
# Load the raw tables into notebook-level variables.
products, orders, order_products = read_data()
# Materialise the train/validation/test order subsets. get_split_df reads the
# three *_indices.pickle files from the working directory, so they must
# already exist when this cell runs.
train, validation, test = get_split_df(orders, order_products)

In [7]:
# Example: top-10 popularity recommendations for a single user.
# The user id is only used to label the returned rows.
user_id = 42
mpr = MostPopularRecommender()
# With no prior fit, predict ranks products from the notebook-level `train`,
# `order_products` and `products` frames, so the loading cell must run first.
recommendations = mpr.predict(user_id, 10)

In [8]:
# Display the recommendation table (user_id, product_id, product_name).
recommendations

Unnamed: 0,user_id,product_id,product_name
0,42,24852,Banana
1,42,13176,Bag of Organic Bananas
2,42,21137,Organic Strawberries
3,42,21903,Organic Baby Spinach
4,42,47209,Organic Hass Avocado
5,42,47766,Organic Avocado
6,42,26209,Limes
7,42,27845,Organic Whole Milk
8,42,47626,Large Lemon
9,42,16797,Strawberries
