In [15]:
# Import necessary libraries 

import surprise 
import pandas as pd 
import numpy as np 

from recommenders.datasets.sparse import AffinityMatrix
from recommenders.datasets.python_splitters import numpy_stratified_split, python_random_split
from recommenders.evaluation.python_evaluation import (
    precision_at_k,
    recall_at_k,
    map_at_k,
    get_top_k_items
)

from recommenders.utils.timer import Timer
from recommenders.models.surprise.surprise_utils import predict, compute_ranking_predictions


In [3]:
# Load Data
df = pd.read_csv("../../00_Data/online_retail_prep.csv", index_col=0)
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,purchased
0,536365,3249,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,4048,United Kingdom,1
1,536365,2649,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,4048,United Kingdom,1
2,536365,2855,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,4048,United Kingdom,1
3,536365,2803,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,4048,United Kingdom,1
4,536365,2802,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,4048,United Kingdom,1


In [5]:
#to use standard names across the analysis 
header = {
        "col_user": "CustomerID",
        "col_item": "StockCode",
        "col_rating": "purchased",
    }

#instantiate the sparse matrix generation  
am = AffinityMatrix(df = df, col_user='CustomerID', col_item='StockCode', col_rating='purchased')

#obtain the sparse matrix 
X, _, _ = am.gen_affinity_matrix()

# check that the generated matrix has the correct dimensions
assert (X.shape[0] == df.CustomerID.unique().shape[0]) & (
        X.shape[1] == df.StockCode.unique().shape[0]
    )

In [6]:
# Split the Data
Xtr, Xtst = numpy_stratified_split(X, ratio=0.8, seed=1)

In [8]:
# Turn all values to 5 that are above 0 
Xtr = np.where(Xtr > 0, 5, Xtr)
Xtst = np.where(Xtst > 0, 5, Xtst)

In [16]:
train, test = python_random_split(df, 0.8, seed=1)

In [18]:
train.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,purchased
329951,565859,1843,TRAVEL CARD WALLET KEEP CALM,24,2011-09-07 14:40:00,0.42,3309,United Kingdom,1
38267,539496,1335,HEART OF WICKER SMALL,12,2010-12-20 10:38:00,1.65,543,United Kingdom,1
23767,538248,97,LAVENDER INCENSE IN TIN,2,2010-12-10 12:14:00,1.25,4243,United Kingdom,1
508238,579196,1543,DOORMAT MERRY CHRISTMAS RED,4,2011-11-28 15:54:00,16.63,1300,United Kingdom,1
115861,546156,1486,SPACEBOY LUNCH BOX,12,2011-03-10 08:31:00,1.95,1446,United Kingdom,1


In [25]:
reader = surprise.Reader(rating_scale=(0,1))
train_set = surprise.Dataset.load_from_df(train[['CustomerID', 'StockCode', 'purchased']], reader=reader).build_full_trainset()

In [55]:
# Model
try:
    del(model)
except:
    pass

# Create Object for model 
model = surprise.SVDpp(random_state=0, n_factors=20, n_epochs=30, verbose=True)


model.fit(train_set)
    

 processing epoch 0
 processing epoch 1
 processing epoch 2
 processing epoch 3
 processing epoch 4
 processing epoch 5
 processing epoch 6
 processing epoch 7
 processing epoch 8
 processing epoch 9
 processing epoch 10
 processing epoch 11
 processing epoch 12
 processing epoch 13
 processing epoch 14
 processing epoch 15
 processing epoch 16
 processing epoch 17
 processing epoch 18
 processing epoch 19
 processing epoch 20
 processing epoch 21
 processing epoch 22
 processing epoch 23
 processing epoch 24
 processing epoch 25
 processing epoch 26
 processing epoch 27
 processing epoch 28
 processing epoch 29


<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x7fac7d513760>

In [56]:
predictions = predict(model, test, usercol='CustomerID', itemcol='StockCode')
predictions.head()

Unnamed: 0,CustomerID,StockCode,prediction
0,683,162,0.997668
1,4042,1478,1.0
2,540,2247,0.987671
3,2226,399,0.998757
4,3357,140,1.0


In [63]:
predictions.shape

(80768, 3)

In [57]:
all_predictions = compute_ranking_predictions(model, test, usercol='CustomerID', itemcol='StockCode', remove_seen=True)

In [58]:
all_predictions.shape

(12222508, 3)

In [59]:
all_predictions.head()

Unnamed: 0,CustomerID,StockCode,prediction
80768,683,1478,1.0
80769,683,2247,1.0
80770,683,399,1.0
80771,683,140,1.0
80772,683,1479,0.998263


In [60]:
all_predictions.prediction.min()

0.7749182971700429

In [61]:
eval_precision = precision_at_k(test, all_predictions, col_user="CustomerID", col_item="StockCode",
                                    col_rating="purchased", col_prediction="prediction", 
                                    relevancy_method="top_k", k=20)
print('precision \t:', eval_precision)
eval_recall = recall_at_k(test, all_predictions,col_user="CustomerID", col_item="StockCode",
                                    col_rating="purchased", col_prediction="prediction", 
                                    relevancy_method="top_k", k=20)
print('recall \t:', eval_recall)

precision 	: 0.0
recall 	: 0.0


In [62]:
print('recall \t:', eval_recall)

recall 	: 0.0
