# DATA 643: Recommender Systems
Final Project:  Book Crossing RecSys | Walt Wells, Summer 2017

# Notebook P3 - Modeling

# Environment Prep

In [1]:
suppressMessages(library(dplyr))
suppressMessages(library(stringr))
suppressMessages(library(Matrix))
suppressMessages(library(recommenderlab))
suppressMessages(library(ggplot2))

# Data Prep

In [33]:
BinBook <- readRDS("binBookM.rds")
Book <- readRDS("BookM.rds")

In [34]:
BinBook
Book
m <- mean(colMeans(Book))

33095 x 95795 rating matrix of class ‘binaryRatingMatrix’ with 176017 ratings.

33095 x 95795 rating matrix of class ‘realRatingMatrix’ with 176017 ratings.

In [60]:
Samp <- Book[1:5000,1:5000]
Samp

5000 x 5000 rating matrix of class ‘realRatingMatrix’ with 1547 ratings.

In [82]:
which_train <- sample(x = c(TRUE, FALSE), size = nrow(Samp), 
                      replace = TRUE, prob = c(0.8, 0.2))
recc_data_train <- Samp[which_train, ]
recc_data_test <- Samp[-which_train, ]

In [83]:
recc_model <- Recommender(data = recc_data_train, 
                          method = "SVD")
recc_model

Recommender of type ‘SVD’ for ‘realRatingMatrix’ 
learned using 4012 users.

In [68]:
recommenderRegistry$get_entry("SVD", dataType = "realRatingMatrix")

Recommender method: SVD for realRatingMatrix
Description: Recommender based on SVD approximation with column-mean imputation.
Reference: NA
Parameters:
   k maxiter normalize
1 10     100  "center"

In [69]:
n_recommended <- 3
recc_predicted <- predict(object = recc_model, newdata = recc_data_test, n = n_recommended)
recc_predicted

Recommendations as ‘topNList’ with n = 3 for 4999 users. 

In [75]:
recc_user_1 <- recc_predicted@items[[1]]
recc_user_1
books_user_1 <- recc_predicted@itemLabels[recc_user_1]
books_user_1

## Subset For Testing

In [84]:
n <- 1
BookS <- Book[rowCounts(Book) > n, colCounts(Book) > n*2]
BookS

13768 x 11506 rating matrix of class ‘realRatingMatrix’ with 70584 ratings.

In [5]:
#set.seed(643)
#eval_sets <- evaluationScheme(data=BookS, 
#                                   method="split", 
#                                   train=0.8,
#                                   given=n, 
#                                   goodRating=m)
#eval_sets
#getData(eval_sets, "train")
#getData(eval_sets, "known")
#getData(eval_sets, "unknown")

In [43]:
set.seed(643)
eval_setsK <- evaluationScheme(data=Book, 
                              method = "cross-validation",
                              goodRating = m, 
                              k = 3, 
                              given = -1)
eval_setsK
getData(eval_setsK, "train")
getData(eval_setsK, "known")
getData(eval_setsK, "unknown")

Evaluation scheme using all-but-1 items
Method: ‘cross-validation’ with 3 run(s).
Good ratings: >=7.441965
Data set: 33095 x 95795 rating matrix of class ‘realRatingMatrix’ with 176017 ratings.

22062 x 95795 rating matrix of class ‘realRatingMatrix’ with 120779 ratings.

11033 x 95795 rating matrix of class ‘realRatingMatrix’ with 44205 ratings.

11033 x 95795 rating matrix of class ‘realRatingMatrix’ with 11033 ratings.

In [45]:
models_to_evaluate <- list(
    IBCF_cos = list(name = "IBCF", 
                    param = list(method = "cosine")),
#    IBCF_pear = list(name = "IBCF", 
#                     param = list(method = "pearson")),
    UBCF_cos = list(name = "UBCF", 
                    param = list(method = "cosine")),
#    UBCF_pear = list(name = "UBCF", 
#                     param = list(method = "pearson")),
    ALS_explicit = list(name="ALS", 
                        param = list()),
#    ALS_implicit = list(name="ALS_implicit", 
#                        param = list(lambda=0.1, alpha = 0.5, n_factors=10, 
#                                   n_iterations=10, seed = 1234, verbose = TRUE)),
#    ALS = list(name = "ALS", param = list(
#        normalize=NULL, lambda=0.1, n_factors=200, 
#        n_iterations=10, seed = 643)),
    #SVD = list(name = "SVD", )
    random = list(name = "RANDOM", 
                  param =  NULL)
)

n_recommendations <- c(1, 3, 5, 10, 15, 20, 30)

In [46]:
list_results <- evaluate(x = eval_setsK, 
                         method = models_to_evaluate, 
                         n = n_recommendations, 
                         type = "topNList")

IBCF run fold/sample [model time/prediction time]
	 1  Timing stopped at: 0.143 0 0.143 
UBCF run fold/sample [model time/prediction time]
	 1  Timing stopped at: 0.082 0.001 0.081 
ALS run fold/sample [model time/prediction time]
	 1  Timing stopped at: 681.951 0.043 682.159 
RANDOM run fold/sample [model time/prediction time]
	 1  Timing stopped at: 1.167 0 1.167 


“
  Recommender 'IBCF_cos' has failed and has been removed from the results!
  Recommender 'UBCF_cos' has failed and has been removed from the results!
  Recommender 'ALS_explicit' has failed and has been removed from the results!
  Recommender 'random' has failed and has been removed from the results!”

In [47]:
evalNums <- evaluate(x = eval_setsK, 
               method = models_to_evaluate, 
               type="ratings")

IBCF run fold/sample [model time/prediction time]
	 1  Timing stopped at: 0.105 0 0.105 
UBCF run fold/sample [model time/prediction time]
	 1  Timing stopped at: 0.066 0 0.066 
ALS run fold/sample [model time/prediction time]
	 1  Timing stopped at: 679.066 0 679.129 
RANDOM run fold/sample [model time/prediction time]
	 1  Timing stopped at: 1.155 0 1.155 


“
  Recommender 'IBCF_cos' has failed and has been removed from the results!
  Recommender 'UBCF_cos' has failed and has been removed from the results!
  Recommender 'ALS_explicit' has failed and has been removed from the results!
  Recommender 'random' has failed and has been removed from the results!”

In [48]:
plot(list_results, annotate=1, legend = "bottomright") 
title("ROC curve")

ERROR: Error in a[[1]]: subscript out of bounds


In [49]:
plot(list_results, "prec/rec", annotate = 1, legend = "topright")
title("Precision-recall")

ERROR: Error in a[[1]]: subscript out of bounds


In [50]:
evalTable <- avg(evalNums)
evalTable <- t(sapply(evalTable, rbind))
colnames(evalTable) <- c("RMSE", "MSE", "MAE")
knitr::kable(evalTable)

ERROR: Error in `colnames<-`(`*tmp*`, value = c("RMSE", "MSE", "MAE")): length of 'dimnames' [2] not equal to array extent
