In [3]:
!pip install scikit-surprise
import numpy as np
import pandas as pd
from surprise import Dataset
from surprise import Reader
import heapq



In [4]:
# Download the Amazon "Digital Software" customer-review dump (gzip-compressed TSV).
# Malformed rows are skipped silently. `on_bad_lines='skip'` replaces the
# `error_bad_lines=False` / `warn_bad_lines=False` pair, which was deprecated in
# pandas 1.3 and removed in pandas 2.0 (passing it there raises a TypeError).
data_main = pd.read_csv('https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Software_v1_00.tsv.gz',
                 sep='\t',
                 compression='gzip',
                 on_bad_lines='skip')

In [5]:
def process_dataframe(data_main, popularity_threshold=50):
    """Keep only the reviews of products that have enough ratings.

    Rows without a ``product_title`` are dropped, every remaining row is
    annotated with the number of non-null ``star_rating`` values recorded for
    its product title, and only rows whose product reaches
    ``popularity_threshold`` are returned. No filtering on the star-rating
    values themselves is performed.

    Args:
        data_main (pandas.DataFrame): Review records; must contain the
            columns ``product_title`` and ``star_rating``.
        popularity_threshold (int): Minimum number of ratings a product title
            needs for its reviews to be kept. Defaults to 50.

    Returns:
        pandas.DataFrame: The surviving rows with all input columns plus a
        ``totalRatingCount`` column. The input frame is not modified.
    """
    # Reviews without a title cannot be grouped per product.
    titled_reviews = data_main.dropna(axis=0, subset=['product_title'])

    # Number of ratings per product title.
    rating_counts = (
        titled_reviews
        .groupby(by=['product_title'])['star_rating']
        .count()
        .reset_index()
        .rename(columns={'star_rating': 'totalRatingCount'})
        [['product_title', 'totalRatingCount']]
    )

    # Attach the per-title count to every review of that title.
    reviews_with_counts = titled_reviews.merge(rating_counts, on='product_title', how='left')

    # Keep only reviews of sufficiently popular products.
    is_popular = reviews_with_counts['totalRatingCount'] >= popularity_threshold
    return reviews_with_counts.loc[is_popular]

In [6]:
# Apply the popularity filter to the raw reviews. The result is stored back
# under the same name, so guard on the marker column that process_dataframe
# adds: re-running this cell on an already-processed frame would otherwise
# merge a second 'totalRatingCount' column and make the filter step fail.
if 'totalRatingCount' not in data_main.columns:
    data_main = process_dataframe(data_main)

In [7]:
# Catalogue: one row per distinct product id, together with its title.
products = data_main.loc[:, ['product_id', 'product_title']].drop_duplicates(subset=['product_id'])

# Surprise expects (user, item, rating) columns in exactly this order.
ratings_frame = data_main[["customer_id", "product_id", "star_rating"]]
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_frame, reader)

# Use every rating for training (no hold-out split).
trainingSet = data.build_full_trainset()

In [8]:
from surprise import SVDpp

# Seed the factor initialisation so that Restart & Run All reproduces the
# same model (and therefore the same recommendations) on every run.
algo = SVDpp(n_epochs=100, lr_all=0.1, random_state=42)
algo.fit(trainingSet)

# NOTE: these are in-sample predictions (the test set is rebuilt from the
# training ratings), so they say nothing about generalisation error.
predictions = algo.test(trainingSet.build_testset())

In [9]:
# Import only the two helpers the notebook uses instead of a wildcard, so it
# is clear where `dump` and `load` come from and nothing else leaks into the
# notebook namespace.
from surprise.dump import dump, load

In [10]:
# Persist the in-sample predictions and the fitted algorithm to 'svd.model';
# verbose=1 prints a confirmation message once the file is written.
dump(
    'svd.model',
    predictions=predictions,
    algo=algo,
    verbose=1,
)

The dump has been saved as file svd.model


In [11]:
# Reload the dump written above. Per the Surprise docs, `load` returns a
# (predictions, algo) tuple; later cells use model[1] as the fitted algorithm.
model = load('svd.model')

In [12]:
def getProductName(product_id):
    """Return the title stored for ``product_id`` in the module-level ``products`` frame.

    Args:
        product_id: Value to match against the ``product_id`` column.

    Returns:
        The ``product_title`` of the first matching row.

    Raises:
        IndexError: If no row of ``products`` has this ``product_id``.
    """
    matching_rows = products[products['product_id'] == product_id]
    return matching_rows['product_title'].iloc[0]

In [13]:
from collections import defaultdict

def get_top_n(predictions, n=10):
    """Group predictions by user and keep each user's ``n`` best-rated items.

    Args:
        predictions: Iterable of 5-field prediction records in the order
            (user id, item id, true rating, estimated rating, details), as
            produced by an algorithm's ``test`` method.
        n (int): How many recommendations to keep per user. Default is 10.

    Returns:
        A ``defaultdict(list)`` mapping each raw user id to a list of
        ``(raw item id, estimated rating)`` tuples, ordered from highest to
        lowest estimate and truncated to ``n`` entries. Ties keep the order
        in which they appeared in ``predictions``.
    """
    per_user = defaultdict(list)

    # Bucket (item, estimate) pairs under the user they were predicted for.
    for prediction in predictions:
        user_id, item_id, _true_rating, estimate, _details = prediction
        per_user[user_id].append((item_id, estimate))

    # Rank each user's bucket by estimate, best first, and trim it to n entries.
    for user_id in per_user:
        ranked = sorted(per_user[user_id], key=lambda pair: pair[1], reverse=True)
        per_user[user_id] = ranked[:n]

    return per_user

In [14]:
# Raw customer id used to demonstrate the recommender.
test_customer = 164400
# Surprise's inner id for that customer; despite the `_iid` suffix this is a *user* inner id.
test_subject_iid = trainingSet.to_inner_uid(test_customer)
# Ratings stored for that user in the trainset; the next cell iterates them as (inner item id, rating) pairs.
test_subject_ratings = trainingSet.ur[test_subject_iid]

In [15]:
# Request enough candidates that ten can remain after dropping the products
# this customer has already rated.
k = len(test_subject_ratings) + 10

# Raw ids of the already-rated products, used purely for membership checks
# (the value 1 is a placeholder).
watched = {trainingSet.to_raw_iid(inner_item): 1 for inner_item, _ in test_subject_ratings}

In [16]:
list_tem = []
for item in products.product_id:
  list_tem.append(model[1].predict(uid=str(test_customer), iid=item))

In [17]:
k_neighbors = heapq.nlargest(k, list_tem, key=lambda t: t[3])
top_t = get_top_n(k_neighbors, n=k)

In [18]:
position = 0
recommendations = []
for rec in top_t[str(test_customer)]:
  if not rec[0] in watched:
    recommendations.append(getProductName(rec[0]))
    position += 1
    if (position >= 10): break # We only want top 10

In [19]:
# Print one recommended product title per line; an empty list prints nothing.
if recommendations:
    print('\n'.join(str(title) for title in recommendations))

Microsoft Windows Store Gift Card
Avast Free Antivirus 2015 [Download]
Microsoft Windows Store Gift Card
RPG Maker VX Ace
RPG Maker VX Ace
avast! Internet Security 2014 [Download]
Sound Forge
QuickBooks Online Plus 2015
QuickBooks Premier with Enhanced Payroll 2014 [Old Version]
avast! Pro Antivirus 2014 (1 PC, 1 Year) [Download]
