# 1. Import Libraries



In [1]:
import re
import os
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy

from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS, CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

import nltk
nltk.download('stopwords');
nltk.download('wordnet');
nltk.download('punkt')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.porter import *
from nltk.stem.wordnet import WordNetLemmatizer

from textblob import TextBlob

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


# 2. Load Data


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
data = pd.read_csv('/content/drive/MyDrive/Applied_ML/data/10_core_Video_Games_Processed.csv')
data.head()

Unnamed: 0,overall,verified,reviewTime,reviewerID,asin,reviewerName,reviewText,summary,category,description,title,also_buy,brand,feature,also_view,similar_item,details
0,5.0,False,2001-01-01,A3BEJOL2X8PS8I,B00004TC6E,ATHF Fan,Before I start this review let me get somethin...,The best RPG... Ever!,"['Video Games', 'Retro Gaming & Microconsoles'...","[""While it's officially a sequel to the immens...",Chrono Cross - PlayStation,"['B00002R28C', 'B00005LOXE', 'B00004Y57G', 'B0...",by\n \n Square Enix,['PSX'],"['B0045L3SNQ', 'B00005LOXE', 'B00004Y57G', 'B0...",,
1,5.0,False,2003-01-01,A3E0MSKBIWMU6F,B000069D39,obryanstars,"Well, at first, getting used to the controls i...","A Great Game, a HALO for the PS2 (maybe)","['Video Games', 'Retro Gaming &amp; Microconso...",['The evil race of TimeSplitters is traveling ...,Time Splitters 2 - PlayStation 2,"['B00004WKN9', 'B0006ZJP8M', 'B0009O7HVM', 'B0...",by\n \n Square Enix,"[""This time you've got to stop the evil lords ...","['B0006ZJP8M', 'B00006AVB1', 'B00004WKN9', 'B0...",,
2,4.0,False,2003-01-01,A3KI73MFVL0QWH,B00005NN5J,GAMEMASTER,this game is pretty good its graphics and game...,cool game,"['Video Games', 'Retro Gaming & Microconsoles'...","[""Danger takes wing again, as Namco's breaknec...",Namco Ace Combat 4 Shattered Skies,"['B01L1P49OA', 'B00JA81WQE', 'B0000C7GHG', 'B0...",Playstation,"['Soar through the blue in 24 missions, piloti...","['B00065QTT0', 'B01A0P8NBC', 'B015PJ68BK', 'B0...",,
3,5.0,False,2003-01-01,A3E0MSKBIWMU6F,B00007E1OC,obryanstars,The Lord of the Rings: The Two Towers (the gam...,"So Much Fun, So Many Things To Do! A Classic!!!","['Video Games', 'Retro Gaming & Microconsoles'...","['Features detailed tactical component.', '<I>...",Lord of the Rings The Two Towers - Gamecube,"['B00009VE6B', 'B0002I9RQI', 'B01LWA474D', 'B0...",by\n \n Electronic Arts,"[""The evil Wizard Saruman has built is buildin...","['B00009VE6B', 'B00008KTW7', 'B0002I9RQI', 'B0...",,
4,4.0,False,2003-01-01,A3713PP4MADKED,B00006GSNZ,Nick Watkins,"I got this game for Christmas, and played it a...",Good...BUT TOO HARD!!!!!!,"['Video Games', 'Retro Gaming & Microconsoles'...","[""Battle the evil forces of Mordor and experie...",Lord of the Rings The Two Towers - PlayStation 2,"['B00009VE6D', 'B00006LEM9', 'B0002IQD1U', 'B0...",by\n \n Electronic Arts,['Enter the pastoral world of Middle Earth whe...,"['B00009VE6D', 'B00006LEM9', 'B0002IQD1U', 'B0...",,


# 3. Preprocessing

> 'description' - game description - maps one-to-one with 'asin'
> 
> 'category' - game category - maps one-to-one with 'asin'
> 
> 'brand' - game brand - maps one-to-one with 'asin'
> 
> 'feature' - game features - maps one-to-one with 'asin'
> 
> 'reviewText' - review for a game - maps many-to-one with 'asin' 
> 
> 'summary' - concise summary of the review - maps many-to-one with 'asin' 
> 
> 'overall' - rating given by reviewer - maps many-to-one with 'asin'
> 
> 'also_buy' - games recommended by amazon for buying together - maps one-to-one with 'asin'
> 
> 'also_view' - games recommended by amazon for viewier together - maps one-to-one with 'asin'

In [4]:
data[['asin', 'description', 'category', 'brand', 'feature', 'reviewText', 
      'summary', 'overall', 'also_buy', 'also_view']] = data[['asin', 'description', 'category', 'brand', 'feature', 'reviewText', 
                                                   
                                                              'summary','overall', 'also_buy', 'also_view']].fillna('')
meta = data[['asin', 'title', 'description', 'category', 'brand', 
             'feature', 'reviewText', 'summary', 'overall', 'also_buy', 'also_view']]

# mean - for overall rating
# first - for columns uniquely mapped to 'asin' 
# concatenate - for columns that map many-to-one with 'asin' 
meta = meta.groupby(['asin', 'title', 'also_buy', 'also_view'], as_index=False).agg({'overall':'mean', 
                                                                                     'description':'first',
                                                                                     'category':'first',
                                                                                     'brand':'first',
                                                                                     'feature':'first',
                                                                                     'reviewText': lambda x: ''.join(str(x.values)),
                                                                                     'summary': lambda x: ''.join(str(x.values))})

meta.head()

Unnamed: 0,asin,title,also_buy,also_view,overall,description,category,brand,feature,reviewText,summary
0,0804161380,Legend of Zelda Box Set Prima Official Game Guide,"['1911015214', '1911015222', '1506703356', '11...","['1911015486', '1506703356', '1616550414', '15...",4.733333,"[""It's 6 Legend Of Zelda strategy guides, offe...",,DIAMOND SELECT TOYS,['Set includes 6x Legend Of Zelda strategy gui...,['amazing\n\ncool awesome interesting guides o...,['Great' 'Five Stars' 'Five Stars' 'Epic colle...
1,9629971372,Dotop Sony Playstation 2 PS2 64MB Memory Card,"['B01LY4QRT8', 'B0000C7GHG', 'B00004SQPD', 'B0...",,4.588235,"[""This is Memory Card 128MB for PlayStation 2 ...","['Video Games', 'Retro Gaming &amp; Microconso...",Dotop,['brand new'],['How can you go wrong with obtaining a 128 Me...,['Works on 95% of games!' 'nice' 'As Expected'...
2,B000006OVJ,Mega Man Legends,"['B00004XONL', 'B0045L3SNQ', 'B0009XBWY6', 'B0...","['B0045L3SNQ', 'B00004XONL', 'B00000K1VC', 'B0...",4.4,['It is a world covered by endless water. Only...,"['Video Games', 'Retro Gaming & Microconsoles'...",Capcom,,['Well as far as the person who sold this to m...,['Hmm' 'Mega Man Legends For The PlayStation'\...
3,B000006P0K,Tekken 2,"['B00000K2X5', 'B000006P0J', 'B0000664JF', 'B0...","['B00000K2X5', 'B0002CHJ8C', 'B0000664JF', 'B0...",5.0,"['New green label', ""They're all here Heihachi...","['Video Games', 'Retro Gaming & Microconsoles'...",Namco,,"[""Same goes for Tekken 2: played but never own...",['Tekken 2' 'Five Stars' 'One of the best figh...
4,B000006RGR,Tomb Raider II,"['B00000DMB4', 'B008LYNLFI', 'B00001ZT9E', 'B0...","['B008LYNLFI', 'B00000DMB4', 'B00002SFNG', 'B0...",4.833333,"[""Lara's Back. Explore new worlds. Awesome Cla...","['Video Games', 'Retro Gaming & Microconsoles'...",by\n \n Square Enix,"['Classic action/adventure game sequel', 'New ...","[""Tomb Raider II is the best of the series, fo...",['best tomb raider' 'A better game than the or...


In [5]:
type(meta['also_buy'][1])

str

In [6]:
process = lambda x: str(x).replace(' ', '').replace("'", '')[1:-1].split(',')
cleanText = lambda x: (re.sub(r'[^A-Za-z0-9 ]+', '', x)).lower()

meta[['description', 'category', 'brand', 'feature', 'reviewText', 'summary', 'also_buy', 'also_view']] = meta.apply({
          'description': lambda x: cleanText(x), 
          'category': lambda x: cleanText(x), 
          'brand': lambda x: cleanText(x), 
          'feature': lambda x: cleanText(x), 
          'reviewText': lambda x: cleanText(x), 
          'summary': lambda x: cleanText(x), 
          'also_buy': lambda x: process(x), 
          'also_view': lambda x: process(x)})

meta.head()

Unnamed: 0,asin,title,also_buy,also_view,overall,description,category,brand,feature,reviewText,summary
0,0804161380,Legend of Zelda Box Set Prima Official Game Guide,"[1911015214, 1911015222, 1506703356, 110189843...","[1911015486, 1506703356, 1616550414, 150670740...",4.733333,its 6 legend of zelda strategy guides offered ...,,diamond select toys,set includes 6x legend of zelda strategy guide...,amazingnncool awesome interesting guides of th...,great five stars five stars epic collection fi...
1,9629971372,Dotop Sony Playstation 2 PS2 64MB Memory Card,"[B01LY4QRT8, B0000C7GHG, B00004SQPD, B000SEU92...",[],4.588235,this is memory card 128mb for playstation 2 ps...,video games retro gaming amp microconsoles pla...,dotop,brand new,how can you go wrong with obtaining a 128 mega...,works on 95 of games nice as expected my first...
2,B000006OVJ,Mega Man Legends,"[B00004XONL, B0045L3SNQ, B0009XBWY6, B00005MA9...","[B0045L3SNQ, B00004XONL, B00000K1VC, B000050FA...",4.4,it is a world covered by endless water only sm...,video games retro gaming microconsoles playst...,capcom,,well as far as the person who sold this to me ...,hmm mega man legends for the playstation megam...
3,B000006P0K,Tekken 2,"[B00000K2X5, B000006P0J, B0000664JF, B0002CHJ8...","[B00000K2X5, B0002CHJ8C, B0000664JF, B000006P0...",5.0,new green label theyre all here heihachi paul ...,video games retro gaming microconsoles playst...,namco,,same goes for tekken 2 played but never owned ...,tekken 2 five stars one of the best fighting g...
4,B000006RGR,Tomb Raider II,"[B00000DMB4, B008LYNLFI, B00001ZT9E, B000006RG...","[B008LYNLFI, B00000DMB4, B00002SFNG, B00001ZT9...",4.833333,laras back explore new worlds awesome classic ...,video games retro gaming microconsoles playst...,by square enix,classic actionadventure game sequel new moves ...,tomb raider ii is the best of the series follo...,best tomb raider a better game than the origin...


In [7]:
stop = stopwords.words('english')
removeStopWords = lambda x:' '.join([word for word in x.split() if word not in (stop)])

meta[['description', 'category', 'brand', 'feature', 'reviewText', 'summary']] = meta.apply({
    'description': lambda x: removeStopWords(x), 
    'category': lambda x: removeStopWords(x), 
    'brand': lambda x: removeStopWords(x), 
    'feature': lambda x: removeStopWords(x),
    'reviewText': lambda x: removeStopWords(x), 
    'summary': lambda x: removeStopWords(x)})

meta.head()

Unnamed: 0,asin,title,also_buy,also_view,overall,description,category,brand,feature,reviewText,summary
0,0804161380,Legend of Zelda Box Set Prima Official Game Guide,"[1911015214, 1911015222, 1506703356, 110189843...","[1911015486, 1506703356, 1616550414, 150670740...",4.733333,6 legend zelda strategy guides offered numbere...,,diamond select toys,set includes 6x legend zelda strategy guides o...,amazingnncool awesome interesting guides legen...,great five stars five stars epic collection fi...
1,9629971372,Dotop Sony Playstation 2 PS2 64MB Memory Card,"[B01LY4QRT8, B0000C7GHG, B00004SQPD, B000SEU92...",[],4.588235,memory card 128mb playstation 2 ps2 building b...,video games retro gaming amp microconsoles pla...,dotop,brand new,go wrong obtaining 128 megabyte memory card ps...,works 95 games nice expected first ps2 memory ...
2,B000006OVJ,Mega Man Legends,"[B00004XONL, B0045L3SNQ, B0009XBWY6, B00005MA9...","[B0045L3SNQ, B00004XONL, B00000K1VC, B000050FA...",4.4,world covered endless water small patches land...,video games retro gaming microconsoles playsta...,capcom,,well far person sold goes game came described ...,hmm mega man legends playstation megaman ocari...
3,B000006P0K,Tekken 2,"[B00000K2X5, B000006P0J, B0000664JF, B0002CHJ8...","[B00000K2X5, B0002CHJ8C, B0000664JF, B000006P0...",5.0,new green label theyre heihachi paul phoenix k...,video games retro gaming microconsoles playsta...,namco,,goes tekken 2 played never owned im officially...,tekken 2 five stars one best fighting games ti...
4,B000006RGR,Tomb Raider II,"[B00000DMB4, B008LYNLFI, B00001ZT9E, B000006RG...","[B008LYNLFI, B00000DMB4, B00002SFNG, B00001ZT9...",4.833333,laras back explore new worlds awesome classic ...,video games retro gaming microconsoles playsta...,square enix,classic actionadventure game sequel new moves ...,tomb raider ii best series followed original s...,best tomb raider better game original perfect ...


In [8]:
lemmatizer = WordNetLemmatizer()
lemmtizeText = lambda sentence: ' '.join([lemmatizer.lemmatize(w) for w in nltk.word_tokenize(sentence)])

meta[['description', 'category', 'brand', 'feature', 'reviewText', 'summary']] = meta.apply({
    'description': lambda x: lemmtizeText(x), 
    'category': lambda x: lemmtizeText(x), 
    'brand': lambda x: lemmtizeText(x), 
    'feature': lambda x: lemmtizeText(x),
    'reviewText': lambda x: lemmtizeText(x), 
    'summary': lambda x: lemmtizeText(x)})

meta.head()

Unnamed: 0,asin,title,also_buy,also_view,overall,description,category,brand,feature,reviewText,summary
0,0804161380,Legend of Zelda Box Set Prima Official Game Guide,"[1911015214, 1911015222, 1506703356, 110189843...","[1911015486, 1506703356, 1616550414, 150670740...",4.733333,6 legend zelda strategy guide offered numbered...,,diamond select toy,set includes 6x legend zelda strategy guide of...,amazingnncool awesome interesting guide legend...,great five star five star epic collection five...
1,9629971372,Dotop Sony Playstation 2 PS2 64MB Memory Card,"[B01LY4QRT8, B0000C7GHG, B00004SQPD, B000SEU92...",[],4.588235,memory card 128mb playstation 2 ps2 building b...,video game retro gaming amp microconsoles play...,dotop,brand new,go wrong obtaining 128 megabyte memory card ps...,work 95 game nice expected first ps2 memory ca...
2,B000006OVJ,Mega Man Legends,"[B00004XONL, B0045L3SNQ, B0009XBWY6, B00005MA9...","[B0045L3SNQ, B00004XONL, B00000K1VC, B000050FA...",4.4,world covered endless water small patch land s...,video game retro gaming microconsoles playstat...,capcom,,well far person sold go game came described we...,hmm mega man legend playstation megaman ocarin...
3,B000006P0K,Tekken 2,"[B00000K2X5, B000006P0J, B0000664JF, B0002CHJ8...","[B00000K2X5, B0002CHJ8C, B0000664JF, B000006P0...",5.0,new green label theyre heihachi paul phoenix k...,video game retro gaming microconsoles playstat...,namco,,go tekken 2 played never owned im officially b...,tekken 2 five star one best fighting game time...
4,B000006RGR,Tomb Raider II,"[B00000DMB4, B008LYNLFI, B00001ZT9E, B000006RG...","[B008LYNLFI, B00000DMB4, B00002SFNG, B00001ZT9...",4.833333,laras back explore new world awesome classic g...,video game retro gaming microconsoles playstat...,square enix,classic actionadventure game sequel new move c...,tomb raider ii best series followed original s...,best tomb raider better game original perfect ...


In order to incorporate review data, we include the 'summary' column, i.e, summary of each review, instead of the actual reviews. This is done to keep the dimension of our sparse matrices relatively less.

In [9]:
meta['without review data'] = meta['description'] + meta['category'] + meta['brand'] + meta['feature']
meta['with summary'] = meta['description'] + meta['category'] + meta['brand'] + meta['feature'] + meta['summary']
meta['with reviews'] = meta['description'] + meta['category'] + meta['brand'] + meta['feature'] + meta['reviewText']
meta.head()

Unnamed: 0,asin,title,also_buy,also_view,overall,description,category,brand,feature,reviewText,summary,without review data,with summary,with reviews
0,0804161380,Legend of Zelda Box Set Prima Official Game Guide,"[1911015214, 1911015222, 1506703356, 110189843...","[1911015486, 1506703356, 1616550414, 150670740...",4.733333,6 legend zelda strategy guide offered numbered...,,diamond select toy,set includes 6x legend zelda strategy guide of...,amazingnncool awesome interesting guide legend...,great five star five star epic collection five...,6 legend zelda strategy guide offered numbered...,6 legend zelda strategy guide offered numbered...,6 legend zelda strategy guide offered numbered...
1,9629971372,Dotop Sony Playstation 2 PS2 64MB Memory Card,"[B01LY4QRT8, B0000C7GHG, B00004SQPD, B000SEU92...",[],4.588235,memory card 128mb playstation 2 ps2 building b...,video game retro gaming amp microconsoles play...,dotop,brand new,go wrong obtaining 128 megabyte memory card ps...,work 95 game nice expected first ps2 memory ca...,memory card 128mb playstation 2 ps2 building b...,memory card 128mb playstation 2 ps2 building b...,memory card 128mb playstation 2 ps2 building b...
2,B000006OVJ,Mega Man Legends,"[B00004XONL, B0045L3SNQ, B0009XBWY6, B00005MA9...","[B0045L3SNQ, B00004XONL, B00000K1VC, B000050FA...",4.4,world covered endless water small patch land s...,video game retro gaming microconsoles playstat...,capcom,,well far person sold go game came described we...,hmm mega man legend playstation megaman ocarin...,world covered endless water small patch land s...,world covered endless water small patch land s...,world covered endless water small patch land s...
3,B000006P0K,Tekken 2,"[B00000K2X5, B000006P0J, B0000664JF, B0002CHJ8...","[B00000K2X5, B0002CHJ8C, B0000664JF, B000006P0...",5.0,new green label theyre heihachi paul phoenix k...,video game retro gaming microconsoles playstat...,namco,,go tekken 2 played never owned im officially b...,tekken 2 five star one best fighting game time...,new green label theyre heihachi paul phoenix k...,new green label theyre heihachi paul phoenix k...,new green label theyre heihachi paul phoenix k...
4,B000006RGR,Tomb Raider II,"[B00000DMB4, B008LYNLFI, B00001ZT9E, B000006RG...","[B008LYNLFI, B00000DMB4, B00002SFNG, B00001ZT9...",4.833333,laras back explore new world awesome classic g...,video game retro gaming microconsoles playstat...,square enix,classic actionadventure game sequel new move c...,tomb raider ii best series followed original s...,best tomb raider better game original perfect ...,laras back explore new world awesome classic g...,laras back explore new world awesome classic g...,laras back explore new world awesome classic g...


# 4. Model Building

In [10]:
def tf_similarity(df, col):
  tf = TfidfVectorizer(ngram_range=(1,2))
  mat = tf.fit_transform(df[col])
  # print('Tfidf Matrix Shape: {}'.format(mat.shape))
  sim = linear_kernel(mat, mat)
  # print('Similarity Matrix Shape: {}\n'.format(sim.shape))
  return sim

def cv_similarity(df, col):
  cv = CountVectorizer(ngram_range=(1,2))
  mat = cv.fit_transform(df[col])
  # print('CV Matrix Shape: {}'.format(mat.shape))
  sim = linear_kernel(mat, mat)
  # print('Similarity Matrix Shape: {}\n'.format(sim.shape))
  return sim

def similarity(df, col, method):
  sim = tf_similarity(df, col) if method == 'tfidf' else cv_similarity(df, col)
  return sim

def get_recommendations(df, sim, id):
  # find recommendations
  ids = df['asin']
  indices = pd.Series(df.index, index=df['asin'])
  idx = indices[id]
  sim_scores = list(enumerate(sim[idx]))
  sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
  sim_scores = sim_scores[1:31]
  prod_indices = [i[0] for i in sim_scores]
  prod_indices = list(set(prod_indices))
  return ids.iloc[prod_indices]

def display_recommendations(df, sim, id):
  # get recommendations
  recommendations = get_recommendations(df, sim, id)
  for rid in recommendations.head(10).values:
    print(df[df['asin'] == rid]['title'].unique())
    

**Recommendations**

In [11]:
print('--- Recommendations created using Tfidf without review data')
print('\n--- Displaying games similar to: {}\n'.format(data[data['asin']=='0804161380']['title'].unique()[0]))
sim = similarity(meta, 'without review data', 'tfidf')
display_recommendations(meta, sim, '0804161380')

--- Recommendations created using Tfidf without review data

--- Displaying games similar to: Legend of Zelda Box Set Prima Official Game Guide

['Xbox One Limited Edition Call of Duty: Advanced Warfare Bundle']
['The Legend of Zelda: Ocarina of Time']
["Mortal Kombat X: Kollector's Edition - PlayStation 4"]
['The Legend of Zelda: Twilight Princess (Nintendo Selects)']
['Secret of Mana']
['The Legend of Zelda: Spirit Tracks']
['Forza Motorsport 5 Limited Edition']
['The Legend of Zelda: The Wind Waker']
['The Legend of Zelda: The Wind Waker (HD Deluxe Set) for Nintendo Wii U']
['Nintendo Selects: The Legend of Zelda: The Wind Waker HD - Wii U']


In [12]:
print('--- Recommendations created using Tfidf with summary')
print('\n--- Displaying games similar to: {}\n'.format(data[data['asin']=='0804161380']['title'].unique()[0]))
sim = similarity(meta, 'with summary', 'tfidf')
display_recommendations(meta, sim, '0804161380')

--- Recommendations created using Tfidf with summary

--- Displaying games similar to: Legend of Zelda Box Set Prima Official Game Guide

['The Legend of Zelda: Ocarina of Time']
['The Legend of Zelda: Twilight Princess (Nintendo Selects)']
["Mortal Kombat X: Kollector's Edition - PlayStation 4"]
['The Legend of Zelda: Spirit Tracks']
['The Legend of Zelda: Twilight Princess']
['The Legend of Zelda: The Wind Waker']
['The Legend of Zelda: The Wind Waker (HD Deluxe Set) for Nintendo Wii U']
['Nintendo Selects: The Legend of Zelda: The Wind Waker HD - Wii U']
['The Legend of Zelda: Twilight Princess HD - Wii U']
['Zelda II: The Adventure of Link (Classic NES Series)']


In [13]:
print('--- Recommendations created using Tfidf with reviews')
print('\n--- Displaying games similar to: {}\n'.format(data[data['asin']=='0804161380']['title'].unique()[0]))
sim = similarity(meta, 'with reviews', 'tfidf')
display_recommendations(meta, sim, '0804161380')

--- Recommendations created using Tfidf with reviews

--- Displaying games similar to: Legend of Zelda Box Set Prima Official Game Guide

['The Legend of Zelda: Ocarina of Time']
['The Legend of Zelda: Twilight Princess (Nintendo Selects)']
['The Legend of Zelda: Spirit Tracks']
['The Legend of Zelda: Twilight Princess']
['The Legend of Zelda: The Wind Waker']
['The Legend of Zelda: The Wind Waker (HD Deluxe Set) for Nintendo Wii U']
['The Legend of Zelda: Twilight Princess HD - Wii U']
['Zelda II: The Adventure of Link (Classic NES Series)']
['The Legend of Zelda - The Minish Cap']
['The Legend of Zelda: Ocarina of Time (w/ Master Quest)']


In [14]:
print('--- Recommendations created using CV without review data')
print('\n--- Displaying games similar to: {}\n'.format(data[data['asin']=='0804161380']['title'].unique()[0]))
sim = similarity(meta, 'without review data', 'cv')
display_recommendations(meta, sim, '0804161380')

--- Recommendations created using CV without review data

--- Displaying games similar to: Legend of Zelda Box Set Prima Official Game Guide

['Legend of Zelda Box Set Prima Official Game Guide']
['Prince of Persia: Sands of Time']
['The Legend of Zelda: Twilight Princess (Nintendo Selects)']
['Gears of War 2 Limited Edition -Xbox 360']
['Call of Duty: Ghosts Prestige Edition - PlayStation 3']
['LittleBigPlanet - Game of the Year Edition Playstation 3']
['Forza Motorsport 5 Limited Edition']
['WWE Legends of WrestleMania']
['The Legend of Zelda: The Wind Waker']
['The Legend of Zelda: The Wind Waker (HD Deluxe Set) for Nintendo Wii U']


In [15]:
print('--- Recommendations created using CV with summary')
print('\n--- Displaying games similar to: {}\n'.format(data[data['asin']=='0804161380']['title'].unique()[0]))
sim = similarity(meta, 'with summary', 'cv')
display_recommendations(meta, sim, '0804161380')


--- Recommendations created using CV with summary

--- Displaying games similar to: Legend of Zelda Box Set Prima Official Game Guide

['Tomb Raider: Definitive Edition - PlayStation 4']
['Batman: Arkham Knight - PlayStation 4 [Digital Code]']
['Xbox One Wireless Controller (Without 3.5 millimeter headset jack)']
['inFAMOUS: Second Son Standard Edition (PlayStation 4)']
['Yoshi amiibo (Super Smash Bros Series)']
["Assassin's Creed - Playstation 3"]
["Assassin's Creed"]
['Xbox 360 Wireless Controller - Glossy Black']
['Gears of War 3']
['PlayStation 4 500GB Console [Old Model][Discontinued]']


In [16]:
print('--- Recommendations created using CV with reviews')
print('\n--- Displaying games similar to: {}\n'.format(data[data['asin']=='0804161380']['title'].unique()[0]))
sim = similarity(meta, 'with reviews', 'cv')
display_recommendations(meta, sim, '0804161380')

--- Recommendations created using CV with reviews

--- Displaying games similar to: Legend of Zelda Box Set Prima Official Game Guide

['Final Fantasy X']
['Heavy Rain - Greatest Hits']
['Mass Effect - Xbox 360']
["Uncharted: Drake's Fortune - Playstation 3"]
['Xbox One + Kinect']
["Assassin's Creed - Playstation 3"]
["Assassin's Creed"]
[' Mass Effect 2 Platinum Hits']
['The Legend of Zelda: The Wind Waker']
['Wii']


# 5. Evaluation

Let $A$ denote the set of product IDs, i.e, list of values in meta['asin']

For a given product id $x\in A$, let $R(x)$ denote the set of similar items for this item built buy our model. 


Let $\text{also_buy}(x)$ represent the set of items in the 'also_buy' column for the row corresponding to 'asin'$=x$, and similarly for $\text{also_view}(x)$, then, we have the following scores,

$$\text{also_buy} =\text{mean} \{\frac{|R(x)\cap \text{also_buy}(x)|}{|R(x)|} \;\big|\; x \in A\}$$

$$\text{also_view} =\text{mean} \{\frac{|R(x)\cap \text{also_view}(x)|}{|R(x)|} \;\big|\; x \in A\}$$

In [17]:
def evaluate(items, df, col, method):
    arr_buy = []
    arr_view = []
    print('--- Building Recommendations ---')
    i = 1
    sim = similarity(df, col, method)

    for id in items:
        recommendations_ = get_recommendations(df, sim, id)[0:5]
        recommendations_ = set(recommendations_)

        also_buy_ = list(df[df['asin']==id].also_buy)[0]
        also_buy_ = set(also_buy_)

        also_view_ = list(df[df['asin']==id].also_view)[0]
        also_view_ = set(also_view_)

        intr_buy = also_buy_.intersection(recommendations_)
        intr_view = also_view_.intersection(recommendations_)

        arr_buy.append(len(intr_buy)/len(recommendations_))
        arr_view.append(len(intr_view)/len(recommendations_))

        if (i%500 == 0 and i>500):
            print('{} IDs processed'.format(i))
        i += 1  

    print('\nTotal IDs Processed: {}'.format(len(df)))
    print('\n\n--- Evaluation Result ---')
    print('Model Parameters: {}, method-{}'.format(col, method))
    buy_score = sum(arr_buy)/len(arr_buy)
    view_score = sum(arr_view)/len(arr_view)

    print('Also Buy Score: {}'.format(buy_score))
    print('Also View Score: {}\n\n'.format(view_score))
    return buy_score, view_score

In [18]:
e = evaluate(list(meta.asin.values), meta, 'without review data', 'tfidf')

--- Building Recommendations ---
1000 IDs processed
1500 IDs processed
2000 IDs processed
2500 IDs processed
3000 IDs processed
3500 IDs processed

Total IDs Processed: 3784


--- Evaluation Result ---
Model Parameters: without review data, method-tfidf
Also Buy Score: 0.13234672304439463
Also View Score: 0.1404862579281158




In [19]:
e = evaluate(list(meta.asin.values), meta, 'with summary', 'tfidf')

--- Building Recommendations ---
1000 IDs processed
1500 IDs processed
2000 IDs processed
2500 IDs processed
3000 IDs processed
3500 IDs processed

Total IDs Processed: 3784


--- Evaluation Result ---
Model Parameters: with summary, method-tfidf
Also Buy Score: 0.15338266384777843
Also View Score: 0.16146934460887874




In [20]:
e = evaluate(list(meta.asin.values), meta, 'with reviews', 'tfidf')

--- Building Recommendations ---
1000 IDs processed
1500 IDs processed
2000 IDs processed
2500 IDs processed
3000 IDs processed
3500 IDs processed

Total IDs Processed: 3784


--- Evaluation Result ---
Model Parameters: with reviews, method-tfidf
Also Buy Score: 0.16353065539112072
Also View Score: 0.1694503171247359




In [21]:
e = evaluate(list(meta.asin.values), meta, 'without review data', 'cv')

--- Building Recommendations ---
1000 IDs processed
1500 IDs processed
2000 IDs processed
2500 IDs processed
3000 IDs processed
3500 IDs processed

Total IDs Processed: 3784


--- Evaluation Result ---
Model Parameters: without review data, method-cv
Also Buy Score: 0.041014799154333974
Also View Score: 0.04941860465116247




In [22]:
e = evaluate(list(meta.asin.values), meta, 'with summary', 'cv')

--- Building Recommendations ---
1000 IDs processed
1500 IDs processed
2000 IDs processed
2500 IDs processed
3000 IDs processed
3500 IDs processed

Total IDs Processed: 3784


--- Evaluation Result ---
Model Parameters: with summary, method-cv
Also Buy Score: 0.035940803382664005
Also View Score: 0.03033826638477823




In [23]:
e = evaluate(list(meta.asin.values), meta, 'with reviews', 'cv')

--- Building Recommendations ---
1000 IDs processed
1500 IDs processed
2000 IDs processed
2500 IDs processed
3000 IDs processed
3500 IDs processed

Total IDs Processed: 3784


--- Evaluation Result ---
Model Parameters: with reviews, method-cv
Also Buy Score: 0.021247357293869056
Also View Score: 0.012790697674418636




We see that the tfidf method massively outperforms cout vectorizer. 

We also observe that including the summary of review texts along with metadata provides a significant boost in performance. 

Including the actual reviews further increased our performance aswell. 

Best parameters: method='tfidf', col='with reviews'



Let us build personalised recommendations using best parameters found.

**Personalized Recommendations for Users**

In [24]:
user_df = data.loc[data["overall"] == 5]
user_df = data.groupby("reviewerID")["asin"].apply(lambda x: ",".join(x)).reset_index()

def get_rec_user(df, col, method, id):
    
    rec_set = set()
    product_list = user_df.loc[user_df['reviewerID'] == id]['asin']
    product_list = list(product_list)[0].split(",")
    sim = similarity(df, col, method)
    for item in product_list:
        rec_list = get_recommendations(df, sim, item).head(2) #Top 2 recommendation for each product reviewed
        
        for r in rec_list:
            rec_set.add(r)
    
    for i in rec_set:
        print(df[df['asin'] == i]['title'].unique())


print('--- Personalized Recommendations created using Tfidf with reviews')
print('\n--- Displaying games for reviewer: {}\n'.format(data[data['reviewerID']=='A0380485C177Q6QQNJIX']['reviewerName'].unique()[0]))
get_rec_user(meta,"with reviews",'tfidf',"A0380485C177Q6QQNJIX")

--- Personalized Recommendations created using Tfidf with reviews

--- Displaying games for reviewer: Franklin Tineo

['Guitar Hero Metallica - Playstation 3']
['Nintendo 3DS XL - Black [Old Model]']
['Ratchet and Clank Future: Tools of Destruction - Playstation 3']
['PlayStation Vita - The Walking Dead Bundle']
['Borderlands 2 - Limited Edition - PlayStation Vita Bundle']
['inFAMOUS: Second Son Standard Edition (PlayStation 4)']
["Assassin's Creed: Revelations"]
["Assassin's Creed: Revelations"]
['Fallout 3']
['Mass Effect - Xbox 360']
['FINAL FANTASY X|X-2 HD Remaster - PlayStation Vita']
['PDP PSVita Trigger Grips']
['PlayStation 2 Memory Card (8MB)']
["PDP PSVita Pull 'N Go Folio"]
['Need for Speed: Hot Pursuit, XBOX 360']
['Metal Gear Solid: The Essential Collection']
['Heavy Rain - Greatest Hits']
["Nintendo - New 3DS XL Legend of Zelda: Majora's Mask Limited Edition - Gold/Black"]
['Metal Gear Solid 3 Subsistence']
['Tomb Raider: Definitive Edition - PlayStation 4']
["Uncharted:

We can also build personalized recommendations using sentiment polarity scores of reviews

In [25]:
meta_sentiment = meta
meta_sentiment['sentiment'] = [TextBlob(x).sentiment.polarity for x in meta_sentiment['reviewText']]

def get_rec_user2(df, col, method, id):
    
    rec_set = set()
    product_list = user_df.loc[user_df['reviewerID'] == id]['asin']
    product_list = list(product_list)[0].split(",")
    sim = similarity(df, col, method)
    for item in product_list:
        rec_list = get_recommendations(df, sim, item).head(2) #Top 2 recommendation for each product reviewed
        
        for r in rec_list:
            rec_set.add(r)
    
    for i in rec_set:
        print(df[df['asin'] == i]['title'].unique(), df[df['asin'] == i]['sentiment'].unique())

print('--- Personalized Recommendations created using Tfidf with reviews and sentient scores')
print('\n--- Displaying games for reviewer: {}\n'.format(data[data['reviewerID']=='A0380485C177Q6QQNJIX']['reviewerName'].unique()[0]))
get_rec_user2(meta_sentiment,"with reviews",'tfidf',"A0380485C177Q6QQNJIX")

--- Personalized Recommendations created using Tfidf with reviews and sentient scores

--- Displaying games for reviewer: Franklin Tineo

['Guitar Hero Metallica - Playstation 3'] [0.14036688]
['Nintendo 3DS XL - Black [Old Model]'] [0.14048065]
['Ratchet and Clank Future: Tools of Destruction - Playstation 3'] [0.0891597]
['PlayStation Vita - The Walking Dead Bundle'] [0.16957287]
['Borderlands 2 - Limited Edition - PlayStation Vita Bundle'] [0.13758568]
['inFAMOUS: Second Son Standard Edition (PlayStation 4)'] [0.0468283]
["Assassin's Creed: Revelations"] [0.0678516]
["Assassin's Creed: Revelations"] [0.04393441]
['Fallout 3'] [0.03836331]
['Mass Effect - Xbox 360'] [0.06019702]
['FINAL FANTASY X|X-2 HD Remaster - PlayStation Vita'] [0.0789998]
['PDP PSVita Trigger Grips'] [0.15593813]
['PlayStation 2 Memory Card (8MB)'] [0.12940558]
["PDP PSVita Pull 'N Go Folio"] [0.09772855]
['Need for Speed: Hot Pursuit, XBOX 360'] [0.08401457]
['Metal Gear Solid: The Essential Collection'] [0.06