# Setup

In [1]:
import numpy as np
import math
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Seed NumPy's global RNG so the np.random.choice draws in later cells are repeatable
np.random.seed(42)

In [3]:
# Gzipped JSON-lines inputs consumed by pd.read_json in the loading cells
REVIEWS_PATH = "./data/Video_Games_5.json.gz"
METADATA_PATH = "./data/meta_Video_Games.json.gz"

In [4]:
# Columns retained from each raw frame; 'asin' appears in both lists
REVIEWS_FEATS = ["asin", "reviewerID", "overall"]
METADATA_FEATS = ["asin", "title"]

In [5]:
# Number of distinct products (asins) sampled before building the rating matrix
NUM_PRODUCTS = 10_000

# Load and transform data

In [6]:
# Load review data, keeping only the columns listed in REVIEWS_FEATS
reviews = pd.read_json(
    REVIEWS_PATH,
    lines=True,
    compression='gzip',
    encoding='utf-8',
).loc[:, REVIEWS_FEATS]
reviews.head(n=5)

Unnamed: 0,asin,reviewerID,overall
0,700026657,A1HP7NVNPFMA4N,5
1,700026657,A1JGAP0185YJI6,4
2,700026657,A1YJWEXHQBWK2B,3
3,700026657,A2204E1TH211HT,2
4,700026657,A2RF5B5H74JLPE,5


In [7]:
# TODO: try to incorporate metadata
# Load product metadata, keeping only the columns listed in METADATA_FEATS
metadata = pd.read_json(
    METADATA_PATH,
    lines=True,
    compression='gzip',
    encoding='utf-8',
).loc[:, METADATA_FEATS]
metadata.head(n=5)

Unnamed: 0,asin,title
0,42000742,Reversi Sensory Challenger
1,78764343,Medal of Honor: Warfighter - Includes Battlefi...
2,276425316,street fighter 2 II turbo super nintendo snes ...
3,324411812,Xbox 360 MAS STICK
4,439335310,Phonics Alive! 3: The Speller


In [8]:
# Subset the data: keep only reviews for a random sample of at most NUM_PRODUCTS products
unique_asins = reviews['asin'].unique()
# np.random.choice(..., replace=False) raises ValueError when asked for more
# items than exist, so cap the request at the number of distinct products.
sample_size = min(NUM_PRODUCTS, len(unique_asins))
sample_asins = np.random.choice(unique_asins, size=sample_size, replace=False)
reviews = reviews[reviews['asin'].isin(sample_asins)]

# Item-based collaborative filtering

In [9]:
# Preprocess data for item-based collaborative filtering:
# reviewers on the rows, products on the columns, missing ratings filled with 0
user_item_matrix = (
    reviews
    .pivot_table(values='overall', index='reviewerID', columns='asin')
    .fillna(0)
)
# Transposed view: one row per product, one column per reviewer
item_user_matrix = user_item_matrix.transpose()
item_user_matrix.shape

(10000, 54653)

In [10]:
# Standardize the matrix (StandardScaler scales each column of item_user_matrix)
# NOTE(review): standardized_matrix is not read by any cell visible in this
# section; the similarity cell is computed from item_user_matrix. Confirm intent.
scaler = StandardScaler()
standardized_matrix = scaler.fit(item_user_matrix).transform(item_user_matrix)

In [11]:
# Calculate item-item similarity using cosine similarity between rows of item_user_matrix
# NOTE(review): the input is the raw item_user_matrix, not standardized_matrix — confirm intent.
item_labels = user_item_matrix.columns
item_similarity = cosine_similarity(item_user_matrix)
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=item_labels,
    columns=item_labels,
)
item_similarity_df.head(n=5)

asin,0700026657,0804161380,3828770193,6050036071,8176503290,907843905X,9629971372,9882106463,9882155456,B000006OVI,...,B01H5XD36C,B01H6DHITE,B01H6GUCCQ,B01H6SM5CY,B01H74VPX2,B01H7VI5TC,B01HD1B64C,B01HDJFJOM,B01HFRICLE,B01HH6JEOC
asin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
700026657,1.0,0.0,0.0,0.0,0.0,0.0,0.035269,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
804161380,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.033755,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3828770193,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6050036071,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8176503290,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Get recommendations

In [12]:
def get_item_based_recommendations(asin, similarity_df, user_item_matrix, k=5):
    """Return up to ``k`` items most similar to ``asin``, best match first.

    Parameters
    ----------
    asin : label
        Query item; must be a column of ``similarity_df``.
    similarity_df : pandas.DataFrame
        Item-by-item similarity scores, indexed and columned by item label.
    user_item_matrix : pandas.DataFrame
        Ratings with one column per item. A candidate is kept only if its
        column contains at least one value greater than 0.
    k : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Item labels ordered by decreasing similarity. The list is shorter
        than ``k`` only when fewer than ``k`` eligible candidates exist.

    Raises
    ------
    KeyError
        If ``asin`` is not a column of ``similarity_df``.
    """
    # Remove the query item by label instead of assuming it sorts to position 0:
    # another item with similarity 1.0 can tie with it, in which case slicing
    # off the first entry could leave the query item in its own recommendations.
    scores = similarity_df[asin].drop(labels=asin, errors='ignore')
    # A stable sort keeps tied items in their original order, so results are deterministic.
    ranked_items = scores.sort_values(ascending=False, kind='mergesort').index

    # Filter while walking the ranking so that a rejected candidate is replaced
    # by the next best one rather than shrinking the result.
    recommendations = []
    for item in ranked_items:
        if len(recommendations) >= k:
            break
        if user_item_matrix[item].max() > 0:
            recommendations.append(item)
    return recommendations

In [13]:
def get_metadata(asin):
    """Return the first row of the module-level ``metadata`` frame whose 'asin' equals ``asin``.

    The row is returned as a Series. ``.iloc[0]`` raises IndexError when no
    row matches.
    """
    is_match = metadata['asin'] == asin
    return metadata[is_match].iloc[0]

In [14]:
# Print the top recommendations for 10 randomly chosen query products
query_asins = np.random.choice(user_item_matrix.columns, size=10, replace=False)
separator = '-' * 100
for query_asin in query_asins:
    recommendations = get_item_based_recommendations(query_asin, item_similarity_df, user_item_matrix)
    query_title = get_metadata(query_asin).title
    print(f"Top recommendations for {query_title} (item {query_asin}):")
    # Ranks are shown 1-based
    for rank, rec in enumerate(recommendations, start=1):
        print(f"  {rank}) {get_metadata(rec).title} (item {rec})")
    print(separator)

Top recommendations for Spyro: Attack of the Rhynocs (item B00008NRMD):
  1) Spyro 2: Season of Flame (item B00006F2ZO)
  2) Naruto: Ninja Council (item B000BNOINQ)
  3) Crash and Spyro Super Pack (item B000A32ODI)
  4) Frogger: The Great Quest (item B00005NCAG)
  5) Crash Bandicoot 2: N-tranced (item B00007KUUE)
----------------------------------------------------------------------------------------------------
Top recommendations for Orcs &amp; Elves - Nintendo DS (item B000UV2M64):
  1) NameStar , Personalized Stainless Steel Kid's Water Bottle, Silver, 12.5 oz (item B00I7IUZ3E)
  2) Gothic 3 Collector's Edition - PC (item B001F0OOF6)
  3) SADES SA-738 PC Gaming Headset with LED with Microphone, Professional Stereo Headphone 3.5mm LED with w/ Protein Leather Pad, for PC Gaming Mac (item B00X7F6KCW)
  4) The Chronicles of Narnia: Prince Caspian - Nintendo Wii (item B00139PR5U)
  5) Metal Gear Acid 2 (item B000CBCVG8)
-------------------------------------------------------------------