<a href="https://colab.research.google.com/github/vivekkchandra/CE888_Lab/blob/master/Recommend.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import numpy as np
from IPython.display import Image
np.set_printoptions(precision = 3)

In [0]:
uratings_df = pd.read_csv("user_ratings.csv")
ufeatures_df = pd.read_csv("user_features.csv")
ifeatures_df = pd.read_csv("item_features.csv")

In [0]:
ufeatures_df["key"] = 0
ufeatures_df["user_id"] = range(ufeatures_df.shape[0])
ifeatures_df["key"] = 0
ifeatures_df["item_id"] = range(ifeatures_df.shape[0])
merged_df = pd.merge(ufeatures_df, ifeatures_df, left_index=True, on="key")
merged_df.drop(labels='key', axis=1, inplace=True) 
merged_df[["item_id", "user_id"]] 

In [0]:
merged_df["rating"] = map(lambda ids: uratings_df.values[ids[1]][ids[2]], 
                          merged_df[["user_id", "item_id"]].itertuples())
train = merged_df.dropna()
test = merged_df[merged_df.isnull().any(axis=1)]

In [0]:

n_latent_factors = 2

uratings = uratings_df.values
latent_upreferences = np.random.random((uratings.shape[0], n_latent_factors))
latent_ifeatures = np.random.random((uratings.shape[1], n_latent_factors))

ufeatures = ufeatures_df.values
ifeatures = ifeatures_df.values

ufeatures = np.concatenate([np.ones(shape=(ufeatures.shape[0],1)), ufeatures], axis=1)
ifeatures = np.concatenate([np.ones(shape=(ifeatures.shape[0],1)), ifeatures], axis=1)

ufeatures_weights = np.random.random((uratings.shape[0], ufeatures.shape[1]))
ifeatures_weights = np.random.random((uratings.shape[1], ifeatures.shape[1]))

print(ufeatures)

[[1. 1. 0. 0. 0.]
 [1. 0. 1. 0. 1.]
 [1. 0. 0. 0. 2.]
 [1. 1. 0. 0. 3.]
 [1. 0. 1. 0. 4.]
 [1. 0. 0. 0. 5.]
 [1. 0. 0. 0. 6.]
 [1. 1. 0. 0. 7.]
 [1. 0. 1. 0. 8.]
 [1. 1. 0. 0. 9.]]


In [0]:
def predict_rating(user_id, item_id):
    """
    Predict a rating given a user_id and an item_id.
    """
    upreference = latent_upreferences[user_id]
    ipreference = latent_ifeatures[item_id]
    
    uscore = ufeatures_weights[user_id].dot(ufeatures[user_id])
    iscore = ifeatures_weights[item_id].dot(ifeatures[item_id])
    #print(user_preference.dot(item_preference), user_score, item_score)
    return upreference.dot(ipreference) + uscore + iscore


def train(user_id, item_id, rating, alpha=0.001, 
          latent_feature_weight_decay=0.1,
          user_weight_decay=0.01, item_weight_decay=0.0001):
    #print(item_id)
    prediction_rating = predict_rating(user_id, item_id)
    err = prediction_rating - rating
    #print(err)
    upref_values = latent_upreferences[user_id][:]
    latent_upreferences[user_id] -= alpha * err * (latent_ifeatures[item_id] + latent_feature_weight_decay * latent_upreferences[user_id])
    latent_ifeatures[item_id] -= alpha * err * (upref_values + latent_feature_weight_decay * latent_ifeatures[item_id])
    
    ufeatures_weights[user_id] -=alpha * err * (ufeatures[user_id] + user_weight_decay * ufeatures_weights[user_id])
    ifeatures_weights[item_id] -=alpha * err * (ifeatures_weights[item_id] + item_weight_decay * ifeatures_weights[item_id])
    
    return err


def sgd(iterations=30000):
    """ 
    Iterate over all users and all items and train for 
    a certain number of iterations
    """
    for iteration in range(iterations):
        error = []
        for user_id in range(latent_upreferences.shape[0]):
            for item_id in range(latent_ifeatures.shape[0]):
                rating = uratings[user_id][item_id]
                if not np.isnan(rating):
                    err = train(user_id, item_id, rating)
                    error.append(err)
    mse = (np.array(error) ** 2).mean()          
    print(mse)

In [0]:
for _ in range(10): 
    sgd()

0.2782875140601273
0.2747840946385837
0.27373859633551134
0.2732977930675394
0.2730972631262692
0.2730183862871594
0.2730120315340104
0.2730548555870435
0.27313488963277155
0.27324585168429333


In [0]:
predictions = np.zeros(shape=(latent_upreferences.shape[0], latent_ifeatures.shape[0]))
print(ufeatures_weights)
print(ifeatures_weights)
for user_id in range(latent_upreferences.shape[0]):
    for item_id in range(latent_ifeatures.shape[0]):
        predictions[user_id, item_id] =  predict_rating(user_id, item_id)

In [0]:
values = [zip(uratings[i], predictions[i]) for i in range(predictions.shape[0])]
comparison_data = pd.DataFrame(values)
comparison_data.columns = uratings_df.columns
comparison_data

Unnamed: 0,The Call of Cthulhu,Frankenstein,Dracula,Neuromancer,Space Odyssey
0,"(8.0, 7.938224526594304)","(2.0, 2.228445927223479)","(nan, 21.369956801771703)","(5.0, 4.856364917905309)","(4.0, 3.9791579435318383)"
1,"(3.0, 2.9032569428953203)","(2.0, 2.3639390898008186)","(nan, -32.591377729615964)","(7.0, 6.770225814389079)","(7.0, 6.966957257868175)"
2,"(9.0, 8.93776295326112)","(nan, 4.625133317345899)","(7.0, 7.007455585149249)","(8.0, 8.051721957208958)","(5.0, 4.999653981827789)"
3,"(nan, 8.910713859082188)","(nan, 4.961062116543591)","(7.0, 7.000033726304304)","(8.0, 7.999290900374654)","(9.0, 8.999311597620473)"
4,"(nan, 4.9049084566403085)","(1.0, 0.6842750747700012)","(8.0, 8.006865479030765)","(3.0, 3.2655447243132603)","(7.0, 7.027929929130206)"
5,"(2.0, 2.00525507817647)","(3.0, 2.994537195737978)","(5.0, 4.99963016626663)","(nan, 0.059414147940308304)","(nan, 61.53431362792209)"
6,"(4.0, 4.443750332728209)","(2.0, 0.3880007660764865)","(nan, 6.304487302515303)","(2.0, 2.9785857726844744)","(7.0, 7.14247818520789)"
7,"(7.0, 6.441255223335681)","(1.0, 2.9244629141597165)","(2.0, 2.062285176883774)","(7.0, 5.8697476358268075)","(9.0, 8.833516266131923)"
8,"(3.0, 3.1599541290024384)","(3.0, 2.4249139824740045)","(nan, -34.34343262970783)","(7.0, 7.337471359121851)","(3.0, 3.04895696058054)"
9,"(4.0, 4.165603314035945)","(nan, -0.14641924212746985)","(5.0, 4.993052811328214)","(3.0, 2.8698128958849236)","(3.0, 2.999236260790674)"
