# Hybrid Models for Recommendation Systems

Load Pandas, we are going to need it for manipulating data

In [2]:
import pandas as pd
import numpy as np
from IPython.display import Image
np.set_printoptions(precision = 3)

Now load the data

In [3]:
user_ratings_df = pd.read_csv("user_ratings.csv")
user_features_df = pd.read_csv("user_features.csv")
item_features_df = pd.read_csv("item_features.csv")


user_features_df["key"] = 0
user_features_df["user_id"] = range(0,user_features_df.shape[0])
item_features_df["key"] = 0
item_features_df["item_id"] = range(0,item_features_df.shape[0])

merged_df = pd.merge(user_features_df, item_features_df,left_index=True,on="key")
merged_df[["item_id", "user_id"]]




merged_df["rating"] = map(lambda ids: user_ratings_df.values[ids[1]][ids[2]], 
                          merged_df[["user_id", "item_id"]].itertuples())

train = merged_df.dropna()

test = merged_df[merged_df.isnull().any(axis=1)]

print(test.to_latex())



\begin{tabular}{lrrrrrrrl}
\toprule
Empty DataFrame
Columns: Index(['Sex', ' Over60', 'key', 'user\_id', 'Critic0', ' Critic1', 'item\_id',
       'rating'],
      dtype='object')
Index: Int64Index([], dtype='int64') \\
\bottomrule
\end{tabular}



In [4]:
n_latent_features = 2

data = pd.read_csv("user_ratings.csv")
user_ratings = data.values
latent_user_preferences = np.random.random((user_ratings.shape[0], n_latent_features))
latent_item_features = np.random.random((user_ratings.shape[1],n_latent_features))

user_features = user_features_df.values
item_features = item_features_df.values

print(item_features_df.to_latex())


user_features = np.concatenate([np.ones(shape = (user_features.shape[0],1)), user_features], axis = 1)
item_features = np.concatenate([np.ones(shape = (item_features.shape[0],1)), item_features], axis = 1)



user_features_weights = np.random.random((user_ratings.shape[0], user_features.shape[1] ))
item_features_weights = np.random.random((user_ratings.shape[1],item_features.shape[1] ))



# print user_features

\begin{tabular}{lrrrr}
\toprule
{} &  Critic0 &   Critic1 &  key &  item\_id \\
\midrule
0 &      0.3 &       0.9 &    0 &        0 \\
1 &      0.9 &       0.3 &    0 &        1 \\
2 &      0.6 &       0.4 &    0 &        2 \\
3 &      0.2 &       0.1 &    0 &        3 \\
4 &      0.7 &       0.8 &    0 &        4 \\
\bottomrule
\end{tabular}



In [5]:
def predict_rating(user_id,item_id):
    """ Predict a rating given a user_id and an item_id.
    """
    user_preference = latent_user_preferences[user_id]
    item_preference = latent_item_features[item_id]
    
    user_score = user_features_weights[user_id].dot(user_features[user_id])
    item_score = item_features_weights[item_id].dot(item_features[item_id])
    #print user_preference.dot(item_preference), user_score, item_score
    return user_preference.dot(item_preference) + user_score + item_score

def train(user_id, item_id, rating,alpha = 0.001, 
                                   latent_feature_weight_decay = 0.1, 
                                   user_weight_decay = 0.01,
                                   item_weight_decay = 0.0001):
    
    #print item_id
    prediction_rating = predict_rating(user_id, item_id)
    err =  ( prediction_rating - rating );
    #print err
    user_pref_values = latent_user_preferences[user_id][:]
    latent_user_preferences[user_id] -= alpha * err *  ( latent_item_features[item_id] + latent_feature_weight_decay*latent_user_preferences[user_id])
    latent_item_features[item_id] -= alpha * err * ( user_pref_values + latent_feature_weight_decay*latent_item_features[item_id])
    
    user_features_weights[user_id] -=alpha * err *(  user_features[user_id] + user_weight_decay* user_features_weights[user_id])
    item_features_weights[item_id] -=alpha * err * ( item_features_weights[item_id] + item_weight_decay* item_features_weights[item_id])
    
    
    return err
    


def sgd(iterations = 30000):
    """ Iterate over all users and all items and train for 
        a certain number of iterations
    """
    for iteration in range(0,iterations):
        error = []
        for user_id in range(0,latent_user_preferences.shape[0]):
            for item_id in range(0,latent_item_features.shape[0]):
                rating = user_ratings[user_id][item_id]
                if(not np.isnan(rating)):
                    err = train(user_id,item_id,rating)
                    error.append(err)
    mse = (np.array(error) ** 2).mean()          
    print(mse)
                    
                    
    
                    
                    
    


In [6]:
for _ in range(0,10): 
    sgd()

0.2784118624526648
0.27465582557887536
0.27356026126090294
0.27309965063858094
0.27288762940386946
0.27279997013799223
0.27278551914076804
0.2728199965824466
0.2728908906685253
0.272991531294393


In [8]:
predictions = np.zeros(shape = (latent_user_preferences.shape[0], latent_item_features.shape[0]) )
#print latent_user_preferences
print(user_features_weights)
print(item_features_weights)
for user_id in range(0,latent_user_preferences.shape[0]):
            for item_id in range(0,latent_item_features.shape[0]):
                predictions[user_id,item_id] =  predict_rating(user_id,item_id)

[[ 1.167  0.606  0.713  0.086  0.621]
 [-0.749  0.357 -0.931  0.577 -1.077]
 [ 0.768  0.372  0.296  0.003  0.681]
 [ 0.273  0.8    0.531  0.312  0.774]
 [ 0.513  0.302  0.586  0.922 -0.211]
 [ 1.308  0.433  0.148  0.031  2.947]
 [ 0.564  0.734  0.207  0.382 -0.107]
 [ 0.064  0.576  0.073  0.621  0.131]
 [ 0.629  0.207  0.383  0.299 -0.617]
 [ 0.43   0.037  0.02   0.34  -0.234]]
[[1.685 0.391 1.701 1.732 0.525]
 [0.041 0.014 0.023 0.021 0.028]
 [0.089 0.162 0.108 0.132 0.128]
 [0.238 0.292 0.449 0.02  0.724]
 [1.154 0.353 2.689 2.038 1.702]]


In [11]:
values = [zip(user_ratings[i], predictions[i]) for i in range(0,predictions.shape[0])]
comparison_data = pd.DataFrame(values)
comparison_data.columns = data.columns
comparison_data#.applymap(lambda(x,y): "(%2.3f|%2.3f)"%(x,y))

Unnamed: 0,The Call of Cthulhu,Frankenstein,Dracula,Neuromancer,Space Odyssey
0,"(8.0, 7.937461665902083)","(2.0, 2.230408015977145)","(nan, 21.305346004349676)","(5.0, 4.854874025125911)","(4.0, 3.979170367041119)"
1,"(3.0, 2.9031634208352513)","(2.0, 2.3626270492362296)","(nan, -32.43760797595069)","(7.0, 6.770739934219383)","(7.0, 6.967427917574499)"
2,"(9.0, 8.936872043625552)","(nan, 4.62947920461969)","(7.0, 7.007523883949421)","(8.0, 8.05253363576117)","(5.0, 4.99980228710435)"
3,"(nan, 8.911220840185965)","(nan, 4.961328156890585)","(7.0, 7.000029181451457)","(8.0, 7.999282811614534)","(9.0, 8.999434839211375)"
4,"(nan, 4.910293189089428)","(1.0, 0.6839165979097902)","(8.0, 8.006541889287751)","(3.0, 3.2661322265232484)","(7.0, 7.027800003692679)"
5,"(2.0, 2.004927191735232)","(3.0, 2.9947382798376596)","(5.0, 4.999660795436882)","(nan, 0.013057630909453444)","(nan, 62.47900275294873)"
6,"(4.0, 4.445074053328606)","(2.0, 0.38687141440726003)","(nan, 6.265689044341696)","(2.0, 2.9806443877675424)","(7.0, 7.1410499097625175)"
7,"(7.0, 6.441100212372691)","(1.0, 2.9222877609108746)","(2.0, 2.063901357001854)","(7.0, 5.869238391549164)","(9.0, 8.835279563444987)"
8,"(3.0, 3.160004846127212)","(3.0, 2.4268554621594616)","(nan, -34.150554593351565)","(7.0, 7.336587176910389)","(3.0, 3.048345542170628)"
9,"(4.0, 4.166602142659847)","(nan, -0.14548044907313837)","(5.0, 4.993109604031358)","(3.0, 2.8690220220589433)","(3.0, 2.999173225008306)"


In [12]:
comparison_data

Unnamed: 0,The Call of Cthulhu,Frankenstein,Dracula,Neuromancer,Space Odyssey
0,"(8.0, 7.937461665902083)","(2.0, 2.230408015977145)","(nan, 21.305346004349676)","(5.0, 4.854874025125911)","(4.0, 3.979170367041119)"
1,"(3.0, 2.9031634208352513)","(2.0, 2.3626270492362296)","(nan, -32.43760797595069)","(7.0, 6.770739934219383)","(7.0, 6.967427917574499)"
2,"(9.0, 8.936872043625552)","(nan, 4.62947920461969)","(7.0, 7.007523883949421)","(8.0, 8.05253363576117)","(5.0, 4.99980228710435)"
3,"(nan, 8.911220840185965)","(nan, 4.961328156890585)","(7.0, 7.000029181451457)","(8.0, 7.999282811614534)","(9.0, 8.999434839211375)"
4,"(nan, 4.910293189089428)","(1.0, 0.6839165979097902)","(8.0, 8.006541889287751)","(3.0, 3.2661322265232484)","(7.0, 7.027800003692679)"
5,"(2.0, 2.004927191735232)","(3.0, 2.9947382798376596)","(5.0, 4.999660795436882)","(nan, 0.013057630909453444)","(nan, 62.47900275294873)"
6,"(4.0, 4.445074053328606)","(2.0, 0.38687141440726003)","(nan, 6.265689044341696)","(2.0, 2.9806443877675424)","(7.0, 7.1410499097625175)"
7,"(7.0, 6.441100212372691)","(1.0, 2.9222877609108746)","(2.0, 2.063901357001854)","(7.0, 5.869238391549164)","(9.0, 8.835279563444987)"
8,"(3.0, 3.160004846127212)","(3.0, 2.4268554621594616)","(nan, -34.150554593351565)","(7.0, 7.336587176910389)","(3.0, 3.048345542170628)"
9,"(4.0, 4.166602142659847)","(nan, -0.14548044907313837)","(5.0, 4.993109604031358)","(3.0, 2.8690220220589433)","(3.0, 2.999173225008306)"


In [13]:
d = comparison_data.to_latex()
text_file = open("comparison.txt", "w")
text_file.write(d)
text_file.close()