In [10]:
import pandas as pd
import numpy as np
import math
from surprise import Reader, Dataset

In [11]:
file_path = 'ratings_small.csv'

# Load the ratings first so the rating scale can be taken from the data
# instead of being hard-coded.
ratings = pd.read_csv(file_path)

# The ratings shown below include half-star values (e.g. 2.5), so a fixed
# (1, 5) scale is not guaranteed to cover the data; use the observed range.
reader = Reader(
    line_format='user item rating',
    sep=',',
    rating_scale=(float(ratings['rating'].min()), float(ratings['rating'].max())),
)

ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [12]:
# Wrap the (user, item, rating) columns in a Surprise dataset and turn the
# whole thing into a trainset (no hold-out split is made here).
data = Dataset.load_from_df(
    ratings.loc[:, ['userId', 'movieId', 'rating']],
    reader,
)
trainset = data.build_full_trainset()

In [13]:
# Report how many distinct users and items the trainset holds.
for label, count in (('Number of users: ', trainset.n_users),
                     ('Number of items: ', trainset.n_items)):
    print(label, count, '\n')

Number of users:  671 

Number of items:  9066 



In [14]:
# Collect, for every inner id known to the trainset, the matching raw id.
# Position k of each array holds the raw id of the k-th inner id listed.
def iid_converter(x):
    """Translate an inner item id into its raw item id."""
    return trainset.to_raw_iid(x)


def uid_converter(x):
    """Translate an inner user id into its raw user id."""
    return trainset.to_raw_uid(x)


trainset_iids = list(trainset.all_items())
trainset_raw_iids = np.array([iid_converter(inner_iid) for inner_iid in trainset_iids])

trainset_uids = list(trainset.all_users())
trainset_raw_uids = np.array([uid_converter(inner_uid) for inner_uid in trainset_uids])

In [15]:
#build the transformation from raw iid to index
def rawiid2index(rawiid):
    """Return the position of a raw item id inside ``trainset_raw_iids``.

    Parameters
    ----------
    rawiid : the raw item id to look up.

    Returns
    -------
    int
        Index of the first entry of ``trainset_raw_iids`` equal to ``rawiid``.

    Raises
    ------
    KeyError
        If ``rawiid`` does not occur in ``trainset_raw_iids``.
    """
    # flatnonzero yields a 1-D array of matching positions, which avoids
    # converting a 2-D argwhere result to a scalar (deprecated in NumPy).
    matches = np.flatnonzero(trainset_raw_iids == rawiid)
    if matches.size == 0:
        raise KeyError('raw item id {!r} is not in the trainset'.format(rawiid))
    return int(matches[0])

def index2rawiid(index):
    """Look up the raw item id stored at position ``index`` of ``trainset_raw_iids``."""
    raw_iid = trainset_raw_iids[index]
    return raw_iid

#build the transformation from raw uid to index
def rawuid2index(rawuid):
    """Return the position of a raw user id inside ``trainset_raw_uids``.

    Parameters
    ----------
    rawuid : the raw user id to look up.

    Returns
    -------
    int
        Index of the first entry of ``trainset_raw_uids`` equal to ``rawuid``.

    Raises
    ------
    KeyError
        If ``rawuid`` does not occur in ``trainset_raw_uids``.
    """
    # flatnonzero yields a 1-D array of matching positions, which avoids
    # converting a 2-D argwhere result to a scalar (deprecated in NumPy).
    matches = np.flatnonzero(trainset_raw_uids == rawuid)
    if matches.size == 0:
        raise KeyError('raw user id {!r} is not in the trainset'.format(rawuid))
    return int(matches[0])

def index2rawuid(index):
    """Look up the raw user id stored at position ``index`` of ``trainset_raw_uids``."""
    raw_uid = trainset_raw_uids[index]
    return raw_uid

In [8]:
# Read the user-item table that was filled in with the chosen method.
method='svd'
table_path='filled_useritem_table_'+method+'.npy'

try:
    uitable=np.load(table_path)
except FileNotFoundError:
    # Only a missing file is reported as "not found"; any other failure
    # (corrupt file, interrupt, ...) propagates instead of being mislabelled.
    print('table not found')
else:
    print('succesfully loaded',table_path)
    print('shape:',uitable.shape,' user number:', uitable.shape[0],' item numebr:', uitable.shape[1])

succesfully loaded filled_useritem_table_svd.npy
shape: (671, 9066)  user number: 671  item numebr: 9066


In [28]:
def sortrating(user_ratings):
    """Return the indices that order ``user_ratings`` from lowest to highest."""
    return np.argsort(user_ratings)


In [85]:
def get_top_n_iid(uid, n=10):
    """Return the raw item ids with the highest table values for one user.

    Parameters
    ----------
    uid : raw user id; it is mapped to a row of ``uitable`` via ``rawuid2index``.
    n : int, default 10
        Maximum number of items to return. If the row has fewer than ``n``
        entries, every item is returned; ``n <= 0`` yields an empty list.

    Returns
    -------
    list
        Raw item ids (as produced by ``index2rawiid``), best-rated first.
    """
    # Row of the table belonging to this user.
    user_ratings = uitable[rawuid2index(uid)]

    # sortrating orders ascending, so reverse it to put the best items first.
    best_first = sortrating(user_ratings)[::-1]

    # Slicing (rather than indexing 0..n-1) tolerates n larger than the row.
    return [index2rawiid(idx) for idx in best_first[:max(n, 0)]]

In [49]:
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids the mixed-type DtypeWarning.
metadata=pd.read_csv('movies_metadata.csv', low_memory=False)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [58]:
# Preview the first five metadata rows.
metadata.head(n=5)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0


In [106]:
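# Notes on how the metadata frame is addressed below ("mvid" = row position):
#   metadata.iloc[mvid, 8]  -> original title of that row (e.g. row 0)
#   metadata.iloc[mvid, 5]  -> TMDB id of that row
#   metadata[metadata.original_title == 'Jumanji'].index.tolist() -> mvid(s) of that title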

def mvid2title(mvid):
    """Return the ``original_title`` of the metadata row at position ``mvid``.

    Parameters
    ----------
    mvid : int
        Positional row number in ``metadata`` (not a label or external id).

    Returns
    -------
    The value of the ``original_title`` column in that row.

    Raises
    ------
    IndexError
        If ``mvid`` is outside the range of rows in ``metadata``.
    """
    # Address the column by name so the lookup does not depend on the
    # column order of the CSV (it used to be the hard-coded position 8).
    return metadata['original_title'].iloc[mvid]

    
def title2mvid(title):
    """Return the index label of the first metadata row with this original title.

    Parameters
    ----------
    title : value compared for equality against ``metadata.original_title``.

    Returns
    -------
    int
        Index label of the first matching row.

    Raises
    ------
    KeyError
        If no row of ``metadata`` has that ``original_title``.
    """
    matches = metadata.index[metadata['original_title'] == title]
    if len(matches) == 0:
        # Fail with the offending title instead of "list index out of range".
        raise KeyError('title {!r} not found in metadata.original_title'.format(title))
    return int(matches[0])

In [90]:
def distance(a,b):
    """Squared difference between ``a`` and ``b``."""
    return pow(a - b, 2)

In [115]:
def find_mostsimilar_uid(user_input):
    """Find the table row closest to a new user's ratings.

    Parameters
    ----------
    user_input : iterable of ``[title, rating]`` pairs
        Each title is resolved with ``title2mvid`` and then ``rawiid2index``
        to a column of ``uitable``. A rating of 0 is treated as "not rated".
        NOTE(review): this passes a metadata row number to ``rawiid2index``,
        which looks it up among the trainset's raw item ids - confirm the two
        id spaces really coincide.

    Returns
    -------
    tuple
        ``(raw_uid, min_distance)``: the raw user id (via ``index2rawuid``) of
        the row with the smallest summed squared difference over the rated
        columns, and that distance. Ties go to the lowest row index.

    Raises
    ------
    ValueError
        If ``uitable`` has no rows.
    """
    if uitable.shape[0] == 0:
        raise ValueError('uitable has no user rows to compare against')

    # Build the user's rating vector; columns left at 0 count as unrated.
    v_user = np.zeros(uitable.shape[1])
    for entry in user_input:
        title, rating = entry[0], entry[1]
        v_user[rawiid2index(title2mvid(title))] = rating

    # Compare only on the columns the user actually rated.
    rated = np.flatnonzero(v_user)
    diffs = uitable[:, rated] - v_user[rated]
    distances = (diffs ** 2).sum(axis=1)

    # argmin needs no sentinel and returns the first minimum.
    best_row = int(np.argmin(distances))

    # Map the row back through the id table rather than assuming uid == row + 1.
    return index2rawuid(best_row), distances[best_row]



In [110]:
def give_n_recommondation(user_input,n=10):
    """Recommend up to ``n`` titles for a new user.

    The most similar existing user is found with ``find_mostsimilar_uid``,
    that user's top items are fetched with ``get_top_n_iid``, and each item id
    is turned into a title with ``mvid2title``.

    Parameters
    ----------
    user_input : iterable of ``[title, rating]`` pairs, passed unchanged to
        ``find_mostsimilar_uid``.
    n : int, default 10
        Number of items requested from ``get_top_n_iid``.

    Returns
    -------
    list
        One title per item id returned by ``get_top_n_iid``, in that order.
    """
    # First find the most similar user in the dataset (distance is not needed).
    most_similar_uid, _ = find_mostsimilar_uid(user_input)

    # Get the recommendation list for that user.
    iidlist = get_top_n_iid(most_similar_uid, n)

    # NOTE(review): iidlist holds raw item ids from the ratings data, while
    # mvid2title looks rows up by position in metadata - confirm these id
    # spaces match (a movieId -> metadata row mapping may be required).
    # Iterate over what was actually returned instead of assuming n entries.
    return [mvid2title(iid) for iid in iidlist]

In [118]:
# Demo: build a small user profile and ask for ten recommendations.

# The (id, rating) pairs mirror the first three ratings rows displayed above;
# each id is converted to a title because the recommender takes titles.
user_input = [
    [mvid2title(mvid), rating]
    for mvid, rating in ((31, 2.5), (1029, 3), (1061, 3))
]

# Run the recommender; as the last expression, the result is displayed.
give_n_recommondation(user_input, 10)

['Number Seventeen',
 'Army of Darkness',
 'The Governess',
 'Honey, I Shrunk the Kids',
 'Unforgiven',
 'Town Without Pity',
 'Bogus',
 'Pink Floyd: The Wall',
 '獨臂拳王大破血滴子',
 'Patch Adams']