Importing Libraries

In [1]:
import pandas as pd
import os
from math import pow, sqrt
from copy import deepcopy
import content_based2 

{'Adventure': [('5007', 5.0), ('5357', 5.0), ('5720', 5.0), ('6902', 5.0), ('27800', 5.0), ('31367', 5.0), ('49817', 5.0), ('86864', 5.0), ('102666', 5.0), ('139385', 5.0), ('139620', 5.0), ('1306', 4.667), ('108979', 4.583), ('3612', 4.5), ('6311', 4.5), ('6536', 4.5), ('6630', 4.5), ('6721', 4.5), ('8580', 4.5), ('26012', 4.5), ('27155', 4.5), ('31934', 4.5), ('33138', 4.5), ('51698', 4.5), ('55671', 4.5), ('62970', 4.5), ('66785', 4.5), ('69803', 4.5), ('103210', 4.5), ('110216', 4.5), ('114552', 4.5), ('114678', 4.5), ('136598', 4.5), ('7099', 4.477), ('2905', 4.429), ('68659', 4.4), ('3000', 4.385), ('577', 4.333), ('5974', 4.333), ('26649', 4.333), ('91485', 4.333), ('1136', 4.302), ('7215', 4.3), ('1948', 4.286), ('908', 4.274), ('1254', 4.25), ('3629', 4.25), ('6584', 4.25), ('7720', 4.25), ('8253', 4.25), ('8684', 4.25), ('71129', 4.25), ('72982', 4.25), ('5618', 4.236), ('29', 4.233), ('1196', 4.228), ('1216', 4.227), ('91542', 4.225), ('2019', 4.218), ('65261', 4.214), ('119

Reading dataset

In [2]:
# Load the ratings file into a pandas DataFrame.
# NOTE(review): `ratings` is not referenced by any later cell visible here; the
# preprocessing cell re-reads the same CSV by hand - confirm this load is needed.
ratings = pd.read_csv('./ratings.csv')


Data preprocessing

In [3]:
# Parse ratings.csv into a nested dictionary of the form
#   {userid: {movieid: rating, movieid: rating, ...}}
# The first three comma-separated fields of every row are used as
# (user id, movie id, rating). Ids stay as strings exactly as read from the
# file; ratings are converted to float.
data = {}

# `with` guarantees the handle is closed even if a row fails to parse.
with open('./ratings.csv', 'r', encoding='utf-8') as file:
    next(file, None)  # skip the header row (no-op on an empty file)
    for line in file:  # stream line by line instead of loading the whole file
        line = line.strip()
        if not line:
            continue  # tolerate blank / trailing lines
        fields = line.split(',')
        # Create the per-user dict on first sight, then record the rating.
        data.setdefault(fields[0], {})[fields[1]] = float(fields[2])

# Alias used by the rest of the notebook.
data_dict = data

Calculate the cosine similarity between two users

In [4]:
def calculate_cos(user1,user2,data):
    """Return the cosine similarity between two users' rating vectors.

    Each user's ratings are treated as a sparse vector indexed by movie id:
    the dot product runs over the movies both users rated, while each norm
    runs over all of that user's own ratings.

    Args:
        user1: key of the first user in ``data``.
        user2: key of the second user in ``data``.
        data: mapping ``{userid: {movieid: rating}}``.

    Returns:
        The cosine similarity as a number, or ``0.0`` when either user has a
        zero-length rating vector (no ratings, or only zero ratings), where
        the similarity is undefined.

    Raises:
        KeyError: if ``user1`` or ``user2`` is not a key of ``data``.
    """
    user1_data = data[user1]
    user2_data = data[user2]

    # Dot product: only movies rated by both users contribute.
    dot_product = 0
    for movie, rating in user1_data.items():
        if movie in user2_data:
            dot_product += float(rating) * float(user2_data[movie])

    # Euclidean norm of each user's full rating vector.
    norm1 = 0
    for rating in user1_data.values():
        norm1 += rating * rating
    norm1 = sqrt(norm1)

    norm2 = 0
    for rating in user2_data.values():
        norm2 += rating * rating
    norm2 = sqrt(norm2)

    denominator = norm1 * norm2
    if denominator == 0:
        # Avoid ZeroDivisionError; a user with no signal is similar to nobody.
        return 0.0
    return dot_product / denominator

Find the user most similar to this user

In [5]:
def most_similar(userID,data):
    """Find the single user whose ratings are most similar to ``userID``'s.

    Every other key of ``data`` is scored against ``userID`` with
    ``calculate_cos``; the highest-scoring one wins. On ties the user that
    appears first in ``data`` is returned.

    Args:
        userID: key of the reference user in ``data``.
        data: mapping ``{userid: {movieid: rating}}``.

    Returns:
        A ``(userid, similarity)`` tuple, or ``(None, 0.0)`` when ``data``
        contains no user other than ``userID``.
    """
    scored = (
        (userid, calculate_cos(userID, userid, data))
        for userid in data
        if userid != userID
    )
    # max() needs no full sort and, like a stable descending sort followed by
    # [0], yields the first of several equally good candidates. `default`
    # covers the case where there is nobody to compare against.
    return max(scored, key=lambda pair: pair[1], default=(None, 0.0))

Find the most similar user for each user

In [6]:
def get_similar_list(data):
    """Map every user in ``data`` to the result of ``most_similar`` for them.

    Args:
        data: mapping ``{userid: {movieid: rating}}``.

    Returns:
        A dict ``{userid: most_similar(userid, data)}`` with one entry per
        key of ``data``.
    """
    return {user_id: most_similar(user_id, data) for user_id in data}

In [7]:
# Compute each user's nearest neighbour once, up front; recommend() and run()
# read this module-level mapping.
# NOTE: get_similar_list scores every user against every other user, so this
# cell does a number of similarity computations quadratic in the user count.
similar_list=get_similar_list(data_dict)
# Prints the whole {userid: (most similar userid, similarity)} mapping.
print(similar_list)

{'1': ('348', 0.4138376737150969), '2': ('96', 0.7558831522705317), '3': ('245', 0.6233473516916178), '4': ('322', 0.3267436954982175), '5': ('38', 0.19311600805318488), '6': ('195', 0.2941614460167349), '7': ('403', 0.43228738341040535), '8': ('96', 0.556708252221131), '9': ('473', 0.6041792188185172), '10': ('186', 0.12743299694043467), '11': ('453', 0.4455892796103885), '12': ('464', 0.7535433834675822), '13': ('270', 0.16185424283446712), '14': ('151', 0.5815750615279165), '15': ('396', 0.22740114939757783), '16': ('148', 0.5868208582958798), '17': ('1', 0.3066393094877372), '18': ('528', 0.3291615066385674), '19': ('401', 0.26610830672909985), '20': ('299', 0.20283149388320051), '21': ('114', 0.3461787172539232), '22': ('228', 0.29884141128999553), '23': ('405', 0.27317643447449746), '24': ('615', 0.46437805909650953), '25': ('420', 0.344769110389678), '26': ('184', 0.3248145294331461), '27': ('369', 0.26990866175372547), '28': ('105', 0.49005513813349966), '29': ('545', 0.5348117

Recommend to each user the highly rated movies of their most similar user

In [8]:
def recommend(user, min_similarity=0.2, min_rating=4.0, top_n=3):
    """Recommend movies to ``user`` from their most similar user's ratings.

    Reads the module-level ``similar_list`` (``{userid: (neighbour, sim)}``)
    and ``data_dict`` (``{userid: {movieid: rating}}``).

    Args:
        user: key of the user to recommend for.
        min_similarity: if the neighbour's similarity is below this value the
            user is considered to have no similar user.
        min_rating: only movies the neighbour rated strictly above this value
            are candidates.
        top_n: maximum number of movies to return.

    Returns:
        ``{user: [(movieid, rating), ...]}`` with at most ``top_n`` entries,
        ordered by the neighbour's rating (highest first), or ``-1`` when
        there is no sufficiently similar user or no candidate movie.
    """
    most_sim, sim = similar_list[user][0], similar_list[user][1]

    # No user is similar enough: signal the caller to fall back.
    if sim < min_similarity:
        return -1

    # Candidates: movies the neighbour rated highly that `user` has not rated.
    seen = data_dict[user]
    recomm = [
        (item, rating)
        for item, rating in data_dict[most_sim].items()
        if item not in seen and rating > min_rating
    ]

    # The neighbour has nothing new and highly rated to offer.
    if not recomm:
        return -1

    # Stable sort: equally rated movies keep the neighbour's original order.
    recomm.sort(key=lambda val: val[1], reverse=True)

    # Slicing already handles lists shorter than top_n.
    return {user: recomm[:top_n]}

In [9]:
# Spot-check the collaborative-filtering recommender for a single user.
recommend('150')

{'150': [('110', 5.0), ('293', 5.0), ('364', 5.0)]}

Collaborative + Content-based

In [10]:
def run():
    """Build recommendations for every user in ``similar_list``.

    Collaborative filtering (``recommend``) is tried first; when it returns
    the ``-1`` sentinel, ``content_based2.recommend`` is used instead.

    Returns:
        A single dict merging the per-user results of whichever recommender
        was used for each user.
    """
    combined = {}
    for user_id in similar_list:
        outcome = recommend(user_id)
        if outcome == -1:
            # CF produced nothing for this user: fall back to content-based.
            outcome = content_based2.recommend(user_id)
        combined.update(outcome)
    return combined

Return Results {'UserID':[('MovieID', rating)]}

In [11]:
# Produce recommendations for every user (collaborative filtering first,
# content-based fallback) and display the resulting dict.
run()


{'1': [('4226', 5.0), ('34405', 5.0), ('55247', 5.0)],
 '2': [('1414', 5.0)],
 '3': [('62', 5.0), ('122', 5.0), ('318', 5.0)],
 '4': [('246', 5.0), ('541', 5.0), ('915', 5.0)],
 '5': [('567', 5.0), ('583', 5.0), ('4454', 5.0)],
 '6': [('296', 5.0), ('318', 5.0), ('329', 5.0)],
 '7': [('1653', 5.0), ('4226', 5.0), ('61240', 5.0)],
 '8': [('124', 5.0), ('418', 5.0), ('567', 5.0)],
 '9': [('661', 5.0)],
 '10': [('567', 5.0), ('583', 5.0), ('124', 5.0)],
 '11': [('32', 5.0), ('288', 5.0), ('457', 5.0)],
 '12': [('567', 5.0), ('583', 5.0), ('1757', 5.0)],
 '13': [('124', 5.0), ('418', 5.0), ('226', 5.0)],
 '14': [('260', 5.0), ('802', 5.0), ('805', 5.0)],
 '15': [('608', 5.0), ('858', 5.0), ('912', 5.0)],
 '16': [('34', 5.0), ('110', 5.0), ('318', 5.0)],
 '17': [('593', 5.0), ('912', 5.0), ('1213', 5.0)],
 '18': [('593', 5.0), ('1127', 5.0), ('1197', 5.0)],
 '19': [('47', 5.0), ('356', 5.0), ('2329', 5.0)],
 '20': [('1', 5.0), ('10', 5.0), ('47', 5.0)],
 '21': [('318', 5.0), ('356', 5.0), (