In [1]:
import pandas as pd
import numpy as np
import math
import pickle
import random
import time

In [3]:
# Write data
def writeFile(data, path):
    """Pickle ``data`` and store it in the file at ``path``.

    The file is opened in binary write mode, so an existing file at
    ``path`` is overwritten. Returns ``None``.
    """
    with open(path, 'wb') as handle:
        pickle.Pickler(handle).dump(data)

# Read data
def readFile(data, path):
    """Load and return the object pickled in the file at ``path``.

    Parameters
    ----------
    data : any
        Unused. Kept only so existing ``readFile(data, path)`` call sites
        keep working.
    path : str or path-like
        Location of the pickle file to read.

    Returns
    -------
    The unpickled object.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserializing; only
    use this on files from a trusted source.
    """
    with open(path, 'rb') as f:
        # pickle.load takes the file object as its first argument; passing
        # ``data`` there (as the previous version did) raises TypeError.
        return pickle.load(f)

In [11]:
# Build the two user/movie lookup dictionaries
def getDict(train_data):
    """Build user->movies and movie->users adjacency dictionaries.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Must contain the columns ``'UserID'`` and ``'MovieID'``; one row
        per (user, movie) interaction.

    Returns
    -------
    (user2mv, mv2user) : tuple of dict
        ``user2mv`` maps ``'u' + str(UserID)`` to the list of
        ``'m' + str(MovieID)`` keys for that user, in row order.
        ``mv2user`` is the reverse mapping. Repeated rows produce
        repeated list entries.
    """
    user2mv = {}
    mv2user = {}
    # Iterate the two id columns directly instead of DataFrame.iterrows():
    # iterrows() builds a Series per row (very slow on large frames) and does
    # not preserve dtypes, so integer ids become floats ('u1.0') as soon as
    # any other column is float.
    for user_id, movie_id in zip(train_data['UserID'], train_data['MovieID']):
        uid, mid = 'u' + str(user_id), 'm' + str(movie_id)
        user2mv.setdefault(uid, []).append(mid)
        mv2user.setdefault(mid, []).append(uid)
    return user2mv, mv2user

In [14]:
def personalRank(train_data, alpha):
    """Compute PersonalRank scores for every (movie, user) pair in closed form.

    A bipartite user/movie graph is built from ``train_data`` via
    ``getDict``. With ``M`` the transition matrix (``M[i, j] = 1/out(i)``
    when node ``i`` links to node ``j``), the scores are

        PR = (1 - alpha) * inv(I - alpha * M.T)

    where column ``i`` holds the scores of all nodes for a walk rooted at
    node ``i``.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Interaction data accepted by ``getDict`` (columns ``'UserID'`` and
        ``'MovieID'``).
    alpha : float
        Weight applied to the transition matrix; ``1 - alpha`` scales the
        result.

    Returns
    -------
    pandas.DataFrame
        Shape ``[movies x users]``. The index holds movie ids and the
        columns hold user ids, both as strings with the ``'m'`` / ``'u'``
        prefixes stripped.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``I - alpha * M.T`` is singular.
    """
    user2mv, mv2user = getDict(train_data)
    users = list(user2mv.keys())
    movies = list(mv2user.keys())
    unum, mnum = len(users), len(movies)

    # Node order is users first, then movies; map each node key to its
    # row/column position so the matrix can be filled as a plain array.
    position = {node: i for i, node in enumerate(users + movies)}

    # Fill the transition matrix directly in NumPy. The previous
    # ``M.loc[u][m] = ...`` form is chained-indexing assignment, which may
    # write to a temporary copy and leave M unchanged.
    M = np.zeros([unum + mnum, unum + mnum])
    for u, mlist in user2mv.items():
        M[position[u], [position[m] for m in mlist]] = 1 / len(mlist)
    for m, ulist in mv2user.items():
        M[position[m], [position[u] for u in ulist]] = 1 / len(ulist)

    PR = np.linalg.inv(np.eye(unum + mnum) - alpha * M.T) * (1 - alpha)

    # Keep movie rows and user columns only. The previous code dropped both
    # the movie columns and the user columns, leaving no columns at all and
    # making the subsequent relabeling fail.
    return pd.DataFrame(
        data=PR[unum:, :unum],
        index=[m[1:] for m in movies],
        columns=[u[1:] for u in users],
    )  # [movie x user]

In [12]:
if __name__ == "__main__":
    # Data file locations, relative to the notebook's working directory.
    moviesPath = './ml-1m/movies.dat'
    ratingsPath = './ml-1m/ratings.dat'
    usersPath = './ml-1m/users.dat'

    # The ratings file is read without a header row, with fields split on '::'.
    # A multi-character separator is only supported by the python parser
    # engine, so request it explicitly instead of relying on pandas' fallback
    # (which emits a ParserWarning).
    ratingsDF = pd.read_csv(
        ratingsPath,
        index_col=None,
        sep='::',
        header=None,
        names=['UserID', 'MovieID', 'rating', 'timestamp'],
        engine='python',
    )
    user2mv, mv2user = getDict(ratingsDF)

  


In [13]:
# Show the user -> movie-list mapping built by getDict(ratingsDF).
# Note: this renders the entire dictionary as the cell output.
user2mv

{'u1': ['m1193',
  'm661',
  'm914',
  'm3408',
  'm2355',
  'm1197',
  'm1287',
  'm2804',
  'm594',
  'm919',
  'm595',
  'm938',
  'm2398',
  'm2918',
  'm1035',
  'm2791',
  'm2687',
  'm2018',
  'm3105',
  'm2797',
  'm2321',
  'm720',
  'm1270',
  'm527',
  'm2340',
  'm48',
  'm1097',
  'm1721',
  'm1545',
  'm745',
  'm2294',
  'm3186',
  'm1566',
  'm588',
  'm1907',
  'm783',
  'm1836',
  'm1022',
  'm2762',
  'm150',
  'm1',
  'm1961',
  'm1962',
  'm2692',
  'm260',
  'm1028',
  'm1029',
  'm1207',
  'm2028',
  'm531',
  'm3114',
  'm608',
  'm1246'],
 'u2': ['m1357',
  'm3068',
  'm1537',
  'm647',
  'm2194',
  'm648',
  'm2268',
  'm2628',
  'm1103',
  'm2916',
  'm3468',
  'm1210',
  'm1792',
  'm1687',
  'm1213',
  'm3578',
  'm2881',
  'm3030',
  'm1217',
  'm3105',
  'm434',
  'm2126',
  'm3107',
  'm3108',
  'm3035',
  'm1253',
  'm1610',
  'm292',
  'm2236',
  'm3071',
  'm902',
  'm368',
  'm1259',
  'm3147',
  'm1544',
  'm1293',
  'm1188',
  'm3255',
  'm3256',
 