In [1]:
from numpy import corrcoef, mat, shape, nonzero, logical_and
import numpy.linalg as la

In [2]:
def loadExtData():
    """
    Build the small sample rating table used by the demo cells.

    Returns a fresh list of 7 rows with 5 integer entries each. The
    estimation code in this notebook indexes rows as users and columns as
    items, and treats an entry of 0 as "not rated".
    """
    ratings = [
        [4, 4, 0, 2, 2],
        [4, 0, 0, 3, 3],
        [4, 0, 0, 1, 1],
        [1, 1, 1, 2, 0],
        [2, 2, 2, 0, 0],
        [1, 1, 1, 0, 0],
        [5, 5, 5, 0, 0],
    ]
    return ratings

In [3]:
def cos_sim(inA,inB):
    """
    Cosine similarity of two column vectors, rescaled from [-1, 1] to [0, 1].

    inA, inB: numpy matrix column vectors of equal length (k x 1), so that
              inA.T * inB is a 1 x 1 matrix holding their dot product.
    returns: 0.5 + 0.5 * cos(angle between inA and inB);
             0.5 when either vector has zero norm.
    """
    # .item() pulls the single element out of the 1 x 1 product; calling
    # float() directly on the matrix is deprecated in recent NumPy releases.
    dot = float((inA.T * inB).item())
    denom = la.norm(inA) * la.norm(inB)
    if denom == 0:
        # The angle is undefined for a zero vector; report the neutral value
        # (cosine 0) instead of dividing by zero and returning nan.
        return 0.5
    return 0.5 + 0.5 * dot / denom

In [47]:
def standEst(dataMat, user, simMean, item):
    """
    Estimate the rating of `user` for `item` as a similarity-weighted
    average of the ratings that user has already given to other items.

    dataMat: rating matrix (numpy matrix; rows are users, columns are items,
             0 means "not rated"),
    user: row index,
    simMean: similarity function taking two column vectors,
    item: col index

    Returns the stored rating when the user already rated `item`, 0 when the
    similarities sum to zero, and the weighted average otherwise.
    """
    # An existing rating needs no estimation.
    existing = dataMat[user, item]
    if existing != 0:
        return existing

    weightSum = 0.0        # sum of similarities (the weights)
    weightedRatings = 0.0  # sum of similarity * rating

    # Walk over every column (item) of the rating matrix.
    for col in range(shape(dataMat)[1]):
        rating = dataMat[user, col]
        # Only items this user has rated, other than the target, contribute.
        if col == item or rating == 0:
            continue

        # Rows (users) that rated both this item and the target item; the
        # similarity is computed on those rows only.
        bothRated = logical_and(dataMat[:, col].A > 0, dataMat[:, item].A > 0)
        rows = nonzero(bothRated)[0]
        similarity = simMean(dataMat[rows, col], dataMat[rows, item]) if len(rows) != 0 else 0

        print('processing {} col, rating {}, similarity {}'.format(col, rating, similarity))

        # The similarity acts as the weight of this item's rating.
        weightSum += similarity
        weightedRatings += similarity * rating

    # Weighted average, or 0 when there is no weight at all.
    return weightedRatings / weightSum if weightSum != 0 else 0

In [48]:
def recommend(dataMat, user, N=3, simMean=cos_sim, estTestMean=standEst):
    """
    Rank the items `user` has not rated by their estimated rating.

    dataMat: rating matrix (numpy matrix; 0 means "not rated"),
    user: row index,
    N: maximum number of (item, score) pairs to return,
    simMean: similarity function handed to the estimator,
    estTestMean: estimator called as estTestMean(dataMat, user, simMean, item)

    Returns a list of at most N (col index, estimated score) tuples, highest
    score first. The list is empty when the user has rated every item.
    """
    # Column indices where this user's row holds a 0.
    candidates = nonzero(dataMat[user, :].A == 0)[1]
    if len(candidates) != 0:
        print('not rated item col index:', candidates)
    else:
        print('There is nothing to recommend')

    scored = []
    for col in candidates:
        print('processing col ', col)
        estimate = estTestMean(dataMat, user, simMean, col)
        scored.append((col, estimate))
        print('col: {}, score: {}'.format(col, estimate))

    # Best estimates first, then keep the top N.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:N]

In [49]:
# Wrap the sample ratings in a numpy matrix; the estimator relies on
# matrix-only features such as `.A`.
myData = mat(loadExtData())
# Top recommendations for the user stored in row 2, using the default
# similarity function and estimator.
ans = recommend(dataMat=myData, user=2)
print(ans)

not rated item col index: [1 2]
processing col  1
processing 0 col, rating 4, similarity1.0
processing 3 col, rating 1, similarity0.928746462856272
processing 4 col, rating 1, similarity1.0
col: 1, score: 2.0243290220056256
processing col  2
processing 0 col, rating 4, similarity1.0
processing 3 col, rating 1, similarity1.0
processing 4 col, rating 1, similarity0
col: 2, score: 2.5
[(2, 2.5), (1, 2.0243290220056256)]
