In [2]:
import numpy as np
def ecludSim(inA,inB):
    """Euclidean-distance similarity, mapped into (0, 1].

    Args:
        inA, inB: Equal-shaped numeric vectors (NumPy arrays or matrices).

    Returns:
        1 / (1 + ||inA - inB||): 1.0 for identical vectors, approaching 0
        as the vectors move apart.
    """
    distance = np.linalg.norm(inA - inB)
    return 1.0/(1.0 + distance)

def pearsSim(inA,inB):
    """Pearson-correlation similarity, rescaled from [-1, 1] to [0, 1].

    Args:
        inA, inB: Equal-length numeric vectors; each is treated as one
            variable (``rowvar=0``), so column vectors are accepted.

    Returns:
        1.0 when fewer than three observations are supplied, otherwise
        0.5 + 0.5 * (correlation coefficient of inA and inB).
    """
    # NOTE(review): np.corrcoef presumably yields NaN when an input is
    # constant (zero variance) -- confirm callers tolerate that.
    if len(inA) >= 3:
        corrMatrix = np.corrcoef(inA, inB, rowvar = 0)
        return 0.5+0.5*corrMatrix[0][1]
    return 1.0

def cosSim(inA,inB):
    num = float(inA.T*inB)
    denom = np.linalg.norm(inA)*np.linalg.norm(inB)
    return 0.5+0.5*(num/denom)

In [16]:
def standEst(dataMat, user, simMeas, item):
    """Estimate `user`'s rating of `item` by item-based collaborative filtering.

    For every item j the user has rated, the similarity between `item` and
    j is measured over the rows (users) that rated both; the estimate is the
    similarity-weighted average of the user's existing ratings.

    Args:
        dataMat: ``np.matrix`` of ratings, rows indexed by user and columns
            by item; 0 means "not rated". (``.A`` is used, so a plain
            ndarray is not accepted.)
        user: Row index of the user.
        simMeas: Callable ``simMeas(colA, colB)`` returning a similarity for
            two column vectors.
        item: Column index of the item whose rating is being estimated.

    Returns:
        The weighted-average rating, or 0 when the total similarity is 0
        (for example, when the user has rated nothing).
    """
    n = np.shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    # Loop-invariant: which rows have a rating for the target item.
    ratedTarget = dataMat[:, item].A > 0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0:
            continue
        # Row indices where both the `item` column and column j are non-zero.
        overLap = np.nonzero(np.logical_and(ratedTarget,
                                            dataMat[:, j].A > 0))[0]
        print("overLap:",overLap)
        if len(overLap) == 0:
            similarity = 0
        else:
            similarity = simMeas(dataMat[overLap, item],
                                 dataMat[overLap, j])

        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        # Accumulate similarity(item, j) weighted by the user's rating of j.
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal/simTotal
    
# Scratch checks of the NumPy primitives used in standEst, printed one per
# line: element-wise `> 0`, logical_and, and the tuple-of-index-arrays that
# nonzero returns for a 1-D array, a row vector and a column vector.
print(
    np.array([[4,4,0,2,2],
              [4,0,0,3,3]]) > 0,
    np.logical_and(np.array([True,False,True,True]),
                   np.array([True,True,False,True])),
    np.nonzero(np.array([ True,False,False,True])),
    np.nonzero(np.array([[True,False,False,True]])),
    np.nonzero(np.array([[True],[False],[False],[True]])),
    sep="\n",
)

[[ True  True False  True  True]
 [ True False False  True  True]]
[ True False False  True]
(array([0, 3], dtype=int64),)
(array([0, 0], dtype=int64), array([0, 3], dtype=int64))
(array([0, 3], dtype=int64), array([0, 0], dtype=int64))


In [17]:
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return the top-N unrated items for `user`, ranked by estimated rating.

    Args:
        dataMat: ``np.matrix`` of ratings (rows = users, columns = items,
            0 = not rated).
        user: Row index of the user to recommend for.
        N: Maximum number of recommendations returned.
        simMeas: Similarity function forwarded to `estMethod`.
        estMethod: Callable ``estMethod(dataMat, user, simMeas, item)``
            returning the estimated rating for one item.

    Returns:
        A list of at most N ``(item, estimatedScore)`` tuples sorted by score
        in descending order, or the string 'you rated everything' when the
        user has no unrated items.
    """
    zeroPositions = np.nonzero(dataMat[user,:].A==0)
    print(zeroPositions)
    # Column indices of the items this user has not rated yet.
    unratedItems = zeroPositions[1]
    print("unratedItems:",unratedItems)
    if len(unratedItems) == 0:
        return 'you rated everything'
    itemScores = [(item, estMethod(dataMat, user, simMeas, item))
                  for item in unratedItems]
    itemScores.sort(key=lambda pair: pair[1], reverse=True)
    return itemScores[:N]

In [23]:
# Toy ratings matrix: one row per user, one column per item, 0 = not rated.
# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
myMat = np.asmatrix([[4,4,0,2,2],
                     [4,0,0,3,3],
                     [4,0,0,1,1],
                     [1,1,1,2,0],
                     [2,2,2,0,0],
                     [1,1,1,0,0],
                     [5,5,5,0,0]])
recommend(myMat,2)

(array([0, 0], dtype=int64), array([1, 2], dtype=int64))
unratedItems: [1 2]
overLap: [0 3 4 5 6]
the 1 and 0 similarity is: 1.000000
overLap: [0 3]
the 1 and 3 similarity is: 0.928746
overLap: [0]
the 1 and 4 similarity is: 1.000000
overLap: [3 4 5 6]
the 2 and 0 similarity is: 1.000000
overLap: [3]
the 2 and 3 similarity is: 1.000000
overLap: []
the 2 and 4 similarity is: 0.000000


[(2, 2.5), (1, 2.0243290220056256)]

In [32]:
def svdEst(dataMat, user, simMeas, item, numSV=4):
    """Estimate `user`'s rating of `item` using SVD-reduced item vectors.

    Items are projected into a low-dimensional latent space obtained from
    the SVD of the ratings matrix; similarities are measured there and the
    estimate is the similarity-weighted average of the user's ratings.

    Args:
        dataMat: ``np.matrix`` of ratings (rows = users, columns = items,
            0 = not rated). Matrix semantics of ``*`` are relied upon.
        user: Row index of the user.
        simMeas: Callable ``simMeas(colA, colB)`` returning a similarity for
            two column vectors.
        item: Column index of the item whose rating is being estimated.
        numSV: Number of leading singular values to keep (default 4, the
            value previously hard-coded). Clamped to the number available.

    Returns:
        The weighted-average rating, or 0 when the total similarity is 0.
    """
    n = np.shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    # NOTE: the decomposition is recomputed on every call, i.e. once per
    # unrated item when driven by recommend().
    U,Sigma,VT = np.linalg.svd(dataMat)
    # Never ask for more singular values than the matrix has; the former
    # fixed `np.eye(4)*Sigma[:4]` failed for matrices with fewer than 4.
    k = min(numSV, len(Sigma))
    SigK = np.asmatrix(np.diag(Sigma[:k]))
    # With A = U * S * V^T, the product A^T * U_k * S_k^-1 equals V_k: row i
    # holds item i's coordinates in the k-dimensional latent space.
    xformedItems = dataMat.T * U[:,:k] * SigK.I
    print("xformedItems:",xformedItems)
    # From here on the logic mirrors standEst, but compares latent vectors.
    for j in range(n):
        userRating = dataMat[user,j]
        if userRating == 0 or j==item:
            continue
        similarity = simMeas(xformedItems[item,:].T,
                             xformedItems[j,:].T)
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal/simTotal

In [33]:
recommend(myMat,2,estMethod=svdEst)

(array([0, 0], dtype=int64), array([1, 2], dtype=int64))
unratedItems: [1 2]
xformedItems: [[-0.72506083  0.27691869  0.41122905 -0.38106455]
 [-0.51452694 -0.39611316 -0.7522123  -0.1112701 ]
 [-0.35115882 -0.55606743  0.4531718   0.50325279]
 [-0.22150588  0.48633454 -0.21296214  0.7647803 ]
 [-0.192799    0.46976392 -0.11979054 -0.06524181]]
the 1 and 0 similarity is: 0.498142
the 1 and 3 similarity is: 0.498131
the 1 and 4 similarity is: 0.509974
xformedItems: [[-0.72506083  0.27691869  0.41122905 -0.38106455]
 [-0.51452694 -0.39611316 -0.7522123  -0.1112701 ]
 [-0.35115882 -0.55606743  0.4531718   0.50325279]
 [-0.22150588  0.48633454 -0.21296214  0.7647803 ]
 [-0.192799    0.46976392 -0.11979054 -0.06524181]]
the 2 and 0 similarity is: 0.552670
the 2 and 3 similarity is: 0.552976
the 2 and 4 similarity is: 0.217301


[(2, 2.2532707559777143), (1, 1.9921514636756925)]