# 对于电影的推荐系统

## 首先实现 ALS 算法，这里沿用之前用梯度下降实现的版本

[原始代码](../4BasicKnowledgePoints/6GradientDescentMethod.ipynb)

In [None]:
import numpy as np

# Module-level hyper-parameters. NOTE(review): nothing in the visible cells
# passes these to grad(), which has its own defaults -- confirm the intended use.
K = 2            # presumably the number of latent dimensions
max_iter = 5000  # a large iteration count goes together with a small step size
alpha = 2e-4     # presumably the gradient-descent step size
lamda = 4e-3     # presumably the regularisation weight

def _regularized_cost(R, P, Q, rows, cols, lamda):
    """Return the regularised squared error over the observed ratings.

    For every observed pair ``(u, i)`` given by ``rows``/``cols`` the cost adds
    the squared prediction error plus ``lamda * (|P[u]|^2 + |Q[:, i]|^2)``.
    """
    # Row-wise dot products P[u] . Q[:, i] for all observed pairs at once.
    residual = np.einsum('ik,ki->i', P[rows], Q[:, cols]) - R[rows, cols]
    penalty = np.sum(P[rows] ** 2) + np.sum(Q[:, cols] ** 2)
    return float(np.sum(residual ** 2) + lamda * penalty)


def grad(R, K=2, max_iter=5000, alpha=0.001, lamda=0.002, cost_threshold=0.0001):
    """Factorise a rating matrix as ``R ~ P @ Q`` with per-rating gradient descent.

    Only entries of ``R`` that are greater than 0 are treated as observed
    ratings; entries equal to 0 are taken to mean "not rated" and are ignored
    both in the updates and in the cost.

    Args:
        R: 2-D rating matrix (``numpy`` array or nested sequence), users in
            rows and items in columns.
        K: Number of latent dimensions.
        max_iter: Maximum number of passes over the observed ratings.
        alpha: Step size of each gradient update.
        lamda: L2 regularisation weight.
        cost_threshold: Training stops early once the cost drops below this.

    Returns:
        A tuple ``(P, Q, cost)`` where ``P`` has shape ``(m, K)``, ``Q`` has
        shape ``(K, n)`` and ``cost`` is the regularised squared error after
        the last completed pass (or of the random initial factors when
        ``max_iter`` is 0).

    Raises:
        ValueError: If ``R`` is not two-dimensional.
    """
    # Accept nested lists as well as arrays; tuple indexing needs an ndarray.
    R = np.asarray(R, dtype=float)
    if R.ndim != 2:
        raise ValueError("R must be a 2-D rating matrix")
    m, n = R.shape

    P = np.random.rand(m, K)
    Q = np.random.rand(K, n)

    # Observed (user, item) pairs in row-major order, i.e. the same order the
    # nested ``for u ... for i ...`` loops would visit them.
    rows, cols = np.nonzero(R > 0)

    # Defined up front so that ``max_iter == 0`` still returns a valid cost.
    cost = _regularized_cost(R, P, Q, rows, cols, lamda)

    for _ in range(max_iter):
        for u, i in zip(rows, cols):
            # Prediction error for this single observed rating.
            eui = np.dot(P[u, :], Q[:, i]) - R[u, i]

            # The analytic gradient sums over all items/users; because this
            # update runs once per observed rating, each step applies just
            # that rating's term, so no explicit summation is needed.
            # Keep the pre-update P[u] so that Q's step uses the same point.
            p_old = P[u, :].copy()
            # d/dP of (eui^2 + lamda*|P|^2) = 2*eui*Q + 2*lamda*P; the
            # regularisation term is added so that it shrinks the weights.
            P[u, :] -= alpha * (2 * eui * Q[:, i] + 2 * lamda * p_old)
            Q[:, i] -= alpha * (2 * eui * p_old + 2 * lamda * Q[:, i])

        # All factors updated for this pass: evaluate the loss on observed
        # ratings only (zeros may simply be missing ratings).
        cost = _regularized_cost(R, P, Q, rows, cols, lamda)

        # Stop once the loss falls below the requested threshold.
        if cost < cost_threshold:
            break
    return P, Q, cost

In [13]:
import pandas as pd

# Column names applied to the movies table; with header=0 they replace the
# header row found in the file.
movies_columns = [
    'mid', 'title', 'descri', 'duration', 'issueTime',
    'shootTime', 'language', 'category', 'actors', 'director',
]

# movies.csv uses '^' as its field separator.
movies = pd.read_csv(
    "../../data/MovieRecommendationSystem/movies.csv",
    sep='^',
    header=0,
    names=movies_columns,
)

# ratings.csv and tags.csv are read with the default comma separator.
ratings = pd.read_csv(
    "../../data/MovieRecommendationSystem/ratings.csv",
    header=0,
    names=['uid', 'mid', 'rating', 'timestamp'],
)
tags = pd.read_csv(
    "../../data/MovieRecommendationSystem/tags.csv",
    header=0,
    names=['uid', 'mid', 'tag', 'timestamp'],
)

In [14]:
# Display per-column summary statistics of the movies table.
movies.describe()

Unnamed: 0,id,title,duration,populateTime,productTime,language,category,performers,director
count,2790,2790.0,2790,2790.0,2790,2790,2790,2790.0,2790
unique,2790,2732.0,170,675.0,97,294,484,2769.0,1714
top,Jumanji (1995),,100 minutes,,1998,English,Drama,,Alfred Hitchcock
freq,1,57.0,92,502.0,232,1857,365,17.0,29


In [15]:
# Preview the first rows of the movies table.
movies.head()

Unnamed: 0,id,title,duration,populateTime,productTime,language,category,performers,director
2,Jumanji (1995),,104 minutes,"April 30, 1997",1995,English|Français,Adventure|Children|Fantasy,Robin Williams|Jonathan Hyde|Kirsten Dunst|Bra...,Joe Johnston
3,Grumpier Old Men (1995),,101 minutes,"September 5, 2000",1995,English,Comedy|Romance,Walter Matthau|Jack Lemmon|Ann-Margret|Sophia ...,Howard Deutch
10,GoldenEye (1995),,130 minutes,"October 22, 2002",1995,English|Pусский|Español,Action|Adventure|Thriller,Pierce Brosnan|Sean Bean|Izabella Scorupco|Fam...,Martin Campbell
11,"American President, The (1995)",,106 minutes,"August 31, 1999",1995,English,Comedy|Drama|Romance,Michael Douglas|Annette Bening|Michael J. Fox|...,Rob Reiner
12,Dracula: Dead and Loving It (1995),,88 minutes,"June 29, 2004",1995,English|Deutsch,Comedy|Horror,Leslie Nielsen|Mel Brooks|Amy Yasbeck|Peter Ma...,Mel Brooks
