## Description:
本 Jupyter Notebook 使用 PyTorch 实现 MLP 模型，并完成该模型的预训练过程。

## 导入包

In [1]:
import datetime
import numpy as np
import pandas as pd
from collections import Counter
import heapq

import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchkeras import summary, Model

import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation/shape warnings emitted by PyTorch.
warnings.filterwarnings('ignore')

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# Hyperparameter settings
topK = 10  # K passed to evaluate_model for the top-K ranking metrics
num_factors = 8  # the first MLP layer is built num_factors * 2 wide (half per embedding)
num_negatives = 4  # negative items sampled for every positive (user, item) pair
batch_size = 64  # mini-batch size of the training DataLoader
lr = 0.001  # learning rate handed to the Adam optimizer

## 导入数据

In [3]:
# The preprocessed data lives in the ProcessedData/ directory.
# NOTE(review): allow_pickle=True unpickles the file content, which can execute
# arbitrary code — only load train.npy from a trusted source.
# `train` is later used through .shape, .keys() and `(u, i) in train`, so it is
# presumably a pickled dict-like sparse matrix (e.g. scipy dok_matrix) — TODO confirm.
train = np.load('ProcessedData/train.npy', allow_pickle=True).tolist()
# Each testRatings row is read downstream as [user, ground-truth item];
# testNegatives[idx] holds the negative candidate items for the same row index.
testRatings = np.load('ProcessedData/testRatings.npy').tolist()
testNegatives = np.load('ProcessedData/testNegatives.npy').tolist()

In [4]:
# The interaction matrix is indexed as (user, item); its two dimensions size the embedding tables.
num_users, num_items = train.shape

In [5]:
# Build the training data: observed (user, item) pairs are positives; negatives are
# obtained by sampling items the user has no recorded interaction with.
def get_train_instances(train, num_negatives):
    """Create parallel user / item / label lists for training.

    Args:
        train: interaction container exposing ``shape`` (users, items), ``keys()``
            yielding ``(user, item)`` pairs, and ``(user, item) in train`` lookups.
        num_negatives: number of negative items drawn for each positive pair.

    Returns:
        Tuple ``(user_input, item_input, labels)`` of equal-length lists. Every
        positive (label 1) is immediately followed by its ``num_negatives``
        sampled negatives (label 0).
    """
    users, items, targets = [], [], []
    item_count = train.shape[1]

    for user, pos_item in train.keys():
        # Positive instance: an interaction that is present in `train`.
        users.append(user)
        items.append(pos_item)
        targets.append(1)

        # Negative instances: redraw until the item is one this user has not interacted with.
        for _ in range(num_negatives):
            while True:
                candidate = np.random.randint(item_count)
                if (user, candidate) not in train:
                    break
            users.append(user)
            items.append(candidate)
            targets.append(0)

    return users, items, targets

In [6]:
# Materialise the training triples: each positive pair plus num_negatives sampled negatives.
user_input, item_input, labels = get_train_instances(train, num_negatives)

In [7]:
# Stack the two id lists as rows, then transpose so each row is one (user, item) pair.
train_x = np.array([user_input, item_input]).T
# Labels as a 1-D array aligned with the rows of train_x.
labels = np.array(labels)

In [8]:
# Wrap the (user, item) id pairs and their float labels in a Dataset,
# then serve them as shuffled mini-batches.
train_dataset = TensorDataset(
    torch.tensor(train_x),
    torch.tensor(labels, dtype=torch.float32),
)
dl_train = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

In [10]:
# Sanity check: print the first mini-batch and stop.
for x, y in dl_train:
    print(x, y)
    break

tensor([[4097, 2904],
        [3362,  231],
        [ 972, 2974],
        [5629,  918],
        [4984, 2451],
        [2245,   68],
        [1998, 1158],
        [ 523, 1822],
        [2152,  608],
        [4288, 2336],
        [ 968, 1361],
        [ 172,  450],
        [ 330,  260],
        [1497, 1979],
        [1898, 1196],
        [5112, 1572],
        [4591, 1127],
        [ 942,  322],
        [ 654,  739],
        [1674, 2969],
        [3575, 3674],
        [1657, 1120],
        [3128, 1763],
        [3474, 2556],
        [4541,  892],
        [1169, 1202],
        [2327,  759],
        [3791, 3007],
        [1403,  247],
        [5762, 2460],
        [ 240, 2783],
        [ 147, 1729],
        [4727,  999],
        [5888, 1528],
        [4573,  289],
        [4542, 3240],
        [1193,  889],
        [ 570,  529],
        [5781,  273],
        [ 509, 2861],
        [3194, 2440],
        [5557, 1594],
        [5025,  390],
        [3532,  243],
        [ 527, 3540],
        [ 

## MLP模型
这里建立 MLP 模型：模型的输入是用户和物品的 ID，先通过 Embedding 层分别得到它们的隐向量，再将两个向量拼接后经过多个全连接层，最后通过 Sigmoid 得到输出。<br>

![](img/2.png)

In [17]:
class MLP(nn.Module):
    """MLP recommender: user/item id embeddings -> fully connected stack -> sigmoid score.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        layers: widths of the fully connected stack. ``layers[0]`` is the width of the
            concatenated (user, item) embedding, so each embedding is ``layers[0] // 2``
            wide and ``layers[0]`` must be even. The default list is only read, never mutated.
        regs: accepted for interface compatibility; not used by this implementation.

    Raises:
        ValueError: if ``layers[0]`` is odd. Two ``layers[0] // 2`` embeddings would then
            not concatenate to the ``layers[0]`` inputs expected by the first linear layer.
    """

    def __init__(self, num_users, num_items, layers=[20, 64, 32, 16], regs=[0, 0]):
        super(MLP, self).__init__()
        # Fail fast here instead of with an opaque shape error on the first forward pass.
        if layers[0] % 2 != 0:
            raise ValueError(
                "layers[0] must be even so that it splits into equal user/item embeddings, got %r"
                % (layers[0],)
            )
        embedding_dim = layers[0] // 2

        # Attribute names are part of the saved state_dict keys, so they are kept as-is.
        self.MF_Embedding_User = nn.Embedding(num_embeddings=num_users, embedding_dim=embedding_dim)
        self.MF_Embedding_Item = nn.Embedding(num_embeddings=num_items, embedding_dim=embedding_dim)

        # Fully connected stack: one Linear per consecutive pair of widths in `layers`.
        self.dnn_network = nn.ModuleList(
            [nn.Linear(in_dim, out_dim) for in_dim, out_dim in zip(layers[:-1], layers[1:])]
        )
        self.linear = nn.Linear(layers[-1], 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs):
        """Score a batch of (user id, item id) rows.

        Args:
            inputs: 2-D tensor whose column 0 holds user ids and column 1 holds item ids.

        Returns:
            Tensor with one sigmoid score per input row, shaped (rows, 1).
        """
        # `inputs` is a whole batch, so index by column (inputs[:, 0], inputs[:, 1]);
        # inputs[0] would select a single sample instead.
        # Embedding lookups need integer (long) indices.
        inputs = inputs.long()

        # Look up the user and item embeddings.
        MF_Embedding_User = self.MF_Embedding_User(inputs[:, 0])
        MF_Embedding_Item = self.MF_Embedding_Item(inputs[:, 1])

        # Concatenate the two latent vectors along the feature dimension.
        x = torch.cat([MF_Embedding_User, MF_Embedding_Item], dim=-1)

        # Fully connected stack with ReLU after every layer.
        for linear in self.dnn_network:
            x = linear(x)
            x = F.relu(x)

        x = self.linear(x)
        output = self.sigmoid(x)

        return output

In [19]:
# Inspect the network layer by layer (tiny 1-user / 1-item instance, for display only)
model = MLP(1, 1, [20, 64, 32, 16, 8])    # the fully connected stack can be extended freely
summary(model, input_shape=(2,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Embedding-1                   [-1, 10]              10
         Embedding-2                   [-1, 10]              10
            Linear-3                   [-1, 64]           1,344
            Linear-4                   [-1, 32]           2,080
            Linear-5                   [-1, 16]             528
            Linear-6                    [-1, 8]             136
            Linear-7                    [-1, 1]               9
           Sigmoid-8                    [-1, 1]               0
Total params: 4,117
Trainable params: 4,117
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.000008
Forward/backward pass size (MB): 0.001083
Params size (MB): 0.015705
Estimated Total Size (MB): 0.016796
----------------------------------------------------------------


## 建立模型 

In [20]:
## Configuration: the first width is num_factors * 2 because the user and item
## embeddings (num_factors each) are concatenated before the first linear layer.
layers = [num_factors*2, 64, 32, 16, 8]
model = MLP(num_users, num_items, layers)
# Move the parameters to the selected device; as the last expression it also displays the model.
model.to(device)

MLP(
  (MF_Embedding_User): Embedding(6040, 8)
  (MF_Embedding_Item): Embedding(3706, 8)
  (dnn_network): ModuleList(
    (0): Linear(in_features=16, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): Linear(in_features=16, out_features=8, bias=True)
  )
  (linear): Linear(in_features=8, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [21]:
# Quick smoke test: push one mini-batch through the model and print the scores.
for x, y in dl_train:
    # Move the batch to the same `device` as the model; a hard-coded .cuda()
    # would fail on machines where `device` fell back to the CPU.
    x = x.to(device)
    print(model(x))
    break

tensor([[0.4850],
        [0.4859],
        [0.4838],
        [0.4842],
        [0.4864],
        [0.4841],
        [0.4856],
        [0.4870],
        [0.4843],
        [0.4842],
        [0.4875],
        [0.4821],
        [0.4928],
        [0.4852],
        [0.4862],
        [0.4863],
        [0.4861],
        [0.4869],
        [0.4852],
        [0.4857],
        [0.4834],
        [0.4793],
        [0.4837],
        [0.4877],
        [0.4834],
        [0.4833],
        [0.4860],
        [0.4835],
        [0.4826],
        [0.4815],
        [0.4893],
        [0.4864],
        [0.4847],
        [0.4824],
        [0.4826],
        [0.4843],
        [0.4802],
        [0.4803],
        [0.4897],
        [0.4846],
        [0.4851],
        [0.4822],
        [0.4858],
        [0.4821],
        [0.4895],
        [0.4833],
        [0.4818],
        [0.4812],
        [0.4881],
        [0.4884],
        [0.4793],
        [0.4847],
        [0.4868],
        [0.4864],
        [0.4899],
        [0

## 模型的训练与评估

### 模型评估函数

In [22]:
# Module-level evaluation state: assigned by evaluate_model() and read by eval_one_rating().
_model = None  # model being evaluated
_testRatings = None  # rows read as [user, ground-truth item]
_testNegatives = None  # per-row list of negative candidate items
_K = None  # length of the top-K ranking list

# Hit Ratio
def getHitRatio(ranklist, gtItem):
    """Return 1 if the ground-truth item occurs in the ranked list, otherwise 0."""
    return int(any(candidate == gtItem for candidate in ranklist))

# NDCG
def getNDCG(ranklist, gtItem):
    """Return log(2) / log(position + 2) for the ground-truth item's 0-based position.

    A hit at the first position yields 1.0; the value shrinks for later positions.
    Returns 0 when the ground-truth item is not in the list.
    """
    for position, candidate in enumerate(ranklist):
        if candidate == gtItem:
            return np.log(2) / np.log(position + 2)
    return 0

def eval_one_rating(idx):
    """Rank one test user's candidate items and score the ranking.

    Reads the module-level ``_model``, ``_testRatings``, ``_testNegatives`` and ``_K``
    set by ``evaluate_model``. The candidates are the negatives stored for row ``idx``
    plus that row's ground-truth item.

    Args:
        idx: row index into ``_testRatings`` / ``_testNegatives``.

    Returns:
        Tuple ``(hr, ndcg)`` for the top-``_K`` ranking of the candidates.
    """
    rating = _testRatings[idx]
    u = rating[0]
    gtItem = rating[1]

    # Build a fresh candidate list instead of append()/pop() on the shared negatives,
    # so the test data is never left modified if scoring raises.
    items = list(_testNegatives[idx]) + [gtItem]

    # One (user, item) row per candidate.
    users = np.full(len(items), u, dtype='int32')
    test_data = torch.tensor(np.vstack([users, np.array(items)]).T).to(device)

    # Evaluation needs no gradients; bring all scores back to the host in one transfer.
    with torch.no_grad():
        predictions = _model(test_data)
    scores = predictions[:, 0].cpu().tolist()

    # Same insertion order as the candidate list, so ties rank as before.
    map_item_score = dict(zip(items, scores))

    # Evaluate the top-K rank list (heapq.nlargest returns the K highest-scoring items).
    ranklist = heapq.nlargest(_K, map_item_score, key=lambda k: map_item_score[k])
    hr = getHitRatio(ranklist, gtItem)
    ndcg = getNDCG(ranklist, gtItem)
    return hr, ndcg

def evaluate_model(model, testRatings, testNegatives, K):
    """Compute per-rating Hit Ratio and NDCG for top-K recommendation.

    Stores the arguments in the module-level evaluation state and then scores
    every test rating with ``eval_one_rating``.

    Args:
        model: model used to score (user, item) pairs.
        testRatings: test rows, one per rating.
        testNegatives: negative candidate items, aligned with ``testRatings`` by index.
        K: length of the ranking list.

    Returns:
        Tuple ``(hits, ndcgs)``: two lists with one entry per test rating.
    """
    global _model, _testRatings, _testNegatives, _K
    _model, _testNegatives, _testRatings, _K = model, testNegatives, testRatings, K

    results = [eval_one_rating(idx) for idx in range(len(_testRatings))]
    hits = [hr for hr, _ in results]
    ndcgs = [ndcg for _, ndcg in results]
    return hits, ndcgs

### 模型的训练

In [23]:
# Training setup: binary cross-entropy on the model's sigmoid outputs,
# optimised with Adam at learning rate `lr`.
loss_func = nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [24]:
# Evaluate the model before any training step to get baseline metrics.
(hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK)

In [25]:
# Average the per-rating scores into the baseline HR / NDCG.
hr = np.array(hits).mean()
ndcg = np.array(ndcgs).mean()
print('Init: HR=%.4f, NDCG=%.4f' % (hr, ndcg))

Init: HR=0.0959, NDCG=0.0443


In [26]:
# Model training: keep the checkpoint with the best hit ratio seen so far.
best_hr, best_ndcg, best_iter = hr, ndcg, -1

epochs = 20
log_step_freq = 10000

for epoch in range(epochs):

    # Training phase
    model.train()
    loss_sum = 0.0
    # The loop variable is `batch_labels` (not `labels`) so it does not overwrite the
    # notebook-level `labels` array the training Dataset was built from.
    for step, (features, batch_labels) in enumerate(dl_train, 1):

        # Use the configured `device` instead of .cuda() so CPU-only machines work too.
        features, batch_labels = features.to(device), batch_labels.to(device)

        # Reset the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass. The model outputs shape (batch, 1) while the labels are (batch,);
        # BCELoss requires matching shapes, so drop the trailing dimension.
        predictions = model(features).squeeze(-1)
        loss = loss_func(predictions, batch_labels)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Batch-level log: running mean of the loss over this epoch.
        loss_sum += loss.item()
        if step % log_step_freq == 0:
            print(("[step = %d] loss: %.3f") % (step, loss_sum/step))

    # Validation phase: no gradients are needed while ranking the test candidates.
    model.eval()
    with torch.no_grad():
        (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    if hr > best_hr:
        best_hr, best_ndcg, best_iter = hr, ndcg, epoch
        # NOTE(review): the Pre_train/ directory must already exist for the save to succeed.
        torch.save(model.state_dict(), 'Pre_train/m1-1m_MLP.pkl')

    info = (epoch, loss_sum/step, hr, ndcg)
    print(("\nEPOCH = %d, loss = %.3f, hr = %.3f, ndcg = %.3f") %info)
print('Finished Training...')

[step = 10000] loss: 0.416
[step = 20000] loss: 0.391
[step = 30000] loss: 0.382
[step = 40000] loss: 0.376
[step = 50000] loss: 0.373
[step = 60000] loss: 0.370
[step = 70000] loss: 0.369

EPOCH = 0, loss = 0.368, hr = 0.447, ndcg = 0.248
[step = 10000] loss: 0.355
[step = 20000] loss: 0.354
[step = 30000] loss: 0.355
[step = 40000] loss: 0.355
[step = 50000] loss: 0.355
[step = 60000] loss: 0.354
[step = 70000] loss: 0.354

EPOCH = 1, loss = 0.354, hr = 0.452, ndcg = 0.251
[step = 10000] loss: 0.352
[step = 20000] loss: 0.351
[step = 30000] loss: 0.351
[step = 40000] loss: 0.351
[step = 50000] loss: 0.351
[step = 60000] loss: 0.350
[step = 70000] loss: 0.350

EPOCH = 2, loss = 0.350, hr = 0.465, ndcg = 0.258
[step = 10000] loss: 0.342
[step = 20000] loss: 0.341
[step = 30000] loss: 0.341
[step = 40000] loss: 0.340
[step = 50000] loss: 0.339
[step = 60000] loss: 0.338
[step = 70000] loss: 0.337

EPOCH = 3, loss = 0.337, hr = 0.479, ndcg = 0.268
[step = 10000] loss: 0.329
[step = 20000