## Description:
这个Jupyter用Pytorch实现NeuralMF模型，也是最终的模型， 是MLP和GMF模型的结合版本

## 导入包

In [1]:
import datetime
import numpy as np
import pandas as pd
from collections import Counter
import heapq

import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

#from torchkeras import summary, Model

import warnings
warnings.filterwarnings('ignore')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
# 一些超参数设置
topK = 10
num_factors = 8
num_negatives = 4
batch_size = 64
lr = 0.001

## 导入数据

In [3]:
# 数据在processed Data里面
train = np.load('ProcessedData/train.npy', allow_pickle=True).tolist()
testRatings = np.load('ProcessedData/testRatings.npy').tolist()
testNegatives = np.load('ProcessedData/testNegatives.npy').tolist()

In [4]:
num_users, num_items = train.shape

In [5]:
# 制作数据   用户打过分的为正样本， 用户没打分的为负样本， 负样本这里采用的采样的方式
def get_train_instances(train, num_negatives):
    user_input, item_input, labels = [], [], []
    num_items = train.shape[1]
    for (u, i) in train.keys():  # train.keys()是打分的用户和商品       
        # positive instance
        user_input.append(u)
        item_input.append(i)
        labels.append(1)
        
        # negative instance
        for t in range(num_negatives):
            j = np.random.randint(num_items)
            while (u, j) in train:
                j = np.random.randint(num_items)
            #print(u, j)
            user_input.append(u)
            item_input.append(j)
            labels.append(0)
    return user_input, item_input, labels

In [6]:
user_input, item_input, labels = get_train_instances(train, num_negatives)

In [7]:
train_x = np.vstack([user_input, item_input]).T
labels = np.array(labels)

In [9]:
# 构建成Dataset和DataLoader
train_dataset = TensorDataset(torch.tensor(train_x), torch.tensor(labels).float())
dl_train = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [11]:
# 测试一下
for (x, y) in iter(dl_train):
    print(x, y)
    break

tensor([[2302, 1126],
        [3674, 1319],
        [1327, 1503],
        [4380,  645],
        [4222, 1845],
        [1692, 2762],
        [5353,  493],
        [2528, 2027],
        [3820, 1421],
        [1940, 3362],
        [5223, 3171],
        [2184, 3551],
        [3393, 1125],
        [3131, 2055],
        [4132, 2661],
        [4851, 1986],
        [1120, 1042],
        [5479,  330],
        [3767, 1155],
        [6023,  218],
        [3415,  349],
        [2994, 1069],
        [4778,  178],
        [2665, 3239],
        [2053,  660],
        [1180, 3045],
        [4660, 3168],
        [5629,  328],
        [ 966, 2675],
        [ 300, 1341],
        [2228, 3028],
        [4226, 1127],
        [ 373, 2602],
        [5685, 2586],
        [4417,  552],
        [5794, 2885],
        [ 763, 1347],
        [ 684, 2559],
        [1263, 2891],
        [2873, 3418],
        [1570,  316],
        [1638, 2306],
        [3761, 1739],
        [ 427, 1194],
        [1921, 2472],
        [2

## NeuralMF模型
这里建立NeuralMF模型， 这个模型是MLP和GMF的合并版本， 图片如下：

![](img/1.png)

In [10]:
class NeuralMF(nn.Module):
    
    def __init__(self, num_users, num_items, mf_dim, layers):
        super(NeuralMF, self).__init__()
        
        self.MF_Embedding_User = nn.Embedding(num_embeddings=num_users, embedding_dim=mf_dim)
        self.MF_Embedding_Item = nn.Embedding(num_embeddings=num_items, embedding_dim=mf_dim)
        
        self.MLP_Embedding_User = nn.Embedding(num_embeddings=num_users, embedding_dim=layers[0] // 2)
        self.MLP_Embedding_Item = nn.Embedding(num_embeddings=num_items, embedding_dim=layers[0] // 2)
        
        # 全连接网络
        self.dnn_network = nn.ModuleList([nn.Linear(layer[0], layer[1]) for layer in list(zip(layers[:-1], layers[1:]))])
        self.linear = nn.Linear(layers[-1], mf_dim)
        
        # 合并之后
        self.linear2 = nn.Linear(2*mf_dim, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, inputs):
        # 这个inputs是一个批次的数据， 所以后面的操作切记写成inputs[0], [1]这种， 这是针对某个样本了， 我们都是对列进行的操作
        
        # 先把输入转成long类型
        inputs = inputs.long()
        
        # MF模型的计算 用户和物品的embedding
        MF_Embedding_User = self.MF_Embedding_User(inputs[:, 0])  # 这里踩了个坑， 千万不要写成[0]， 我们这里是第一列
        MF_Embedding_Item = self.MF_Embedding_Item(inputs[:, 1])
        # 两个向量点积过一个全连接
        mf_vec = torch.mul(MF_Embedding_User, MF_Embedding_Item)
        
        # MLP 模型的计算
        MLP_Embedding_User = self.MLP_Embedding_User(inputs[:, 0])  
        MLP_Embedding_Item = self.MLP_Embedding_Item(inputs[:, 1])
        # 两个隐向量堆叠起来
        x = torch.cat([MF_Embedding_User, MF_Embedding_Item], dim=-1)
        # l全连接网络
        for linear in self.dnn_network:
            x = linear(x)
            x = F.relu(x)
        mlp_vec = self.linear(x)
        
        # 合并两个
        vector = torch.cat([mf_vec, mlp_vec], dim=-1)
        
        # liner
        linear = self.linear2(vector)
        output = self.sigmoid(linear)
        
        return output

In [11]:
# 看一下这个网络
model = NeuralMF(1, 1, 10, [20, 64, 32, 16])
#summary(model, input_shape=(2,))

## 建立模型 

In [12]:
## 设置
layers = [num_factors*2, 64, 32, 16]
model = NeuralMF(num_users, num_items, num_factors, layers)
model.to(device)

NeuralMF(
  (MF_Embedding_User): Embedding(6040, 8)
  (MF_Embedding_Item): Embedding(3706, 8)
  (MLP_Embedding_User): Embedding(6040, 8)
  (MLP_Embedding_Item): Embedding(3706, 8)
  (dnn_network): ModuleList(
    (0): Linear(in_features=16, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=16, bias=True)
  )
  (linear): Linear(in_features=16, out_features=8, bias=True)
  (linear2): Linear(in_features=16, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [13]:
children_dict = {name:module for name,module in model.named_children()}

In [14]:
dict(model.state_dict()).keys()

dict_keys(['MF_Embedding_User.weight', 'MF_Embedding_Item.weight', 'MLP_Embedding_User.weight', 'MLP_Embedding_Item.weight', 'dnn_network.0.weight', 'dnn_network.0.bias', 'dnn_network.1.weight', 'dnn_network.1.bias', 'dnn_network.2.weight', 'dnn_network.2.bias', 'linear.weight', 'linear.bias', 'linear2.weight', 'linear2.bias'])

In [15]:
for m in model.parameters():
    print(m)

Parameter containing:
tensor([[ 1.1709, -0.4426, -0.6260,  ...,  1.0484, -0.8389,  1.8611],
        [ 0.1521,  0.6039, -0.3019,  ...,  0.8437, -0.6239, -0.9326],
        [ 1.7790, -1.1589, -0.2400,  ...,  0.4139, -0.5713, -1.6345],
        ...,
        [-0.3822, -0.2985, -0.0359,  ...,  0.9410, -0.4607,  0.4453],
        [-1.0245,  1.3295, -0.3865,  ..., -0.5181,  0.3817, -0.0125],
        [-1.2858, -0.3548, -0.3710,  ...,  2.1751, -1.6506, -1.8563]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([[-0.2087, -0.6825,  1.4534,  ..., -0.0390,  0.6873, -0.0701],
        [ 0.6477, -0.1886, -2.0421,  ...,  0.7109, -2.4402,  1.6883],
        [-0.6667, -0.7265, -0.0233,  ..., -0.0959, -0.0890, -0.7599],
        ...,
        [-0.6539,  0.4124, -0.2805,  ...,  1.1740, -0.7151,  0.5618],
        [-1.0864,  0.4361,  0.2328,  ..., -0.3536, -1.2812,  0.2452],
        [-1.7298,  0.2811,  0.0380,  ..., -0.8430, -2.0017,  0.9016]],
       device='cuda:0', requires_grad=True)


In [16]:
# 简单测试一下模型
for (x, y) in iter(dl_train):
    x = x.cuda()
    print(model(x))
    break

tensor([[0.6571],
        [0.5611],
        [0.4242],
        [0.3730],
        [0.3629],
        [0.4684],
        [0.4846],
        [0.5499],
        [0.5467],
        [0.4819],
        [0.4654],
        [0.3639],
        [0.4479],
        [0.4584],
        [0.5055],
        [0.4785],
        [0.5577],
        [0.3604],
        [0.5382],
        [0.4297],
        [0.3515],
        [0.4512],
        [0.4706],
        [0.4632],
        [0.3307],
        [0.4183],
        [0.5258],
        [0.5802],
        [0.5989],
        [0.6743],
        [0.6863],
        [0.4905],
        [0.3589],
        [0.5526],
        [0.4963],
        [0.5502],
        [0.4455],
        [0.5962],
        [0.5092],
        [0.5736],
        [0.5427],
        [0.5002],
        [0.3664],
        [0.5767],
        [0.3538],
        [0.3392],
        [0.4803],
        [0.3047],
        [0.5985],
        [0.5016],
        [0.3084],
        [0.6806],
        [0.4185],
        [0.4621],
        [0.5474],
        [0

## 模型的训练与评估

### 模型评估函数

In [17]:
# Global variables that are shared across processes
_model = None
_testRatings = None
_testNegatives = None
_K = None

# HitRation
def getHitRatio(ranklist, gtItem):
    for item in ranklist:
        if item == gtItem:
            return 1
    return 0

# NDCG
def getNDCG(ranklist, gtItem):
    for i in range(len(ranklist)):
        item = ranklist[i]
        if item == gtItem:
            return np.log(2) / np.log(i+2)
    return 0

def eval_one_rating(idx):   # 一次评分预测
    rating = _testRatings[idx]
    items = _testNegatives[idx]
    u = rating[0]
    gtItem = rating[1]
    items.append(gtItem)
    
    # Get prediction scores
    map_item_score = {}
    users = np.full(len(items), u, dtype='int32')
    
    test_data = torch.tensor(np.vstack([users, np.array(items)]).T).to(device)
    predictions = _model(test_data)
    for i in range(len(items)):
        item = items[i]
        map_item_score[item] = predictions[i].data.cpu().numpy()[0]
    items.pop()
    
    # Evaluate top rank list
    ranklist = heapq.nlargest(_K, map_item_score, key=lambda k: map_item_score[k])  # heapq是堆排序算法， 取前K个
    hr = getHitRatio(ranklist, gtItem)
    ndcg = getNDCG(ranklist, gtItem)
    return hr, ndcg

def evaluate_model(model, testRatings, testNegatives, K):
    """
    Evaluate the performance (Hit_Ratio, NDCG) of top-K recommendation
    Return: score of each test rating.
    """
    global _model
    global _testRatings
    global _testNegatives
    global _K
    
    _model = model
    _testNegatives = testNegatives
    _testRatings = testRatings
    _K = K
    
    hits, ndcgs = [], []
    for idx in range(len(_testRatings)):
        (hr, ndcg) = eval_one_rating(idx)
        hits.append(hr)
        ndcgs.append(ndcg)
    return hits, ndcgs   

### 模型的训练

In [18]:
# 训练参数设置
loss_func = nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [19]:
# 计算出初始的评估
(hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK)

In [20]:
hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
print('Init: HR=%.4f, NDCG=%.4f' %(hr, ndcg))

Init: HR=0.0983, NDCG=0.0444


In [23]:
print(labels.shape)

torch.Size([64])


In [25]:
np_label = labels.cpu().numpy()

In [26]:
np_label.shape

(64,)

In [27]:
np_label.reshape(64, 1)

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]], dtype=float32)

In [21]:
# 模型训练 
best_hr, best_ndcg, best_iter = hr, ndcg, -1

epochs = 10
log_step_freq = 10000

for epoch in range(epochs):
    
    # 训练阶段
    model.train()
    loss_sum = 0.0
    for step, (features, labels) in enumerate(dl_train, 1):
        
        features, labels = features.cuda(), labels.cuda()
        # 梯度清零
        optimizer.zero_grad()
        
        # 正向传播
        predictions = model(features)
        loss = loss_func(predictions, labels)
        
        # 反向传播求梯度
        loss.backward()
        optimizer.step()
        
        # 打印batch级别日志
        loss_sum += loss.item()
        if step % log_step_freq == 0:
            print(("[step = %d] loss: %.3f") %
                  (step, loss_sum/step))
    
    # 验证阶段
    model.eval()
    (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    if hr > best_hr:
        best_hr, best_ndcg, best_iter = hr, ndcg, epoch
        torch.save(model.state_dict(), 'Pre_train/m1-1m_NeuralMF.pkl')  
        
    info = (epoch, loss_sum/step, hr, ndcg)
    print(("\nEPOCH = %d, loss = %.3f, hr = %.3f, ndcg = %.3f") %info)
print('Finished Training...') 

[step = 10000] loss: 0.418
[step = 20000] loss: 0.391
[step = 30000] loss: 0.382
[step = 40000] loss: 0.377
[step = 50000] loss: 0.373
[step = 60000] loss: 0.371
[step = 70000] loss: 0.369

EPOCH = 0, loss = 0.368, hr = 0.451, ndcg = 0.251
[step = 10000] loss: 0.354
[step = 20000] loss: 0.355
[step = 30000] loss: 0.355
[step = 40000] loss: 0.355
[step = 50000] loss: 0.355
[step = 60000] loss: 0.355
[step = 70000] loss: 0.355

EPOCH = 1, loss = 0.354, hr = 0.450, ndcg = 0.249
[step = 10000] loss: 0.350
[step = 20000] loss: 0.351
[step = 30000] loss: 0.351
[step = 40000] loss: 0.351
[step = 50000] loss: 0.351
[step = 60000] loss: 0.351
[step = 70000] loss: 0.351

EPOCH = 2, loss = 0.351, hr = 0.465, ndcg = 0.258
[step = 10000] loss: 0.345
[step = 20000] loss: 0.346
[step = 30000] loss: 0.344
[step = 40000] loss: 0.344
[step = 50000] loss: 0.343
[step = 60000] loss: 0.342
[step = 70000] loss: 0.342

EPOCH = 3, loss = 0.341, hr = 0.487, ndcg = 0.272
[step = 10000] loss: 0.332
[step = 20000

KeyboardInterrupt: 

## 玩一下预训练
预训练就是用已经训练好的参数， 直接用到NeuralMF上， 然后再进行训练， 这里考察对结构每个层参数的把握， 步骤如下：
* 首先要建立GMF和MLP模型， 并导入保存好的参数
* 建立NeuralMF模型， 获取到相应层， 得到相应的参数

In [120]:
from GMF_MLP import GMF, MLP

In [47]:
gmf = GMF(num_users, num_items, num_factors)
mlp = MLP(num_users, num_items, [num_factors*2, 64, 32, 16, 8])

In [49]:
gmf.state_dict()

OrderedDict([('MF_Embedding_User.weight',
              tensor([[ 0.6279, -1.0892, -1.4830,  ...,  1.2806,  0.7660,  0.8838],
                      [-1.4812,  0.2715, -0.0581,  ..., -0.3319,  1.0200,  0.2614],
                      [-0.2170, -0.1739, -0.0279,  ..., -1.9526, -0.0041, -1.3912],
                      ...,
                      [-0.1122,  0.5834,  1.0698,  ..., -0.5528, -0.7011,  0.4295],
                      [-1.5001, -0.3486, -0.7905,  ...,  1.5360,  0.3815,  1.4865],
                      [-1.6764,  1.9348,  1.3802,  ..., -0.9602,  0.8150, -0.4911]])),
             ('MF_Embedding_Item.weight',
              tensor([[-0.0801, -0.3678, -0.6555,  ..., -1.2752,  1.2915,  0.4929],
                      [ 1.1122,  0.7490,  1.8574,  ...,  0.4982, -0.2133, -0.8871],
                      [ 1.3091,  1.2819, -0.4126,  ..., -0.7710,  0.5700,  0.7012],
                      ...,
                      [-0.9993, -1.4289, -0.7301,  ...,  0.2427, -0.1776,  1.5740],
                   

In [53]:
mlp.state_dict()

OrderedDict([('MF_Embedding_User.weight',
              tensor([[-0.5454,  1.9617,  0.0869,  ...,  0.4281,  1.8094,  0.3854],
                      [-0.5980, -1.1430, -0.3457,  ...,  0.5913, -0.5720, -0.7922],
                      [ 0.6597,  0.7201, -1.5586,  ..., -1.4184,  1.7707, -1.5127],
                      ...,
                      [ 2.0239, -0.7791,  0.4733,  ..., -1.0085, -0.7556,  2.5122],
                      [-0.9829,  0.2878, -0.4144,  ..., -0.0494, -0.3845,  0.3127],
                      [ 0.3841,  0.0400, -0.5352,  ...,  0.6778, -0.7824,  0.6954]])),
             ('MF_Embedding_Item.weight',
              tensor([[-1.5673, -0.5923,  0.3009,  ...,  0.3700,  0.2939, -1.4450],
                      [-1.2123, -0.2765,  1.1029,  ..., -1.2877,  0.0081, -1.2377],
                      [-1.6032, -0.3513,  0.4709,  ...,  0.4668, -1.6501,  0.3641],
                      ...,
                      [ 0.6977,  0.7832, -1.5199,  ...,  1.7004,  0.1634, -0.8731],
                   

In [54]:
# 下面导入保存好的参数
gmf.load_state_dict(torch.load('Pre_train/m1-1m_GMF.pkl'))
mlp.load_state_dict(torch.load('Pre_train/m1-1m_MLP.pkl'))

<All keys matched successfully>

In [57]:
# 下面建立NeuralMF 模型
neural_mf = NeuralMF(num_users, num_items, num_factors, layers)

In [60]:
neural_mf.state_dict().keys()

odict_keys(['MF_Embedding_User.weight', 'MF_Embedding_Item.weight', 'MLP_Embedding_User.weight', 'MLP_Embedding_Item.weight', 'dnn_network.0.weight', 'dnn_network.0.bias', 'dnn_network.1.weight', 'dnn_network.1.bias', 'dnn_network.2.weight', 'dnn_network.2.bias', 'linear.weight', 'linear.bias', 'linear2.weight', 'linear2.bias'])

In [97]:
old_param = neural_mf.state_dict()

In [114]:
gmf.state_dict().keys()

odict_keys(['MF_Embedding_User.weight', 'MF_Embedding_Item.weight', 'linear.weight', 'linear.bias'])

In [102]:
old_param.keys()

odict_keys(['MF_Embedding_User.weight', 'MF_Embedding_Item.weight', 'MLP_Embedding_User.weight', 'MLP_Embedding_Item.weight', 'dnn_network.0.weight', 'dnn_network.0.bias', 'dnn_network.1.weight', 'dnn_network.1.bias', 'dnn_network.2.weight', 'dnn_network.2.bias', 'linear.weight', 'linear.bias', 'linear2.weight', 'linear2.bias'])

In [103]:
mlp.state_dict().keys()

odict_keys(['MF_Embedding_User.weight', 'MF_Embedding_Item.weight', 'dnn_network.0.weight', 'dnn_network.0.bias', 'dnn_network.1.weight', 'dnn_network.1.bias', 'dnn_network.2.weight', 'dnn_network.2.bias', 'dnn_network.3.weight', 'dnn_network.3.bias', 'linear.weight', 'linear.bias'])

In [115]:
old_param['MF_Embedding_User.weight'] = gmf.state_dict().get('MF_Embedding_User.weight')
old_param['MF_Embedding_Item.weight'] = gmf.state_dict().get('MF_Embedding_Item.weight')
old_param['MLP_Embedding_User.weight'] = mlp.state_dict().get('MF_Embedding_User.weight')
old_param['MLP_Embedding_Item.weight'] = mlp.state_dict().get('MF_Embedding_Item.weight')

for i in range(3):
    old_param['dnn_network.' + str(i) + '.weight'] = mlp.state_dict().get('dnn_network.' + str(i) + '.weight')
    old_param['dnn_network.' + str(i) + '.bias'] = mlp.state_dict().get('dnn_network.' + str(i) + '.bias')

# 最后一层的参数， 两个模型需要一个alpha来划分比例 但是我这里最后一层两个模型的方式不一致了， 所以这里维度对不上

In [111]:
old_param

OrderedDict([('MF_Embedding_User.weight',
              tensor([[-10.9049,   2.0719,   4.0300,  ...,  -3.8390, -14.3656,   2.9681],
                      [ -0.7090,  -1.1438,   3.7491,  ...,  -6.2249,  -8.4540,   4.3511],
                      [ -8.5488,  -4.4219,   4.3419,  ...,  -9.4483,  -4.8121,   2.1526],
                      ...,
                      [ -8.2142,  -2.5525,   3.6702,  ...,  -4.8879, -10.4885,   0.7474],
                      [ -5.4971,   1.8283,   3.5808,  ...,  -8.7869, -18.5828,   3.7338],
                      [ -2.2635,  -7.6829,   2.3591,  ...,  -1.6704, -12.4867,   1.1410]])),
             ('MF_Embedding_Item.weight',
              tensor([[  1.5389,  -3.9638,   1.7898,  ...,   1.7804,   6.2358,  -0.5696],
                      [ -4.0041,   3.1085,  -0.4557,  ...,  10.5345,  -0.4570,   4.2654],
                      [  4.9035,   5.8368,   1.4269,  ...,   4.1320,   5.8839,  -1.8388],
                      ...,
                      [ 24.8963,  21.7012, -18.66

In [107]:
neural_mf.state_dict()

OrderedDict([('MF_Embedding_User.weight',
              tensor([[ 0.8996, -1.0869, -0.5185,  ..., -2.1004,  0.2913,  0.9657],
                      [-0.3456, -0.6880, -0.7943,  ..., -0.1422, -0.3804,  0.5994],
                      [-1.1899, -0.9722,  0.7825,  ..., -1.6434,  1.4448, -0.8690],
                      ...,
                      [-0.6792, -0.7107, -1.0414,  ..., -0.0736,  1.9246,  0.8537],
                      [ 0.4429,  0.8433,  0.4007,  ...,  0.4654, -0.1879,  0.5628],
                      [-0.1354, -1.3665,  1.0789,  ..., -0.7139, -0.3299, -0.7006]])),
             ('MF_Embedding_Item.weight',
              tensor([[-0.2368, -0.8478,  0.8295,  ...,  1.6162,  1.3757, -0.1032],
                      [ 0.8535,  0.3204, -0.4572,  ...,  1.1362, -0.6789, -0.2666],
                      [-1.3712,  0.3528,  0.1000,  ..., -0.6342,  0.1856,  1.0360],
                      ...,
                      [-1.0873,  0.7209,  0.2486,  ...,  0.7014,  1.2417,  1.3204],
                   

In [112]:
# 重新更新模型的参数
neural_mf.load_state_dict(old_param)

<All keys matched successfully>

In [116]:
# 更新完毕
neural_mf.state_dict()

OrderedDict([('MF_Embedding_User.weight',
              tensor([[-10.9049,   2.0719,   4.0300,  ...,  -3.8390, -14.3656,   2.9681],
                      [ -0.7090,  -1.1438,   3.7491,  ...,  -6.2249,  -8.4540,   4.3511],
                      [ -8.5488,  -4.4219,   4.3419,  ...,  -9.4483,  -4.8121,   2.1526],
                      ...,
                      [ -8.2142,  -2.5525,   3.6702,  ...,  -4.8879, -10.4885,   0.7474],
                      [ -5.4971,   1.8283,   3.5808,  ...,  -8.7869, -18.5828,   3.7338],
                      [ -2.2635,  -7.6829,   2.3591,  ...,  -1.6704, -12.4867,   1.1410]])),
             ('MF_Embedding_Item.weight',
              tensor([[  1.5389,  -3.9638,   1.7898,  ...,   1.7804,   6.2358,  -0.5696],
                      [ -4.0041,   3.1085,  -0.4557,  ...,  10.5345,  -0.4570,   4.2654],
                      [  4.9035,   5.8368,   1.4269,  ...,   4.1320,   5.8839,  -1.8388],
                      ...,
                      [ 24.8963,  21.7012, -18.66

In [121]:
# 再来训练一遍
# 模型训练 
best_hr, best_ndcg, best_iter = hr, ndcg, -1

epochs = 10
log_step_freq = 10000

for epoch in range(epochs):
    
    # 训练阶段
    neural_mf.train()
    loss_sum = 0.0
    for step, (features, labels) in enumerate(dl_train, 1):
        
        features, labels = features.cuda(), labels.cuda()
        # 梯度清零
        optimizer.zero_grad()
        
        # 正向传播
        predictions = neural_mf(features)
        loss = loss_func(predictions, labels)
        
        # 反向传播求梯度
        loss.backward()
        optimizer.step()
        
        # 打印batch级别日志
        loss_sum += loss.item()
        if step % log_step_freq == 0:
            print(("[step = %d] loss: %.3f") %
                  (step, loss_sum/step))
    
    # 验证阶段
    neural_mf.eval()
    (hits, ndcgs) = evaluate_model(neural_mf, testRatings, testNegatives, topK)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    if hr > best_hr:
        best_hr, best_ndcg, best_iter = hr, ndcg, epoch
        torch.save(neural_mf.state_dict(), 'Pre_train/m1-1m_NeuralMF.pkl')  
        
    info = (epoch, loss_sum/step, hr, ndcg)
    print(("\nEPOCH = %d, loss = %.3f, hr = %.3f, ndcg = %.3f") %info)
print('Finished Training...') 

RuntimeError: Expected object of backend CPU but got backend CUDA for argument #3 'index'