In [1]:
import time
import os

import numpy as np
import matplotlib.pyplot as plt
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F
import torch.backends.cudnn as cudnn


#引入脚本文件
from data_utils import load_all    #加载数据
from evaluate import metrics    #模型评测

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class Config():
    """Hyper-parameters, file paths and device selection for the NCF experiment.

    Parameters
    ----------
    model : str, default 'GMF'
        Which variant to build: 'MLP', 'GMF', 'NeuMF-end' or 'NeuMF-pre'.
        With 'NeuMF-pre' the previously saved GMF and MLP models are loaded
        from ``model_path``.

    Raises
    ------
    AssertionError
        If ``model == 'NeuMF-pre'`` and ``GMF.pth`` or ``MLP.pth`` is missing
        from ``model_path``.
    """

    def __init__(self, model='GMF'):
        self.DEBUG = False
        self.factor_num = 32
        self.num_layers = 3
        self.dropout = 0.0
        self.lr = 0.0001
        self.batch_size = 128
        self.num_ng = 5  # negatives sampled per positive example during training
        self.test_num_ng = 100  # negatives per positive example at test time
        # NOTE(review): DEBUG=True selects the *longer* run (20 epochs) — confirm
        # this is not inverted.
        self.epochs = 20 if self.DEBUG else 2
        self.out = 1  # truthy -> the training cell saves the best model
        self.top_k = 10
        self.model = model
        self.model_path = './temp/'
        self.trainRatingPath = 'data/train.rating'
        self.testNegativePath = 'data/test.negative'
        self.device = torch.device('cpu')
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        if self.model == 'NeuMF-pre':
            # Plain concatenation (not os.path.join) so the paths match the
            # '{}{}.pth' pattern used when the models are saved.
            gmf_path = self.model_path + 'GMF.pth'
            mlp_path = self.model_path + 'MLP.pth'
            assert os.path.exists(gmf_path), 'lack of GMF model'
            assert os.path.exists(mlp_path), 'lack of MLP model'
            # SECURITY: torch.load unpickles arbitrary objects; only load
            # checkpoints you produced yourself.
            # NOTE(review): recent PyTorch releases default to weights_only=True,
            # which may reject whole-module pickles — confirm on your version.
            # map_location lets a checkpoint saved on GPU load on a CPU-only host.
            self.GMF_model = torch.load(gmf_path, map_location=self.device)
            self.MLP_model = torch.load(mlp_path, map_location=self.device)
        else:
            self.GMF_model = None
            self.MLP_model = None
args = Config()

In [3]:
# Read the training ratings and the test negatives from the paths in `args`.
# NOTE(review): the exact structure of the five returned values is defined in
# data_utils.load_all, which is not visible here — confirm there.
(train_data, test_data, user_num, item_num, train_mat) = load_all(
    args.trainRatingPath,
    args.testNegativePath,
)

In [4]:
class NCF(nn.Module):
    """Neural Collaborative Filtering network with GMF, MLP and fused variants.

    ``model`` selects what feeds the final linear prediction layer:
    'GMF' uses only the element-wise product of the GMF embeddings, 'MLP'
    uses only the output of the MLP tower, and any other value concatenates
    both. 'NeuMF-pre' additionally initialises all weights from the supplied
    pretrained GMF and MLP models.
    """

    def __init__(self, user_num, item_num, factor_num, num_layers,
            dropout, model, GMF_model=None, MLP_model=None):
        """
        user_num: number of users in the dataset;
        item_num: number of items in the dataset;
        factor_num: size of the GMF embeddings and of the MLP tower output;
        num_layers: number of linear layers in the MLP tower;
        dropout: dropout probability applied before every MLP linear layer;
        model: 'MLP', 'GMF', 'NeuMF-end', and 'NeuMF-pre';
        GMF_model: pretrained GMF model object (an NCF instance), or None;
        MLP_model: pretrained MLP model object (an NCF instance), or None.

        Raises ValueError when model is 'NeuMF-pre' and either pretrained
        model is missing.
        """
        super(NCF, self).__init__()
        if model == 'NeuMF-pre' and (GMF_model is None or MLP_model is None):
            raise ValueError(
                "model='NeuMF-pre' requires both GMF_model and MLP_model")

        self.dropout = dropout
        self.model = model
        # NOTE(review): when these are nn.Module instances, attribute assignment
        # registers them as submodules, so their parameters also appear in
        # self.parameters() / state_dict() — confirm that is intended.
        self.GMF_model = GMF_model
        self.MLP_model = MLP_model
        self.embed_user_GMF = nn.Embedding(user_num, factor_num)
        self.embed_item_GMF = nn.Embedding(item_num, factor_num)
        # Each MLP embedding is half of the first MLP layer's input, because
        # the user and item vectors are concatenated in forward().
        self.embed_user_MLP = nn.Embedding(user_num, factor_num * (2 ** (num_layers - 1)))
        self.embed_item_MLP = nn.Embedding(item_num, factor_num * (2 ** (num_layers - 1)))

        # Tower of (Dropout, Linear, ReLU) triples; every Linear halves the
        # width, ending at factor_num features.
        MLP_modules = []
        for i in range(num_layers):
            input_size = factor_num * (2 ** (num_layers - i))
            MLP_modules.append(nn.Dropout(p=self.dropout))
            MLP_modules.append(nn.Linear(input_size, input_size//2))
            MLP_modules.append(nn.ReLU())
        self.MLP_layers = nn.Sequential(*MLP_modules)

        if self.model in ['MLP', 'GMF']:
            predict_size = factor_num
        else:
            # Fused variants concatenate the GMF and MLP outputs.
            predict_size = factor_num * 2
        self.predict_layer = nn.Linear(predict_size, 1)

        self._init_weight_()

    def _init_weight_(self):
        """Initialise weights randomly, or copy them from the pretrained models."""
        if self.model != 'NeuMF-pre':
            nn.init.normal_(self.embed_user_GMF.weight, std=0.01)
            nn.init.normal_(self.embed_user_MLP.weight, std=0.01)
            nn.init.normal_(self.embed_item_GMF.weight, std=0.01)
            nn.init.normal_(self.embed_item_MLP.weight, std=0.01)

            for m in self.MLP_layers:
                if isinstance(m, nn.Linear):
                    nn.init.xavier_uniform_(m.weight)
            nn.init.kaiming_uniform_(self.predict_layer.weight, a=1, nonlinearity='sigmoid')

            # Zero every linear bias (MLP tower and prediction layer).
            for m in self.modules():
                if isinstance(m, nn.Linear) and m.bias is not None:
                    m.bias.data.zero_()
        else:
            # embedding layers
            self.embed_user_GMF.weight.data.copy_(self.GMF_model.embed_user_GMF.weight)
            self.embed_item_GMF.weight.data.copy_(self.GMF_model.embed_item_GMF.weight)
            self.embed_user_MLP.weight.data.copy_(self.MLP_model.embed_user_MLP.weight)
            self.embed_item_MLP.weight.data.copy_(self.MLP_model.embed_item_MLP.weight)

            # mlp layers
            for (m1, m2) in zip(
                self.MLP_layers, self.MLP_model.MLP_layers):
                if isinstance(m1, nn.Linear) and isinstance(m2, nn.Linear):
                    m1.weight.data.copy_(m2.weight)
                    m1.bias.data.copy_(m2.bias)

            # predict layer: concatenate the two pretrained heads and scale the
            # weights and the summed bias by 0.5.
            predict_weight = torch.cat([
                self.GMF_model.predict_layer.weight,
                self.MLP_model.predict_layer.weight], dim=1)
            predict_bias = self.GMF_model.predict_layer.bias + self.MLP_model.predict_layer.bias

            self.predict_layer.weight.data.copy_(0.5 * predict_weight)
            self.predict_layer.bias.data.copy_(0.5 * predict_bias)

    def forward(self, user, item):
        """Return a flattened tensor of raw prediction scores (no sigmoid applied).

        user, item: index tensors passed to the embedding layers.
        """
        if self.model != 'MLP':
            embed_user_GMF = self.embed_user_GMF(user)
            embed_item_GMF = self.embed_item_GMF(item)
            output_GMF = embed_user_GMF * embed_item_GMF
        if self.model != 'GMF':
            embed_user_MLP = self.embed_user_MLP(user)
            embed_item_MLP = self.embed_item_MLP(item)
            interaction = torch.cat((embed_user_MLP, embed_item_MLP), -1)
            output_MLP = self.MLP_layers(interaction)

        if self.model == 'GMF':
            concat = output_GMF
        elif self.model == 'MLP':
            concat = output_MLP
        else:
            concat = torch.cat((output_GMF, output_MLP), -1)

        prediction = self.predict_layer(concat)
        return prediction.view(-1)


In [5]:
# Instantiate the network from the configured hyper-parameters and move it to
# the selected device.
Model = NCF(
    user_num=user_num,
    item_num=item_num,
    factor_num=args.factor_num,
    num_layers=args.num_layers,
    dropout=args.dropout,
    model=args.model,
    GMF_model=args.GMF_model,
    MLP_model=args.MLP_model,
)
Model.to(args.device)

# NCF.forward returns raw scores, so the loss applies the sigmoid itself.
loss_function = nn.BCEWithLogitsLoss()

# The pretrained-initialised variant is trained with SGD; every other variant
# uses Adam.
optimizer = (optim.SGD if args.model == 'NeuMF-pre' else optim.Adam)(
    Model.parameters(), lr=args.lr)

In [6]:
class NCFData(data.Dataset):
    """Dataset of (user, item, label) triples with on-demand negative sampling.

    Labels are only meaningful when training, so they are built in
    ng_sample() together with the sampled negatives. When not training, the
    given features are served unchanged with a label of 0.

    features: list of [user, item] pairs;
    num_item: number of items; negatives are drawn from range(num_item);
    train_mat: container supporting ``(user, item) in train_mat`` that marks
        pairs which must not be sampled as negatives;
    num_ng: number of negatives sampled per feature pair;
    is_training: truthy to serve the resampled training set.
    """

    def __init__(self, features,
                num_item, train_mat=None, num_ng=0, is_training=None):
        super(NCFData, self).__init__()
        self.features_ps = features
        self.num_item = num_item
        self.train_mat = train_mat
        self.num_ng = num_ng
        self.is_training = is_training
        self.labels = [0 for _ in range(len(features))]
        # Filled by ng_sample(); None marks "not sampled yet".
        self.features_ng = None
        self.features_fill = None
        self.labels_fill = None

    def ng_sample(self):
        """Draw num_ng fresh negatives per positive and rebuild the training set.

        Raises AssertionError when the dataset is not in training mode.
        """
        assert self.is_training, 'no need to sampling when testing'

        self.features_ng = []
        for x in self.features_ps:
            u = x[0]
            for _ in range(self.num_ng):
                # Rejection sampling: redraw until the pair is not in train_mat.
                j = np.random.randint(self.num_item)
                while (u, j) in self.train_mat:
                    j = np.random.randint(self.num_item)
                self.features_ng.append([u, j])

        labels_ps = [1 for _ in range(len(self.features_ps))]
        labels_ng = [0 for _ in range(len(self.features_ng))]

        # Positives first, then all negatives.
        self.features_fill = self.features_ps + self.features_ng
        self.labels_fill = labels_ps + labels_ng

    def __len__(self):
        """Number of samples served in the current mode.

        In training mode this is the size ng_sample() will produce; it is
        valid before sampling so a DataLoader can be built first.
        """
        if self.is_training:
            return (self.num_ng + 1) * len(self.features_ps)
        # Outside training no negatives are generated, so num_ng must not
        # inflate the length.
        return len(self.features_ps)

    def __getitem__(self, idx):
        """Return the (user, item, label) triple at position idx.

        Raises RuntimeError in training mode if ng_sample() was never called.
        """
        if self.is_training:
            if self.features_fill is None:
                raise RuntimeError(
                    'call ng_sample() before iterating the training dataset')
            features, labels = self.features_fill, self.labels_fill
        else:
            features, labels = self.features_ps, self.labels

        user = features[idx][0]
        item = features[idx][1]
        label = labels[idx]
        return user, item ,label

In [7]:
# Training set: negatives are resampled through ng_sample() before each epoch.
train_dataset = NCFData(
    features=train_data,
    num_item=item_num,
    train_mat=train_mat,
    num_ng=args.num_ng,
    is_training=True,
)
# Test set: served as-is, with no negative sampling.
test_dataset = NCFData(
    features=test_data,
    num_item=item_num,
    train_mat=train_mat,
    num_ng=0,
    is_training=False,
)

train_loader = data.DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=args.batch_size,
    num_workers=0,
)
# NOTE(review): batch size test_num_ng + 1 presumably groups one positive with
# its test negatives per batch — confirm against the layout load_all returns.
test_loader = data.DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=args.test_num_ng + 1,
    num_workers=0,
)

In [8]:
def hit(gt_item, pred_items):
    """Return 1 when gt_item occurs in pred_items, otherwise 0."""
    return int(gt_item in pred_items)


def ndcg(gt_item, pred_items):
    """Return 1 / log2(position + 2) for gt_item's 0-based position in pred_items.

    Returns 0 when gt_item is not in pred_items.
    """
    if gt_item not in pred_items:
        return 0
    position = pred_items.index(gt_item)
    discount = np.log2(position + 2)
    return np.reciprocal(discount)

In [9]:
# Train for args.epochs epochs, evaluate after each one, log to a timestamped
# file, and keep the model with the best hit ratio.

# Initialise all three "best" values so the final print cannot hit an unbound
# name when HR never rises above 0.
best_hr, best_ndcg, best_epoch = 0, 0, -1
hrs, ndcgs = [], []
Loss = []  # mean training loss per epoch
Time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())

# The log directory is not created anywhere else; open() would fail without it.
os.makedirs('./NeuMF-work/', exist_ok=True)
with open('./NeuMF-work/' + Time + '.log', 'w') as f:
    for epoch in range(args.epochs):
        count, Sum = 0, 0
        Model.train() # Enable dropout (if have).
        start_time = time.time()
        # Draw a fresh set of negatives for this epoch.
        train_loader.dataset.ng_sample()

        for user, item, label in train_loader:
            user = user.to(args.device)
            item = item.to(args.device)
            label = label.float().to(args.device)
            Model.zero_grad()
            prediction = Model(user, item)
            loss = loss_function(prediction, label)
            Sum += loss.item()
            loss.backward()
            optimizer.step()
            count += 1
        Loss.append(Sum / count)

        Model.eval()
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            HR, NDCG = metrics(Model, test_loader, args.top_k)
        hrs.append(HR)
        ndcgs.append(NDCG)
        elapsed_time = time.time() - start_time
        f.write("The time elapse of epoch {:03d}".format(epoch) + " is: " + time.strftime("%H: %M: %S", time.gmtime(elapsed_time)) + "\n")
        f.write("HR: {:.3f}\tNDCG: {:.3f}\tLoss: {:.3f}\n".format(HR, NDCG, Loss[epoch]))

        if HR > best_hr:
            best_hr, best_ndcg, best_epoch = HR, NDCG, epoch
            if args.out:
                # makedirs also creates missing parents and tolerates an
                # existing directory.
                os.makedirs(args.model_path, exist_ok=True)
                # Saves the whole module; Config loads '<model_path>GMF.pth'
                # and 'MLP.pth' back with torch.load for 'NeuMF-pre'.
                torch.save(Model, '{}{}.pth'.format(args.model_path, args.model))

print("End. Best epoch {:03d}: HR = {:.3f}, NDCG = {:.3f}".format(best_epoch, best_hr, best_ndcg))

End. Best epoch 001: HR = 0.277, NDCG = 0.152
