# MF: Matrix Factorization on MovieLens Ratings

In [1]:
# 加载依赖
import numpy as np
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

from tqdm import tqdm
from utils.movielens_dataset import MovieLensDatasetWithTorch
from utils.metric import Metric
from utils.trainer import Trainer

In [2]:
# Hyperparameters and runtime settings shared by the whole notebook.
# Keys that are not read in the visible cells (DEVICE, EPOCH, NUM_FEATURE,
# POS_WEIGHT) are presumably consumed by the Trainer -- verify in utils/trainer.py.
config = dict(
    TRAIN_BATCH_SIZE=8,             # batch size of the training DataLoader
    VALID_BATCH_SIZE=4,             # batch size of the validation DataLoader
    TEST_BATCH_SIZE=4,              # batch size of the test DataLoader
    DATASET_RATIO=[0.8, 0.1, 0.1],  # train / valid / test fractions of the dataset
    DEVICE='cpu',
    NUM_WORKERS=0,                  # worker processes handed to every DataLoader
    EPOCH=30,
    NUM_FEATURE=111,
    POS_WEIGHT=2,
    LEARNING_RATE=1e-3,             # learning rate of the Adam optimizer
    K=8,                            # latent dimension of the MF factor matrices
)

In [3]:
# Load the dataset (MovieLens ratings) and wrap it in the project's dataset class.
with open('../dataset/ml-latest-small-ratings.txt', 'r', encoding='utf-8') as f:
    ml_dataset = MovieLensDatasetWithTorch(f, task='rating')

# Turn the configured ratios into absolute split sizes. int() truncates, so any
# leftover samples are added to the last (test) split; random_split requires the
# sizes to sum exactly to the dataset length.
dataset_size = len(ml_dataset)
dataset_split_size = [int(dataset_size * r) for r in config['DATASET_RATIO']]
dataset_split_size[-1] += dataset_size - sum(dataset_split_size)

# Use a dedicated, seeded generator so the train/valid/test partition is the
# same on every "Restart & Run All" instead of changing from run to run.
SPLIT_SEED = 42
train_set, valid_set, test_set = torch.utils.data.random_split(
    ml_dataset,
    dataset_split_size,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Sizes of the user and item vocabularies, needed to shape the factor matrices.
user_num, item_num = ml_dataset.user_num, ml_dataset.item_num

In [4]:
# Quick sanity check: display the concrete class of the loaded dataset object.
type(ml_dataset)

utils.movielens_dataset.MovieLensDatasetWithTorch

In [5]:
def _make_loader(split, batch_size_key, shuffle):
    """Wrap one dataset split in a DataLoader using the notebook-wide settings.

    Args:
        split: Dataset split to iterate over.
        batch_size_key: Key in ``config`` that holds this split's batch size.
        shuffle: Passed straight through to ``DataLoader(shuffle=...)``.

    Returns:
        A ``DataLoader`` over ``split``.
    """
    return DataLoader(
        split,
        batch_size=config[batch_size_key],
        shuffle=shuffle,
        num_workers=config['NUM_WORKERS'],
    )


# Only the training split is shuffled; validation and test keep a fixed order.
train_loader = _make_loader(train_set, 'TRAIN_BATCH_SIZE', shuffle=True)
valid_loader = _make_loader(valid_set, 'VALID_BATCH_SIZE', shuffle=False)
test_loader = _make_loader(test_set, 'TEST_BATCH_SIZE', shuffle=False)

In [6]:
# Build the model
class MF(nn.Module):
    """Matrix-factorization model with one k-dimensional vector per user and per item.

    The prediction for a (user, item) pair is the dot product of the
    corresponding rows of the factor matrices ``U`` and ``I``.
    """

    def __init__(self, user_num, item_num, k):
        """Create the Xavier-uniform initialised factor matrices.

        Args:
            user_num: Number of rows of the user factor matrix ``U``.
            item_num: Number of rows of the item factor matrix ``I``.
            k: Number of columns (latent dimension) of both matrices.
        """
        super().__init__()

        # U is initialised before I so the random draws happen in a fixed order.
        user_factors = nn.init.xavier_uniform_(torch.zeros(user_num, k))
        item_factors = nn.init.xavier_uniform_(torch.zeros(item_num, k))

        self.U = nn.Parameter(user_factors, requires_grad=True)
        self.I = nn.Parameter(item_factors, requires_grad=True)

    def forward(self, batch_x):
        """Score every (user, item) pair stored in a sparse input.

        Args:
            batch_x: Indexable object whose element ``0`` supports
                ``.coalesce().indices()`` (i.e. a sparse COO tensor). Row 0 of
                the coalesced index matrix is used as user indices and row 1
                as item indices.

        Returns:
            1-D tensor holding one dot product per stored index pair.
        """
        sparse_pairs = batch_x[0]
        pair_indices = sparse_pairs.coalesce().indices()
        user_idx = pair_indices[0]
        item_idx = pair_indices[1]

        # Gather the latent rows of each pair, then reduce over the latent axis.
        user_vecs = self.U[user_idx]
        item_vecs = self.I[item_idx]
        return (user_vecs * item_vecs).sum(dim=1)

In [7]:
# Instantiate the model and the training components.
model = MF(user_num, item_num, k=config['K'])
optimizer = optim.Adam(model.parameters(), lr=config['LEARNING_RATE'])
loss_func = nn.MSELoss()  # squared-error objective for the rating task
# NOTE(review): the recorded run of this notebook ends with
# "'Metric' object has no attribute 'init_metric'" -- the Metric class looks
# out of sync with what the training code calls; verify utils/metric.py.
metric = Metric()

# Hand everything to the project's Trainer. The cell output suggests the
# constructor prints the config -- confirm in utils/trainer.py.
trainer = Trainer(
    model=model, loss_func=loss_func, optimizer=optimizer, metric=metric,
    train_loader=train_loader, valid_loader=valid_loader, test_loader=test_loader,
    config=config,
)

TRAIN_BATCH_SIZE: 8
VALID_BATCH_SIZE: 4
TEST_BATCH_SIZE: 4
DATASET_RATIO: [0.8, 0.1, 0.1]
DEVICE: cpu
NUM_WORKERS: 0
EPOCH: 30
NUM_FEATURE: 111
POS_WEIGHT: 2
LEARNING_RATE: 0.001
K: 8


In [8]:
# Run training, then (presumably) evaluation on the test loader.
# NOTE(review): the recorded output stops after the first epoch's loss with
# "AttributeError: 'Metric' object has no attribute 'init_metric'", so this cell
# does not currently run to completion -- confirm the Metric/Trainer API match.
if __name__ == '__main__':
    trainer.train()
    trainer.test()



100%|██████████| 61/61 [00:00<00:00, 204.40it/s]


Train Epoch: 1
Loss: 13.885458883692007


AttributeError: 'Metric' object has no attribute 'init_metric'