## 1 Import libraries or packges that we need

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from model import mf, mfDataset

## 2 Set the training configuration, i.e. hyper-paramters, computing device, dataset path

In [2]:
file_path = './ml-1m/ratings.dat'
batch_size = 2048
device = torch.device('cuda:0')
learning_rate = 1e-2
weight_decay = 1e-5
epochs = 10

## 3 Preprocessing before training

In [3]:
df = pd.read_csv(file_path, header=None, delimiter='::')
x, y = df.iloc[:, :2], df.iloc[:, 2]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

train_dataset = mfDataset(np.array(x_train[0]), np.array(
        x_train[1]),  np.array(y_train).astype(np.float32))
test_dataset = mfDataset(np.array(x_test[0]), np.array(
        x_test[1]), np.array(y_test).astype(np.float32))

train_DataLoader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_DataLoader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

mean_rating = df.iloc[:, 2].mean()
num_users = max(df[0])+1
num_items = max(df[1])+1
print(f"num_users:{num_users-1}")
print(f"num_items:{num_items-1}")

# generate two mf model objects: model using pytorch auto-gradient，model_my_SGD using my implementation of gradient descent
model = mf(num_users, num_items, mean_rating).to(device)
model_my_SGD = mf(num_users, num_items, mean_rating).to(device)

# l2 normalization
optimizer = torch.optim.SGD(
        params=model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# optimizer = torch.optim.Adam(
#         params=model.parameters(), lr=learning_rate, weight_decay=weight_decay)

loss_func = torch.nn.MSELoss().to(device)

  return func(*args, **kwargs)


num_users:6040
num_items:3952


## 4.1 Training and evaluation：my implemention for gradient descent
optimization method: Stochastic Gradient Descent (SGD)

In [5]:
list(model_my_SGD.parameters())

[Parameter containing:
 tensor([3.5816], device='cuda:0'),
 Parameter containing:
 tensor([[-0.0253,  0.1679, -0.0207,  ...,  0.0874,  0.1041, -0.1503],
         [-0.0715,  0.0070, -0.0165,  ..., -0.0576, -0.0096,  0.0791],
         [-0.0632,  0.0122, -0.1540,  ..., -0.0274, -0.1025, -0.0105],
         ...,
         [ 0.1010,  0.1880,  0.1345,  ...,  0.0947, -0.0709, -0.0941],
         [ 0.0536, -0.1759, -0.1022,  ...,  0.0584,  0.0765, -0.1517],
         [-0.0850, -0.0641, -0.0068,  ...,  0.0331, -0.1700, -0.0363]],
        device='cuda:0', requires_grad=True),
 Parameter containing:
 tensor([[ 0.0075],
         [ 0.1644],
         [-0.1334],
         ...,
         [ 0.0620],
         [ 0.0478],
         [ 0.0403]], device='cuda:0', requires_grad=True),
 Parameter containing:
 tensor([[-0.1983, -0.0945,  0.0466,  ...,  0.0370,  0.2087,  0.0359],
         [ 0.0436,  0.1404, -0.0421,  ..., -0.0293,  0.1588,  0.1411],
         [-0.0680, -0.1402,  0.0332,  ..., -0.0433,  0.1388,  0.0046],

## 4.2 Training and evaluation：pytorch
optimization method: Stochastic Gradient Descent (SGD) or Adam

In [4]:
print("my implementation round")
print("__________________________________________________________________")

for epoch in range(epochs):
    # training phase
    model.train()
    total_loss, total_len = 0, 0
    for x_u, x_i, y in train_DataLoader:
        x_u, x_i, y = x_u.to(device), x_i.to(device), y.to(device)
        y_pre, p_u, q_i = model_my_SGD(x_u, x_i)
        # l2 normalization
        loss = loss_func(y_pre, y) + weight_decay * (torch.sum(torch.pow(p_u, 2)) + torch.sum(torch.pow(q_i, 2)))

        # my implementation for gradient descent of mf model
        e_ui = (y - y_pre).unsqueeze(1)
        model_my_SGD.my_gradient_descent(x_u, x_i, e_ui, learning_rate, weight_decay)

        total_loss += loss.item()*len(y)
        total_len += len(y)
    train_loss = total_loss/total_len

    # evaluation phase
    model.eval()
    labels, predicts = [], []
    with torch.no_grad():
        for x_u, x_i, y in test_DataLoader:
            x_u, x_i, y = x_u.to(device), x_i.to(device), y.to(device)
            y_pre, p_u, q_i = model(x_u, x_i)
            labels.extend(y.tolist())
            predicts.extend(y_pre.tolist())
    mse = mean_squared_error(np.array(labels), np.array(predicts))

    print("epoch {}, train loss is {}, val mse is {}".format(
        epoch, train_loss, mse))

my implementation round
__________________________________________________________________
tensor([[-0.1505,  0.0789, -0.0449,  ...,  0.0254, -0.0016, -0.1366],
        [-0.1467, -0.0782, -0.0597,  ..., -0.0115,  0.0028,  0.0806],
        [-0.0667, -0.0718, -0.1231,  ...,  0.0016, -0.2394, -0.0391],
        ...,
        [-0.0357,  0.3117,  0.1836,  ..., -0.1788, -0.0315,  0.0518],
        [-0.0494,  0.0659,  0.1187,  ...,  0.0187, -0.0242, -0.0659],
        [ 0.0240, -0.0067,  0.0934,  ..., -0.0267, -0.0526, -0.0227]],
       device='cuda:0', grad_fn=<EmbeddingBackward>)


AttributeError: 'Tensor' object has no attribute 'weight'

In [None]:
# print("pytorch auto-gradient round")
print("__________________________________________________________________")

# 重新生成torch的dataloader部分
train_dataset = mfDataset(np.array(x_train[0]), np.array(
        x_train[1]),  np.array(y_train).astype(np.float32))
test_dataset = mfDataset(np.array(x_test[0]), np.array(
        x_test[1]), np.array(y_test).astype(np.float32))

train_DataLoader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_DataLoader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(epochs):
    # training phase

    model.train()
    total_loss, total_len = 0, 0
    for x_u, x_i, y in train_DataLoader:
        x_u, x_i, y = x_u.to(device), x_i.to(device), y.to(device)
        y_pre, p_u, q_i = model(x_u, x_i)
        loss = loss_func(y_pre, y)

        # auto gradient computing and gradient descent based on pytorch
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()*len(y)
        total_len += len(y)
    train_loss = total_loss/total_len


    # evaluation phase
    model.eval()
    labels, predicts = [], []
    with torch.no_grad():
        for x_u, x_i, y in test_DataLoader:
            x_u, x_i, y = x_u.to(device), x_i.to(device), y.to(device)
            y_pre, p_u, q_i = model(x_u, x_i)
            labels.extend(y.tolist())
            predicts.extend(y_pre.tolist())
    mse = mean_squared_error(np.array(labels), np.array(predicts))

    print("epoch {}, train loss is {}, val mse is {}".format(
        epoch, train_loss, mse))
print("__________________________________________________________________")

In [6]:
a=nn.Embedding(5,1)
b=nn.Embedding(5,1)

In [16]:
type(a(torch.tensor([1,2])))

torch.Tensor

In [17]:
b(torch.tensor([1,2]))

tensor([[ 0.3988],
        [-0.7444]], grad_fn=<EmbeddingBackward>)

In [19]:
a(torch.tensor([1,2]))

tensor([[-0.2928],
        [-0.0324]], grad_fn=<EmbeddingBackward>)

In [28]:
a(torch.tensor([1,2]))=a(torch.tensor([1,2]))-torch.tensor([1,1])
print(a(torch.tensor([1,2])))

SyntaxError: can't assign to function call (2206252653.py, line 1)

In [29]:
a=torch.empty(3)
print(a)
nn.init.normal_(a, mean=0, std=0.1)
print(a)

tensor([1.5695e-43, 1.5554e-43, 1.5975e-43])
tensor([0.0254, 0.0013, 0.1023])


In [47]:
b=torch.gather(a, dim=0, index=torch.tensor([0,2]))
print(a)
print(b)

tensor([0.0254, 0.0013, 0.1023])
tensor([0.0254, 0.1023])


In [48]:
b+=torch.tensor([1,1])
print(a)

tensor([0.0254, 0.0013, 0.1023])
