# FM (Factorization Machine)

In [60]:
# 加载依赖
import numpy as np
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

from tqdm import tqdm

from utils.utils import CriteoDataset, CTRMetric, Trainer

## 1.读取数据集

In [61]:
# Hyper-parameters and runtime settings shared by the cells below.
def _pick_device():
    """Return 'mps', 'cuda' or 'cpu': the first backend PyTorch reports as available.

    'mps' is tried first so machines where it is available keep the previously
    hard-coded setting; other machines fall back instead of failing.
    """
    # torch.backends.mps is absent on older PyTorch builds, hence the getattr.
    mps_backend = getattr(torch.backends, 'mps', None)
    if mps_backend is not None and mps_backend.is_available():
        return 'mps'
    if torch.cuda.is_available():
        return 'cuda'
    return 'cpu'


config = {
    'TRAIN_BATCH_SIZE': 128,   # batch size of the training DataLoader
    'VALID_BATCH_SIZE': 128,   # batch size of the validation DataLoader
    'TEST_BATCH_SIZE': 128,    # batch size of the test DataLoader
    'DEVICE': _pick_device(),  # was hard-coded to 'mps'
    'NUM_WORKERS': 6,          # worker processes per DataLoader
    'EPOCH': 30,               # NOTE(review): presumably the number of training epochs run by Trainer - confirm
    'NUM_FEATURE': 111,        # passed to FM as num_features; must match the dataset's feature width
    'POS_WEIGHT': 2,           # pos_weight given to BCEWithLogitsLoss
    'LEARNING_RATE': 1e-3,     # Adam learning rate
    'K': 8,                    # passed to FM as k (latent vector length)
}

In [62]:
# The data come from the Criteo dataset (100k-sample train / valid / test splits).
train_pth = '../dataset/criteo-100k-train.txt'
valid_pth = '../dataset/criteo-100k-valid.txt'
test_pth = '../dataset/criteo-100k-test.txt'

# The validation and test sets reuse the encoders held by the training set.
train_set = CriteoDataset(train_pth, mode='train')
valid_set = CriteoDataset(valid_pth, mode='valid', encoders=train_set.encoders)
# Fix: this previously loaded valid_pth, so the "test" metrics were computed on
# the validation file and test_pth was never read.
test_set = CriteoDataset(test_pth, mode='test', encoders=train_set.encoders)



In [63]:
def _build_loader(split, batch_size_key, shuffle):
    """Wrap one dataset split in a DataLoader.

    Args:
        split: dataset object to iterate over.
        batch_size_key: name of the ``config`` entry holding this split's batch size.
        shuffle: whether the loader shuffles the samples.

    Returns:
        A DataLoader using ``config['NUM_WORKERS']`` worker processes.
    """
    return DataLoader(
        dataset=split,
        batch_size=config[batch_size_key],
        shuffle=shuffle,
        num_workers=config['NUM_WORKERS'],
    )


# Only the training split is shuffled; validation and test keep their order.
train_loader = _build_loader(train_set, 'TRAIN_BATCH_SIZE', shuffle=True)
valid_loader = _build_loader(valid_set, 'VALID_BATCH_SIZE', shuffle=False)
test_loader = _build_loader(test_set, 'TEST_BATCH_SIZE', shuffle=False)

# for e in range(2):
#     for step, (batch_y, batch_X) in enumerate(tqdm(train_loader)):
#         print(f'epoch: {e}\tstep: {step}\tbatch_X: {batch_X}\tbatch_y: {batch_y}')
#         if step >= 10:
#             break

## 2.模型构建与训练

In [67]:
class FM(nn.Module):
    """Second-order factorization machine producing one raw score per sample.

    The input is batch-normalised and then scored as

        linear(x) + 0.5 * sum_f [ (x @ V)[:, f] ** 2 - (x ** 2 @ V ** 2)[:, f] ]

    where the second term is the square-of-sum minus sum-of-squares form of
    the pairwise feature interactions weighted by dot products of rows of V.

    Args:
        num_features: width of every input row.
        k: length of the latent vector stored for each feature.
    """

    def __init__(self, num_features, k):
        super().__init__()
        self.bn = nn.BatchNorm1d(num_features)
        self.linear = nn.Linear(num_features, 1)
        # Latent factor matrix of shape (num_features, k), Xavier-uniform initialised.
        self.V = nn.Parameter(nn.init.xavier_uniform_(torch.zeros(num_features, k)))

    def forward(self, batch_x):
        """Score a batch.

        Args:
            batch_x: tensor of shape (batch_size, num_features).

        Returns:
            Tensor of shape (batch_size, 1): first-order plus second-order term.
        """
        normed = self.bn(batch_x)
        expected_shape = (normed.shape[0], 1)

        # First-order (linear) term.
        first_order = self.linear(normed)
        assert first_order.shape == expected_shape

        # Second-order term, both operands are (batch_size, k).
        square_of_sum = (normed @ self.V).pow(2)
        sum_of_square = normed.pow(2) @ self.V.pow(2)
        second_order = (square_of_sum - sum_of_square).sum(dim=1, keepdim=True) / 2
        assert second_order.shape == expected_shape

        scores = first_order + second_order
        assert scores.shape == expected_shape
        return scores

In [68]:
# Build the FM model and everything the Trainer needs around it.
model = FM(num_features=config['NUM_FEATURE'], k=config['K'])

# config['POS_WEIGHT'] is a Python int, so torch.tensor() alone would create an
# int64 tensor. Build it as float32 explicitly so the loss does not depend on
# implicit int/float type promotion.
weight = torch.tensor(config['POS_WEIGHT'], dtype=torch.float32)
# The model returns raw scores (no sigmoid), hence the logits variant of BCE;
# pos_weight scales the loss contribution of positive samples.
loss_func = nn.BCEWithLogitsLoss(pos_weight=weight)
optimizer = optim.Adam(lr=config['LEARNING_RATE'], params=model.parameters())
metric = CTRMetric()

# Trainer comes from utils.utils; it receives the model, loss, optimizer, metric,
# the three loaders and the full config dict.
trainer = Trainer(
    model=model,
    loss_func=loss_func,
    optimizer=optimizer,
    metric=metric,
    train_loader=train_loader,
    valid_loader=valid_loader,
    test_loader=test_loader,
    config=config
)

TRAIN_BATCH_SIZE: 128
VALID_BATCH_SIZE: 128
TEST_BATCH_SIZE: 128
DEVICE: mps
NUM_WORKERS: 6
EPOCH: 30
NUM_FEATURE: 111
POS_WEIGHT: 2
LEARNING_RATE: 0.001
K: 8


In [69]:
# Calls trainer.train() followed by trainer.test(); what each does internally is
# defined by Trainer in utils.utils and is not visible in this notebook.
# NOTE(review): the __main__ guard presumably keeps spawn-started DataLoader
# worker processes from re-running this code when they re-import modules - confirm.
if __name__ == '__main__':
    trainer.train()
    trainer.test()



100%|██████████| 625/625 [00:10<00:00, 60.97it/s] 


Train Epoch: 1
Loss: 0.7027184306144715


100%|██████████| 79/79 [00:07<00:00, 11.05it/s]


Valid Epoch: 1
loss: 0.7920971671237221
accuracy: 0.774624
precision: 0.513410
recall: 0.352539
F1: 0.414027
AUC: 0.743126



100%|██████████| 625/625 [00:09<00:00, 62.80it/s] 


Train Epoch: 2
Loss: 0.5497484426498414


100%|██████████| 79/79 [00:07<00:00, 11.18it/s]


Valid Epoch: 2
loss: 0.7767308950424194
accuracy: 0.778085
precision: 0.523817
recall: 0.351898
F1: 0.417512
AUC: 0.744747



100%|██████████| 625/625 [00:09<00:00, 63.39it/s] 


Train Epoch: 3
Loss: 0.5362244345188141


100%|██████████| 79/79 [00:06<00:00, 11.40it/s]


Valid Epoch: 3
loss: 0.7733046850071678
accuracy: 0.777097
precision: 0.519133
recall: 0.352676
F1: 0.416079
AUC: 0.745068



100%|██████████| 625/625 [00:09<00:00, 64.03it/s] 


Train Epoch: 4
Loss: 0.5312279460906982


100%|██████████| 79/79 [00:06<00:00, 11.34it/s]


Valid Epoch: 4
loss: 0.7623399639431434
accuracy: 0.776998
precision: 0.521343
recall: 0.338911
F1: 0.406921
AUC: 0.743572



100%|██████████| 625/625 [00:09<00:00, 64.02it/s] 


Train Epoch: 5
Loss: 0.5293331715106964


100%|██████████| 79/79 [00:06<00:00, 11.41it/s]


Valid Epoch: 5
loss: 0.7800632722770111
accuracy: 0.778877
precision: 0.524230
recall: 0.364465
F1: 0.426022
AUC: 0.743983



100%|██████████| 625/625 [00:09<00:00, 64.45it/s] 


Train Epoch: 6
Loss: 0.5263361932277679


100%|██████████| 79/79 [00:06<00:00, 11.40it/s]


Valid Epoch: 6
loss: 0.7773931222625926
accuracy: 0.778184
precision: 0.524660
recall: 0.341787
F1: 0.410011
AUC: 0.740364



100%|██████████| 625/625 [00:09<00:00, 63.66it/s] 


Train Epoch: 7
Loss: 0.5261185575962066


100%|██████████| 79/79 [00:07<00:00, 11.11it/s]


Valid Epoch: 7
loss: 0.7726753512515298
accuracy: 0.773141
precision: 0.504822
recall: 0.354483
F1: 0.413304
AUC: 0.735912



100%|██████████| 625/625 [00:09<00:00, 63.46it/s] 


Train Epoch: 8
Loss: 0.5259076407909393


100%|██████████| 79/79 [00:07<00:00, 11.21it/s]


Valid Epoch: 8
loss: 0.7769113077393061
accuracy: 0.776899
precision: 0.520084
recall: 0.345021
F1: 0.410702
AUC: 0.741583



100%|██████████| 625/625 [00:09<00:00, 64.00it/s] 


Train Epoch: 9
Loss: 0.5255820212364197


100%|██████████| 79/79 [00:06<00:00, 11.40it/s]


Valid Epoch: 9
loss: 0.7774409266966807
accuracy: 0.776108
precision: 0.518073
recall: 0.350112
F1: 0.413995
AUC: 0.740809



100%|██████████| 625/625 [00:09<00:00, 63.33it/s] 


Train Epoch: 10
Loss: 0.5242493866920471


100%|██████████| 79/79 [00:07<00:00, 10.94it/s]


Valid Epoch: 10
loss: 0.7780119324032264
accuracy: 0.778283
precision: 0.523695
recall: 0.352823
F1: 0.417610
AUC: 0.742124



100%|██████████| 625/625 [00:09<00:00, 62.89it/s] 


Train Epoch: 11
Loss: 0.5258041555881501


  0%|          | 0/79 [00:03<?, ?it/s]
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/xansar/.conda/envs/RecommenderSystem/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/xansar/.conda/envs/RecommenderSystem/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
  File "/Users/xansar/PycharmProjects/RecommenderSystem/Recommender-System-Pytorch/MyImplement/utils/utils.py", line 3, in <module>
    import pandas_profiling as pp
  File "/Users/xansar/.conda/envs/RecommenderSystem/lib/python3.8/site-packages/pandas_profiling/__init__.py", line 6, in <module>
    from pandas_profiling.controller import pandas_decorator
  File "/Users/xansar/.conda/envs/RecommenderSystem/lib/python3.8/site-packages/pandas_profiling/controller/pandas_decorator.py", line 4, in <module>
    from pandas_profiling.profile_report import ProfileReport


KeyboardInterrupt: 