## 数据下载地址：Display Advertising Challenge

https://www.kaggle.com/c/criteo-display-ad-challenge/data

In [13]:
import numpy as np
import pandas as pd
import os

import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torch.nn as nn
import torch.nn.functional as F
from time import time

# Number of leading rows (indices 0..Num_train-1) that the training sampler draws from.
Num_train = 90
# Directory containing train.txt and feature_sizes.txt.
# The original absolute path is kept as the default so existing setups keep
# working; set DEEPFM_DATA_DIR to run the notebook on another machine without
# editing this cell.
data_content = os.environ.get(
    "DEEPFM_DATA_DIR", "/cloudide/workspace/All-in-One/DeepFM/data"
)

# header=None: the first line of train.txt is read as data, not as column names,
# so pandas labels the columns with integers.
data = pd.read_csv(os.path.join(data_content, 'train.txt'), header=None)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,1,1,5,0,1382,4,15,2,181,1,...,21,1,1,0,0,2,2134,2,1841,0
1,2,0,44,1,102,8,2,2,4,1,...,16,1,2,12,0,2,17,2,9,0
2,2,0,1,14,767,89,4,2,245,1,...,312,0,0,3,1,2,3,0,0,0
3,0,893,0,0,4392,0,0,0,0,0,...,30,0,0,0,0,2,7,0,0,0
4,3,-1,0,0,2,0,3,0,0,1,...,296,0,0,16,0,1,2,0,0,0


In [14]:
# Training matrix: the values of every DataFrame column except the last one,
# materialised as a NumPy array.
train_data = data.iloc[:, :-1].to_numpy()

In [15]:
# Batches of 16 rows, drawn in random order from row indices 0..Num_train-1 only.
loader_train = DataLoader(
    train_data,
    sampler=sampler.SubsetRandomSampler(range(Num_train)),
    batch_size=16,
)
# A SubsetRandomSampler's length is the number of indices it was given.
print(len(loader_train.sampler))
# Pull one batch and show its first row as a quick sanity check
# (the printed label reads "first row of the loaded data").
loader_iter = iter(loader_train)
datal = next(loader_iter)
print("加载数据的第一行：", datal[0])


90
加载数据的第一行： tensor([   0,    0,   14,    6, 7132,  171,    2,    2,    6,    0,    1,    0,
           6,    1,    1, 2300, 1106,    1,    2,  670,    2,    1,    9,  114,
        2360,   42,    1, 2020, 1907,    2,  676,    0,    0, 1735,    0,    2,
           4,    0,    0])


In [16]:
# Validation matrix: same slice as the training matrix (all columns but the last);
# the train/validation split is done later by the samplers' row indices.
val_data = data.iloc[:, :-1].to_numpy()

In [17]:
# Batches of 16 rows, drawn in random order from row indices Num_train..99.
loader_val = DataLoader(
    val_data,
    sampler=sampler.SubsetRandomSampler(range(Num_train, 100)),
    batch_size=16,
)
# A SubsetRandomSampler's length is the number of indices it was given.
print(len(loader_val.sampler))
# Pull one batch and show its first row as a quick sanity check
# (the printed label reads "first row of the loaded data").
loader_iter = iter(loader_val)
datal = next(loader_iter)
print("加载数据的第一行：", datal[0])


10
加载数据的第一行： tensor([    0,     0,    78,     1, 15835,   220,     1,     1,     1,     0,
            1,     0,     1,     1,     7,     0,  1019,     1,     3,   959,
            1,     1,    42,   417,     0,   418,     2,     6,  1465,     3,
            5,   631,     1,  3577,     0,     2,     4,     2,   306])


In [18]:
# Load the comma-separated per-field sizes from feature_sizes.txt.
# os.path.join matches how train.txt is located and avoids hand-built separators;
# ndmin=1 guarantees a 1-D array so the loop below also works when the file
# contains a single value (loadtxt would otherwise return a 0-d array).
feature_sizes = np.loadtxt(
    os.path.join(data_content, 'feature_sizes.txt'), delimiter=',', ndmin=1
)
# loadtxt parses the values as floats; convert them to plain Python ints.
feature_sizes = [int(x) for x in feature_sizes]
print("feature_size:", feature_sizes)

feature_size: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 532, 533, 11289, 13822, 151, 13, 9005, 254, 4, 11557, 4191, 11560, 3037, 27, 5640, 12459, 11, 2790, 1391, 4, 12005, 10, 15, 9617, 52, 7599]


In [19]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU,
# and report which one was chosen.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using GPU' if device.type == 'cuda' else 'Using CPU')

Using CPU


In [21]:

# One embedding table per entry of feature_sizes, each mapping an index to a
# single scalar (embedding dimension 1).
fm_first_order_embeddings = nn.ModuleList(
    [nn.Embedding(num_embeddings, 1) for num_embeddings in feature_sizes]
)
print('fm_first_order_embeddings:', fm_first_order_embeddings)
# Number of tables built (one per field).
print(len(fm_first_order_embeddings))
# Weight shape of the first table: (feature_sizes[0], 1).
print(fm_first_order_embeddings[0].weight.data.shape)

fm_first_order_embeddings: ModuleList(
  (0-12): 13 x Embedding(1, 1)
  (13): Embedding(532, 1)
  (14): Embedding(533, 1)
  (15): Embedding(11289, 1)
  (16): Embedding(13822, 1)
  (17): Embedding(151, 1)
  (18): Embedding(13, 1)
  (19): Embedding(9005, 1)
  (20): Embedding(254, 1)
  (21): Embedding(4, 1)
  (22): Embedding(11557, 1)
  (23): Embedding(4191, 1)
  (24): Embedding(11560, 1)
  (25): Embedding(3037, 1)
  (26): Embedding(27, 1)
  (27): Embedding(5640, 1)
  (28): Embedding(12459, 1)
  (29): Embedding(11, 1)
  (30): Embedding(2790, 1)
  (31): Embedding(1391, 1)
  (32): Embedding(4, 1)
  (33): Embedding(12005, 1)
  (34): Embedding(10, 1)
  (35): Embedding(15, 1)
  (36): Embedding(9617, 1)
  (37): Embedding(52, 1)
  (38): Embedding(7599, 1)
)
39
torch.Size([1, 1])


In [22]:
# Width of each second-order embedding vector.
embedding_size = 4
# One embedding table per entry of feature_sizes, each mapping an index to a
# vector of length embedding_size.
fm_second_order_embeddings = nn.ModuleList(
    [
        nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_size)
        for num_embeddings in feature_sizes
    ]
)
print('fm_second_order_embeddings:', fm_second_order_embeddings)
# Number of tables built (one per field).
print(len(fm_second_order_embeddings))

fm_second_order_embeddings: ModuleList(
  (0-12): 13 x Embedding(1, 4)
  (13): Embedding(532, 4)
  (14): Embedding(533, 4)
  (15): Embedding(11289, 4)
  (16): Embedding(13822, 4)
  (17): Embedding(151, 4)
  (18): Embedding(13, 4)
  (19): Embedding(9005, 4)
  (20): Embedding(254, 4)
  (21): Embedding(4, 4)
  (22): Embedding(11557, 4)
  (23): Embedding(4191, 4)
  (24): Embedding(11560, 4)
  (25): Embedding(3037, 4)
  (26): Embedding(27, 4)
  (27): Embedding(5640, 4)
  (28): Embedding(12459, 4)
  (29): Embedding(11, 4)
  (30): Embedding(2790, 4)
  (31): Embedding(1391, 4)
  (32): Embedding(4, 4)
  (33): Embedding(12005, 4)
  (34): Embedding(10, 4)
  (35): Embedding(15, 4)
  (36): Embedding(9617, 4)
  (37): Embedding(52, 4)
  (38): Embedding(7599, 4)
)
39


Bad pipe message: %s [b'fd93e17\r\nX-Real-IP: 10.0.102.249\r\nX-Forwarded-For: 10.0.102.249\r\nHost: 58jfq96t-8ducspsf-45depob']
Bad pipe message: %s [b'g73.c2.mcprev.cn\r\nX-Forwarded-Host: 58jfq96t-8ducspsf-45depobrug73.c2.mcprev.cn\r\nX-Forwarded-Proto: https\r\nX-Forw', b'ded-Scheme: https\r\nX-Scheme: https\r\nsec-ch-ua: "Microsoft Edge";v="117", "Not;A=Brand";v="8", "C']
Bad pipe message: %s [b'omium";v="117"\r\nsec-ch-ua-mobile: ?0\r\nsec-ch-ua-platform: "Windows"\r\nUpgrade-Insecure-Requests: 1\r\nUser']
Bad pipe message: %s [b'gent: Mozilla/5.0 (Windows NT 10.0; Win64; x', b') AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0']
Bad pipe message: %s [b'.0 Safari/537.36 Edg/117.0.2045.36\r\nAccept: t']
Bad pipe message: %s [b't/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signe']
Bad pipe message: %s [b'exchange;v=b3;q=0.7\r\nSec-Fetch-Site: same-origin\r\nSec-Fetch-Mode: navigate\r\nSec-Fetch-Dest: iframe\r', b'eferer: h']
Bad p

In [1]:
# Two dropout rates, both 0.5; the cell displays the first one.
dropout = [0.5] * 2
dropout[0]

0.5