In [1]:
import torch
# Compare the numeric (major, minor) pair instead of raw strings: plain string
# ordering is lexicographic and would place '1.10.0' before '1.2.0'.
# Any local build suffix after '+' is dropped before parsing.
torch_major_minor = tuple(int(part) for part in torch.__version__.split('+')[0].split('.')[:2])
assert torch_major_minor >= (1, 2), 'Expect PyTorch>=1.2.0 but get {}'.format(torch.__version__)
from torch import nn
import torch.nn.functional as F

import numpy as np

import time
import os
import sys
import pickle

# Directories that are put on the import path: the shared implementation
# modules and the Criteo data folder.
imp_dir = '../Implementations'
data_dir = '../Data/criteo'

# Insert right after the first sys.path entry, one at a time, so the entry
# inserted last (data_dir) ends up ahead of imp_dir.
for extra_path in (imp_dir, data_dir):
    sys.path.insert(1, extra_path)

import logging
import importlib
# Reloading the logging module resets its state, so re-running this cell
# starts from a fresh logger instead of stacking handlers on the old one.
importlib.reload(logging)

# Start every session with an empty log file.
log_path = 'xDeepFM_notebook.log'
if os.path.isfile(log_path):
    os.remove(log_path)

formatter = logging.Formatter('%(asctime)s %(levelname)-s: %(message)s', datefmt='%H:%M:%S')

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Mirror every INFO-level record to the log file and to the notebook output.
fh = logging.FileHandler(log_path)
sh = logging.StreamHandler(sys.stdout)
for handler in (fh, sh):
    handler.setLevel(logging.INFO)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info('Device in Use: {}'.format(DEVICE))

# The torch.cuda queries below only accept CUDA devices, so the memory report
# is skipped on a CPU-only machine instead of raising.
if DEVICE.type == 'cuda':
    torch.cuda.empty_cache()
    bytes_per_gb = 1024**3
    # memory_cached was renamed to memory_reserved in later PyTorch releases;
    # use the new name when it exists and keep the old one as a fallback.
    if hasattr(torch.cuda, 'memory_reserved'):
        reserved_bytes = torch.cuda.memory_reserved(DEVICE)
    else:
        reserved_bytes = torch.cuda.memory_cached(DEVICE)
    t = torch.cuda.get_device_properties(DEVICE).total_memory/bytes_per_gb  # total device memory, GB
    c = reserved_bytes/bytes_per_gb                                         # reserved ("cached") by the allocator, GB
    a = torch.cuda.memory_allocated(DEVICE)/bytes_per_gb                    # currently occupied by tensors, GB
    logger.info('CUDA Memory: Total {:.2f} GB, Cached {:.2f} GB, Allocated {:.2f} GB'.format(t,c,a))
else:
    logger.info('CUDA not available; skipping GPU memory report')

17:50:38 INFO: Device in Use: cuda
17:50:38 INFO: CUDA Memory: Total 11.17 GB, Cached 0.00 GB, Allocated 0.00 GB


## Prepare Data

### Load Dict

In [3]:
# Location of the pickled categorical-feature map, relative to data_dir.
dict_artifact_rel_path = 'criteo_feature_dict_artifact/categorical_feature_map_dict.pkl'
embedding_map_dict_pkl_path = os.path.join(data_dir, dict_artifact_rel_path)

# NOTE: pickle.load can execute arbitrary code while deserialising; only point
# this at a trusted, locally produced artifact.
with open(embedding_map_dict_pkl_path, 'rb') as pkl_file:
    embedding_map_dict = pickle.load(pkl_file)

### List all available files

In [4]:
np_artifact_dir = os.path.join(data_dir, 'criteo_train_numpy_artifact')


def list_artifact_files(artifact_dir, kind):
    """Return the artifact file names of one kind, ordered by part number.

    A file is selected when the second '-'-separated token of its name equals
    ``kind``. Names without a '-' (stray or hidden files) are ignored rather
    than raising IndexError. The part number is the integer after the last
    '-' in the portion of the name before the first '.'.

    Args:
        artifact_dir: directory to scan.
        kind: token to match, e.g. 'index', 'value' or 'label'.

    Returns:
        A list of file names (not full paths) sorted by ascending part number.

    Raises:
        ValueError: if a selected file name does not end in an integer part number.
    """
    def part_number(file_name):
        return int(file_name.split('.')[0].split('-')[-1])

    selected = []
    for file_name in os.listdir(artifact_dir):
        tokens = file_name.split('-')
        if len(tokens) > 1 and tokens[1] == kind:
            selected.append(file_name)
    return sorted(selected, key=part_number)


index_artifact = list_artifact_files(np_artifact_dir, 'index')
value_artifact = list_artifact_files(np_artifact_dir, 'value')
label_artifact = list_artifact_files(np_artifact_dir, 'label')

In [5]:
# The first N_TRAIN_PARTS part files form the training split; every remaining
# part file is held out as the test split.
N_TRAIN_PARTS = 10


def load_split(part_slice):
    """Load one data split as an (index, value, label) tuple of arrays.

    For each of the three artifact lists, the part files selected by
    ``part_slice`` are loaded from np_artifact_dir and stacked row-wise.
    """
    return tuple(
        np.vstack([np.load(os.path.join(np_artifact_dir, f)) for f in artifact[part_slice]])
        for artifact in (index_artifact, value_artifact, label_artifact)
    )


# Fail fast: np.vstack raises ValueError on an empty list, and without this
# check that would surface only after the slow training load has finished.
for artifact_name, artifact in (('index', index_artifact),
                                ('value', value_artifact),
                                ('label', label_artifact)):
    if len(artifact) <= N_TRAIN_PARTS:
        raise ValueError(
            'Need more than {} {} part files for a train/test split, found {}'.format(
                N_TRAIN_PARTS, artifact_name, len(artifact)))

start = time.time()

train_data = load_split(slice(None, N_TRAIN_PARTS))

logger.info('Training data loaded after {:.2f}s'.format(time.time()-start))

start = time.time()

test_data = load_split(slice(N_TRAIN_PARTS, None))

logger.info('Test data loaded after {:.2f}s'.format(time.time()-start))

19:01:03 INFO: Training data loaded after 4194.25s
19:22:22 INFO: Test data loaded after 1279.05s


## Create Model

In [109]:
import execution
import xDeepFM_BinClf_Torch

In [110]:
# Positional constructor arguments, in the order xDeepFM_Layer receives them.
# NOTE(review): the parameter names are not visible from this notebook; the
# per-entry notes below are assumptions to confirm against xDeepFM_Layer.
xdeepfm_args = (
    len(embedding_map_dict) + 60,  # presumably the embedding vocabulary size (map size plus a margin of 60)
    10,                            # presumably the embedding dimension
    26,                            # presumably the number of sparse (categorical) fields
    13,                            # presumably the number of dense (numeric) fields
    [400] * 4,                     # four entries of 400
    [0] * 5,                       # five zeros
    [100] * 3,                     # three entries of 100
    True,
)

# Build the model and move its parameters to the selected device.
xDeepFM = xDeepFM_BinClf_Torch.xDeepFM_Layer(*xdeepfm_args).to(DEVICE)

## Training

In [111]:
# Checkpoint files share this prefix and are placed in an 'xDeepFM_artifact'
# folder under the directory the notebook is run from.
checkpoint_prefix = 'xDeepFM'
cwd = os.getcwd()
checkpoint_dir = os.path.join(cwd, 'xDeepFM_artifact')

In [None]:
# Name the loss and the optimizer first so the training call reads as a plain
# argument list. Adam is created with its library-default hyperparameters.
loss_fn = F.binary_cross_entropy_with_logits
optimizer = torch.optim.Adam(xDeepFM.parameters())

execution.train_model_separate_inp(
    xDeepFM,
    train_data,
    test_data,
    loss_fn,
    optimizer,
    DEVICE,
    checkpoint_dir,
    checkpoint_prefix,
    logger=logger,
)

04:57:43 INFO: Epoch 1/5 - Batch 1000/8000 Done - Train Loss: 0.461659, Val Loss: 0.458999
05:02:25 INFO: Epoch 1/5 - Batch 2000/8000 Done - Train Loss: 0.456410, Val Loss: 0.479403
05:07:07 INFO: Epoch 1/5 - Batch 3000/8000 Done - Train Loss: 0.455004, Val Loss: 0.458273
05:11:57 INFO: Epoch 1/5 - Batch 4000/8000 Done - Train Loss: 0.454135, Val Loss: 0.432131
05:16:46 INFO: Epoch 1/5 - Batch 5000/8000 Done - Train Loss: 0.452750, Val Loss: 0.449463
05:21:36 INFO: Epoch 1/5 - Batch 6000/8000 Done - Train Loss: 0.451491, Val Loss: 0.420265


## Quick Sample Test

In [107]:
# Import the two project modules and force a reload of each, so that edits made
# to their source files are picked up without restarting the kernel.
# Objects created from the modules before the reload are not updated by it.
import execution
importlib.reload(execution)
import xDeepFM_BinClf_Torch
importlib.reload(xDeepFM_BinClf_Torch)

<module 'xDeepFM_BinClf_Torch' from '../Implementations/xDeepFM_BinClf_Torch.py'>

In [103]:
# Same positional constructor arguments as the training model.
# NOTE(review): the meaning of each entry is not visible from this notebook;
# confirm against the xDeepFM_Layer signature.
sample_model_args = (
    len(embedding_map_dict) + 60,
    10,
    26,
    13,
    [400] * 4,
    [0] * 5,
    [100] * 3,
    True,
)

# No .to(DEVICE) here: the sample model is not moved to another device.
# Caution: this rebinds the name `xDeepFM` to a freshly initialised model.
xDeepFM = xDeepFM_BinClf_Torch.xDeepFM_Layer(*sample_model_args)

In [104]:
# Dummy batch of 10 rows: 13 dense features that are all ones (float32), and
# 26 sparse indices per row that count up from 0 to 259 (int64).
inp_dense = torch.ones(10, 13, dtype=torch.float32)
inp_sparse = torch.arange(260, dtype=torch.int64).reshape(10, 26)

In [105]:
# Smoke test: run a forward pass on the dummy batch and display the result
# (one value per input row in the recorded output).
# NOTE(review): outputs are presumably raw logits, since training uses
# binary_cross_entropy_with_logits — confirm against xDeepFM_Layer.forward.
xDeepFM(inp_dense, inp_sparse)

tensor([[ 0.0788],
        [-0.2363],
        [ 0.4149],
        [ 0.1767],
        [ 0.2753],
        [ 0.3456],
        [-0.1220],
        [ 0.5456],
        [ 0.0888],
        [ 0.4277]], grad_fn=<AddmmBackward>)