In [1]:
import torch
# Compare the numeric (major, minor) components instead of the raw string:
# plain string ordering places '1.10.0' before '1.2.0'. Any local build suffix
# after '+' is dropped before parsing.
assert tuple(int(part) for part in torch.__version__.split('+')[0].split('.')[:2]) >= (1, 2), 'Expect PyTorch>=1.2.0 but get {}'.format(torch.__version__)
from torch import nn
import torch.nn.functional as F

import numpy as np

import time
import os
import sys
import pickle

# Project-local directories that must be importable from this notebook.
# Both paths are relative, so they resolve against the current working directory.
imp_dir = '../Implementations'
data_dir = '../Data/criteo'

# Insert each directory at position 1 of the module search path, in this order,
# which leaves data_dir immediately ahead of imp_dir.
for _extra_dir in (imp_dir, data_dir):
    sys.path.insert(1, _extra_dir)

from FM_BinClf_Torch import FM_2D_Layer

import logging
import importlib
# Re-execute the logging module before configuring it; presumably this resets
# any handlers already installed in the kernel so that basicConfig below is not
# a no-op.
importlib.reload(logging)

# Send INFO-and-above records to stdout as "HH:MM:SS LEVEL  message".
logging.basicConfig(
    level=logging.INFO,
    stream=sys.stdout,
    format='%(asctime)s %(levelname)-6s %(message)s',
    datefmt='%H:%M:%S',
)
logger = logging.getLogger(__name__)

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
logger.info('Device in Use: {}'.format(DEVICE))

00:25:43 INFO   Device in Use: cuda


## Prepare Data

### List all available files

In [3]:
np_artifact_dir = os.path.join(data_dir, 'criteo_train_numpy_artifact')


def _list_artifacts(directory, kind):
    """Return the entries of ``directory`` whose second '-'-separated field equals ``kind``.

    The result is sorted by the integer that follows the last '-' in the part
    of the name before the first '.'. Entries that contain no '-' at all (for
    example hidden files or checkpoint folders) are skipped rather than raising
    IndexError.
    """
    def _kind_of(name):
        fields = name.split('-')
        return fields[1] if len(fields) > 1 else None

    def _numeric_suffix(name):
        return int(name.split('.')[0].split('-')[-1])

    return sorted(
        (name for name in os.listdir(directory) if _kind_of(name) == kind),
        key=_numeric_suffix,
    )


# One ordered file list per artifact kind.
index_artifact = _list_artifacts(np_artifact_dir, 'index')
value_artifact = _list_artifacts(np_artifact_dir, 'value')
label_artifact = _list_artifacts(np_artifact_dir, 'label')

In [4]:
def _stack_arrays(file_names):
    """Load each named file from np_artifact_dir with np.load and stack the results with np.vstack."""
    return np.vstack([np.load(os.path.join(np_artifact_dir, name)) for name in file_names])


# Training split: the first 10 files of each kind, as an (index, value, label) tuple.
start = time.time()
train_data = tuple(
    _stack_arrays(names[:10])
    for names in (index_artifact, value_artifact, label_artifact)
)
logger.info('Training data loaded after {:.2f}s'.format(time.time()-start))

# Test split: every file from position 10 onwards, in the same tuple layout.
start = time.time()
test_data = tuple(
    _stack_arrays(names[10:])
    for names in (index_artifact, value_artifact, label_artifact)
)
logger.info('Test data loaded after {:.2f}s'.format(time.time()-start))

00:26:03 INFO   Training data loaded after 20.07s
00:26:30 INFO   Test data loaded after 26.78s


## Create Model

In [5]:
# Location of the pickled categorical feature map, relative to data_dir.
embedding_map_dict_pkl_path = os.path.join(
    data_dir,
    'criteo_feature_dict_artifact/categorical_feature_map_dict.pkl',
)

# NOTE: pickle.load can execute arbitrary code; only load artifacts from a trusted source.
with open(embedding_map_dict_pkl_path, mode='rb') as f:
    embedding_map_dict = pickle.load(f)

In [6]:
# Build the model, then move it to the selected device.
# NOTE(review): the meaning of the "+60", 39 and 5 arguments is defined by
# FM_2D_Layer's signature, which is not visible here — confirm against its source.
FM = FM_2D_Layer(len(embedding_map_dict) + 60, 39, 5)
FM = FM.to(DEVICE)

## Sample Run

In [7]:
# `execution` is a project-local module; the cell output shows it resolving to
# '../Data/criteo/execution.py'.
import execution
# Reload the module (presumably to pick up edits made since the first import).
# As the last expression of the cell, the returned module object is displayed.
importlib.reload(execution)

<module 'execution' from '../Data/criteo/execution.py'>

In [10]:
# File-name prefix and directory for model checkpoints; both are passed to
# execution.train_model and the directory is read back when evaluating.
checkpoint_prefix = 'FM'

# Checkpoints live in an 'FM_artifact' folder under the current working directory.
cwd = os.getcwd()
checkpoint_dir = os.path.join(cwd, 'FM_artifact')

In [11]:
# Train FM with binary cross-entropy on logits and an Adam optimizer left at its
# default hyperparameters; test_data is handed to train_model alongside train_data.
execution.train_model(
    FM,
    train_data,
    test_data,
    F.binary_cross_entropy_with_logits,
    torch.optim.Adam(FM.parameters()),
    DEVICE,
    checkpoint_dir,
    checkpoint_prefix,
)

00:31:11 INFO   Epoch 1/5 - Batch 1000/16000 Done - Train Loss: 0.467114, Val Loss: 0.474068
00:31:21 INFO   Epoch 1/5 - Batch 2000/16000 Done - Train Loss: 0.461353, Val Loss: 0.463995
00:31:30 INFO   Epoch 1/5 - Batch 3000/16000 Done - Train Loss: 0.458274, Val Loss: 0.466305
00:31:40 INFO   Epoch 1/5 - Batch 4000/16000 Done - Train Loss: 0.458031, Val Loss: 0.463272
00:31:49 INFO   Epoch 1/5 - Batch 5000/16000 Done - Train Loss: 0.457944, Val Loss: 0.471607
00:31:59 INFO   Epoch 1/5 - Batch 6000/16000 Done - Train Loss: 0.457793, Val Loss: 0.458427
00:32:08 INFO   Epoch 1/5 - Batch 7000/16000 Done - Train Loss: 0.457810, Val Loss: 0.466020
00:32:17 INFO   Epoch 1/5 - Batch 8000/16000 Done - Train Loss: 0.457837, Val Loss: 0.450748
00:32:27 INFO   Epoch 1/5 - Batch 9000/16000 Done - Train Loss: 0.457430, Val Loss: 0.452050
00:32:36 INFO   Epoch 1/5 - Batch 10000/16000 Done - Train Loss: 0.457175, Val Loss: 0.463292
00:32:46 INFO   Epoch 1/5 - Batch 11000/16000 Done - Train Loss: 0.45

## Model Performance

In [12]:
# Show which checkpoint files exist; os.listdir returns entries in arbitrary order.
os.listdir(checkpoint_dir)

['FM_4', 'FM_1', 'FM_5', 'FM_2', 'FM_3']

In [13]:
# Rebuild the model with the same constructor arguments used for training and
# restore the weights stored in checkpoint 'FM_1'.
model = FM_2D_Layer(len(embedding_map_dict)+60,39,5)
# map_location='cpu': the freshly built model is on the CPU at this point, and
# remapping lets a checkpoint saved from CUDA tensors load on a CPU-only machine
# (DEVICE falls back to "cpu" when CUDA is unavailable).
model.load_state_dict(torch.load(os.path.join(checkpoint_dir, 'FM_1'), map_location='cpu'))
# Switch to evaluation mode before scoring, then move to the selected device.
model.eval()
model = model.to(DEVICE)

In [14]:
# Score the currently loaded checkpoint on the training split.
train_auc = execution.get_roc_auc_score(model, train_data, DEVICE)
logger.info('Model ROC Score: {:.6f}'.format(train_auc))

00:49:15 INFO   Model ROC Score: 0.805470


In [15]:
# Score the currently loaded checkpoint on the held-out test split.
test_auc = execution.get_roc_auc_score(model, test_data, DEVICE)
logger.info('Model ROC Score: {:.6f}'.format(test_auc))

00:49:53 INFO   Model ROC Score: 0.792765


In [16]:
# Rebuild the model with the same constructor arguments used for training and
# restore the weights stored in checkpoint 'FM_5'.
model = FM_2D_Layer(len(embedding_map_dict)+60,39,5)
# map_location='cpu': the freshly built model is on the CPU at this point, and
# remapping lets a checkpoint saved from CUDA tensors load on a CPU-only machine
# (DEVICE falls back to "cpu" when CUDA is unavailable).
model.load_state_dict(torch.load(os.path.join(checkpoint_dir, 'FM_5'), map_location='cpu'))
# Switch to evaluation mode before scoring, then move to the selected device.
model.eval()
model = model.to(DEVICE)

In [17]:
# Score the currently loaded checkpoint on the training split.
train_auc = execution.get_roc_auc_score(model, train_data, DEVICE)
logger.info('Model ROC Score: {:.6f}'.format(train_auc))

00:51:26 INFO   Model ROC Score: 0.813573


In [18]:
# Score the currently loaded checkpoint on the held-out test split.
test_auc = execution.get_roc_auc_score(model, test_data, DEVICE)
logger.info('Model ROC Score: {:.6f}'.format(test_auc))

00:52:03 INFO   Model ROC Score: 0.781985
