# xDeepFM

<img src="../img/xdeepfm.png" width="600">

In [1]:
import pandas as pd
import numpy as np
import json
import torch
from torch import nn
from torch.utils.data.dataset import Dataset
from collections import OrderedDict

In [2]:
# Training hyperparameters.
learning_rate = 0.0001
batch_size = 64
num_epochs = 5

# Seed the global RNGs up front so that a fresh "Restart Kernel -> Run All"
# starts from the same random state (weight initialisation, shuffling).
# Some GPU kernels can still be non-deterministic, so this is best effort.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
# Load the feature map: a JSON document with the number of input fields and,
# per feature, its type, vocabulary size and column index.
# The encoding is given explicitly so the result does not depend on the
# platform's default text encoding.
with open('../data/movielens/feature_map.json', encoding='utf-8') as obj:
    feature_map = json.load(obj)
feature_map

{'dataset_id': 'movielens',
 'num_fields': 25,
 'feature_specs': {'movieId': {'source': 'item',
   'type': 'categorical',
   'vocab_size': 935,
   'index': 0},
  'userId': {'source': 'user',
   'type': 'categorical',
   'vocab_size': 22540,
   'index': 1},
  'timestamp': {'source': 'user',
   'type': 'numerical',
   'vocab_size': 1,
   'index': 2},
  'releaseYear': {'source': 'item',
   'type': 'numerical',
   'vocab_size': 1,
   'index': 3},
  'movieGenre1': {'source': 'item',
   'type': 'categorical',
   'vocab_size': 18,
   'index': 4},
  'movieGenre2': {'source': 'item',
   'type': 'categorical',
   'vocab_size': 18,
   'index': 5},
  'movieGenre3': {'source': 'item',
   'type': 'categorical',
   'vocab_size': 15,
   'index': 6},
  'movieRatingCount': {'source': 'item',
   'type': 'numerical',
   'vocab_size': 1,
   'index': 7},
  'movieAvgRating': {'source': 'item',
   'type': 'numerical',
   'vocab_size': 1,
   'index': 8},
  'movieRatingStddev': {'source': 'item',
   'type': 'nu

In [4]:
class MovielensDataset(Dataset):
    """Map-style dataset over a CSV file whose last column is the label.

    Every column except the last one is treated as a feature. Both features
    and labels are served as ``float32``.

    The CSV is converted to NumPy arrays once, at construction time. Samples
    are served from those arrays, so later modifications of ``self.df`` are
    not reflected in the items returned by ``__getitem__``.
    """

    def __init__(self, url):
        """Read the CSV and cache the feature matrix and label vector.

        Args:
            url: Anything ``pandas.read_csv`` accepts (file path, URL or
                file-like object).
        """
        self.df = pd.read_csv(url)
        # ``__getitem__`` runs once per sample per epoch. Slicing the
        # DataFrame with ``iloc`` and casting on every call is much slower
        # than indexing a pre-built ndarray, so the conversion is done here.
        self._features = np.ascontiguousarray(
            self.df.iloc[:, :-1].values, dtype=np.float32)
        self._labels = self.df.iloc[:, -1].values.astype(np.float32)

    def __getitem__(self, idx):
        """Return ``(x, y)``: the float32 feature vector and float32 label of row ``idx``."""
        # Copy the row so callers receive an independent array (as they did
        # when each row was materialised from the DataFrame) and cannot
        # modify the cached matrix through it.
        x, y = self._features[idx].copy(), self._labels[idx]
        return x, y

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return self._labels.shape[0]

In [5]:
# Locations of the pre-split MovieLens CSV files.
train_csv = '../data/movielens/data_for_train.csv'
test_csv = '../data/movielens/data_for_test.csv'

# One dataset object per split.
train_dataset = MovielensDataset(url=train_csv)
test_dataset = MovielensDataset(url=test_csv)

In [6]:
# Mini-batch iterators for both splits: the training data is shuffled,
# the test data is read in its stored order.
DataLoader = torch.utils.data.DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [7]:
class xDeepFM(nn.Module):
    """xDeepFM: a linear term, a CIN and a DNN summed into one sigmoid output.

    Each feature described in ``feature_map['feature_specs']`` is mapped to
    an ``embedding_dim``-sized vector. The stacked embeddings feed the
    Compressed Interaction Network and, flattened, the deep network. The
    linear branch works on the batch-normalised raw input ``X`` instead.

    NOTE: the linear branch sees categorical columns as their raw integer
    ids, cast to float by the caller.
    """

    def __init__(self,
                 feature_map,
                 embedding_dim=10,
                 hidden_units=(256, 128, 64),
                 cin_units=(16, 16, 16)):
        """Build the embedding tables and the three branches.

        Args:
            feature_map: dict with ``'num_fields'`` (number of input columns)
                and ``'feature_specs'``, a mapping ``name -> spec``. Each spec
                provides ``'type'`` (``'numerical'`` or ``'categorical'``) and
                ``'index'`` (column of ``X``); categorical specs also provide
                ``'vocab_size'`` and optionally ``'padding_idx'``.
            embedding_dim: size of every feature embedding.
            hidden_units: widths of the DNN hidden layers (any sequence).
            cin_units: number of feature maps per CIN layer (any sequence).

        Raises:
            ValueError: if the number of feature specs differs from
                ``num_fields`` or a spec has an unsupported ``'type'``.
        """
        super(xDeepFM, self).__init__()
        self.feature_map = feature_map
        num_fields = feature_map['num_fields']
        feature_specs = feature_map['feature_specs']
        # All layer sizes below are derived from num_fields, while forward()
        # stacks one embedding per spec, so the two must agree.
        if len(feature_specs) != num_fields:
            raise ValueError(
                "feature_map['num_fields'] is {} but {} feature specs were given".format(
                    num_fields, len(feature_specs)))
        # Work on private copies so the caller's sequences (and the defaults)
        # are never shared with, or mutated through, the sub-modules.
        hidden_units = list(hidden_units)
        cin_units = list(cin_units)
        # Embedding
        self.embedding = nn.ModuleDict()
        for feature, feature_spec in feature_specs.items():
            if feature_spec['type'] == 'numerical':
                # A bias-free linear map turns the scalar into a scaled vector.
                self.embedding[feature] = nn.Linear(
                    1, embedding_dim, bias=False)
            elif feature_spec['type'] == 'categorical':
                padding_idx = feature_spec.get('padding_idx', None)
                self.embedding[feature] = nn.Embedding(feature_spec['vocab_size'],
                                                       embedding_dim,
                                                       padding_idx=padding_idx)
            else:
                # Fail here with a clear message rather than later in
                # forward(), where no embedding would exist for the feature.
                raise ValueError(
                    "unsupported feature type {!r} for feature {!r}".format(
                        feature_spec['type'], feature))
        # LR
        self.batch_norm = nn.BatchNorm1d(num_fields)
        self.lr = nn.Linear(num_fields, 1)
        # Compressed Interaction Network
        self.cin = CompressedInteractionNet(
            num_fields, cin_units, output_dim=1)
        # Deep Neural Network
        input_dim = num_fields * embedding_dim
        layer_sizes = [input_dim] + hidden_units
        hidden_layers = []
        for i in range(len(layer_sizes) - 1):
            hidden_layers.append(
                nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
            hidden_layers.append(nn.ReLU())
        hidden_layers.append(nn.Linear(layer_sizes[-1], 1))
        self.dnn = nn.Sequential(*hidden_layers)
        # Sigmoid
        self.output_activation = nn.Sigmoid()

    def forward(self, X):
        """Return the predicted probability for every row of ``X``.

        Args:
            X: tensor of shape ``(batch, num_fields)``; column
                ``feature_spec['index']`` holds the raw value of each feature
                (categorical ids are cast to ``long`` before lookup).

        Returns:
            Tensor of shape ``(batch,)`` with values produced by a sigmoid.
        """
        feature_emb_list = []
        for feature, feature_spec in self.feature_map['feature_specs'].items():
            column = X[:, feature_spec['index']]
            if feature_spec['type'] == 'numerical':
                raw_feature = column.float().reshape(-1, 1)
            else:  # 'categorical' -- the only other type accepted by __init__
                raw_feature = column.long()
            feature_emb_list.append(self.embedding[feature](raw_feature))
        # (batch, num_fields, embedding_dim)
        feature_emb = torch.stack(feature_emb_list, dim=1)
        flat_feature_emb = feature_emb.flatten(start_dim=1)
        lr_out = self.lr(self.batch_norm(X))
        cin_out = self.cin(feature_emb)
        dnn_out = self.dnn(flat_feature_emb)
        # Each branch yields (batch, 1) logits; they are summed before the sigmoid.
        out = lr_out + cin_out + dnn_out
        y_pred = self.output_activation(out).squeeze(1)
        return y_pred


class CompressedInteractionNet(nn.Module):
    """Compressed Interaction Network (CIN): the explicit feature-crossing branch.

    Layer ``i`` takes the element-wise product of every field embedding with
    every feature map of the previous layer (the field embeddings themselves
    for the first layer). A 1x1 convolution compresses those products into
    ``cin_units[i]`` new feature maps. Each layer's maps are sum-pooled over
    the embedding axis, the pooled vectors of all layers are concatenated,
    and a final linear layer projects them to ``output_dim``.
    """

    def __init__(self,
                 num_fields,
                 cin_units,
                 output_dim=1):
        """Create one 1x1 convolution per CIN layer plus the output projection.

        Args:
            num_fields: number of field embeddings per sample.
            cin_units: number of feature maps produced by each layer
                (any non-empty sequence of ints).
            output_dim: size of the final projection.

        Raises:
            ValueError: if ``cin_units`` is empty.
        """
        super(CompressedInteractionNet, self).__init__()
        # Keep a private copy so later changes to the caller's sequence
        # cannot desynchronise it from the layers built here.
        self.cin_units = list(cin_units)
        if not self.cin_units:
            raise ValueError('cin_units must contain at least one layer size')
        self.conv = nn.ModuleDict()
        prev_maps = num_fields  # layer 0 crosses the fields with themselves
        for i, unit in enumerate(self.cin_units):
            # Every (field, previous feature map) pair is one input channel.
            self.conv['layer_' + str(i)] = nn.Conv1d(num_fields * prev_maps,
                                                     unit,  # how many filters
                                                     kernel_size=1)
            prev_maps = unit
        self.fc = nn.Linear(sum(self.cin_units), output_dim)

    def forward(self, feature_emb):
        """Apply the CIN to stacked field embeddings.

        Args:
            feature_emb: tensor of shape ``(batch, num_fields, embedding_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)``.
        """
        X_0 = feature_emb
        batch_size, _, embedding_dim = X_0.shape
        X_i = X_0
        pooling_vec_list = []
        for i in range(len(self.cin_units)):
            # (batch, num_fields, maps_prev, embedding_dim): all pairwise products.
            hadamard_tensor = torch.einsum('bhd,bmd->bhmd', X_0, X_i)
            # reshape (not view) because the einsum result is not guaranteed
            # to be contiguous in memory.
            hadamard_tensor = hadamard_tensor.reshape(
                batch_size, -1, embedding_dim)
            # The conv output is already (batch, cin_units[i], embedding_dim).
            X_i = self.conv['layer_' + str(i)](hadamard_tensor)
            pooling_vec_list.append(X_i.sum(dim=-1))
        concat_vec = torch.cat(pooling_vec_list, dim=-1)
        out = self.fc(concat_vec)
        return out

In [8]:
# Build the network on the selected device, together with the binary
# cross-entropy objective and the Adam optimiser that updates its weights.
model = xDeepFM(feature_map)
model = model.to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Optimisation loop: num_epochs full passes over train_loader.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for step, (X, y) in enumerate(train_loader, start=1):
        X, y = X.to(device), y.to(device)

        # Drop the gradients of the previous step, then run
        # forward -> loss -> backward -> parameter update.
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        # Log the loss of the current mini-batch every 300 steps.
        if step % 300 == 0:
            print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}".format(
                epoch + 1, num_epochs, step, total_step, loss.item()))

Epoch [1/5], Step [300/1388] Loss: 0.6292
Epoch [1/5], Step [600/1388] Loss: 0.6063
Epoch [1/5], Step [900/1388] Loss: 0.5344
Epoch [1/5], Step [1200/1388] Loss: 0.6174
Epoch [2/5], Step [300/1388] Loss: 0.6643
Epoch [2/5], Step [600/1388] Loss: 0.4939
Epoch [2/5], Step [900/1388] Loss: 0.5171
Epoch [2/5], Step [1200/1388] Loss: 0.4998
Epoch [3/5], Step [300/1388] Loss: 0.5913
Epoch [3/5], Step [600/1388] Loss: 0.5742
Epoch [3/5], Step [900/1388] Loss: 0.5561
Epoch [3/5], Step [1200/1388] Loss: 0.4882
Epoch [4/5], Step [300/1388] Loss: 0.4916
Epoch [4/5], Step [600/1388] Loss: 0.5015
Epoch [4/5], Step [900/1388] Loss: 0.5310
Epoch [4/5], Step [1200/1388] Loss: 0.5598
Epoch [5/5], Step [300/1388] Loss: 0.4284
Epoch [5/5], Step [600/1388] Loss: 0.6740
Epoch [5/5], Step [900/1388] Loss: 0.4679
Epoch [5/5], Step [1200/1388] Loss: 0.6481


In [9]:
# Measure classification accuracy on the held-out test split.
model.eval()  # switch layers such as BatchNorm to inference behaviour
with torch.no_grad():  # no gradients are needed for evaluation
    correct = 0
    total = 0
    for X, y in test_loader:
        X = X.to(device)
        y = y.to(device).bool()
        output = model(X)
        # The model outputs a probability; 0.5 is the decision threshold.
        y_pred = output > 0.5
        total += y.shape[0]
        correct += (y_pred == y).sum().item()

    # The data here are MovieLens ratings, not images, so the message says "test set".
    print('Accuracy of the model on the test set: {:.2f} %'.format(
        100 * correct / total))

Accuracy of the model on the test images: 69.72 %
