In [3]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import sys
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

sys.path.append('../')

In [6]:
from src.layers import (
    NumericalFeatureEmbedding,
    CategoricalFeatureEmbedding, 
    FeatureEmbedding, 
    TransformerEncoder, 
    MLP
)

### Numerical Features Embeddings

In [29]:

num_features = 2
dim_token = 6

x_num = np.random.random(size=(100, 2))

d_sqrt_inv = 1 / np.sqrt(dim_token)

# Learnable weights
w = np.random.uniform(low=-d_sqrt_inv, high=d_sqrt_inv, size=(num_features,dim_token))
b = np.random.uniform(low=-d_sqrt_inv, high=d_sqrt_inv, size=(num_features,dim_token))

output = w[np.newaxis] *x_num[...,np.newaxis] + b

print("weights shape: ",w[np.newaxis].shape)
print("x shape: ",x_num[...,np.newaxis].shape)
print("output shape: ",(w[np.newaxis] *x_num[...,np.newaxis]).shape)

weights shape:  (1, 2, 6)
x shape:  (100, 2, 1)
output shape:  (100, 2, 6)


In [36]:
from sklearn.datasets import make_classification

num_features = 10
dim_token = 32

X, y = make_classification(
    n_samples=1000, n_features=num_features, 
    n_informative=num_features, n_redundant=0, 
    n_repeated=0, n_classes=2, 
    n_clusters_per_class=2,
    shuffle=True, random_state=123
)

ne = NumericalFeatureEmbedding(num_features=num_features,dim_token=dim_token)

ne(X).shape

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)


TensorShape([1000, 10, 32])

In [72]:
# Basic MLP classification without NumericalFeatureEmbedding 
inputs = keras.layers.Input(shape=X.shape[1])
x = keras.layers.Dense(32, activation='relu')(inputs)
x = keras.layers.Dropout(0.1)(x)
x = keras.layers.Dense(16, activation='relu')(x)
outputs = keras.layers.Dense(1, activation='sigmoid')(x)

base_model = keras.Model(inputs=inputs, outputs=outputs)

base_model.compile(loss='binary_crossentropy',metrics=['accuracy'], optimizer='adam')

base_history = base_model.fit(x=X_train, y=y_train, validation_data=(X_val, y_val), epochs=20)

base_result = base_model.evaluate(X_val, y_val)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [91]:
# Basic MLP classification with NumericalFeatureEmbedding 
inputs = keras.layers.Input(shape=X.shape[1])
x = NumericalFeatureEmbedding(num_features=num_features,dim_token=dim_token)(inputs)
# x = keras.layers.Lambda(lambda x: tf.reduce_sum(x, axis=-2))(x)
x = keras.layers.Flatten(name="flatten_embeddings")(x)
x = keras.layers.Dense(32, activation='relu')(x)
x = keras.layers.Dropout(0.1)(x)
x = keras.layers.Dense(16, activation='relu')(x)
outputs = keras.layers.Dense(1, activation='sigmoid')(x)

linear_model = keras.Model(inputs=inputs, outputs=outputs)

linear_model.compile(loss='binary_crossentropy',metrics=['accuracy'], optimizer='adam')

linear_history=linear_model.fit(x=X_train, y=y_train, validation_data=(X_val, y_val), epochs=20)

linear_result=linear_model.evaluate(X_val, y_val)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [92]:
inputs = keras.layers.Input(shape=X.shape[1])
x = NumericalFeatureEmbedding(num_features=num_features,dim_token=dim_token)(inputs)
x = TransformerEncoder(embed_dim=dim_token,num_heads=2,ff_dim=8)(x)
x = keras.layers.Flatten(name="flatten_embeddings")(x)
x = keras.layers.Dense(32, activation='relu')(x)
x = keras.layers.Dropout(0.1)(x)
x = keras.layers.Dense(16, activation='relu')(x)
outputs = keras.layers.Dense(1, activation='sigmoid')(x)

transformer_model = keras.Model(inputs=inputs, outputs=outputs)

transformer_model.compile(loss='binary_crossentropy',metrics=['accuracy'], optimizer='adam')

transformer_history=transformer_model.fit(x=X_train, y=y_train, validation_data=(X_val, y_val), epochs=20)

transformer_result=transformer_model.evaluate(X_val, y_val)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [97]:
print(f"Base results: epochs={len(base_history.history['loss'])} - loss={base_result[0]:.2f} - acc={base_result[1]:.2f}")
print(f"Linear Model results: epochs={len(linear_history.history['loss'])} - loss={linear_result[0]:.2f} - acc={linear_result[1]:.2f}")
print(f"Transformer Model results: epochs={len(transformer_history.history['loss'])} - loss={transformer_result[0]:.2f} - acc={transformer_result[1]:.2f}")


Base results: epochs=20 - loss=0.32 - acc=0.86
Linear Model results: epochs=20 - loss=0.26 - acc=0.90
Transformer Model results: epochs=20 - loss=0.24 - acc=0.92


### Categorical Features Embeddings

In [45]:
cardinalities = [5, 10, 20, 10,5]
num_features = len(cardinalities)
dim_token = 6

x_cat = np.concatenate([np.random.randint(low=0,high=c,size=(100,1)) for c in cardinalities], axis=1)

offsets = np.cumsum([0] + cardinalities[:-1], axis=0)

total_tokens = sum(cardinalities)

# # Learnable weights
d_sqrt_inv = 1 / np.sqrt(dim_token)

emb = keras.layers.Embedding(input_dim=total_tokens, output_dim=dim_token)
b = np.random.uniform(low=-d_sqrt_inv, high=d_sqrt_inv, size=(num_features,dim_token))
output = emb(x_cat + offsets) + b

print("input shape: ",x_num.shape)
print("output shape: ",output.shape)

input shape:  (100, 2)
output shape:  (100, 5, 6)


In [139]:

cardinalities = [3, 5, 10, 15, 20, 25, 30, 35]
num_features = len(cardinalities)
dim_token = 32

X, y = make_classification(
    n_samples=1000, n_features=num_features, 
    n_informative=num_features, n_redundant=0, 
    n_repeated=0, n_classes=2, 
    n_clusters_per_class=2,
    shuffle=True, random_state=123
)

# convert continuous to cat with quantils
X_cat = np.concatenate([
    np.digitize(X[:,i], bins=np.quantile(X[:,i], np.linspace(0,1,c+1))[1:-1])[...,np.newaxis]
    for i, c in enumerate(cardinalities)
], axis=1)

X_train, X_val, y_train, y_val = train_test_split(X_cat, y, test_size=0.2)

ne = CategoricalFeatureEmbedding(cardinalities=cardinalities,dim_token=dim_token)

ne(X_cat).shape


TensorShape([1000, 8, 32])

In [142]:
# Basic MLP classification with CategoricalFeatureEmbedding 

inputs = keras.layers.Input(shape=X_train.shape[1], dtype='int64')

x = CategoricalFeatureEmbedding(cardinalities=cardinalities, dim_token=dim_token)(inputs)
# x = keras.layers.Lambda(lambda x: tf.reduce_sum(x, axis=-2))(x)
x = keras.layers.Flatten(name="flatten_embeddings")(x)
x = keras.layers.Dense(32, activation='relu')(x)
x = keras.layers.Dropout(0.1)(x)
x = keras.layers.Dense(16, activation='relu')(x)
outputs = keras.layers.Dense(1, activation='sigmoid')(x)

linear_model = keras.Model(inputs=inputs, outputs=outputs)

linear_model.compile(loss='binary_crossentropy',metrics=['accuracy'], optimizer='adam')

base_history = linear_model.fit(x=X_train, y=y_train, validation_data=(X_val, y_val), epochs=20)

base_result = linear_model.evaluate(X_val, y_val)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
