In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import make_classification

In [2]:
# Synthetic binary-classification data: 10,000 samples with 500 features, of
# which only 10 are informative. The fixed seed pins the generated values.
X, y = make_classification(
    n_samples=10_000, n_features=500, n_informative=10, n_classes=2,
    random_state=42,
)

In [3]:
# Hold out 20% of the samples for testing. The split is stratified on y and
# uses a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [4]:
# Min-max scale the features. The scaler learns its ranges from the training
# split only, and the test split is transformed with those same ranges.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
class AutoEncoder(tf.keras.Model):
  """Fully connected autoencoder with a mirrored encoder/decoder pair.

  The encoder is a stack of ReLU ``Dense`` layers that narrows down to
  ``latent_dim`` units. The decoder replays the hidden widths in reverse order
  and ends in a sigmoid ``Dense`` layer with ``n_features`` units.

  Args:
    n_features: Number of units in the last decoder layer, i.e. the width of
      the reconstruction.
    latent_dim: Number of units in the last encoder layer, i.e. the width of
      the bottleneck.
    hidden_units: Widths of the encoder's hidden layers, widest first. The
      decoder uses the same widths reversed. The default reproduces the
      4096-2048-1024-512 stack this class was originally written with.
  """

  def __init__(self, n_features, latent_dim,
               hidden_units=(4096, 2048, 1024, 512)):
    super().__init__()
    self.n_features = n_features
    self.latent_dim = latent_dim

    # Materialise once so any iterable (list, generator, ...) can be passed
    # and then walked twice: forwards for the encoder, backwards for the
    # decoder.
    widths = tuple(hidden_units)

    self.encoder = tf.keras.Sequential(
        [tf.keras.layers.Dense(w, activation='relu') for w in widths]
        + [tf.keras.layers.Dense(latent_dim, activation='relu')]
    )
    # The sigmoid on the last layer bounds every reconstructed value to
    # (0, 1), so inputs are expected to be scaled into that range.
    self.decoder = tf.keras.Sequential(
        [tf.keras.layers.Dense(w, activation='relu') for w in reversed(widths)]
        + [tf.keras.layers.Dense(n_features, activation='sigmoid')]
    )

  def call(self, inputs):
    """Encode ``inputs`` and return the decoder's reconstruction of them."""
    encoded = self.encoder(inputs)
    return self.decoder(encoded)

  def reduce_dim_with_encoder(self, inputs):
    """Return the encoder's output for ``inputs`` without decoding it."""
    return self.encoder(inputs)

In [6]:
# Input width is taken from the data (its last axis); the bottleneck width is
# chosen by hand.
n_features = X.shape[-1]
latent_space_dim = 10

In [7]:
# The dataset and the train/test split are seeded with 42, but without a
# TensorFlow seed the weight initialisation and batch shuffling differ on
# every run. This single call seeds Python's `random`, NumPy and TensorFlow.
tf.keras.utils.set_random_seed(42)

model = AutoEncoder(n_features, latent_space_dim)

model.compile(optimizer='adam', loss='mae', metrics=['mae'])

# An autoencoder learns to reproduce its own input, so the features serve as
# both x and y. The held-out split is only used to monitor the loss.
model.fit(
    x=X_train,
    y=X_train,
    batch_size=256,
    epochs=50,
    validation_data=(X_test, X_test)
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7d1b1050c4c0>

In [8]:
# Project both splits into the latent space using only the encoder half.
X_train_new, X_test_new = map(model.reduce_dim_with_encoder, (X_train, X_test))

In [9]:
# Report the dimensionality of each split before and after encoding.
print(
    f'Shape of train data before: {X_train.shape}',
    f'Shape of train data after: {X_train_new.shape}',
    sep='\n',
)

print(
    f'Shape of test data before: {X_test.shape}',
    f'Shape of test data after: {X_test_new.shape}',
    sep='\n',
)

Shape of train data before: (8000, 500)
Shape of train data after: (8000, 10)
Shape of test data before: (2000, 500)
Shape of test data after: (2000, 10)
