In [5]:
# Let's start with necessary imports
import os
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc

In [2]:
!wget -O public_data.zip https://www.codabench.org/datasets/download/e703ab84-4444-4972-9ef7-1ebd0fc09c88/

--2025-01-12 17:57:59--  https://www.codabench.org/datasets/download/e703ab84-4444-4972-9ef7-1ebd0fc09c88/
Resolving www.codabench.org (www.codabench.org)... 129.175.8.21
Connecting to www.codabench.org (www.codabench.org)|129.175.8.21|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://miniodis-rproxy.lisn.upsaclay.fr/coda-v2-prod-private/dataset/2024-04-10-1712755618/1df8aa91cfc4/Datasets.zip?AWSAccessKeyId=EASNOMJFX9QFW4QIY4SL&Signature=HNsGQ%2FBqixg02M6lIrYLKMHEP%2BQ%3D&Expires=1737136379 [following]
--2025-01-12 17:58:00--  https://miniodis-rproxy.lisn.upsaclay.fr/coda-v2-prod-private/dataset/2024-04-10-1712755618/1df8aa91cfc4/Datasets.zip?AWSAccessKeyId=EASNOMJFX9QFW4QIY4SL&Signature=HNsGQ%2FBqixg02M6lIrYLKMHEP%2BQ%3D&Expires=1737136379
Resolving miniodis-rproxy.lisn.upsaclay.fr (miniodis-rproxy.lisn.upsaclay.fr)... 129.175.8.29
Connecting to miniodis-rproxy.lisn.upsaclay.fr (miniodis-rproxy.lisn.upsaclay.fr)|129.175.8.29|:443... connected.
HTTP

In [4]:
!!unzip public_data.zip -d files

['Archive:  public_data.zip',
 'replace files/sglf_for_challenge.npy? [y]es, [n]o, [A]ll, [N]one, [r]ename:  NULL',
 '(EOF or read error, treating as "[N]one" ...)']

Split the data into train and test

In [11]:
# load data and normalize it
background = np.load('files/background.npz')['data']
stds = np.std(background, axis=-1)[:, :, np.newaxis]
background = background/stds
background = np.swapaxes(background, 1, 2)

bbh = np.load('files/bbh_for_challenge.npy')
stds = np.std(bbh, axis=-1)[:, :, np.newaxis]
bbh = bbh/stds
bbh = np.swapaxes(bbh, 1, 2)

sglf = np.load('files/sglf_for_challenge.npy')
stds = np.std(sglf, axis=-1)[:, :, np.newaxis]
sglf = sglf/stds
sglf = np.swapaxes(sglf, 1, 2)

# Create train and test datasets
x_train, x_test, y_train, y_test = train_test_split(
     background, background, test_size=0.2, random_state=42)

print(f'x train/test shapes: {x_train.shape} {x_test.shape}')
print(f'y train/test shapes: {y_train.shape} {y_test.shape}')

x train/test shapes: (80000, 200, 2) (20000, 200, 2)
y train/test shapes: (80000, 200, 2) (20000, 200, 2)


Create the VAE model 

In [42]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
    def call(self, inputs):
        mean, log_var = inputs
        batch = tf.shape(mean)[0]
        dim = tf.shape(mean)[1]
        epsilon = tf.random.normal(shape=(batch, dim))
        return mean + tf.exp(0.5 * log_var) * epsilon


class MSELossLayer(layers.Layer):
    """Custom layer to calculate MSE loss."""
    def call(self, inputs):
        y_true, y_pred = inputs
        loss = tf.keras.losses.mse(y_true, y_pred)
        # Change the axis to (1,) to sum over the sequence dimension
        return tf.reduce_sum(loss, axis=(1,))  # Sum over sequence


import tensorflow as tf
from tensorflow.keras import layers, models

class VAE(tf.keras.Model):
    def __init__(self):
        super().__init__()

    def transformer_encoder(self, inputs, head_size, num_heads, ff_dim, dropout=0):
        x = layers.MultiHeadAttention(
            key_dim=head_size, num_heads=num_heads, dropout=dropout
        )(inputs, inputs)
        x = layers.Dropout(dropout)(x)
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        res = x + inputs

        x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
        x = layers.Dropout(dropout)(x)
        x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        return x + res

    def dense_decoder(self, inputs, ff_dim, output_dim, dropout=0):
        # Flatten the input to apply dense layers
        x = layers.Flatten()(inputs)
        x = layers.Dense(ff_dim, activation="relu")(x)
        x = layers.Dropout(dropout)(x)
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        res = layers.Dense(ff_dim)(x)  # Align dimensions for residual

        x = layers.Dense(ff_dim, activation="relu")(x)
        x = layers.Dropout(dropout)(x)
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        x = x + res

        x = layers.Dense(ff_dim, activation="relu")(x)
        x = layers.Dropout(dropout)(x)
        x = layers.Dense(np.prod(inputs.shape[1:]))(x)  # Output dimension should match the flattened input dimension
        x = layers.LayerNormalization(epsilon=1e-6)(x)

        # Reshape back to original input shape
        x = layers.Reshape(inputs.shape[1:])(x)
        return x + inputs  # Adding input directly, assuming output_dim matches inputs shape[-1]

    def build_model(self, input_shape, latent_dim, head_size, num_heads, ff_dim, num_transformer_blocks, dropout=0.1):
        # Encoder
        inputs = layers.Input(shape=input_shape, name="encoder_input")
        x = inputs
        for _ in range(num_transformer_blocks):
            x = self.transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
            print("Encoder layer shape is " + str(x.shape))

        # Flatten for latent space
        x = layers.Flatten()(x)  # Shape: (None, 200 * 2)

        # Latent space
        print("Latent space shape is " + str(x.shape))
        
        z_mean = layers.Dense(latent_dim, name="z_mean")(x)
        print("z_mean: " + str(z_mean.shape))
        z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
        print("z_log_var: " + str(z_log_var.shape))

        #Sampling layer
        sampling_layer = Sampling()
        print("sampling layer: " + str(sampling_layer))
        z = sampling_layer([z_mean, z_log_var])
        
        # Decoder
        latent_inputs = layers.Input(shape=(latent_dim,), name="z_sampling")
        x = layers.Dense(np.prod(input_shape), activation="relu")(latent_inputs)
        x = layers.Reshape(input_shape)(x)
        for _ in range(num_transformer_blocks):
            x = self.dense_decoder(x, ff_dim, input_shape[-1], dropout)
            print("Decoder layer shape is " + str(x.shape))
        outputs = layers.Dense(input_shape[-1])(x)

        # Build models
        self.encoder = models.Model(inputs, [z_mean, z_log_var, z], name="encoder")
        self.decoder = models.Model(latent_inputs, outputs, name="decoder")
        reconstructed = self.decoder(z)

        self.vae = models.Model(inputs, reconstructed, name="vae")
        self.vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4))

      def train_step(self, data):
        with tf.GradientTape() as tape:
            # Pass data through encoder
            z_mean, z_log_var, z = self.encoder(data)

            # Reconstruct data from latent space
            reconstruction = self.decoder(z)

            # Calculate reconstruction loss (Mean Squared Error for continuous data)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    tf.keras.losses.mean_squared_error(data, reconstruction), axis=(1, 2)
                )
            )

            # Calculate KL divergence
            kl_loss = -0.5 * tf.reduce_mean(
                tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1)
            )

            # Total loss
            total_loss = reconstruction_loss + kl_loss

        # Apply gradients
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        # Update custom metrics
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        # Return metrics
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def test_step(self, data):
        # For evaluation, calculate losses without updating weights
        z_mean, z_log_var, z = self.encoder(data)
        reconstruction = self.decoder(z)
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(
                tf.keras.losses.mean_squared_error(data, reconstruction), axis=(1, 2)
            )
        )
        kl_loss = -0.5 * tf.reduce_mean(
            tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1)
        )
        total_loss = reconstruction_loss + kl_loss

        # Update metrics
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    @property
    def metrics(self):
        # List all metrics for reset and display
        return [self.total_loss_tracker, self.reconstruction_loss_tracker, self.kl_loss_tracker]
    def fit(self, x_train, **kwargs):
        history = self.vae.fit(x_train, x_train, **kwargs)
        return history

    def save(self, path):
        self.encoder.save(path + "_encoder.keras")
        self.decoder.save(path + "_decoder.keras")
        self.vae.save(path + "_vae.keras")

    def load(self, path):
        self.encoder = models.load_model(path + "_encoder.keras", custom_objects={"Sampling": Sampling})
        self.decoder = models.load_model(path + "_decoder.keras")
        self.vae = models.load_model(path + "_vae.keras", custom_objects={"Sampling": Sampling})

    def predict(self, X, batch_size=32):
        return self.vae.predict(X, batch_size=batch_size)

vae = VAE()
vae = vae.build_model(
    input_shape=x_train.shape[1:],
    latent_dim=16,
    head_size=64,
    num_heads=8,
    ff_dim=128,
    num_transformer_blocks=4,
    dropout=0.1,
)

history = vae.fit(
    x_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)],
)


Encoder layer shape is (None, 200, 2)
Encoder layer shape is (None, 200, 2)
Encoder layer shape is (None, 200, 2)
Encoder layer shape is (None, 200, 2)
Latent space shape is (None, 400)
z_mean: (None, 16)
z_log_var: (None, 16)
sampling layer: <Sampling name=sampling_21, built=False>
Decoder layer shape is (None, 200, 2)
Decoder layer shape is (None, 200, 2)
Decoder layer shape is (None, 200, 2)
Decoder layer shape is (None, 200, 2)


AttributeError: 'NoneType' object has no attribute 'fit'

ATTEMPT 2