In [None]:
import tensorflow as tf
from keras.losses import CategoricalCrossentropy
from keras.layers import Input, LSTM, RepeatVector, TextVectorization, Embedding  
from keras.layers import Dense, Lambda # Wraps arbitrary expressions as a Layer object.--> can turn an arbitrary function into a layer 
from keras.models import Model
import keras.backend as K

from tensorflow import keras
import os, pathlib, shutil, random
import re 
import string


## Defining Model and Loss 

In [None]:
def crossKL_loss_fn(y_true, y_pred, z_log_sigma, z_mean):
  """Return reconstruction cross-entropy plus the KL regulariser of a VAE.

  Args:
    y_true: Target distribution passed to `CategoricalCrossentropy`.
    y_pred: Predicted distribution passed to `CategoricalCrossentropy`.
    z_log_sigma: Encoder output used as the log-variance term of the
      closed-form KL expression below (despite the "sigma" in its name,
      `exp(z_log_sigma)` is used where the variance appears).
    z_mean: Encoder output used as the mean of the approximate posterior.

  Returns:
    Scalar tensor: cross-entropy + KL divergence to a standard normal prior.
  """
  xent = CategoricalCrossentropy()(y_true, y_pred)

  # KL(q(z|x) || N(0, I)) = -0.5 * (1 + log_var - mean^2 - exp(log_var)).
  # The leading minus sign is required: without it the term is the negative
  # KL divergence and minimising the loss pushes the posterior away from the
  # prior instead of towards it.
  kl_loss = -0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma))

  return xent + kl_loss


In [None]:
class Sampler(tf.keras.layers.Layer):
  """Reparameterisation-trick layer: draws z = mean + std * epsilon.

  `epsilon` is standard-normal noise with the same (batch, latent) shape as
  `z_mean`, so the sample stays differentiable w.r.t. the encoder outputs.
  """

  def __init__(self, units=32):
    # `units` is kept only for backward compatibility; it is not used.
    super().__init__()

  def call(self, inputs, trainable=False):
    """Sample a latent vector.

    Args:
      inputs: Pair `(z_mean, z_log_var)` of rank-2 tensors (the code indexes
        dimensions 0 and 1 of `z_mean`).
      trainable: Unused; kept so existing callers do not break.

    Returns:
      Tensor shaped like `z_mean` containing the sampled latent vector.
    """
    z_mean, z_log_var = inputs
    batch_size = tf.shape(z_mean)[0]
    latent_dim = tf.shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0, stddev=1.0)

    # The noise must be scaled by the standard deviation,
    # sigma = exp(0.5 * log_var), not by the log-variance itself (which can
    # be negative or zero and is not a scale parameter).
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


class LSTM_VAE(tf.keras.Model):
  """Sequence VAE: embed tokens -> encoder -> sample z -> repeat -> decoder.

  Args:
    encoder: Model mapping embedded inputs to `(z_mean, z_log_var)`.
    decoder: Model mapping the repeated latent sequence to a reconstruction.
    timesteps: How many times the sampled latent vector is repeated before it
      is fed to the decoder (defaults to the previously hard-coded 3).
    vocab_size: `input_dim` of the embedding (defaults to the previous 20000).
    embedding_dim: `output_dim` of the embedding (defaults to the previous 600).
    **kwargs: Forwarded to `tf.keras.Model`.
  """

  def __init__(self, encoder, decoder, timesteps=3, vocab_size=20000,
               embedding_dim=600, **kwargs):
    super().__init__(**kwargs)
    self.encoder = encoder
    self.decoder = decoder
    self.sampler = Sampler()
    self.embed = Embedding(input_dim=vocab_size, output_dim=embedding_dim)
    # Built once here instead of instantiating a new layer on every step.
    self.repeat = RepeatVector(timesteps)
    self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
    self.reconstruction_loss_tracker = keras.metrics.Mean(
        name="reconstruction_loss")
    self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

  @property
  def metrics(self):
    """Metrics listed here are reset by Keras at the start of each epoch."""
    return [self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker]

  def train_step(self, data):
    """Run one optimisation step and return the running loss metrics.

    Args:
      data: `(feats, targets)` batch; `targets` is unpacked but not used
        because the model reconstructs its own input.

    Returns:
      Dict with the running means of total, reconstruction and KL loss.
    """
    feats, _ = data
    with tf.GradientTape() as tape:
      embedded_inputs = self.embed(feats)
      z_mean, z_log_var = self.encoder(embedded_inputs)
      z = self.sampler([z_mean, z_log_var])
      z = self.repeat(z)
      reconstruction = self.decoder(z)
      # Flatten per example using the actual batch size; a hard-coded 32
      # fails on a final, smaller batch.
      reconstruction = tf.reshape(reconstruction, (tf.shape(feats)[0], -1))

      # NOTE(review): `feats` appears to hold integer token ids, while
      # categorical_crossentropy expects a target distribution of the same
      # shape as the prediction. This only lines up because the flattened
      # decoder output happens to have as many columns as `feats`; a
      # per-token sparse cross-entropy over the vocabulary is probably what
      # is intended, but that needs a matching decoder change -- confirm.
      per_example_xent = keras.losses.categorical_crossentropy(
          feats, reconstruction)
      # Average over the batch. The previous axis-less reduce_sum collapsed
      # the batch first, turning the outer reduce_mean into a no-op and
      # making the loss scale with batch size.
      reconstruction_loss = tf.reduce_mean(per_example_xent)

      # KL(q(z|x) || N(0, I)); the leading minus sign is essential, otherwise
      # training maximises the divergence from the prior.
      kl_loss = tf.reduce_mean(
          -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)))
      total_loss = reconstruction_loss + kl_loss

    grads = tape.gradient(total_loss, self.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
    self.total_loss_tracker.update_state(total_loss)
    self.reconstruction_loss_tracker.update_state(reconstruction_loss)
    self.kl_loss_tracker.update_state(kl_loss)

    return {
        "total_loss": self.total_loss_tracker.result(),
        "reconstruction_loss": self.reconstruction_loss_tracker.result(),
        "kl_loss": self.kl_loss_tracker.result(),
    }

  # def call(self, inputs, training=None):

  #   ### vae 
  #   print('building encoder')
  #   embedded_inputs = self.embed(inputs)
  #   z_mean, z_log_var = self.encoder(embedded_inputs)
  #   z = self.sampler(z_mean, z_log_var)
  #   #z = self.latent2hidden(z)
  #   print('building decoder')
  #   self.outputs = self.decoder(z)

  #   return self.outputs 

  

## Preparing Data

In [None]:
# Download the IMDB review archive; `-O` saves it under its remote file name
# (aclImdb_v1.tar.gz) in the current working directory.
!curl -O https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 80.2M  100 80.2M    0     0  9319k      0  0:00:08  0:00:08 --:--:-- 16.6M


In [None]:
# Unpack the downloaded archive into the current working directory.
!tar -xf aclImdb_v1.tar.gz

In [None]:
# Move the `unsup` folder out of train/ -- presumably so that only the labelled
# class folders remain when the directory is later read as a dataset.
# NOTE(review): this fails on a second run because the folder has already moved.
!mv aclImdb/train/unsup aclImdb/

In [None]:
# Carve a validation split out of the training data by moving 20% of the
# files of each class from aclImdb/train/<class> to aclImdb/val/<class>.
base_dir = pathlib.Path('aclImdb')
val_dir = base_dir / 'val'
train_dir = base_dir / 'train'

for category in ('neg', 'pos'):
  category_val_dir = val_dir / category
  # Make the cell re-runnable: if this class was already split, do nothing.
  # (Merely tolerating the existing directory would move a *further* 20% of
  # the remaining training files on every re-run.)
  if category_val_dir.is_dir() and any(category_val_dir.iterdir()):
    continue
  os.makedirs(category_val_dir, exist_ok=True)

  # os.listdir order is unspecified, so sort first to make the seeded
  # shuffle pick the same validation files on every machine.
  files = sorted(os.listdir(train_dir / category))
  random.Random(1337).shuffle(files)
  num_val_samples = int(0.2 * len(files))
  # Slice from an explicit start index: files[-0:] would select *every* file
  # when the 20% share rounds down to zero.
  val_files = files[len(files) - num_val_samples:]
  for fname in val_files:
    shutil.move(train_dir / category / fname, category_val_dir / fname)

FileExistsError: ignored

In [None]:
# Build one batched text dataset per split directory (train, val, test).
batch_size = 32
train_ds, val_ds, test_ds = (
    keras.utils.text_dataset_from_directory(split_dir, batch_size=batch_size)
    for split_dir in ("aclImdb/train", "aclImdb/val", "aclImdb/test")
)

Found 25000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.


In [None]:
# Cap the vocabulary and encode every review as a fixed-length sequence of
# integer token ids (output_mode='int').
max_length = 600
max_tokens = 20000
text_vectorization = TextVectorization(
    output_mode='int',
    max_tokens=max_tokens,
    output_sequence_length=max_length,
)

# Strip the labels so that only raw text is left, then let the layer index
# the training vocabulary via adapt().
text_only_train_ds = train_ds.map(lambda text, _label: text)
text_vectorization.adapt(text_only_train_ds)


In [None]:
## Turn raw text into integer token sequences for every split
def _vectorize_example(text, label):
  """Replace the text with its token-id sequence; the label passes through."""
  return text_vectorization(text), label


int_train_ds = train_ds.map(_vectorize_example, num_parallel_calls=4)
int_val_ds = val_ds.map(_vectorize_example, num_parallel_calls=4)
int_test_ds = test_ds.map(_vectorize_example, num_parallel_calls=4)

## Model Training 

In [None]:
# Model hyperparameters shared by the encoder/decoder cells below.
input_dim = 600          # feature size of each encoder timestep
intermediate_dim = 32    # LSTM / hidden Dense width
latent_dim = 8           # size of z_mean / z_log_var
vocab_size = 20000
timesteps = 3

In [None]:
# Encoder: an LSTM summarises the input sequence into one vector, from which
# two parallel Dense heads produce the posterior parameters.
enc_inputs = Input(shape=(None, input_dim))
x = LSTM(intermediate_dim)(enc_inputs)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)
encoder = keras.Model(
    inputs=enc_inputs,
    outputs=[z_mean, z_log_var],
    name='encoder',
)

encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 600)]  0           []                               
                                                                                                  
 lstm (LSTM)                    (None, 32)           81024       ['input_1[0][0]']                
                                                                                                  
 z_mean (Dense)                 (None, 8)            264         ['lstm[0][0]']                   
                                                                                                  
 z_log_var (Dense)              (None, 8)            264         ['lstm[0][0]']                   
                                                                                            

In [None]:
# Applies the Sampler layer to the encoder's symbolic outputs.
# NOTE(review): `latent_inputs` is reassigned to a fresh Input(...) in the
# following cell, so this value looks unused -- confirm and consider removing.
latent_inputs = Sampler(units=(latent_dim,))([z_mean, z_log_var])

In [None]:
# Decoder: takes a sequence of latent vectors (the repetition over timesteps
# is done by the caller, not here), projects it, runs an LSTM and emits a
# softmax per timestep.
# NOTE(review): the output width is a literal 200, not `vocab_size`; confirm
# this is intentional.
latent_inputs = Input(shape=(None, latent_dim))

latent_projection = Dense(intermediate_dim, activation='linear')
decoder_lstm = LSTM(intermediate_dim, return_sequences=True, return_state=True)
output_head = Dense(200, activation='softmax')

z = latent_projection(latent_inputs)
z, _, _ = decoder_lstm(z)  # the two returned LSTM states are discarded
decoder_outputs = output_head(z)
decoder = keras.Model(
    inputs=latent_inputs,
    outputs=decoder_outputs,
    name='decoder',
)

decoder.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None, 8)]         0         
                                                                 
 dense (Dense)               (None, None, 32)          288       
                                                                 
 lstm_1 (LSTM)               [(None, None, 32),        8320      
                              (None, 32),                        
                              (None, 32)]                        
                                                                 
 dense_1 (Dense)             (None, None, 200)         6600      
                                                                 
Total params: 15,208
Trainable params: 15,208
Non-trainable params: 0
_________________________________________________________________


In [None]:
## Instantiate, compile and train the VAE
vae = LSTM_VAE(encoder=encoder, decoder=decoder)

# No `loss=` argument: the loss is computed inside LSTM_VAE.train_step.
# run_eagerly=True executes the step eagerly rather than as a compiled graph.
vae.compile(optimizer="rmsprop", run_eagerly=True)

vae.fit(int_train_ds, epochs=30)

Epoch 1/30






KeyboardInterrupt: ignored