In [9]:
# A dataset
# Imported here as well as in the notebook's import cell: this cell appears
# before that cell, so it needs these names to run on a fresh kernel.
import itertools
import random

import numpy as np

# Fixed seed so the randomly drawn labels are identical on every run.
SEED = 42
# Fraction of the samples assigned to the training split.
TRAIN_FRACTION = 0.8

# Define the input sequences and their labels
# Generate all permutations (the 24 ways to label the 4 vertices of a square with 1-4)
X = list(itertools.permutations([1, 2, 3, 4]))
# Draw one random class label (0-3) per permutation. A dedicated Random
# instance keeps the draw reproducible without reseeding the global `random` state.
labels = random.Random(SEED).choices([0, 1, 2, 3], k=len(X))
# Create a train/test set
X = np.array(X)
y = np.array(labels)
# Positional split (no shuffling): the first 80% of the rows are the training
# set and the remaining rows are the test set.
split_at = int(TRAIN_FRACTION * len(X))
X_train, X_test = X[:split_at], X[split_at:]
y_train, y_test = y[:split_at], y[split_at:]

# Understanding RNN Variants

1. Simple RNN
2. LSTM
3. GRU
4. Comparison of variants

In [15]:


import numpy as np
import random
import itertools
import tensorflow as tf

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

## Simple RNN
1. Basic variant of RNNs, also known as the vanilla RNN.
2. It processes sequential data by maintaining a hidden state that is updated at each time step.
3. SimpleRNN suffers from the vanishing gradient problem, which makes it hard to learn dependencies that span many time steps.


In [16]:
# Define the SimpleRNN model
# The layers are passed to Sequential in the order the data flows through them.
model1 = Sequential([
    # Embedding: maps each of 5 integer ids to an 8-dimensional vector,
    # for input sequences of length 4.
    Embedding(input_dim=5, output_dim=8, input_length=4),
    # Recurrent layer: a vanilla RNN with 16 units.
    SimpleRNN(units=16),
    # Classifier head: softmax over the 4 output classes.
    Dense(units=4, activation='softmax'),
])

## LSTM (Long Short-Term Memory):
1. Advanced RNN variant designed to mitigate the vanishing gradient problem.
2. Introduces memory cells and gating mechanisms to retain or forget information over time.
3. Effective in capturing long-term dependencies in sequential data.


In [17]:
# Define the LSTM model
# The layers are passed to Sequential in the order the data flows through them.
model2 = Sequential([
    # Embedding: maps each of 5 integer ids to an 8-dimensional vector,
    # for input sequences of length 4.
    Embedding(input_dim=5, output_dim=8, input_length=4),
    # Recurrent layer: an LSTM with 16 units.
    tf.keras.layers.LSTM(units=16),
    # Classifier head: softmax over the 4 output classes.
    Dense(units=4, activation='softmax'),
])

## GRU (Gated Recurrent Unit):
1. Another RNN variant that addresses the vanishing gradient problem.
2. Uses gating mechanisms like LSTM but with a simplified architecture: it has two gates (update and reset) and merges the memory cell into the hidden state.
3. GRU has fewer parameters and is computationally more efficient than LSTM.

In [18]:


# Define the GRU model
# The layers are passed to Sequential in the order the data flows through them.
model3 = Sequential([
    # Embedding: maps each of 5 integer ids to an 8-dimensional vector,
    # for input sequences of length 4.
    Embedding(input_dim=5, output_dim=8, input_length=4),
    # Recurrent layer: a GRU with 16 units.
    tf.keras.layers.GRU(units=16),
    # Classifier head: softmax over the 4 output classes.
    Dense(units=4, activation='softmax'),
])

## Comparison of variants

In [10]:

# Compile the models
# All three variants share the same optimizer, loss, and metric, so they are
# configured in one loop. The sparse loss takes integer class labels directly.
for rnn_model in (model1, model2, model3):
    rnn_model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

In [None]:
# Train the models
# Each variant is fitted with identical settings, in the order
# SimpleRNN, LSTM, GRU; the returned History objects keep that order.
history1, history2, history3 = [
    rnn_model.fit(X, y, epochs=10, validation_split=0.2)
    for rnn_model in (model1, model2, model3)
]



In [12]:
# Get accuracy of each model
# Per-epoch training accuracy recorded by each fit() call.
train_accuracy1, train_accuracy2, train_accuracy3 = [
    run.history['accuracy'] for run in (history1, history2, history3)
]
# Highest training accuracy reached in any epoch, per model.
best1, best2, best3 = [
    max(per_epoch)
    for per_epoch in (train_accuracy1, train_accuracy2, train_accuracy3)
]

In [19]:

# Compare RNN variants
# Report the best training accuracy of each model on a single line.
comparison_line = f'Vanilla Rnn: {best1},LSTM: {best2},GRU: {best3}'
print(comparison_line)

Vanilla Rnn: 0.42105263471603394,LSTM: 0.31578946113586426,GRU: 0.31578946113586426


In [None]:
# Accessing training scores
# train_loss1 = history1.history['loss']
# train_loss2 = history2.history['loss']
# train_loss3 = history3.history['loss']