In [1]:
import numpy as np

In [2]:
# Small toy corpus used to demonstrate tokenization and padding.
docs = [
    'recurrent neural network #@!',
    'neural network @',
    'artificial neural',
    'connections between nodes',
    'can create a cycle',
    'allowing output',
    'some nodes to affect subsequent',
    'exhibit temporal',
    'dynamic behavior',
    'type of Neural Network',
    'affect subsequent',
]

In [3]:
pip install tensorflow==2.13.1

Note: you may need to restart the kernel to use updated packages.


### In TensorFlow versions above 2.2, Keras is available as a wrapper over TensorFlow (`tf.keras`)

In [4]:
pip install keras==2.13.1

Note: you may need to restart the kernel to use updated packages.


In [5]:
import tensorflow as tf
# Print the installed TensorFlow version to confirm the import works in this kernel.
print(tf.__version__)

ModuleNotFoundError: No module named 'tensorflow.python'

In [None]:
from keras.preprocessing.text import Tokenizer

### Performing tokenization

**Note:** `Tokenizer` is a legacy API; **tf.keras.layers.TextVectorization** is the recommended replacement in current Keras.

Here, any out-of-vocabulary (OOV) word will be mapped to the `<UNK>` token.

In [None]:
# Words that are not in the fitted vocabulary are mapped to the "<UNK>" token.
tokenizer = Tokenizer(oov_token="<UNK>")

In [None]:
# Build the vocabulary from the sample sentences in `docs`.
tokenizer.fit_on_texts(docs)

In [None]:
# Inspect how many times each word occurred in the fitted texts.
tokenizer.word_counts

### Index 0 is not assigned to any word because it is reserved for padding

In [None]:
# Inspect the word -> integer index mapping learned by the tokenizer
tokenizer.word_index

In [None]:
# Number of documents (sentences) the tokenizer has been fitted on
tokenizer.document_count

In [None]:
# Convert each sentence into a list of token ids.
# Each word is replaced by its index in the generated vocabulary (its token id).
sequences = tokenizer.texts_to_sequences(docs)
sequences

### Perform padding

In [None]:
# Make every sequence exactly `maxlen` tokens long;
# padding='post' adds the padding at the end of each sequence.
from keras.utils import pad_sequences

n_sequences = pad_sequences(sequences=sequences, maxlen=10, padding='post')
n_sequences

### Sentiment Analysis using RNN

In [None]:
from keras.datasets import imdb

# Load the IMDB review dataset with the loader's default arguments.
data = imdb.load_data()

In [None]:
# Inspect the raw return value: its shape when viewed as an array, its Python type and its length.
print(np.array(data).shape, type(data), len(data))

In [None]:
# data[0][0] is the training-review array (X_train in the split form used below),
# so this is the number of training reviews, not the number of movies.
print(f"Number of training reviews = {len(data[0][0])}")

### Loading in split format (train/test)

In [None]:
# Unpack the loader's result directly into training and test features/labels.
(X_train, y_train), (X_test, y_test) = imdb.load_data()

In [None]:
# Check the container types of the features and the labels.
print(type(X_train), type(y_train))

X_train and X_test each contain 25000 reviews, but the reviews have different lengths (a varying number of word indices). Because of this, each is loaded as a 1D array of lists rather than as a 2D matrix, so its shape is `(25000,)`.

In [None]:
# Each of these is 1D at this point: one entry per review.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
# Length (in tokens) of the longest training review; 0 if there are no reviews
num_words = max((len(review) for review in X_train), default=0)

num_words

### Perform padding

In [None]:
from keras.utils import pad_sequences

# Pad every review to the length of the longest training review (num_words),
# adding the padding at the end, so both sets become rectangular 2D arrays.
# Note: X_train / X_test are overwritten with their padded versions.
X_train = pad_sequences(sequences=X_train, maxlen=num_words, padding='post')
X_test = pad_sequences(sequences=X_test, maxlen=num_words, padding='post')

### Build model
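- The model is built with Keras's **Sequential** API, which stacks layers one after another (the name refers to the stack of layers, not to sequence data)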
-----
##### Visualizing RNN model
- **return_sequences** controls whether the RNN returns the full sequence of outputs (one per timestep, i.e. per word in the sentence) or only the output of the last timestep

- If **True**, return shape = (None, #Timesteps, #units)
- If **False**, return shape = (None, #units)

where **None** refers to the batch size

---------
### Parameters of a RNN
- Weights for the input to the hidden state: These are learned weights that map the input data to the hidden state.
- Weights for the recurrent connections: These are the weights that connect the hidden state at time t to the hidden state at time t+1.
- Biases: Each unit has a bias term.
- The formula to calculate the total number of parameters in an RNN is: **RNN Parameters =(units×input size)+(units×units)+units**

Here:

units = 32
input size = 1 (since each time step has only 1 feature)

Substituting these values:

RNN Parameters=(32×1)+(32×32)+32 = 1088

In [None]:
# `units` is the number of neurons (hidden units) in a layer
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Flatten

# X_train.shape => ((25000, 2494))

# Each review is fed as `timesteps` steps carrying a single feature each.
timesteps = X_train.shape[1]
rnn_layer = SimpleRNN(units=32, input_shape=(timesteps, 1), return_sequences=False)
output_layer = Dense(units=1, activation='sigmoid')

# Recurrent layer followed by a single sigmoid unit for the binary sentiment output.
model = Sequential([rnn_layer, output_layer])

model.summary()

In [None]:
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

In [None]:
# model.save('model.h5')
# model.save('model.pkl')

In [None]:
# Print the weights before and after loading to see that load_weights replaces them.
# Requires a previously saved 'model.h5' in the working directory.
print(model.weights)
model.load_weights('model.h5')
print(model.weights)

In [None]:
# Run the model on the padded test reviews; the final Dense layer has a single
# sigmoid unit, so there is one prediction per review.
predictions = model.predict(X_test)
predictions.shape