In [None]:
# LEARNING WORD EMBEDDINGS WITH THE EMBEDDING LAYER
# The simplest way to associate a dense vector with a word is to choose the vector at random. The problem with this approach is 
# that the resulting embedding space has no structure: for instance, the words accurate and exact may end up with completely
# different embeddings, even though they’re interchangeable in most sentences. It’s difficult for a deep neural network to make
# sense of such a noisy, unstructured embedding space.



In [None]:
# Listing 6.5 Instantiating an Embedding layer
# The Embedding layer is best understood as a dictionary that maps integer indices (which stand for specific words) to dense
# vectors. It takes integers as input, it looks up these integers in an internal dictionary, and it returns the associated
# vectors. It’s effectively a dictionary lookup.

# Word index -> Embedding layer -> Corresponding word vector

# The Embedding layer takes at least two arguments: the number of possible tokens (here, 1,000: 1 + maximum word index)
# and the dimensionality of the embeddings (here, 64). For example: embedding_layer = Embedding(1000, 64)


In [10]:
# Listing 6.6 Loading the IMDB data for use with an Embedding layer
from tensorflow.keras.datasets import imdb
from tensorflow.keras import preprocessing
from tensorflow.keras.layers import Embedding

#from keras.utils.data_utils import pad_sequences

# Vocabulary cap: only the `max_features` most common words are kept as features.
max_features = 10000

# Number of word indices every review is reduced (or padded) to,
# counted among the `max_features` most common words.
maxlen = 20

# Each review is loaded as a list of integer word indices.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Turn both lists of integer sequences into 2D integer tensors of shape
# (samples, maxlen). With pad_sequences' documented defaults, padding and
# truncation are both applied at the start of each sequence.
x_train, x_test = (
    preprocessing.sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (x_train, x_test)
)

In [11]:
# Listing 6.7 Using an Embedding layer and classifier on the IMDB data
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense

# Binary sentiment classifier: Embedding -> Flatten -> single sigmoid unit.
model = Sequential()

# Size the embedding table from `max_features` (the vocabulary cap passed to
# imdb.load_data) instead of repeating the literal 10000, so the two values
# cannot drift apart. `input_length` specifies the maximum input length so the
# embedded inputs can later be flattened; after this layer the activations
# have shape (samples, maxlen, 8).
# NOTE(review): `input_length` is reported as deprecated in Keras 3 — confirm
# the target Keras version before upgrading.
model.add(Embedding(max_features, 8, input_length=maxlen))

# Flattens the 3D tensor of embeddings into a 2D tensor of shape (samples, maxlen * 8).
model.add(Flatten())

# Adds the classifier on top.
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

model.summary()

# 20% of the training data is held out for validation during fitting.
history = model.fit(
    x_train, y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 20, 8)             80000     
                                                                 
 flatten (Flatten)           (None, 160)               0         
                                                                 
 dense (Dense)               (None, 1)                 161       
                                                                 
Total params: 80,161
Trainable params: 80,161
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
