In [1]:
import tensorflow as tf

In [2]:
# Embedding lookup table: a vocabulary of 1000 word indices, each mapped to a
# 5-dimensional embedding vector. Passing a word index to this layer returns
# the embedding vector for that word. The dimension (5) is a hyperparameter
# that can be tuned.
embedding_layer = tf.keras.layers.Embedding(input_dim=1000, output_dim=5)

In [3]:
# Look up three words, given by their indices. Every index is converted into a
# 5-dimensional vector. With one-hot encoding each word would instead need a
# 1000-dimensional vector, which is far less efficient.
word_indices = tf.constant([1, 2, 3])
result = embedding_layer(word_indices)

In [4]:
# Convert the looked-up embeddings to a NumPy array for display:
# one row per input index, one column per embedding dimension.
result.numpy()

array([[-0.03873558,  0.03796942,  0.00286834, -0.04318116,  0.01481909],
       [ 0.04927064, -0.01525002,  0.03634815, -0.02884519, -0.02440195],
       [ 0.00588899, -0.04854077,  0.03761308, -0.0149603 , -0.01793743]],
      dtype=float32)

In [5]:
# Sequence model: every input is a sequence of 10 word indices drawn from a
# 1000-word vocabulary. The Embedding layer maps each index to a 64-dimensional
# vector, so a batch of inputs yields an output of shape (batch, 10, 64).
#
# The sequence length is declared with an explicit Input layer rather than the
# Embedding `input_length` argument: that argument is deprecated in Keras 3,
# where it only triggers a warning and is otherwise ignored.
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(10,), dtype="int32"))
model.add(tf.keras.layers.Embedding(1000, 64))



In [6]:
import numpy as np

# Batch of 32 sequences, each containing 10 words. The words are represented
# by their indices in [0, 1000). A seeded generator is used so that the same
# batch is produced on every Restart & Run All.
rng = np.random.default_rng(42)
input_array = rng.integers(1000, size=(32, 10))

In [7]:
# Configure the model with the "rmsprop" optimizer and the "mse" loss.
# No training happens in the cells shown here; the model is only used for
# prediction below.
model.compile(optimizer="rmsprop", loss="mse")

In [8]:
# Forward pass: run the batch of index sequences through the embedding model.
output_array = model.predict(x=input_array)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 207ms/step


In [9]:
# 32 sequences, each containing 10 words, each word represented by a
# 64-dimensional vector.
output_array.shape

(32, 10, 64)

In [10]:
# First input sequence; slicing with [:1] keeps the leading batch axis.
input_array[:1]

array([[119, 833, 241, 661, 995, 865, 108,   5, 749, 259]])

In [11]:
# Shape of the embedded first sequence: 1 sequence, 10 words, 64 dimensions per word.
output_array[:1].shape

(1, 10, 64)

In [12]:
# Embedding vectors for the 10 words of the first input sequence.
output_array[:1]

array([[[-0.04831066, -0.0126847 , -0.03327896, -0.03227427,
          0.03471934, -0.03602015, -0.01254258, -0.01194878,
         -0.01801217,  0.03084828, -0.0177116 , -0.01869587,
          0.04158319,  0.03757073,  0.00879034, -0.01722378,
          0.00451397, -0.01715682, -0.01215886, -0.00280943,
         -0.01040454,  0.01928646,  0.0206168 , -0.04317675,
          0.04424162, -0.02474617, -0.00964953, -0.03712086,
          0.01184503, -0.02392553, -0.01801158,  0.024403  ,
         -0.02756354,  0.00523787, -0.02167902,  0.02650633,
          0.01996474, -0.00807261,  0.0490081 , -0.03301023,
          0.01657598, -0.02023555,  0.03215567,  0.03158436,
          0.03526057,  0.01628515,  0.02472908,  0.01507478,
          0.03853222, -0.00595973, -0.034357  , -0.02493422,
          0.0039093 ,  0.02170738, -0.02159535,  0.00570116,
         -0.04396848,  0.00298246,  0.00050966,  0.04889074,
          0.0273147 , -0.02729944, -0.02567494, -0.02556026],
        [-0.04986435,  