## **Working of word embedding**

In [1]:
import numpy as np
import pandas as pd 
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Embedding ,Flatten



reviews = { 
    'review': ['This movie was great', 'This movie was terrible', 'I love this movie', 'I hate this movie' , 'will go ahead' , 'need imporvement'],
    'sentiment': [1, 0, 1, 0, 1, 0]
}

vocab_size = 30 
encoded_reviews = []
for i in reviews['review']:
    encoded_reviews.append(one_hot(i, vocab_size)) 

encoded_reviews

[[7, 14, 12, 18],
 [7, 14, 12, 24],
 [20, 3, 7, 14],
 [20, 27, 7, 14],
 [29, 22, 25],
 [22, 1]]

In [2]:
max_length = 4 
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
padded_reviews = np.array(padded_reviews)
padded_reviews

array([[ 7, 14, 12, 18],
       [ 7, 14, 12, 24],
       [20,  3,  7, 14],
       [20, 27,  7, 14],
       [29, 22, 25,  0],
       [22,  1,  0,  0]])

In [3]:
embedding_vector_length = 8

model = Sequential()
    
model.add(Embedding(vocab_size, embedding_vector_length, input_length=max_length , name = "embedding"))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

x = padded_reviews 
y = np.array(reviews['sentiment']) 
        



In [4]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 
model.fit(x, y, epochs=50, verbose=0)
loss, accuracy = model.evaluate(x, y)
model.summary()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 423ms/step - accuracy: 1.0000 - loss: 0.6118


In [5]:
embd = model.get_layer('embedding').get_weights()[0]
len(embd)

30

In [None]:
embd.shape


(30, 8)

In [7]:
embd

array([[-3.38120125e-02,  3.05222627e-02,  5.47654070e-02,
        -1.90903470e-02,  3.01795769e-02,  8.65986720e-02,
         5.96349351e-02,  6.02794774e-02],
       [-8.46794397e-02,  7.16528669e-03,  9.26483888e-03,
         8.07155296e-02,  2.76326872e-02,  7.03332052e-02,
        -2.95971474e-03, -4.45726924e-02],
       [ 4.56497185e-02, -2.10633762e-02,  4.19584997e-02,
        -3.66406664e-02,  1.82031654e-02,  4.23274375e-02,
        -1.61368959e-02,  4.86282259e-03],
       [ 7.38838455e-03, -6.33115917e-02, -8.86425450e-02,
        -4.28287759e-02, -6.18432164e-02, -9.48656127e-02,
         4.25875820e-02,  2.98895240e-02],
       [-2.51669809e-03,  2.62612104e-03,  9.40479338e-04,
        -9.43630934e-03,  3.69300582e-02, -6.89815730e-04,
        -7.00893253e-03, -2.90542729e-02],
       [ 4.52038310e-02,  2.83527113e-02,  3.09013836e-02,
        -3.25490162e-03,  4.65427712e-03,  2.87307836e-02,
         4.75421548e-05, -1.31180137e-03],
       [-3.00268773e-02, -4.034240