# Implementing Word Embedding Using Keras- NLP | Deep Learning


- Word embeddings provide a dense representation of words and their relative meanings. They are an improvement over the sparse representations used in simpler bag-of-words models. Word embeddings can be learned from text data and reused across projects. They can also be learned as part of fitting a neural network on text data.


In [2]:
# https://www.youtube.com/watch?v=TsXR7_vtusQ&list=PLZoTAELRMXVPGU70ZGsckrMdr0FteeRUi&index=38
    
# https://github.com/krishnaik06/Word-Embedding/blob/master/Untitled2.ipynb
    
# Ref:
# https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/

In [3]:
# Word Embedding Techniques using Embedding Layer in Keras

In [None]:
### Libraries used: TensorFlow > 2.0 and Keras


In [4]:
##tensorflow >2.0
from tensorflow.keras.preprocessing.text import one_hot

In [5]:
# Toy corpus: seven short sentences used as input for the encoding and
# embedding steps in the cells below.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [6]:
# Display the corpus to confirm the sentences were stored as entered.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [7]:
# Vocabulary size: passed to one_hot() as the size of the index space, and to
# the Embedding layer as the number of rows in its lookup table.
voc_size = 10_000

### One Hot Representation

In [8]:
# Encode each sentence as a list of integer word indices, one index per word.
# Within a run the same word always gets the same index (e.g. 'the' and 'of'
# repeat across sentences in the printed result).
# NOTE(review): despite the name, one_hot() returns hashed integer indices, not
# one-hot vectors; per the Keras docs the mapping may collide and may differ
# between interpreter runs -- confirm before relying on the exact numbers.
onehot_repr = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
print(onehot_repr)

[[7807, 2580, 970, 7519], [7807, 2580, 970, 6007], [7807, 4088, 970, 5114], [5939, 2207, 7554, 9714, 7522], [5939, 2207, 7554, 9714, 5395], [4876, 7807, 6248, 970, 2805], [8873, 9311, 5945, 9714]]


### Word Embedding Representation

In [9]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [10]:
import numpy as np

In [11]:
# Bring every encoded sentence to a fixed length of 8 tokens by adding zeros
# at the front ('pre' padding), so all rows have the same shape for the model.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0 7807 2580  970 7519]
 [   0    0    0    0 7807 2580  970 6007]
 [   0    0    0    0 7807 4088  970 5114]
 [   0    0    0 5939 2207 7554 9714 7522]
 [   0    0    0 5939 2207 7554 9714 5395]
 [   0    0    0 4876 7807 6248  970 2805]
 [   0    0    0    0 8873 9311 5945 9714]]


In [12]:
# Target length of each word's embedding vector; equals the output dimension
# (10) given to the Embedding layer in the model cell below.
dim = 10

In [14]:
# Build a single-layer model that maps each integer word index to a dense vector.
#   - voc_size:    number of rows in the embedding table (word indices must be < voc_size)
#   - dim:         length of each embedding vector; taken from the `dim` constant
#                  instead of a hard-coded 10 so the two cannot drift apart
#   - sent_length: number of token positions per (padded) input sentence
# NOTE(review): newer Keras releases reportedly deprecate `input_length`; it is
# kept here so the model is built up-front and summary() shows concrete shapes.
model = Sequential()
model.add(Embedding(voc_size, dim, input_length=sent_length))
# No fit() call is visible in this notebook section, so the optimizer and loss
# chosen here are not exercised by the predict() calls that follow.
model.compile(optimizer='adam', loss='mse')

In [15]:
# Print the layer table: the Embedding layer holds voc_size * 10 = 100,000
# trainable weights and emits one 10-dim vector for each of the 8 positions.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Look up the embedding vector for every token of every padded sentence.
# No fit() call is visible above, so these appear to be the layer's initial
# (untrained) weights. Padding positions (index 0) all map to the same vector.
print(model.predict(embedded_docs))

[[[ 0.04530971  0.02463075 -0.0482289  -0.02750393 -0.01050154
   -0.04526439  0.03085481  0.03744813  0.02937749 -0.01964959]
  [ 0.04530971  0.02463075 -0.0482289  -0.02750393 -0.01050154
   -0.04526439  0.03085481  0.03744813  0.02937749 -0.01964959]
  [ 0.04530971  0.02463075 -0.0482289  -0.02750393 -0.01050154
   -0.04526439  0.03085481  0.03744813  0.02937749 -0.01964959]
  [ 0.04530971  0.02463075 -0.0482289  -0.02750393 -0.01050154
   -0.04526439  0.03085481  0.03744813  0.02937749 -0.01964959]
  [ 0.00908674 -0.01666036  0.04075693  0.03008072  0.04532022
   -0.04086731  0.00087715  0.01718768 -0.00196205 -0.01791043]
  [ 0.04887618 -0.00048695 -0.03490623 -0.02049473  0.0311589
   -0.03075628  0.02093086  0.01975503 -0.01698303 -0.02296509]
  [ 0.03220804  0.02699448  0.03114159  0.03546004  0.04857793
    0.0447088   0.00290766  0.00396122 -0.04509492 -0.0230849 ]
  [-0.03675474  0.00586413 -0.04080603  0.00866491  0.01364977
    0.02065065 -0.01312591 -0.00946391 -0.0356066

In [17]:
# Padded index sequence of the first sentence ('the glass of milk'):
# four leading zeros followed by its four word indices.
embedded_docs[0]

array([   0,    0,    0,    0, 7807, 2580,  970, 7519])

In [18]:
# Embedding output for the first sentence only: 8 rows (one per token position),
# each a 10-dim vector. The first four rows are identical because they all
# correspond to the padding index 0.
print(model.predict(embedded_docs)[0])

[[ 0.04530971  0.02463075 -0.0482289  -0.02750393 -0.01050154 -0.04526439
   0.03085481  0.03744813  0.02937749 -0.01964959]
 [ 0.04530971  0.02463075 -0.0482289  -0.02750393 -0.01050154 -0.04526439
   0.03085481  0.03744813  0.02937749 -0.01964959]
 [ 0.04530971  0.02463075 -0.0482289  -0.02750393 -0.01050154 -0.04526439
   0.03085481  0.03744813  0.02937749 -0.01964959]
 [ 0.04530971  0.02463075 -0.0482289  -0.02750393 -0.01050154 -0.04526439
   0.03085481  0.03744813  0.02937749 -0.01964959]
 [ 0.00908674 -0.01666036  0.04075693  0.03008072  0.04532022 -0.04086731
   0.00087715  0.01718768 -0.00196205 -0.01791043]
 [ 0.04887618 -0.00048695 -0.03490623 -0.02049473  0.0311589  -0.03075628
   0.02093086  0.01975503 -0.01698303 -0.02296509]
 [ 0.03220804  0.02699448  0.03114159  0.03546004  0.04857793  0.0447088
   0.00290766  0.00396122 -0.04509492 -0.0230849 ]
 [-0.03675474  0.00586413 -0.04080603  0.00866491  0.01364977  0.02065065
  -0.01312591 -0.00946391 -0.03560666  0.00610951]]
