In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Input

# Toy corpus: ten very short restaurant reviews.
reviews = [
    # positive reviews
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
    # negative reviews
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]

# Labels aligned with `reviews`: the first five are positive (1) and the last five are negative (0).
sentiment = np.array([1] * 5 + [0] * 5)

In [2]:
# Demo: one_hot hashes every word of the given text to an integer index.
# With n=30 the indices fall in the range 1..29 (0 is never assigned to a word).
one_hot("amazing restaurant", n=30)

[24, 25]

In [3]:
# Size of the index space that words are hashed into (not the number of distinct words).
vocab_size = 30

# Turn each review into a list of integer word indices, one index per word.
# The hashing is lossy: two different words can end up with the same index.
encoded_reviews = [one_hot(text, n=vocab_size) for text in reviews]

print(encoded_reviews)

[[22, 6], [24, 25], [12, 7], [4, 27, 9], [13, 1, 2], [24, 6], [1, 1, 23], [28, 7], [28, 17], [1, 5]]


In [4]:
"""
We've seen that a review with 2 words is encoded as 2 numbers and a review with 3 words as 3 numbers. To give every list the same length, we pad them and set the maximum length to 4.

Zeros are added at the end (padding='post') until each list has length 4.
"""
# Fixed row length the model will receive; shorter reviews are filled with trailing zeros.
max_length = 4
padded_reviews = pad_sequences(
    sequences=encoded_reviews,
    maxlen=max_length,
    padding='post',
)
print(padded_reviews)

[[22  6  0  0]
 [24 25  0  0]
 [12  7  0  0]
 [ 4 27  9  0]
 [13  1  2  0]
 [24  6  0  0]
 [ 1  1 23  0]
 [28  7  0  0]
 [28 17  0  0]
 [ 1  5  0  0]]


In [5]:
# Each word index is represented by a trainable vector of this size.
embeded_vector_size = 5

# Sequential: layers are stacked one after another, each feeding the next.
model = Sequential()

# Declare the input explicitly: one row of `max_length` integer word indices per review.
# This replaces Embedding's `input_length` argument, which Keras 3 deprecates and ignores,
# and it lets the model be built (output shapes and parameter counts known) before training.
model.add(Input(shape=(max_length,), dtype="int32"))

# Lookup table with `vocab_size` rows of `embeded_vector_size` values each.
# For every review it outputs a (max_length, embeded_vector_size) = (4, 5) matrix.
model.add(Embedding(vocab_size, embeded_vector_size, name="embedding"))

# Flatten the (4, 5) matrix of word vectors into a single vector of 4 * 5 = 20 values
# so it can be fed to the Dense layer.
model.add(Flatten())

# Output layer: a single neuron with a sigmoid activation.
# Sigmoid squashes the output to a value between 0 and 1, which suits binary
# classification: the output can be read as the probability of the positive class.
model.add(Dense(1, activation='sigmoid'))



In [6]:
# Features are the padded index sequences; targets are the 0/1 sentiment labels.
X, y = padded_reviews, sentiment

In [7]:
# Adam optimizer with binary cross-entropy loss (single sigmoid output); track accuracy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()

None


In [8]:
"""
verbose=0 means no output is shown during training:
the training process runs silently, without printing loss, accuracy, or progress bars.

Use verbose=0 if you don't need any feedback or are running experiments in the background.
Use verbose=1 when you want to track the progress interactively.
Use verbose=2 if you prefer a less cluttered output but still want one log line per epoch.
"""
# Train silently for 50 epochs. Keep the returned History object (per-epoch loss and
# accuracy live in history.history) instead of letting the cell echo its repr.
history = model.fit(X, y, epochs=50, verbose=0)

<keras.src.callbacks.history.History at 0x190db15da30>

In [9]:
# Score the trained model. X and y are the same arrays that were passed to fit(),
# so this is accuracy on the training data, not on unseen reviews.
loss, accuracy = model.evaluate(x=X, y=y)
accuracy

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step - accuracy: 0.9000 - loss: 0.6541


0.8999999761581421

In [17]:
# Pull the learned lookup table out of the layer named "embedding".
# get_weights() returns a list of arrays; its first entry is the embedding matrix,
# with one row per word index (len(weights) is 30) and 5 values per row.
weights = model.get_layer(name='embedding').get_weights()[0]

weights

array([[ 0.00088638, -0.0180603 , -0.00680055,  0.0076575 ,  0.03529859],
       [-0.02245655,  0.03537659, -0.01065235, -0.01384369, -0.04821618],
       [-0.01576259, -0.0108992 ,  0.09856898, -0.04145789, -0.02359428],
       [ 0.01640204,  0.04416865, -0.02634202,  0.03902683, -0.01753344],
       [-0.06775293,  0.07221265, -0.06934162,  0.02534495,  0.08928314],
       [-0.02605165, -0.09836814,  0.0751266 , -0.05770465,  0.0636599 ],
       [ 0.01577486,  0.00196374,  0.00647211,  0.03236306,  0.01852922],
       [ 0.03755089,  0.0215859 ,  0.00899677, -0.04075366,  0.031982  ],
       [-0.01983212, -0.04892723,  0.00321841, -0.04574267,  0.04189975],
       [-0.08810661, -0.03967899,  0.06934293, -0.00621028, -0.03901733],
       [ 0.03393484,  0.0435287 , -0.03607516, -0.02900426,  0.04231438],
       [ 0.0095498 , -0.00742118, -0.01955201,  0.02658648, -0.04590303],
       [-0.09193432,  0.09091119, -0.06162275,  0.01922112,  0.06462791],
       [-0.10093212,  0.08830682, -0.0

In [14]:
"""
13 is the index that one_hot assigned to the word "will" (the first word of the 5th review); row 13 of the embedding weights is that word's learned vector.
"""
# Embedding vector of "will". The index is looked up from the word rather than
# hard-coded (it was 13 in the recorded run): one_hot relies on Python's built-in
# hash, which is not stable across interpreter sessions, so a literal index can
# point at a different word after a kernel restart.
weights[one_hot("will", vocab_size)[0]]

array([-0.10093212,  0.08830682, -0.03760545,  0.05981652,  0.08096015],
      dtype=float32)

In [15]:
"""
4 is the index that one_hot assigned to the word "just" (the first word of the 4th review); row 4 of the embedding weights is that word's learned vector.
"""
# Embedding vector of "just". The index is looked up from the word rather than
# hard-coded (it was 4 in the recorded run): one_hot relies on Python's built-in
# hash, which is not stable across interpreter sessions, so a literal index can
# point at a different word after a kernel restart.
weights[one_hot("just", vocab_size)[0]]

array([-0.06775293,  0.07221265, -0.06934162,  0.02534495,  0.08928314],
      dtype=float32)

In [18]:
"""
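1 is the index that one_hot assigned to the word "needs" (the first word of the 10th review). In this run the same index was also assigned to "never" and "go", so row 1 of the embedding weights is a vector shared by all three words.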
"""
# Embedding vector of "needs". The index is looked up from the word rather than
# hard-coded (it was 1 in the recorded run): one_hot relies on Python's built-in
# hash, which is not stable across interpreter sessions, so a literal index can
# point at a different word after a kernel restart.
# Words that hash to the same index share this row, so the vector is not
# necessarily specific to "needs".
weights[one_hot("needs", vocab_size)[0]]

array([-0.02245655,  0.03537659, -0.01065235, -0.01384369, -0.04821618],
      dtype=float32)