In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Step 2 - Take a Sample text

raw_text = 'He is the king . The king is royal . She is the royal  queen '
raw_text = raw_text.lower()

In [3]:
# Step 3 - Create Dictionary

My_words = []
for word in raw_text.split():
    if word != '.': 
        My_words.append(word)
My_words = set(My_words) 
My_word2int = {}
My_int2word = {}
vocabulary_size = len(My_words) 
for i,word in enumerate(My_words):
    My_word2int[word] = i

In [4]:
# Step 4 - Create list of Sentences

# raw sentences is a list of sentences.
raw_text_2 = raw_text.split('.')
sentences = []
for sentence in raw_text_2:
    sentences.append(sentence.split())
print("The list of sentenses is:",sentences)

The list of sentenses is: [['he', 'is', 'the', 'king'], ['the', 'king', 'is', 'royal'], ['she', 'is', 'the', 'royal', 'queen']]


In [5]:
# Step 5 - Generate training data

king_data = []
Set_window_size = 2
for sentence in sentences:
    for word_index, word in enumerate(sentence):
        for nb_word in sentence[max(word_index - Set_window_size, 0) : min(word_index + Set_window_size, len(sentence)) + 1] : 
            if nb_word != word:
                king_data.append([word, nb_word])
print("The list of word pairs is:",king_data)

The list of word pairs is: [['he', 'is'], ['he', 'the'], ['is', 'he'], ['is', 'the'], ['is', 'king'], ['the', 'he'], ['the', 'is'], ['the', 'king'], ['king', 'is'], ['king', 'the'], ['the', 'king'], ['the', 'is'], ['king', 'the'], ['king', 'is'], ['king', 'royal'], ['is', 'the'], ['is', 'king'], ['is', 'royal'], ['royal', 'king'], ['royal', 'is'], ['she', 'is'], ['she', 'the'], ['is', 'she'], ['is', 'the'], ['is', 'royal'], ['the', 'she'], ['the', 'is'], ['the', 'royal'], ['the', 'queen'], ['royal', 'is'], ['royal', 'the'], ['royal', 'queen'], ['queen', 'the'], ['queen', 'royal']]


In [8]:
# Step 6 - One hot encoding

def one_hot(index_data_point, vocabulary_size):
    temp = np.zeros(vocabulary_size)
    temp[index_data_point] = 1
    return temp
x_train_data = [] # this is for input word
y_train_data = [] # this is for output word
for word_data in king_data:

    x_train_data.append(one_hot(My_word2int[word_data[0]], vocabulary_size))
    y_train_data.append(one_hot(My_word2int[word_data[1]], vocabulary_size))
# convert them to numpy arrays

In [11]:
# Step 7 - Shape of train data

print("This is the shape for x_train and y_train:",len(x_train_data), len(y_train_data))

This is the shape for x_train and y_train: 34 34


In [12]:
# Step 8 - Disable the eagre execution

tf.compat.v1.disable_eager_execution()

In [13]:
# Step 9 - Make a tensorflow model

x_new = tf.compat.v1.placeholder(tf.float32, shape=(None, vocabulary_size))
label_y = tf.compat.v1.placeholder(tf.float32, shape=(None, vocabulary_size))

In [14]:
# Step 10 - Convert data into embedded representation

dim_embbed = 5
W1_data = tf.Variable(tf.compat.v1.random_normal([vocabulary_size, dim_embbed]))
b1_data = tf.Variable(tf.compat.v1.random_normal([dim_embbed])) #bias
representation_hidden = tf.add(tf.matmul(x_new,W1_data), b1_data)

In [15]:
# Step 11 - Make Predictions

W2_data = tf.Variable(tf.compat.v1.random_normal([dim_embbed, vocabulary_size]))
b2_data = tf.Variable(tf.compat.v1.random_normal([vocabulary_size]))
Make_prediction = tf.nn.softmax(tf.add( tf.matmul(representation_hidden, W2_data), b2_data))

In [16]:
# Step 12 - Train the data

sess = tf.compat.v1.Session()
initialize_var = tf.compat.v1.global_variables_initializer()
sess.run(initialize_var)
# define the loss function:
cross_entropy_loss = tf.reduce_mean(-tf.reduce_sum(label_y * tf.compat.v1.log(Make_prediction), axis=[1]))
# define the training step:
Step_train = tf.compat.v1.train.GradientDescentOptimizer(0.1).minimize(cross_entropy_loss)
iters = 10000
# train for defined iterations
for ele in range(iters):
    sess.run(Step_train, feed_dict={x_new: x_train_data, label_y: y_train_data})
    print('The loss is : ', sess.run(cross_entropy_loss, feed_dict={x_new: x_train_data, label_y: y_train_data}))

The loss is :  3.1886606
The loss is :  2.9891226
The loss is :  2.840277
The loss is :  2.7191966
The loss is :  2.6169872
The loss is :  2.5292654
The loss is :  2.4534187
The loss is :  2.3876977
The loss is :  2.3307807
The loss is :  2.2815254
The loss is :  2.2388504
The loss is :  2.2017133
The loss is :  2.1691418
The loss is :  2.1402717
The loss is :  2.1143742
The loss is :  2.090859
The loss is :  2.0692627
The loss is :  2.0492296
The loss is :  2.03049
The loss is :  2.01284
The loss is :  1.996126
The loss is :  1.9802309
The loss is :  1.9650632
The loss is :  1.9505523
The loss is :  1.9366407
The loss is :  1.9232824
The loss is :  1.9104381
The loss is :  1.8980746
The loss is :  1.8861638
The loss is :  1.8746799
The loss is :  1.8636006
The loss is :  1.8529055
The loss is :  1.842576
The loss is :  1.832595
The loss is :  1.8229465
The loss is :  1.8136159
The loss is :  1.8045892
The loss is :  1.795853
The loss is :  1.7873955
The loss is :  1.7792045
The loss i