In [391]:
import pickle 
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,LSTM,Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import save_model
from tensorflow.keras.callbacks import ModelCheckpoint


In [392]:
# Training corpus for the next-word model: formal e-mail sentences and
# sign-offs, one per line. Despite the variable name, this is not FAQ text.
# The string begins and ends with a newline, so splitting on "\n" yields an
# empty first and last entry.
faq = """
Dear Sir/Madam, I hope this message finds you in good health and high spirits.
I am writing to formally request an extension for the submission deadline of my project.
Kindly find the attached document for your perusal and further action.
This is to bring to your notice that we have updated the meeting schedule.
Please confirm your availability for the proposed date and time.
We would appreciate it if you could revert with your feedback by the end of the day.
I hope you are doing well and staying safe during these challenging times.
Thank you for your interest in the opportunity presented.
We are pleased to inform you that your application has been shortlisted.
In reference to our previous correspondence, we would like to proceed with the plan discussed.
We sincerely apologize for the inconvenience caused.
It was a pleasure speaking with you earlier today regarding the upcoming event.
We are writing to follow up on the invoice shared last week.
Please let us know if you require any clarifications regarding the attached proposal.
Your cooperation and timely response in this matter will be highly appreciated.
We hereby acknowledge the receipt of your email and the documents enclosed therein.
This email serves as a reminder for the upcoming submission deadline on Friday.
Should you have any further questions, please do not hesitate to contact us.
Looking forward to your valuable input and suggestions.
We hope to continue this collaboration for mutual benefit and growth.
It would be highly appreciated if you could prioritize this request.
We thank you for your patience and understanding in this matter.
We regret to inform you that we are unable to process your request at this time.
Our team is currently reviewing your query and will get back to you shortly.
We kindly request you to make the necessary arrangements for the meeting.
Attached herewith is the updated version of the draft for your review.
Thank you for the opportunity to present our proposal.
We are grateful for your continued trust and support.
This is to inform you that your appointment has been rescheduled to next week.
We wish to invite you to participate in our upcoming workshop.
We appreciate your interest and would like to move forward with the next steps.
I am reaching out to follow up on the status of the pending documents.
We value your feedback and are committed to improving our services.
Kindly ensure that all relevant documents are submitted before the deadline.
Thank you for confirming your attendance for the session.
Please be advised that all team members must complete the form by Thursday.
The agenda for the meeting has been attached for your convenience.
We are committed to maintaining transparent and timely communication with our stakeholders.
Let us know if you need any further assistance in this regard.
We would like to take this opportunity to thank you for your contribution to the project.
We request your presence at the official launch ceremony scheduled next month.
We understand the urgency of the matter and are working to resolve it promptly.
Your presence at the event would be greatly valued.
We would be honored to have you as our guest speaker for the occasion.
Please note that all entries must be submitted no later than 5:00 PM tomorrow.
We look forward to hearing from you at your earliest convenience.
We thank you once again for your active participation.
It has been a pleasure working with you, and we hope to collaborate again soon.
In accordance with our previous discussion, the final deadline remains unchanged.
We seek your kind approval for proceeding with the attached plan.
I am pleased to inform you that your recommendation has been approved.
Please take a moment to review the attached terms and conditions.
Your attention to this matter is highly appreciated.
We are currently processing your request and will notify you once completed.
On behalf of the entire team, I extend our heartfelt thanks for your continued partnership.
We have scheduled a virtual meeting on Zoom to address your concerns.
Kindly register for the session using the link provided below.
It is with great enthusiasm that we invite you to join us for the orientation session.
All official communication regarding the event will be sent through this email address.
Please disregard the previous version and consider only the updated file attached herewith.
We would like to acknowledge the significant contribution you have made to the initiative.
Your suggestions have been duly noted and shared with the relevant department.
We trust that this email clarifies the matter.
We hope you will consider our invitation and confirm your participation.
Thank you for your unwavering support and collaboration throughout this project.
Looking forward to working together on future initiatives.
Kindly arrange to send the signed copy of the agreement at your earliest.
Let us know if there is any change in the proposed schedule.
We appreciate the efforts taken by you to ensure timely delivery.
We are currently evaluating your proposal and will get back to you within the week.
Thank you for updating us regarding the status of the report.
Please find the minutes of the meeting attached for your records.
We believe your insights will be valuable to the success of this program.
We hereby confirm the successful completion of your registration process.
This communication is in continuation of our previous discussion held on June 3rd.
We hope you had a pleasant experience working with our team.
We kindly remind you that the final draft must be submitted within the stipulated timeline.
We are pleased to extend to you an invitation to our Annual Conclave on Technology and Innovation.
We appreciate your prompt attention to this urgent matter.
Thank you once again for your cooperation.
Best regards,
Warm wishes,
Yours faithfully,
Sincerely,
Respectfully,
Thank you for your continued support.
With warm regards,
Kindly acknowledge receipt of this email.
We wish you all the best in your future endeavors.
Please let us know if this is acceptable to you.
We are thankful for your active engagement in this initiative.
Your role in this project has been instrumental and deeply appreciated.
We hope the above information is helpful.
We are looking forward to your confirmation.
Please be informed that the office will remain closed on Friday.
We anticipate your cooperation in ensuring timely execution of this task.
Looking forward to a successful event ahead.
We value your association with our organization.
Thank you for your time and support.
"""

In [393]:
# Training callback: writes the model to 'best_lstm_model.h5' each time the
# monitored validation accuracy reaches a new maximum, and logs every save.
checkpoint = ModelCheckpoint(
    filepath='best_lstm_model.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [394]:
# Word-level tokenizer with default settings; it is fitted on the corpus in the next cell.
tokenizer = Tokenizer()

In [395]:
# Build the word -> integer id vocabulary from the whole corpus, passed as a single document.
tokenizer.fit_on_texts([faq])

In [396]:
# Number of distinct words the tokenizer learned from the corpus.
len(tokenizer.word_index)

380

In [397]:
# Persist the fitted tokenizer next to the model so the same word -> id
# mapping can be reloaded later without refitting.
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

In [398]:
# Turn every corpus line into its growing token-id prefixes of length 2..n
# (e.g. [w1, w2], [w1, w2, w3], ...). The last id of each prefix later serves
# as the prediction target. Lines with fewer than two tokens contribute nothing.
input_sequences = []
for line in faq.split("\n"):
    token_ids = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_ids[:end] for end in range(2, len(token_ids) + 1)
    )

In [399]:
# Length of the longest prefix; every sequence is padded to this length.
max_len = max(len(sequence) for sequence in input_sequences)

In [400]:
# Left-pad each prefix with zeros to max_len so the target word always sits
# in the final column.
padded_input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_len,
    padding="pre",
)
padded_input_sequences

array([[  0,   0,   0, ...,   0, 141, 142],
       [  0,   0,   0, ..., 141, 142, 143],
       [  0,   0,   0, ..., 142, 143,  28],
       ...,
       [  0,   0,   0, ...,   6,   3,  61],
       [  0,   0,   0, ...,   3,  61,   7],
       [  0,   0,   0, ...,  61,   7,  55]], dtype=int32)

In [401]:
# Model inputs: every column except the last (the context words).
X = padded_input_sequences[:, :-1]

In [402]:
# Targets: the last column, i.e. the id of the word that follows each context.
y = padded_input_sequences[:, -1]

In [403]:
# One-hot encode the next-word ids for the softmax / categorical cross-entropy
# head. There are len(word_index) + 1 classes: the learned words plus id 0,
# which the padded sequences use as filler.
# The ids are read straight from padded_input_sequences rather than from `y`,
# so re-running this cell never re-encodes an array that is already one-hot.
y = to_categorical(
    padded_input_sequences[:, -1],
    num_classes=len(tokenizer.word_index) + 1,
)

In [404]:
# Sanity check: (number of training samples, number of classes).
y.shape

(985, 381)

In [405]:
# Next-word prediction network: embedding -> two stacked LSTMs -> softmax over
# the vocabulary.
# The input shape is declared with an explicit Input layer instead of
# Embedding(input_length=...), which is deprecated in Keras 3. That way the
# model is built immediately and model.summary() can report output shapes and
# parameter counts before training.
vocab_size = len(tokenizer.word_index) + 1  # learned words plus padding id 0

model = Sequential([
    tf.keras.Input(shape=(max_len - 1,)),   # context length = padded length minus the target column
    Embedding(vocab_size, 100),
    LSTM(256, return_sequences=True),       # emit the full sequence for the next LSTM
    LSTM(256),                              # keep only the final state
    Dense(vocab_size, activation='softmax'),
])

In [406]:
# Adam with a learning rate of 0.01; the loss matches the one-hot targets,
# and accuracy is tracked for the checkpoint callback.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [407]:
# Print the layer-by-layer overview of the network.
model.summary()

In [408]:
# Hold out 20% of the (context, next-word) samples for validation, with a
# fixed seed for a repeatable split.
# NOTE(review): the split is per sample, so prefixes of the same sentence can
# end up on both sides; validation scores are not a clean generalisation measure.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [409]:
# Train for 100 epochs, evaluating on the held-out samples after each epoch.
# The checkpoint callback saves the model whenever validation accuracy improves.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[checkpoint],
)

Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.0424 - loss: 5.6848
Epoch 1: val_accuracy improved from -inf to 0.04061, saving model to best_lstm_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 56ms/step - accuracy: 0.0426 - loss: 5.6835 - val_accuracy: 0.0406 - val_loss: 5.6131
Epoch 2/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 52ms/step - accuracy: 0.0623 - loss: 5.1575
Epoch 2: val_accuracy did not improve from 0.04061
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step - accuracy: 0.0618 - loss: 5.1658 - val_accuracy: 0.0406 - val_loss: 6.0459
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.0660 - loss: 5.1256
Epoch 3: val_accuracy did not improve from 0.04061
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.0661 - loss: 5.1276 - val_accuracy: 0.0406 - val_loss: 6.1250
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.0652 - loss: 5.1210
Epoch 4: val_accuracy did not improve from 0.04061
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 69ms/step - accuracy: 0.0634 - loss: 4.8768 - val_accuracy: 0.0558 - val_loss: 5.9315
Epoch 7/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 56ms/step - accuracy: 0.0654 - loss: 4.6606
Epoch 7: val_accuracy improved from 0.05584 to 0.09137, saving model to best_lstm_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step - accuracy: 0.0662 - loss: 4.6641 - val_accuracy: 0.0914 - val_loss: 5.9338
Epoch 8/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 58ms/step - accuracy: 0.1157 - loss: 4.4282
Epoch 8: val_accuracy improved from 0.09137 to 0.10152, saving model to best_lstm_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step - accuracy: 0.1145 - loss: 4.4379 - val_accuracy: 0.1015 - val_loss: 6.1152
Epoch 9/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 54ms/step - accuracy: 0.1171 - loss: 4.3942
Epoch 9: val_accuracy did not improve from 0.10152
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step - accuracy: 0.1160 - loss: 4.3958 - val_accuracy: 0.1015 - val_loss: 6.0929
Epoch 10/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 54ms/step - accuracy: 0.1170 - loss: 4.2575
Epoch 10: val_accuracy did not improve from 0.10152
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step - accuracy: 0.1170 - loss: 4.2582 - val_accuracy: 0.0964 - val_loss: 6.3297
Epoch 11/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.1394 - loss: 4.0875
Epoch 11: val_accuracy improved from 0.10152 to 0.12183, saving model to best_lstm_mo



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step - accuracy: 0.1391 - loss: 4.0893 - val_accuracy: 0.1218 - val_loss: 6.3513
Epoch 12/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 0.1546 - loss: 3.9472
Epoch 12: val_accuracy did not improve from 0.12183
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 97ms/step - accuracy: 0.1543 - loss: 3.9485 - val_accuracy: 0.0761 - val_loss: 6.2943
Epoch 13/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 56ms/step - accuracy: 0.1663 - loss: 3.7666
Epoch 13: val_accuracy did not improve from 0.12183
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 61ms/step - accuracy: 0.1660 - loss: 3.7726 - val_accuracy: 0.0964 - val_loss: 6.2277
Epoch 14/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 67ms/step - accuracy: 0.1525 - loss: 3.6908
Epoch 14: val_accuracy did not improve from 0.12183
[1m25/25[0m [32m━━━━━━━━━━━━



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 63ms/step - accuracy: 0.1917 - loss: 3.4982 - val_accuracy: 0.1523 - val_loss: 6.4219
Epoch 16/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 56ms/step - accuracy: 0.2319 - loss: 3.3307
Epoch 16: val_accuracy did not improve from 0.15228
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 61ms/step - accuracy: 0.2308 - loss: 3.3372 - val_accuracy: 0.0964 - val_loss: 6.4589
Epoch 17/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.2480 - loss: 3.2949
Epoch 17: val_accuracy did not improve from 0.15228
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 66ms/step - accuracy: 0.2482 - loss: 3.2938 - val_accuracy: 0.1218 - val_loss: 6.5025
Epoch 18/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.2638 - loss: 3.1462
Epoch 18: val_accuracy improved from 0.15228 to 0.15736, saving model to best_lstm_



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 71ms/step - accuracy: 0.2639 - loss: 3.1471 - val_accuracy: 0.1574 - val_loss: 6.6735
Epoch 19/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 55ms/step - accuracy: 0.2848 - loss: 3.0897
Epoch 19: val_accuracy did not improve from 0.15736
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step - accuracy: 0.2845 - loss: 3.0875 - val_accuracy: 0.1523 - val_loss: 6.8885
Epoch 20/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 59ms/step - accuracy: 0.3013 - loss: 2.8992
Epoch 20: val_accuracy did not improve from 0.15736
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step - accuracy: 0.3009 - loss: 2.9039 - val_accuracy: 0.1472 - val_loss: 6.8369
Epoch 21/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 54ms/step - accuracy: 0.3562 - loss: 2.7699
Epoch 21: val_accuracy did not improve from 0.15736
[1m25/25[0m [32m━━━━━━━━━━━━



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 60ms/step - accuracy: 0.5721 - loss: 1.6358 - val_accuracy: 0.1624 - val_loss: 7.8314
Epoch 34/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 58ms/step - accuracy: 0.6255 - loss: 1.5299
Epoch 34: val_accuracy did not improve from 0.16244
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step - accuracy: 0.6221 - loss: 1.5371 - val_accuracy: 0.1523 - val_loss: 7.8762
Epoch 35/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 54ms/step - accuracy: 0.6154 - loss: 1.4136
Epoch 35: val_accuracy did not improve from 0.16244
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step - accuracy: 0.6129 - loss: 1.4245 - val_accuracy: 0.1574 - val_loss: 7.8643
Epoch 36/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 56ms/step - accuracy: 0.6379 - loss: 1.4335
Epoch 36: val_accuracy did not improve from 0.16244
[1m25/25[0m [32m━━━━━━━━━━━━



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 66ms/step - accuracy: 0.6847 - loss: 1.1912 - val_accuracy: 0.1675 - val_loss: 8.2283
Epoch 40/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 59ms/step - accuracy: 0.6947 - loss: 1.1489
Epoch 40: val_accuracy did not improve from 0.16751
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step - accuracy: 0.6923 - loss: 1.1533 - val_accuracy: 0.1624 - val_loss: 8.2232
Epoch 41/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.6974 - loss: 1.1629
Epoch 41: val_accuracy improved from 0.16751 to 0.17766, saving model to best_lstm_model.h5




[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 72ms/step - accuracy: 0.6968 - loss: 1.1634 - val_accuracy: 0.1777 - val_loss: 8.1062
Epoch 42/100
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 55ms/step - accuracy: 0.7189 - loss: 1.0456
Epoch 42: val_accuracy did not improve from 0.17766
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 60ms/step - accuracy: 0.7152 - loss: 1.0530 - val_accuracy: 0.1675 - val_loss: 8.3584
Epoch 43/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.7105 - loss: 1.0429
Epoch 43: val_accuracy did not improve from 0.17766
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 76ms/step - accuracy: 0.7093 - loss: 1.0456 - val_accuracy: 0.1726 - val_loss: 8.3966
Epoch 44/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - accuracy: 0.7239 - loss: 1.0027
Epoch 44: val_accuracy did not improve from 0.17766
[1m25/25[0m [32m━━━━━━━━━━━━



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 90ms/step - accuracy: 0.8390 - loss: 0.5550 - val_accuracy: 0.1878 - val_loss: 9.2717
Epoch 59/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.8295 - loss: 0.5322
Epoch 59: val_accuracy did not improve from 0.18782
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 78ms/step - accuracy: 0.8292 - loss: 0.5337 - val_accuracy: 0.1726 - val_loss: 9.2751
Epoch 60/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.8444 - loss: 0.5204
Epoch 60: val_accuracy did not improve from 0.18782
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 82ms/step - accuracy: 0.8440 - loss: 0.5212 - val_accuracy: 0.1675 - val_loss: 9.3707
Epoch 61/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.8297 - loss: 0.5199
Epoch 61: val_accuracy did not improve from 0.18782
[1m25/25[0m [32m━━━━━━━━━━━━

In [410]:
# Export the in-memory model (its weights as left by the end of training) in
# the native .keras format. This file is separate from 'best_lstm_model.h5',
# which the ModelCheckpoint callback writes during training.
model.save('best_lstm_model.keras')

In [439]:
def generate_text(seed_text, n_words=5):
    """Greedily extend ``seed_text`` with up to ``n_words`` predicted words.

    At each step the current text is tokenized, left-padded to the model's
    input length, and the word with the highest predicted probability is
    appended. The growing text is printed after every appended word.

    Args:
        seed_text: Prompt to continue.
        n_words: Maximum number of words to append.

    Returns:
        The prompt followed by the generated words.
    """
    generated = seed_text
    for _ in range(n_words):
        token_ids = tokenizer.texts_to_sequences([generated])[0]
        padded_tokens = pad_sequences([token_ids], maxlen=max_len - 1, padding='pre')
        # verbose=0 keeps the per-call progress bar out of the cell output.
        probabilities = model.predict(padded_tokens, verbose=0)[0]
        next_id = int(np.argmax(probabilities))
        # index_word is the tokenizer's id -> word table; it has no entry for
        # the padding id 0.
        next_word = tokenizer.index_word.get(next_id)
        if next_word is None:
            # The model predicted padding: the text cannot change any more,
            # so further iterations would repeat the same prediction.
            break
        generated = f"{generated} {next_word}"
        print(generated)
    return generated


text = generate_text("I am writing", n_words=5)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
I am writing to
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
I am writing to formally
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
I am writing to formally request
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
I am writing to formally request an
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
I am writing to formally request an extension
