In [1]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Model
from keras.layers import Input, Embedding,AdditiveAttention, GRU,Multiply, Dense, LSTM, Concatenate, TimeDistributed, Bidirectional, BatchNormalization, LayerNormalization, MultiHeadAttention , Dropout
from keras.callbacks import EarlyStopping, ReduceLROnPlateau , TensorBoard, CSVLogger, ModelCheckpoint
import tensorflow as tf

In [2]:
# # Preprocessing function
# def preprocess_text(txt):
#     txt = txt.lower()
#     txt = re.sub(r"i'm", "i am", txt)
#     txt = re.sub(r"he's", "he is", txt)
#     txt = re.sub(r"she's", "she is", txt)
#     txt = re.sub(r"that's", "that is", txt)
#     txt = re.sub(r"what's", "what is", txt)
#     txt = re.sub(r"where's", "where is", txt)
#     txt = re.sub(r"\'ll", " will", txt)
#     txt = re.sub(r"\'ve", " have", txt)
#     txt = re.sub(r"\'re", " are", txt)
#     txt = re.sub(r"\'d", " would", txt)
#     txt = re.sub(r"won't", "will not", txt)
#     txt = re.sub(r"can't", "can not", txt)
#     txt = re.sub(r"wanna", "want to", txt)
#     txt = re.sub(r'[()\[\]{}]', ' ', txt)
#     txt = re.sub(r'[-]', ' ', txt)
#     txt = re.sub(r'[\'"]', '', txt)
#     txt = re.sub(r"[^\w\s]", "", txt)
#     tokens = word_tokenize(txt)
#     # stop_words = set(stopwords.words('english'))
#     # tokens = [word for word in tokens if word not in stop_words]
#     lemmatizer = WordNetLemmatizer()
#     tokens = [lemmatizer.lemmatize(word) for word in tokens]
#     processed_text = ' '.join(tokens)
#     return processed_text

# # Load and preprocess data
# df = pd.read_csv('./med-en-data.csv')
# df = df.astype(str)
# columns_to_drop = ['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.2']
# df = df.drop(columns_to_drop, axis=1)
# df = df[:15000]
# print('date loaded')

# # Filter rows where 'Quesition' or 'Answer' have more than 223 words
# df = df[df['Answer'].apply(lambda x: len(x.split()) <= 224)]
# df = df[df['Quesition'].apply(lambda x: len(x.split()) <= 224)]


# print('data filtered')

# df['Quesition'] = df['Quesition'].apply(lambda x : preprocess_text(x))
# df['Answer'] = df['Answer'].apply(lambda x : preprocess_text(x))

# print('done')


# Preprocessing function
def preprocess_text(txt , is_answer = False):
    txt = txt.lower()
    contractions = {
        "i'm": "i am", "he's": "he is", "she's": "she is", "that's": "that is",
        "what's": "what is", "where's": "where is", "'ll": " will", "'ve": " have",
        "'re": " are", "'d": " would", "won't": "will not", "can't": "can not",
        "wanna": "want to" , "q:a": "q: a" , "healthcaremagic.comi" :"healthcaremagic.com i"
    }
    for contraction, replacement in contractions.items():
        txt = re.sub(contraction, replacement, txt)
    
    txt = re.sub(r'[^\w\s\?]', '', txt)
    # txt = re.sub(r"healthcaremagic.comi", "healthcaremagic.com i", txt)

    # Remove special characters
    # txt = re.sub(r'[()\[\]{}]', ' ', txt)
    # txt = re.sub(r'[-]', ' ', txt)
    # txt = re.sub(r'[\'"]', '', txt)
    # txt = re.sub(r"[^\w\s]", "", txt)
    txt = re.sub(r'[^\w\s\?]', '', txt)

    if not is_answer:
        txt = re.sub(r"q ",'', txt)
        txt = re.sub(r'\?{2,}','?' , txt)


    
    # Tokenize
    tokens = word_tokenize(txt)
    
    # Remove stopwords (customized for medical texts)
    # stop_words = set(stopwords.words('english'))
    # custom_stopwords = {'q'}
    # stop_words = stop_words.union(custom_stopwords)
    # tokens = [word for word in tokens if word not in custom_stopwords]
    
    # Lemmatize
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    
    # Join tokens back to a single string
    processed_text = ' '.join(tokens)

    # Modify multiple-choice answers to include the format "the answer is x: y"
    if is_answer:
        match = re.match(r'([a-e]) (.*)', processed_text)
        if match:
            processed_text = f"the answer is {match.group(1)} {match.group(2)}"
    if processed_text.endswith(','):
        processed_text = processed_text[:-2]
    
    return processed_text

    
# Load and preprocess data
df = pd.read_csv('./med-en-data.csv')
df = df.astype(str)
columns_to_drop = ['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.2']
df = df.drop(columns_to_drop, axis=1)
print('Data loaded')

# Filter rows where 'Quesition' or 'Answer' have more than 223 words
# df = df[df['Answer'].apply(lambda x: len(x.split()) <= 256)]
# df = df[df['Quesition'].apply(lambda x: len(x.split()) <= 256)]
print('Data filtered')

# Preprocess questions and answers
df['Quesition'] = df['Quesition'].apply(preprocess_text)
df['Answer'] = df['Answer'].apply(lambda x : preprocess_text(x , is_answer= True))
print('Preprocessing done')

Data loaded
Data filtered
Preprocessing done


In [3]:
# Define the check function
def check(row):
    q = row['Quesition']
    a = row['Answer']
    # q_clean = re.sub(r'\?$', '', q)  # Remove trailing question mark for comparison
    if a.startswith(q):
        a = a[len(q):].strip()
    return q, a

# Apply the check function
df[['Quesition', 'Answer']] = df.apply(lambda row: pd.Series(check(row)), axis=1)
print('Check function applied')

Check function applied


In [4]:
from keras import backend as K

K.clear_session()

In [5]:
df.shape

(16209, 2)

In [6]:
# def preprocess_with_special_tokens(text):
#     text = preprocess_text(text)
#     return f"<start> {text} <end>"

In [7]:
# df['Quesition'] = df['Quesition'].apply(lambda x : '<start> '+ x + ' <end>')

In [8]:
df.isna().any()

Quesition    False
Answer       False
dtype: bool

In [9]:
x = df['Quesition'].values
y = df['Answer'].values
max_sequence_length = 256



In [10]:
x[222]

'what cause oculopharyngeal muscular dystrophy ?'

In [11]:
y[222]

'oculopharyngeal muscular dystrophy opmd is caused by mutation in the pabpn1 gene the pabpn1 gene provides instruction for making a protein that is active expressed throughout the body in cell the pabpn1 protein play an important role in processing molecule called messenger rna mrna which serve a genetic blueprint for making protein the protein act to protect the mrna from being broken down and allows it to move within the cell mutation in the pabpn1 gene that cause opmd result in a pabpn1 protein that form clump within muscle cell and hence they can not be broken down these clump are thought to impair the normal function of muscle cell and eventually cause cell to die the progressive loss of muscle cell most likely cause the muscle weakness seen in people with opmd it is not known why abnormal pabpn1 protein seem to affect muscle cell in only certain part of the body'

In [12]:
# Tokenize and pad sequences
tokenizer = Tokenizer(num_words=None, oov_token='<OOV>' , filters='!"#$%&()*+-./;<=>@[\\]^_`{|}~\t\n')
tokenizer.fit_on_texts(df['Quesition'].tolist() + df['Answer'].tolist())
# Manually add start and end tokens to the word index
start_token = '<start>'
end_token = '<end>'
start_token_index = len(tokenizer.word_index) + 1
end_token_index = len(tokenizer.word_index) + 2
tokenizer.word_index[start_token] = start_token_index
tokenizer.word_index[end_token] = end_token_index

# Update the index_word dictionary to reflect these changes
tokenizer.index_word[start_token_index] = start_token
tokenizer.index_word[end_token_index] = end_token


In [13]:
input_sequences = tokenizer.texts_to_sequences(x)
# input_sequences = [[start_token_index] + list(seq) for seq in input_sequences]
input_sequences_padded = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='post')

target_sequences = tokenizer.texts_to_sequences(y)
target_sequences_padded = pad_sequences(target_sequences, maxlen=max_sequence_length, padding='post')


In [17]:
# Get the index for the <start> and <end> tokens
start_token_index = tokenizer.word_index['<start>']
end_token_index = tokenizer.word_index['<end>']

# Initialize decoder input and target data with the correct shape
decoder_input_data = np.zeros((target_sequences_padded.shape[0], max_sequence_length), dtype=np.int64)
decoder_target_data = np.zeros((target_sequences_padded.shape[0], max_sequence_length), dtype=np.int64)

# Process each sequence
for i in range(len(target_sequences_padded)):
    # Initialize decoder input with <start> token
    decoder_input_data[i, 0] = start_token_index
    
    # Fill decoder_input_data with the shifted target sequences
    for t in range(1, max_sequence_length):
        if t <= len(target_sequences_padded[i]):
            if t - 1 < len(target_sequences_padded[i]) and target_sequences_padded[i, t - 1] != 0:
                decoder_input_data[i, t] = target_sequences_padded[i, t - 1]
            else:
                decoder_input_data[i, t] = 0  # Padding

    # Fill decoder_target_data with the target sequences and add the <end> token
    for t in range(max_sequence_length - 1):
        if t < len(target_sequences_padded[i]) and target_sequences_padded[i, t] != 0:
            decoder_target_data[i, t] = target_sequences_padded[i, t]
        else:
            break
    if t + 1 < max_sequence_length:
        decoder_target_data[i, t] = end_token_index


In [20]:
decoder_target_data.shape , decoder_input_data.shape , input_sequences_padded.shape

((16209, 256), (16209, 256), (16209, 256))

In [21]:
tokenizer.sequences_to_texts([input_sequences_padded[100]])

['what are the treatment for what i need to know about kidney failure and how it treated ? <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OO

In [22]:
tokenizer.sequences_to_texts([decoder_input_data[100]])

['<start> if you have kidney failure learn about the treatment and think about which one best fit you talk with people who are on hemodialysis or peritoneal dialysis ask what is good and bad about each treatment if you make a choice and find you dont like it talk with your doctor about trying something else ask your doctor about the transplant waiting list and the medicine needed after a transplant talk with people who have had kidney transplant and ask how it ha changed their life if you plan to keep working think about which treatment choice would make working easier if spending time with family and friend mean a lot to you ask which treatment give you the most free time find out which treatment will give you the best chance to be healthy and live longer if you are thinking about conservative management you may wish to speak with your family friend doctor or mental health counselor a you decide you can take control of your care by talking with your doctor you may need time to get use

In [23]:
tokenizer.sequences_to_texts([decoder_target_data[100]])

['if you have kidney failure learn about the treatment and think about which one best fit you talk with people who are on hemodialysis or peritoneal dialysis ask what is good and bad about each treatment if you make a choice and find you dont like it talk with your doctor about trying something else ask your doctor about the transplant waiting list and the medicine needed after a transplant talk with people who have had kidney transplant and ask how it ha changed their life if you plan to keep working think about which treatment choice would make working easier if spending time with family and friend mean a lot to you ask which treatment give you the most free time find out which treatment will give you the best chance to be healthy and live longer if you are thinking about conservative management you may wish to speak with your family friend doctor or mental health counselor a you decide you can take control of your care by talking with your doctor you may need time to get used to you

In [27]:
x_train_q, x_test_q1, y_train_a, y_test_a1 = train_test_split(input_sequences_padded, decoder_target_data, test_size=0.15, random_state=16)
x_test_q, x_val_q, y_test_a, y_val_a = train_test_split(x_test_q1, y_test_a1, test_size=0.5, random_state=16)


x_train_m , x_test_m1 = train_test_split(decoder_input_data , test_size=0.15,random_state=16)
x_test_m , x_val_m = train_test_split(x_test_m1 , test_size=0.5 , random_state=16)

In [28]:
x_train_m.shape , x_train_q.shape , y_train_a.shape

((13777, 256), (13777, 256), (13777, 256))

In [29]:
tokenizer.sequences_to_texts([x_val_q[1]]) , tokenizer.sequences_to_texts([x_val_m[1]]) , tokenizer.sequences_to_texts([y_val_a[1]]) 

(['qthree day after starting a new drug for malaria prophylaxis a 19 year old college student come to the physician because of dark colored urine and fatigue he ha not had any fever dysuria or abdominal pain he ha no history of serious illness physical examination show scleral icterus laboratory study show a hemoglobin of 97 g dl and serum lactate dehydrogenase of 234 u l peripheral blood smear show poikilocytes with bite shaped irregularity which of the following drug ha the patient most likely been taking ? a pyrimethamine b primaquine c wouldapsone would ivermectin e wouldoxycycline <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV> <OOV>

In [30]:
max_sequence_length , len(tokenizer.word_index)+1

(256, 31989)

In [31]:
from tensorflow import keras 
from keras import layers

embed_dim = 128
latent_dim = 400
vocab_size = len(tokenizer.word_index)+1




enc_inp = Input(shape=(max_sequence_length,))
dec_inp = Input(shape=(max_sequence_length,))

embed = Embedding(vocab_size, output_dim=embed_dim, trainable=True)

enc_embed = embed(enc_inp)
enc_gru = GRU(latent_dim, return_sequences=True, return_state=True, dropout=0.24)
enc_op, enc_state = enc_gru(enc_embed)
enc_op = LayerNormalization()(enc_op)

# enc_gru = GRU(latent_dim, return_sequences=True, return_state=True, dropout=0.3)
# enc_op1, enc_state1 = enc_gru(enc_op)
# enc_op1 = LayerNormalization()(enc_op1)

dec_embed = embed(dec_inp)
dec_gru = GRU(latent_dim, return_sequences=True, return_state=True, dropout=0.25)
dec_op, _ = dec_gru(dec_embed, initial_state=enc_state)
dec_op = LayerNormalization()(dec_op)

dec_gru = GRU(latent_dim, return_sequences=True, return_state=True, dropout=0.3 )
dec_op1, _ = dec_gru(dec_op, initial_state=enc_state)
dec_op1 = LayerNormalization()(dec_op1)

# dec_gru = GRU(latent_dim, return_sequences=True, return_state=True, dropout=0.3)
# dec_op2, _ = dec_gru(dec_op1, initial_state=enc_state)
# dec_op2 = LayerNormalization()(dec_op2)

attention = AdditiveAttention()
context_vector, attention_weights = attention([dec_op1, enc_op], return_attention_scores=True)
context_add = Concatenate()([context_vector, dec_op1])

# dense1 = TimeDistributed(Dense(embed_dim, activation='relu'))
# dense_op1 = dense1(context_add)
# dense_op1 = Dropout(0.3)(dense_op1)

dense = TimeDistributed(Dense(vocab_size, activation='softmax'))
dense_op = dense(context_add)

model = Model([enc_inp, dec_inp], dense_op)


In [32]:
# from keras.optimizers import RMSprop
model.compile(optimizer='adam', loss="sparse_categorical_crossentropy")

In [33]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 256)]        0           []                               
                                                                                                  
 input_1 (InputLayer)           [(None, 256)]        0           []                               
                                                                                                  
 embedding (Embedding)          (None, 256, 128)     4094592     ['input_1[0][0]',                
                                                                  'input_2[0][0]']                
                                                                                                  
 gru (GRU)                      [(None, 256, 400),   636000      ['embedding[0][0]']          

In [34]:
callbacks = [
    ModelCheckpoint('./check_model_position_11.h5',verbose=1 , save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4, min_lr=0.00001),
    CSVLogger('data_train.csv'),
    TensorBoard(),
    EarlyStopping(monitor='val_loss' ,patience=10 , restore_best_weights=False)
]

In [35]:
model.fit(
    [x_train_q , x_train_m],
    y_train_a,
    validation_data=([x_val_q , x_val_m] , y_val_a),
    epochs=50,
    shuffle = True,
    callbacks = callbacks,
    batch_size=2
)

Epoch 1/50
Epoch 1: val_loss improved from inf to 1.28673, saving model to .\check_model_position_11.h5
Epoch 2/50
Epoch 2: val_loss improved from 1.28673 to 1.20597, saving model to .\check_model_position_11.h5
Epoch 3/50
Epoch 3: val_loss improved from 1.20597 to 1.15855, saving model to .\check_model_position_11.h5
Epoch 4/50
Epoch 4: val_loss improved from 1.15855 to 1.12799, saving model to .\check_model_position_11.h5
Epoch 5/50
Epoch 5: val_loss improved from 1.12799 to 1.12254, saving model to .\check_model_position_11.h5
Epoch 6/50
Epoch 6: val_loss improved from 1.12254 to 1.12183, saving model to .\check_model_position_11.h5
Epoch 7/50
Epoch 7: val_loss did not improve from 1.12183
Epoch 8/50
Epoch 8: val_loss did not improve from 1.12183
Epoch 9/50
Epoch 9: val_loss did not improve from 1.12183
Epoch 10/50
Epoch 10: val_loss did not improve from 1.12183
Epoch 11/50
Epoch 11: val_loss improved from 1.12183 to 1.12064, saving model to .\check_model_position_11.h5
Epoch 12/50


<keras.callbacks.History at 0x211907fc0a0>

In [36]:
# model.fit(
#     [x_train_q[5000:], x_train_m[5000:]],
#     y_train_a[5000:],
#     validation_data=([x_val_q , x_val_m] , y_val_a),
#     epochs=100,
#     shuffle = True,
#     callbacks = callbacks,
#     batch_size=2
# )

In [37]:
# from keras.models import load_model

# model111 = load_model("C:/Users/Ahmed/Downloads/dataset/check_model_position_11.h5")

In [38]:
model.evaluate([x_test_q , x_test_m] , y_test_a , batch_size=2)



1.193995475769043

In [39]:
def preprocess_single_text(text, tokenizer, max_sequence_length):
    text = preprocess_text(text)
    # text = f"<start> {text} <end>"
    sequence = tokenizer.texts_to_sequences([text])
    # sequence = [[start_token_index] + list(seq) + [end_token_index] for seq in sequence]
    print(tokenizer.sequences_to_texts(sequence))
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length, padding='post')
    return padded_sequence

In [40]:
def predict_answer(question, model, tokenizer, max_sequence_length):
    question_seq = preprocess_single_text(question, tokenizer, max_sequence_length)
    
    # start_token = np.array([[tokenizer.word_index['<start>']]])
    answer_seq = np.ones((1, max_sequence_length))
    answer_seq[0, 0] = tokenizer.word_index['<start>']
    reverse_word_index = {index: word for word, index in tokenizer.word_index.items()}


    for i in range(1, 64):
        output_tokens = model.predict([question_seq, answer_seq], verbose=0 , batch_size=2)
        sampled_token_index = np.argmax(output_tokens[0, i-1, :])
        answer_seq[0, i] = sampled_token_index

        if sampled_token_index == tokenizer.word_index['<end>']:
            break

    answer_tokens = [reverse_word_index[int(idx)] for idx in answer_seq[0] if idx > 0]
    
    if '<start>' in answer_tokens:
        answer_tokens.remove('<start>')
    if '<end>' in answer_tokens:
        answer_tokens.remove('<end>')
    
    
    answer_tokens = [token for token in answer_tokens if token != '<OOV>']
    answer_tokens = [token for token in answer_tokens if token != '<start>']

    

    answer = ' '.join(answer_tokens)
    return answer


In [41]:
# question = "Q:A 2 year old boy is brought to the physician because of fatigue and yellow discoloration of his skin for 2 days. One week ago, he had a 3 day course of low grade fever and runny nose. As a newborn, he underwent a 5 day course of phototherapy for neonatal jaundice. His vital signs are within normal limits. Examination shows jaundice of the skin and conjunctivae. The spleen tip is palpated 3 cm below the left costal margin. His hemoglobin is 9.8 g dl and mean corpuscular hemoglobin concentration is 38 Hb cell. A Coombs test is negative. A peripheral blood smear is shown. This patient is at greatest risk for which of the following complications?? 'A': 'Malaria', 'B': 'Acute chest syndrome', 'C': 'Osteomyelitis', 'D': 'Acute myelogenous leukemia', 'E': 'Cholecystitis' ,"
# predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
# print("Question:", question)
# print("Answer:", predicted_answer)

In [42]:
xx = df['Quesition'].values
yy = df['Answer'].values


In [43]:
xx[1] , yy[1]

('is aicardi goutieres syndrome inherited ?',
 'aicardi goutieres syndrome can have different inheritance pattern in most case it is inherited in an autosomal recessive pattern which mean both copy of the gene in each cell have mutation the parent of an individual with an autosomal recessive condition each carry one copy of the mutated gene but they typically do not show sign and symptom of the condition rarely this condition is inherited in an autosomal dominant pattern which mean one copy of the altered gene in each cell is sufficient to cause the disorder these case result from new mutation in the gene and occur in people with no history of the disorder in their family')

In [44]:
question = xx[1]
predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
print("Question:", question)
print("Answer:", predicted_answer)

['is aicardi goutieres syndrome inherited ?']
Question: is aicardi goutieres syndrome inherited ?
Answer: aicardi goutieres syndrome is inherited in an autosomal dominant pattern which mean one copy of the altered gene in each cell is sufficient to cause the disorder


In [45]:
question = "how are you"
predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['how are you']
Question:  how are you
Answer:  i am specialized in medical advice please ask health related question


In [46]:
question = "hello how are you"
predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['hello how are you']
Question:  hello how are you
Answer:  hello how i can i help you ?


In [47]:
question = "what are the sign of the flu ?"
predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['what are the sign of the flu ?']
Question:  what are the sign of the flu ?
Answer:  the answer is a small piece of material that surround the lung and the large intestine branch away from the lung to the rest of the body the study of the small intestine that surround the lung and the large intestine are small bean shaped organ and each of the small intestine and large intestine are small bean shaped structure that help break


In [48]:
question = "what is love ?"
predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['what is love ?']
Question:  what is love ?
Answer:  the answer is a mental health disorder characterised by a harmful substance called a plaque in the inner ear which is a part of the brain that control the movement of the arm and leg the most common symptom of a person with a spinal cancer may be a sign of the disease a well a those of the same family a well


In [49]:
question = "what going on with you?"
predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['what going on with you ?']
Question:  what going on with you?
Answer:  i am zkzk bot your personal health care ai assistant how are you feeling today


In [50]:
question = "what's cold?"
predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['what is cold ?']
Question:  what's cold?
Answer:  vre is a rare condition that cause the symptom of mental state and the disease the symptom of this condition typically begin in mid adulthood the underlying cause of the condition is unknown there is no evidence of the various type of treatment the underlying condition is available


In [51]:
question = "am I feel happy?"
predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['am i feel happy ?']
Question:  am I feel happy?
Answer:  how might i be treated ?


In [52]:
model.save('try_the_best_51.h5')

In [53]:
from keras.models import load_model
model1 = load_model('C:/Users/Ahmed/Downloads/dataset/try_the_best_51.h5')

# Freeze the layers of the pre-trained model
for layer in model1.layers[:int(0.80 * len(model1.layers))]:
    layer.trainable = False

# Recompile the model
model1.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')


In [54]:
model1.output_shape

(None, 256, 31989)

In [55]:
question = "am I feel happy?"
predicted_answer = predict_answer(question, model, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['am i feel happy ?']
Question:  am I feel happy?
Answer:  how might i be treated ?


In [56]:
len(tokenizer.word_index)

31988

In [68]:
model1.evaluate([x_test_q , x_test_m] , y_test_a , batch_size=2)



0.9723271727561951

In [69]:
# from keras.models import load_model
# model1 = load_model('C:/Users/Ahmed/Downloads/dataset/try_the_best_3.h5')

# # Freeze the layers of the pre-trained model
# for layer in model1.layers[:int(0.7 * len(model1.layers))]:
#     layer.trainable = False

# # Recompile the model
model1.compile(optimizer='adam', loss='sparse_categorical_crossentropy')


In [70]:
model1.fit(
    [x_test_q , x_test_m] , y_test_a,
    epochs = 50,
    batch_size=2,
    validation_data=([x_val_q , x_val_m] , y_val_a),
    callbacks = callbacks,
)

Epoch 1/50
Epoch 1: val_loss did not improve from 1.12064
Epoch 2/50
Epoch 2: val_loss did not improve from 1.12064
Epoch 3/50
Epoch 3: val_loss did not improve from 1.12064
Epoch 4/50
Epoch 4: val_loss did not improve from 1.12064
Epoch 5/50
Epoch 5: val_loss did not improve from 1.12064
Epoch 6/50
Epoch 6: val_loss did not improve from 1.12064
Epoch 7/50
Epoch 7: val_loss did not improve from 1.12064
Epoch 8/50
Epoch 8: val_loss did not improve from 1.12064
Epoch 9/50
Epoch 9: val_loss did not improve from 1.12064
Epoch 10/50
Epoch 10: val_loss did not improve from 1.12064
Epoch 11/50
Epoch 11: val_loss did not improve from 1.12064
Epoch 12/50
Epoch 12: val_loss did not improve from 1.12064


<keras.callbacks.History at 0x2129ff50b20>

In [71]:
question = "what are the sign of the flu ?"
predicted_answer = predict_answer(question, model1, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['what are the sign of the flu ?']
Question:  what are the sign of the flu ?
Answer:  the most common cause of the condition is not known however some people with a blockage of the have no symptom do not have symptom associated with the condition however some people with a blockage of the pancreas do not have symptom associated with the condition however some people with a blockage of the pancreas may not become susceptible to a respiratory infection


In [72]:
question = "hello , how are you"
predicted_answer = predict_answer(question, model1, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['hello how are you']
Question:  hello , how are you
Answer:  hello how i i can help you ?


In [73]:
question = "hello"
predicted_answer = predict_answer(question, model1, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

['hello']
Question:  hello
Answer:  hi how is i am here


In [74]:
question = xx[0]
predicted_answer = predict_answer(question, model1, tokenizer, max_sequence_length)
print("Question:", question)
print("Answer:", predicted_answer)

['a 2 year old boy is brought to the physician because of fatigue and yellow discoloration of his skin for 2 day one week ago he had a 3 day course of low grade fever and runny nose a a newborn he underwent a 5 day course of phototherapy for neonatal jaundice his vital sign are within normal limit examination show jaundice of the skin and conjunctiva the spleen tip is palpated 3 cm below the left costal margin his hemoglobin is 98 g dl and mean corpuscular hemoglobin concentration is 38 hb cell a coombs test is negative a peripheral blood smear is shown this patient is at greatest risk for which of the following complication ? a malaria b acute chest syndrome c osteomyelitis would acute myelogenous leukemia e cholecystitis']
Question: a 2 year old boy is brought to the physician because of fatigue and yellow discoloration of his skin for 2 day one week ago he had a 3 day course of low grade fever and runny nose a a newborn he underwent a 5 day course of phototherapy for neonatal jaundi

In [75]:
question = 'i feel bad today'
predicted_answer = predict_answer(question, model1, tokenizer, max_sequence_length)
print("Question:", question)
print("Answer:", predicted_answer)

['i feel bad today']
Question: i feel bad today
Answer: i am here for you with your query i can help you ?


In [76]:
question = 'i have flu what should I do  to reveal'
predicted_answer = predict_answer(question, model1, tokenizer, max_sequence_length)
print("Question:", question)
print("Answer:", predicted_answer)

['i have flu what should i do to reveal']
Question: i have flu what should I do  to reveal
Answer: i can not see if you are experiencing a eating a eating a balanced diet when you are a very severe with a healthy diet


In [None]:
model.save('lstm_2ly_tuned_4.h5')

In [None]:
import pickle

# saving
with open('tokenizer.pkl', 'wb') as handle:
    pickle.dump(tokenizer, handle)

# loading
with open('tokenizer.pkl', 'rb') as handle:
    tokenizer_1 = pickle.load(handle)

In [None]:
# load model
from keras.models import load_model
import pickle
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


model1 = load_model('C:/Users/Ahmed/Downloads/dataset/check_model_position_11.h5')
# load token
with open('tokenizer.pkl', 'rb') as handle:
    tokenizer_1 = pickle.load(handle)
# load prerpocess functoion
def preprocess_text(txt , is_answer = False):
    txt = txt.lower()
    contractions = {
        "i'm": "i am", "he's": "he is", "she's": "she is", "that's": "that is",
        "what's": "what is", "where's": "where is", "'ll": " will", "'ve": " have",
        "'re": " are", "'d": " would", "won't": "will not", "can't": "can not",
        "wanna": "want to" , "q:a": "q: a" , "healthcaremagic.comi" :"healthcaremagic.com i"
    }
    for contraction, replacement in contractions.items():
        txt = re.sub(contraction, replacement, txt)
    
    txt = re.sub(r'[^\w\s\?]', '', txt)
    txt = re.sub(r'[^\w\s\?]', '', txt)
    if not is_answer:
        txt = re.sub(r"q ",'', txt)
        txt = re.sub(r'\?{2,}','?' , txt)
    # Tokenize
    tokens = word_tokenize(txt)
    # Lemmatize
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    
    # Join tokens back to a single string
    processed_text = ' '.join(tokens)

    # Modify multiple-choice answers to include the format "the answer is x: y"
    if is_answer:
        match = re.match(r'([a-e]) (.*)', processed_text)
        if match:
            processed_text = f"the answer is {match.group(1)} {match.group(2)}"
    if processed_text.endswith(','):
        processed_text = processed_text[:-2]
    
    return processed_text

#  load the prediction function
def preprocess_single_text(text, tokenizer, max_sequence_length):
    text = preprocess_text(text)
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length, padding='post')
    return padded_sequence

def predict_answer(question, model, tokenizer, max_sequence_length):
    question_seq = preprocess_single_text(question, tokenizer, max_sequence_length)
    answer_seq = np.ones((1, max_sequence_length))
    answer_seq[0, 0] = tokenizer.word_index['<start>']
    reverse_word_index = {index: word for word, index in tokenizer.word_index.items()}
    for i in range(1, 64):
        output_tokens = model.predict([question_seq, answer_seq], verbose=0 , batch_size=2)
        sampled_token_index = np.argmax(output_tokens[0, i-1, :])
        answer_seq[0, i] = sampled_token_index

        if sampled_token_index == tokenizer.word_index['<end>']:
            break

    answer_tokens = [reverse_word_index[int(idx)] for idx in answer_seq[0] if idx > 0]
    
    if '<start>' in answer_tokens:
        answer_tokens.remove('<start>')
    if '<end>' in answer_tokens:
        answer_tokens.remove('<end>')
    answer_tokens = [token for token in answer_tokens if token != '<OOV>']
    answer_tokens = [token for token in answer_tokens if token != '<start>']

    answer = ' '.join(answer_tokens)
    return answer



In [None]:
len(tokenizer_1.word_index)

In [None]:
text = 'hello xkxk'
len(text)

In [None]:
question = 'how can I code in love'
predicted_answer = predict_answer(question, model1, tokenizer_1, 224)
print("Question:", question)
print("Answer:", predicted_answer)

In [None]:
from keras.models import Sequential
from keras import layers

In [None]:
# class TransformerEncoder(layers.Layer):
#     def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
#         super().__init__(**kwargs)
#         self.embed_dim = embed_dim 
#         self.dense_dim = dense_dim 
#         self.num_heads = num_heads 
#         self.attention = MultiHeadAttention(
#         num_heads=num_heads, key_dim=embed_dim)
#         self.dense_proj = Sequential(
#         [Dense(dense_dim, activation="relu"),
#         Dense(embed_dim),]
#         )
#         self.layernorm_1 = LayerNormalization()
#         self.layernorm_2 = LayerNormalization()
#     def call(self, inputs, mask=None): 
#         if mask is not None: 
#             mask = mask[:, tf.newaxis, :] 
#         attention_output = self.attention(
#             inputs, inputs, attention_mask=mask)
#         proj_input = self.layernorm_1(inputs + attention_output)
#         proj_output = self.dense_proj(proj_input)
#         return self.layernorm_2(proj_input + proj_output)
#     def get_config(self): 
#         config = super().get_config()
#         config.update({
#         "embed_dim": self.embed_dim,
#         "num_heads": self.num_heads,
#         "dense_dim": self.dense_dim,
#         })
#         return config

class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim 
        self.dense_dim = dense_dim 
        self.num_heads = num_heads 
        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = Sequential([
            Dense(dense_dim, activation="relu"),
            Dense(embed_dim)  # Match the input embedding dimension
        ])
        self.layernorm_1 = LayerNormalization()
        self.layernorm_2 = LayerNormalization()

    def call(self, inputs, mask=None): 
        if mask is not None: 
            mask = mask[:, tf.newaxis, :] 
        attention_output = self.attention(inputs, inputs, attention_mask=mask)
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)

    def get_config(self): 
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config


In [None]:
# class PositionalEmbedding(layers.Layer):
#     def __init__(self, sequence_length, input_dim, output_dim, **kwargs): 
#         super().__init__(**kwargs)
#         self.token_embeddings = layers.Embedding( 
#         input_dim=input_dim, output_dim=output_dim)
#         self.position_embeddings = layers.Embedding(
#         input_dim=sequence_length, output_dim=output_dim) 
#         self.sequence_length = sequence_length
#         self.input_dim = input_dim
#         self.output_dim = output_dim
#     def call(self, inputs):
#         length = tf.shape(inputs)[-1]
#         positions = tf.range(start=0, limit=length, delta=1)
#         embedded_tokens = self.token_embeddings(inputs)
#         embedded_positions = self.position_embeddings(positions)
#         return embedded_tokens + embedded_positions 
#     def compute_mask(self, inputs, mask=None): 
#         return tf.math.not_equal(inputs, 0) 
#     def get_config(self): 
#         config = super().get_config()
#         config.update({
#         "output_dim": self.output_dim,
#         "sequence_length": self.sequence_length,
#         "input_dim": self.input_dim,
#         })
#         return config

class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, input_dim, output_dim, **kwargs): 
        super().__init__(**kwargs)
        self.token_embeddings = Embedding(input_dim=input_dim, output_dim=output_dim)
        self.position_embeddings = Embedding(input_dim=sequence_length, output_dim=output_dim) 
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim

    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions 

    def compute_mask(self, inputs, mask=None): 
        return tf.math.not_equal(inputs, 0) 

    def get_config(self): 
        config = super().get_config()
        config.update({
            "output_dim": self.output_dim,
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
        })
        return config

In [None]:
# class TransformerDecoder(layers.Layer):
#     def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
#         super().__init__(**kwargs)
#         self.embed_dim = embed_dim
#         self.dense_dim = dense_dim
#         self.num_heads = num_heads
#         self.attention_1 = layers.MultiHeadAttention(
#         num_heads=num_heads, key_dim=embed_dim)
#         self.attention_2 = layers.MultiHeadAttention(
#         num_heads=num_heads, key_dim=embed_dim)
#         self.dense_proj = Sequential(
#             [layers.Dense(dense_dim, activation="relu"),
#         layers.Dense(embed_dim),]
#         )
#         self.layernorm_1 = layers.LayerNormalization()
#         self.layernorm_2 = layers.LayerNormalization()
#         self.layernorm_3 = layers.LayerNormalization()
#         self.supports_masking = True 
#     def get_config(self):
#         config = super().get_config()
#         config.update({
#         "embed_dim": self.embed_dim,
#         "num_heads": self.num_heads,
#         "dense_dim": self.dense_dim,
#         })
#         return config
#     def get_causal_attention_mask(self, inputs):
#         input_shape = tf.shape(inputs)
#         batch_size, sequence_length = input_shape[0], input_shape[1]
#         i = tf.range(sequence_length)[:, tf.newaxis]
#         j = tf.range(sequence_length)
#         mask = tf.cast(i >= j, dtype="int32") 
#         mask = tf.reshape(mask, (1, input_shape[1], input_shape[1])) 
#         mult = tf.concat( 
#         [tf.expand_dims(batch_size, -1), 
#         tf.constant([1, 1], dtype=tf.int32)], axis=0) 
#         return tf.tile(mask, mult)
    
#     def call(self, inputs, encoder_outputs, mask=None):
#         causal_mask = self.get_causal_attention_mask(inputs) 
#         if mask is not None: 
#             padding_mask = tf.cast( 
#                 mask[:, tf.newaxis, :], dtype="int32") 
#             padding_mask = tf.minimum(padding_mask, causal_mask) 
#         attention_output_1 = self.attention_1(
#             query=inputs,
#             value=inputs,
#             key=inputs,
#             attention_mask=causal_mask
#         ) 
#         attention_output_1 = self.layernorm_1(inputs + attention_output_1)
#         attention_output_2 = self.attention_2(
#             query=attention_output_1,
#             value=encoder_outputs,
#             key=encoder_outputs,
#             attention_mask=padding_mask, 
#         )
#         attention_output_2 = self.layernorm_2(
#             attention_output_1 + attention_output_2
#         )
#         proj_output = self.dense_proj(attention_output_2)
#         return self.layernorm_3(attention_output_2 + proj_output)


class TransformerDecoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention_1 = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = Sequential([
            Dense(dense_dim, activation="relu"),
            Dense(embed_dim)
        ])
        self.layernorm_1 = LayerNormalization()
        self.layernorm_2 = LayerNormalization()
        self.layernorm_3 = LayerNormalization()
        self.supports_masking = True 

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config

    def get_causal_attention_mask(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32") 
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1])) 
        mult = tf.concat([tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], axis=0) 
        return tf.tile(mask, mult)
    
    def call(self, inputs, encoder_outputs, mask=None):
        causal_mask = self.get_causal_attention_mask(inputs) 
        if mask is not None: 
            padding_mask = tf.cast(mask[:, tf.newaxis, :], dtype="int32") 
            padding_mask = tf.minimum(padding_mask, causal_mask) 
        attention_output_1 = self.attention_1(query=inputs, value=inputs, key=inputs, attention_mask=causal_mask) 
        attention_output_1 = self.layernorm_1(inputs + attention_output_1)
        attention_output_2 = self.attention_2(query=attention_output_1, value=encoder_outputs, key=encoder_outputs, attention_mask=padding_mask)
        attention_output_2 = self.layernorm_2(attention_output_1 + attention_output_2)
        proj_output = self.dense_proj(attention_output_2)
        return self.layernorm_3(attention_output_2 + proj_output)

In [None]:
embed_dim = 128
dense_dim = 128
gru_units = 256
num_heads = 2
vocab_size = len(tokenizer.word_index) + 1

# encoder_inputs = Input(shape=(None,), dtype="int64", name="question")
# x = PositionalEmbedding(max_sequence_length, vocab_size, embed_dim)(encoder_inputs)
# encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x) 
# decoder_inputs = Input(shape=(None,), dtype="int64", name="answer")
# x = PositionalEmbedding(max_sequence_length, vocab_size, embed_dim)(decoder_inputs)
# x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs) 
# x = layers.Dropout(0.5)(x)
# decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x) 
# transformer = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Encoder inputs
encoder_inputs = Input(shape=(None,), dtype="int64", name="question")
x = PositionalEmbedding(max_sequence_length, vocab_size, embed_dim)(encoder_inputs)
x = GRU(gru_units, return_sequences=True)(x)  # Add GRU layer
encoder_outputs = TransformerEncoder(gru_units, dense_dim, num_heads)(x)

# Decoder inputs
decoder_inputs = Input(shape=(None,), dtype="int64", name="answer")
x = PositionalEmbedding(max_sequence_length, vocab_size, embed_dim)(decoder_inputs)
x = GRU(gru_units, return_sequences=True)(x)  # Add GRU layer
x = TransformerDecoder(gru_units, dense_dim, num_heads)(x, encoder_outputs)

# Dropout and final dense layer
x = Dropout(0.5)(x)
decoder_outputs = Dense(vocab_size, activation="softmax")(x)

# Define the model
transformer = Model([encoder_inputs, decoder_inputs], decoder_outputs)



In [None]:
transformer.compile(
 optimizer="rmsprop",
 loss="sparse_categorical_crossentropy",
 metrics=["accuracy"])

In [None]:
transformer.summary()

In [None]:
callbacks = [
    ModelCheckpoint('./check_model_1_2.h5',verbose=1 , save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss'  , factor=0.1 ,patience=25 , min_lr=0.0001 , verbose=1),
    CSVLogger('data_train.csv'),
    TensorBoard(),
    EarlyStopping(monitor='val_loss' ,patience=50 , restore_best_weights=False)
]

In [None]:
transformer.fit(
    [x_train_q, x_train_m],
    y_train_a,
    validation_data=([x_val_q , x_val_m] , y_val_a),
    epochs=75,
    shuffle = True,
    callbacks = callbacks,
    batch_size=2
)

In [None]:
transformer.evaluate([x_test_q , x_test_m] , y_test_a , batch_size=4)

In [None]:
def preprocess_single_text(text, tokenizer, max_sequence_length):
    text = preprocess_text(text)
    # text = f"<START> {text} <END>"
    sequence = tokenizer.texts_to_sequences([text])
    # sequence = [[start_token_index] + list(seq) + [end_token_index] for seq in sequence]
    print(tokenizer.sequences_to_texts(sequence))
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length, padding='post')
    return padded_sequence

In [None]:
def predict_answer(question, model, tokenizer, max_sequence_length):
    question_seq = preprocess_single_text(question, tokenizer, max_sequence_length)
    
    start_token = np.array([[tokenizer.word_index['<start>']]])
    answer_seq = np.ones((1, max_sequence_length))
    answer_seq[0, 0] = tokenizer.word_index['<start>']
    reverse_word_index = {index: word for word, index in tokenizer.word_index.items()}


    for i in range(1, max_sequence_length):
        output_tokens = model.predict([question_seq, answer_seq], verbose=0 , batch_size=2)
        sampled_token_index = np.argmax(output_tokens[0, i-1, :])
        answer_seq[0, i] = sampled_token_index

        if sampled_token_index == tokenizer.word_index['<end>']:
            break

    answer_tokens = [reverse_word_index[int(idx)] for idx in answer_seq[0] if idx > 0]
    
    if '<start>' in answer_tokens:
        answer_tokens.remove('<start>')
    if '<end>' in answer_tokens:
        answer_tokens.remove('<end>')
    
    
    answer_tokens = [token for token in answer_tokens if token != '<OOV>']
    answer_tokens = [token for token in answer_tokens if token != '<start>']

    

    answer = ' '.join(answer_tokens)
    return answer

In [None]:
xx = df['Quesition'].values
xx[1]

In [None]:
question = xx[5000]
predicted_answer = predict_answer(question, transformer, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
question = xx[5]
predicted_answer = predict_answer(question, transformer, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
question = xx[1]
predicted_answer = predict_answer(question, transformer, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
question = "what are the symptom of the cold ?"
predicted_answer = predict_answer(question, transformer, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
question = "what is love?"
predicted_answer = predict_answer(question, transformer, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
question = "how are you"
predicted_answer = predict_answer(question, transformer, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
question = "hello how are you"
predicted_answer = predict_answer(question, transformer, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
question = "what are the sign of the flu ?"
predicted_answer = predict_answer(question, transformer, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
# Save the model
# transformer.save('transformer_model_with_gru.h5')
from keras.models import load_model
# Load the model for further training
model1 = load_model('check_model_1_2.h5', custom_objects={'PositionalEmbedding': PositionalEmbedding, 'TransformerEncoder': TransformerEncoder, 'TransformerDecoder': TransformerDecoder})


In [None]:
model1.evaluate([x_test_q , x_test_m] , y_test_a , batch_size=4)

In [None]:
# Compile the model again (just to be sure)
model1.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

In [None]:
model1.fit(
    [x_test_q , x_test_m] , y_test_a,
    epochs = 50,
    batch_size=4,
    validation_data=([x_val_q , x_val_m] , y_val_a),
    callbacks = callbacks,
)

In [None]:
question = "how are you"
predicted_answer = predict_answer(question, model1, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
question = "hello how are you"
predicted_answer = predict_answer(question, model1, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)

In [None]:
question = "what are the sign of the flu ?"
predicted_answer = predict_answer(question, model1, tokenizer, max_sequence_length)
print("Question: ", question)
print("Answer: ", predicted_answer)