<a href="https://colab.research.google.com/github/venukashoju/ML/blob/main/Build_a_Deep_Learning_based_Medical_Diagnoser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input ,Dense,Embedding,LSTM

In [2]:
df = pd.read_csv('https://media.geeksforgeeks.org/wp-content/uploads/20240319150242/medical_data.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 407 entries, 0 to 406
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Patient_Problem  407 non-null    object
 1   Disease          407 non-null    object
 2   Prescription     407 non-null    object
dtypes: object(3)
memory usage: 9.7+ KB


In [3]:
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(df['Patient_Problem'])
sequences = tokenizer.texts_to_sequences(df['Patient_Problem'])

In [4]:
max_len = max(len(x) for x in sequences)
padded_sequences = pad_sequences(sequences,maxlen=max_len)

In [5]:
label_encoder_disease = LabelEncoder()
label_encoder_prescription = LabelEncoder()
disease_labels = label_encoder_disease.fit_transform(df['Disease'])
prescription_labels = label_encoder_prescription.fit_transform(df['Prescription'])
disease_labels_categorical = to_categorical(disease_labels)
prescription_labels_categorical = to_categorical(prescription_labels)
disease_labels_categorical

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [6]:
Y = np.hstack((disease_labels_categorical, prescription_labels_categorical))

In [9]:
input_layer = Input(shape = (max_len,))
embedding_layer = Embedding(input_dim=5000, output_dim=64)(input_layer)
lstm_layer = LSTM(64)(embedding_layer)
disease_output = Dense(len(label_encoder_disease.classes_), activation='softmax', name='disease_output')(lstm_layer)
prescription_output = Dense(len(label_encoder_prescription.classes_), activation='softmax', name='prescription_output')(lstm_layer)

In [10]:
model = Model(inputs=input_layer, outputs=[disease_output, prescription_output])
model.compile(
    loss={'disease_output': 'categorical_crossentropy',
    'prescription_output': 'categorical_crossentropy'},
    optimizer='adam',
    metrics={'disease_output': ['accuracy'], 'prescription_output': ['accuracy']}
)
model.summary()

In [11]:
model.fit(padded_sequences, {'disease_output': disease_labels_categorical, 'prescription_output':prescription_labels_categorical}, epochs=100, batch_size=32)

Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - disease_output_accuracy: 0.0315 - loss: 11.1430 - prescription_output_accuracy: 0.0050
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - disease_output_accuracy: 0.0560 - loss: 11.1196 - prescription_output_accuracy: 0.0352
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - disease_output_accuracy: 0.0428 - loss: 11.0515 - prescription_output_accuracy: 0.0181
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - disease_output_accuracy: 0.0274 - loss: 10.8849 - prescription_output_accuracy: 0.0166
Epoch 5/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - disease_output_accuracy: 0.0431 - loss: 10.7827 - prescription_output_accuracy: 0.0144
Epoch 6/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - disease_output_accuracy: 0.0635 - los

<keras.src.callbacks.history.History at 0x7ae160fdffa0>

In [20]:
def make_prediction(patient_problem):
    sequence = tokenizer.texts_to_sequences([patient_problem])
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post')
    prediction = model.predict(padded_sequence)
    disease_index = np.argmax(prediction[0], axis=1)[0]
    prescription_index = np.argmax(prediction[1], axis=1)[0]
    disease_predicted = label_encoder_disease.inverse_transform([disease_index])[0]
    prescription_predicted = label_encoder_prescription.inverse_transform([prescription_index])[0]
    print(f"Predicted Disease: {disease_predicted}")
    print(f"Suggested Prescription: {prescription_predicted}")
patient_input = "I am feeling like vomiting."
make_prediction(patient_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Predicted Disease: Typhoid Fever
Suggested Prescription: Anticoagulant therapy; wearing compression stockings.
