In [23]:
#%pip install transformers datasets
#%pip install tensorflow
%pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [61]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd

# to ensure sentiment analysis is working properly
high_urgency_keywords = {
    "burning": 1,
    "excruciating": 2, 
    "intense": 1, 
    "severe": 2, 
    "chest pain": 2, 
    "difficulty breathing": 2, 
    "high fever": 2, 
    "loss of consciousness": 3
}

low_urgency_keywords = {
    "sore throat": 1, 
    "mild cough": 1, 
    "headache": 1, 
    "runny nose": 1, 
    "fatigue": 1
}

# extract urgency features based on keywords
def keyword_feature_extraction(symptoms):
    high_score = 0
    low_score = 0
    
    for word, weight in high_urgency_keywords.items():
        if word in symptoms.lower():
            high_score += weight
    
    for word, weight in low_urgency_keywords.items():
        if word in symptoms.lower():
            low_score += weight
    
    urgency_score = high_score - low_score

    if urgency_score < 1:
        urgency_score = 1
    elif urgency_score > 5:
        urgency_score = 5
    
    return urgency_score

# load training data (10,000 rows)
data_train = pd.read_csv('synthetic_patient_data.csv')  
descriptions_train = data_train['Description_of_Symptoms']
labels_train = data_train['Urgency_Score']

# load testing data (500 rows)
data_test = pd.read_csv('synthetic_patient_data2.csv')  
descriptions_test = data_test['Description_of_Symptoms']
labels_test = data_test['Urgency_Score']

# tokenize the text
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(descriptions_train)
sequences = tokenizer.texts_to_sequences(descriptions_train)
X = pad_sequences(sequences, maxlen=100)

# apply the keyword extraction function to create a new feature
X_keywords = np.array([keyword_feature_extraction(desc) for desc in descriptions_train])

# combine text sequences & keyword feature into one input array
X_combined = np.hstack((X, X_keywords.reshape(-1, 1)))

# encode labels manually
unique_labels = np.unique(labels_train)
label_to_index = {label: index for index, label in enumerate(unique_labels)}
y = np.array([label_to_index[label] for label in labels_train])

# build neural network
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(5000, 128, input_length=100),
    tf.keras.layers.LSTM(64, dropout=0.3, recurrent_dropout=0.2),
    tf.keras.layers.Dense(len(unique_labels), activation='softmax')
])

# compile model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# train model on training dataset
model.fit(X, y, epochs=10, batch_size=32)

# tokenize testing data
test_sequences = tokenizer.texts_to_sequences(descriptions_test)
X_test = pad_sequences(test_sequences, maxlen=100)

# encode testing labels
y_test = np.array([label_to_index[label] for label in labels_test])

# evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy:.4f}')  


Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.4206 - loss: 1.3144
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.5594 - loss: 0.9681
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.6273 - loss: 0.8479
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.6892 - loss: 0.7337
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.7171 - loss: 0.6491
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.7543 - loss: 0.5826
Epoch 7/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.7890 - loss: 0.5016
Epoch 8/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.8056 - loss: 0.4592
Epoch 9/10
[1m313/313[0m [32m

Website stuff

In [64]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

# predict urgency score based on input symptoms
def predict_urgency(symptoms, tokenizer, model, label_to_index):
    # tokenize and pad input symptoms
    sequences = tokenizer.texts_to_sequences([symptoms])
    X_input = pad_sequences(sequences, maxlen=100)
    
    prediction = model.predict(X_input)
    
    predicted_class_index = np.argmax(prediction, axis=1)[0]
    
    urgency_score = list(label_to_index.keys())[list(label_to_index.values()).index(predicted_class_index)]
    
    return urgency_score

def main():
    patient_name = input("Enter your name: ")
    patient_age = input("Enter your age: ")
    patient_id = input("Enter your ID: ")
    patient_symptoms = input("Describe your symptoms: ")
    
    print(f"\nPatient Information:\nName: {patient_name}\nAge: {patient_age}\nID: {patient_id}\nSymptoms: {patient_symptoms}\n")

    urgency_score = predict_urgency(patient_symptoms, tokenizer, model, label_to_index)
    
    print(f"\nPredicted Urgency Score: {urgency_score}\n")

if __name__ == '__main__':
    main()



Patient Information:
Name: Jack
Age: 8
ID: 1234
Symptoms: I have a headache and mild cough.

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step

Predicted Urgency Score: 2

