In [24]:
import pandas as pd

In [114]:
# Input files: the symptom/diagnosis table and the diagnosis -> precautions table.
DATA_PATH = 'data_symptoms38_diagnosis.csv'
PREC_PATH = 'data_prec.csv'

# Read both CSV files in one pass, in the same order as the paths above.
data, data_prec = (pd.read_csv(csv_path) for csv_path in (DATA_PATH, PREC_PATH))

In [165]:
# Preview the first rows of the symptom table. The saved output below shows
# integer diagnosis codes because this cell was last executed after the
# label-encoding cell had already replaced the diagnosis names.
data.head()

Unnamed: 0,itching,skin_rash,continuous_sneezing,shivering,chills,joint_pain,muscle_wasting,vomiting,burning_micturition,fatigue,...,weakness_of_one_body_side,bladder_discomfort,passage_of_gases,toxic_look_(typhos),depression,family_history,blood_in_sputum,pus_filled_pimples,skin_peeling,diagnosis
0,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,15
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,15
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,15
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,15
4,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,15


In [166]:
# Preview the first rows of the diagnosis -> precautions lookup table.
data_prec.head()

Unnamed: 0,diagnosis,precautions
0,Fungal infection,"bath twice, use detol or neem in bathing water..."
1,Allergy,"allergy-proof your home, avoid allergens, use ..."
2,GERD,"avoid fatty spicy food, avoid lying down after..."
3,Chronic cholestasis,"cold baths, anti itch medicine, consult doctor..."
4,Drug Reaction,"avoid known allergens, read medication labels ..."


In [63]:
from sklearn.preprocessing import LabelEncoder

In [117]:
# Data encoding
# Map every diagnosis name to an integer class id. The encoder is fitted exactly
# once so that `encoder.classes_` (used later to translate predicted ids back to
# names) always lines up with the codes stored in `data['diagnosis']`.
if pd.api.types.is_numeric_dtype(data['diagnosis']):
    # Re-running this cell on already-encoded data would refit the encoder on
    # the integer codes and silently replace the disease names in
    # `encoder.classes_`, so fail loudly instead.
    raise RuntimeError(
        "data['diagnosis'] is already encoded; re-run the data loading cell "
        "before encoding again."
    )

encoder = LabelEncoder()
diagnosis_codes = encoder.fit_transform(data['diagnosis'])

# Lookup table: original diagnosis name next to its integer code (one row per sample).
encoder_params = pd.DataFrame({'diagnosis': data['diagnosis'], 'code': diagnosis_codes})

# Replace the names by their codes in the working table.
data['diagnosis'] = diagnosis_codes

In [170]:
# Show one row per distinct (diagnosis name, code) pair, ordered by code.
encoder_params.drop_duplicates().sort_values('code')

Unnamed: 0,diagnosis,code
360,(vertigo) Paroymsal Positional Vertigo,0
60,AIDS,1
370,Acne,2
240,Alcoholic hepatitis,3
10,Allergy,4
350,Arthritis,5
90,Bronchial Asthma,6
120,Cervical spondylosis,7
160,Chicken pox,8
30,Chronic cholestasis,9


In [64]:
from sklearn.model_selection import train_test_split

In [None]:
# Every column except the last is used as a feature; the last column is the target.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Stratified 80/20 hold-out split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=24,
    stratify=y,
)

# Report the shapes of both partitions.
for split_name, features, labels in (
    ('Training', X_train, y_train),
    ('Testing', X_test, y_test),
):
    print(f'{split_name}: {features.shape}, {labels.shape}')

# Load the saved models

In [69]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.models import load_model
import joblib

In [159]:
# Restore the three previously saved models from the working directory.
# NOTE: joblib.load unpickles arbitrary objects, so only load files from a trusted source.
nn_model = load_model('nn_model.h5')
nb_model, rf_model = (
    joblib.load(model_file) for model_file in ('nb_model.joblib', 'rf_model.joblib')
)

# Main function implementation

In [None]:
import numpy as np

In [160]:
def disease_diagnosis(symptoms):
  """Return the diagnosis name chosen by a majority vote of the three models.

  Args:
    symptoms: flat sequence of 0/1 symptom flags, e.g. [0, 1, 0, 0, ...]
      (38 elements according to the original note), in the column order the
      models expect.

  Returns:
    The class name (an entry of data_dict['predictions_classes']) predicted by
    at least two of the models. When all three models disagree, the neural
    network's prediction is returned, then naive Bayes, then random forest --
    an explicit priority instead of the alphabetically first name.

  Note:
    Reads the notebook globals nn_model, nb_model, rf_model and data_dict;
    data_dict is created in a later cell, which must run before the first call.
  """
  input_data = np.array(symptoms).reshape(1, -1)
  class_names = data_dict['predictions_classes']

  # Each model returns one row of per-class scores; argmax gives the class index.
  # Assumes the score columns follow the same order as class_names -- TODO confirm
  # against the models' training setup.
  nb_prediction = class_names[np.argmax(nb_model.predict_proba(input_data))]
  rf_prediction = class_names[np.argmax(rf_model.predict_proba(input_data))]
  nn_prediction = class_names[np.argmax(nn_model.predict(input_data))]

  # The list order is the tie-break priority: max() keeps the first element
  # among those with the highest vote count.
  votes = [nn_prediction, nb_prediction, rf_prediction]
  return max(votes, key=votes.count)

# Telegram bot implementation

In [None]:
!pip install pyTelegramBotApi

In [96]:
import telebot
from telebot import types

In [161]:
# Raw feature column names, e.g. 'skin_rash'.
symptoms = X.columns.values

# Human-readable label ('Skin Rash') -> answer flag; every flag starts at 0.
symptom_names = dict.fromkeys(
    (' '.join(word.capitalize() for word in column.split('_')) for column in symptoms),
    0,
)

# Shared lookup used by the prediction function: symptom flags and class names.
data_dict = dict(
    symptom_index=symptom_names,
    predictions_classes=encoder.classes_,
)

symptom_list = [*symptom_names]
symptom_list

In [None]:
import os

# Read the bot token from the TELEGRAM_BOT_TOKEN environment variable so the real
# secret never has to be written into the notebook; the placeholder is kept as
# the fallback for backward compatibility.
token = os.environ.get('TELEGRAM_BOT_TOKEN', 'YOUR_TELEGRAM_BOT_TOKEN')
bot = telebot.TeleBot(token)

# Survey progress (question index and collected answers) is carried through the
# next-step handler arguments, so every chat has its own independent state
# instead of sharing a global counter and a global answers dictionary.
@bot.message_handler(commands=['symptoms'])
def start_survey(message):
    """Run the yes/no symptom questionnaire for the chat that sent /symptoms.

    Asks one question per entry of symptom_names, collects the answers as a
    list of 0/1 flags in the same order, then sends the diagnosis returned by
    disease_diagnosis together with the matching precautions from data_prec.

    Args:
        message: the incoming Telegram message that triggered the command.
    """
    symptom_list = list(symptom_names.keys())

    def ask(chat_message, index, answers):
        """Send question number `index` and register the handler for the reply."""
        bot.send_message(chat_message.chat.id, f'Do you have {symptom_list[index]}?')
        bot.register_next_step_handler(chat_message, handle_response, index, answers)

    def handle_response(reply, index, answers):
        """Record the reply to question `index`, then ask the next one or finish."""
        # reply.text is None for non-text messages (photos, stickers, ...);
        # treat those as an invalid answer instead of crashing the handler.
        response = (reply.text or '').strip().lower()
        if response not in ('yes', 'no'):
            # Invalid response: explain and wait for another reply to the same question.
            bot.send_message(reply.chat.id, 'Invalid response. Please answer with "yes" or "no".')
            bot.register_next_step_handler(reply, handle_response, index, answers)
            return

        # Build a new list rather than mutating the one captured by the handler.
        answers = answers + [1 if response == 'yes' else 0]

        next_index = index + 1
        if next_index < len(symptom_list):
            ask(reply, next_index, answers)
            return

        # All questions answered: predict and look up the precautions once.
        diagnosis = disease_diagnosis(answers)
        matches = data_prec.loc[data_prec['diagnosis'] == diagnosis, 'precautions'].values
        precautions = matches[0] if len(matches) > 0 else "No precautions found"
        bot.send_message(reply.chat.id, f'There is your diagnosis: {diagnosis}. There are precautions: {precautions.capitalize()}.')

    # Send the first question with an empty answer list.
    ask(message, 0, [])

# Start polling Telegram for updates. none_stop=True is intended to keep the
# bot polling after API errors -- NOTE(review): confirm against the installed
# pyTelegramBotAPI version. This call normally blocks the cell while the bot runs.
bot.polling(none_stop=True)