In [63]:
######## A Healthcare Domain Chatbot to simulate the predictions of a General Physician ########
######## A pragmatic Approach for Diagnosis ############

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [64]:
from sklearn.tree import export_graphviz

In [65]:
# Importing the dataset
training_dataset = pd.read_csv('Training.csv')
test_dataset = pd.read_csv('Testing.csv')


In [66]:
# Slicing and Dicing the dataset to separate features from predictions
X = training_dataset.iloc[:, 0:132].values
y = training_dataset.iloc[:, -1].values

In [67]:
# Dimensionality Reduction for removing redundancies
dimensionality_reduction = training_dataset.groupby(training_dataset['prognosis']).max()


In [68]:
# Encoding String values to integer constants
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
y = labelencoder.fit_transform(y)


In [69]:
# Splitting the dataset into training set and test set
from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0, random_state = 0)
X_train, X_test = X, y


In [70]:
# Implementing the Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)


DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [50]:
# Saving the information of columns
cols     = training_dataset.columns
cols     = cols[:-1]


In [51]:
# Checking the Important features
importances = classifier.feature_importances_
indices = np.argsort(importances)[::-1]
features = cols

In [52]:
# Implementing the Visual Tree
from sklearn.tree import _tree


In [53]:
# Method to simulate the working of a Chatbot by extracting and formulating questions
def execute_bot():

    print("Please reply with yes/Yes or no/No for the following symptoms") 
    def print_disease(node):
        #print(node)
        node = node[0]
        #print(len(node))
        val  = node.nonzero() 
        #print(val)
        disease = labelencoder.inverse_transform(val[0])
        return disease
    def tree_to_code(tree, feature_names):
        tree_ = tree.tree_
        #print(tree_)
        

        
        feature_name = [
            feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
            for i in tree_.feature
        ]
        #print("def tree({}):".format(", ".join(feature_names)))
        symptoms_present = []
        def recurse(node, depth):
            indent = "  " * depth
            if tree_.feature[node] != _tree.TREE_UNDEFINED:
                name = feature_name[node]
                threshold = tree_.threshold[node]
                print(name + " ?")
                ans = input()
                ans = ans.lower()
                if ans == 'yes':
                    val = 1
                else:
                    val = 0
                if  val <= threshold:
                    recurse(tree_.children_left[node], depth + 1)
                else:
                    symptoms_present.append(name)
                    recurse(tree_.children_right[node], depth + 1)
            else:
                present_disease = print_disease(tree_.value[node])
                print( "You may have " +  present_disease )
                print()
                red_cols = dimensionality_reduction.columns 
                symptoms_given = red_cols[dimensionality_reduction.loc[present_disease].values[0].nonzero()]
                print("symptoms present  " + str(list(symptoms_present)))
                print()
                print("symptoms given "  +  str(list(symptoms_given)) )  
                print()
                confidence_level = (1.0*len(symptoms_present))/len(symptoms_given)
                print("confidence level is " + str(confidence_level))
                print()
                print('The model suggests:')
                print()
                row = doctors[doctors['disease'] == present_disease[0]]
                print('Consult ', str(row['name'].values))
                print()
                print('Visit ', str(row['link'].values))
                #print(present_disease[0])
                
    
        recurse(0, 1)
    
    tree_to_code(classifier,cols)



In [54]:
# This section of code to be run after scraping the data

doc_dataset = pd.read_csv('doctors_dataset.csv', names = ['Name', 'Description'])


In [55]:
diseases = dimensionality_reduction.index
diseases = pd.DataFrame(diseases)

In [56]:
doctors = pd.DataFrame()
doctors['name'] = np.nan
doctors['link'] = np.nan
doctors['disease'] = np.nan


In [57]:
doctors['disease'] = diseases['prognosis']

In [58]:
doctors['name'] = doc_dataset['Name']
doctors['link'] = doc_dataset['Description']

In [59]:
record = doctors[doctors['disease'] == 'AIDS']
record['name']
record['link']

1    https://www.practo.com/delhi/doctor/dr-54-gene...
Name: link, dtype: object

In [60]:
execute_bot()

Please reply with yes/Yes or no/No for the following symptoms
loss_of_smell ?

stomach_bleeding ?

receiving_unsterile_injections ?
no
abnormal_menstruation ?
no
fast_heart_rate ?
yes
['You may have Pneumonia']

symptoms present  ['fast_heart_rate']

symptoms given ['chills', 'fatigue', 'cough', 'high_fever', 'breathlessness', 'sweating', 'malaise', 'phlegm', 'chest_pain', 'fast_heart_rate', 'rusty_sputum']

confidence level is 0.09090909090909091

The model suggests:

Consult  ['Dr. Lipy Gupta']

Visit  ['https://www.practo.com/delhi/doctor/dr-lipy-gupta-dermatologist-cosmetologist?specialization=Dermatologist&practice_id=689248']


In [61]:
execute_bot()

Please reply with yes/Yes or no/No for the following symptoms
loss_of_smell ?
yes
['You may have Common Cold']

symptoms present  ['loss_of_smell']

symptoms given ['continuous_sneezing', 'chills', 'fatigue', 'cough', 'high_fever', 'headache', 'swelled_lymph_nodes', 'malaise', 'phlegm', 'throat_irritation', 'redness_of_eyes', 'sinus_pressure', 'runny_nose', 'congestion', 'chest_pain', 'loss_of_smell', 'muscle_pain']

confidence level is 0.058823529411764705

The model suggests:

Consult  ['Dr. Manish Munjal']

Visit  ['https://www.practo.com/delhi/doctor/dr-manish-munjal-ear-nose-throat-ent-specialist-1?specialization=Ear-Nose-Throat%20(ENT)%20Specialist&practice_id=1045243']


In [71]:
export_graphviz(classifier, out_file='tree1.dot')