In [15]:
######## A Healthcare Domain Chatbot to simulate the predictions of a General Physician ########
######## A pragmatic Approach for Diagnosis ############

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd



from sklearn.tree import export_graphviz

# Importing the dataset
training_dataset = pd.read_csv('Training.csv')
test_dataset = pd.read_csv('Testing.csv')

# Slicing and Dicing the dataset to separate features from predictions
X = training_dataset.iloc[:, 0:132].values
y = training_dataset.iloc[:, -1].values

# Dimensionality Reduction for removing redundancies
dimensionality_reduction = training_dataset.groupby(training_dataset['prognosis']).max()

# Encoding String values to integer constants
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
y = labelencoder.fit_transform(y)

# Splitting the dataset into training set and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0, random_state = 0)

# Implementing the Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)

# Saving the information of columns
cols     = training_dataset.columns
cols     = cols[:-1]


# Checking the Important features
importances = classifier.feature_importances_
indices = np.argsort(importances)[::-1]
features = cols

# Implementing the Visual Tree
from sklearn.tree import _tree

# Method to simulate the working of a Chatbot by extracting and formulating questions
def execute_bot():

    print("Please reply with yes/Yes or no/No for the following symptoms") 
    def print_disease(node):
        #print(node)
        node = node[0]
        #print(len(node))
        val  = node.nonzero() 
        #print(val)
        disease = labelencoder.inverse_transform(val[0])
        return disease
    def tree_to_code(tree, feature_names):
        tree_ = tree.tree_
        #print(tree_)
        
        export_graphviz(tree, out_file='vtree.dot')
        
        feature_name = [
            feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
            for i in tree_.feature
        ]
        #print("def tree({}):".format(", ".join(feature_names)))
        symptoms_present = []
        def recurse(node, depth):
            indent = "  " * depth
            if tree_.feature[node] != _tree.TREE_UNDEFINED:
                name = feature_name[node]
                threshold = tree_.threshold[node]
                print(name + " ?")
                ans = input()
                ans = ans.lower()
                if ans == 'yes':
                    val = 1
                else:
                    val = 0
                if  val <= threshold:
                    recurse(tree_.children_left[node], depth + 1)
                else:
                    symptoms_present.append(name)
                    recurse(tree_.children_right[node], depth + 1)
            else:
                present_disease = print_disease(tree_.value[node])
                print( "You may have " +  present_disease )
                print()
                red_cols = dimensionality_reduction.columns 
                symptoms_given = red_cols[dimensionality_reduction.loc[present_disease].values[0].nonzero()]
                print("symptoms present  " + str(list(symptoms_present)))
                print()
                print("symptoms given "  +  str(list(symptoms_given)) )  
                print()
                confidence_level = (1.0*len(symptoms_present))/len(symptoms_given)
                print("confidence level is " + str(confidence_level))
                print()
                print('The model suggests:')
                print()
                row = doctors[doctors['disease'] == present_disease[0]]
                print('Consult ', str(row['name'].values))
                print()
                print('Visit ', str(row['link'].values))
                #print(present_disease[0])
                
    
        recurse(0, 1)
    
    tree_to_code(classifier,cols)



# This section of code to be run after scraping the data

doc_dataset = pd.read_csv('doctors_dataset.csv', names = ['Name', 'Description'])


diseases = dimensionality_reduction.index
diseases = pd.DataFrame(diseases)

doctors = pd.DataFrame()
doctors['name'] = np.nan
doctors['link'] = np.nan
doctors['disease'] = np.nan

doctors['disease'] = diseases['prognosis']


doctors['name'] = doc_dataset['Name']
doctors['link'] = doc_dataset['Description']

record = doctors[doctors['disease'] == 'AIDS']
record['name']
record['link']




# Execute the bot and see it in Action
execute_bot()


Please reply with yes/Yes or no/No for the following symptoms
slurred_speech ?
no
throat_irritation ?
no
increased_appetite ?
no
brittle_nails ?
no
pain_behind_the_eyes ?
no
coma ?
no
red_spots_over_body ?
yes
['You may have Chicken pox']

symptoms present  ['red_spots_over_body']

symptoms given ['itching', 'skin_rash', 'fatigue', 'lethargy', 'high_fever', 'headache', 'loss_of_appetite', 'mild_fever', 'swelled_lymph_nodes', 'malaise', 'red_spots_over_body']

confidence level is 0.09090909090909091

The model suggests:

Consult  ['Dr. Inderjeet Singh']

Visit  ['https://www.practo.com/delhi/doctor/inderjeet-singh-ayurveda-sexologist?specialization=Homoeopath&practice_id=1219975']


In [19]:
def execute_bot():

    print("Please reply with yes/Yes or no/No for the following symptoms") 
    def print_disease(node):
        #print(node)
        node = node[0]
        #print(len(node))
        val  = node.nonzero() 
        #print(val)
        disease = labelencoder.inverse_transform(val[0])
        return disease
    def tree_to_code(tree, feature_names):
        tree_ = tree.tree_
        #print(tree_)
        
        export_graphviz(tree, out_file='vtree.dot')
        
        feature_name = [
            feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
            for i in tree_.feature
        ]
        #print("def tree({}):".format(", ".join(feature_names)))
        symptoms_present = []
        def recurse(node, depth):
            indent = "  " * depth
            if tree_.feature[node] != _tree.TREE_UNDEFINED:
                name = feature_name[node]
                threshold = tree_.threshold[node]
                print(name + " ?")
                ans = input()
                ans = ans.lower()
                if ans == 'yes':
                    val = 1
                else:
                    val = 0
                if  val <= threshold:
                    recurse(tree_.children_left[node], depth + 1)
                else:
                    symptoms_present.append(name)
                    recurse(tree_.children_right[node], depth + 1)
            else:
                present_disease = print_disease(tree_.value[node])
                print( "You may have " +  present_disease )
                print()
                red_cols = dimensionality_reduction.columns 
                symptoms_given = red_cols[dimensionality_reduction.loc[present_disease].values[0].nonzero()]
                print("symptoms present  " + str(list(symptoms_present)))
                print()
                print("symptoms given "  +  str(list(symptoms_given)) )  
                print()
                confidence_level = (1.0*len(symptoms_present))/len(symptoms_given)
                print("confidence level is " + str(confidence_level))
                print()
                print('The model suggests:')
                print()
                row = doctors[doctors['disease'] == present_disease[0]]
                print('Consult ', str(row['name'].values))
                print()
                print('Visit ', str(row['link'].values))
                #print(present_disease[0])
                
    
        recurse(0, 1)
    
    tree_to_code(classifier,cols)



# This section of code to be run after scraping the data

doc_dataset = pd.read_csv('doctors_dataset.csv', names = ['Name', 'Description'])


diseases = dimensionality_reduction.index
diseases = pd.DataFrame(diseases)

doctors = pd.DataFrame()
doctors['name'] = np.nan
doctors['link'] = np.nan
doctors['disease'] = np.nan

doctors['disease'] = diseases['prognosis']


doctors['name'] = doc_dataset['Name']
doctors['link'] = doc_dataset['Description']

record = doctors[doctors['disease'] == 'AIDS']
record['name']
record['link']




# Execute the bot and see it in Action
execute_bot()


Please reply with yes/Yes or no/No for the following symptoms
slurred_speech ?
no
throat_irritation ?
no
increased_appetite ?
yes
['You may have Diabetes ']

symptoms present  ['increased_appetite']

symptoms given ['fatigue', 'weight_loss', 'restlessness', 'lethargy', 'irregular_sugar_level', 'blurred_and_distorted_vision', 'obesity', 'excessive_hunger', 'increased_appetite', 'polyuria']

confidence level is 0.1

The model suggests:

Consult  ['Dr. Anshul Gupta']

Visit  ['https://www.practo.com/delhi/doctor/dr-anshul-gupta-ear-nose-throat-ent-specialist-1?specialization=Ear-Nose-Throat%20(ENT)%20Specialist&practice_id=712546']


In [17]:
indices

array([ 25, 111, 113,  51,  50,  41, 104,  36,  72, 101,  77,  99,  97,
        58,   5, 116,  66,  95,  86,  60, 126, 109,  93, 131,  89,  98,
        83,  79, 107,  19,  40,  24,  64,  56,  33, 106, 122, 102,  13,
        43,   0,  14,   6,   4, 124,   2,   8, 130,  85,  12,  90,  26,
        39,  32,  31,  11, 128,  10,   7,  57,  65,  87,  96,  82,  70,
       100,  20,  21,  22,  23, 125,  76,  18,  27,  28,  29,  30,  75,
        74, 127,  16,  17,  34,  15,  78,  80,  81,   9,  94,  84,  92,
       129,   3,  88,   1, 123,  35,  69,  61,  55, 114, 103, 112,  59,
       110,  62,  53,  63, 108,  71, 105,  67,  68,  54,  52, 121, 118,
        37,  38,  73, 120, 119,  42,  44, 115,  91,  46,  47,  48,  49,
       117,  45])

In [20]:
indices

array([ 25, 111, 113,  51,  50,  41, 104,  36,  72, 101,  77,  99,  97,
        58,   5, 116,  66,  95,  86,  60, 126, 109,  93, 131,  89,  98,
        83,  79, 107,  19,  40,  24,  64,  56,  33, 106, 122, 102,  13,
        43,   0,  14,   6,   4, 124,   2,   8, 130,  85,  12,  90,  26,
        39,  32,  31,  11, 128,  10,   7,  57,  65,  87,  96,  82,  70,
       100,  20,  21,  22,  23, 125,  76,  18,  27,  28,  29,  30,  75,
        74, 127,  16,  17,  34,  15,  78,  80,  81,   9,  94,  84,  92,
       129,   3,  88,   1, 123,  35,  69,  61,  55, 114, 103, 112,  59,
       110,  62,  53,  63, 108,  71, 105,  67,  68,  54,  52, 121, 118,
        37,  38,  73, 120, 119,  42,  44, 115,  91,  46,  47,  48,  49,
       117,  45])

In [5]:
classifier

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [27]:
# Checking the Important features
importances = classifier.feature_importances_
indices = np.argsort(importances)[::-1]
features = cols

In [28]:
indices

array([ 25, 111, 113,  51,  50,  41, 104,  36,  72, 101,  77,  99,  97,
        58,   5, 116,  66,  95,  86,  60, 126, 109,  93, 131,  89,  98,
        83,  79, 107,  19,  40,  24,  64,  56,  33, 106, 122, 102,  13,
        43,   0,  14,   6,   4, 124,   2,   8, 130,  85,  12,  90,  26,
        39,  32,  31,  11, 128,  10,   7,  57,  65,  87,  96,  82,  70,
       100,  20,  21,  22,  23, 125,  76,  18,  27,  28,  29,  30,  75,
        74, 127,  16,  17,  34,  15,  78,  80,  81,   9,  94,  84,  92,
       129,   3,  88,   1, 123,  35,  69,  61,  55, 114, 103, 112,  59,
       110,  62,  53,  63, 108,  71, 105,  67,  68,  54,  52, 121, 118,
        37,  38,  73, 120, 119,  42,  44, 115,  91,  46,  47,  48,  49,
       117,  45])