## Installation of Dependencies:

In [1]:
# Import the required Operating System, Numpy, and Tensorflow:

import os
import numpy as np
import tensorflow as tf

In [2]:
# Import Pandas:

import pandas as pd

In [3]:
# Import Keras dependencies:

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense
from tensorflow.keras.datasets import mnist

In [4]:
# Possibly useful Machine Learning libraries:

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report,accuracy_score
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
import math

## Data Import

In [5]:
# Import our data from CSVs stored in AWS and make Pandas dataframes.

train_df = pd.read_csv('https://brandon-12-07-2022.s3.ap-southeast-2.amazonaws.com/Training.csv')
test_df = pd.read_csv('https://brandon-12-07-2022.s3.ap-southeast-2.amazonaws.com/Testing.csv')

In [6]:
# Show first 2 records of training data to verify the shape of the dataframe:

train_df.head(2)

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis,Unnamed: 133
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,


In [7]:
# Show first 2 records of testing data to verify the shape of the dataframe::

test_df.head(2)

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Allergy


## Data ETL

In [8]:
# Verify details of train_df dataframe:

train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4920 entries, 0 to 4919
Columns: 134 entries, itching to Unnamed: 133
dtypes: float64(1), int64(132), object(1)
memory usage: 5.0+ MB


In [9]:
# Verify details of test_df dataframe:

test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Columns: 133 entries, itching to prognosis
dtypes: int64(132), object(1)
memory usage: 43.8+ KB


In [10]:
# Remove unwanted column called "Unnamed: 133" in the raw data as it is irrelevent:

del train_df["Unnamed: 133"]

In [11]:
# Remove any rows where a value is NaN:

cleaned_train_df=train_df.dropna(how='any')
cleaned_test_df=test_df.dropna(how='any')

In [12]:
# Compare cleaned dataframe characteristics with original.

cleaned_test_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 42 entries, 0 to 41
Columns: 133 entries, itching to prognosis
dtypes: int64(132), object(1)
memory usage: 44.0+ KB


In [13]:
# Compare cleaned dataframe characteristics with original.

cleaned_train_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4920 entries, 0 to 4919
Columns: 133 entries, itching to prognosis
dtypes: int64(132), object(1)
memory usage: 5.0+ MB


In [14]:
# To ensure that the prognoses are in alphbetical order, rename all that start with a bracket 
# as they will sit above prognoses beginning with 'a' on the list.

Vsearch=cleaned_train_df.loc[cleaned_train_df['prognosis']=='(vertigo) Paroymsal  Positional Vertigo', :]
Vsearch.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
360,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
361,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
362,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
363,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo
364,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,(vertigo) Paroymsal Positional Vertigo


In [15]:
# Identify a list of the uniques prognoses. (There are 41.)

cleaned_train_df['prognosis'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

In [16]:
# Normalise prognosis names within the TWO dataframes:

cleaned_test_df['prognosis'].replace('(vertigo) Paroymsal  Positional Vertigo','Vertigo', inplace=True)
cleaned_train_df['prognosis'].replace('(vertigo) Paroymsal  Positional Vertigo','Vertigo', inplace=True)
cleaned_test_df['prognosis'].replace('Dimorphic hemmorhoids(piles)','Dimorphic hemmorhoids', inplace=True)
cleaned_train_df['prognosis'].replace('Dimorphic hemmorhoids(piles)','Dimorphic hemmorhoids', inplace=True)
cleaned_test_df['prognosis'].replace('hepatitis A','Hepatitis A', inplace=True)
cleaned_train_df['prognosis'].replace('hepatitis A','Hepatitis A', inplace=True)
cleaned_test_df['prognosis'].replace('Peptic ulcer diseae','Peptic ulcer disease', inplace=True)
cleaned_train_df['prognosis'].replace('Peptic ulcer diseae','Peptic ulcer disease', inplace=True)
cleaned_test_df['prognosis'].replace('Diabetes ','Diabetes', inplace=True)
cleaned_train_df['prognosis'].replace('Diabetes ','Diabetes', inplace=True)
cleaned_test_df['prognosis'].replace('Hypertension ','Hypertension', inplace=True)
cleaned_train_df['prognosis'].replace('Hypertension ','Hypertension', inplace=True)

In [17]:
# Normalise symptom names within the TWO dataframes:

cleaned_test_df=cleaned_test_df.rename(columns={"spotting_ urination":"spotting_urination", "cold_hands_and_feets":"cold_hands_and_feet", "foul_smell_of urine":"foul_smell_of_urine", "toxic_look_(typhos)":"toxic_look_typhos", "dischromic _patches":"dischromic_patches", "fluid_overload.1":"fluid_overload_1"})
cleaned_train_df=cleaned_train_df.rename(columns={"spotting_ urination":"spotting_urination", "cold_hands_and_feets":"cold_hands_and_feet", "foul_smell_of urine":"foul_smell_of_urine", "toxic_look_(typhos)":"toxic_look_typhos", "dischromic _patches":"dischromic_patches", "fluid_overload.1":"fluid_overload_1"})


In [18]:
# Check that the cleaned TRAIN dataframe has the same list of prognoses. (There is STILL 41.)

cleaned_train_df['prognosis'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer disease', 'AIDS', 'Diabetes',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'Hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis', 'Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

In [19]:
# Check that the cleaned TEST dataframe has the same list of prognoses. (There is STILL 41.)

cleaned_test_df['prognosis'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer disease', 'AIDS', 'Diabetes',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'Hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis', 'Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

In [20]:
# Create a dataframe of all unique prognoses with an eye to making a key:

prognoses_train_df=pd.DataFrame(cleaned_train_df["prognosis"])
prognoses_unique_df=prognoses_train_df.drop_duplicates(keep='first')
prognoses_unique_df.head()

Unnamed: 0,prognosis
0,Fungal infection
10,Allergy
20,GERD
30,Chronic cholestasis
40,Drug Reaction


In [21]:
# Sort the prognoses into alphbetical order:

prognoses_unique_sorted_df=prognoses_unique_df.sort_values(by='prognosis')
prognoses_unique_sorted_df.reset_index(drop=True, inplace=True)
prognoses_unique_sorted_df.head()

Unnamed: 0,prognosis
0,AIDS
1,Acne
2,Alcoholic hepatitis
3,Allergy
4,Arthritis


In [22]:
# Match all prognoses with an ID (unique integer) to create a key 'reference field':

prognosis_id=[]

for x in range(0, 41):
    prognosis_id.append(x)

print(prognosis_id)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]


In [23]:
# Convert the key reference field into a dataframe:

prognosis_id_df=pd.DataFrame({"Prognosis_ID": prognosis_id})


In [24]:
# Join the prognosis_id_df with the previously constructed prognoses_unique_sorted_df.

prognosis_key_df=prognosis_id_df.join(prognoses_unique_sorted_df, lsuffix='Prognosis_ID', rsuffix='prognosis')
prognosis_key_df.head(15)

Unnamed: 0,Prognosis_ID,prognosis
0,0,AIDS
1,1,Acne
2,2,Alcoholic hepatitis
3,3,Allergy
4,4,Arthritis
5,5,Bronchial Asthma
6,6,Cervical spondylosis
7,7,Chicken pox
8,8,Chronic cholestasis
9,9,Common Cold


In [25]:
# Add the 'prognosis_ID' column to train_df and remove the 'prognosis' column. 
# This step replaces the string name of each prognosis with its numerical reference counterpart 
# to allow ML to interpret the data:

cleaned_test_df['prognosis'].replace(
    
    ['AIDS', 'Acne', 'Bronchial Asthma',
     'Alcoholic hepatitis', 'Allergy', 'Arthritis', 'Cervical spondylosis', 'Chicken pox', 
     'Chronic cholestasis', 'Common Cold', 'Dengue', 'Diabetes', 'Dimorphic hemmorhoids', 
     'Drug Reaction', 'Fungal infection', 'GERD', 'Gastroenteritis', 'Heart attack', 'Hepatitis A',
     'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E', 'Hypertension', 'Hyperthyroidism',
     'Hypoglycemia', 'Hypothyroidism', 'Impetigo', 'Jaundice', 'Malaria', 'Migraine', 'Osteoarthristis',
     'Paralysis (brain hemorrhage)', 'Peptic ulcer disease', 'Pneumonia', 'Psoriasis', 'Tuberculosis',
     'Typhoid', 'Urinary tract infection', 'Varicose veins', 'Vertigo'], 
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 
       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], inplace=True)

cleaned_test_df.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
1,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
2,0,0,0,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,15
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,8
4,1,1,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,13


In [26]:
# Add the 'prognosis_ID' column to test_df and remove the 'prognosis' column. 
# This step replaces the string name of each prognosis with its numerical reference counterpart to allow ML 
# to interpret the data:

cleaned_train_df['prognosis'].replace(
    
    ['AIDS', 'Acne', 'Bronchial Asthma',
     'Alcoholic hepatitis', 'Allergy', 'Arthritis', 'Cervical spondylosis', 'Chicken pox', 
     'Chronic cholestasis', 'Common Cold', 'Dengue', 'Diabetes', 'Dimorphic hemmorhoids', 
     'Drug Reaction', 'Fungal infection', 'GERD', 'Gastroenteritis', 'Heart attack', 'Hepatitis A',
     'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E', 'Hypertension', 'Hyperthyroidism',
     'Hypoglycemia', 'Hypothyroidism', 'Impetigo', 'Jaundice', 'Malaria', 'Migraine', 'Osteoarthristis',
     'Paralysis (brain hemorrhage)', 'Peptic ulcer disease', 'Pneumonia', 'Psoriasis', 'Tuberculosis',
     'Typhoid', 'Urinary tract infection', 'Varicose veins', 'Vertigo'], 
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 
       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], inplace=True)

cleaned_train_df.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,14


## Machine Learning

In [27]:
# For each row in the training and testing data:

# (a) Assign the "symptoms" data to X values (independent variables)
# (b) Assign the "prognosis" to a Y value (dependent variable)

X_train = cleaned_train_df.iloc[:, :-1].values 
y_train = cleaned_train_df.iloc[:, 132].values 
X_test = cleaned_test_df.iloc[:, :-1].values 
y_test = cleaned_test_df.iloc[:, 132].values

In [28]:
# Check that all prognoses are present in the cleaned_train_df. (There should be unique numbers, 0 through 40 inclusive.)

cleaned_train_df.iloc[:,132].unique()

array([14,  4, 15,  8, 13, 33,  0, 11, 16,  2, 23, 30,  6, 32, 28, 29,  7,
       10, 37, 18, 19, 20, 21, 22,  3, 36,  9, 34, 12, 17, 39, 26, 24, 25,
       31,  5, 40,  1, 38, 35, 27], dtype=int64)

In [29]:
# Verify the dimensions of each data set to ensure that they are compatible with each other:

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(4920, 132)
(4920,)
(42, 132)
(42,)


In [30]:
# We need to convert our prognoses to categorical data.
# Converts a class vector (integers) to binary class matrix. 
# E.g. for use with categorical_crossentropy.

num_classes = 41
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

y_train[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0.], dtype=float32)

## Build our model:

In [31]:
# Create an empty sequential model.

model = Sequential()

In [32]:
# Add the first layer where the input dimensions are the 132 symptom values.
# We chose our activation function as a `relu` layer.

model.add(Dense(100, activation='relu', input_dim=X_train.shape[1]))

In [33]:
# Add a second hidden layer, again, 'relu' in type.

model.add(Dense(100, activation='relu'))

In [34]:
# Add our final output layer where the number of nodes corresponds to the number of y labels.
# Function for logistic regression.

model.add(Dense(num_classes, activation='softmax'))

In [35]:
# Summarize our model to gain an understanding of it.

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               13300     
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 41)                4141      
Total params: 27,541
Trainable params: 27,541
Non-trainable params: 0
_________________________________________________________________


In [36]:
# Use categorical crossentropy for categorical data and mean squared error for regression.

model.compile(optimizer='adam', 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

In [37]:
# Train the model.
# Based on the number of y values (prognoses) it was deemed that a reliable result was attained within 10 epochs (iterations).

model.fit(
    X_train,
    y_train,
    epochs=10,
    shuffle=True,
    verbose=2
)

Train on 4920 samples
Epoch 1/10
4920/4920 - 1s - loss: 1.4722 - accuracy: 0.8567
Epoch 2/10
4920/4920 - 0s - loss: 0.0277 - accuracy: 1.0000
Epoch 3/10
4920/4920 - 0s - loss: 0.0081 - accuracy: 1.0000
Epoch 4/10
4920/4920 - 0s - loss: 0.0040 - accuracy: 1.0000
Epoch 5/10
4920/4920 - 0s - loss: 0.0024 - accuracy: 1.0000
Epoch 6/10
4920/4920 - 0s - loss: 0.0016 - accuracy: 1.0000
Epoch 7/10
4920/4920 - 0s - loss: 0.0011 - accuracy: 1.0000
Epoch 8/10
4920/4920 - 0s - loss: 8.5344e-04 - accuracy: 1.0000
Epoch 9/10
4920/4920 - 0s - loss: 6.5898e-04 - accuracy: 1.0000
Epoch 10/10
4920/4920 - 0s - loss: 5.2169e-04 - accuracy: 1.0000


<tensorflow.python.keras.callbacks.History at 0x29f62adf908>

In [38]:
# Save the model.

model.save("mnist_trained.h5")

In [39]:
# Load the model to speed up the process.

from tensorflow.keras.models import load_model
model = load_model("mnist_trained.h5")

In [40]:
# Evaluate the model using the test data:

model_loss, model_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

42/42 - 0s - loss: 0.0574 - accuracy: 0.9762
Loss: 0.05736243793563474, Accuracy: 0.976190447807312


## Make predictions:

In [41]:
# Select the first row (record) within the 'train' dataframe and feed it to the model for testing purposes.
# Check its dimensions.

test=X_train[:1]
test.shape

(1, 132)

In [42]:
# Use the model to make a prediction. The resulting class will match a digit from the prognosis key made earlier.

print(f"One-Hot-Encoded Prediction: {model.predict(test).round()}")
print(f"Predicted class: {model.predict_classes(test)}")

One-Hot-Encoded Prediction: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
Predicted class: [14]


The test result indicates a prognosis ID of 14 which corresponds to a prognosis of 'Fungal Infection'.

# Populating data for prognosis prediction.

An html page was created to collect patient symptom data for 1 person. The symptoms that were inputted were consistent with those experienced by a turberculosis patient.

The page then transposed the data to JSON format.
The data was then imported to MongoDB in order to finally convert it to CSV format, via GitBash as shown below.


## Importing Patient Data to feed the predictor:

To export the Patient_Data from MongoDB a command must be written for GitBash which includes all of the fields in the database, in the order they appear.



In [43]:
# Create a list of columns which represent the 132 symptoms.

symptom_field_list=cleaned_test_df.columns.values
symptom_field_list

array(['itching', 'skin_rash', 'nodal_skin_eruptions',
       'continuous_sneezing', 'shivering', 'chills', 'joint_pain',
       'stomach_pain', 'acidity', 'ulcers_on_tongue', 'muscle_wasting',
       'vomiting', 'burning_micturition', 'spotting_urination', 'fatigue',
       'weight_gain', 'anxiety', 'cold_hands_and_feet', 'mood_swings',
       'weight_loss', 'restlessness', 'lethargy', 'patches_in_throat',
       'irregular_sugar_level', 'cough', 'high_fever', 'sunken_eyes',
       'breathlessness', 'sweating', 'dehydration', 'indigestion',
       'headache', 'yellowish_skin', 'dark_urine', 'nausea',
       'loss_of_appetite', 'pain_behind_the_eyes', 'back_pain',
       'constipation', 'abdominal_pain', 'diarrhoea', 'mild_fever',
       'yellow_urine', 'yellowing_of_eyes', 'acute_liver_failure',
       'fluid_overload', 'swelling_of_stomach', 'swelled_lymph_nodes',
       'malaise', 'blurred_and_distorted_vision', 'phlegm',
       'throat_irritation', 'redness_of_eyes', 'sinus_pressur

In [44]:
# The syntax for importing a CSV file from MongoDB does not accept the use of quotation marks or spaces.
# Remove them from the list of symptoms.

symptom_field_list2=print ('[%s]' % ','.join(map(str, symptom_field_list)))

[itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,muscle_wasting,vomiting,burning_micturition,spotting_urination,fatigue,weight_gain,anxiety,cold_hands_and_feet,mood_swings,weight_loss,restlessness,lethargy,patches_in_throat,irregular_sugar_level,cough,high_fever,sunken_eyes,breathlessness,sweating,dehydration,indigestion,headache,yellowish_skin,dark_urine,nausea,loss_of_appetite,pain_behind_the_eyes,back_pain,constipation,abdominal_pain,diarrhoea,mild_fever,yellow_urine,yellowing_of_eyes,acute_liver_failure,fluid_overload,swelling_of_stomach,swelled_lymph_nodes,malaise,blurred_and_distorted_vision,phlegm,throat_irritation,redness_of_eyes,sinus_pressure,runny_nose,congestion,chest_pain,weakness_in_limbs,fast_heart_rate,pain_during_bowel_movements,pain_in_anal_region,bloody_stool,irritation_in_anus,neck_pain,dizziness,cramps,bruising,obesity,swollen_legs,swollen_blood_vessels,puffy_face_and_eyes,enlarged_thyroid

Copy the list of symptoms into the following command and run it in Gitbash to get a CSV of Patient_Data:

mongoexport --db=Patient_Data --collection=Survey1 --type=csv --fields=itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,muscle_wasting,vomiting,burning_micturition,spotting_urination,fatigue,weight_gain,anxiety,cold_hands_and_feet,mood_swings,weight_loss,restlessness,lethargy,patches_in_throat,irregular_sugar_level,cough,high_fever,sunken_eyes,breathlessness,sweating,dehydration,indigestion,headache,yellowish_skin,dark_urine,nausea,loss_of_appetite,pain_behind_the_eyes,back_pain,constipation,abdominal_pain,diarrhoea,mild_fever,yellow_urine,yellowing_of_eyes,acute_liver_failure,fluid_overload,swelling_of_stomach,swelled_lymph_nodes,malaise,blurred_and_distorted_vision,phlegm,throat_irritation,redness_of_eyes,sinus_pressure,runny_nose,congestion,chest_pain,weakness_in_limbs,fast_heart_rate,pain_during_bowel_movements,pain_in_anal_region,bloody_stool,irritation_in_anus,neck_pain,dizziness,cramps,bruising,obesity,swollen_legs,swollen_blood_vessels,puffy_face_and_eyes,enlarged_thyroid,brittle_nails,swollen_extremeties,excessive_hunger,extra_marital_contacts,drying_and_tingling_lips,slurred_speech,knee_pain,hip_joint_pain,muscle_weakness,stiff_neck,swelling_joints,movement_stiffness,spinning_movements,loss_of_balance,unsteadiness,weakness_of_one_body_side,loss_of_smell,bladder_discomfort,foul_smell_of_urine,continuous_feel_of_urine,passage_of_gases,internal_itching,toxic_look_typhos,depression,irritability,muscle_pain,altered_sensorium,red_spots_over_body,belly_pain,abnormal_menstruation,dischromic_patches,watering_from_eyes,increased_appetite,polyuria,family_history,mucoid_sputum,rusty_sputum,lack_of_concentration,visual_disturbances,receiving_blood_transfusion,receiving_unsterile_injections,coma,stomach_bleeding,distention_of_abdomen,history_of_alcohol_consumption,fluid_overload_1,blood_in_sputum,prominent_veins_on_calf,palpitations,painful_walking,pus_filled_pimples,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze --out=Survey1.csv

In [45]:
# Make a reference to the Survey1.csv file path:

csv_path = "Resources/Survey1.csv"

# Import the Survey1.csv file as a DataFrame:

patient_1_df = pd.read_csv(csv_path, encoding="utf-8")
patient_1_df

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,pus_filled_pimples,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze
0,0,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
# Feed the dataframe to our predictor.

model.predict_classes(patient_1_df)

array([36], dtype=int64)

The model predictor has diagnosed the patient with TB (Prognosis ID 36).