<a href="https://colab.research.google.com/github/whaldsz/deep-learning/blob/main/Disease_Indicators_Prediction_v4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Disease Prediction



## Setup and initialization

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset CSVs loaded further down
# are read from a folder under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import regularizers
from sklearn.model_selection import train_test_split

print(tf.__version__)

# Seed Python's `random`, NumPy and TensorFlow in a single call so the results
# are reproducible. tf.random.set_seed on its own leaves the Python and NumPy
# generators unseeded.
SEED = 299
tf.keras.utils.set_random_seed(SEED)

2.9.2


In [None]:
#!pip install git+https://github.com/tensorflow/docs

try:
  import tensorflow_docs as tfdocs
  import tensorflow_docs.modeling
  import tensorflow_docs.plots
except:
  !pip install git+https://github.com/tensorflow/docs
  import tensorflow_docs as tfdocs
  import tensorflow_docs.modeling
  import tensorflow_docs.plots
  

In [None]:
from  IPython import display
from matplotlib import pyplot as plt

import numpy as np

import pathlib
import shutil
import tempfile

In [None]:
# currentdir
import os

# TensorBoard event files go into a dedicated folder under the current
# working directory.
working_dir = os.getcwd()
logdir = os.path.join(working_dir, "tensorboard_logs")

# Remove logs left over from a previous session; a missing folder is fine.
shutil.rmtree(logdir, ignore_errors=True)

## 1. Dataset Preparation

In [None]:
import pandas as pd


# Locations of the two CSV files on the mounted Google Drive.
TRAINING_CSV = '/content/drive/MyDrive/projects/oman-gulf-college/dataset/Disease_Prediction/Training.csv'
TESTING_CSV = '/content/drive/MyDrive/projects/oman-gulf-college/dataset/Disease_Prediction/Testing.csv'

disease_training = pd.read_csv(TRAINING_CSV)
disease_testing = pd.read_csv(TESTING_CSV)

# Preview the first rows of the training frame.
disease_training.head()

FileNotFoundError: ignored

## Remove last column

In [None]:
# Count missing values per column of the training frame.
disease_training.isna().sum()

In [None]:
# Drop the stray 'Unnamed: 133' column. Rebinding the name (instead of
# inplace=True) together with errors='ignore' keeps this cell idempotent:
# re-running it no longer raises KeyError once the column is gone.
disease_training = disease_training.drop(columns='Unnamed: 133', errors='ignore')
# Same guard for the test frame; this is a no-op when the column is absent.
disease_testing = disease_testing.drop(columns='Unnamed: 133', errors='ignore')

# Re-check the per-column missing-value counts after the drop.
disease_training.isna().sum()

In [None]:
#disease_training.head()

## Convert category to numeric values

In [None]:
#get class labels

# Sorted, unique disease names found in the training data.
class_names = np.unique(disease_training.prognosis)

# Encode BOTH frames against the same category list so that a given integer
# code always refers to the same disease. Building the categoricals
# independently would assign different codes whenever the two files do not
# contain exactly the same set of labels. A test label that never occurs in
# training gets code -1.
disease_training['prognosis'] = pd.Categorical(disease_training['prognosis'], categories=class_names)
disease_testing['prognosis'] = pd.Categorical(disease_testing['prognosis'], categories=class_names)
class_names.shape

In [None]:
#disease_training.prognosis.cat.codes
#disease_training
#disease_testing.head()

## Separate Features and Label - Training

### Training Set

In [None]:
# Features: every column except the target.
X = disease_training.drop(columns='prognosis')
# Labels: the integer category code of each prognosis.
y = disease_training['prognosis'].cat.codes

# Show the distinct label codes.
np.unique(y)

### Unseen Test Set

In [None]:
# Features and integer label codes of the separate Testing.csv data.
X_unseen = disease_testing.drop('prognosis', axis=1)
y_unseen = disease_testing.prognosis.cat.codes

# Show the distinct label codes, mirroring the training-set cell.
# (The original inspected X_unseen here, which lists feature values, not labels.)
np.unique(y_unseen)

## Split into Training & Validation Sets

In [None]:
# Hold out 20% of the training data; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=74,
)
#print(X_train.shape)
#print(y_train.shape)
#print(X_test.shape)
#print(y_test.shape)


In [None]:
# Display the held-out feature frame produced by train_test_split.
X_test

In [None]:
# Number of features, read from the data rather than hard-coded so it cannot
# drift from the actual column count.
FEATURES = X_train.shape[1]
FEATURES

## 2. Model Training

### Training configuration

In [None]:
FEATURES = X_train.shape[1]

# Fraction of X_train that model.fit holds out for validation; this mirrors the
# validation_split=0.1 passed to model.fit in compile_and_fit.
VALIDATION_SPLIT = 0.1

# Whole-number sample counts. The previous 0.8 / 0.2 arithmetic produced floats
# and did not match the 10% validation split actually used.
N_TRAIN = int(X_train.shape[0] * (1 - VALIDATION_SPLIT))
N_VALIDATION = X_train.shape[0] - N_TRAIN

BUFFER_SIZE = 100
BATCH_SIZE = 50

# Integer number of batches per pass over the training portion (at least 1).
STEPS_PER_EPOCH = max(1, N_TRAIN // BATCH_SIZE)

[FEATURES, N_VALIDATION, N_TRAIN, BUFFER_SIZE, BATCH_SIZE, STEPS_PER_EPOCH]

### Create Model

### Find the ideal learning rate

In [None]:
# Inverse-time decay: lr(step) = 0.001 / (1 + step / decay_steps), so the rate
# has halved after 100 epochs' worth of steps.
inverse_time_decay = tf.keras.optimizers.schedules.InverseTimeDecay
lr_schedule = inverse_time_decay(
  initial_learning_rate=0.001,
  decay_steps=100 * STEPS_PER_EPOCH,
  decay_rate=1,
  staircase=False,
)

def get_optimizer():
  """Return a new Adam optimizer whose learning rate follows ``lr_schedule``."""
  adam = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
  return adam

In [None]:
# Evaluate the schedule on evenly spaced steps and plot it against epochs.
step = np.linspace(0, 100000)
lr = lr_schedule(step)

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(step / STEPS_PER_EPOCH, lr)
# Anchor the y-axis at zero while keeping the automatic upper limit.
ax.set_ylim([0, max(ax.get_ylim())])
ax.set_xlabel('Epoch')
_ = ax.set_ylabel('Learning Rate')

In [None]:

#metrics = [
#    tfma.metrics.ExampleCount(name='example_count'),
#    tf.keras.metrics.SparseCategoricalCrossentropy(
#        name='sparse_categorical_crossentropy'),
#    tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
#    tf.keras.metrics.Precision(name='precision', top_k=1),
#    tf.keras.metrics.Precision(name='precision', top_k=3),
#    tf.keras.metrics.Recall(name='recall', top_k=1),
#    tf.keras.metrics.Recall(name='recall', top_k=3),
#    tfma.metrics.MultiClassConfusionMatrixPlot(
#        name='multi_class_confusion_matrix_plot'),
#]

# Default metric for compile_and_fit. Keras logs a string metric under the
# exact name given, and the callbacks, compile calls and history plots in this
# notebook all key on 'acc' -- with 'accuracy' the default path would log
# under a name nothing else looks up.
METRICS = 'acc'
# Labels are integer class codes, hence the sparse categorical loss.
LOSS = tf.keras.losses.SparseCategoricalCrossentropy()

### Settings for automation

In [None]:
def get_callbacks(name, monitor='val_acc', patience=100):
  """Build the callback list used for one training run.

  Args:
    name: Run name; TensorBoard logs are written to ``<logdir>/<name>``.
    monitor: Logged quantity watched by early stopping. Defaults to the
      validation accuracy ('val_acc') so training stops when performance on
      held-out data stops improving, rather than the training accuracy.
    patience: Number of epochs without improvement before training stops.

  Returns:
    A list with the tensorflow_docs EpochDots callback, an EarlyStopping
    callback and a TensorBoard callback.
  """
  return [
    tfdocs.modeling.EpochDots(),
    tf.keras.callbacks.EarlyStopping(monitor=monitor, patience=patience),
    tf.keras.callbacks.TensorBoard(os.path.join(logdir, name)),
  ]

In [None]:
def compile_and_fit(model, name, loss=None, optimizer=None, metrics = None, max_epochs=10000, batch_size=None):
  """Compile ``model``, print its summary and train it on X_train / y_train.

  Args:
    model: Keras model to train.
    name: Run name forwarded to ``get_callbacks`` (used for the log folder).
    loss: Loss to optimise; defaults to the module-level ``LOSS``.
    optimizer: Optimizer; defaults to a fresh one from ``get_optimizer()``.
    metrics: List of metrics; defaults to ``[METRICS]``.
    max_epochs: Upper bound on epochs (early stopping may end training sooner).
    batch_size: Samples per gradient step; defaults to ``BATCH_SIZE``.

  Returns:
    The ``History`` object returned by ``model.fit``.
  """
  if optimizer is None:
    optimizer = get_optimizer()
  if loss is None:
    loss = LOSS
  if metrics is None:
    metrics = [METRICS]
  if batch_size is None:
    # STEPS_PER_EPOCH is derived from BATCH_SIZE, so fit must actually use it;
    # otherwise Keras silently falls back to its default batch size of 32.
    batch_size = BATCH_SIZE

  model.compile(
      optimizer=optimizer,
      loss=loss,
      metrics=metrics
  )

  model.summary()

  history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=max_epochs,
    # 10% of X_train is held out by Keras for validation.
    validation_split=0.1,
    callbacks=get_callbacks(name),
    verbose=0)
  return history

### Models

In [None]:
# Maps a model label to its training History so the runs can be plotted together.
size_histories = {}

#### Model 1

A simple baseline with three layers: the input layer, one 4-unit hidden layer (ELU) and a softmax output layer.

In [None]:
# Baseline: one 4-unit ELU hidden layer followed by a softmax output.
model1 = tf.keras.Sequential([
    layers.Dense(4, activation='elu', input_shape=(FEATURES,)),
    # One output unit per disease class, taken from the data instead of a
    # hard-coded 41.
    layers.Dense(len(class_names), activation=tf.keras.activations.softmax)
])

In [None]:
# Train model 1; TensorBoard logs go to <logdir>/models/model1.
model1_history = compile_and_fit(
    model=model1,
    name='models/model1',
    loss=LOSS,
    metrics=['acc'],
)

In [None]:

# Register the run so it appears in the comparison plots.
size_histories['model1'] = model1_history

In [None]:
# Plot the 'acc' curves of every run stored in size_histories so far.
plotter = tfdocs.plots.HistoryPlotter(metric='acc', smoothing_std=10)
plotter.plot(size_histories)
a = plt.xscale('log')

# Pin both lower axis limits at 0.01 and keep the automatic upper limits.
x_upper = max(plt.xlim())
plt.xlim([.01, x_upper])
y_upper = max(plt.ylim())
plt.ylim([.01, y_upper])
plt.xlabel("Epochs [Log Scale]")

In [None]:
# Loss and accuracy of model 1 on the held-out split.
loss, acc = model1.evaluate(X_test, y_test)
print(f"Model Loss (Test Set) : {loss}\nModel Accuracy (Test Set): {acc}")

In [None]:
#lrs = 1e-4 * (10 ** (tf.range(BATCH_SIZE)/20))
#plt.figure(figsize=(6,4))
#plt.semilogx(lrs, size_histories['models/model1'].history['loss'])
#plt.xlabel("Learning Rate")
#plt.ylabel("Loss")
#plt.title("Learning Rate vs Loss")

#### Model 2

In [None]:
# Two 4-unit ELU hidden layers followed by a softmax output.
# input_shape is only meaningful on the first layer of a Sequential model,
# so it is no longer repeated on the later layers.
model2 = tf.keras.Sequential([
    layers.Dense(4, activation='elu', input_shape=(FEATURES,)),
    layers.Dense(4, activation='elu'),
    # One output unit per disease class, taken from the data instead of a
    # hard-coded 41.
    layers.Dense(len(class_names), activation=tf.keras.activations.softmax)
])

#model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
#                optimizer = tf.keras.optimizers.Adam(),
#                #metrics=['MultiClassConfusionMatrixPlot'])
#                metrics=["accuracy"])

#scheduler = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-4 * 10 **(epoch/20))

#history = model.fit(X_train, y_train, epochs=40, callbacks=[scheduler])

In [None]:
# Train model 2; TensorBoard logs go to <logdir>/models/model2.
model2_history = compile_and_fit(
    model=model2,
    name='models/model2',
    loss=LOSS,
    metrics=['acc'],
)

In [None]:
# Register the run so it appears in the comparison plots.
size_histories['model2'] = model2_history

In [None]:
# Plot the 'acc' curves of every run stored in size_histories so far.
plotter = tfdocs.plots.HistoryPlotter(metric='acc', smoothing_std=10)
plotter.plot(size_histories)
a = plt.xscale('log')

# Pin both lower axis limits at 0.01 and keep the automatic upper limits.
x_upper = max(plt.xlim())
plt.xlim([.01, x_upper])
y_upper = max(plt.ylim())
plt.ylim([.01, y_upper])
plt.xlabel("Epochs [Log Scale]")

#### Model 3

In [None]:
# Three 64-unit ELU hidden layers followed by a softmax output.
# input_shape is only meaningful on the first layer of a Sequential model,
# so it is no longer repeated on the later layers.
model3 = tf.keras.Sequential([
    layers.Dense(64, activation='elu', input_shape=(FEATURES,)),
    layers.Dense(64, activation='elu'),
    layers.Dense(64, activation='elu'),
    # One output unit per disease class, taken from the data instead of a
    # hard-coded 41.
    layers.Dense(len(class_names), activation=tf.keras.activations.softmax)
])

In [None]:
# Train model 3; TensorBoard logs go to <logdir>/models/model3.
model3_history = compile_and_fit(
    model=model3,
    name='models/model3',
    loss=LOSS,
    metrics=['acc'],
)

In [None]:

# Register the run so it appears in the comparison plots.
size_histories['model3'] = model3_history

In [None]:
# Plot the 'acc' curves of every run stored in size_histories so far.
plotter = tfdocs.plots.HistoryPlotter(metric='acc', smoothing_std=10)
plotter.plot(size_histories)
a = plt.xscale('log')

# Pin both lower axis limits at 0.01 and keep the automatic upper limits.
x_upper = max(plt.xlim())
plt.xlim([.01, x_upper])
y_upper = max(plt.ylim())
plt.ylim([.01, y_upper])
plt.xlabel("Epochs [Log Scale]")

#### Model 4

In [None]:
# Four 512-unit ELU hidden layers followed by a softmax output.
# input_shape is only meaningful on the first layer of a Sequential model,
# so it is no longer repeated on the later layers.
model4 = tf.keras.Sequential([
    layers.Dense(512, activation='elu', input_shape=(FEATURES,)),
    layers.Dense(512, activation='elu'),
    layers.Dense(512, activation='elu'),
    layers.Dense(512, activation='elu'),
    # One output unit per disease class, taken from the data instead of a
    # hard-coded 41.
    layers.Dense(len(class_names), activation=tf.keras.activations.softmax)
])


In [None]:
# Train model 4; TensorBoard logs go to <logdir>/models/model4.
model4_history = compile_and_fit(
    model=model4,
    name='models/model4',
    loss=LOSS,
    metrics=['acc'],
)


In [None]:

# Register the run so it appears in the comparison plots.
size_histories['model4'] = model4_history

In [None]:
# Plot the 'acc' curves of every run stored in size_histories so far.
plotter = tfdocs.plots.HistoryPlotter(metric='acc', smoothing_std=10)
plotter.plot(size_histories)
a = plt.xscale('log')

# Pin both lower axis limits at 0.01 and keep the automatic upper limits.
x_upper = max(plt.xlim())
plt.xlim([.01, x_upper])
y_upper = max(plt.ylim())
plt.ylim([.01, y_upper])
plt.xlabel("Epochs [Log Scale]")

#### Model 5

### 3. Evaluate Model

#### Evaluate with test data

In [None]:
# Loss and accuracy of model 1 on the held-out split.
loss, acc = model1.evaluate(X_test, y_test)
print(f"Model Loss (Test Set) : {loss}\nModel Accuracy (Test Set): {acc}")

#### Evaluate with unseen data (Loss vs Accuracy)

In [None]:
# Score each trained model on the data loaded from Testing.csv.
# After the loop, `loss` and `acc` hold the values of the last model (model 4).
for model_number, model in enumerate((model1, model2, model3, model4), start=1):
  loss, acc = model.evaluate(X_unseen, y_unseen)
  print(f"Model {model_number}:")
  print(f"Model Loss: {loss}")
  print(f"Model Accuracy: {acc}")

## 4. Predictions

#### Test Set

In [None]:
### Model 1
# Per-class scores for every held-out row; argmax picks the predicted class code.
predictions1 = model1.predict(X_test)
predicted1 = tf.argmax(input=predictions1, axis=1)

# Side-by-side table of true and predicted class codes.
summary1 = pd.DataFrame({'Test Set': y_test, 'Predicted': predicted1})
res1 = pd.DataFrame({'Test': y_test, 'B': predicted1})
summary1

In [None]:
### Model 2
# Per-class scores for every held-out row; argmax picks the predicted class code.
predictions2 = model2.predict(X_test)
predicted2 = tf.argmax(input=predictions2, axis=1)

# Side-by-side table of true and predicted class codes.
summary2 = pd.DataFrame({'Test Set': y_test, 'Predicted': predicted2})
res2 = pd.DataFrame({'Test': y_test, 'B': predicted2})
summary2


In [None]:
### Model 3
# Per-class scores for every held-out row; argmax picks the predicted class code.
predictions3 = model3.predict(X_test)
predicted3 = tf.argmax(input=predictions3, axis=1)

# Side-by-side table of true and predicted class codes.
summary3 = pd.DataFrame({'Test Set': y_test, 'Predicted': predicted3})
res3 = pd.DataFrame({'Test': y_test, 'B': predicted3})
summary3



#### Unseen Test Data

In [None]:
### Unseen set
# Per-class scores from each model, in model order.
predictions1, predictions2, predictions3, predictions4 = (
  trained_model.predict(X_unseen)
  for trained_model in (model1, model2, model3, model4)
)

# Predicted class code = index of the highest score in each row.
result1, result2, result3, result4 = (
  tf.argmax(scores, axis=1)
  for scores in (predictions1, predictions2, predictions3, predictions4)
)

# True vs. predicted class codes, one frame per model.
res1, res2, res3, res4 = (
  pd.DataFrame({'Unseem Set': y_unseen, 'B': predicted})
  for predicted in (result1, result2, result3, result4)
)



### Confusion Matrix

In [None]:
!pip install tensorflow_addons

import tensorflow_addons as tfa
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

y_pred = model1.predict(X_test)


#  !pip install git+https://github.com/tensorflow/docs

metric = tfa.metrics.MultiLabelConfusionMatrix(num_classes=41)
rr=np.argmax(y_pred, axis=1)




Accuracy Score

In [None]:
from sklearn.metrics import accuracy_score
# Fraction of held-out rows where model 1's predicted class code equals the true code.
accuracy_score(y_test, rr)

Multiclass confusion matrix

In [None]:
# Print the true class codes followed by model 1's predicted codes.
print("Actual \n", y_test)
print("\nPredicted \n",rr)


In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of model 1 on the held-out split (true codes vs. predicted codes).
confusion_matrix(y_test, rr)

In [None]:
from sklearn.metrics import classification_report

# One per-class precision/recall/F1 report per model, computed on the unseen data.
unseen_results = (result1, result2, result3, result4)
for model_number, model_result in enumerate(unseen_results, start=1):
  print(f"Model {model_number}:")
  print(classification_report(y_unseen, model_result, target_names=class_names))

In [None]:
# Draw model 1's confusion matrix on a large canvas so all cells stay legible.
figsize = [15, 15]
cm = confusion_matrix(y_test, rr)

fig, ax = plt.subplots(figsize=figsize)
cm_display = ConfusionMatrixDisplay(cm).plot(ax=ax)
