In [1]:
import numpy as np
from numpy import mean
from numpy import std
from numpy import dstack 
from pandas import read_csv
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, auc, RocCurveDisplay, classification_report, ConfusionMatrixDisplay, RocCurveDisplay, precision_score, recall_score, roc_auc_score

import tensorflow as tf 
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense 
from tensorflow.keras.layers import Input 
from tensorflow.keras.layers import Flatten 
from tensorflow.keras.layers import Dropout 
from tensorflow.keras.layers  import Conv1D  
from tensorflow.keras.utils import plot_model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import concatenate
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers  import MaxPooling1D

from tensorflow import keras 
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import  LSTM, GRU, SimpleRNN 

# DATASET PROCESSING

In [2]:
# load a single file as a numpy array
def load_file(filepath): 
	dataframe = read_csv(filepath, header=None, delim_whitespace=True)
	return dataframe.values 

In [3]:
# load a list of files and return as a 3d numpy array
def load_group(filenames, prefix=''):
	loaded = list() 
	for name in filenames:
		data = load_file(prefix + name)
		loaded.append(data)
        
	# stack group so that features are the 3rd dimension
	loaded = np.concatenate(loaded)
	return loaded     

In [4]:
# load a dataset group, such as train or test
def load_dataset_group():
	filepath = "C:\\Users\\vvsat\\Documents\\machine learning\\parkinsons speech\\"
    
	# load all files as a single array
	filenames = list()
    
	filenames = ['dataset.txt'] 
    
	# load input data
	X = load_group(filenames, filepath)
    
    
	# load class output
	# load input data
	y = load_file("C:\\Users\\vvsat\\Documents\\machine learning\\parkinsons speech\\target.txt")
	return X, y

In [5]:
# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
    
	X, y = load_dataset_group()
	print(X.shape, y.shape) 
       
	# load all train
	trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.20, random_state=27) 
    
	# zero-offset class values
	trainy = trainy - 1
	testy = testy - 1
    
	# one hot encode y
	trainy = to_categorical(trainy)
	testy = to_categorical(testy)
    
	print(trainX.shape, trainy.shape, testX.shape, testy.shape)
	return trainX, trainy, testX, testy

# MODEL BUILDING

# HYPERPARAMETER TUNING

In [6]:
def model_builder(hp):
   
    # hyperparameter
    hp_dropout = hp.Choice('drop_out', values=[0.2, 0.3, 0.5, 0.7, 0.8]) 
    hp_learning_rate = hp.Choice('learning_rate', values=[0.0001, 0.001, 0.01, 0.05, 0.1])
    
    # load data
    trainX, trainy, testX, testy = load_dataset()
    
    verbose, epochs, batch_size = 0, 10, 32
    n_timesteps, n_features, n_outputs = trainX.shape[0], trainX.shape[1], trainy.shape[0]
    print(n_timesteps, n_features, n_outputs)
   
    print("trainX shape : ",trainX.shape)
    print("trainy shape : ",trainy.shape)  
    
    #Build the LSTM model 
    # LSTM Layer returns whole_seq_output, final_memory_state, and final_carry_state 
    model = Sequential()
    model.add(SimpleRNN(64, return_sequences=True, input_shape=(None, 1)))
    model.add(SimpleRNN(128, return_sequences= True))
    model.add(SimpleRNN(256, return_sequences= False))  
    
    model.add(Dropout(rate = hp_dropout)) 
 
    model.add(Dense(2)) 
    model.summary()  

    # learning rate decay
    initial_learning_rate = hp_learning_rate
    
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
    )

    # Compile the model 
    model.compile(optimizer = keras.optimizers.Adam(learning_rate=lr_schedule), 
                  loss=keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy']) 
    
    return model   

In [7]:
import keras_tuner as kt 

In [8]:
#creating a hyperband tuner using keras tuner 
tuner = kt.Hyperband(model_builder, 
                     objective='accuracy', 
                     max_epochs=10, 
                     hyperband_iterations=10,        
                     factor=50,
                     directory='pd_speech_gru',
                     project_name='intro_to_kt')    

(756, 752) (756, 1)
(604, 752) (604, 2) (152, 752) (152, 2)
604 752 604
trainX shape :  (604, 752)
trainy shape :  (604, 2)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, None, 64)          4224      
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, None, 128)         24704     
                                                                 
 simple_rnn_2 (SimpleRNN)    (None, 256)               98560     
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense (Dense)               (None, 2)                 514       
                                                                 
Total params: 128,002
Trainable params: 128,002
Non-trainable pa

In [9]:
#early stopping callback to stop the model when the loss shoots up 
stop_early = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, mode = 'auto', restore_best_weights=True)    

In [10]:
# load data
trainX, trainy, testX, testy = load_dataset()

#run the hyperband tuner    
tuner.search(trainX, trainy, callbacks=[stop_early])  

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

#printing the optimal hyperparameter values
print('best dropout', best_hps.get('drop_out'))
print('best learning rate', best_hps.get('learning_rate'))  

(756, 752) (756, 1)
(604, 752) (604, 2) (152, 752) (152, 2)

Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
drop_out          |0.5               |?                 
learning_rate     |0.001             |?                 
tuner/epochs      |10                |?                 
tuner/initial_e...|0                 |?                 
tuner/bracket     |0                 |?                 
tuner/round       |0                 |?                 

(756, 752) (756, 1)
(604, 752) (604, 2) (152, 752) (152, 2)
604 752 604
trainX shape :  (604, 752)
trainy shape :  (604, 2)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, None, 64)          4224      
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, None, 128)         24704     
                                       

KeyboardInterrupt: 

   # TRAINING THE  MODEL  

In [None]:
# load data
trainX, trainy, testX, testy = load_dataset()
    
verbose, epochs, batch_size = 0, 10, 32
n_timesteps, n_features, n_outputs = trainX.shape[0], trainX.shape[1], trainy.shape[0]
print(n_timesteps, n_features, n_outputs)
   
print("trainX shape : ",trainX.shape)
print("trainy shape : ",trainy.shape)  
    
#Build the LSTM model 
# LSTM Layer returns whole_seq_output, final_memory_state, and final_carry_state 
model = Sequential()
model.add(SimpleRNN(64, return_sequences=True, input_shape=(None, 1)))
model.add(SimpleRNN(128, return_sequences= True))
model.add(SimpleRNN(256, return_sequences= False))  
    
model.add(Dropout(rate = best_hps.get('drop_out'))) 
 
model.add(Dense(2)) 
model.summary()  

# learning rate decay
initial_learning_rate = best_hps.get('learning_rate')
    
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
       initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
       )

# Compile the model 
model.compile(optimizer = keras.optimizers.Adam(learning_rate=lr_schedule), 
              loss=keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])   

In [None]:
#fitting the model  
history = model.fit(trainX, 
                    trainy, 
                    epochs=10,
                    batch_size=32,
                    validation_split=0.2,
                    verbose=1)           

# SAVING THE MODEL

In [None]:
model.save('simple_rnn.h5') 

In [None]:
model = tf.keras.models.load_model('simple_rnn.h5')

# PLOTTING THE METRICS

In [None]:
train_acc = history.history['accuracy'] 
train_loss = history.history['loss'] 
val_acc = history.history['val_accuracy'] 
val_loss = history.history['val_loss']

epochs = range(len(train_acc))  

plt.plot(epochs, train_acc, 'b', label='Training Accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.figure()
plt.show()

plt.plot(epochs, train_loss, 'b', label='Training Loss') 
plt.plot(epochs, val_loss, 'r', label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show() 

# MODEL EVALUATION

In [None]:
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model.evaluate(testX, testy, batch_size=800)
print("test loss, test acc:", results) 

In [None]:
y_pred = model.predict(testX) 
y_pred = np.argmax(y_pred, axis=1)
y_test = np.argmax(testy, axis=1) 

# CONFUSION MATRIX

In [None]:
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show() 

# DISPLAYING PRECISION, RECALL AND AUC

In [None]:
print('precision score : ', precision_score(y_test, y_pred, average = 'micro'),
      '\n', 'recall score : ', recall_score(y_test, y_pred, average = 'micro'))

In [None]:
from sklearn import metrics
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred, pos_label=1)
roc_auc = metrics.auc(fpr, tpr) 
roc_auc 

In [None]:
display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc)
display.plot()