In [1]:
# Inception Model for Parkinson's Disease detection from voice spectral data.  
# The Inception V3 model, pretrained on ImageNet, is adapted using transfer learning to
# extract features from spectrogram images of the sustained vowel /a/ to distinguish people 
# with Parkinson’s Disease (PwPD) and healthy controls (HC).  Audio files were preprocessed using
# the R packages Create_Liner(Mel)Spectrograms_(dataset). 
# Spectra must be organized into a directory structure as described in 
# https://vijayabhaskar96.medium.com/tutorial-image-classification-with-keras-flow-from-directory-and-generators-95f75ebe5720
# The pathname to this directory structure must be changed for each data set being analyzed.
# For comparison with other analyses performed on these data, training for transfer learning is re-initialized on each of 100 iterations, retaining ROC data.


# Copyright (C) 2024 University of Arkansas for Medical Sciences
# Author: Anu Iyer, Fred Prior, PhD FWPrior@uams.edu
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

In [2]:
import pandas as pd 
import numpy as np
import tensorflow as tf
import timeit
import datetime
import math
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout, Flatten, BatchNormalization, Conv2D, MaxPooling2D, Input
from tensorflow.keras.optimizers import Adam
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix

from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.models import Model
from keras.callbacks import EarlyStopping

from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot as plt
from sklearn.metrics import roc_curve

In [None]:
# Repeated transfer-learning experiment.
# For each of `n_runs` runs a fresh InceptionV3-based classifier is built, trained on the
# training subset of `data_path`, and scored (ROC AUC) on the held-out validation subset.
# The per-run AUC values are collected in `all_auc`; a confusion matrix and classification
# report are printed for the model of the final run.
start = timeit.default_timer()
data_path = '../../mPowerHiFreqData' # Change Pathname for each experiment 
img_rows = 600
img_cols = 600
batch_size = 4
epochs = 10
n_runs = 100 

all_auc = []
for run in range(n_runs):
    print("run={}".format(run))

    # Drop Keras global state left over from the previous run so that building
    # n_runs models in one process does not keep accumulating memory.
    tf.keras.backend.clear_session()

    # Load data for each run.
    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       fill_mode='nearest',
                                       validation_split=0.3)

    # `subset` is required for `validation_split` to take effect. Without it both
    # generators yield the complete dataset and the model is evaluated on the very
    # images it was trained on.
    train_generator = train_datagen.flow_from_directory(data_path,
                                                        target_size=(img_rows, img_cols),
                                                        batch_size=batch_size,
                                                        class_mode='categorical',
                                                        shuffle=True,
                                                        subset='training')

    # shuffle=False keeps the prediction order aligned with `validation_generator.classes`.
    validation_generator = train_datagen.flow_from_directory(data_path,
                                                             target_size=(img_rows, img_cols),
                                                             batch_size=batch_size,
                                                             class_mode='categorical',
                                                             shuffle=False,
                                                             subset='validation')

    # Load pre-trained InceptionV3 without its classifier head; the input shape is
    # derived from the configured image size instead of being hard-coded.
    pre_trained = InceptionV3(weights='imagenet',
                              include_top=False,
                              input_shape=(img_rows, img_cols, 3),
                              pooling='avg')
    # Freeze the convolutional base; only the new head below is trained.
    for layer in pre_trained.layers:
        layer.trainable = False

    # Replacement classifier: a simple MLP on top of the pooled features.
    x = pre_trained.output
    x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.2)(x)

    # Two one-hot encoded classes trained with categorical cross-entropy: softmax makes
    # the two outputs a probability distribution, so column 1 is a usable ROC score.
    predictions = Dense(2, activation='softmax')(x)

    model = Model(inputs = pre_trained.input, outputs = predictions)
    model.compile(loss='categorical_crossentropy',
                  optimizer = Adam(learning_rate=0.001),
                  metrics=['accuracy'],
                 )

    # Train
    # NOTE(review): the checkpoint monitors *training* accuracy and the file name embeds
    # that value, so checkpoints from different runs can overwrite each other — confirm
    # this is intended.
    newmodel=model.fit(train_generator,
                    steps_per_epoch=train_generator.samples // batch_size,
                    epochs=epochs,
                    validation_data=validation_generator,
                    validation_steps=validation_generator.samples // batch_size,
                    callbacks=[
                        tf.keras.callbacks.ModelCheckpoint(filepath = 'mPowerHiFreqData2model_{accuracy:.3f}.h5', save_best_only=True,
                        save_weights_only=False, monitor='accuracy')
                    ])

    # The generator already defines the batching, so `batch_size` is not passed to predict.
    Y_pred = model.predict(validation_generator)

    # ROC needs a continuous score. Using argmax labels would collapse the curve to a
    # single operating point. Column 1 is the score for class index 1
    # (presumably the Parkinson's class — confirm via validation_generator.class_indices).
    positive_scores = Y_pred[:, 1]
    fpr, tpr, thresholds = metrics.roc_curve(validation_generator.classes, positive_scores)
    run_auc = metrics.auc(fpr, tpr)
    print("auc: {}".format(round(run_auc, 2)))
    all_auc.append(run_auc)

    print("\n")


# NOTE(review): despite the file name, this stores the model of the final run,
# not the run with the highest AUC.
model.save("mPowerHiFreqData2Best.h5")
print("\n")
#  Record Runtime

stop = timeit.default_timer()
print('RunTime: ', round(stop - start, 2), 'Seconds')
print("\n")

# Confusion Matrix and Classification Report (model of the final run, validation subset)
Y_pred = model.predict(validation_generator)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report:')
# Display names are assumed to match class indices 0 and 1 — TODO confirm against
# validation_generator.class_indices.
target_names = ['healthy', 'parkinson']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))

print("avg auc: {} ({})".format(np.round(np.average(all_auc), 4), np.round(np.std(all_auc), 3)))

In [None]:
# Learning curves for the History object held in `newmodel`, then export of the per-run AUCs.

# One row per figure:
# (training history key, validation history key, training label, validation label)
curve_specs = [
    ('accuracy', 'val_accuracy', 'Training_accuracy', 'Validation_accuracy'),
    ('loss', 'val_loss', 'Training_loss', 'Validation_loss'),
]

for train_key, val_key, train_label, val_label in curve_specs:
    train_curve = newmodel.history[train_key]
    val_curve = newmodel.history[val_key]
    # Both curves share the x axis: one point per recorded epoch of the training curve.
    epoch_axis = range(len(train_curve))

    plt.plot(epoch_axis, train_curve, label = train_label)
    plt.plot(epoch_axis, val_curve, label = val_label)
    plt.legend()
    plt.show()


# Print and store the AUC vector

print('AUC vector: ', all_auc)

# One row per run; written with pandas' default index and header.
DF = pd.DataFrame(all_auc)

DF.to_csv("mPowerHiFreqData2AUC.csv")
