In [None]:
import os
import tempfile
import time

import IPython.display as ipd
import joblib
import librosa
import librosa.display
import numpy as np
import pandas as pd
import sounddevice as sd
from ipywidgets import Dropdown, Button, Output, VBox
from scipy.io.wavfile import write
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from tensorflow.keras import layers, models

# Function to list the .wav files available in a directory
def list_audio_files(directory):
    """Return the names of the WAV files located directly inside ``directory``.

    Parameters
    ----------
    directory : str or path-like
        Folder to scan (not recursive).

    Returns
    -------
    list of str
        File names (not full paths) whose extension is ``.wav`` in any letter
        case, sorted alphabetically so the result does not depend on the
        order in which the operating system lists the folder.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``directory`` cannot be read.
    """
    return sorted(
        entry
        for entry in os.listdir(directory)
        # Match the extension case-insensitively and skip sub-directories that
        # merely happen to be named like an audio file.
        if entry.lower().endswith('.wav')
        and os.path.isfile(os.path.join(directory, entry))
    )

# Feature extraction function with labeled MFCCs
def feature_extraction(file_path, n_mfcc=25):
    """Extract time-averaged MFCC features from an audio file.

    Parameters
    ----------
    file_path : str or path-like
        Audio file passed to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 25, the value the
        rest of this notebook was written against.

    Returns
    -------
    mfcc : numpy.ndarray
        One value per coefficient: the mean of that coefficient over the
        frames returned by ``librosa.feature.mfcc``.
    mfcc_labels : list of str
        Column names ``'MFCC_Coefficient_1'`` ... ``'MFCC_Coefficient_<n>'``,
        one per entry of ``mfcc``.
    """
    # Load the audio file
    x, sample_rate = librosa.load(file_path, res_type='kaiser_fast')

    # Extract n_mfcc MFCC coefficients per frame, then average across frames
    mfcc_frames = librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=n_mfcc)
    mfcc = np.mean(mfcc_frames.T, axis=0)

    # Label MFCC features (MFCC_Coefficient_1, MFCC_Coefficient_2, etc.)
    mfcc_labels = [f'MFCC_Coefficient_{i+1}' for i in range(len(mfcc))]
    return mfcc, mfcc_labels

# Directories containing the audio files for Healthy Controls (HC) and PwPD.
# The defaults below can be overridden without editing the notebook by setting
# the HC_AUDIO_DIR / PWPD_AUDIO_DIR environment variables before starting the kernel.
hc_directory = os.environ.get('HC_AUDIO_DIR', r'C:\Users\S M N RAZA\Downloads\HC_AH1\HC_AH')
pwpd_directory = os.environ.get('PWPD_AUDIO_DIR', r'C:\Users\S M N RAZA\Downloads\PD_AH1\PD_AH')

# Fail early, naming the offending setting, if either folder is missing
for _env_name, _audio_dir in (('HC_AUDIO_DIR', hc_directory), ('PWPD_AUDIO_DIR', pwpd_directory)):
    if not os.path.isdir(_audio_dir):
        raise FileNotFoundError(
            f"Audio directory not found: {_audio_dir!r}. "
            f"Set the {_env_name} environment variable or update the default path."
        )

# List all audio files in both directories
hc_audio_files = list_audio_files(hc_directory)
pwpd_audio_files = list_audio_files(pwpd_directory)

# Build the MFCC feature table: one row per recording, Healthy Controls first,
# then PwPD. Rows are collected in plain lists and turned into a DataFrame
# once, instead of concatenating a growing DataFrame on every iteration.
feature_rows = []
file_names = []
class_labels = []
mfcc_labels = None
for class_label, class_directory, class_files in (
    ('HC', hc_directory, hc_audio_files),
    ('PwPD', pwpd_directory, pwpd_audio_files),
):
    for audio_file in class_files:
        audio_path = os.path.join(class_directory, audio_file)
        mfcc_features, mfcc_labels = feature_extraction(audio_path)
        feature_rows.append(mfcc_features)
        file_names.append(audio_file)
        class_labels.append(class_label)

# Stop here with a readable message rather than failing later on an empty table
if not feature_rows:
    raise ValueError(
        "No .wav files were found in hc_directory or pwpd_directory; "
        "cannot build the MFCC feature table."
    )

# Columns: the MFCC coefficients, followed by 'Audio_File' and 'Label'
mfcc_df = pd.DataFrame(np.vstack(feature_rows), columns=mfcc_labels)
mfcc_df['Audio_File'] = file_names
mfcc_df['Label'] = class_labels

# Prepare data for model training: every MFCC column is a feature,
# the 'Label' column ('HC' / 'PwPD') is the target
X = mfcc_df.drop(columns=['Audio_File', 'Label'])
y = mfcc_df['Label']

# Split the data into training (80%) and testing (20%) sets.
# Stratifying on the label keeps the HC/PwPD proportions the same in both
# parts, so the held-out metrics are not skewed by an unlucky draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Initialize and train the Random Forest model (tree splits do not depend on
# feature scale, so it is trained on the raw MFCC values)
rfc_model = RandomForestClassifier(random_state=42)
rfc_model.fit(X_train, y_train)

# Initialize and train the SVM model. The features are standardized first
# because kernel SVMs are sensitive to feature scale; wrapping both steps in a
# pipeline means the same scaling is applied automatically at prediction time
# and is saved together with the classifier.
svm_model = make_pipeline(StandardScaler(), SVC(probability=True, random_state=42))
svm_model.fit(X_train, y_train)

# Initialize and train the KNN model, also on standardized features since its
# neighbours are chosen by distance
knn_model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
knn_model.fit(X_train, y_train)

# Make predictions with all models on the held-out test set
rfc_pred = rfc_model.predict(X_test)
svm_pred = svm_model.predict(X_test)
knn_pred = knn_model.predict(X_test)

# Print the classification report and confusion matrix of each scikit-learn model.
# The label order is passed explicitly so the confusion matrix is always
# 2x2 with rows/columns ordered HC, PwPD, even if a class is absent from the
# test set or the predictions.
for model_name, predictions in (
    ('RFC', rfc_pred),
    ('SVM', svm_pred),
    ('KNN', knn_pred),
):
    print(f"{model_name} Classification Report:")
    print(classification_report(y_test, predictions))

    print(f"{model_name} Confusion Matrix:")
    print(confusion_matrix(y_test, predictions, labels=['HC', 'PwPD']))

# Persist each fitted scikit-learn model to its own joblib file
for fitted_model, destination in (
    (rfc_model, 'rfc_trained_model.joblib'),
    (svm_model, 'svm_trained_model.joblib'),
    (knn_model, 'knn_trained_model.joblib'),
):
    joblib.dump(fitted_model, destination)

# Prepare data for the CNN: add a trailing axis of length 1 so each sample
# has shape (n_features, 1), matching the input shape the model is built with
X_cnn = X.values.reshape(X.shape[0], X.shape[1], 1)

# Same 80/20 split settings as the scikit-learn models; stratifying on the
# label keeps both classes proportionally represented in the held-out set
X_train_cnn, X_test_cnn, y_train_cnn, y_test_cnn = train_test_split(
    X_cnn, y, test_size=0.2, random_state=42, stratify=y
)


def build_cnn_model(input_shape):
    """Build and compile the 1-D convolutional binary classifier.

    The network is two Conv1D + MaxPooling1D stages (32 then 64 filters,
    kernel size 3, pool size 2), a Flatten layer, a 128-unit ReLU dense layer
    and a single sigmoid output unit. It is compiled with the Adam optimizer,
    binary cross-entropy loss and accuracy as the reported metric.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample, forwarded to ``layers.Input``.

    Returns
    -------
    The compiled ``models.Sequential`` instance.
    """
    network = models.Sequential([
        layers.Input(shape=input_shape),
        # Convolution / pooling stage 1
        layers.Conv1D(32, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        # Convolution / pooling stage 2
        layers.Conv1D(64, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        # Classification head
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network


# Encode the string labels as integers once (HC -> 0, PwPD -> 1) and reuse
# the encoded series for training, validation and evaluation
label_to_int = {'HC': 0, 'PwPD': 1}
y_train_encoded = y_train_cnn.map(label_to_int)
y_test_encoded = y_test_cnn.map(label_to_int)

# Build the network for samples of shape (n_features, 1) and train it,
# reporting metrics on the held-out set after every epoch
cnn_model = build_cnn_model((X_train_cnn.shape[1], 1))
cnn_model.fit(
    X_train_cnn,
    y_train_encoded.values,
    epochs=10,
    batch_size=32,
    validation_data=(X_test_cnn, y_test_encoded.values),
)

# Threshold the sigmoid outputs at 0.5 to obtain 0/1 class predictions
cnn_scores = cnn_model.predict(X_test_cnn)
y_pred_cnn = (cnn_scores > 0.5).astype("int32").reshape(-1)
print("CNN Classification Report:")
print(classification_report(y_test_encoded, y_pred_cnn))
print("CNN Confusion Matrix:")
print(confusion_matrix(y_test_encoded, y_pred_cnn))

# Save the trained network in HDF5 format
cnn_model.save('cnn_parkinsons_model.h5')


def _diagnosis_text(is_pwpd):
    """Return the user-facing diagnosis string for a PwPD / not-PwPD decision."""
    return 'Parkinson Disease (PwPD)' if is_pwpd else 'Healthy Control (HC)'


def display_audio_and_classify(audio_file, directory):
    """Show an audio player for a recording and print every model's prediction.

    Parameters
    ----------
    audio_file : str
        File name of the recording; also used as the display name in the
        printed messages.
    directory : str
        Folder that contains ``audio_file``.

    Notes
    -----
    Uses the module-level ``rfc_model``, ``svm_model``, ``knn_model`` and
    ``cnn_model``. All four predictions are printed with the same wording:
    'Parkinson Disease (PwPD)' or 'Healthy Control (HC)'.
    """
    audio_path = os.path.join(directory, audio_file)
    print(f"Playing: {audio_file}")
    ipd.display(ipd.Audio(audio_path))

    # One-row feature table with the same column names used for training
    mfcc_features, mfcc_labels = feature_extraction(audio_path)
    mfcc_df_single = pd.DataFrame(mfcc_features.reshape(1, -1), columns=mfcc_labels)

    # The scikit-learn models return the string label directly
    decisions = {
        'Random Forest': rfc_model.predict(mfcc_df_single)[0] == 'PwPD',
        'SVM': svm_model.predict(mfcc_df_single)[0] == 'PwPD',
        'KNN': knn_model.predict(mfcc_df_single)[0] == 'PwPD',
    }

    # The CNN returns a sigmoid score; above 0.5 is treated as PwPD.
    # Its input needs the extra trailing axis: (1, n_features, 1).
    cnn_score = cnn_model.predict(mfcc_df_single.values.reshape(1, -1, 1))
    decisions['CNN'] = bool(cnn_score[0][0] > 0.5)

    for model_name, is_pwpd in decisions.items():
        print(f"{model_name} Prediction for {audio_file}: {_diagnosis_text(is_pwpd)}")


# Shared output area, plus the controls for picking and classifying one of
# the existing recordings (Healthy Control files listed first, then PwPD)
output = Output()
classify_button = Button(description='Classify')
audio_dropdown = Dropdown(
    description='Select Audio:',
    options=hc_audio_files + pwpd_audio_files,
)

# Function to record live audio
def record_audio(duration=5, sample_rate=22050):
    """Record mono audio from the default input device and save it as a WAV file.

    Parameters
    ----------
    duration : int or float, optional
        Length of the recording in seconds (default 5).
    sample_rate : int, optional
        Sampling rate in Hz used for both recording and the saved file
        (default 22050).

    Returns
    -------
    str
        Path of the temporary ``.wav`` file holding the recording. The file is
        not deleted automatically; the caller is responsible for removing it.
    """
    print("Recording...")
    audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
    sd.wait()  # Wait until the recording is finished
    print("Recording complete.")

    # Reserve a unique temporary file name, then close that handle before
    # writing so the file is not left open and is only opened once at a time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_path = temp_file.name
    write(temp_path, sample_rate, audio)  # Save as .wav file
    return temp_path

# Button that lets the user capture live audio and classify it.
# Only the widget is created here; it does nothing until a click handler is
# registered on it with on_click.
record_button = Button(description="Record and Classify Live Audio")



ValueError: at least one array or dtype is required

In [None]:
# Click handler for the "Classify" button
def on_classify_button_clicked(b):
    """Classify the recording currently selected in the dropdown.

    ``b`` is the clicked button supplied by ipywidgets; it is not used.
    Everything printed or displayed goes to the shared ``output`` widget,
    which is cleared first.
    """
    with output:
        output.clear_output()
        chosen_file = audio_dropdown.value
        # The dropdown only holds file names, so look up which folder it came from
        if chosen_file in hc_audio_files:
            source_directory = hc_directory
        else:
            source_directory = pwpd_directory
        display_audio_and_classify(chosen_file, source_directory)

# Click handler for the "Record and Classify Live Audio" button
def on_record_button_clicked(b):
    """Record five seconds of audio, classify it, then delete the recording.

    ``b`` is the clicked button supplied by ipywidgets; it is not used.
    Everything printed or displayed goes to the shared ``output`` widget,
    which is cleared first.
    """
    with output:
        output.clear_output()
        temp_file_path = record_audio(duration=5)  # Record 5 seconds of audio
        try:
            print("Classifying recorded audio...")
            # Pass the bare file name and its folder separately so the messages
            # show the file name rather than the full temporary path.
            display_audio_and_classify(
                os.path.basename(temp_file_path),
                os.path.dirname(temp_file_path),
            )
        finally:
            # The recording is a throwaway temp file; remove it so repeated
            # clicks do not pile up files. NOTE(review): this relies on the
            # audio player embedding the file's data when it is created, which
            # is IPython's default for local files - confirm if that changes.
            try:
                os.remove(temp_file_path)
            except OSError as cleanup_error:
                print(f"Could not delete temporary recording {temp_file_path}: {cleanup_error}")

# Attach each button to its click handler
for button, handler in (
    (classify_button, on_classify_button_clicked),
    (record_button, on_record_button_clicked),
):
    button.on_click(handler)

# Render the controls stacked vertically, with the shared output area last
controls = VBox([audio_dropdown, classify_button, record_button, output])
display(controls)

VBox(children=(Dropdown(description='Select Audio:', options=('AH_064F_7AB034C9-72E4-438B-A9B3-AD7FDA1596C5.wa…