# BREATHING WAVE
## DEEP LEARNING - LSTM
### 04 March 2023

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

# Load the waveform CSV, then drop its last column ("notes").
df = pd.read_csv("breathing_waveform_data.csv")
df = df.iloc[:, :-1]

In [None]:
df

In [None]:
# Add a constant 0.01 to every column except the last one, in place.
# NOTE(review): the reason for this offset is not stated in the notebook —
# confirm. Because the update is in place, re-running this cell adds another
# 0.01 each time.
df.iloc[:, :-1] += 0.01

In [None]:
df

In [None]:
# Features are all columns except the last; the last column holds the labels.
X, Y = df.iloc[:, :-1], df.iloc[:, -1]

# Report whether either part contains any missing value.
print("X have a null? \t{}".format(X.isna().to_numpy().any()))
print("Y have a null? \t{}".format(Y.isna().to_numpy().any()))

In [None]:
X

In [None]:
# Number of samples per distinct label value.
Y.value_counts()

### Program Starting
# PART 1 : Data Preprocessing

## One-Hot Encode the Label Data

In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Map each class label to an integer code, e.g. [0, 0, 0, 1, 1, 2, 2, ...].
# The fitted encoder keeps the label <-> code mapping in `encoder.classes_`.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

# One-hot encode the integer codes (one column per class).
# `to_categorical` is imported from `keras.utils` directly because the
# `keras.utils.np_utils` module is not importable in recent Keras releases.
hot_y = to_categorical(encoded_Y)

In [None]:
hot_y

## Extract Features Using MFCC (optional: skip this step if you do not want it)

In [None]:
import librosa
from tqdm import tqdm

def extract_mfcc(df_, sr=60, n_mfcc=85):
    """Compute one MFCC feature vector per row of ``df_``.

    Each row is treated as a single 1-D signal and passed to
    ``librosa.feature.mfcc``.

    Args:
        df_: DataFrame whose rows are numeric signals (values must be
            castable to float32).
        sr: Sampling rate handed to librosa.
        n_mfcc: Number of MFCC coefficients to compute per row.

    Returns:
        DataFrame with one row per input row and ``n_mfcc`` columns
        labelled ``0 .. n_mfcc - 1``.

    Raises:
        ValueError: If librosa yields more than one frame for a row, because
            the frame axis can then not be squeezed away.
    """
    rows = []
    for _, row in tqdm(df_.iterrows(), total=df_.shape[0]):
        signal = np.array(row).astype(np.float32)
        # librosa returns an array of shape (n_mfcc, n_frames).
        mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
        # Drop the frame axis; this only succeeds when exactly one frame
        # was produced for the row.
        rows.append(list(np.squeeze(mfccs, axis=1)))
    # Size the column labels from n_mfcc so non-default values work too.
    return pd.DataFrame(rows, columns=np.arange(n_mfcc))

In [None]:
# Replace the raw features with their MFCC representation
# (85 coefficients per row with the function's default n_mfcc).
X = extract_mfcc(X)

## Scale The Training Data (STD)

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardize every feature column with a StandardScaler (default settings).
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed later in this notebook, so test-set statistics
# influence the scaling — consider fitting on the training portion only.
sc = StandardScaler()
X = sc.fit_transform(X)

## Reshaping The Training Data to 3-Dimensional Numpy Array
### STRUCTURE : (batch_size, timestep, feature)

In [None]:
# Number of features fed to the LSTM at each step.
# NOTE: despite its name, `timestep` is the size of the LAST axis (features
# per step); the number of steps is derived from the column count below.
timestep = 5

# Derive the step count from the data instead of hard-coding 85 columns, so
# the reshape also works when the optional MFCC step is skipped or when a
# different n_mfcc is used. np.reshape raises ValueError if the column count
# is not divisible by `timestep`.
X = np.reshape(X, (X.shape[0], X.shape[1] // timestep, timestep))
# With 85 columns this gives (n_samples, 17, 5): 17 steps of 5 features each.

## Train Test Split (80% training 20% Testing)
### REMEMBER: the seed must stay the same (random_state = 21)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    hot_y,
    test_size=0.2,
    random_state=21,
)

# PART 2 : Building The RNN

In [None]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout

## Creating Layer of RNN

In [None]:
# Configuration for Model Structure
from keras.optimizers import Adam
_optimizer = Adam()                 # Adam created with its default arguments
_loss = "categorical_crossentropy"  # loss for the one-hot encoded labels
_metric = ["accuracy"]              # metric reported during fit/evaluate

In [None]:
# Stacked-LSTM classifier: three LSTM layers, each followed by dropout, then
# a softmax output layer.
classifier = Sequential()

# First LSTM layer. The input shape (steps, features per step) is taken from
# the prepared training data rather than hard-coded, so the network always
# agrees with the preprocessing.
classifier.add(LSTM(units=60, return_sequences=True, input_shape=X_train.shape[1:]))
# Dropout(0.2) randomly zeroes 20% of the layer's outputs during training
# (on average 60 * 20% = 12 of the 60 units).
classifier.add(Dropout(0.2))

# Second LSTM layer; still returns the full sequence for the next LSTM.
classifier.add(LSTM(units=60, return_sequences=True))
classifier.add(Dropout(0.2))

# Third LSTM layer; returns only its final output for the dense layer.
classifier.add(LSTM(units=60))
classifier.add(Dropout(0.2))

# Output layer: one softmax unit per class, sized from the width of the
# one-hot encoded labels instead of a hard-coded class count.
classifier.add(Dense(units=Y_train.shape[1], activation='softmax'))

# Compile
classifier.compile(optimizer=_optimizer, loss=_loss, metrics=_metric)

# Plot Summary of Model
classifier.summary()

# PART 3 : Training Time

## Train the Model

In [None]:
# Training hyper-parameters: number of epochs and batch size.
ep, bt = 15, 32

# Single fit without cross-validation; the held-out test split doubles as the
# validation set evaluated after each epoch.
classifier.fit(
    X_train,
    Y_train,
    epochs=ep,
    batch_size=bt,
    validation_data=(X_test, Y_test),
)

## Evaluate Model

In [None]:
# Evaluate on the held-out split. With one loss and one metric compiled,
# `score` is [loss, accuracy].
# NOTE(review): the loss is multiplied by 100 like the accuracy even though
# it is not a percentage — confirm this is intended.
score = classifier.evaluate(X_test, Y_test)
print(f"Accuracy \t: {score[1] * 100:.2f}")
print(f"Loss \t\t: {score[0] * 100:.2f}")

In [None]:
# Model outputs for the test split, one row per sample; each row is the
# softmax output of the final Dense layer.
pred = classifier.predict(X_test)

In [None]:
# Collapse the one-hot labels and the predicted probabilities to integer
# class indices (position of the largest value in each row).
y_true = Y_test.argmax(axis=1)
y_pred = pred.argmax(axis=1)

## Plot Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Confusion matrix: rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_true, y_pred)

# Draw the matrix as a heat map.
plt.imshow(conf_matrix, cmap=plt.cm.Greens)

# Label the axes and place exactly one tick on every class index.
tick_marks = np.arange(len(conf_matrix))
plt.xticks(tick_marks)
plt.yticks(tick_marks)
plt.xlabel('Predicted label')
plt.ylabel('True label')

# Write each count in the centre of its cell (x = column, y = row).
for (i, j), count in np.ndenumerate(conf_matrix):
    plt.text(j, i, count, ha='center', va='center')

# Show the plot
plt.show()

## Saving the model into a file

In [None]:
import os

# saving the model
filename = "{}\\{}\\{}.h5".format(os.getcwd(), "MODELS\\[3-layer] - 3L1", _optimizer)
classifier.save(filename)

# PART 4 : Testing the Loaded Model

In [None]:
from tensorflow.keras.models import load_model

# _optimizer = "Adam"
filename = "{}\\{}\\{}.h5".format(os.getcwd(), "MODELS\\[3-layer] - 3L1", _optimizer)

# load model
loaded_model = load_model(filename)

## evaluate the model

In [None]:
# Evaluate the reloaded model on the same held-out split and print the two
# entries of `score` (index 1 as accuracy, index 0 as loss), each scaled by
# 100.
score = loaded_model.evaluate(X_test, Y_test)
print(f"Accuracy \t: {score[1] * 100:.2f}")
print(f"Loss \t\t: {score[0] * 100:.2f}")