# Phase 2 Notebook

In [None]:
# Project-local module that supplies every data, model, training and plotting helper used below.
import helperFunctions
import warnings
# Suppress all warnings for the whole session; comment this out when debugging,
# since it also hides deprecation and numerical warnings.
warnings.filterwarnings('ignore')
import tensorflow.keras.utils as kutil
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
# NOTE(review): `le` is an alias for the LabelEncoder class itself, not an instance.
from sklearn.preprocessing import LabelEncoder as le

## Step 1: Data Importing
This step creates a dataframe with the following columns: start and end (the start and end of each cycle), crackles, wheezes, pid, and filename.

In [None]:
# Build the complete annotation dataframe through the project helper.
completeData = helperFunctions.getCompleteData()
# Bare name as the last expression so the notebook renders the dataframe.
completeData

## Step 2: Data Preprocessing
This step splits the audio into cycles and updates the dataframe with the new filenames. To split the audio, we must choose a standard length for all audio samples; however, a length that is too large would be difficult to process, and a length that is too small may discard too much data. We chose 6 seconds as the standard length for all audio samples (see figure).

In [None]:
# Draw the figure that motivates the 6-second standard length
# (presumably a view of cycle durations against maxLength -- confirm in helperFunctions).
helperFunctions.getCycleGraph(completeData, maxLength=6)

In [None]:
# Split the audio into per-cycle samples using the same maxLength=6 as the graph cell.
# This will take a long time (~20 minutes).
splitData = helperFunctions.getSplitData(completeData, maxLength=6)
# Bare name as the last expression so the notebook renders the updated dataframe.
splitData

## Step 3: Data Categorization and Division
This step adds a category to each cycle based on the presence of crackles and wheezes. The categories are as follows: none, crackles only, wheezes only, and both crackles and wheezes. The category is added to the dataframe as a new column. Furthermore, the data is divided into training and testing sets, with the split performed within each category. As seen in the "Category Distribution" figure, the dataset is imbalanced.

In [None]:
# Attach the crackles/wheezes category column to every split cycle.
categorizedData = helperFunctions.getCategorizedData(splitData)
# Bare name as the last expression so the notebook renders the dataframe.
categorizedData

In [None]:
# Divide the categorized cycles into train and test sets.
# testSize=0.2 is presumably a 20% hold-out applied per category -- confirm in helperFunctions.
trainData, testData = helperFunctions.getTestTrainSplit(categorizedData, testSize=0.2)


## Step 4: Feature Extraction and Modeling
This step creates CNN and RNN models for three different features (MFCC, STFT, and Mel-Spectrogram), which are all ways of compressing audio information. Furthermore, this step creates dense networks that combine all three features into a single CNN model, a single RNN model, and a combined CNN and RNN model.

### Step 4.1: Mel-Frequency Cepstral Coefficients (MFCC)

In [None]:
# Build the MFCC models and features in one call. The result unpacks as:
# CNN model, RNN model, train features, test features. Training happens later in Step 5.
mfccCnnModel, mfccRnnModel, mfccTrain, mfccTest = helperFunctions.getMFCCModel(trainData, testData)

In [None]:
# Plot the MFCC features of one training example.
# index=420 is presumably an arbitrary position in mfccTrain -- it must exist in the training set.
helperFunctions.createMFCCPlot(mfccTrain, index=420)

#### Step 4.1.1: MFCC CNN Model

In [None]:
# Show the architecture summary of the MFCC CNN (assumes a Keras model -- TODO confirm).
mfccCnnModel.summary()

#### Step 4.1.2: MFCC RNN Model

In [None]:
# Show the architecture summary of the MFCC RNN (assumes a Keras model -- TODO confirm).
mfccRnnModel.summary()

### Step 4.2: Short-Time Fourier Transform (STFT)

In [None]:
# Build the STFT models and features in one call. The result unpacks as:
# CNN model, RNN model, train features, test features. Training happens later in Step 5.
stftCnnModel, stftRnnModel, stftTrain, stftTest = helperFunctions.getSTFTModel(trainData, testData)

In [None]:
# Plot the STFT features of one training example.
# index=420 is presumably an arbitrary position in stftTrain -- it must exist in the training set.
helperFunctions.createSTFTPlot(stftTrain, index=420)

#### Step 4.2.1: STFT CNN Model

In [None]:
# Show the architecture summary of the STFT CNN (assumes a Keras model -- TODO confirm).
stftCnnModel.summary()

#### Step 4.2.2: STFT RNN Model

In [None]:
# Show the architecture summary of the STFT RNN (assumes a Keras model -- TODO confirm).
stftRnnModel.summary()

### Step 4.3: Mel-Spectrogram

In [None]:
# Build the Mel-Spectrogram models and features in one call. The result unpacks as:
# CNN model, RNN model, train features, test features. Training happens later in Step 5.
melCnnModel, melRnnModel, melTrain, melTest = helperFunctions.getMELModel(trainData, testData)

In [None]:
# Plot the Mel-Spectrogram features of one training example.
# index=420 is presumably an arbitrary position in melTrain -- it must exist in the training set.
helperFunctions.createMELPlot(melTrain, index=420)

#### Step 4.3.1: Mel-Spectrogram CNN Model

In [None]:
# Show the architecture summary of the Mel-Spectrogram CNN (assumes a Keras model -- TODO confirm).
melCnnModel.summary()

#### Step 4.3.2: Mel-Spectrogram RNN Model

In [None]:
# Show the architecture summary of the Mel-Spectrogram RNN (assumes a Keras model -- TODO confirm).
melRnnModel.summary()

### Step 4.4: Dense Network

#### Step 4.4.1: Dense CNN Model

In [None]:
# Combine the three per-feature CNNs (MFCC, STFT, Mel order) into one dense model.
# NOTE(review): this runs before the sub-models are trained in Step 5; whether the dense
# model shares their layers/weights depends on getDenseModel -- confirm.
denseCnnModel = helperFunctions.getDenseModel(mfccCnnModel, stftCnnModel, melCnnModel)

In [None]:
# Show the architecture summary of the dense CNN (assumes a Keras model -- TODO confirm).
denseCnnModel.summary()

#### Step 4.4.2: Dense RNN Model

In [None]:
# Combine the three per-feature RNNs (MFCC, STFT, Mel order) into one dense model.
# NOTE(review): this runs before the sub-models are trained in Step 5; whether the dense
# model shares their layers/weights depends on getDenseModel -- confirm.
denseRnnModel = helperFunctions.getDenseModel(mfccRnnModel, stftRnnModel, melRnnModel)

In [None]:
# Show the architecture summary of the dense RNN (assumes a Keras model -- TODO confirm).
denseRnnModel.summary()

#### Step 4.4.3: Dense Combined Model

In [None]:
# Combine all six sub-models into one dense model: the three CNNs first, then the three RNNs,
# each group in MFCC, STFT, Mel order. The training/evaluation inputs in Step 5.4.3 follow
# this same six-element order.
denseCombinedModel = helperFunctions.getDenseModel2(mfccCnnModel, stftCnnModel, melCnnModel, mfccRnnModel, stftRnnModel, melRnnModel)

In [None]:
# Show the architecture summary of the combined CNN+RNN dense model (assumes a Keras model -- TODO confirm).
denseCombinedModel.summary()

## Step 5: Data Training
This step trains the models created in Step 4 on the training data. The trained models are then evaluated on the testing data, and the results are plotted. Figures showing the accuracy and loss of each model are shown below.

In [None]:
# Encode the category labels of both splits. dtr and dte are the train and test targets
# passed to every trainModel / evaluate call in this step.
dtr, dte = helperFunctions.encode(trainData.category, testData.category)

### Step 5.1: MFCC

#### Step 5.1.1: MFCC CNN

In [None]:
# Train the MFCC CNN; the helper returns the model (re-bound here) and a history object for plotModel.
# NOTE(review): the test labels/features are also passed in -- presumably as validation data
# only; confirm they are not used for fitting.
mfccCnnModel, mfccCnnHistory = helperFunctions.trainModel(mfccCnnModel, dtr, dte, mfccTrain, mfccTest)

In [None]:
# Plot the training curves recorded for the MFCC CNN.
helperFunctions.plotModel(mfccCnnHistory)

In [None]:
# Score the trained MFCC CNN on the test features against the encoded test labels.
mfccCnnModel.evaluate(mfccTest, dte)

#### Step 5.1.2: MFCC RNN

In [None]:
# Train the MFCC RNN; the helper returns the model (re-bound here) and a history object for plotModel.
mfccRnnModel, mfccRnnHistory = helperFunctions.trainModel(mfccRnnModel, dtr, dte, mfccTrain, mfccTest)

In [None]:
# Plot the training curves recorded for the MFCC RNN.
helperFunctions.plotModel(mfccRnnHistory)

In [None]:
# Score the trained MFCC RNN on the test features against the encoded test labels.
mfccRnnModel.evaluate(mfccTest, dte)

### Step 5.2: STFT

#### Step 5.2.1: STFT CNN

In [None]:
# Train the STFT CNN; the helper returns the model (re-bound here) and a history object for plotModel.
stftCnnModel, stftCnnHistory = helperFunctions.trainModel(stftCnnModel, dtr, dte, stftTrain, stftTest)

In [None]:
# Plot the training curves recorded for the STFT CNN.
helperFunctions.plotModel(stftCnnHistory)

In [None]:
# Score the trained STFT CNN on the test features against the encoded test labels.
stftCnnModel.evaluate(stftTest, dte)

#### Step 5.2.2: STFT RNN

In [None]:
# Train the STFT RNN; the helper returns the model (re-bound here) and a history object for plotModel.
stftRnnModel, stftRnnHistory = helperFunctions.trainModel(stftRnnModel, dtr, dte, stftTrain, stftTest)

In [None]:
# Plot the training curves recorded for the STFT RNN.
helperFunctions.plotModel(stftRnnHistory)

In [None]:
# Score the trained STFT RNN on the test features against the encoded test labels.
stftRnnModel.evaluate(stftTest, dte)

### Step 5.3: Mel

#### Step 5.3.1: Mel-Spectrogram CNN

In [None]:
# Train the Mel-Spectrogram CNN; the helper returns the model (re-bound here) and a history object for plotModel.
melCnnModel, melCnnHistory = helperFunctions.trainModel(melCnnModel, dtr, dte, melTrain, melTest)

In [None]:
# Plot the training curves recorded for the Mel-Spectrogram CNN.
helperFunctions.plotModel(melCnnHistory)

In [None]:
# Score the trained Mel-Spectrogram CNN on the test features against the encoded test labels.
melCnnModel.evaluate(melTest, dte)

#### Step 5.3.2: Mel-Spectrogram RNN

In [None]:
# Train the Mel-Spectrogram RNN; the helper returns the model (re-bound here) and a history object for plotModel.
melRnnModel, melRnnHistory = helperFunctions.trainModel(melRnnModel, dtr, dte, melTrain, melTest)

In [None]:
# Plot the training curves recorded for the Mel-Spectrogram RNN.
helperFunctions.plotModel(melRnnHistory)

In [None]:
# Score the trained Mel-Spectrogram RNN on the test features against the encoded test labels.
melRnnModel.evaluate(melTest, dte)

### Step 5.4: Dense

#### Step 5.4.1: Dense CNN

In [None]:
# Train the three-input dense CNN. Feature lists are given in MFCC, STFT, Mel order,
# the same order in which the sub-models were passed when the dense model was built.
denseCnnModel, denseCnnHistory = helperFunctions.trainModel(
    denseCnnModel,
    dtr,
    dte,
    [mfccTrain, stftTrain, melTrain],
    [mfccTest, stftTest, melTest],
)

In [None]:
# Plot the training curves recorded for the dense CNN.
helperFunctions.plotModel(denseCnnHistory)

In [None]:
# Score the trained dense CNN on the three test feature sets (MFCC, STFT, Mel order).
denseCnnModel.evaluate(
    [mfccTest, stftTest, melTest],
    dte,
)

In [None]:
# Classification report and confusion matrix for the dense CNN on the test set.
helperFunctions.plotClassificationAndConfusion(
    denseCnnModel,
    [mfccTest, stftTest, melTest],
    dte,
)

#### Step 5.4.2: Dense RNN

In [None]:
# Train the three-input dense RNN. Feature lists are given in MFCC, STFT, Mel order,
# the same order in which the sub-models were passed when the dense model was built.
denseRnnModel, denseRnnHistory = helperFunctions.trainModel(
    denseRnnModel,
    dtr,
    dte,
    [mfccTrain, stftTrain, melTrain],
    [mfccTest, stftTest, melTest],
)

In [None]:
# Plot the training curves recorded for the dense RNN.
helperFunctions.plotModel(denseRnnHistory)

In [None]:
# Score the trained dense RNN on the three test feature sets (MFCC, STFT, Mel order).
denseRnnModel.evaluate(
    [mfccTest, stftTest, melTest],
    dte,
)

In [None]:
# Classification report and confusion matrix for the dense RNN on the test set.
helperFunctions.plotClassificationAndConfusion(
    denseRnnModel,
    [mfccTest, stftTest, melTest],
    dte,
)

#### Step 5.4.3: Dense Combined

In [None]:
# Train the six-input combined model. The (MFCC, STFT, Mel) triple is repeated twice:
# once for the CNN branches and once for the RNN branches.
denseCombinedModel, denseCombinedHistory = helperFunctions.trainModel(
    denseCombinedModel,
    dtr,
    dte,
    [mfccTrain, stftTrain, melTrain] * 2,
    [mfccTest, stftTest, melTest] * 2,
)

In [None]:
# Plot the training curves recorded for the combined CNN+RNN dense model.
helperFunctions.plotModel(denseCombinedHistory)

In [None]:
# Score the combined model; the (MFCC, STFT, Mel) test triple is repeated twice,
# once for the CNN branches and once for the RNN branches.
denseCombinedModel.evaluate(
    [mfccTest, stftTest, melTest] * 2,
    dte,
)

In [None]:
# Classification report and confusion matrix for the combined model on the test set;
# the (MFCC, STFT, Mel) test triple is repeated twice to feed both branch groups.
helperFunctions.plotClassificationAndConfusion(
    denseCombinedModel,
    [mfccTest, stftTest, melTest] * 2,
    dte,
)