<a href="https://colab.research.google.com/github/warrengmatin/CSEE_903_GroupProject/blob/main/Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Clone GitHub Repo

In [9]:
from google.colab import userdata
import os

# Fresh-clone the project repository into the Colab workspace and make it the
# current working directory.

# Absolute location of the local checkout; removed below if it already exists.
dir_path = '/content/CSEE_903_GroupProject'

# Remove any checkout left over from a previous run of this cell.
if os.path.isdir(dir_path):
  !rm -rf {dir_path}

# Clone from /content/ so the checkout is created at dir_path, then move into it.
%cd /content/
!git clone https://github.com/warrengmatin/CSEE_903_GroupProject.git
%cd {dir_path}

# Quick check: list the contents of the new working directory.
print('List files: ', os.listdir())

/content
Cloning into 'CSEE_903_GroupProject'...
remote: Enumerating objects: 118, done.[K
remote: Counting objects: 100% (118/118), done.[K
remote: Compressing objects: 100% (96/96), done.[K
remote: Total 118 (delta 43), reused 66 (delta 18), pack-reused 0[K
Receiving objects: 100% (118/118), 26.50 MiB | 22.43 MiB/s, done.
Resolving deltas: 100% (43/43), done.
/content/CSEE_903_GroupProject
List files:  ['Demo.ipynb', 'LICENSE', '.git', 'cough_segmentation_project', 'Setup_Project_in_Github_Repo.ipynb', 'README.md']


# Load Audio File

In [30]:
from google.colab import files
import librosa
import numpy as np
import pandas as pd
import IPython.display as ipd

# Project locations, written as relative paths: they resolve against the current
# working directory (the clone cell ends with `%cd` into the repository checkout).
path_dir = "cough_segmentation_project/cough_segmentation_package/"
# Directory appended to sys.path below.
path_utils = f"{path_dir}utils/"
# Read as a global by process() for the model files, and passed to
# get_audio_files() as the data directory.
path = f"{path_dir}data/saved/"

# Reload imported modules automatically before each cell execution.
# NOTE(review): re-running this cell prints "The autoreload extension is already
# loaded"; `%reload_ext autoreload` would avoid that notice.
%load_ext autoreload
%autoreload 2

# process() imports `framing`, `sono_test_nn_model` and `sono_test_ml_model` by
# bare name — presumably these live in path_utils; verify against the repository.
import sys
sys.path.append(path_utils)

def get_audio_files( path ):
  """Prompt for a data source and return the selected audio as a DataFrame.

  The user picks one of three sources:
    1. upload one or more .wav files through the Colab upload dialog,
    2. load every .wav file found directly inside `path`,
    3. load the pre-labelled feather dataset stored inside `path`.
  Any answer that is not a plain decimal number, or is a number other than
  1 or 2, selects option 3.

  Args:
    path: Directory holding the sample .wav files and the pre-labelled
      feather file.

  Returns:
    A DataFrame indexed by `key`. For options 1 and 2 the columns are `amp`
    (signal resampled to 16 kHz), `sf` (always 16000), `cough_start_end`
    (None) and `label` (a list of zeros, one per sample); files that are not
    .wav are reported and skipped. For option 3 the columns are those of the
    feather file, with its `index` column used as the key.
    Returns None, after printing "No file found", when option 1 or 2 yields
    no files at all (for example when the upload dialog is cancelled).
  """
  target_sf = 16000

  print("******************************")
  print("A. Data Loading")
  print("******************************")
  xx = input("Enter: \n 1. Upload Audio Test File in .wav format \n 2. Run test with sample audio files \n 3. Run test with pre-labelled test data \n\n")
  # isdecimal() only accepts characters int() can parse; isnumeric() also
  # accepts e.g. fractions and superscripts, which make int() raise.
  answer = xx.strip()
  choice = int(answer) if answer.isdecimal() else 3

  if choice not in (1, 2):
    # Pre-labelled data: the feather file stores the key in a column named 'index'.
    df = pd.read_feather(os.path.join(path, "audio_data_12_before_framing_16Khz.feather"))
    df = df.assign(key=df['index']).drop(columns=['index'])
    return df.set_index('key')

  dfiles = []
  if choice == 1:
    print("Select an Audio File in wav format")
    uploaded = files.upload()
    # upload() returns a dict keyed by file name; it is empty when the dialog
    # is cancelled, and may hold several files — keep all of them.
    dfiles.extend(uploaded.keys())
  else:
    # Sorted so the resulting row order does not depend on the file system.
    for filename in sorted(os.listdir(path)):
      if filename.lower().endswith('.wav'):
        dfiles.append(os.path.join(path, filename))

  if not dfiles:
    print("No file found")
    return None

  dic = {"key":[], "amp":[], "sf":[], "cough_start_end":[], "label":[]}
  for file_name in dfiles:
    file_name_only = os.path.basename(file_name)
    print(f'Loading the audio file: {file_name_only}')

    stem, extension = os.path.splitext(file_name_only)
    if extension.lower() != '.wav':
      print("Only wav format are accepted\n")
      continue

    # Load at the native rate, then bring every file to the common target rate.
    amp, sf = librosa.load(file_name, sr=None)
    amp_resampled = librosa.resample(amp, orig_sr=sf, target_sr=target_sf)
    dic["key"].append(stem)
    dic["amp"].append(amp_resampled)
    dic["sf"].append(target_sf)
    # Unlabelled audio: one zero label per sample, no cough boundaries.
    dic["label"].append( list(np.zeros(amp_resampled.shape[0])) )
    dic["cough_start_end"].append(None)

  return pd.DataFrame(dic).set_index('key')

def process(dfl):
  """Frame the audio, run the model the user selects, then play the audio back.

  Steps:
    B. Split every recording into frames of size 1024 and plot the first and
       last framed recording as an overlap check.
    C. Ask which model to run (1: spectrogram CNN, 2: mel-spectrogram CNN,
       3: MFCC/ZCR/RMS logistic regression) and run its prediction. An answer
       that is not a plain decimal number selects 1; numbers below 3 other
       than 1 select the mel-spectrogram CNN; numbers of 3 or more select
       logistic regression.
    D. Render an audio player for every recording in `dfl`.

  Model files are read from the module-level `path` directory.

  Args:
    dfl: DataFrame as returned by get_audio_files(), or None when no audio
      could be loaded.

  Returns:
    None. Does nothing except print a notice when `dfl` is None or has no rows.
  """
  # get_audio_files() returns None when no file was found; treat that like an
  # empty frame instead of failing on `.shape`.
  if dfl is None or dfl.shape[0] == 0:
    print("No audio data to process")
    return

  print("******************************")
  print("B. Create 1024 Frame Size")
  print("******************************")
  from framing import Framing
  frame = Framing()
  # apply_framing returns one result per requested frame size; take the 1024 entry.
  df_frame = frame.apply_framing(audio_df=dfl, frame_sizes=[1024])[1024]
  visual_set = {"1st":df_frame.head(1)["key"].values[0],"Last":df_frame.tail(1)["key"].values[0]}
  frame.visualize_overlapping_frames(title='Test to Validate Overlapping Frames', framed_df=df_frame, visual_set=visual_set, figsize=(6,2))

  # Metrics are only requested when at least one frame carries a positive label.
  metrics = bool((df_frame["label"] == 1).any())

  print("******************************")
  print("C. Model Prediction")
  print("******************************")
  xx = input("Enter: \n 1. Spectrogram with CNN \n 2. Mel-spectrogram with CNN \n 3. MFCC, ZCR & RMS with Logistic Regression \n\n")
  # isdecimal() only accepts characters int() can parse; isnumeric() also
  # accepts e.g. fractions and superscripts, which make int() raise.
  answer = xx.strip()
  choice = int(answer) if answer.isdecimal() else 1

  if choice < 3:
    # Heavy imports stay local so the logistic-regression path does not pay for them.
    import tensorflow as tf
    from sono_test_nn_model import SonoTestNNModel
    sono_test_nn = SonoTestNNModel()

    if choice == 1:
      print("\nCNN Spectrogram\n")
      model_path = f"{path}cnn_model_spectrogram_dropout_7_batch784_epoch10.h5"
      predict_kwargs = {}
    else:
      print("\nCNN Mel-spectrogram\n")
      model_path = f"{path}cnn_model_melspectrogram_drop5_5_batch784_epoch10.h5"
      predict_kwargs = {"percent_proba": 0.5, "key": 'mel'}

    cnn = tf.keras.models.load_model(model_path)
    sono_test_nn.predict_nn(cnn, df_frame, metrics=metrics, debug=False, **predict_kwargs)

  else:
    print("\nLogistic Regression\n")
    import joblib
    from sono_test_ml_model import SonoTestMlModel
    sono_test_ml = SonoTestMlModel()
    model_path = f"{path}logisticbest_params_default.sav"
    scaler_path = f"{path}logistic_scaler.pkl"
    # NOTE: joblib.load unpickles the file; only load model files from a trusted source.
    LR = joblib.load(model_path)
    # The scaler is handed over as a path, not as a loaded object.
    sono_test_ml.predict_ml(model=LR, df_frame=df_frame, scaler=scaler_path, metrics=metrics, debug=False)

  print("******************************")
  print("D. Play Audio File")
  print("******************************")
  for index, row in dfl.iterrows():
    print('Audio', index)
    display(ipd.Audio(row["amp"], rate=row["sf"]))
    print("\n")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Process Sono

In [31]:
# Run the interactive pipeline: prompt for a data source in `path`, then hand the result to process().
process(get_audio_files(path=path))

Output hidden; open in https://colab.research.google.com to view.