In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import os

import librosa.display
import soundfile

# 0. Overview

1. Spectrogram
2. Mel Spectrogram
3. MFCC
4. Chromagram
5. Combination 1: Spectrogram(mean) with Spectral centroid and bandwidth
6. Combination 2: Combination 1 + Mel Spectrogram(mean) + MFCC + Chromagram(mean)

# Wrap related functions into a file: src/feature_extraction.py

```python
def feature_spectrogram(waveform, sample_rate):
    """Return the full 2D, dB-scaled STFT magnitude spectrogram of ``waveform``.

    The STFT magnitudes are converted with ``librosa.amplitude_to_db`` using
    the spectrogram's own maximum as reference (``ref=np.max``).
    ``sample_rate`` is not used here; it is accepted so that all ``feature_*``
    functions share the same signature.
    """
    magnitude = np.abs(librosa.stft(waveform))
    return librosa.amplitude_to_db(magnitude, ref=np.max)

def feature_spectrogram_mean(waveform, sample_rate):
    """Return the dB-scaled STFT spectrogram averaged down to a 1D vector.

    The 2D spectrogram is the one produced by ``feature_spectrogram``; the
    result holds one mean value per row of that spectrogram.
    """
    spectrogram_db = feature_spectrogram(waveform, sample_rate)
    # Averaging the transposed matrix over axis 0 collapses the second axis
    # of the spectrogram, leaving a single value for each of its rows.
    return np.mean(spectrogram_db.T, axis=0)

def feature_centroid(waveform, sample_rate):
    """Return ``librosa.feature.spectral_centroid`` of ``waveform`` at ``sample_rate``.

    The array is returned exactly as librosa produces it (no averaging or
    flattening is applied here).
    """
    return librosa.feature.spectral_centroid(y=waveform, sr=sample_rate)

def feature_bandwidth(waveform, sample_rate):
    """Return ``librosa.feature.spectral_bandwidth`` of ``waveform`` at ``sample_rate``.

    The array is returned exactly as librosa produces it (no averaging or
    flattening is applied here).
    """
    return librosa.feature.spectral_bandwidth(y=waveform, sr=sample_rate)

def feature_melspectrogram(waveform, sample_rate):
    """Return the full 2D mel spectrogram of ``waveform``.

    Uses 128 mel bands with an upper frequency bound of 8 kHz. The values are
    returned as librosa produces them; no dB conversion is applied.
    """
    mel_options = {'n_mels': 128, 'fmax': 8000}
    return librosa.feature.melspectrogram(y=waveform, sr=sample_rate, **mel_options)

def feature_melspectrogram_mean(waveform, sample_rate):
    """Return the mel spectrogram averaged down to one value per mel band.

    The 2D mel spectrogram comes from ``feature_melspectrogram`` (128 bands,
    8 kHz upper bound, which should be enough for most speech classification
    tasks); the result is a 1D array with one mean per row of that matrix.
    """
    mel = feature_melspectrogram(waveform, sample_rate)
    # Mean over axis 0 of the transposed matrix == mean of every mel-band row.
    return np.mean(mel.T, axis=0)

def feature_mfcc(waveform, sample_rate):
    """Return 40 MFCCs of ``waveform``, each averaged into a single value.

    ``librosa.feature.mfcc`` is called with ``n_mfcc=40``; the resulting 2D
    matrix is reduced to a 1D array holding the mean of each coefficient row.
    """
    coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=40)
    # Transpose + mean over axis 0 averages every coefficient row.
    return np.mean(coefficients.T, axis=0)

def feature_chromagram(waveform, sample_rate):
    """Return the full 2D chromagram computed from the STFT magnitude of ``waveform``."""
    # NOTE(review): librosa documents the ``S`` argument of chroma_stft as a
    # power spectrogram, while a plain magnitude is passed here -- confirm
    # this is intended.
    magnitude = np.abs(librosa.stft(waveform))
    return librosa.feature.chroma_stft(S=magnitude, sr=sample_rate)

def feature_chromagram_mean(waveform, sample_rate):
    """Return the chromagram averaged down to one value per chroma row.

    The 2D chromagram comes from ``feature_chromagram``; the result is a 1D
    array with the mean of each of its rows.
    """
    chroma = feature_chromagram(waveform, sample_rate)
    # Mean over axis 0 of the transposed matrix == mean of every chroma row.
    return np.mean(chroma.T, axis=0)
```

```python
def create_feature_set_1(file):
    """Feature set 1: read the audio file ``file`` and return its full 2D spectrogram."""
    with soundfile.SoundFile(file) as sound:
        samples = sound.read(dtype='float32')
        rate = sound.samplerate

    # All samples are already in memory, so the file can be closed before
    # the features are computed.
    return feature_spectrogram(samples, rate)

def create_feature_set_2(file):
    """Feature set 2: read the audio file ``file`` and return its full 2D mel spectrogram."""
    with soundfile.SoundFile(file) as sound:
        samples = sound.read(dtype='float32')
        rate = sound.samplerate

    # All samples are already in memory, so the file can be closed before
    # the features are computed.
    return feature_melspectrogram(samples, rate)

def create_feature_set_3(file):
    """Feature set 3: read the audio file ``file`` and return its averaged MFCC vector."""
    with soundfile.SoundFile(file) as sound:
        samples = sound.read(dtype='float32')
        rate = sound.samplerate

    # All samples are already in memory, so the file can be closed before
    # the features are computed.
    return feature_mfcc(samples, rate)

def create_feature_set_4(file):
    """Feature set 4: read the audio file ``file`` and return its full 2D chromagram."""
    with soundfile.SoundFile(file) as sound:
        samples = sound.read(dtype='float32')
        rate = sound.samplerate

    # All samples are already in memory, so the file can be closed before
    # the features are computed.
    return feature_chromagram(samples, rate)

def create_feature_set_5(file):
    """Feature set 5 (combination 1): mean spectrogram + spectral centroid + bandwidth.

    Reads the audio file ``file`` and returns a single 1D feature vector made
    of, in this order:

    * the averaged spectrogram (``feature_spectrogram_mean``),
    * the flattened spectral centroid (``feature_centroid``),
    * the flattened spectral bandwidth (``feature_bandwidth``).

    The centroid and bandwidth are concatenated in full (not averaged), so
    the length of the vector grows with the size of those arrays.
    NOTE(review): stacking these vectors into one 2D array presumably relies
    on every recording having the same length -- confirm.
    """
    with soundfile.SoundFile(file) as audio:
        waveform = audio.read(dtype='float32')
        sample_rate = audio.samplerate

    spectrogram_mean = feature_spectrogram_mean(waveform, sample_rate)
    centroid = feature_centroid(waveform, sample_rate)
    bandwidth = feature_bandwidth(waveform, sample_rate)

    # np.hstack joins the 1D pieces end to end into one feature vector.
    return np.hstack((spectrogram_mean, centroid.flatten(), bandwidth.flatten()))

def create_feature_set_6(file):
    """Feature set 6 (combination 2): every feature concatenated into one vector.

    Reads the audio file ``file`` and returns a single 1D feature vector made
    of, in this order:

    * the averaged spectrogram (``feature_spectrogram_mean``),
    * the flattened spectral centroid (``feature_centroid``),
    * the flattened spectral bandwidth (``feature_bandwidth``),
    * the averaged mel spectrogram (``feature_melspectrogram_mean``),
    * the averaged MFCCs (``feature_mfcc``),
    * the averaged chromagram (``feature_chromagram_mean``).

    The centroid and bandwidth are concatenated in full (not averaged), so
    the length of the vector grows with the size of those arrays.
    NOTE(review): stacking these vectors into one 2D array presumably relies
    on every recording having the same length -- confirm.
    """
    with soundfile.SoundFile(file) as audio:
        waveform = audio.read(dtype='float32')
        sample_rate = audio.samplerate

    spectrogram_mean = feature_spectrogram_mean(waveform, sample_rate)
    centroid = feature_centroid(waveform, sample_rate)
    bandwidth = feature_bandwidth(waveform, sample_rate)
    melspectrogram_mean = feature_melspectrogram_mean(waveform, sample_rate)
    mfcc = feature_mfcc(waveform, sample_rate)
    chromagram_mean = feature_chromagram_mean(waveform, sample_rate)

    # np.hstack joins the 1D pieces end to end into one feature vector.
    return np.hstack((
        spectrogram_mean,
        centroid.flatten(),
        bandwidth.flatten(),
        melspectrogram_mean,
        mfcc,
        chromagram_mean,
    ))

```

```python
# Class labels of the dataset: maps a class letter to its integer label
activities = {'I': 0, 'E': 1}
```

# 1. Have a look at the feature dimensions

In [2]:
from src.feature_extraction import *

In [3]:
# Load one sample recording: read the whole file as float32 samples and keep
# its sample rate for the feature functions below.
with soundfile.SoundFile( r'..\0. data\2. Data_wav\E79.wav') as audio:
    waveform = audio.read(dtype='float32')
    sample_rate = audio.samplerate

In [4]:
# Compute every individual feature for the sample so their shapes can be
# inspected in the cells below (full 2D versions and their averaged versions).
spectrogram = feature_spectrogram(waveform, sample_rate)
spectrogram_mean = feature_spectrogram_mean(waveform, sample_rate)
centroid = feature_centroid(waveform, sample_rate)
bandwidth = feature_bandwidth(waveform, sample_rate)
# Unused alternative: band limits derived from centroid and bandwidth.
# upper_bound = centroid[0] + bandwidth[0]
# lower_bound = centroid[0] - bandwidth[0]
melspectrogram = feature_melspectrogram(waveform, sample_rate)
melspectrogram_mean = feature_melspectrogram_mean(waveform, sample_rate)
mfcc = feature_mfcc(waveform, sample_rate)
chromagram = feature_chromagram(waveform, sample_rate)
chromagram_mean = feature_chromagram_mean(waveform, sample_rate)

In [5]:
# Shape of the full 2D spectrogram of the sample
spectrogram.shape

(1025, 142)

In [6]:
# Shape of the averaged spectrogram: one value per row of the full spectrogram
spectrogram_mean.shape

(1025,)

In [7]:
# Shape of the spectral centroid array (still 2D; it is flattened in feature sets 5 and 6)
centroid.shape

(1, 142)

In [8]:
# Shape of the spectral bandwidth array (still 2D; it is flattened in feature sets 5 and 6)
bandwidth.shape

(1, 142)

In [9]:
# Shape of the full 2D mel spectrogram (n_mels=128 rows)
melspectrogram.shape

(128, 142)

In [10]:
# Shape of the averaged mel spectrogram: one value per mel band
melspectrogram_mean.shape

(128,)

In [11]:
# Shape of the averaged MFCC vector (n_mfcc=40 coefficients)
mfcc.shape

(40,)

In [12]:
# Shape of the full 2D chromagram
chromagram.shape

(12, 142)

In [13]:
# Shape of the averaged chromagram: one value per chromagram row
chromagram_mean.shape

(12,)

# 2. Scaling, splitting and saving the features

Data Split & Feature Transformation

In [14]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

## 2.1 Data Split

In [15]:
def load_data(feature_set_number):
    """Extract one feature set and the class label for every wav file.

    Args:
        feature_set_number: integer from 1 to 6 selecting which
            ``create_feature_set_<n>`` function is applied to each file.

    Returns:
        ``(X, y)`` as NumPy arrays: ``X`` stacks the features of every file,
        ``y`` holds the matching labels looked up in ``activities`` from the
        first character of each file name.

    Raises:
        ValueError: if ``feature_set_number`` is not one of 1..6.
        KeyError: if a file name starts with a character that is not a key
            of ``activities``.
    """
    # Validate up front: previously an unknown number left `features` unbound
    # and only failed (with a confusing error) inside the loop.
    if feature_set_number not in range(1, 7):
        raise ValueError(
            f"feature_set_number must be an integer from 1 to 6, got {feature_set_number!r}"
        )
    extractors = {
        1: create_feature_set_1,
        2: create_feature_set_2,
        3: create_feature_set_3,
        4: create_feature_set_4,
        5: create_feature_set_5,
        6: create_feature_set_6,
    }
    extract = extractors[feature_set_number]

    data_folder = os.path.join('..', '0. data', '2. Data_wav')
    # Sort the files: glob gives no ordering guarantee, and a stable sample
    # order is needed for splits of different feature sets to be comparable.
    wav_files = sorted(glob.glob(os.path.join(data_folder, '*.wav')))
    total = len(wav_files)

    X, y = [], []
    for count, file in enumerate(wav_files, start=1):
        file_name = os.path.basename(file)
        activity = activities[file_name[0]]
        features = extract(file)

        X.append(features)
        y.append(activity)
        # '\r' + end=' ' results in printing over the same line
        print('\r' + f' Processed {count}/{total} audio samples', end=' ')
    # Return arrays to plug into sklearn's cross-validation algorithms
    return np.array(X), np.array(y)

## 2.1.1 Feature set 1

In [16]:
X,y = load_data(feature_set_number=1)

# NOTE: y_train / y_test are overwritten by every feature-set cell, and only one
# pair is saved. They match this X split only if every feature-set cell splits
# the same samples, hence the fixed random_state (must be the same in all cells).
X_train_1, X_test_1, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# Output the train and test data size and shape
print(f"Train and Test Size {len(X_train_1)}, {len(X_test_1)}")
print("Feature set shape: ", X_train_1.shape)

 Processed 300/300 audio samples  Train and Test Size 240, 60
Feature set shape:  (240, 1025, 142)


## 2.1.2 Feature set 2

In [17]:
X,y = load_data(feature_set_number=2)

# NOTE: y_train / y_test are overwritten by every feature-set cell, and only one
# pair is saved. They match this X split only if every feature-set cell splits
# the same samples, hence the fixed random_state (must be the same in all cells).
X_train_2, X_test_2, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# Output the train and test data size and shape
print(f"Train and Test Size {len(X_train_2)}, {len(X_test_2)}")
print("Feature set shape: ", X_train_2.shape)

 Processed 300/300 audio samples Train and Test Size 240, 60
Feature set shape:  (240, 128, 142)


## 2.1.3 Feature set 3

In [18]:
X,y = load_data(feature_set_number=3)

# NOTE: y_train / y_test are overwritten by every feature-set cell, and only one
# pair is saved. They match this X split only if every feature-set cell splits
# the same samples, hence the fixed random_state (must be the same in all cells).
X_train_3, X_test_3, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# Output the train and test data size and shape
print(f"Train and Test Size {len(X_train_3)}, {len(X_test_3)}")
print("Feature set shape: ", X_train_3.shape)

 Processed 300/300 audio samples Train and Test Size 240, 60
Feature set shape:  (240, 40)


## 2.1.4 Feature set 4

In [19]:
X,y = load_data(feature_set_number=4)

# NOTE: y_train / y_test are overwritten by every feature-set cell, and only one
# pair is saved. They match this X split only if every feature-set cell splits
# the same samples, hence the fixed random_state (must be the same in all cells).
X_train_4, X_test_4, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# Output the train and test data size and shape
print(f"Train and Test Size {len(X_train_4)}, {len(X_test_4)}")
print("Feature set shape: ", X_train_4.shape)

 Processed 300/300 audio samples Train and Test Size 240, 60
Feature set shape:  (240, 12, 142)


## 2.1.5 Feature set 5

In [20]:
X,y = load_data(feature_set_number=5)

# NOTE: y_train / y_test are overwritten by every feature-set cell, and only one
# pair is saved. They match this X split only if every feature-set cell splits
# the same samples, hence the fixed random_state (must be the same in all cells).
X_train_5, X_test_5, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# Output the train and test data size and shape
print(f"Train and Test Size {len(X_train_5)}, {len(X_test_5)}")
print("Feature set shape: ", X_train_5.shape)

 Processed 300/300 audio samples Train and Test Size 240, 60
Feature set shape:  (240, 1309)


## 2.1.6 Feature set 6

In [21]:
X,y = load_data(feature_set_number=6)

# NOTE: y_train / y_test are overwritten by every feature-set cell, and only one
# pair is saved. They match this X split only if every feature-set cell splits
# the same samples, hence the fixed random_state (must be the same in all cells).
X_train_6, X_test_6, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# Output the train and test data size and shape
print(f"Train and Test Size {len(X_train_6)}, {len(X_test_6)}")
print("Feature set shape: ", X_train_6.shape)

 Processed 300/300 audio samples Train and Test Size 240, 60
Feature set shape:  (240, 1489)


## 2.2 Save all feature sets

In [22]:
# Convert every split to a DataFrame (one row per sample) so it can be saved
# as CSV. y_train / y_test are the labels produced by the most recently
# executed split cell.
y_train = pd.DataFrame(y_train)
y_test = pd.DataFrame(y_test)

# 2D features (sets 1, 2 and 4) are flattened to one row per sample. The row
# count is taken from the data instead of being hard-coded, and np.asarray
# keeps this cell re-runnable once the names already hold DataFrames.
X_train_1 = pd.DataFrame(np.asarray(X_train_1).reshape(len(X_train_1), -1))
X_test_1 = pd.DataFrame(np.asarray(X_test_1).reshape(len(X_test_1), -1))

X_train_2 = pd.DataFrame(np.asarray(X_train_2).reshape(len(X_train_2), -1))
X_test_2 = pd.DataFrame(np.asarray(X_test_2).reshape(len(X_test_2), -1))

# Feature sets 3, 5 and 6 are already one vector per sample.
X_train_3 = pd.DataFrame(X_train_3)
X_test_3 = pd.DataFrame(X_test_3)

X_train_4 = pd.DataFrame(np.asarray(X_train_4).reshape(len(X_train_4), -1))
X_test_4 = pd.DataFrame(np.asarray(X_test_4).reshape(len(X_test_4), -1))

X_train_5 = pd.DataFrame(X_train_5)
X_test_5 = pd.DataFrame(X_test_5)

X_train_6 = pd.DataFrame(X_train_6)
X_test_6 = pd.DataFrame(X_test_6)

In [23]:
# Save the labels and every feature set as CSV files
data_path = 'features'

# Create the output directory if it does not exist yet
os.makedirs(data_path, exist_ok=True)

# File stem -> frame, in the order the files are written and reported
frames_to_save = {
    'y_train': y_train,
    'y_test': y_test,
    'X_train_1': X_train_1,
    'X_test_1': X_test_1,
    'X_train_2': X_train_2,
    'X_test_2': X_test_2,
    'X_train_3': X_train_3,
    'X_test_3': X_test_3,
    'X_train_4': X_train_4,
    'X_test_4': X_test_4,
    'X_train_5': X_train_5,
    'X_test_5': X_test_5,
    'X_train_6': X_train_6,
    'X_test_6': X_test_6,
}

for frame_name, frame in frames_to_save.items():
    file_path = os.path.join(data_path, f'{frame_name}.csv')
    # The index is written as the first column; readers should load it back
    # with index_col=0.
    frame.to_csv(file_path)
    print(f"File: {file_path}, saved")

File: features\y_train.csv, saved
File: features\y_test.csv, saved
File: features\X_train_1.csv, saved
File: features\X_test_1.csv, saved
File: features\X_train_2.csv, saved
File: features\X_test_2.csv, saved
File: features\X_train_3.csv, saved
File: features\X_test_3.csv, saved
File: features\X_train_4.csv, saved
File: features\X_test_4.csv, saved
File: features\X_train_5.csv, saved
File: features\X_test_5.csv, saved
File: features\X_train_6.csv, saved
File: features\X_test_6.csv, saved


# Wrap datasets into a file: src/data_loader.py

```python
# data_loader.py

import os

import pandas as pd

def load_data(data_dir='features'):
    """Load the saved labels and the six train/test feature sets from CSV.

    Every file is read with ``index_col=0`` (the first CSV column is the
    saved index).

    Args:
        data_dir: directory containing ``y_train.csv``, ``y_test.csv`` and
            ``X_train_<n>.csv`` / ``X_test_<n>.csv`` for n = 1..6.
            Defaults to ``'features'``.

    Returns:
        A tuple of 14 DataFrames in this order:
        ``(y_train, y_test, X_train_1, X_test_1, ..., X_train_6, X_test_6)``
        where the feature sets are 1: spectrogram, 2: mel spectrogram,
        3: MFCC, 4: chromagram, 5: combination 1, 6: combination 2.
    """
    def _read(stem):
        # One CSV per frame, named after the variable it was saved from.
        return pd.read_csv(os.path.join(data_dir, f'{stem}.csv'), index_col=0)

    frames = [_read('y_train'), _read('y_test')]
    for set_number in range(1, 7):
        frames.append(_read(f'X_train_{set_number}'))
        frames.append(_read(f'X_test_{set_number}'))

    return tuple(frames)

```

In [24]:
# NOTE: this import rebinds the name `load_data`, replacing the notebook-level
# load_data(feature_set_number) defined earlier; re-run that definition cell
# before extracting features again.
from src.data_loader import load_data

# Load the saved labels and the six train/test feature sets.
# NOTE(review): the data_loader.py listing shown above defines load_data()
# without a `transform` parameter -- presumably the module on disk accepts it;
# confirm and keep the listing in sync.
(y_train, y_test, 
X_train_1, X_test_1,
X_train_2, X_test_2,
X_train_3, X_test_3,
X_train_4, X_test_4,
X_train_5, X_test_5,
X_train_6, X_test_6)= load_data(transform=True)

In [25]:
y_test.shape

(60, 1)

In [26]:
y_test

Unnamed: 0,0
0,0
1,1
2,1
3,0
4,0
5,0
6,1
7,0
8,0
9,1
