In [1]:
# Requires librosa. If it is missing, uncomment and run once: %pip install --user librosa

In [2]:
from pathlib import Path
from scipy.io import wavfile
import scipy.signal
import pandas as pd
from tqdm.auto import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
import librosa

In [3]:
# Directory (relative to the notebook's working directory) that holds the
# .npy arrays read and written by the cells below.
intermediate_folder = Path('..', 'data', 'intermediate')

In [4]:
# Read the training audio array from disk and convert it to Python-float
# (float64) precision; the last expression displays the array's shape.
X_train = np.load(
    intermediate_folder / 'train_main_1_sec_audio.npy'
).astype(float)
X_train.shape

(33566, 16000)

In [5]:
# Read the validation audio array from disk and convert it to Python-float
# (float64) precision; the last expression displays the array's shape.
X_val = np.load(
    intermediate_folder / 'val_main_1_sec_audio.npy'
).astype(float)
X_val.shape

(4619, 16000)

In [6]:
# Read the test audio array from disk and convert it to Python-float
# (float64) precision; the last expression displays the array's shape.
X_test = np.load(
    intermediate_folder / 'test_main_1_sec_audio.npy'
).astype(float)
X_test.shape

(4689, 16000)

In [7]:
# Sampling rate handed to librosa as `sr` when computing MFCCs.
SAMPLE_RATE = 16_000

In [8]:
# Compute one MFCC matrix per training clip (librosa defaults, sr=SAMPLE_RATE),
# showing progress with tqdm.
mfcc_train_list = [
    librosa.feature.mfcc(y=X_train[idx], sr=SAMPLE_RATE)
    for idx in tqdm(range(len(X_train)))
]

# Stack the per-clip matrices along a new leading axis, then drop the
# temporary list so only the stacked array stays in memory.
X_train_mfcc_np = np.stack(mfcc_train_list)
del mfcc_train_list
X_train_mfcc_np.shape

  0%|          | 0/33566 [00:00<?, ?it/s]

(33566, 20, 32)

In [9]:
# Compute one MFCC matrix per validation clip (librosa defaults,
# sr=SAMPLE_RATE), showing progress with tqdm.
mfcc_val_list = [
    librosa.feature.mfcc(y=X_val[idx], sr=SAMPLE_RATE)
    for idx in tqdm(range(len(X_val)))
]

# Stack the per-clip matrices along a new leading axis, then drop the
# temporary list so only the stacked array stays in memory.
X_val_mfcc_np = np.stack(mfcc_val_list)
del mfcc_val_list
X_val_mfcc_np.shape

  0%|          | 0/4619 [00:00<?, ?it/s]

(4619, 20, 32)

In [10]:
# Compute one MFCC matrix per test clip (librosa defaults, sr=SAMPLE_RATE),
# showing progress with tqdm.
mfcc_test_list = [
    librosa.feature.mfcc(y=X_test[idx], sr=SAMPLE_RATE)
    for idx in tqdm(range(len(X_test)))
]

# Stack the per-clip matrices along a new leading axis, then drop the
# temporary list so only the stacked array stays in memory.
X_test_mfcc_np = np.stack(mfcc_test_list)
del mfcc_test_list
X_test_mfcc_np.shape

  0%|          | 0/4689 [00:00<?, ?it/s]

(4689, 20, 32)

In [11]:
# Write the stacked training MFCC array to the intermediate folder.
np.save(
    file=intermediate_folder / 'train_main_1_sec_audio_mfcc.npy',
    arr=X_train_mfcc_np,
)

In [12]:
# Write the stacked validation MFCC array to the intermediate folder.
np.save(
    file=intermediate_folder / 'val_main_1_sec_audio_mfcc.npy',
    arr=X_val_mfcc_np,
)

In [13]:
# Write the stacked test MFCC array to the intermediate folder.
np.save(
    file=intermediate_folder / 'test_main_1_sec_audio_mfcc.npy',
    arr=X_test_mfcc_np,
)