In [1]:
import numpy as np
import pandas as pd
import os,sys
import matplotlib.pyplot as plt
import librosa
import librosa.display
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [2]:
# Read the ESC-50 ground-truth metadata table into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='../input/esc50/ESC-50-master/meta/esc50.csv',
)

In [3]:
# Restore the previously saved array of audio samples.
# NOTE: allow_pickle=True unpickles Python objects stored in the file, which
# can execute arbitrary code -- only load .npy files from a trusted source.
original_audio_data_numpy = np.load(
    '../input/esc-50-cip-project-sound/original_audio_samples.npy',
    allow_pickle=True,
)

In [4]:
# Give the three columns of the loaded array names so they can be addressed
# by label: the waveform, its sampling rate, and the class id.
df_org_np = pd.DataFrame(
    data=original_audio_data_numpy,
    columns=['data', 'sampling_rate', 'category'],
)
df_org_np

Unnamed: 0,data,sampling_rate,category
0,"[-0.0018663798, 0.004723502, -0.00090402894, 0...",22050,15
1,"[-0.019782359, -0.026251452, -0.02057997, -0.0...",22050,19
2,"[-0.03421642, 0.02844932, 0.04358507, -0.29408...",22050,36
3,"[0.0013461491, 0.0019932226, 0.0017267463, 0.0...",22050,22
4,"[-0.13718751, -0.1813196, -0.11306347, -0.0987...",22050,40
...,...,...,...
1995,"[0.00080164516, 0.00087302254, 0.0004070742, 0...",22050,48
1996,"[-0.050393637, -0.07131728, -0.058072377, -0.0...",22050,47
1997,"[6.0981706e-06, -2.7409707e-05, 1.337673e-05, ...",22050,15
1998,"[0.009146113, 0.003665812, -0.011603184, -0.01...",22050,14


In [5]:
# Hold out 10% of the clips as the test set, stratified on the class label.
# random_state is fixed so the same split is produced on every run.
train_data, test_data = train_test_split(
    df_org_np,
    stratify=df_org_np['category'],
    test_size=0.1,
    random_state=42,
)

In [6]:
# Pull the waveform and label columns out of the held-out split as NumPy arrays.
original_test_audio_samples, original_test_category = (
    np.array(test_data[column]) for column in ('data', 'category')
)

In [7]:
# Convert the held-out DataFrame into a plain ndarray so it can be written with np.save.
# NOTE: this rebinds `test_data` from a DataFrame to an ndarray, so the column
# lookups in the previous cell (test_data['data'], test_data['category']) will
# fail if that cell is re-run after this one.
test_data = np.array(test_data)

In [8]:
# Persist the raw held-out split; np.save appends the .npy extension.
np.save(file='test_data', arr=test_data)

Fourier (STFT) spectrogram test data with scaling

In [9]:
# For every held-out clip: short-time Fourier transform -> magnitude -> decibels.
fourier_test_audio = [
    librosa.amplitude_to_db(np.abs(librosa.stft(clip)))
    for clip in original_test_audio_samples
]

In [10]:
# Combine the per-clip spectrograms into a single ndarray.
# This relies on every clip producing a spectrogram of the same shape.
fourier_test_audio = np.asarray(fourier_test_audio)

In [11]:
# Append a trailing singleton axis (presumably the channel axis expected by a
# downstream model -- confirm against the consumer of the saved file).
# Guarded on ndim so that re-running this cell does not keep adding axes.
if fourier_test_audio.ndim == 3:
    fourier_test_audio = fourier_test_audio[:, :, :, np.newaxis]
# Fail loudly if the array is neither the 3-D input nor the 4-D result.
assert fourier_test_audio.ndim == 4, fourier_test_audio.shape

In [12]:
# Sanity check: display the dimensions of the STFT feature array.
np.shape(fourier_test_audio)

(200, 1025, 216, 1)

In [13]:
from sklearn.preprocessing import MinMaxScaler

# A single scaler object shared by the scaling cells below; every
# fit_transform call re-fits it on the array it is given.
# feature_range=(0, 1) is MinMaxScaler's default, spelled out for readers.
scaler = MinMaxScaler(feature_range=(0, 1))

In [14]:
# Min-max scale each spectrogram independently, in place.
# The loop bound comes from the array itself instead of a hard-coded 200, so
# the cell keeps working if the size of the test split changes.
# NOTE(review): MinMaxScaler rescales every column of the 2-D slice on its
# own, so each column of a spectrogram is mapped to [0, 1] separately rather
# than the spectrogram as a whole -- confirm this is the intended scaling.
for i in range(fourier_test_audio.shape[0]):
    fourier_test_audio[i, :, :, 0] = scaler.fit_transform(fourier_test_audio[i, :, :, 0])

In [15]:
# Pair every scaled spectrogram with its class label as (features, category).
# zip() replaces the hard-coded range(200); the length check guards against
# zip silently truncating if features and labels ever get out of step.
assert len(fourier_test_audio) == len(original_test_category)
fourier_test_data_cat = list(zip(fourier_test_audio, original_test_category))

In [16]:
# Each entry is an (ndarray, label) pair, i.e. a ragged sequence. NumPy only
# builds an array from that when dtype=object is given explicitly: older
# versions emit a deprecation warning without it and NumPy >= 1.24 raises
# ValueError.
fourier_test_data_cat = np.asarray(fourier_test_data_cat, dtype=object)

  return array(a, dtype, copy=False, order=order)


In [17]:
# Write the (features, category) pairs to disk; np.save appends the .npy extension.
np.save(file='fourier_test_data_cat', arr=fourier_test_data_cat)

Mel spectrogram test data with scaling

In [18]:
# Mel spectrogram (in decibels) of every held-out clip.
mel_test_audio = []
for audio in original_test_audio_samples:
    # y= and sr= are passed by keyword: librosa >= 0.10 no longer accepts
    # them positionally, and the keyword form also works on older releases.
    # NOTE(review): melspectrogram returns a power spectrogram by default, for
    # which librosa.power_to_db is the usual conversion. amplitude_to_db is
    # kept so these features stay identical to any data already produced with
    # this pipeline -- confirm before changing.
    mel_test_audio.append(
        librosa.amplitude_to_db(librosa.feature.melspectrogram(y=audio, sr=22050))
    )

In [19]:
# Combine the per-clip mel spectrograms into a single ndarray.
# This relies on every clip producing a spectrogram of the same shape.
mel_test_audio = np.asarray(mel_test_audio)

In [20]:
# Append a trailing singleton axis (presumably the channel axis expected by a
# downstream model -- confirm against the consumer of the saved file).
# Guarded on ndim so that re-running this cell does not keep adding axes.
if mel_test_audio.ndim == 3:
    mel_test_audio = mel_test_audio[:, :, :, np.newaxis]
# Fail loudly if the array is neither the 3-D input nor the 4-D result.
assert mel_test_audio.ndim == 4, mel_test_audio.shape

In [21]:
# Sanity check: display the dimensions of the mel feature array.
np.shape(mel_test_audio)

(200, 128, 216, 1)

In [22]:
# Min-max scale each mel spectrogram independently, in place.
# The loop bound comes from the array itself instead of a hard-coded 200, so
# the cell keeps working if the size of the test split changes.
# NOTE(review): MinMaxScaler rescales every column of the 2-D slice on its
# own, so each column of a spectrogram is mapped to [0, 1] separately rather
# than the spectrogram as a whole -- confirm this is the intended scaling.
for i in range(mel_test_audio.shape[0]):
    mel_test_audio[i, :, :, 0] = scaler.fit_transform(mel_test_audio[i, :, :, 0])

In [23]:
# Pair every scaled mel spectrogram with its class label as (features, category).
# zip() replaces the hard-coded range(200); the length check guards against
# zip silently truncating if features and labels ever get out of step.
assert len(mel_test_audio) == len(original_test_category)
mel_test_data_cat = list(zip(mel_test_audio, original_test_category))

In [24]:
# Each entry is an (ndarray, label) pair, i.e. a ragged sequence. NumPy only
# builds an array from that when dtype=object is given explicitly: older
# versions emit a deprecation warning without it and NumPy >= 1.24 raises
# ValueError.
mel_test_data_cat = np.asarray(mel_test_data_cat, dtype=object)

  return array(a, dtype, copy=False, order=order)


In [25]:
# Write the (features, category) pairs to disk; np.save appends the .npy extension.
np.save(file='mel_test_data_cat', arr=mel_test_data_cat)

MFCC test data with scaling

In [26]:
# MFCC features of every held-out clip.
mfcc_test_audio = []
for audio in original_test_audio_samples:
    # y= and sr= are passed by keyword: librosa >= 0.10 no longer accepts
    # them positionally, and the keyword form also works on older releases.
    mfcc_test_audio.append(librosa.feature.mfcc(y=audio, sr=22050))

In [27]:
# Combine the per-clip MFCC matrices into a single ndarray.
# This relies on every clip producing a matrix of the same shape.
mfcc_test_audio = np.asarray(mfcc_test_audio)

In [28]:
# Append a trailing singleton axis (presumably the channel axis expected by a
# downstream model -- confirm against the consumer of the saved file).
# Guarded on ndim so that re-running this cell does not keep adding axes.
if mfcc_test_audio.ndim == 3:
    mfcc_test_audio = mfcc_test_audio[:, :, :, np.newaxis]
# Fail loudly if the array is neither the 3-D input nor the 4-D result.
assert mfcc_test_audio.ndim == 4, mfcc_test_audio.shape

In [29]:
# Sanity check: display the dimensions of the MFCC feature array.
np.shape(mfcc_test_audio)

(200, 20, 216, 1)

In [30]:
# Min-max scale each MFCC matrix independently, in place.
# The loop bound comes from the array itself instead of a hard-coded 200, so
# the cell keeps working if the size of the test split changes.
# NOTE(review): MinMaxScaler rescales every column of the 2-D slice on its
# own, so each column of an MFCC matrix is mapped to [0, 1] separately rather
# than the matrix as a whole -- confirm this is the intended scaling.
for i in range(mfcc_test_audio.shape[0]):
    mfcc_test_audio[i, :, :, 0] = scaler.fit_transform(mfcc_test_audio[i, :, :, 0])

In [31]:
# Pair every scaled MFCC matrix with its class label as (features, category).
# zip() replaces the hard-coded range(200); the length check guards against
# zip silently truncating if features and labels ever get out of step.
assert len(mfcc_test_audio) == len(original_test_category)
mfcc_test_data_cat = list(zip(mfcc_test_audio, original_test_category))

In [32]:
# Each entry is an (ndarray, label) pair, i.e. a ragged sequence. NumPy only
# builds an array from that when dtype=object is given explicitly: older
# versions emit a deprecation warning without it and NumPy >= 1.24 raises
# ValueError.
mfcc_test_data_cat = np.asarray(mfcc_test_data_cat, dtype=object)

  return array(a, dtype, copy=False, order=order)


In [33]:
# Write the (features, category) pairs to disk; np.save appends the .npy extension.
np.save(file='mfcc_test_data_cat', arr=mfcc_test_data_cat)