In [1]:
import warnings
import os
from IPython.display import Audio, display
import numpy as np
import librosa
from librosa.display import specshow
import pandas as pd
import matplotlib.pyplot as plt
from scipy.fftpack import fft, dct, ifft
from sklearn.model_selection import train_test_split
from scipy.stats import norm
warnings.simplefilter('error')
%matplotlib inline

In [21]:
DATA_PATH = os.path.join('data', 'genres')


def list_files(data_path):
    """Return the paths of all files found under ``data_path``, recursively.

    Parameters
    ----------
    data_path : str
        Directory to walk. A missing directory yields an empty list
        (``os.walk`` ignores the listing error by default).

    Returns
    -------
    list of str
        One path per file, sorted so that the order does not depend on the
        directory-listing order of the underlying file system.
    """
    found = []
    for root, _dirs, filenames in os.walk(data_path):
        for filename in filenames:
            # Join with the directory os.walk is actually visiting. Rebuilding
            # the path from data_path + basename(root) points at a non-existent
            # location for files that sit directly in data_path or more than
            # one level below it.
            found.append(os.path.join(root, filename))
    return sorted(found)


files = list_files(DATA_PATH)

In [29]:
SAMPLE_RATE = 22050

# Decode every collected file at SAMPLE_RATE. Loading is best-effort: a file
# that cannot be decoded is skipped, but the reason is kept in `load_failures`
# instead of being discarded.
waves = {}
load_failures = {}  # path -> exception raised while loading it
for i, file in enumerate(files):
    print("Progress : %d / %d" % (i + 1, len(files)), end='\r')
    try:
        # librosa.load returns (samples, sample_rate); only the samples are kept.
        waves[file] = librosa.load(file, sr=SAMPLE_RATE)[0]
    except Exception as err:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit still stop this long-running loop. Warnings are promoted
        # to errors in the import cell, so a warning raised while loading also
        # ends up here.
        load_failures[file] = err

if load_failures:
    print("\nSkipped %d file(s) that could not be loaded:" % len(load_failures))
    for path, err in load_failures.items():
        print("  %s: %r" % (path, err))

Progress : 851 / 851

In [38]:
# Distinct waveform lengths among the loaded clips; the cell displays how many
# different lengths there are.
lens = {len(samples) for samples in waves.values()}
len(lens)

32

In [39]:
# Gap between the longest and the shortest loaded clip.
shortest_len, longest_len = min(lens), max(lens)
longest_len - shortest_len

15808

In [41]:
# Cut every clip down to the shortest observed length so that all waveforms
# end up with the same number of samples.
COMMON_LEN = min(lens)
for name in list(waves):
    waves[name] = waves[name][:COMMON_LEN]

In [53]:
# Show the length every clip was truncated to (the minimum of `lens`).
COMMON_LEN

660000

In [42]:
## check lens
# Every clip must now hold exactly COMMON_LEN samples.
lengths_ok = all(len(clip) == COMMON_LEN for clip in waves.values())
print("SUCCESS" if lengths_ok else "FAILED!!!")

SUCCESS


In [43]:
# Number of clips that loaded; files whose load raised were skipped earlier.
len(waves)

850

In [54]:
MFCC_COEFS = 12    # MFCC coefficients kept per frame
MEL_COEFS = 128    # mel bands used to build the mel spectrogram
N_FFT = 441        # FFT window length, in samples
HOP_LENGTH = 330   # step between successive frames, in samples


def mfcc_extractor(sound):
    """Build a per-frame feature matrix from a waveform.

    Parameters
    ----------
    sound : np.ndarray
        Audio samples, as returned by ``librosa.load`` at ``SAMPLE_RATE``.

    Returns
    -------
    np.ndarray
        Rows stacked in this order: the ``MFCC_COEFS`` MFCCs, their
        first-order deltas, their second-order deltas, then one row each with
        the per-frame sum of squares of the MFCCs, the deltas and the
        second-order deltas (``3 * MFCC_COEFS + 3`` rows in total, one column
        per frame).
    """
    # The audio is passed by keyword: recent librosa releases make the
    # arguments of feature.mfcc keyword-only, and a positional call would raise
    # a TypeError there.
    mfcc = librosa.feature.mfcc(y=sound, sr=SAMPLE_RATE, n_mfcc=MFCC_COEFS,
                                n_mels=MEL_COEFS, n_fft=N_FFT,
                                hop_length=HOP_LENGTH)
    delta = librosa.feature.delta(mfcc)
    delta2 = librosa.feature.delta(mfcc, order=2)

    # One (1, n_frames) row per block: sum of squared values along the
    # coefficient axis.
    energies = [np.sum(block ** 2, axis=0, keepdims=True)
                for block in (mfcc, delta, delta2)]
    return np.vstack([mfcc, delta, delta2] + energies)

In [55]:
# Extract the feature matrix of every loaded clip. Extraction is best-effort,
# but failures are recorded and reported: silently dropping them would also
# hide programming errors raised inside mfcc_extractor.
mfccs = {}
mfcc_failures = {}  # path -> exception raised during feature extraction
for i, (f, wave) in enumerate(waves.items()):
    print("Progress: %d / %d" % (i + 1, len(waves)), end='\r')
    try:
        mfccs[f] = mfcc_extractor(wave)
    except Exception as err:
        # `Exception` rather than a bare `except`, so the loop can still be
        # interrupted from the keyboard.
        mfcc_failures[f] = err

if mfcc_failures:
    print("\nFeature extraction failed for %d clip(s):" % len(mfcc_failures))
    for path, err in mfcc_failures.items():
        print("  %s: %r" % (path, err))
len(mfccs)

Progress: 850 / 850

850

In [58]:
# Sanity check: every feature matrix must have the expected shape.
# The previous for/else had no `break`, so its `else` branch always ran and
# "SUCCESS" was printed even when mismatches had been found.
MFCC_SHAPE = (39, 2000)
bad_shapes = {f: mfcc.shape for f, mfcc in mfccs.items() if mfcc.shape != MFCC_SHAPE}
if bad_shapes:
    print("FAILED")
    for path, shape in bad_shapes.items():
        print("  %s: %s" % (path, shape))
else:
    print("SUCCESS")

SUCCESS


In [60]:
# Class names are the sub-directories of DATA_PATH. os.listdir returns entries
# in arbitrary order and also lists plain files, so keep directories only and
# sort them to get a stable class order.
cls = sorted(
    entry for entry in os.listdir(DATA_PATH)
    if os.path.isdir(os.path.join(DATA_PATH, entry))
)
cls

['blues',
 'classical',
 'country',
 'disco',
 'hiphop',
 'jazz',
 'metal',
 'pop',
 'reggae']

In [65]:
# Create the output tree: one sub-directory per class under DST_PATH.
# exist_ok=True makes the cell safe to re-run; os.mkdir raises FileExistsError
# as soon as a directory is already there.
DST_PATH = 'data/mfccs_genres'
os.makedirs(DST_PATH, exist_ok=True)
for cl in cls:
    os.makedirs(os.path.join(DST_PATH, cl), exist_ok=True)

In [67]:
# Write each feature matrix to DST_PATH/<parent dir of source>/<source name>.
# np.save appends ".npy" to the file name.
for i, (f, mfcc) in enumerate(mfccs.items()):
    print("Progess: %d / %d" % (i + 1, len(mfccs)), end='\r')
    # Take the last two path components with os.path instead of splitting on
    # a backslash: on POSIX the backslash split leaves the path whole, and the
    # entire source path would be appended under DST_PATH.
    genre = os.path.basename(os.path.dirname(f))
    target_dir = os.path.join(DST_PATH, genre)
    os.makedirs(target_dir, exist_ok=True)
    np.save(os.path.join(target_dir, os.path.basename(f)), mfcc)

Progess: 850 / 850