In [5]:
import numpy as np
import tensorflow as tf
import os

import librosa
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

opj = os.path.join

In [6]:
gtzan_path = r'datasets\gtzan'
genres_dir = opj(gtzan_path, 'genres_original')

In [7]:
def load_data_by_classes(dataset_path):
    data = []
    labels = []
    classes = os.listdir(dataset_path)
    for cl in tqdm(classes):
        full_p = opj(dataset_path, cl)
        for file in os.listdir(full_p):
            for i in range(0, 20, 2):
                filepath = os.path.join(full_p, file)  
                try:
                    wavedata, _ = librosa.load(filepath, sr=None, mono=True, offset=i, duration=2)
                except:
                    continue
                wavedata = wavedata[:, np.newaxis]
                data.append(wavedata)
                labels.append(cl)

    return data, labels

In [8]:
data, labels = load_data_by_classes(genres_dir)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:05<00:00,  1.87it/s]


In [9]:
len(data)

9990

In [5]:
sizes = [len(item) for item in data]

In [6]:
min_size = min(sizes)

In [7]:
data = [item[:min_size] for item in data]

# int(min_size/16)

In [8]:
data = np.array(data)
# data = data[..., np.newaxis]


le = LabelEncoder()
labels = le.fit_transform(labels)
labels_1hot = to_categorical(labels)

In [9]:
data.shape

(9990, 44100, 1)

In [10]:
#BATCH_SIZE=8

In [11]:
#AUTOTUNE = tf.data.experimental.AUTOTUNE

In [12]:
# dataset = tf.data.Dataset.from_tensor_slices((data, labels_1hot)).shuffle(4096).batch(BATCH_SIZE).prefetch(AUTOTUNE)

In [13]:
# DATASET_SIZE = len(dataset)

# train_size = int(0.7 * DATASET_SIZE)
# val_size = int(0.15 * DATASET_SIZE)

# train_dataset = dataset.take(train_size)
# val_dataset = dataset.skip(train_size)
# test_dataset = dataset.skip(val_size)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.15)

In [15]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

In [16]:
# data = tf.convert_to_tensor(data)

In [17]:
X_train = tf.convert_to_tensor(X_train)
X_val = tf.convert_to_tensor(X_val)
X_test = tf.convert_to_tensor(X_test)

y_train = to_categorical(y_train)
y_val = to_categorical(y_val)
y_test = to_categorical(y_test)

In [18]:
# data = tf.convert_to_tensor(data)
# labels_one_hot = tf.one_hot(labels, depth=10)

In [19]:
from tensorflow.keras.layers import Conv1D, Dropout, LSTM, TimeDistributed, Activation, Dense, Input, MaxPooling1D, Lambda, Flatten, BatchNormalization

In [20]:
N_LAYERS = 4
CONV_FILTER_COUNT = 64
FILTER_LENGTH = 25

GENRES = 10

In [21]:
data.shape

(9990, 44100, 1)

In [22]:
import tensorflow.keras.backend as K

from tensorflow.keras import Model, Sequential

from tensorflow.keras.optimizers import RMSprop

In [23]:
LSTM_COUNT = 256
BATCH_SIZE = 32
EPOCH_COUNT = 80

kernel_size = 100

In [24]:
model = Sequential()

model.add(Conv1D(32, kernel_size, input_shape=data.shape[1:], activation='relu'))
model.add(Conv1D(32, kernel_size, activation='relu'))
#model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=4)) 

model.add(Conv1D(32, kernel_size, activation='relu'))
#model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=4)) 

model.add(Flatten())
model.add(Dense(100, activation='relu')) 
model.add(Dropout(0.5))
#model.add(BatchNormalization())
model.add(Dense(10 ,activation='softmax'))

model.compile(loss='categorical_crossentropy' , optimizer=RMSprop(learning_rate=0.0001) , metrics=['accuracy'])

In [None]:
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs = 24)

Epoch 1/24
Epoch 2/24

In [None]:
model.fit(train_dataset, validation_data=val_dataset, epochs = 24)

In [21]:
model.evaluate(test_dataset)



[0.04299801588058472, 0.9900990128517151]

In [None]:
model.evaluate(X_test, y_test)

In [23]:
inp = Input((None, data.shape[1]))

In [24]:
conv1 = Conv1D(filters=64, kernel_size=(FILTER_LENGTH,))(inp)
mp1 = MaxPooling1D(2)(conv1)
act1 = Activation('relu')(mp1)


conv2 = Conv1D(filters=128, kernel_size=(FILTER_LENGTH,))(act1)
mp2 = MaxPooling1D(2)(conv2)
act2 = Activation('relu')(mp1)


conv3 = Conv1D(filters=256, kernel_size=(FILTER_LENGTH,))(act2)
mp3 = MaxPooling1D(2)(conv3)
act3 = Activation('relu')(mp3)

layer = Dropout(0.5)(act3)
layer = LSTM(LSTM_COUNT, return_sequences=True)(layer)
layer = Dropout(0.5)(layer)
layer = TimeDistributed(Dense(GENRES))(layer)
layer = Activation('softmax', name='output_realtime')(layer)


In [25]:
time_distributed_merge_layer = Lambda(
        function=lambda x: K.mean(x, axis=1),
        output_shape=lambda shape: (shape[0],) + shape[2:],
        name='output_merged')

model_output = time_distributed_merge_layer(layer)
model = Model(inp, model_output)
opt = RMSprop(lr=0.00001)  # Optimizer
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'])

In [None]:
model.fit(data, labels_one_hot)

In [None]:
inp = Input((data.shape[1]))

layer = inp

for i in range(N_LAYERS):
    # Convolutional layer names are used by extract_filters.py
    layer = Conv1D(
        filters=(None, CONV_FILTER_COUNT),
        kernel_size=(FILTER_LENGTH,),
        name='convolution_' + str(i + 1))(layer)
    layer = Activation('relu')(layer)
    layer = MaxPooling1D(2)(layer)

layer = Dropout(0.5)(layer)
layer = LSTM(LSTM_COUNT, return_sequences=True)(layer)
layer = Dropout(0.5)(layer)
layer = TimeDistributed(Dense(len(GENRES)))(layer)
layer = Activation('softmax', name='output_realtime')(layer)
time_distributed_merge_layer = Lambda(
    function=lambda x: K.mean(x, axis=1),
    output_shape=lambda shape: (shape[0],) + shape[2:],
    name='output_merged')
model_output = time_distributed_merge_layer(layer)
model = Model(model_input, model_output)


In [None]:
opt = RMSprop(lr=0.00001)  # Optimizer
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'])