In [10]:
import os

import cv2
import keras.applications
import pandas as pd
import numpy as np

from keras import callbacks
from keras.applications import ResNet50
from keras.applications import VGG16
from keras.layers import Conv2D, BatchNormalization, Activation, MaxPooling2D, Dropout, Flatten, Dense, Input, Lambda, \
    Concatenate
from keras.models import Model
from keras.models import load_model
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split


# Resources


In [11]:
# Locations of the three .npy inputs read with np.load in the "Load data" cell.
# All paths are relative to the notebook's working directory.
DATA = 'data/spectrum_data.npy'      # loaded into X
LABELS = 'data/spectrum_labels.npy'  # loaded into y
IDS = 'data/spectrum_ids.npy'        # loaded into ids


# Prepare data
## Load data

In [12]:
# Load the data, label and id arrays (in that order) from the configured paths.
X, y, ids = (np.load(npy_path) for npy_path in (DATA, LABELS, IDS))

# Print each array's shape so a mismatch in the leading dimension is visible.
for loaded in (X, y, ids):
    print(loaded.shape)


(7872, 200, 500, 3)
(7872,)
(7872,)


## Prepare training data


In [None]:
# Hold out 20% of the samples for validation.
# - random_state pins the shuffle so Restart & Run All reproduces the same split.
# - stratify keeps the class proportions the same in both parts
#   (requires every class to have at least two samples).
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# One-hot encode with a class count taken from the full label array; letting
# to_categorical infer it per split could give train and test different widths.
num_classes = int(np.max(y)) + 1
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)


# SubSpectralNet
## Build net


In [7]:
def build_subnet(input_layer, sub_classify=-1):
    """Build one convolutional sub-network on top of ``input_layer``.

    Two stages of Conv2D(7x7, 'same', he_normal) -> BatchNormalization (L2 on
    gamma and beta) -> ReLU -> MaxPooling2D -> Dropout(0.3) are applied, with
    32 filters / (2, 5) pooling in the first stage and 64 filters / (4, 100)
    pooling in the second. The result is flattened and projected to a
    32-unit ReLU dense layer.

    Args:
        input_layer: Keras tensor the sub-network is attached to.
        sub_classify: If greater than 0, a Dropout(0.3) + softmax Dense layer
            with this many units is added on top of the 32-unit layer.

    Returns:
        The 32-unit dense output tensor, or the tuple
        ``(dense_output, softmax_output)`` when ``sub_classify > 0``.
    """
    bn_penalty = 0.0001
    # (number of filters, max-pool window) for each of the two conv stages.
    stages = (
        (32, (2, 5)),
        (64, (4, 100)),
    )

    x = input_layer
    for n_filters, pool_window in stages:
        x = Conv2D(n_filters, kernel_size=(7, 7), padding='same', kernel_initializer="he_normal")(x)
        x = BatchNormalization(gamma_regularizer=l2(bn_penalty), beta_regularizer=l2(bn_penalty))(x)
        x = Activation('relu')(x)
        x = MaxPooling2D(pool_window)(x)
        x = Dropout(0.3)(x)

    flat = Flatten()(x)
    features = Dense(32, activation='relu')(flat)

    # Without a sub-classifier only the 32-unit feature tensor is returned.
    if not sub_classify > 0:
        return features

    # Optional per-sub-network classifier head.
    head = Dropout(0.30)(features)
    sub_prediction = Dense(sub_classify, activation='softmax')(head)
    return features, sub_prediction

# Geometry of the network input and of the overlapping horizontal bands it is
# cut into. With 200 rows, a band height of 20 and a hop of 10 this yields the
# 19 bands 0:20, 10:30, ..., 180:200.
N_BINS, N_FRAMES, N_CHANNELS = 200, 500, 3
BAND_HEIGHT = 20
BAND_HOP = 10

input_layer = Input((N_BINS, N_FRAMES, N_CHANNELS))
toconcat = list()

# One sub-network per band. The slice bounds are handed to the Lambda layer via
# `arguments` rather than captured from the loop variable, so each layer keeps
# its own bounds (also when the model is saved and reloaded).
band_starts = range(0, N_BINS - BAND_HEIGHT + 1, BAND_HOP)
for band_idx, band_start in enumerate(band_starts, start=1):
    band = Lambda(
        lambda x, start, stop: x[:, start:stop, :, :],
        output_shape=(BAND_HEIGHT, N_FRAMES, N_CHANNELS),
        arguments={'start': band_start, 'stop': band_start + BAND_HEIGHT},
        name=f'lambda_{band_idx:02d}',
    )(input_layer)
    toconcat.append(build_subnet(band))

# Global classifier on the concatenated 32-unit outputs of all sub-networks.
new_layer = Concatenate()(toconcat)
for units in (512, 256, 128):
    new_layer = Dense(units, activation='relu')(new_layer)
    new_layer = Dropout(0.30)(new_layer)

# This named 64-unit layer is what the "Save model" cell exposes as the
# feature extractor output; do not rename it without updating that cell.
new_layer = Dense(64, activation='relu', name='dense_feature_extraction')(new_layer)
new_layer = Dropout(0.30)(new_layer)
output_layer = Dense(6, activation='softmax', name='dense_output')(new_layer)

sspr_net = Model(input_layer, [output_layer])
sspr_net.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=0.001),
    metrics=['accuracy']
)

sspr_net.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 200, 500, 3)  0                                            
__________________________________________________________________________________________________
lambda_01 (Lambda)              (None, 20, 500, 3)   0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_02 (Lambda)              (None, 20, 500, 3)   0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_03 (Lambda)              (None, 20, 500, 3)   0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_04 

## Train net


In [None]:
# Output locations for this run. The directory is created up front because the
# CSV logger and the checkpoint callback write into it and fail if it is missing.
RESULTS_DIR = 'sspr_results'
run_name = 'SubSpectralNet-200-20-10'  # was `type`, which shadowed the builtin
os.makedirs(RESULTS_DIR, exist_ok=True)

# Per-epoch metrics as CSV, TensorBoard event files, and a model checkpoint.
log = callbacks.CSVLogger(os.path.join(RESULTS_DIR, 'log_' + run_name + '.csv'))
tb = callbacks.TensorBoard(log_dir=os.path.join(RESULTS_DIR, 'tensorboard-logs'))
# NOTE(review): save_best_only is not set, so the same .h5 file is rewritten
# after every epoch and `monitor` does not select which epoch is kept — confirm
# that keeping the last epoch (not the best) is intended.
checkpoint = callbacks.ModelCheckpoint(
    os.path.join(RESULTS_DIR, 'model_' + run_name + '.h5'),
    monitor='val_acc', verbose=1
)

# The held-out split doubles as validation data during training.
sspr_history = sspr_net.fit(
    X_train, y_train,
    batch_size=16, epochs=5,
    callbacks=[log, tb, checkpoint],
    validation_data=(X_test, y_test),
    shuffle=True
)


Train on 6448 samples, validate on 1612 samples

Epoch 1/20

Epoch 2/20

Epoch 3/20

Epoch 4/20

Epoch 5/20

Epoch 6/20

Epoch 7/20

Epoch 8/20

Epoch 9/20

Epoch 10/20

Epoch 11/20

Epoch 12/20

Epoch 13/20

Epoch 14/20

Epoch 15/20

Epoch 16/20

Epoch 17/20

Epoch 18/20

Epoch 19/20

Epoch 20/20

Epoch 00020: saving model to /root/aml/My Drive/aml-audio_dataset/cnn_train/result/model_SubSpectralNet-200-20-10.h5


## Save model


In [None]:
# Feature extractor: same input as the trained network, but the output is the
# 64-unit 'dense_feature_extraction' layer instead of the softmax classifier.
sspr_feat_extractor = Model(
    inputs=sspr_net.input,
    outputs=sspr_net.get_layer('dense_feature_extraction').output
)

sspr_feat_extractor.save('spectrum_model.h5')

# Round-trip check: the reloaded model must reproduce the in-memory model's
# features for every row of a small batch (previously only row 0 was compared).
# Tolerances match np.allclose's defaults.
test_model = load_model('spectrum_model.h5')
sample_batch = X_test[0:10, :, :, :]
np.testing.assert_allclose(
    test_model.predict(sample_batch),
    sspr_feat_extractor.predict(sample_batch),
    rtol=1e-5,
    atol=1e-8,
)


# Final feature extraction


In [None]:
# Reload the feature extractor from the file written in the "Save model" cell.
model = load_model('spectrum_model.h5')

# Run the extractor over the full array X (training and held-out samples alike).
X_predict = model.predict(X)
# Display the shape of the extracted feature array as the cell output.
X_predict.shape


In [None]:
# One 'spectrum_<k>' column per extracted feature (1-based). The count is taken
# from X_predict itself so it follows the width of the feature layer.
feature_columns = [f'spectrum_{i}' for i in range(1, X_predict.shape[1] + 1)]
result_df = pd.DataFrame(data=X_predict, columns=feature_columns)

# Put id column first
result_df.insert(0, 'movie_id', ids)
result_df.head(10)


In [None]:
# Write the id + feature table to CSV in the working directory; index=False
# leaves the DataFrame's row index out of the file.
result_df.to_csv('featurized_spectrograms.csv', index=False)
