In [66]:
import tensorflow as tf
import pickle
import numpy as np
import librosa
import skimage.io

# Load audio file

In [67]:
# Load at most the first 40 seconds of the clip.
audio, sampling_rate = librosa.load('test_hiphop.wav', duration=40)

# Feature vector used by the SVM cells below: the mean of every MFCC row followed
# by the variance of every MFCC row (both reduced along axis 1).
# The audio and rate are passed by keyword because librosa >= 0.10 no longer accepts
# them positionally; `sampling_rate` is the rate the samples were loaded at.
mfcc_frames = librosa.feature.mfcc(y=audio, sr=sampling_rate)
mfcc = np.mean(mfcc_frames, axis=1).tolist() + np.var(mfcc_frames, axis=1).tolist()

# Mel spectrogram image that is later read back from disk for the CNN cells.
mel_spec = librosa.feature.melspectrogram(y=audio, sr=sampling_rate)
# `**` binds tighter than `/`: the min-shifted spectrogram is divided by max cubed.
# NOTE(review): presumably this mirrors the training-time preprocessing -- confirm before changing.
mel_spec = (mel_spec - np.min(mel_spec)) / np.max(mel_spec) ** 3
# Flip along axis 0 before saving the dB-scaled spectrogram as a PNG.
skimage.io.imsave('test_hiphop.png', np.flip(librosa.power_to_db(mel_spec, ref=np.max), axis=0))



In [78]:
# Keras' load_img takes target_size as (height, width). The CNN summaries in this
# notebook report a first-layer output of (None, 108, 128, 16), i.e. height 108 and
# width 128, so the image is resized to that layout instead of the transposed (128, 108).
IMG_HEIGHT, IMG_WIDTH = 108, 128

img = tf.keras.preprocessing.image.load_img(
    'test_hiphop.png', target_size=(IMG_HEIGHT, IMG_WIDTH)
)
img_array = tf.keras.preprocessing.image.img_to_array(img)
# Insert a leading axis so the single image is passed to predict as a batch of one.
img_array = tf.expand_dims(img_array, 0)

# SVM and SVM_Mixed

In [51]:
# NOTE(security): pickle.load can execute arbitrary code while deserialising,
# so these model files must come from a trusted source.
# Each file is opened in a `with` block so its handle is closed once the model is loaded.
with open('models/SVM.sav', 'rb') as svm_file:
    svm = pickle.load(svm_file)

with open('models/SVM_Mixed.sav', 'rb') as svm_mixed_file:
    svm_mixed = pickle.load(svm_mixed_file)

In [52]:
# Genre name -> integer class index, numbered from 0 in the order listed.
genre_names = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
classes = {genre: index for index, genre in enumerate(genre_names)}

In [53]:
# Swap keys and values so a predicted class index can be looked up to get its genre name.
# Gotcha: running this cell a second time swaps the mapping back again.
classes = dict(zip(classes.values(), classes.keys()))

In [54]:
# The feature vector is wrapped in a list to form a one-sample batch;
# the first (only) predicted class index is then mapped through `classes`.
svm_class_index = svm.predict([mfcc])[0]
print(classes[svm_class_index])

hiphop


In [55]:
# Same one-sample batch as for the plain SVM, scored with the "mixed" model.
svm_mixed_class_index = svm_mixed.predict([mfcc])[0]
print(classes[svm_mixed_class_index])

hiphop


# NN and NN_Mixed

In [56]:
# Restore the saved network from its model directory and print its layer summary.
nn_model_dir = 'models/NN'
nn = tf.keras.models.load_model(nn_model_dir)
nn.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 512)               20992     
_________________________________________________________________
dropout_6 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_7 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 32)                8224      
_________________________________________________________________
dropout_8 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 10)               

In [57]:
# Restore the saved "mixed" network from its model directory and print its layer summary.
nn_mixed_model_dir = 'models/NN_Mixed'
nn_mixed = tf.keras.models.load_model(nn_mixed_model_dir)
nn_mixed.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_24 (Dense)             (None, 512)               20992     
_________________________________________________________________
dropout_18 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_25 (Dense)             (None, 256)               131328    
_________________________________________________________________
dropout_19 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_26 (Dense)             (None, 32)                8224      
_________________________________________________________________
dropout_20 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_27 (Dense)             (None, 10)               

In [82]:
# The original cell used `x`, which no visible cell defines, so it failed on a fresh kernel.
# Build it here as a one-row batch from the MFCC mean/variance vector; the summary above
# shows 20992 = 512 * (40 + 1) parameters in the first Dense layer, i.e. 40 input features.
# NOTE(review): assumes the network takes the same unscaled features as the SVM models -- confirm.
x = np.asarray([mfcc], dtype=np.float32)

# Sequential.predict_classes was removed in TensorFlow 2.6; for a multi-unit output layer
# it is equivalent to an argmax over the last axis of predict().
nn_class_index = int(np.argmax(nn.predict(x), axis=-1)[0])
prediction = classes[nn_class_index]
print(prediction)

pop


In [63]:
# The original cell used `x`, which no visible cell defines, so it failed on a fresh kernel.
# Build it here as a one-row batch from the MFCC mean/variance vector; the summary above
# shows 20992 = 512 * (40 + 1) parameters in the first Dense layer, i.e. 40 input features.
# NOTE(review): assumes the network takes the same unscaled features as the SVM models -- confirm.
x = np.asarray([mfcc], dtype=np.float32)

# Sequential.predict_classes was removed in TensorFlow 2.6; for a multi-unit output layer
# it is equivalent to an argmax over the last axis of predict().
nn_mixed_class_index = int(np.argmax(nn_mixed.predict(x), axis=-1)[0])
prediction = classes[nn_mixed_class_index]
print(prediction)

hiphop


# Convolutional Neural Network

In [69]:
# Restore the saved convolutional network from its model directory and print its layer summary.
cnn_model_dir = 'models/CNN'
cnn = tf.keras.models.load_model(cnn_model_dir)
cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 108, 128, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 54, 64, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 54, 64, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 27, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 27, 32, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 16, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 13, 16, 128)       7

In [64]:
# Restore the saved "mixed" convolutional network from its model directory and print its layer summary.
cnn_mixed_model_dir = 'models/CNN_Mixed'
cnn_mixed = tf.keras.models.load_model(cnn_mixed_model_dir)
cnn_mixed.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 108, 128, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 54, 64, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 54, 64, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 27, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 27, 32, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 16, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 13, 16, 128)       7

In [79]:
# Score the one-image batch and apply softmax to the first (only) output row.
prediction = cnn.predict(img_array)
score = tf.nn.softmax(prediction[0])

# The index of the largest score is mapped through `classes` to get the genre name.
cnn_class_index = np.argmax(score)
print(classes[cnn_class_index])

hiphop


In [81]:
# Score the one-image batch with the "mixed" CNN and apply softmax to the first (only) output row.
prediction = cnn_mixed.predict(img_array)
score = tf.nn.softmax(prediction[0])

# The index of the largest score is mapped through `classes` to get the genre name.
cnn_mixed_class_index = np.argmax(score)
print(classes[cnn_mixed_class_index])

hiphop
