This notebook loads a trained model and uses it to predict the genre of an audio file. This version loads the audio with librosa instead of the `wave` module.

In [1]:
# Import Statements
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tqdm import tqdm
import librosa
import librosa.display
from librosa.feature import mfcc
from librosa.feature import melspectrogram
import wave
import time

In [5]:
# Local Variables
# If you sync your google drive with your hard drive, you should be able to make everything work
# by just changing the drive directory: either edit the default below or set the
# GENRE_DRIVE_DIRECTORY environment variable before starting the kernel.
# Another location used previously:
#   "C:\\Users\\Owner\\Google Drive\\Big Data Project - Group 20"
drive_directory = os.environ.get(
    "GENRE_DRIVE_DIRECTORY",
    "E:\\Google Drive\\Big Data Project - Group 20",
)
# os.path.join uses the separator of the running platform, so an overridden drive
# directory (including a non-Windows one) still produces valid sub-paths. On Windows
# with the default above the resulting strings are the same as before.
notebook_directory = os.path.join(drive_directory, "Code", "Sukris", "features-10_accuracy-81")
audio_directory = os.path.join(drive_directory, "test_audio")
model_directory = os.path.join(notebook_directory, "model")
sample_rate = 22050 # Sample rate (Hz) that audio is resampled to when loaded

# Tuning Variables
# NOTE(review): train_split, num_epochs and test_data_used are not referenced by the
# prediction cells visible here; presumably kept in sync with the training notebook.
genres = ['blues','classical','country','disco','hiphop','jazz','metal','pop','reggae','rock']
train_split = 0.8 # Percentage of data that will be used to train
segment_width = 0.2 # Length of each audio segment in seconds
num_frequencies = 128 # Number of frequency bins
max_frequency = 4000 # Frequencies above this value will not be included
num_epochs = 10 # Number of epochs to train with
training_data_used = 1.0 # Percentage of the training data that is used
test_data_used = 1.0 # Percentage of the test data that is used

In [6]:
# This function predicts the genre and its confidence based on the input numpy data and model
def predict_genre(frames,model):
    """Predict the genre of an audio signal by majority vote over fixed-length segments.

    The signal is cut into consecutive, non-overlapping segments of
    ``int(sample_rate * segment_width)`` samples; a trailing partial segment is
    dropped. The MFCC of every segment is passed to ``model`` and each segment
    votes for the class with the highest model output.

    Reads the notebook-level settings ``sample_rate``, ``segment_width``,
    ``training_data_used``, ``num_frequencies``, ``max_frequency`` and ``genres``.

    Parameters
    ----------
    frames : array-like
        Audio samples, segmented along the first axis.
        NOTE(review): assumed to be 1-D mono audio as returned by ``librosa.load``.
    model : object
        Model exposing ``predict(batch)`` that returns one row of class scores
        per segment.

    Returns
    -------
    genre_prediction : integer
        Index into ``genres`` of the class with the most segment votes.
    confidence : numpy.ndarray
        Fraction of segments voting for each class (one entry per genre).

    Raises
    ------
    ValueError
        If the segment length is not positive, or the audio is too short to
        contain a single complete segment.
    """
    frames = np.asarray(frames)

    # Segment Data
    print("Segmenting data...")
    sample_size = int(sample_rate * segment_width)
    if sample_size <= 0:
        raise ValueError("sample_rate * segment_width must be at least one sample")
    # Only the leading `training_data_used` fraction of the signal is used, as before.
    # A segment that ends exactly at that limit is complete and is kept.
    usable_samples = min(len(frames), int(len(frames) * training_data_used))
    num_segments = usable_samples // sample_size
    if num_segments == 0:
        raise ValueError(
            "audio is too short: need at least %d samples for one segment, got %d"
            % (sample_size, usable_samples)
        )
    segments = frames[:num_segments * sample_size].reshape(
        num_segments, sample_size, *frames.shape[1:]
    )

    # Get mfcc of segments
    print("Taking MFCC of segments...")
    mfcc_arr = np.array([
        mfcc(y=s, sr=sample_rate, n_mfcc=num_frequencies, fmax=max_frequency)
        for s in segments
    ])

    # Predict each segment
    print("Making segment predictions...")
    predictions = np.argmax(model.predict(mfcc_arr), axis=1)
    # One vote per segment; minlength keeps an entry for genres that got no votes
    counts = np.bincount(predictions, minlength=len(genres))

    # Predict the overall genre
    genre_prediction = np.argmax(counts)
    confidence = counts / len(predictions)

    return genre_prediction, confidence

In [19]:
# Choose the clip to classify and load the saved model.
# Other clips tried previously:
#   "E:\\Google Drive\\My Jazz Stuff\\One Man Band\\Solar\\Solar Complete.wav"
#   audio_directory + "\\hiphop\\hiphop.00016.wav"
file_path = audio_directory + "\\dream_theater_panic_attack.wav"
model = tf.keras.models.load_model(model_directory)
print("Loading audio file...")
frames, _ = librosa.load(file_path, sr=sample_rate)

# Classify the clip, then round the per-genre vote shares to two decimals for readability
prediction, confidence = predict_genre(frames, model)
confidence = np.round(confidence, 2)

# Rank genres from most to least confident; the sort is stable, so genres with
# equal confidence keep the order they have in `genres`
values_sorted = sorted(
    zip(confidence.tolist(), genres),
    key=lambda pair: pair[0],
    reverse=True,
)

# Display results
print("Predicted genre:", genres[prediction])
print("Confidence:     ", np.max(confidence))
print()
for score, genre in values_sorted:
    print(score, "confidence for", genre)

Segmenting data...
Taking MFCC of segments...
Making segment predictions...
Predicted genre: metal
Confidence:      0.8

0.8 confidence for metal
0.06 confidence for pop
0.06 confidence for rock
0.05 confidence for blues
0.01 confidence for disco
0.01 confidence for hiphop
0.01 confidence for jazz
0.0 confidence for classical
0.0 confidence for country
0.0 confidence for reggae


In [17]:
# Print the layers, output shapes and parameter counts of the loaded model.
# Relies on `model` having been created by the prediction cell above.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 1152)              0         
_________________________________________________________________
dense (Dense)                (None, 1152)              1328256   
_________________________________________________________________
dropout (Dropout)            (None, 1152)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 576)               664128    
_________________________________________________________________
dropout_1 (Dropout)          (None, 576)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 288)               166176    
_________________________________________________________________
dropout_2 (Dropout)          (None, 288)               0

In [26]:
# Debug aid: show the configured model directory next to the location expected
# on the other machine so the two paths can be compared by eye
for candidate_path in (
    model_directory,
    "C:\\Users\\Owner\\Google Drive\\Big Data Project - Group 20\\Code\\Sukris\\features-10_accuracy-81\\model",
):
    print(candidate_path)

C:\Google Drive\Big Data Project - Group 20\Code\Sukris\features-10_accuracy-81\model
C:\Users\Owner\Google Drive\Big Data Project - Group 20\Code\Sukris\features-10_accuracy-81\model


In [30]:
# Write the loaded model to 'model.h5'; the path is relative, so the file lands in
# the kernel's current working directory.
# NOTE(review): the .h5 suffix presumably selects Keras' HDF5 format - confirm for
# the installed TensorFlow version.
model.save('model.h5')