<a href="https://colab.research.google.com/github/zoltan23/Audio-Classification/blob/master/audio_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install python_speech_features
!pip install scipy
!pip install pandas
!pip install keras
!pip install sklearn
!pip install tqdm
!pip install librosa
!apt-get install libsndfile1 -y
!apt-get install git -y

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem so the dataset folder is reachable.
GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

In [None]:
import os

# All relative paths used later in the notebook are resolved against this folder.
PROJECT_DIR = '/content/gdrive/My Drive/music-wav-files'
os.chdir(PROJECT_DIR)

In [None]:
!mkdir clean samples models

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from scipy.io import wavfile as wav
import numpy as np

from datetime import datetime
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics

In [None]:
def extract_features(file_name):
    """Load an audio file and return its time-averaged MFCC feature vector.

    The file is loaded with librosa, samples whose amplitude envelope is at or
    below 0.0005 are dropped using the mask from ``envelope``, 40 MFCCs are
    computed on the remaining samples, and each coefficient is averaged over
    all frames.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        The per-coefficient mean of the MFCC matrix, or ``None`` when the file
        could not be loaded or processed (the error is printed, not raised).
    """
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

        # Keep only the samples where the envelope is above the threshold.
        # An explicit boolean array makes the indexing below a mask even when
        # the mask is empty.
        mask = np.asarray(envelope(audio, sample_rate, 0.0005), dtype=bool)

        mfccs = librosa.feature.mfcc(y=audio[mask], sr=sample_rate, n_mfcc=40)
        # Collapse the frame axis so every file yields a fixed-size vector.
        mfccsscaled = np.mean(mfccs.T, axis=0)

    except Exception as e:
        # Best-effort: report the failing file together with the cause and let
        # the caller decide what to do with the missing features.
        print("Error encountered while parsing file: ", file_name, "-", repr(e))
        return None

    return mfccsscaled

def envelope(y, rate, threshold):
    """Return a per-sample mask of where the amplitude envelope exceeds a threshold.

    The envelope is the centered rolling mean of ``abs(y)`` over a window of
    ``rate / 10`` samples (a tenth of a second when ``rate`` is in samples per
    second). Partial windows at the edges are averaged over the samples that
    are available.

    Args:
        y: One-dimensional sequence of audio samples.
        rate: Sample rate of ``y``; determines the rolling window length.
        threshold: Envelope value a sample must strictly exceed to be kept.

    Returns:
        A list of ``bool`` with one entry per sample of ``y``; ``True`` marks
        samples whose envelope is above ``threshold``.
    """
    # The window must cover at least one sample: pandas rejects a window that
    # is smaller than min_periods, which would happen for rate < 10.
    window = max(1, int(rate / 10))
    magnitude = pd.Series(np.abs(np.asarray(y)))
    y_mean = magnitude.rolling(window=window, min_periods=1, center=True).mean()
    # Vectorised comparison; NaN means compare as False, as in an explicit loop.
    return (y_mean > threshold).tolist()

In [None]:
# Smoke-test feature extraction on one known file before processing the dataset.
# This cell previously also read `fulldatasetpath` and `row`, which are only
# assigned by the training cell further down, so it raised NameError on a fresh
# kernel (Restart & Run All).
data = extract_features('wavfiles/5eadbc6d.wav')


In [None]:
# Show the contents of the current working directory.
!ls 

# The following code was extracted from a module created to train the classification model (a fully connected neural network over MFCC features). It is re-run to retrain the model and improve accuracy as more data becomes available from the app.

In [None]:
fulldatasetpath = 'wavfiles/'

# instruments.csv lists one audio file per row: `fname` (file name inside
# `fulldatasetpath`) and `label` (its class).
metadata = pd.read_csv('instruments.csv')

features = []
skipped_files = []

# Iterate through each sound file and extract the features
for index, row in metadata.iterrows():

    file_name = os.path.join(os.path.abspath(fulldatasetpath), str(row["fname"]))

    class_label = row["label"]
    data = extract_features(file_name)
    if data is None:
        # extract_features already reported the error; a None entry would make
        # the feature matrix ragged and break training, so leave the file out.
        skipped_files.append(file_name)
        continue
    features.append([data, class_label])

# Convert into a Panda dataframe once, after the loop, instead of rebuilding it
# on every iteration.
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

if skipped_files:
    print('Skipped', len(skipped_files), 'files whose features could not be extracted')


# Stack the per-file feature vectors into one matrix and collect the labels.
X = np.array(featuresdf['feature'].tolist())
y = np.array(featuresdf['class_label'].tolist())

# Map the string labels to integer ids, then one-hot encode them.
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=42,
)

# One output unit per one-hot column.
num_labels = yy.shape[1]
print("num_labels", num_labels)
filter_size = 2

# Fully connected classifier over the 40-element feature vectors, ending in a
# softmax with one unit per label.
# NOTE(review): the first Dense layer has no activation (it is linear) - confirm
# this is intentional.
model = Sequential([
    Dense(256, input_shape=(40,)),
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dense(num_labels, activation='softmax'),
])

# Multi-class setup: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture.
model.summary()

# Baseline: accuracy of the untrained network on the held-out set.
score = model.evaluate(x_test, y_test, verbose=0)
accuracy = score[1] * 100

print("Pre-training accuracy: %.4f%%" % accuracy)

num_epochs = 15
num_batch_size = 8

# Time the fit-and-save step.
start = datetime.now()

# The held-out split doubles as the validation set reported after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)
# Persist the trained network to the path that classify() loads from.
model.save('models/model4.h5')

duration = datetime.now() - start
print("Training completed in time: ", duration)

# Report accuracy (second entry of the evaluate() result) on both splits.
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

In [None]:
def classify(filename):
    """Predict the instrument label for one audio file with the saved model.

    Features are computed with the notebook's ``extract_features`` so that
    inference uses exactly the same preprocessing (envelope masking followed
    by averaged MFCCs) as training. The class names are recovered by fitting a
    ``LabelEncoder`` on the ``label`` column of ``instruments.csv``, the same
    column the training cell encodes.

    Args:
        filename: Path of the audio file to classify.

    Returns:
        The predicted label, or ``None`` when no features could be extracted
        from the file.
    """
    features = extract_features(filename)
    if features is None:
        # extract_features has already printed the reason.
        return None

    model = load_model('models/model4.h5')

    # Get class labels for the predicted instrument output.
    le = LabelEncoder()
    metadata = pd.read_csv('instruments.csv')
    le.fit(metadata['label'])

    # The model expects a batch, so wrap the single feature vector in one.
    prediction_feature = np.array([features])

    # Sequential.predict_classes / predict_proba no longer exist in current
    # Keras; predict() returns the class probabilities and argmax picks the
    # most likely class.
    predicted_proba = model.predict(prediction_feature, verbose=0)[0]
    predicted_index = int(np.argmax(predicted_proba))

    return le.inverse_transform([predicted_index])[0]
    


# The following function takes a WAV file and its actual instrument label, predicts the instrument with the trained model, and reports whether the prediction matches the label.

In [None]:
def predictedInstrumentTest(filename, instrument):
    """Classify one audio file and report whether the prediction matches the expected label.

    Args:
        filename: Path of the audio file passed to ``classify``.
        instrument: Expected instrument label for that file.

    Returns:
        ``True`` when the predicted label equals ``instrument`` (ignoring
        surrounding whitespace); ``False`` otherwise, including when
        ``classify`` produced no prediction.
    """
    predicted_instrument = classify(filename)
    print("The predicted instrument is", predicted_instrument, ". The actual instrument is", instrument, ".")

    # A missing prediction counts as a failure rather than crashing on .strip();
    # str() also tolerates labels that are not plain strings.
    passed = (
        predicted_instrument is not None
        and instrument.strip() == str(predicted_instrument).strip()
    )
    if passed:
        print("Test passed!!!")
    else:
        print("Test Failed")
    return passed

In [None]:
# Run the prediction check on every file in the `trumpets` folder.
# sorted() gives a repeatable order (os.listdir order is arbitrary), and the
# isfile() check skips sub-directories such as Jupyter's `.ipynb_checkpoints`.
for file_name in sorted(os.listdir('trumpets')):
    file_path = os.path.join('trumpets', file_name)
    if not os.path.isfile(file_path):
        continue
    predictedInstrumentTest(file_path, 'trumpet')