In [1]:
import os
import librosa
import math
import json

# Folder that holds the training audio (one sub-folder per class).
DATASET_PATH = "E:/Acoustic"
# File the extracted MFCC features are written to.
JSON_PATH = "E:/Acoustic/mfcc_data.json"

# Sampling rate, in Hz.
SAMPLE_RATE = 22050
# Length of each audio file fed in, in seconds.
DURATION = 10
# Number of samples in one full-length track.
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE

def save_mfcc(dataset_path, json_path, n_mfcc=20, n_fft=2048, hop_length=512, num_segments=5,
              sample_rate=None, samples_per_track=None):
    """Extract MFCC features from a folder-per-class audio dataset and save them as JSON.

    Every sub-folder found below ``dataset_path`` is treated as one class. Each file in
    it is loaded with librosa, cut into ``num_segments`` equally sized segments, and the
    MFCCs of every segment that yields the expected number of frames are stored together
    with the class label.

    Args:
        dataset_path: Root folder holding one sub-folder per class (str or path-like).
        json_path: File the resulting dictionary is written to.
        n_mfcc: Number of MFCCs requested from librosa for each frame.
        n_fft: FFT window size passed to librosa.
        hop_length: Hop between successive frames, in samples.
        num_segments: Number of segments each track is chopped into. More segments
            give a bigger dataset of shorter examples.
        sample_rate: Rate passed to ``librosa.load``, in Hz. Defaults to the
            module-level ``SAMPLE_RATE``.
        samples_per_track: Nominal number of samples in one track. Defaults to the
            module-level ``SAMPLES_PER_TRACK``. When overriding ``sample_rate``, pass
            a matching value here as well.

    The JSON document has three keys: ``"mapping"`` (class folder names; the list index
    is the label), ``"mfcc"`` (one list of frames per stored segment) and ``"labels"``
    (the label of each stored segment).
    """
    if sample_rate is None:
        sample_rate = SAMPLE_RATE
    if samples_per_track is None:
        samples_per_track = SAMPLES_PER_TRACK

    # Dictionary that is dumped to json_path once every file has been processed.
    data = {
        "mapping": [],  # Class name for each label index.
        "mfcc": [],  # MFCCs are the training input, labels are the target.
        "labels": []  # One label per stored MFCC segment.
    }

    num_samples_per_segment = int(samples_per_track / num_segments)
    # Rounded up, e.g. 1.2 -> 2.
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

    # Compare normalised path *values*: an identity check (`is`) only works by accident
    # and breaks as soon as dataset_path is a path-like object or a re-created string.
    root_dir = os.path.normpath(os.fspath(dataset_path))

    # Loops through all the folders in the training audio folder.
    for dirpath, _dirnames, filenames in os.walk(dataset_path):
        # The root level is not a class folder.
        if os.path.normpath(dirpath) == root_dir:
            continue

        # os.path.basename understands the platform's separators, so the label is the
        # bare folder name on Windows as well ("UAV", not "Acoustic\\UAV").
        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        # The label is, by construction, the index of the class name in "mapping".
        label = len(data["mapping"]) - 1
        print("\nProcessing {}".format(semantic_label))

        # Processes all the audio files for a specific class.
        for f in filenames:
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=sample_rate)

            # Process segments, extracting MFCCs and collecting them for the JSON file.
            for s in range(num_segments):
                start_sample = num_samples_per_segment * s
                finish_sample = start_sample + num_samples_per_segment
                segment = signal[start_sample:finish_sample]

                # The recording ended before this segment starts, so neither this nor
                # any later segment contains samples.
                if len(segment) == 0:
                    break

                mfcc = librosa.feature.mfcc(y=segment,
                                            sr=sr,
                                            n_fft=n_fft,
                                            n_mfcc=n_mfcc,
                                            hop_length=hop_length)
                mfcc = mfcc.T

                # Stores MFCCs for the segment only if it has the expected length, so
                # that all stored examples share the same number of frames.
                if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label)
                    print("{}, segment:{}".format(file_path, s + 1))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

if __name__ == "__main__":
    # num_segments can be changed: with 10-second audio, 10 segments means that
    # every segment is 1 second long.
    save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, num_segments=10)


Processing Acoustic\UAV
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_103.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_01.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_02.wav, segment:1




E:/Acoustic\UAV\drone+helicopter_03.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_04.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_05.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_06.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_07.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_08.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_09.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_10.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_11.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_12.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_13.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_14.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_15.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_16.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_17.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_18.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_19.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_20.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_21.wav, segment:1
E:/Acoustic\UAV\drone+helicopte

E:/Acoustic\UAV\drone+traffic_garden_center_carpark_076.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_077.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_078.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_079.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_080.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_081.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_082.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_083.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_084.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_085.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_086.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_087.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_088.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_089.wav, segment:1
E:/Aco

E:/Acoustic\Background\traffic_garden_center_carpark_050.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_051.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_052.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_053.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_054.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_055.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_056.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_057.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_058.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_059.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_060.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_061.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_062.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_063.wav, se

E:/Acoustic\Background\traffic_garden_center_carpark_166.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_167.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_168.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_169.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_170.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_171.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_172.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_173.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_224.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_175.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_176.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_177.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_178.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_179.wav, se

# Mel spectrogram preprocessing and feature extraction

In [3]:
import os
import librosa
import math
import json

# Folder that holds the training audio (one sub-folder per class).
DATASET_PATH = "E:/Acoustic"
# File the extracted mel-spectrogram features are written to.
JSON_PATH = "E:/Acoustic/mel_data.json"

# Sampling rate, in Hz.
SAMPLE_RATE = 22050
# Length of each audio file fed in, in seconds.
DURATION = 10
# Number of samples in one full-length track.
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE


def save_mfcc(dataset_path, json_path, n_mels=90, n_fft=2048, hop_length=512, num_segments=5,
              sample_rate=None, samples_per_track=None):
    """Extract dB-scaled mel spectrograms from a folder-per-class dataset and save them as JSON.

    Despite its name (kept so existing callers keep working), this function stores mel
    spectrograms, not MFCCs. Every sub-folder found below ``dataset_path`` is treated as
    one class. Each file in it is loaded with librosa, cut into ``num_segments`` equally
    sized segments, and the mel spectrogram of every segment that yields the expected
    number of frames is converted with ``librosa.power_to_db`` and stored together with
    the class label.

    Args:
        dataset_path: Root folder holding one sub-folder per class (str or path-like).
        json_path: File the resulting dictionary is written to.
        n_mels: Number of mel bands requested from librosa.
        n_fft: FFT window size passed to librosa.
        hop_length: Hop between successive frames, in samples.
        num_segments: Number of segments each track is chopped into. More segments
            give a bigger dataset of shorter examples.
        sample_rate: Rate passed to ``librosa.load``, in Hz. Defaults to the
            module-level ``SAMPLE_RATE``.
        samples_per_track: Nominal number of samples in one track. Defaults to the
            module-level ``SAMPLES_PER_TRACK``. When overriding ``sample_rate``, pass
            a matching value here as well.

    The JSON document has three keys: ``"mapping"`` (class folder names; the list index
    is the label), ``"mel"`` (one list of frames per stored segment) and ``"labels"``
    (the label of each stored segment).
    """
    if sample_rate is None:
        sample_rate = SAMPLE_RATE
    if samples_per_track is None:
        samples_per_track = SAMPLES_PER_TRACK

    # Dictionary that is dumped to json_path once every file has been processed.
    data = {
        "mapping": [],  # Class name for each label index.
        "mel": [],  # Mels are the training input, labels are the target.
        "labels": []  # One label per stored mel segment.
    }

    num_samples_per_segment = int(samples_per_track / num_segments)
    expected_num_mel_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

    # Compare normalised path *values*: an identity check (`is`) only works by accident
    # and breaks as soon as dataset_path is a path-like object or a re-created string.
    root_dir = os.path.normpath(os.fspath(dataset_path))

    # Loops through all the folders in the training audio folder.
    for dirpath, _dirnames, filenames in os.walk(dataset_path):
        # The root level is not a class folder.
        if os.path.normpath(dirpath) == root_dir:
            continue

        # os.path.basename understands the platform's separators, so the label is the
        # bare folder name on Windows as well ("UAV", not "Acoustic\\UAV").
        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        # The label is, by construction, the index of the class name in "mapping".
        label = len(data["mapping"]) - 1
        print("\nProcessing {}".format(semantic_label))

        # Processes all the audio files for a specific class.
        for f in filenames:
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=sample_rate)

            # Process segments, extracting mels and collecting them for the JSON file.
            for s in range(num_segments):
                start_sample = num_samples_per_segment * s
                finish_sample = start_sample + num_samples_per_segment
                segment = signal[start_sample:finish_sample]

                # The recording ended before this segment starts, so neither this nor
                # any later segment contains samples.
                if len(segment) == 0:
                    break

                mel = librosa.feature.melspectrogram(y=segment,
                                                     sr=sr,
                                                     n_fft=n_fft,
                                                     n_mels=n_mels,
                                                     hop_length=hop_length)
                db_mel = librosa.power_to_db(mel)
                mel = db_mel.T

                # Stores mels for the segment only if it has the expected length, so
                # that all stored examples share the same number of frames.
                if len(mel) == expected_num_mel_vectors_per_segment:
                    data["mel"].append(mel.tolist())
                    data["labels"].append(label)
                    print("{}, segment:{}".format(file_path, s + 1))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)


if __name__ == "__main__":
    # num_segments can be changed: with 10-second audio, 10 segments means that
    # every segment is 1 second long.
    save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, num_segments=10)


Processing Acoustic\UAV
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_103.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_01.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_02.wav, segment:1




E:/Acoustic\UAV\drone+helicopter_03.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_04.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_05.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_06.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_07.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_08.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_09.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_10.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_11.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_12.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_13.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_14.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_15.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_16.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_17.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_18.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_19.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_20.wav, segment:1
E:/Acoustic\UAV\drone+helicopter_21.wav, segment:1
E:/Acoustic\UAV\drone+helicopte

E:/Acoustic\UAV\drone+traffic_garden_center_carpark_077.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_078.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_079.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_080.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_081.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_082.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_083.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_084.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_085.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_086.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_087.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_088.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_089.wav, segment:1
E:/Acoustic\UAV\drone+traffic_garden_center_carpark_090.wav, segment:1
E:/Aco

E:/Acoustic\Background\traffic_garden_center_carpark_050.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_051.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_052.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_053.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_054.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_055.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_056.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_057.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_058.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_059.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_060.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_061.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_062.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_063.wav, se

E:/Acoustic\Background\traffic_garden_center_carpark_167.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_168.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_169.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_170.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_171.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_172.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_173.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_224.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_175.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_176.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_177.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_178.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_179.wav, segment:1
E:/Acoustic\Background\traffic_garden_center_carpark_180.wav, se