In [None]:
import csv
import os

import muda
import numpy as np

In [None]:
def CreateDataset(Files_Dir,Parameters,Property,EventLabel='speech'):
    """Build a labelled feature dataset from every annotated recording under a folder.

    The folder tree is walked for ``.txt`` annotation files (txt-based: the
    annotation is processed first, then its audio is looked up). Each one is
    paired with the ``.wav`` file of the same base name in the same directory.
    Files whose feature/label lengths disagree, or whose audio is missing, are
    reported and skipped.

    Two files are written to ``<cwd>/JPNotebookExported/`` (created on demand):
    ``<Property>_TransitionMatrix.npy`` and ``<Property>_Dataset.npy``. The
    dataset is an object array, so it is pickled by ``np.save`` and must be
    read back with ``np.load(..., allow_pickle=True)``.

    Parameters
    ----------
    Files_Dir : str
        Root folder holding the raw audio and annotation files.
    Parameters :
        Processing settings, forwarded unchanged to ``FeatureExtraction`` and
        ``CreateLabelVector``.
    Property : str
        Prefix of the saved file names, e.g. the train/test/validation split.
    EventLabel : str
        Event label selecting the class to learn; passed to
        ``sed_eval.util.event_list.filter_event_list`` (see the sed_eval docs).

    Returns
    -------
    dataset : numpy.ndarray
        Object array of shape ``(n_files, 2)``; each row is
        ``[feature_vector, label_vector]``.
    trans_matrix : numpy.ndarray
        ``[[mean(SS), 1 - mean(SS)], [1 - mean(NN), mean(NN)]]`` where SS / NN
        are the per-file speech->speech and non-speech->non-speech
        probabilities. All NaN when no processed file contains speech.
    """
    data = []

    # Per-file state-transition probabilities (only for files containing speech).
    SS = []  # Speech to Speech
    NN = []  # NonSpeech to NonSpeech

    for root, dirs, files in os.walk(Files_Dir):
        # Sorting makes the dataset order independent of the filesystem.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.txt'):
                continue
            file_path = os.path.join(root, file)

            # The audio must sit next to its annotation under the same base name.
            audio_file_name = os.path.splitext(file)[0] + '.wav'
            audio_file_path = os.path.join(root, audio_file_name)
            if not os.path.isfile(audio_file_path):
                print('===========Warning! Missing audio file,will skip this annotation:==========')
                print(file_path)
                continue

            # Load annotated information and keep only the requested event class.
            annotated_event = sed_eval.io.load_event_list(file_path)
            target_event = sed_eval.util.event_list.filter_event_list(
                annotated_event, scene_label=None, event_label=EventLabel, filename=None)

            # Load audio for feature extraction.
            feature_vector, audio_data, audio_sr = FeatureExtraction(audio_file_path, Parameters)

            # Use the annotation info to create the label vector.
            label_vector = CreateLabelVector(Data=audio_data,
                                             EventList=target_event,
                                             Parameters=Parameters,
                                             LabelIndex=1)
            if feature_vector.shape[1] != len(label_vector):
                print('===========Waring! Unmatched data size,will skip this file:==========')
                print(file_path)
                continue

            # Transition probabilities are only collected if the data contains speech.
            if sum(label_vector) != 0:
                # probability of ss,nn,sn,ns; n=nonspeech, s=speech
                p_ss, p_nn, _p_sn, _p_ns = ComputeStateTransition(label_vector)
                SS.append(p_ss)
                NN.append(p_nn)

            data.append([feature_vector, label_vector])

    # SS and NN are always appended together, so testing one of them is enough.
    if SS:
        mean_ss, mean_nn = np.mean(SS), np.mean(NN)
    else:
        print('===========Warning! No speech found,transition matrix is undefined (NaN)==========')
        mean_ss = mean_nn = np.nan
    trans_matrix = np.array([[mean_ss, 1 - mean_ss], [1 - mean_nn, mean_nn]])

    # Features and labels have different shapes, so the rows are ragged: fill an
    # explicit object array instead of relying on np.asarray, which raises
    # ValueError for inhomogeneous input on recent NumPy releases.
    dataset = np.empty((len(data), 2), dtype=object)
    for row, (features, labels) in enumerate(data):
        dataset[row, 0] = features
        dataset[row, 1] = labels

    export_dir = os.path.join(os.getcwd(), 'JPNotebookExported')
    os.makedirs(export_dir, exist_ok=True)
    np.save(os.path.join(export_dir, Property + '_TransitionMatrix.npy'), trans_matrix)
    np.save(os.path.join(export_dir, Property + '_Dataset.npy'), dataset)
    print(trans_matrix)

    return dataset, trans_matrix


In [1]:

def DeformDataset(DatasetPath,OutputPath,DeformerBook):
    """Apply every deformer in ``DeformerBook`` to each annotated clip of a dataset.

    ``DatasetPath`` is walked recursively for ``.jams`` files; each is loaded
    together with the ``.wav`` of the same base name. For every output of
    ``deformer.transform`` three files are written to ``OutputPath/<name>/``
    (the original dataset is not copied):

    * ``<clip>_deformed_NN.wav``  - deformed audio
    * ``<clip>_deformed_NN.jams`` - deformed annotation
    * ``<clip>_deformed_NN.txt``  - tab-separated ``onset, offset, label`` rows
      for the foreground events of the first ``scaper`` annotation

    Parameters
    ----------
    DatasetPath : str
        Root folder of the source ``.jams`` / ``.wav`` pairs.
    OutputPath : str
        Folder under which one sub-folder per deformer is created.
    DeformerBook : dict
        Maps a sub-folder name to an object exposing ``transform(jam)``.

    Notes
    -----
    Output names use only the clip's base name, so clips with the same name in
    different sub-folders of ``DatasetPath`` overwrite each other.
    """
    for item, deformer in DeformerBook.items():
        # One directory per deformer. exist_ok keeps a re-run of the cell from
        # failing, and makedirs also creates a missing OutputPath.
        dirName = os.path.join(OutputPath, item)
        os.makedirs(dirName, exist_ok=True)

        for root, dirs, files in os.walk(DatasetPath):
            for file in files:
                if not file.endswith(".jams"):
                    continue
                file_name = os.path.splitext(file)[0]

                jams_path = os.path.join(root, file)
                audio_path = os.path.join(root, file_name + '.wav')
                output_path = os.path.join(dirName, file_name)

                # Load the audio file together with its annotation.
                j_orig = muda.load_jam_audio(jams_path, audio_path)

                for i, jam_out in enumerate(deformer.transform(j_orig)):
                    stem = output_path + '_deformed_{:02d}'.format(i)
                    muda.save(stem + '.wav', stem + '.jams', jam_out)

                    # Export the foreground events of the scaper annotation
                    # as a plain-text event list.
                    ann = jam_out.annotations.search(namespace='scaper')[0]
                    csv_data = [
                        [obs.time, obs.time + obs.duration, obs.value['label']]
                        for obs in ann.data
                        if obs.value['role'] == 'foreground'
                    ]

                    # newline='' lets the csv module control line endings
                    # (otherwise rows end in \r\r\n on Windows).
                    with open(stem + '.txt', 'w', newline='') as csv_file:
                        writer = csv.writer(csv_file, delimiter='\t')
                        writer.writerows(csv_data)

In [None]:

# Instantiate the candidate muda deformers. All of them are built, but only
# the ones registered in `deformers` below are actually applied.
pitch_shift_1 = muda.deformers.PitchShift(n_semitones=-1)
pitch_shift_2 = muda.deformers.PitchShift(n_semitones=1)
time_stretch = muda.deformers.RandomTimeStretch(n_samples=2, scale=0.3)
colored_noise = muda.deformers.ColoredNoise(n_samples=1, color=['white'])
drc = muda.deformers.DynamicRangeCompression(preset='speech')
#ir_convolution = muda.deformers.IRConvolution()

# Source soundbanks and the destination of their deformed copies, both
# resolved against the current working directory.
dataset_dir = '{}/AugmentedDataset/soundbanks'.format(os.getcwd())
output_dir = '{}/AugmentedDataset/augmentedsoundbanks'.format(os.getcwd())

# Register the deformers to run; the key becomes the output sub-folder name.
# Uncomment an entry to enable that augmentation.
deformers = {}
#deformers['PitchShift1'] = pitch_shift_1
#deformers['PitchShift2'] = pitch_shift_2
#deformers['TimeStretched'] = time_stretch
#deformers['ColoredNoise'] = colored_noise
deformers['DynamicCompressed'] = drc

# Write the deformed audio / jams / txt files to disk.
DeformDataset(dataset_dir, output_dir, deformers)

# Build the feature dataset and transition matrix from the deformed files.
# `Params` is not defined in this cell; it must already exist in the kernel.
DeformedTrainData, DeformedTrainTransMatrix = CreateDataset(
    Files_Dir=output_dir, Parameters=Params, Property='DeformedTrain')

In [13]:
from sklearn.model_selection import RandomizedSearchCV
import numpy as np


# NOTE(review): bare dict literal that is never bound to a name. Presumably it
# is the echoed hyper-parameter grid meant for RandomizedSearchCV (imported in
# this cell) - confirm, and assign it to a variable if it is meant to be used.
{'n_estimators': [5, 11, 18, 25, 32, 38, 45, 52, 59, 66, 72, 79, 86, 93, 100], 'max_features': ['auto', None], 'max_depth': [2, 9, 17, 25, 33, 41, 48, 56, 64, 72, 80, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4], 'bootstrap': [True, False]}
