In [1]:
class HParams(object):
    """Plain container for the dataset, feature-extraction and training settings.

    Every value is set in ``__init__``; cells in this notebook read the
    attributes and sometimes overwrite them on the shared instance.
    """

    def __init__(self):
        # Dataset settings
        # Raw string: '\d' and '\g' are not valid escape sequences, so the
        # non-raw literal only worked by accident (and triggers a warning on
        # recent Python versions). The resulting value is unchanged.
        self.dataset_path = r'D:\download\gtzan_0'
        self.feature_path = './feature0'
        self.genres = ['classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae']

        # Feature parameters
        self.sample_rate = 22050
        self.fft_size = 1024
        self.win_size = 1024
        self.hop_size = 512
        self.num_mels = 128
        self.feature_length = 1024  # audio length = feature_length*hop_size/sample_rate (s)

        # Training parameters
        self.device = 1  # 0: CPU, 1: GPU0, 2: GPU1, ...
        self.batch_size = 10
        self.num_epochs = 100
        self.learning_rate = 1e-2
        self.stopping_rate = 1e-10
        self.weight_decay = 1e-6
        self.momentum = 0.9
        self.factor = 0.2
        self.patience = 3.

# Shared settings instance; functions in this notebook receive it as an argument or read it as a global.
hparams = HParams() 

In [2]:
import os
import numpy as np
import librosa

def load_list(list_name, hparams):
    """Read a list file from the dataset directory.

    Args:
        list_name: File name of the list, relative to ``hparams.dataset_path``.
        hparams: Object exposing a ``dataset_path`` attribute.

    Returns:
        The file's lines as a list of strings, without line terminators.
    """
    list_path = os.path.join(hparams.dataset_path, list_name)
    with open(list_path) as list_file:
        return list_file.read().splitlines()

def melspectrogram(file_name, hparams):
    """Compute a log-compressed mel spectrogram for one audio file.

    Args:
        file_name: Audio path relative to ``hparams.dataset_path``.
        hparams: Settings object providing ``dataset_path``, ``sample_rate``,
            ``fft_size``, ``hop_size``, ``win_size`` and ``num_mels``.

    Returns:
        The transposed mel spectrogram, i.e. ``log10(1 + 10 * mel)`` with the
        time axis first and the mel bands second.
    """
    # Load with librosa and convert.
    audio_path = os.path.join(hparams.dataset_path, file_name)
    # ``sr`` is passed by keyword: recent librosa releases make it keyword-only,
    # while older releases accept the keyword form as well.
    y, _ = librosa.load(audio_path, sr=hparams.sample_rate)
    S = librosa.stft(y, n_fft=hparams.fft_size, hop_length=hparams.hop_size, win_length=hparams.win_size)

    mel_basis = librosa.filters.mel(sr=hparams.sample_rate, n_fft=hparams.fft_size, n_mels=hparams.num_mels)
    mel_S = np.dot(mel_basis, np.abs(S))
    # Log compression of the magnitudes; the +1 keeps silent bins at 0.
    mel_S = np.log10(1 + 10 * mel_S)

    return mel_S.T

def resize_array(array, length):
    """Truncate or zero-pad a 2-D array along its first axis to ``length`` rows.

    Args:
        array: 2-D array; rows are cut or padded, columns are kept.
        length: Number of rows in the result.

    Returns:
        ``array[:length]`` (a slice of the input, same dtype) when the input has
        at least ``length`` rows; otherwise a new float64 array whose leading
        rows hold the input and whose remaining rows are zero.
    """
    n_rows = array.shape[0]
    if n_rows >= length:
        # Nothing to pad, so skip allocating a zero buffer that would be discarded.
        return array[:length]

    resized_array = np.zeros((length, array.shape[1]))
    resized_array[:n_rows] = array
    return resized_array

def mfcc_extraction():
    """Extract a fixed-length feature for every file in the train/valid/test lists.

    For each entry ``<genre>/<name>.wav`` of ``train_list.txt``,
    ``valid_list.txt`` and ``test_list.txt`` the feature returned by
    ``melspectrogram`` is resized to ``hparams.feature_length`` rows and saved
    as float32 to ``<feature_path>/<set>/<genre>/<name>.npy``.

    Despite the function name, no MFCC is computed here; the stored feature is
    whatever ``melspectrogram`` returns. Settings come from the global
    ``hparams``; ``load_list`` must be the ``(list_name, hparams)`` variant.
    """
    print('Extracting Feature')
    list_names = ['train_list.txt', 'valid_list.txt', 'test_list.txt']

    for list_name in list_names:
        set_name = list_name.replace('_list.txt', '')
        file_names = load_list(list_name, hparams)

        for file_name in file_names:
            if not file_name.strip():
                # A blank line in the list has no "<genre>/<file>" structure.
                continue

            feature = melspectrogram(file_name, hparams)
            feature = resize_array(feature, hparams.feature_length)

            parts = file_name.split('/')
            save_path = os.path.join(hparams.feature_path, set_name, parts[0])
            save_name = parts[1].replace('.wav', '.npy')

            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(save_path, exist_ok=True)
            np.save(os.path.join(save_path, save_name), feature.astype(np.float32))
            print(os.path.join(save_path, save_name))

    print('finished')

In [6]:
import random
def update_file_list():
    """Regenerate train/valid/test list files from the genre folders.

    Every directory visited below ``<dataset_path>/<genre>`` contributes one
    bucket of file names. Within a bucket a file ``<label>.<id>.<ext>`` counts
    as "old" when ``<id>`` is longer than 4 characters and as "new" otherwise.
    After shuffling, the first 60 old files plus all new files go to the train
    list, the next 20 old files to the validation list and the remaining old
    files to the test list.

    Each line written is ``<label>/<file name>`` where ``<label>`` is the part
    of the file name before its first dot. The three list files are written to
    ``hparams.dataset_path`` and replace any existing ones. File names without
    a dot are skipped. The shuffle uses the global ``random`` state, so the
    split is only reproducible if the caller seeds it.
    """
    # Collect the file names found under every genre folder.
    file_list = []
    for genre in hparams.genres:
        path = os.path.join(hparams.dataset_path, genre)
        for root, dirs, files in os.walk(path):
            file_list.append(files)

    train = []
    valid = []
    test = []
    for files in file_list:
        # Separate old and new files by the length of the id field.
        old_files = []
        new_files = []
        for file in files:
            fields = file.split('.')
            if len(fields) < 2:
                # No "<label>.<id>" structure; cannot be classified.
                continue
            if len(fields[1]) > 4:
                old_files.append(file)
            else:
                new_files.append(file)

        random.shuffle(old_files)
        random.shuffle(new_files)

        # Old files are split by fixed counts; new files only ever go to train.
        train.append(old_files[:60] + new_files)
        valid.append(old_files[60:80])
        test.append(old_files[80:])

    # Write the three list files; "with" closes each one even on error.
    splits = (('train_list.txt', train), ('valid_list.txt', valid), ('test_list.txt', test))
    for list_name, buckets in splits:
        with open(os.path.join(hparams.dataset_path, list_name), 'w') as list_file:
            for bucket in buckets:
                for file in bucket:
                    label = file.split('.')[0]
                    list_file.write(label + '/' + file + '\n')

In [7]:
# Rewrites train_list.txt, valid_list.txt and test_list.txt inside hparams.dataset_path.
update_file_list()

In [4]:
# NOTE(review): fails on a fresh kernel; main() is only defined in a cell further down this notebook.
main()

NameError: name 'main' is not defined

In [3]:
# Create .npy features from the wav files.
def main2():
    """Run the feature extraction for the train, valid and test lists.

    The original body was a line-for-line duplicate of ``mfcc_extraction``;
    delegating keeps a single implementation so the two cannot drift apart.
    """
    mfcc_extraction()

In [7]:
# Mutates the shared settings: features extracted after this cell runs are written under ./feature7.
hparams.feature_path = './feature7'

In [4]:
# NOTE(review): the captured run stopped with FileNotFoundError on 'classical.00030aa.wav',
# a name listed in train_list.txt but missing on disk; presumably the list was augmented twice (confirm).
main2()

Extracting Feature
./feature0\train\classical\classical.00030.npy
./feature0\train\classical\classical.00030a.npy
./feature0\train\classical\classical.00030b.npy
./feature0\train\classical\classical.00030c.npy
./feature0\train\classical\classical.00030d.npy
./feature0\train\classical\classical.00030e.npy
./feature0\train\classical\classical.00030f.npy
./feature0\train\classical\classical.00030g.npy
./feature0\train\classical\classical.00030h.npy
./feature0\train\classical\classical.00030i.npy
./feature0\train\classical\classical.00030a.npy


FileNotFoundError: [Errno 2] No such file or directory: 'D:\\download\\gtzan_0\\classical\\classical.00030aa.wav'

In [9]:
def load_list(set_name, genre, feature_root='./feature7'):
    """List the feature files stored for one data set and genre.

    Note: this definition reuses the name of the ``load_list(list_name,
    hparams)`` list-file reader defined elsewhere in this notebook; whichever
    cell ran last is the one in effect.

    Args:
        set_name: Data-set folder name, e.g. ``'train'``.
        genre: Genre folder name.
        feature_root: Root folder of the extracted features. Defaults to the
            previously hard-coded ``'./feature7'``.

    Returns:
        Bare file names (no directory part) of every file found below
        ``<feature_root>/<set_name>/<genre>``, sub-directories included.
        Empty when the folder does not exist.
    """
    file_names = []
    for (dirpath, dirnames, filenames) in os.walk(feature_root + '/' + set_name + '/' + genre):
        file_names.extend(filenames)
    return file_names

In [10]:
# Split each stored feature into slices and save them.

def split_feature():
    """Cut every train feature into two 512-row halves and save each, doubled.

    For each genre the file names come from ``load_list(set_name, genre)`` and
    the arrays are loaded from ``./feature2/<set>/<genre>/``. Half ``i`` (rows
    ``i*512`` to ``(i+1)*512``) is stacked on top of itself and saved as
    float32 to ``./slice1/<set>/<genre>/<name><i>.npy``.

    NOTE(review): the ``load_list`` helper defined with this signature in the
    notebook lists ``./feature7`` while the arrays are read from ``./feature2``;
    confirm both folders are meant to hold the same file names.
    """
    print('start')
    set_names = ['train']
    genres = ['classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae']

    for set_name in set_names:
        for genre in genres:
            file_names = load_list(set_name, genre)
            if not file_names:
                continue

            # Create the output folder once per genre instead of once per slice.
            save_path = './slice1/' + set_name + '/' + genre
            os.makedirs(save_path, exist_ok=True)

            # Load each existing feature file.
            for file in file_names:
                file_data = np.load('./feature2/' + set_name + '/' + genre + '/' + file)
                # Split and save.
                for i in range(2):
                    sub = file_data[i * 512:512 + i * 512, :]
                    # vstack already returns a new array, so no explicit copy is needed.
                    sub = np.vstack([sub, sub])

                    save_name = file.replace('.npy', str(i) + '.npy')
                    np.save(os.path.join(save_path, save_name), sub.astype(np.float32))
                    print(os.path.join(save_path, save_name))

In [93]:
# Writes the doubled half-slices under ./slice1; the log below lists every file saved.
split_feature()

start
./slice1/train/classical\classical.00.npy
./slice1/train/classical\classical.01.npy
./slice1/train/classical\classical.000000.npy
./slice1/train/classical\classical.000001.npy
./slice1/train/classical\classical.000040.npy
./slice1/train/classical\classical.000041.npy
./slice1/train/classical\classical.000060.npy
./slice1/train/classical\classical.000061.npy
./slice1/train/classical\classical.000080.npy
./slice1/train/classical\classical.000081.npy
./slice1/train/classical\classical.000090.npy
./slice1/train/classical\classical.000091.npy
./slice1/train/classical\classical.000120.npy
./slice1/train/classical\classical.000121.npy
./slice1/train/classical\classical.000130.npy
./slice1/train/classical\classical.000131.npy
./slice1/train/classical\classical.000140.npy
./slice1/train/classical\classical.000141.npy
./slice1/train/classical\classical.000160.npy
./slice1/train/classical\classical.000161.npy
./slice1/train/classical\classical.000170.npy
./slice1/train/classical\classical.0

In [96]:
def split_feature2():
    """Cut every train feature into three overlapping windows and save each, tripled.

    For each genre the file names come from ``load_list(set_name, genre)`` and
    the arrays are loaded from ``./feature2/<set>/<genre>/``. Window ``i``
    covers rows ``i*256`` to ``(i+2)*256`` (512 rows, 256-row hop); it is
    stacked three times and saved as float32 to
    ``./slice2/<set>/<genre>/<name><i>.npy``.

    NOTE(review): three stacked 512-row windows give 1536 rows, whereas
    ``split_feature`` produces 1024; confirm the consumer expects that length.
    NOTE(review): the ``load_list`` helper defined with this signature in the
    notebook lists ``./feature7`` while the arrays are read from ``./feature2``.
    """
    print('start')
    set_names = ['train']
    genres = ['classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae']

    for set_name in set_names:
        for genre in genres:
            file_names = load_list(set_name, genre)
            if not file_names:
                continue

            # Create the output folder once per genre instead of once per slice.
            save_path = './slice2/' + set_name + '/' + genre
            os.makedirs(save_path, exist_ok=True)

            # Load each existing feature file.
            for file in file_names:
                file_data = np.load('./feature2/' + set_name + '/' + genre + '/' + file)
                # Split and save.
                for i in range(3):
                    sub = file_data[i * 256:(2 + i) * 256, :]
                    # vstack already returns a new array, so no explicit copy is needed.
                    sub = np.vstack([sub, sub, sub])

                    save_name = file.replace('.npy', str(i) + '.npy')
                    np.save(os.path.join(save_path, save_name), sub.astype(np.float32))
                    print(os.path.join(save_path, save_name))

In [97]:
# Writes the tripled overlapping windows under ./slice2; the log below lists every file saved.
split_feature2()

start
./slice2/train/classical\classical.00.npy
./slice2/train/classical\classical.01.npy
./slice2/train/classical\classical.02.npy
./slice2/train/classical\classical.000000.npy
./slice2/train/classical\classical.000001.npy
./slice2/train/classical\classical.000002.npy
./slice2/train/classical\classical.000040.npy
./slice2/train/classical\classical.000041.npy
./slice2/train/classical\classical.000042.npy
./slice2/train/classical\classical.000060.npy
./slice2/train/classical\classical.000061.npy
./slice2/train/classical\classical.000062.npy
./slice2/train/classical\classical.000080.npy
./slice2/train/classical\classical.000081.npy
./slice2/train/classical\classical.000082.npy
./slice2/train/classical\classical.000090.npy
./slice2/train/classical\classical.000091.npy
./slice2/train/classical\classical.000092.npy
./slice2/train/classical\classical.000120.npy
./slice2/train/classical\classical.000121.npy
./slice2/train/classical\classical.000122.npy
./slice2/train/classical\classical.00013

In [87]:
# Scratch: small 1-D array used to try out array stacking in the next cell.
l = np.array([1,2,3,4,5,6,3,3])

In [88]:
# Scratch: for a 1-D array, hstack concatenates the three copies end to end (see output).
np.hstack([l,l,l])

array([1, 2, 3, 4, 5, 6, 3, 3, 1, 2, 3, 4, 5, 6, 3, 3, 1, 2, 3, 4, 5, 6,
       3, 3])

In [94]:
# Scratch: load one saved slice from ./slice1 to inspect it (reuses the name `l` for a different object).
l=np.load('./slice1/'+'train'+'/'+'disco'+'/'+'disco.00.npy')

In [95]:
# Shape check of the loaded slice; the captured output is (1024, 128).
l.shape

(1024, 128)

In [12]:
## Waveform-level augmentation of the wav files
import librosa
import numpy as np
import os

def get_genre(hparams):
    """Return the genre list stored on the settings object.

    Args:
        hparams: Object exposing a ``genres`` attribute.

    Returns:
        ``hparams.genres`` itself (not a copy).
    """
    genres = hparams.genres
    return genres

def load_list(list_name, hparams):
    """Return the entries of a list file located in the dataset directory.

    Args:
        list_name: Name of the list file inside ``hparams.dataset_path``.
        hparams: Object exposing a ``dataset_path`` attribute.

    Returns:
        One string per line of the file, line terminators removed.
    """
    full_path = os.path.join(hparams.dataset_path, list_name)
    with open(full_path) as handle:
        contents = handle.read()
    return contents.splitlines()

def get_item(hparams, genre):
    """Find the audio files of one genre folder.

    Args:
        hparams: Object exposing a ``dataset_path`` string attribute.
        genre: Genre folder name; converted with ``str``.

    Returns:
        Whatever ``librosa.util.find_files`` returns for
        ``<dataset_path>/<genre>``.
    """
    genre_dir = '/'.join([hparams.dataset_path, str(genre)])
    return librosa.util.find_files(genre_dir)


def readfile(file_name, hparams):
    """Load an audio file at the configured sample rate.

    Args:
        file_name: Path of the audio file.
        hparams: Object exposing a ``sample_rate`` attribute.

    Returns:
        The ``(y, sr)`` pair returned by ``librosa.load``.
    """
    # ``sr`` is passed by keyword: recent librosa releases make it keyword-only,
    # while older releases accept the keyword form as well.
    y, sr = librosa.load(file_name, sr=hparams.sample_rate)
    return y, sr


def change_pitch_and_speed(data):
    """Speed the signal up by linear-interpolation resampling, keeping its length.

    A factor ``1 / u`` with ``u`` drawn uniformly from [0.8, 1) is used as the
    read step through the input. The resampled signal is written to the start
    of an array shaped like the input; the rest stays zero.

    Args:
        data: 1-D numpy array of samples; it is not modified.

    Returns:
        A new array with the same shape and dtype as ``data``.
    """
    n_samples = len(data)
    # you can change low and high here
    length_change = np.random.uniform(low=0.8, high=1)
    step = 1.0 / length_change

    read_positions = np.arange(0, n_samples, step)
    resampled = np.interp(read_positions, np.arange(0, n_samples), data)
    kept = min(data.shape[0], resampled.shape[0])

    # Start from a zeroed copy so the result keeps the input's dtype and shape.
    out = data.copy()
    out *= 0
    out[0:kept] = resampled[0:kept]
    return out


def change_pitch(data, sr):
    """Pitch-shift the signal by a random number of semitone steps.

    The shift is ``4 * u`` steps with ``u`` drawn uniformly from [0, 1), at 12
    bins per octave.
    NOTE(review): the shift is never negative; if a symmetric +/-2 range was
    intended, the draw needs to be centred. Left unchanged here.

    Args:
        data: 1-D numpy array of samples; it is not modified.
        sr: Sample rate handed to ``librosa.effects.pitch_shift``.

    Returns:
        The array returned by ``librosa.effects.pitch_shift`` for a float64
        copy of ``data``.
    """
    bins_per_octave = 12
    pitch_pm = 2
    pitch_change = pitch_pm * 2 * (np.random.uniform())
    # astype() already copies, so no separate copy of the input is needed.
    # ``sr`` is passed by keyword: recent librosa releases make it keyword-only.
    return librosa.effects.pitch_shift(data.astype('float64'), sr=sr, n_steps=pitch_change,
                                       bins_per_octave=bins_per_octave)

def value_aug(data):
    """Scale the signal by a random gain drawn uniformly from [1.5, 3).

    Args:
        data: Numpy array of samples; it is not modified.

    Returns:
        A new array holding ``data`` multiplied by the gain.
    """
    gain = np.random.uniform(low=1.5, high=3)
    return data.copy() * gain


def add_noise(data):
    """Add standard-normal noise scaled by 0.005 to the signal.

    Args:
        data: 1-D array of samples; it is not modified.

    Returns:
        ``data`` plus one noise value per sample.
    """
    return data + 0.005 * np.random.randn(len(data))


def hpss(data):
    """Split the signal with ``librosa.effects.hpss``.

    Args:
        data: Numpy array of samples; converted to float64 before the call.

    Returns:
        A ``(harmonic, percussive)`` tuple in the order librosa returns them.
    """
    as_float64 = data.astype('float64')
    harmonic, percussive = librosa.effects.hpss(as_float64)
    return harmonic, percussive


def shift(data, n_samples=1600):
    """Circularly shift the signal.

    Args:
        data: Array of samples.
        n_samples: Number of positions to roll by; samples pushed past the end
            wrap around to the start. Defaults to the previously hard-coded 1600.

    Returns:
        The rolled array as returned by ``np.roll``.
    """
    return np.roll(data, n_samples)


def stretch(data, rate=1):
    """Time-stretch the signal and return it at the original length.

    Args:
        data: 1-D array of samples.
        rate: Stretch factor handed to ``librosa.effects.time_stretch``. With
            the default of 1 no speed change is requested.

    Returns:
        The stretched signal, cut to ``len(data)`` samples when longer and
        zero-padded at the end when shorter.
    """
    input_length = len(data)
    # ``rate`` is passed by keyword: recent librosa releases make it keyword-only.
    stretched = librosa.effects.time_stretch(data, rate=rate)
    if len(stretched) > input_length:
        return stretched[:input_length]
    # In this branch the difference is never negative, so no clamping is needed.
    return np.pad(stretched, (0, input_length - len(stretched)), "constant")

def change_speed(data):
    """Time-stretch the signal by a random rate, keeping its length.

    The rate is drawn uniformly from [0.9, 1.1). The stretched signal is
    written to the start of a zero array shaped like the input, so it is cut
    when longer and zero-padded when shorter.

    Args:
        data: 1-D numpy array of samples; it is not modified.

    Returns:
        A new array with the same shape and dtype as ``data``.
    """
    speed_change = np.random.uniform(low=0.9, high=1.1)
    # ``rate`` is passed by keyword: recent librosa releases make it keyword-only.
    stretched = librosa.effects.time_stretch(data.astype('float64'), rate=speed_change)
    kept = min(data.shape[0], stretched.shape[0])

    y_speed = np.zeros_like(data)
    y_speed[0:kept] = stretched[0:kept]
    return y_speed


def main():
    """Augment the dataset audio and register the new files in the train list.

    Step 1 rewrites ``train_list.txt`` so that every entry is followed by its
    ``a`` (added noise), ``f`` (speed change) and ``j`` (harmonic component)
    variants. Step 2 walks every audio file found in each genre folder and
    writes those three variants next to the source file.

    Changes against the earlier version of this cell:
      * Re-running is safe: entries/files that are themselves augmentation
        outputs are not augmented again (previously a second run produced
        names such as ``...aa.wav`` that do not exist on disk), and variants
        already listed are not listed twice.
      * The list content is built completely before the file is reopened for
        writing, so a failure cannot leave a truncated list behind.
      * Only the three augmentations that are written are computed.
      * Output names are built from the file stem, so a source file whose
        extension is not exactly ``.wav`` is no longer overwritten in place.
      * ``'finished'`` is printed once at the end instead of once per genre.

    NOTE(review): step 2 still processes every file in the genre folders, not
    only those in the train list, as before.
    NOTE(review): ``librosa.output.write_wav`` only exists in older librosa
    releases; the call is kept because no writer replacement is imported here.
    """
    print('Augmentation')
    genres = get_genre(hparams)
    # Suffix letters of the augmentations that are actually produced.
    suffixes = ('a', 'f', 'j')

    def is_augmented(name, known):
        # A name counts as an augmentation output when removing its trailing
        # suffix letter yields another known name (its source).
        stem, ext = os.path.splitext(name)
        return stem[-1:] in suffixes and (stem[:-1] + ext) in known

    list_names = ['train_list.txt']
    for list_name in list_names:
        file_names = load_list(list_name, hparams)
        known_entries = set(file_names)

        lines = []
        for entry in file_names:
            lines.append(entry + '\n')
            if is_augmented(entry, known_entries):
                continue
            for suffix in suffixes:
                derived = entry.replace('.wav', suffix + '.wav')
                # Already-listed variants are emitted when their own turn comes.
                if derived not in known_entries:
                    lines.append(derived + '\n')

        with open(os.path.join(hparams.dataset_path, list_name), 'w') as f:
            f.writelines(lines)

    for genre in genres:
        item_list = get_item(hparams, genre)
        known_paths = set(item_list)
        for file_name in item_list:
            if is_augmented(file_name, known_paths):
                continue

            y, sr = readfile(file_name, hparams)
            data_noise = add_noise(y)
            speed = change_speed(y)
            y_harmonic, _ = hpss(y)

            save_path, save_name = os.path.split(file_name)
            print(save_name)

            stem = os.path.splitext(save_name)[0]
            variants = (('a', data_noise), ('f', speed), ('j', y_harmonic))
            for suffix, audio in variants:
                out_path = os.path.join(save_path, stem + suffix + '.wav')
                librosa.output.write_wav(out_path, audio, hparams.sample_rate)

    print('finished')



In [100]:
# Scratch check: run every augmentation once on a single clip.
# Raw string so the backslashes of the Windows path are never read as escape
# sequences; the resulting value is the same as before.
file_name = r'D:\다운로드\gtzan\jazz\jazz.0.wav'

y, sr = readfile(file_name, hparams)
data_noise = add_noise(y)
data_roll = shift(y)
data_stretch = stretch(y)
pitch_speed = change_pitch_and_speed(y)
pitch = change_pitch(y, hparams.sample_rate)
speed = change_speed(y)
value = value_aug(y)
y_harmonic, y_percussive = hpss(y)
# Same call as data_roll above; kept because a later cell writes it out as i.wav.
y_shift = shift(y)

In [101]:
# Test: write each augmented signal to the working directory (a.wav ... i.wav) for listening.
# NOTE(review): librosa.output.write_wav only exists in older librosa releases.
librosa.output.write_wav(os.path.join('.', 'a.wav'), data_noise, hparams.sample_rate)
librosa.output.write_wav(os.path.join('.', 'b.wav'), data_roll, hparams.sample_rate)
librosa.output.write_wav(os.path.join('.', 'c.wav'), data_stretch, hparams.sample_rate)
librosa.output.write_wav(os.path.join('.', 'd.wav'), pitch_speed, hparams.sample_rate)
librosa.output.write_wav(os.path.join('.', 'e.wav'), pitch, hparams.sample_rate)
librosa.output.write_wav(os.path.join('.', 'f.wav'), speed, hparams.sample_rate)
librosa.output.write_wav(os.path.join('.', 'g.wav'), value, hparams.sample_rate)
librosa.output.write_wav(os.path.join('.', 'h.wav'), y_percussive, hparams.sample_rate)
librosa.output.write_wav(os.path.join('.', 'i.wav'), y_shift, hparams.sample_rate)

In [102]:
# Write the harmonic component as j.wav, matching the 'j' suffix used by main().
librosa.output.write_wav(os.path.join('.', 'j.wav'), y_harmonic, hparams.sample_rate)

In [14]:
# Rewrites train_list.txt and writes augmented wav files into the genre folders; prints each source file name.
main()

Augmentation
classical.0.wav
classical.00000.wav
classical.00001.wav
classical.00002.wav
classical.00003.wav
classical.00004.wav
classical.00005.wav
classical.00006.wav
classical.00007.wav
classical.00008.wav
classical.00009.wav
classical.00010.wav
classical.00011.wav
classical.00012.wav
classical.00013.wav
classical.00014.wav
classical.00015.wav
classical.00016.wav
classical.00017.wav
classical.00018.wav
classical.00019.wav
classical.00020.wav
classical.00021.wav
classical.00022.wav
classical.00023.wav
classical.00024.wav
classical.00025.wav
classical.00026.wav
classical.00027.wav
classical.00028.wav
classical.00029.wav
classical.00030.wav
classical.00031.wav
classical.00032.wav
classical.00033.wav
classical.00034.wav
classical.00035.wav
classical.00036.wav
classical.00037.wav
classical.00038.wav
classical.00039.wav
classical.00040.wav
classical.00041.wav
classical.00042.wav
classical.00043.wav
classical.00044.wav
classical.00045.wav
classical.00046.wav
classical.00047.wav
classical.0

In [7]:
# Extracts features for all three lists; the captured log shows output under ./feature5,
# so hparams.feature_path had been changed from its default before this run.
mfcc_extraction()

Extracting Feature
./feature5\train\classical\classical.00061.npy
./feature5\train\classical\classical.00061a.npy
./feature5\train\classical\classical.00061e.npy
./feature5\train\classical\classical.00061f.npy
./feature5\train\classical\classical.00061g.npy
./feature5\train\classical\classical.00061j.npy
./feature5\train\classical\classical.00090.npy
./feature5\train\classical\classical.00090a.npy
./feature5\train\classical\classical.00090e.npy
./feature5\train\classical\classical.00090f.npy
./feature5\train\classical\classical.00090g.npy
./feature5\train\classical\classical.00090j.npy
./feature5\train\classical\classical.00016.npy
./feature5\train\classical\classical.00016a.npy
./feature5\train\classical\classical.00016e.npy
./feature5\train\classical\classical.00016f.npy
./feature5\train\classical\classical.00016g.npy
./feature5\train\classical\classical.00016j.npy
./feature5\train\classical\classical.00087.npy
./feature5\train\classical\classical.00087a.npy
./feature5\train\classica