## 測試項目
1. 倒水聲(15)及馬桶沖水聲(18)
2. 使用acdnet20_20khz_fold4 keras h5-format model
3. export to cc file

In [1]:
import os
import sys
import datetime
import shutil
import numpy as np
import tensorflow as tf
from tensorflow import keras
import zipfile
import wavio
from common import utils as U

### Loading Keras Model

In [2]:
# Path to the Keras HDF5 (.h5) checkpoint used by this notebook.
keras_model_path = "./models/keras_h5/acdnet20_20khz_fold4.h5"
# Load the saved model and bind it to `model`, which later cells use for
# `model.summary()` and `model.predict(...)`.
model = keras.models.load_model(keras_model_path)

In [3]:
# Print the layer-by-layer table (layer type, output shape, parameter count).
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1, 30225, 1)]     0         
                                                                 
 conv2d (Conv2D)             (None, 1, 15109, 4)       36        
                                                                 
 batch_normalization (Batch  (None, 1, 15109, 4)       16        
 Normalization)                                                  
                                                                 
 re_lu (ReLU)                (None, 1, 15109, 4)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 1, 7553, 32)       640       
                                                                 
 batch_normalization_1 (Bat  (None, 1, 7553, 32)       128       
 chNormalization)                                            

### loading dataset from npz format
1. ACDNet input length is 30225
2. sr is 44100 and 20000
3. need to convert 16K to 20000
### ACDNet Config Setting
#### Training Parameters
1. opt.batchSize = 64;
2. opt.weightDecay = 5e-4;
3. opt.momentum = 0.9;
4. opt.nEpochs = 2000;
5. opt.LR = 0.1;
6. opt.schedule = [0.3, 0.6, 0.9];
7. opt.warmup = 10; 
#### Basic Net Configuration
- nClasses = 50
- nFolds = 5
- splits = \[i for i in range(1, nFolds + 1)\]
- sr = 20000
- inputLength = 30225
### How to convert 16K sound to 44.1K with python and sox

Note: the commands below rewrite the file as 16-bit PCM (`-b 16` / `subtype='PCM_16'`); they keep the original sample rate.

If using sox, the command is as follows:

    sox old.wav -b 16 new.wav

If using python, you can do the following:

    import soundfile

    data, samplerate = soundfile.read('old.wav')
    soundfile.write('new.wav', data, samplerate, subtype='PCM_16')

In [4]:
### convert to 16-bit
# def convertAllFilesInDirectoryTo16Bit(directory):
#     for file in os.listdir(directory):
#          if(file.endswith('.wav')):
#              nameSolo = file.rsplit('.', 1)[0]
#              print(directory + nameSolo )
#              data, samplerate = soundfile.read(directory + file)                

#            soundfile.write('/Users/yournamehere/Desktop/folderwhereyouwanttosae/' + nameSolo + '16BIT.wav', data, samplerate, subtype='PCM_16')
#             print("converting " + file + "to 16 - bit")

In [5]:
def npz_headers(npz):
    """Yield ``(name, shape, dtype)`` for every array stored in an ``.npz`` file.

    An ``.npz`` file is a Zip archive of ``.npy`` members. Only the magic
    string and the header of each member are read, so the array data itself
    is never loaded.

    Args:
        npz: Path (or file-like object accepted by ``zipfile.ZipFile``) of the
            ``.npz`` archive.

    Yields:
        Tuples ``(name, shape, dtype)`` where ``name`` is the member name with
        the ``.npy`` suffix removed, ``shape`` is the stored array shape and
        ``dtype`` is its ``np.dtype``. Members that do not end in ``.npy`` are
        skipped.
    """
    fmt = np.lib.format
    with zipfile.ZipFile(npz) as archive:
        for name in archive.namelist():
            if not name.endswith('.npy'):
                continue

            # Close each member handle as soon as its header has been parsed;
            # the original left every handle open until garbage collection.
            with archive.open(name) as npy:
                version = fmt.read_magic(npy)
                # Prefer the public header readers; only format versions
                # without a public reader fall back to the private helper.
                if version == (1, 0):
                    shape, _fortran, dtype = fmt.read_array_header_1_0(npy)
                elif version == (2, 0):
                    shape, _fortran, dtype = fmt.read_array_header_2_0(npy)
                else:
                    shape, _fortran, dtype = fmt._read_array_header(npy, version)

            # Yield after the handle is closed so a paused generator holds no
            # open member.
            yield name[:-4], shape, dtype

In [6]:
# List (name, shape, dtype) for each array in the archive; only headers are
# read, not the array data.
print(list(npz_headers("./datasets/esc50/wav44.npz")))

[('fold1', (), dtype('O')), ('fold2', (), dtype('O')), ('fold3', (), dtype('O')), ('fold4', (), dtype('O')), ('fold5', (), dtype('O'))]


In [7]:
# SECURITY: allow_pickle=True unpickles arbitrary Python objects, so only load
# archives that come from a trusted source.
data = np.load("./datasets/esc50/wav44.npz", allow_pickle=True)

# An NpzFile decodes a member every time it is indexed, so read 'fold1' once
# and reuse it instead of unpickling the whole fold for every lookup.
# The member is a 0-d object array; .item() unwraps the dict stored inside it.
fold1 = data['fold1'].item()
fold1_sounds_list = fold1['sounds']
fold1_labels_list = fold1['labels']

# dtype="object" keeps one array per clip (presumably because the clips differ
# in length and cannot form a rectangular array -- TODO confirm).
fold1_sounds_ary = np.asarray(fold1_sounds_list, dtype="object")

# Last expression of the cell: the notebook displays the first clip's samples.
fold1_sounds_ary[0]

array([-1,  0,  0, ...,  0,  0, -1], dtype=int16)

In [8]:
# NOTE(review): this path ends in "/" and names a directory, not an .npz file.
# The recorded run of this cell stopped on this line with
# "PermissionError: [Errno 13] Permission denied" -- point it at the actual
# archive file.
test_data = np.load("./datasets/esc50/test_44khz/", allow_pickle=True);
# test_20_X = data['x'];
# test_20_Y = data['y'];
# NOTE(review): everything below reads `data` (the archive loaded by an earlier
# cell), not `test_data` -- looks like a copy-paste; confirm which was intended.
list(data.keys())
data['fold1'].size
data['fold1'].shape
# .item() unwraps the dict stored in the 0-d object array.
fold1_sounds_list = data['fold1'].item()['sounds']
fold1_labels_list = data['fold1'].item()['labels']
type(fold1_sounds_list)
len(fold1_labels_list)
fold1_sounds_ary = np.asarray(fold1_sounds_list,dtype="object") # setting dtype="object" works here
# Last expression of the cell: would display the first clip's samples.
fold1_sounds_ary[0]

PermissionError: [Errno 13] Permission denied: './datasets/esc50/test_44khz/'

### Read Test Wav File

In [58]:
# Input clips: a separately recorded file and an ESC-50-style named file, both
# stored under the toilet_flushing folder.
test_sound_file = "./test_sounds/toilet_flushing/toilet_flushing_01.wav"
ec50_18_sound = "./test_sounds/toilet_flushing/1-20736-A-18.wav"

# Recorded clip: take row 0 of the transposed sample array (the first channel,
# given wavio's (samples, channels) layout), then drop the leading and
# trailing zero-valued samples.
sound = wavio.read(test_sound_file).data.T[0]
nonzero_idx = sound.nonzero()[0]
start, end = nonzero_idx.min(), nonzero_idx.max()
sound = sound[start:end + 1]
# Keep at most 220500 samples (5 s if the file is 44.1 kHz -- TODO confirm).
sound = sound[:220500]
# Class index is hard-coded: this file name does not end in a "-<class>"
# suffix that could be parsed.
label = 18

# Reference clip: same first-channel extraction and edge trimming, but without
# the length cap.
ec50_sound1 = wavio.read(ec50_18_sound).data.T[0]
nonzero_idx_ec50 = ec50_sound1.nonzero()[0]
start_ec50, end_ec50 = nonzero_idx_ec50.min(), nonzero_idx_ec50.max()
ec50_sound1 = ec50_sound1[start_ec50:end_ec50 + 1]
ec50_18_label = 18

In [59]:
# Show the samples and the sample count of each trimmed clip, recorded clip
# first.
for clip in (sound, ec50_sound1):
    print(clip)
    print(len(clip))

[   -1     0     1 ... -1330 -1411 -1409]
220500
[1434 1648 1945 ... -809  523 -730]
220500


In [60]:
# Number of samples per model input (matches the model's 30225-long input axis).
_inputLen = 30225
# Number of crops taken from each clip.
_nCrops = 10


def preprocess_setup():
    """Build the ordered list of preprocessing callables.

    Returns:
        A list of three callables created by ``common.utils``: padding by
        ``_inputLen // 2``, normalisation by 32768.0 (presumably the int16
        full-scale value -- TODO confirm), and a multi-crop into ``_nCrops``
        windows of ``_inputLen`` samples. They are meant to be applied in
        this order.
    """
    return [
        U.padding(_inputLen // 2),
        U.normalize(32768.0),
        U.multi_crop(_inputLen, _nCrops),
    ]

def preprocess(sound, funcs):
    """Apply each callable in ``funcs`` to ``sound``, in order.

    Args:
        sound: The value fed to the first callable.
        funcs: Iterable of one-argument callables; each receives the output
            of the previous one.

    Returns:
        The output of the last callable, or ``sound`` unchanged when
        ``funcs`` is empty.
    """
    processed = sound
    for transform in funcs:
        processed = transform(processed)
    return processed

In [61]:
# Build the preprocessing pipeline once and keep it for reuse.
_funcs = preprocess_setup()

In [62]:
# Run the trimmed clip through the pipeline; `sound` is rebound to the result.
sound = preprocess(sound, _funcs)

In [63]:
# Check the shape of the preprocessed result (the recorded run printed (10, 30225)).
print(sound.shape)

(10, 30225)


In [64]:
# Insert singleton axes at positions 1 and 3 in one call, giving the 4-D
# layout of the model's input layer: (batch, 1, samples, 1).
sound = np.expand_dims(sound, axis=(1, 3))
print(sound.shape)

(10, 1, 30225, 1)


In [65]:
# Score all crops in a single batch (batch size equals the number of crops).
scores = model.predict(sound, batch_size=len(sound), verbose=0)
print(type(scores))
print(scores.shape)

# For each row of scores: report the top-scoring class, then list every class
# index with its score to five decimals.
for res in scores:
    max_index = np.argmax(res)
    max_value = res.max()
    print(f"max value:{max_value:.5f} and index is {max_index}")
    print('\n'.join(f"{idx}: {score:.5f}" for idx, score in enumerate(res)))

<class 'numpy.ndarray'>
(10, 50)
max value:0.14976 and index is 48
0: 0.02089
1: 0.00920
2: 0.01522
3: 0.01064
4: 0.01116
5: 0.01254
6: 0.00811
7: 0.01380
8: 0.00581
9: 0.00685
10: 0.00122
11: 0.00854
12: 0.00112
13: 0.00187
14: 0.00439
15: 0.07874
16: 0.00267
17: 0.01940
18: 0.01060
19: 0.02611
20: 0.00529
21: 0.04162
22: 0.00808
23: 0.01187
24: 0.02780
25: 0.01830
26: 0.01558
27: 0.00153
28: 0.03831
29: 0.12634
30: 0.05225
31: 0.03831
32: 0.00732
33: 0.09516
34: 0.01023
35: 0.00628
36: 0.00490
37: 0.00652
38: 0.01954
39: 0.00562
40: 0.00611
41: 0.00352
42: 0.00272
43: 0.00618
44: 0.00310
45: 0.00521
46: 0.00440
47: 0.00405
48: 0.14976
49: 0.00520
max value:0.21250 and index is 45
0: 0.00370
1: 0.00382
2: 0.00763
3: 0.02212
4: 0.00357
5: 0.00874
6: 0.00334
7: 0.02871
8: 0.00756
9: 0.00562
10: 0.00119
11: 0.00901
12: 0.00143
13: 0.00085
14: 0.00566
15: 0.01414
16: 0.02045
17: 0.00054
18: 0.00634
19: 0.07517
20: 0.01037
21: 0.01930
22: 0.00741
23: 0.00719
24: 0.14637
25: 0.04677
26: 0.0