In [1]:
import os
import sys
import datetime
import shutil
import numpy as np
import tensorflow as tf
from tensorflow import keras
import zipfile
import wavio
from common import utils as U

## Loading Model

In [2]:
# Path to the pretrained Keras HDF5 checkpoint (presumably ACDNet-20 trained
# on 20 kHz audio, fold 4, judging by the file name — TODO confirm).
model_path = "./models/keras_h5/acdnet20_20khz_fold4.h5"
# Restore the full model (architecture + weights) from the .h5 file.
model = keras.models.load_model(model_path)
# Print the layer table; the input layer shape shown there is what the
# preprocessing cells further down must produce.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1, 30225, 1)]     0         
                                                                 
 conv2d (Conv2D)             (None, 1, 15109, 4)       36        
                                                                 
 batch_normalization (Batch  (None, 1, 15109, 4)       16        
 Normalization)                                                  
                                                                 
 re_lu (ReLU)                (None, 1, 15109, 4)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 1, 7553, 32)       640       
                                                                 
 batch_normalization_1 (Bat  (None, 1, 7553, 32)       128       
 chNormalization)                                            

### Setting global variables

In [67]:
# --- Input geometry -------------------------------------------------------
_inputLen = 30225    # samples per crop; equals the model input width (None, 1, 30225, 1)
_nCrops = 5          # number of crops taken from the clip
default_sr = 20000   # sampling rate (Hz) the clip is converted to before inference

# --- Expected class for the clip under test --------------------------------
# Class indices: 17 = pouring water; 18 = toilet_flushing
ground_true_tag = 17

# One copy of the expected label per crop, echoed as a quick sanity check.
taglst = [ground_true_tag] * _nCrops
print(taglst)

# Same labels as a column vector of shape (_nCrops, 1).
target_tags = np.asarray(taglst)[:, np.newaxis]

[17, 17, 17, 17, 17]


### Setting Test Sound Path

In [68]:
# Clip under test. To try another recording, comment out the active pair
# below and enable one of the alternatives. `converted_sound_path` is the
# file that is read back after the sample-rate conversion.
#
# original_sound_path = "./test_sounds/toilet_flushing/toilet_flushing_02.wav"
# converted_sound_path = "./test_sounds/toilet_flushing/toilet_flushing_02_20000hz.wav"
#
# original_sound_path = "./test_sounds/pouring_water/pouring_coffee_test01.wav"
# converted_sound_path = "./test_sounds/pouring_water/pouring_coffee_test01_20000hz.wav"
original_sound_path = "./test_sounds/pouring_water/pouring_water_in_official.wav"
converted_sound_path = "./test_sounds/pouring_water/pouring_water_in_official_20000hz.wav"

# Read the recording as-is first so its native sampling rate can be inspected.
wobj = wavio.read(original_sound_path)
print(wobj.rate)

# Only convert when the native rate differs from the rate used for inference;
# after converting, continue with the converted file instead of the original.
needs_conversion = wobj.rate != default_sr
if needs_conversion:
    U.convert_sr_for_single_file(original_sound_path, converted_sound_path, default_sr)
    wobj = wavio.read(converted_sound_path)
    print(f"Converted wav sampling rate is {wobj.rate}")

# Keep the first channel only (row 0 of the transposed sample array).
sound_sig = wobj.data.T[0]


48000
* ./test_sounds/pouring_water/pouring_water_in_official.wav -> ./test_sounds/pouring_water/pouring_water_in_official_20000hz.wav
Converted wav sampling rate is 20000


In [69]:
# Trim leading/trailing zero samples, then cap the clip length.

# Positions of all non-zero samples, computed once and reused for both ends.
nonzero_idx = sound_sig.nonzero()[0]
if nonzero_idx.size == 0:
    # An all-zero clip would otherwise fail in .min()/.max() with an opaque
    # "zero-size array to reduction operation" ValueError.
    raise ValueError("sound_sig contains no non-zero samples; nothing to classify")

start = nonzero_idx.min()
end = nonzero_idx.max()
sound_sig = sound_sig[start: end + 1]
# Note: this length is reported after trimming and before the cap below.
print(f"Original length of sound_sig is {len(sound_sig)}")

# Upper bound on the number of samples kept.
# NOTE(review): 220500 samples is 5 s at 44.1 kHz but roughly 11 s at the
# 20 kHz rate used in this notebook — confirm this is the intended cap.
max_len = 220500
if len(sound_sig) > max_len:
    sound_sig = sound_sig[:max_len]
print(f"sound_sig length is {len(sound_sig)}")

Original length of sound_sig is 151449
sound_sig length is 151449


### Preprocessing sound

In [70]:
def preprocess_setup():
    """Build the ordered list of preprocessing callables for one clip.

    The steps are created from the project's ``U`` helpers using the
    notebook-level ``_inputLen`` and ``_nCrops`` settings, and are meant to
    be applied in list order (see ``preprocess``).

    Returns:
        list: ``[padding, normalize, multi_crop]`` callables. What each
        helper does internally is defined in ``common.utils``; presumably
        they pad the signal, scale it by 32768.0 and cut it into
        ``_nCrops`` crops of ``_inputLen`` samples — verify against that
        module.
    """
    pad_step = U.padding(_inputLen // 2)
    # Alternative normalisers tried previously:
    #   U.rms_normalize(rms_level=2)
    #   U.minmax_normalize()
    scale_step = U.normalize(32768.0)
    crop_step = U.multi_crop(_inputLen, _nCrops)

    return [pad_step, scale_step, crop_step]

def preprocess(sound, funcs):
    """Run ``sound`` through every callable in ``funcs``, in order.

    Args:
        sound: The value handed to the first callable.
        funcs: Iterable of one-argument callables; each receives the
            previous callable's return value.

    Returns:
        The return value of the last callable, or ``sound`` unchanged when
        ``funcs`` is empty.
    """
    result = sound
    for transform in funcs:
        result = transform(result)
    return result

In [71]:
_funcs = preprocess_setup()
# Only a raw 1-D signal is preprocessed. This cell overwrites `sound_sig`, so
# without the guard a re-run would push the already cropped batch through
# padding/cropping a second time and silently corrupt the model input.
if sound_sig.ndim == 1:
    sound_sig = preprocess(sound_sig, _funcs)
print(f"The sound_sig shape after preprocessing is {sound_sig.shape}")

The sound_sig shape after preprocessing is (5, 30225)


### Expanding sound dimensions for model input

In [72]:
# Turn the (crops, samples) batch into (crops, 1, samples, 1), the layout of
# the model's input layer. The ndim guard keeps this cell idempotent: it
# overwrites `sound_sig`, so an unguarded re-run would keep adding axes and
# break model.predict.
if sound_sig.ndim == 2:
    sound_sig = np.expand_dims(sound_sig, axis=1)
    sound_sig = np.expand_dims(sound_sig, axis=3)
print(sound_sig.shape)

(5, 1, 30225, 1)


### Performing prediction

In [73]:
# def compute_accuracy(y_pred, y_target):
#     # Reshape y_pred so that each sample is represented by its _nCrops crops.
#     if _nCrops > 1:
#         y_pred = (y_pred.reshape(y_pred.shape[0]//_nCrops, _nCrops, y_pred.shape[1])).mean(axis=1);
#         y_target = (y_target.reshape(y_target.shape[0]//_nCrops, _nCrops, y_target.shape[1])).mean(axis=1);

#     loss = keras.losses.KLD(y_target, y_pred).numpy().mean();

#     # Get the index that has the highest average value for each sample
#     y_pred = y_pred.argmax(axis=1);
#     y_target = y_target.argmax(axis=1);
#     accuracy = (y_pred==y_target).mean()*100;

#     return accuracy, loss;

In [74]:
# Single forward pass over all crops; iterating `scores` yields one vector of
# per-class scores for each crop.
scores = model.predict(sound_sig, batch_size=len(sound_sig), verbose=0)

# Each crop is scored independently here (the crop-averaged
# compute_accuracy() helper in the previous cell is commented out).
acc_count = 0
for res in scores:
    max_value = res.max()
    max_index = np.argmax(res)
    if max_index == ground_true_tag:
        acc_count += 1
    print(f"max value:{max_value:.5f} and index is {max_index}")
    # Full per-class breakdown for this crop, one "index: score" line per class.
    print('\n'.join('{}: {:.5f}'.format(*k) for k in enumerate(res)))

# Divide by the number of predictions actually returned rather than the
# global _nCrops, so the percentage stays correct if the two ever differ;
# an empty result reports 0.0 instead of raising ZeroDivisionError.
n_predictions = len(scores)
accuracy = (acc_count / n_predictions) * 100 if n_predictions else 0.0
print(f"The final accuracy is {accuracy}%.")

max value:0.25584 and index is 21
0: 0.02036
1: 0.01523
2: 0.00327
3: 0.01500
4: 0.00185
5: 0.01500
6: 0.00157
7: 0.03687
8: 0.00566
9: 0.00626
10: 0.00041
11: 0.00889
12: 0.00241
13: 0.00130
14: 0.00768
15: 0.01073
16: 0.00192
17: 0.00268
18: 0.01037
19: 0.02054
20: 0.01094
21: 0.25584
22: 0.00096
23: 0.05338
24: 0.03441
25: 0.00356
26: 0.03165
27: 0.00074
28: 0.23878
29: 0.01576
30: 0.02520
31: 0.01125
32: 0.00176
33: 0.01128
34: 0.00916
35: 0.00525
36: 0.00324
37: 0.00399
38: 0.00588
39: 0.01264
40: 0.00174
41: 0.00749
42: 0.00147
43: 0.00530
44: 0.00686
45: 0.01179
46: 0.00207
47: 0.00199
48: 0.01546
49: 0.02216
max value:0.72939 and index is 17
0: 0.00595
1: 0.00425
2: 0.01920
3: 0.00225
4: 0.01214
5: 0.00149
6: 0.00239
7: 0.00093
8: 0.00085
9: 0.00064
10: 0.00163
11: 0.00295
12: 0.00102
13: 0.00275
14: 0.00290
15: 0.00239
16: 0.00052
17: 0.72939
18: 0.06877
19: 0.00340
20: 0.00320
21: 0.01109
22: 0.03436
23: 0.00097
24: 0.00340
25: 0.00483
26: 0.00607
27: 0.00430
28: 0.00086
29: 