In [None]:
import os

import IPython.display
import librosa
import matplotlib.pyplot
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Model

import lib.util as ut

%matplotlib inline

## Load the model we want to make inferences with

In [2]:
# Location of the trained model file that is loaded for inference further down.
model_path = './trained_models/model_paper/model_25epochs.h5'

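## Divide audio files into fixed-length chunks. Consecutive chunks start `interval_step * intervals_seconds / 2` samples apart, so for 3-second chunks `interval_step=sample_rate` gives 50% overlap (the calls below pass `interval_step=12000` at 8 kHz, i.e. an 18000-sample hop and 25% overlap)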

In [13]:
def divide_audio_file(path, file, intervals_seconds, interval_step, sample_rate=8000, save_chunks=True):
    """Split one .wav file into fixed-length, possibly overlapping chunks.

    The file ``<path>/<file>.wav`` is loaded (and resampled by librosa) at
    ``sample_rate``. Chunks of ``intervals_seconds`` seconds are then cut every
    ``int(interval_step * intervals_seconds / 2)`` samples. A trailing chunk
    shorter than the requested length is ignored, and chunks that
    ``ut.is_silence`` flags (threshold 0.70) are left out of the result.

    Parameters
    ----------
    path : str
        Directory that contains the audio file.
    file : str
        File name without the ``.wav`` extension.
    intervals_seconds : int or float
        Length of each chunk, in seconds.
    interval_step : int or float
        Hop factor; the distance in samples between two consecutive chunk
        starts is ``int(interval_step * intervals_seconds / 2)``.
    sample_rate : int, optional
        Sample rate used to load the audio and to size the chunks.
    save_chunks : bool, optional
        When true (the default, matching the historical behaviour) every
        full-length chunk is also written to ``<path>/<file><j>.wav``, where
        ``j`` is the running chunk index (silent chunks included).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_chunks, chunk_length)`` holding the kept chunks;
        ``n_chunks`` is 0 when no chunk qualifies.

    Raises
    ------
    ValueError
        If the chunk length or the hop evaluates to a non-positive number of
        samples.
    """
    chunk_len = int(sample_rate * intervals_seconds)
    hop = int(interval_step * intervals_seconds / 2)
    if chunk_len <= 0 or hop <= 0:
        raise ValueError(
            "chunk length ({}) and hop ({}) must both be positive".format(chunk_len, hop)
        )

    # Load the file and convert it to the requested sample rate. `sr` is passed
    # by keyword because recent librosa releases no longer accept it positionally.
    file_path = os.path.join(path, file + ".wav")
    audio, _ = librosa.load(file_path, sr=sample_rate)

    kept_chunks = []
    # Stopping the range at the last start that still yields a full chunk drops
    # the shorter trailing piece without ever writing it to disk.
    last_start = audio.shape[0] - chunk_len
    for j, start in enumerate(range(0, last_start + 1, hop)):
        interval = audio[start:start + chunk_len]

        if save_chunks:
            # librosa.output.write_wav was removed in librosa 0.8, so the chunk
            # is written with scipy instead; samples are stored unnormalised,
            # as the previous norm=False call did.
            from scipy.io import wavfile
            chunk_path = os.path.join(path, "{}{}.wav".format(file, j))
            wavfile.write(chunk_path, sample_rate, interval)

        if ut.is_silence(interval, thresold_samples=0.70):
            print("Omitting chunk with silences in file {}".format(file_path))
        else:
            kept_chunks.append(interval)

    if not kept_chunks:
        # Keep the result two-dimensional even when nothing was kept.
        return np.empty((0, chunk_len), dtype=audio.dtype)
    return np.array(kept_chunks)

## Run inference on a whole audio file by dividing it into overlapping chunks (the overlap is controlled by `interval_step`) and taking the majority class among the chunks as the class of the file

In [21]:
def predict_one_audio(path,file,label,interval_seconds,interval_step,sample_rate):
    """Classify a whole audio file by majority vote over its chunks.

    The file is split with ``divide_audio_file``; every chunk is scored by the
    notebook-level ``model`` (which must be loaded before this function is
    called), each score is thresholded at 0.5, and the class predicted for the
    majority of chunks becomes the class of the file. Ties resolve to class 0.

    Parameters
    ----------
    path : str
        Directory that contains the audio file.
    file : str
        File name without the ``.wav`` extension.
    label : float
        Ground-truth label of the file. It is accepted for interface
        compatibility but does not influence the prediction.
    interval_seconds : int
        Length of each chunk, in seconds.
    interval_step : int or float
        Hop factor forwarded to ``divide_audio_file``.
    sample_rate : int
        Sample rate used to load the audio and to size the chunks.

    Returns
    -------
    tuple
        ``(res, vc, predictions)``: the file-level class (0 or 1), the pandas
        value counts of the per-chunk classes (only classes that actually
        occur are listed), and the raw output of ``model.predict``.

    Raises
    ------
    ValueError
        If the file yields no usable chunk.
    """
    audio_chunks = divide_audio_file(path, file, interval_seconds, interval_step, sample_rate)
    n_chunks = audio_chunks.shape[0]
    if n_chunks == 0:
        raise ValueError(
            "No usable audio chunks found for file {!r} in {!r}".format(file, path)
        )

    # The model expects a trailing channel axis: (chunks, samples, 1).
    audio_chunks = audio_chunks.reshape([n_chunks, interval_seconds * sample_rate, 1])
    predictions = model.predict(audio_chunks)

    vc = pd.DataFrame(predictions)[0].apply(lambda x: 0 if x < 0.5 else 1).value_counts()
    # value_counts() lists only the classes that occur, so a unanimous vote has
    # a single entry; .get() supplies 0 for the absent class instead of raising.
    negatives = vc.get(0, 0)
    positives = vc.get(1, 0)
    res = 1 if negatives < positives else 0

    return (res, vc, predictions)
    
    

## Start of the execution

In [15]:
from keras.models import load_model

# Restore the trained network from disk, then compile it with the settings
# below and print its layer-by-layer summary.
model = load_model(model_path)
model.compile(
    loss='binary_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 24000, 1)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 24000, 64)         8320      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3000, 64)          0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 3000, 64, 1)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 3000, 64, 96)      5952      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 375, 64, 96)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 375, 64, 128)      430208    
__________

In [40]:
# Directory that holds the .wav recordings used in this notebook.
audio_path = "./data/audiodata/"


In [37]:
# Table of test files. Judging by the preview printed further down, column "0"
# holds the file name (without extension) and "class" the binary label; the
# meaning of column "1" is not documented here -- TODO confirm.
test_df = pd.read_csv("./data/test_files.csv")

## Prediction on whole audio files

In [33]:
# Preview rows 1 through 29 (by position) of the test table.
test_df.iloc[1:30]

Unnamed: 0,0,1,class
1,06-09-13_2310_2340,8.5,1
2,07-05-16_1440_1470,-5.9,0
3,07-09-17_720_750,1.1,1
4,06-11-08_1410_1440,7.4,1
5,06-05-10_1500_1530,6.8,1
6,06-12-13_1680_1710,-2.5,0
7,06-12-20_1410_1440,-1.7,0
8,08-01-09_1500_1530,-4.8,0
9,07-02-21_1770_1800,4.7,1
10,07-02-28_390_420,2.8,1


## Predict a positive (CONFLICT) sample

In [42]:
# Positive (CONFLICT) example: listed with class 1 in the test table preview.
fileName = "06-09-13_2310_2340"
# Build the path from the shared `audio_path` directory instead of repeating
# the literal; the result is the same "./data/audiodata/<fileName>.wav".
IPython.display.Audio(os.path.join(audio_path, fileName + ".wav"))

In [43]:
# Classify the recording selected above: 3-second chunks at 8 kHz.
globalPred, labelTable, chunkPred = predict_one_audio(
    './data/audiodata',
    fileName,
    1.0,
    interval_seconds=3,
    interval_step=12000,
    sample_rate=8000,
)
print("Prediction for whole file:", globalPred)
print("Table of label predictions:", labelTable, sep="\n")
print("Predictions on each chunk of 3 seconds", chunkPred, sep="\n")




Prediction for whole file: 1
Table of label predictions:
1    8
0    5
Name: 0, dtype: int64
Predictions on each chunk of 3 seconds
[[0.94004405]
 [0.88304025]
 [0.99207425]
 [0.6163503 ]
 [0.8591105 ]
 [0.73453265]
 [0.4647157 ]
 [0.74546194]
 [0.9575749 ]
 [0.3368271 ]
 [0.43863755]
 [0.01397596]
 [0.05557631]]


In [34]:
# Negative (no conflict) example: listed with class 0 in the test table preview.
fileName = "07-05-16_1440_1470"
# Build the path from the shared `audio_path` directory instead of repeating
# the literal; the result is the same "./data/audiodata/<fileName>.wav".
IPython.display.Audio(os.path.join(audio_path, fileName + ".wav"))


In [35]:
# Classify the recording selected above: 3-second chunks at 8 kHz.
# This file is a class-0 example, so the ground-truth label passed is 0.0
# (the previous 1.0 was carried over from the positive-sample cell).
globalPred, labelTable, chunkPred = predict_one_audio(
    './data/audiodata',
    fileName,
    0.0,
    interval_seconds=3,
    interval_step=12000,
    sample_rate=8000,
)
print("Prediction for whole file:", globalPred)
print("Table of label predictions:")
print(labelTable)
print("Predictions on each chunk of 3 seconds")
print(chunkPred)





Prediction for whole file: 0
Table of label predictions:
0    10
1     3
Name: 0, dtype: int64
Predictions on each chunk of 3 seconds
[[9.9473298e-03]
 [5.3702247e-01]
 [1.2868643e-04]
 [5.9960783e-03]
 [5.9604645e-08]
 [2.2719017e-01]
 [7.2718263e-03]
 [7.2956777e-01]
 [7.1989700e-02]
 [6.2233355e-02]
 [5.0533493e-04]
 [7.7199548e-01]
 [2.3298885e-03]]
