In [None]:
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import os

## Extract Root-Mean-Square Energy and Zero-Crossing Rate from Audio

In [None]:
# Directory that receives every figure saved by this notebook.
figpath = os.path.join(os.getcwd(), "figures")
# savefig does not create missing directories, so make sure the target exists
# before any plotting cell tries to write into it.
os.makedirs(figpath, exist_ok=True)

In [None]:
# Look up librosa's "nutcracker" example; the result is used by the following
# cells as the file handed to librosa.load and ipd.Audio.
filename = librosa.example("nutcracker")

In [None]:
# Decode the example file into its samples (`y`) and sampling rate (`sr`).
# No loading options are passed, so librosa's defaults apply.
y, sr = librosa.load(filename)

In [None]:
# Last expression of the cell, so the notebook renders it as an audio player.
ipd.Audio(filename)

In [None]:
# Extract the frame-wise root-mean-square energy (RMSE) with librosa.

# Analysis window size and step between consecutive windows, in samples.
FRAME_LENGTH = 1024
HOP_LENGTH = 512
# Misspelled original name, kept because other cells still refer to it.
FRAME_LENGHT = FRAME_LENGTH

# `y` is passed by keyword: librosa >= 0.10 made the arguments of
# librosa.feature.rms keyword-only, so a positional `y` raises TypeError there.
# The keyword form also works on older releases.
# [0] selects the first row of the returned array so that rms_y is 1-D.
rms_y = librosa.feature.rms(y=y, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)[0]

In [None]:
# Display the frame-wise RMS values computed by librosa.
rms_y

In [None]:
# Shape of the RMS array, i.e. how many frames were produced.
rms_y.shape

In [None]:
# Overlay the frame-wise RMS energy on the waveform and save the figure.

# Convert frame indices to seconds with the sampling rate the audio was loaded
# with, instead of relying on librosa's default rate.
frames = range(rms_y.size)
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)

# librosa.display.waveplot was removed in librosa 0.10; prefer its replacement
# `waveshow` and fall back to `waveplot` on releases that predate it.
draw_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot

fig, ax = plt.subplots(figsize=(14, 4), dpi=100)
draw_waveform(y, sr=sr, alpha=0.5, ax=ax, label="Waveform")
ax.plot(t, rms_y, color="r", label="RMSE")
ax.set_title("RMSE Feature Extraction")
# Explicit labels keep the legend correct regardless of the order in which
# matplotlib collects the artists.
ax.legend()

# Save only after the legend has been added so the PDF contains it, and make
# sure the output directory exists because savefig does not create it.
os.makedirs(figpath, exist_ok=True)
fig.savefig(os.path.join(figpath, "RMSE.pdf"), dpi=300)
plt.show()


In [None]:
# Shape of the loaded waveform array.
y.shape

In [None]:
# Shape of the RMS array, for comparison with the shape of `y`.
rms_y.shape

$$
 \text{RMS}_t = \sqrt{\frac{1}{K}  \sum_{k = t \cdot K}^{(t+1) \cdot K-1} s(k)^2   }
$$

In [None]:
# Building RMSE from first principles

def rms(signal, frame_length, hop_length):
    """Compute the frame-wise root-mean-square energy of a 1-D signal.

    Frame ``j`` covers ``signal[j * hop_length : j * hop_length + frame_length]``.
    Frames that run past the end of the signal contain fewer samples but are
    still divided by ``frame_length``, which is equivalent to zero-padding
    the tail of the signal.

    Parameters
    ----------
    signal : array-like
        1-D sequence of samples. Integer (or boolean) samples are converted
        to float64 before squaring so that, e.g., int16 PCM values cannot
        overflow.
    frame_length : int
        Number of samples per frame; must be positive.
    hop_length : int
        Number of samples between consecutive frame starts; must be positive.

    Returns
    -------
    numpy.ndarray
        One RMS value per frame start in ``range(0, signal.size, hop_length)``;
        empty when the signal is empty.

    Raises
    ------
    ValueError
        If ``frame_length`` or ``hop_length`` is not positive.
    """
    if frame_length <= 0:
        raise ValueError("frame_length must be a positive integer")
    if hop_length <= 0:
        raise ValueError("hop_length must be a positive integer")

    samples = np.asarray(signal)
    # Squaring narrow integer types wraps around silently; floating-point
    # input is left untouched so its results are unchanged.
    if samples.dtype.kind in "iub":
        samples = samples.astype(np.float64)

    frame_values = [
        np.sqrt(np.sum(samples[start:start + frame_length] ** 2) / frame_length)
        for start in range(0, samples.size, hop_length)
    ]
    return np.array(frame_values)

In [None]:
# Hand-rolled RMS using the same frame and hop constants as the librosa call.
p_rms = rms(y, frame_length=FRAME_LENGHT, hop_length=HOP_LENGTH)
p_rms.size

In [None]:
# Compare the first-principles RMS (top panel) with librosa's RMS (bottom panel).

# librosa.display.waveplot was removed in librosa 0.10; prefer its replacement
# `waveshow` and fall back to `waveplot` on releases that predate it.
draw_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot

fig, axes = plt.subplots(2, 1, figsize=(14, 4), dpi=100)
panels = [
    (axes[0], p_rms, "RMSE Feature Extraction FP"),
    (axes[1], rms_y, "RMSE Feature Extraction"),
]
for ax, values, title in panels:
    # Each curve gets a time axis of its own length: the two implementations
    # are not guaranteed to yield the same number of frames, and reusing a `t`
    # left over from another cell would then fail with a shape mismatch.
    times = librosa.frames_to_time(np.arange(values.size), sr=sr, hop_length=HOP_LENGTH)
    draw_waveform(y, sr=sr, alpha=0.5, ax=ax, label="Waveform")
    ax.plot(times, values, color="r", label="RMSE")
    ax.set_title(title)
    # Explicit labels keep the legend correct regardless of artist order.
    ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Zero-crossing rate (ZCR), framed with the same constants as the RMS cells.
# [0] selects the first row of the returned array so that zcr_y is 1-D.
zcr_y = librosa.feature.zero_crossing_rate(
    y=y,
    frame_length=FRAME_LENGHT,
    hop_length=HOP_LENGTH,
)[0]

In [None]:
# Shape of the ZCR array, i.e. how many frames were produced.
zcr_y.shape

In [None]:
# Overlay the frame-wise zero-crossing rate on the waveform and save the figure.

# Convert frame indices to seconds with the sampling rate the audio was loaded
# with, instead of relying on librosa's default rate.
frames = range(zcr_y.size)
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)

# librosa.display.waveplot was removed in librosa 0.10; prefer its replacement
# `waveshow` and fall back to `waveplot` on releases that predate it.
draw_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot

fig, ax = plt.subplots(figsize=(14, 4), dpi=100)
draw_waveform(y, sr=sr, alpha=0.5, ax=ax, label="Waveform")
ax.plot(t, zcr_y, color="r", label="ZCR")
ax.set_title("ZCR Feature Extraction")
# Explicit labels keep the legend correct regardless of the order in which
# matplotlib collects the artists.
ax.legend()

# Save only after the legend has been added so the PDF contains it, and make
# sure the output directory exists because savefig does not create it.
os.makedirs(figpath, exist_ok=True)
fig.savefig(os.path.join(figpath, "ZCR.pdf"), dpi=300)
plt.show()


In [None]:
# Directory holding the continuous recordings. The cluster path below is only
# the default; set the ARU_RECORDINGS_DIR environment variable to point the
# notebook at a copy stored elsewhere.
p = os.environ.get(
    "ARU_RECORDINGS_DIR",
    "/net/projects/scratch/winter/valid_until_31_July_2022/krumnack/animal-communication-data/Chimp_IvoryCoast/aru_continuous_recordings",
)

In [None]:
# Full paths of all regular files in `p` whose name ends in ".wav".
# os.listdir returns entries in arbitrary order, so the result is sorted to
# make the listing reproducible between runs and machines. The cheap suffix
# test runs before the filesystem check.
files = sorted(
    os.path.join(p, name)
    for name in os.listdir(p)
    if name.endswith('.wav') and os.path.isfile(os.path.join(p, name))
)

In [None]:
# Display the collected .wav paths.
files

In [None]:
# File name of a single recording.
# NOTE(review): presumably one of the entries listed in `files`; where it is
# used lies outside this view, so confirm against the following cells.
f = 'ARU11_20120415_170000.wav'
