### This script will segment audio recordings according to the voice activity: covert vs overt

### The result can be used for a binary classification between covert vs overt.

Method: detect the offset of covert/overt speech in each denoised audio trial, then use this offset to segment the EEG data.

In [2]:
from speech_pinyin_Ruijin.config import *  # kept first so the explicit imports below win over any same-named exports
import glob
import itertools
import os

import matplotlib.pylab as plt
import mne
import numpy as np
from scipy.io import wavfile

from utils.util_MNE import keep_annotation

2024-09-04 03:00:04 - Start Program

pre_all: Running from CMD.
common_dl.py: Using CUDA.


In [104]:
sid=1
# Locate the subject folder "<data_dir><sid>-*". The matches are sorted so the
# choice is deterministic (glob returns them in arbitrary order), and a missing
# folder is reported explicitly instead of surfacing as a bare IndexError.
pattern=data_dir+str(sid)+'-*'
matches=sorted(glob.glob(pattern))
if not matches:
    raise FileNotFoundError('No subject folder matches '+pattern)
folder=os.path.normpath(matches[0])
folder=folder.replace("\\", "/")  # use forward slashes so the path concatenations below are uniform
audio_folder=folder+'/raw/matlab/result/'
files=['name_202408280901','name_202408280930','name_202408280959']
session=3

In [127]:
# Load the per-trial sample counts and the denoised recording of this session.
processed_prefix=folder+'/processed/session'+str(session)
filename=processed_prefix+'_audio_length_in_sample.npy' # per-trial lengths, in samples
lengths=np.load(filename, allow_pickle=True)
filename=processed_prefix+'_clean_audio_padded_denoised.wav' # denoised audio
sf_audio,audio=wavfile.read(filename)

In [128]:
def get_sample(index, trial_lengths=None): # index: 0-->143.
    """Return the ``(start, end)`` sample range of one trial in the concatenated audio.

    Parameters
    ----------
    index : int
        Zero-based trial number.
    trial_lengths : sequence of int, optional
        Per-trial lengths in samples. When omitted, the notebook-level
        ``lengths`` array is used, so existing ``get_sample(i)`` calls
        behave exactly as before.

    Returns
    -------
    tuple
        ``(start, end)`` where ``start`` is the summed length of all earlier
        trials and ``end`` is ``start`` plus the length of trial ``index``;
        intended for slicing as ``audio[start:end]``.
    """
    if trial_lengths is None:
        trial_lengths=lengths
    # sum() of an empty slice is 0, so the first trial needs no special case.
    start=sum(trial_lengths[:index])
    end=start+trial_lengths[index]
    return start,end

### Check the data and try different methods to find the voicing threshold

In [111]:
# Pull out one trial for inspection; index 143 is the 144th trial.
start,end=get_sample(143)
trial=audio[start:end]

In [112]:
%matplotlib qt
plt.plot(trial)

[<matplotlib.lines.Line2D at 0x23838b3cb00>]

Zoom in the transition region:

![](img\8.png)

Zooming in further shows that the high-frequency power increases:

![](img\9.png)

According to Google, the male voice is concentrated around 120 Hz, but Audacity produces the spectrogram below for one trial:

![](img\10.png)

Extracting the high-frequency component and using it as the marker should be a better solution. But, for now, I can tolerate some misalignment.


In [113]:
from speech_Dutch.baseline_linear_regression.extract_features import hilbert3
import scipy.signal  # import the submodule explicitly; a bare `import scipy` is not guaranteed to expose scipy.signal
sr=48000  # audio sampling rate in Hz assumed by the filters in this notebook
nyquist=sr / 2
# Linear detrend: remove the low-frequency trend
data = scipy.signal.detrend(audio, axis=0)
# Extract 90-130 Hz; sosfiltfilt runs the filter forward and backward
sos = scipy.signal.iirfilter(4, [90 / nyquist, 130 / nyquist], btype='bandpass', output='sos')
data = scipy.signal.sosfiltfilt(sos, data, axis=0)
# Envelope: magnitude of hilbert3's output (presumably the analytic signal - confirm in extract_features)
data_env = np.abs(hilbert3(data))

In [114]:
start,end=get_sample(16) # index 16 -> the 17th audio trial
# Overlay the band-passed signal and its envelope on the current axes.
ax=plt.gca()
ax.plot(data[start:end])
ax.plot(data_env[start:end])

[<matplotlib.lines.Line2D at 0x23838bfd0a0>]

Extracting 90-130 Hz is not a good idea:

![](img\11.png)

In [51]:
# use the MNE method: wrap the audio as a single-channel Raw object
# NOTE(review): the channel is typed 'eeg' although it carries audio - presumably
# so that MNE handles it as a data channel when filtering; confirm.
ch_names,ch_types=['audio'],['eeg']
info = mne.create_info(ch_names, sr, ch_types)
raw = mne.io.RawArray(audio[None,:], info)

In [61]:
# Band-pass 90-130 Hz with MNE. A copy is filtered because Raw.filter works in
# place: filtering `raw` itself would make re-running this cell filter data that
# has already been filtered. The samples stay in a NumPy array; converting the
# ~40 M samples to a Python list costs a lot of memory and gains nothing.
data2=raw.copy().filter(90,130).get_data().squeeze() # 1-D array, one value per audio sample
data_env2 = np.abs(hilbert3(data2))

In [63]:
# Overlay the MNE-filtered signal and its envelope; start/end still hold the
# range from the most recent get_sample call.
ax=plt.gca()
ax.plot(data2[start:end])
ax.plot(data_env2[start:end])

[<matplotlib.lines.Line2D at 0x23836d4a390>]

# same as previous methods:

![](img\12.png)

In [131]:
# work on the audio directly, without isolating a narrow band
# 10 Hz high-pass (scipy.signal.detrend was the alternative tried here)
cutoff=10 / (sr / 2)  # cutoff divided by the Nyquist frequency
sos = scipy.signal.iirfilter(4, [cutoff], btype='highpass', output='sos')
tmp = scipy.signal.sosfiltfilt(sos, audio, axis=0)
audio_env = np.abs(hilbert3(tmp))  # magnitude of hilbert3's output, used as the envelope

In [116]:
# Overlay the high-passed audio and its envelope for the trial in start/end.
ax=plt.gca()
ax.plot(tmp[start:end])
ax.plot(audio_env[start:end])

[<matplotlib.lines.Line2D at 0x23838cc6930>]

Looking good. A simple threshold method can be used:

![](img\13.png)

### Extract intervals

In [137]:
# read the denoised audio and its length
filename=folder+'/processed/session'+str(session)+'_audio_length_in_sample.npy' # length
# NOTE: allow_pickle=True can execute code stored in the file; only load files you produced yourself.
lengths=np.load(filename, allow_pickle=True)
filename=folder+'/processed/session'+str(session)+'_clean_audio_padded_denoised.wav' # denoised audio
sf_audio,audio=wavfile.read(filename)
# The filter design, the search windows and the exported wav below all hard-code
# 48 kHz; stop here if the file disagrees rather than produce misaligned offsets.
assert sf_audio==48000, 'expected 48 kHz audio, got '+str(sf_audio)+' Hz'

In [138]:
# work on the audio directly, without isolating a narrow band
# 10 Hz high-pass (scipy.signal.detrend was the alternative tried here)
cutoff=10 / (sr / 2)  # cutoff divided by the Nyquist frequency
sos = scipy.signal.iirfilter(4, [cutoff], btype='highpass', output='sos')
tmp = scipy.signal.sosfiltfilt(sos, audio, axis=0)
audio_env = np.abs(hilbert3(tmp))  # envelope that the threshold detection below reads

In [158]:
import copy
# Threshold-based voice-activity detection, one trial at a time.
# For every trial the envelope is scanned forward for the first sample above
# `threshold` (onset) and backward for the last one (offset). Both indices are
# relative to the start of the trial. A trial with no crossing gets None, so
# `starts` and `ends` always hold exactly one entry per trial and stay aligned
# with the trial index.
VAD_SR=48000        # Hz; converts the search windows below from seconds to samples
threshold=50        # envelope value above which a sample counts as voice activity
marker=audio.max()  # written at the detected samples so they stand out in the exported wav
starts=[]
ends=[]
trial_list_VAD=[]
for i in range(144):
    start,end=get_sample(i) # sample range of trial i in the concatenated audio
    trial=copy.deepcopy(audio[start:end])  # copy, so writing the marker never alters `audio`
    trial_env=audio_env[start:end]
    if i==0:
        # the first trial uses its own search window, specified in samples
        start1=116500/VAD_SR
        start2=start1+0.7
        end1=188400/VAD_SR
        end2=end1-0.7
    else:
        # all other trials: search forward from 0.7 s up to 2.8 s for the onset,
        # and backward from 2.8 s down to 0.7 s for the offset
        start1=0.7
        start2=2.8
        end1=2.8
        end2=0.7
    # clamp both scans to the samples that actually exist in this trial
    n_samples=min(len(trial),len(trial_env))
    onset=None
    for j in range(int(start1*VAD_SR),min(int(start2*VAD_SR),n_samples)):
        if trial_env[j]>threshold:
            onset=j
            trial[j]=marker
            break
    offset=None
    for k in range(min(int(end1*VAD_SR),n_samples-1),int(end2*VAD_SR),-1):
        if trial_env[k]>threshold:
            offset=k
            trial[k]=marker
            break
    if onset is None or offset is None:
        print('trial %d: no envelope sample above threshold (onset=%s, offset=%s)' % (i,onset,offset))
    starts.append(onset)
    ends.append(offset)
    trial_list_VAD.append(trial)


In [159]:
# sanity check: overlay the envelope and the waveform of one trial
ind=27
seg_start,seg_end=get_sample(ind)
ax=plt.gca()
ax.plot(audio_env[seg_start:seg_end])
ax.plot(audio[seg_start:seg_end])

[<matplotlib.lines.Line2D at 0x2383ac4da00>]

In [160]:
# Stitch the marked trials back into one continuous recording. np.concatenate
# joins the arrays directly; chaining them through a Python list would turn
# every single sample into a separate Python object first.
audio_VAD=np.concatenate(trial_list_VAD)
# Total duration in seconds. Notes from earlier runs:
# ceil: 842.3416666666667, floor: 842.3388125; EEG: first trial + 835.21 = 842.376
print(len(audio_VAD)/48000)
filename=folder+'/processed/session'+str(session)+'_audio_VAD.wav'
wavfile.write(filename,48000,audio_VAD)

848.3517083333334


In [161]:
# Overlay the envelope and the marked audio of one trial to inspect the detected points.
ind=27
seg_start,seg_end=get_sample(ind)
ax=plt.gca()
ax.plot(audio_env[seg_start:seg_end])
ax.plot(audio_VAD[seg_start:seg_end])

[<matplotlib.lines.Line2D at 0x2383acfbce0>]

Looking good to me:

![](img\14.png)

![](img\15.png)

In [162]:
# Number of detected onsets, then number of detected offsets (one per line).
print(len(starts), len(ends), sep='\n')

144
144


In [167]:
# Pair each onset with the offset at the same position and save the pairs.
result=[list(pair) for pair in zip(starts,ends)]
filename=folder+'/processed/session'+str(session)+'_audio_VAD.npy'
np.save(filename, np.asarray(result, dtype=object), allow_pickle=True)