/
get_delay.py
67 lines (45 loc) · 1.65 KB
/
get_delay.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import sys
import wave
import numpy as np
# Command line: exactly two positional arguments are expected, the near-end
# recording followed by the far-end (reference) recording.
if len(sys.argv[1:]) != 2:
    print('Usage: {} near.wav far.wav'.format(sys.argv[0]))
    sys.exit(1)

# Open both recordings for reading, near-end first.
near, far = (wave.open(wav_path, 'rb') for wav_path in sys.argv[1:3])

# The near-end file dictates the processing parameters.  N is the number of
# frames handled per iteration; it equals the sample rate, i.e. one second
# of audio.
N = rate = near.getframerate()
channels = near.getnchannels()
def gcc_phat(sig, refsig, fs=1, max_tau=None, interp=1):
    '''
    Estimate the offset between the signal sig and the reference signal
    refsig using the Generalized Cross Correlation - Phase Transform
    (GCC-PHAT) method.

    Parameters:
        sig: 1-D numpy array holding the signal.
        refsig: 1-D numpy array holding the reference signal.
        fs: sampling rate used to convert the lag from samples to time
            units; with the default of 1 the result is in samples.
        max_tau: optional bound on the absolute delay, in the same units as
            the returned tau.  A falsy value (None or 0) means the search
            covers the full range of lags.
        interp: factor by which the length of the inverse FFT is multiplied.

    Returns:
        (tau, cc): tau is the lag with the largest absolute correlation,
        divided by interp * fs; a positive tau means sig lags refsig.
        cc is the correlation restricted to lags -max_shift..+max_shift,
        so the zero lag sits at index max_shift.
    '''
    # Make sure the FFT length is greater than or equal to
    # len(sig) + len(refsig).
    n = sig.shape[0] + refsig.shape[0]

    # Generalized Cross Correlation Phase Transform
    SIG = np.fft.rfft(sig, n=n)
    REFSIG = np.fft.rfft(refsig, n=n)
    R = SIG * np.conj(REFSIG)

    # PHAT weighting: keep only the phase of every bin.  Bins whose magnitude
    # is exactly zero are left at zero instead of being divided 0/0; a single
    # NaN bin would otherwise turn the whole inverse FFT into NaN.
    magnitude = np.abs(R)
    whitened = np.zeros_like(R)
    np.divide(R, magnitude, out=whitened, where=magnitude > 0)

    cc = np.fft.irfft(whitened, n=(interp * n))

    max_shift = int(interp * n / 2)
    if max_tau:
        max_shift = min(int(interp * fs * max_tau), max_shift)

    # Put the negative lags in front of the non-negative ones.  The start
    # index is computed explicitly because cc[-0:] would select the whole
    # array when max_shift is 0.
    cc = np.concatenate((cc[cc.shape[0] - max_shift:], cc[:max_shift + 1]))

    # find max cross correlation index
    shift = np.argmax(np.abs(cc)) - max_shift

    tau = shift / float(interp * fs)

    return tau, cc
# Walk through both recordings in lock-step, N frames at a time, and print
# the estimated delay of every near-end channel against the far-end
# reference for each frame.
try:
    far_channels = far.getnchannels()

    while True:
        sig = near.readframes(N)
        # The factor 2 is the byte width of a 16-bit sample, matching the
        # int16 decoding below.  A short read means the near-end file has no
        # complete frame left.
        if len(sig) != 2 * N * channels:
            break

        ref = far.readframes(N)
        # Stop as well when the far-end file cannot supply a complete frame.
        if len(ref) != 2 * N * far_channels:
            break

        # np.frombuffer replaces np.fromstring, which is deprecated for
        # binary input.
        data = np.frombuffer(sig, dtype='int16')
        # Frames are interleaved by channel; use the first far-end channel
        # as the reference so its length matches one near-end channel.
        ref_buf = np.frombuffer(ref, dtype='int16')[::far_channels]

        offsets = []
        for ch in range(channels):
            sig_buf = data[ch::channels]
            # With fs=1 the delay is expressed in samples and the search is
            # limited to +/- N/2 samples.  Passing fs=rate together with a
            # max_tau in seconds would yield the delay in seconds instead.
            tau, _ = gcc_phat(sig_buf, ref_buf, fs=1, max_tau=N/2, interp=1)

            # Store a plain float so the printed list looks the same
            # regardless of how the installed NumPy formats its scalars.
            offsets.append(float(tau))

        print(offsets)
finally:
    # Release both file handles even if processing fails part-way.
    near.close()
    far.close()