In [1]:
import time
import numpy as np
import scipy
import matplotlib
import matplotlib.pyplot as plt
import pyaudio
import librosa
import librosa.display
import realtime_analysis as ra

In [2]:
# Default size (width, height) for every figure drawn in this notebook.
matplotlib.rc("figure", figsize=(14, 4))

# Audio settings shared by the cells below.
RATE = 44100               # passed as the stream sampling rate
CHANNELS = 1
SAMPLES_PER_BUFFER = 1024  # passed as frames_per_buffer when opening the streams
WIN_LENGTH = 1024          # analysis window handed to the onset-detection pipeline
HOP_LENGTH = 512           # hop between successive analysis chunks

# Report the three time scales implied by the settings above (samples / RATE).
print(
    *(
        "{} {}".format(label, seconds)
        for label, seconds in (
            ("sample resolution:", 1 / RATE),
            ("frame resolution:", HOP_LENGTH / RATE),
            ("buffer length:", SAMPLES_PER_BUFFER / RATE),
        )
    ),
    sep="\n",
)

sample resolution: 2.2675736961451248e-05
frame resolution: 0.011609977324263039
buffer length: 0.023219954648526078


# Test: real-time onset detection on a synthetic click track

In [3]:
# generate click sound for test
click_duration = 8*4 + 1  # total length of the test signal, in seconds
click_offset = 1          # time of the first click, in seconds
click_bps = 1             # clicks per second
# Sample-domain equivalents of the spacing and the first click position
# (not referenced by the cells visible here).
click_interval = int(RATE/click_bps)
click_first = int(click_offset*RATE)
total_length = int(click_duration*RATE)  # length of the signal in samples

# One click every 1/click_bps seconds, from click_offset up to (not including)
# click_duration.
click_times = np.arange(click_offset, click_duration, 1/click_bps)
# `times` is passed by keyword: librosa >= 0.10 makes the arguments of
# librosa.clicks keyword-only, while older releases accept the keyword as well.
click = librosa.clicks(times=click_times, sr=RATE, length=total_length)

In [4]:
# output stream callback
playback_index = 0  # position in `click` of the next sample to hand to the stream


def output_callback(in_data, frame_count, time_info, status):
    """Feed the next `frame_count` samples of `click` to the output stream.

    Uses the module-level cursor `playback_index` to remember how far playback
    has progressed between calls.

    Returns:
        (bytes, pyaudio.paContinue) while samples remain in `click`, otherwise
        (None, pyaudio.paComplete). The final chunk may hold fewer than
        `frame_count` samples.
    """
    global playback_index

    start = playback_index
    if start < len(click):
        # Advance the cursor first, then slice up to the new position.
        playback_index = start + frame_count
        return click[start:playback_index].tobytes(), pyaudio.paContinue

    # Everything has been handed over already.
    return None, pyaudio.paComplete

In [5]:
# input stream callback

# State shared with the input callback.
record_index = 0  # number of hop-sized chunks sent to the detector so far
onset_times = []  # one entry appended per detected onset
lag = 0.1         # subtracted from every onset time; presumably the detector's latency in seconds -- TODO confirm

# Onset detector assembled from realtime_analysis stages. The callback drives
# it with dect.send(chunk) and treats a truthy reply as "onset detected".
dect = ra.pipe(
    ra.frame(WIN_LENGTH, HOP_LENGTH),
    ra.power_freq(RATE, WIN_LENGTH),
    ra.onset_strength(),
    ra.onset_detect(RATE, HOP_LENGTH, delta=1),
)
# Advance once before the first send(); a plain Python generator (presumably
# what ra.pipe returns -- verify) must be primed this way.
next(dect)
def input_callback(in_data, frame_count, time_info, status):
    """Run onset detection on one captured buffer (PyAudio input callback).

    The raw buffer is read as float32 samples and sent to `dect` in chunks of
    HOP_LENGTH samples. `record_index` counts the chunks sent so far; whenever
    the detector reports an onset, its time is computed from that counter,
    reduced by `lag`, appended to `onset_times`, and its deviation from the
    nearest whole second is printed.

    Slicing by sample index assumes a single-channel stream (CHANNELS is 1).

    Returns:
        (in_data, pyaudio.paContinue), so the stream keeps recording.
    """
    global record_index
    audio_data = np.frombuffer(in_data, dtype=np.float32)

    for start in range(0, frame_count, HOP_LENGTH):
        record_index += 1
        if dect.send(audio_data[start:start + HOP_LENGTH]):
            # Named onset_time (not `time`) so the imported `time` module is
            # not shadowed inside this function.
            onset_time = record_index * HOP_LENGTH / RATE - lag
            onset_times.append(onset_time)
            print(onset_time - round(onset_time))

    return in_data, pyaudio.paContinue

In [6]:
# execute test
# Records through input_callback while output_callback plays the click track,
# and waits until either stream stops being active.
# NOTE: playback_index, record_index and onset_times are initialised only in
# the cells that define the callbacks; re-run those cells before re-running
# this one, otherwise playback finishes immediately.
p = pyaudio.PyAudio()
input_stream = None
output_stream = None
try:
    input_stream = p.open(format=pyaudio.paFloat32,
                          channels=CHANNELS,
                          rate=RATE,
                          input=True,
                          output=False,
                          frames_per_buffer=SAMPLES_PER_BUFFER,
                          stream_callback=input_callback)
    input_stream.start_stream()

    output_stream = p.open(format=pyaudio.paFloat32,
                           channels=CHANNELS,
                           rate=RATE,
                           input=False,
                           output=True,
                           frames_per_buffer=SAMPLES_PER_BUFFER,
                           stream_callback=output_callback)
    output_stream.start_stream()

    # The callbacks run on PyAudio's side; this loop only polls for completion.
    while output_stream.is_active() and input_stream.is_active():
        time.sleep(0.1)

    output_stream.stop_stream()
    input_stream.stop_stream()
finally:
    # Always release the audio resources, including when the wait loop is
    # interrupted or opening the second stream fails.
    if output_stream is not None:
        output_stream.close()
    if input_stream is not None:
        input_stream.close()
    p.terminate()

0.016099773242630386
0.12058956916099772
0.04784580498866209
0.023083900226757326
0.021541950113379116
0.020000000000000462
0.0068480725623585315
-0.029523809523809064
0.0037641723356021117
0.0022222222222225696
-0.01092970521541936
-0.012471655328797127
-0.014013605442176669
-0.03877551020408099
-0.028707482993196365
-0.007029478458049354
0.01464852607709588
-0.021723356009072603
-0.02326530612245037
-0.03641723356009052
-0.003129251700681124
-0.051111111111111995
-0.029433106575964985
0.015464852607706803
0.06036281179138214
0.0007709750566888829
-0.0007709750566924356
-0.01392290249433259
-0.003854875283447967
-0.01700680272108812
-0.041768707482994216
-0.031700680272109594
-0.02163265306122497


In [8]:
# Deviation of each detected onset from the nearest whole second (the click
# track places one click per second). The first two detections are left out,
# presumably as warm-up outliers -- TODO confirm.
onset_error = np.array(onset_times[2:]) - np.round(onset_times[2:])
# Both statistics use five decimals; the original "{:5f}" set a field width of
# 5 instead of a precision, so the standard deviation printed six decimals.
print("error: {:.5f} ± {:.5f}".format(np.mean(onset_error), np.std(onset_error)))

error: -0.00712 ± 0.025109
