In [1]:
import os
import pathlib

import numpy as np
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display

# Fix the random seeds so repeated runs draw the same pseudo-random numbers.
# The same value is handed to both TensorFlow's and NumPy's global generators.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

  '{0}.{1}.{2}'.format(*version.hdf5_built_version_tuple)


In [2]:
from scipy import signal
import matplotlib.pyplot as plt

def _build_mel_filter_bank(nfilter=40, n_bins=513, high_bin=512):
    """Build the bank of overlapping triangular filters used by ``get_melcepstrum``.

    Construction:
      1. Map the range ``[0, high_bin]`` onto the mel scale with
         ``mel = 2595 * log10(1 + x / 700)``.
      2. Place ``nfilter + 1`` equally spaced points on that mel axis.
      3. Map the points back with the inverse formula, so their spacing grows
         towards the upper end.
      4. Insert the floored midpoint of every neighbouring pair, which gives
         ``2 * nfilter + 1`` sorted break points.
      5. Filter ``i`` is the triangle spanned by break points ``i``, ``i + 1``
         and ``i + 2`` (left foot, peak, right foot), so neighbouring triangles
         overlap by half their base.

    Only the first ``nfilter + 2`` break points are consumed, so the bank
    covers the lower part of the bin range rather than all of it.

    Note that the break points are used directly as column (frequency-bin)
    indices; the mel formula is applied to bin numbers, not to values in Hz.

    Args:
        nfilter: number of triangular filters.
        n_bins: number of frequency bins per spectrum (columns of the result).
        high_bin: upper end of the range that is warped onto the mel scale.

    Returns:
        ``numpy.ndarray`` of shape ``(nfilter, n_bins)`` holding the filter
        coefficients.
    """
    low_freq_mel = 0
    high_freq_mel = 2595 * np.log10(1 + high_bin / 700)

    # Equally spaced on the mel axis, then converted back.
    mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilter + 1)
    edge_points = 700 * (10 ** (mel_points / 2595) - 1)

    # Midpoints are taken from the un-floored edges and floored afterwards.
    center_points = np.floor((edge_points[:-1] + edge_points[1:]) / 2)
    break_points = np.sort(np.concatenate([np.floor(edge_points), center_points]))

    mel_filter = np.zeros([nfilter, n_bins])
    for index in range(nfilter):
        left, center, right = break_points[index:index + 3]
        rising = np.arange(int(left), int(center))
        falling = np.arange(int(center), int(right))
        # Linear ramp up to the peak, then linear ramp down to the right foot.
        mel_filter[index, rising] = (rising - left) / (center - left)
        mel_filter[index, falling] = (right - falling) / (right - center)
    return mel_filter


def get_melcepstrum(spectrogram):
    """Project a spectrogram onto a 40-band mel-spaced triangular filter bank.

    Despite the name, no logarithm or DCT is applied: the result is a
    mel-filtered spectrogram, not a cepstrum.

    Args:
        spectrogram: tensor of shape ``(513, n_frames, 1)`` (frequency bins,
            time frames, channel), as returned by ``get_spectrogram``.

    Returns:
        Tensor of shape ``(n_frames, 40, 1)``. An input with zero time frames
        yields an empty ``(0, 40, 1)`` tensor.
    """
    mel_filter = _build_mel_filter_bank()

    # Drop the channel axis and put time first: (n_frames, 513).
    frames = tf.squeeze(spectrogram, axis=-1).numpy().T

    # One matrix product filters every frame at once: (n_frames, 40).
    melcepstrum = np.dot(frames, mel_filter.T)

    melcepstrum_tf = tf.convert_to_tensor(melcepstrum)
    # Re-append the channel axis.
    return melcepstrum_tf[..., tf.newaxis]
        
def get_spectrogram(waveform):
    """Turn a mono waveform into a fixed-size spectrogram.

    The waveform is truncated or zero-padded to ``input_len`` samples, passed
    through ``scipy.signal.spectrogram`` (513-sample segments, 1024-point FFT,
    no detrending, SciPy defaults for everything else), and the first 30 time
    frames are kept.

    Args:
        waveform: 1-D array or tensor of audio samples.

    Returns:
        Tensor of shape ``(513, n_frames, 1)`` with ``n_frames <= 30``:
        frequency bins (``nfft // 2 + 1``), time frames, and a trailing
        channel axis so the result can be fed to convolution layers.
    """
    input_len = 16000
    n_frames = 30

    # Truncate, cast to float32, then zero-pad so every clip has the same length.
    waveform = tf.cast(waveform[:input_len], dtype=tf.float32)
    zero_padding = tf.zeros(
        [input_len] - tf.shape(waveform),
        dtype=tf.float32)
    equal_length = tf.concat([waveform, zero_padding], 0)

    # scipy returns (frequencies, segment times, spectrogram); only the
    # spectrogram is needed. `signal.spectrogram` is the public entry point;
    # the `signal.spectral` submodule is a deprecated private namespace.
    _, _, spectrogram = signal.spectrogram(
        equal_length.numpy(), nperseg=513, nfft=1024, detrend=False)

    # With the default output mode the values are already real-valued;
    # tf.abs mainly converts the array into a TensorFlow tensor.
    spectrogram = tf.abs(spectrogram)

    # Keep a fixed number of time frames and add the channel axis.
    spectrogram = spectrogram[:, :n_frames]
    return spectrogram[..., tf.newaxis]

In [3]:
import apa102

# Number of LEDs handed to the APA102 driver as `num_led`.
PIXELS_N = 3

# Driver handle; the monitoring loop later in this notebook uses it to show the classifier result.
rgb_led = apa102.APA102(num_led=PIXELS_N)

# Colour arguments of set_pixel are given in R, G, B order (presumably after the pixel index - confirm against the driver).

rgb_led.set_pixel(0, 0, 0, 0) # "no signal" state: pixel 0 with all colour components zero

# NOTE(review): this repeats the identical all-zero write for pixel 0, yet was
# labelled as the "normal printing sound detected" state - looks like a placeholder.
rgb_led.set_pixel(0, 0, 0, 0) # "normal printing sound detected" state (same values as above)

# Presumably pushes the buffered pixel values out to the LEDs - confirm against the driver.
rgb_led.show()

In [4]:
import pyaudio
import wave
import numpy as np
import datetime
RESPEAKER_RATE = 16000  # sample rate requested from the input device
RESPEAKER_CHANNELS = 2
RESPEAKER_WIDTH = 2  # passed to get_format_from_width; samples are decoded as int16 below
# run getDeviceInfo.py to get index
RESPEAKER_INDEX = 0  # refer to input device id
CHUNK = 1024  # frames requested per stream.read call
RECORD_SECONDS = 1  # length of one analysis window
WAVE_OUTPUT_FILENAME = "./dataset/output_one_channel"

# Load the trained classifier once, before the loop starts.
model = tf.keras.models.load_model('./SLA3dPrintAssitant For Raspi.h5')


def _read_channel0_window(stream):
    """Read one analysis window from `stream` and return channel 0 as float64.

    int(RESPEAKER_RATE / CHUNK * RECORD_SECONDS) whole chunks are read, i.e.
    15 * 1024 = 15360 samples with the constants above (slightly under one
    second); get_spectrogram zero-pads the remainder.
    """
    n_chunks = int(RESPEAKER_RATE / CHUNK * RECORD_SECONDS)
    chunks = []
    for _ in range(n_chunks):
        data = stream.read(CHUNK, exception_on_overflow=False)
        # The two channels are interleaved; [0::2] keeps channel 0
        # (use [1::2] for channel 1).
        chunks.append(np.frombuffer(data, dtype=np.int16)[0::2])
    # float64 matches what the previous np.append-based accumulation produced.
    return np.concatenate(chunks).astype(np.float64)


# Monitor the microphone until the kernel is interrupted. The stream and the
# PyAudio instance are released in `finally`, so an interrupt no longer leaves
# the audio device open.
p = pyaudio.PyAudio()
try:
    stream = p.open(
        rate=RESPEAKER_RATE,
        format=p.get_format_from_width(RESPEAKER_WIDTH),
        channels=RESPEAKER_CHANNELS,
        input=True,
        input_device_index=RESPEAKER_INDEX,
    )
    try:
        while True:
            waveform = _read_channel0_window(stream)
            spectrogram = get_spectrogram(waveform)      # waveform -> spectrogram
            melcepstrum = get_melcepstrum(spectrogram)   # spectrogram -> mel bands
            # Model input: a batch of (30 time frames, 40 mel bands, 1 channel).
            melcepstrum = tf.reshape(melcepstrum, (-1, 30, 40, 1))
            y_pred = np.argmax(model.predict(melcepstrum), axis=1)  # 0: Good, 1: Background
            if y_pred[0] == 0:
                rgb_led.set_pixel(0, 0, 20, 0)  # normal printing sound detected
            else:
                rgb_led.set_pixel(0, 20, 0, 0)  # no signal
            rgb_led.show()
    finally:
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()

ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.front
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.rear
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.center_lfe
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.side
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.surround21
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.surround21
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.surround40
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.surround41
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.surround50
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.surround51
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.surround71
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.iec958
ALSA lib pcm.c:2660:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.iec958
ALSA lib pcm.c:2660:(snd

KeyboardInterrupt: 