<p>声音信号则代表着空气压力随时间的变化</p>
<p>麦克风能够测量这种变化并转换成可以表示声音的电信号(A->D)</p>
<p>扬声器可以接收这种电信号并产生相应的声音(D->A)</p>
<p>语音信号属于短时平稳信号，一般认为在10~30ms内语音信号的特性基本不变，或只是缓慢变化，于是可以截取一小段进行频谱分析</p>

<p><font color=red>频率:</font> 一个周期函数一秒内重复出现的次数 </p> 
<p><font color=red>采样率:</font> 一秒内采样的个数</p>

<p><font color=red>$y(t) = A * sin ( (2 * \pi * f) * t + phase)$</font></p>
<p>角速度 $\omega = \frac {2 * \pi} {T} = 2 * \pi * f$</p>

A（振幅）: 16bit 有符号整数可表示的范围为 [$-2^{15}$, $2^{15}-1$]，即 [-32768, 32767]；这里选择 16000 作为最大的声音幅度

<p>纯信号分析</p>
<p>前提要求：python</p>
<p>常用库：numpy, scipy, matplotlib, pandas, wave, struct</p>
    
wav 格式：


Reference:
    https://www.pythonforengineers.com/audio-and-digital-signal-processingdsp-in-python/

In [None]:
import os

import numpy as np
import wave
import struct
import matplotlib.pyplot as plt

# Generate a pure sine tone and store it as a mono 16-bit PCM WAV file.
frequency = 1000  # tone frequency in Hz
sample_rate = 48000  # samples per second

duration = 1 # unit s
amplitude = 16000  # peak sample value; must fit in a signed 16-bit integer
phase = 0

# int() keeps range() below working even when duration is not a whole number
num_samples = int(sample_rate * duration)

file = ("/home/leliang/project/baidu/tmp/leliang/introduction_%(freq)dHz.wav" % {'freq':frequency})

if os.path.exists(file):
    os.remove(file)

delta_t = 1.0 / sample_rate

# Generate the audio signal: sample the sine at t = x * delta_t
sine_wave = [np.sin(2 * np.pi * frequency * (x * delta_t) + phase) for x in range(num_samples)]

nframes=num_samples
comptype='NONE'
compname='not compressed'
nchannels=1
sample_width=2 # bytes per sample (16 bits)

# Quantize and encode: scale to the chosen amplitude and pack each sample as a
# signed 16-bit integer. All frames are packed up front so the file is written
# with a single writeframes() call instead of one call per sample.
frames = b''.join(struct.pack('h', int(value * amplitude)) for value in sine_wave)

# The context manager closes the writer, so the data is flushed to disk before
# any later code reopens the file for reading.
with wave.open(file, 'w') as wave_file:
    wave_file.setparams((nchannels, sample_width, int(sample_rate), nframes, comptype, compname))
    wave_file.writeframes(frames)

# Expected size: 2 bytes per sample plus 44 bytes counted for the header
print("space occupy: ", num_samples * 2 + 44)

In [None]:
# Demonstrate struct.pack: encode the integer 512 as bytes using the 'i'
# format code (a native C int). The byte order of the result follows the
# host platform because no byte-order prefix is given.
struct.pack('i', 512)
#help(struct)

In [None]:
# Read the samples back from the WAV file and plot the waveform in three
# stacked panels: raw samples by index, raw samples by time, and samples
# divided by 2**15 by time.
with wave.open(file, 'r') as wave_file:
    data = wave_file.readframes(num_samples)

# Decode the byte string into a tuple of signed 16-bit integers
data_s = struct.unpack('{n}h'.format(n=num_samples), data)
ys = np.array(data_s)

# Panel 1: the first 960 samples against their sample index
plt.subplot(3,1,1)
plt.plot(data_s[0:960])

# Time stamp of every sample. endpoint=False makes the spacing exactly
# duration / num_samples; including the endpoint would stretch the axis
# to duration / (num_samples - 1) per sample.
ts = np.linspace(0, duration, num_samples, endpoint=False)

# Panel 2: raw sample values against time
plt.subplot(3, 1, 2)
plt.subplots_adjust(hspace=0.8)  # figure-wide spacing, so one call is enough
plt.xlim(0, 0.02) # show only the first 0.02 s
plt.xlabel("Time(s)")
plt.plot(ts, ys)

# Panel 3: samples divided by 2**15 against time
scale = 2 ** 15
print(scale)
plt.subplot(3, 1, 3)
plt.xlim(0, 0.02)
plt.xlabel("Time(s)")
plt.plot(ts, ys/scale)

In [None]:
# Estimate the dominant frequency of the start of the file with a plain FFT.
nframes = 480 # number of frames analysed; wave_file.getnframes() would use the whole file

with wave.open(file, 'r') as wave_file:
    framerate = wave_file.getframerate()
    sampwidth = wave_file.getsampwidth()
    data = wave_file.readframes(nframes)

data_s = struct.unpack('{n}h'.format(n=nframes), data)

data_fft = np.fft.fft(data_s)

print(data_fft[0:10]) # FFT result

magnitudes = np.abs(data_fft)

# FFT bin k corresponds to k * framerate / nframes Hz
fs = np.arange(nframes) * framerate / nframes

# The spectrum of a real signal is mirrored around framerate / 2, so look for
# the peak in the lower half only, then convert the bin index to Hz
# (the index alone is only a frequency when nframes == framerate).
peak_bin = np.argmax(magnitudes[:nframes // 2 + 1])
print("The frequency is {} Hz".format(fs[peak_bin]))

plt.xlim(0, 2000)
plt.plot(fs, magnitudes)

In [None]:
def my_spectrum(input_segment, framerate):
    """
    Compute the one-sided spectrum of a slice of real-valued audio samples.

    input_segment: sequence (e.g. np array) of real samples
    framerate: the sampling rate of the samples
    return: (hs, fs) where hs is the complex output of np.fft.rfft and
            fs holds the matching bin frequencies from np.fft.rfftfreq
    """
    sample_spacing = 1.0 / framerate
    freqs = np.fft.rfftfreq(len(input_segment), sample_spacing)
    spectrum = np.fft.rfft(input_segment)
    return (spectrum, freqs)
    
def my_plot_spectrum(input_segment, framerate, xl = None, xh = None, yl = None, yh = None):
    """
    Plot the amplitude spectrum of a slice of real-valued audio samples.

    input_segment: np array with a slice of audio data (real data)
    framerate: the sampling rate
    xl, yl: lower axis limits for x and y; treated as 0 when not given
    xh, yh: upper axis limits for x and y; an axis is only limited when
            its upper limit is given
    """
    hs, fs = my_spectrum(input_segment, framerate)
    amp = np.abs(hs)

    if xh:
        plt.xlim(xl if xl else 0, xh)

    # The y limits go to ylim; the original fallback branch called xlim here,
    # which overwrote the x range instead of limiting the y axis.
    if yh:
        plt.ylim(yl if yl else 0, yh)

    plt.plot(fs, amp)

In [None]:
# Spectrum of one segment of the file, optionally zero-padded first.
segment_len = 48000  # other sizes tried: 512 1024 4096
segment_start = 0
segment_end = segment_start + segment_len

wave_file = wave.open(file, 'r')
sampwidth = wave_file.getsampwidth()
framerate = wave_file.getframerate()

data = wave_file.readframes(segment_len)
# Half of the full-scale value, because the tone was generated at about half
# of the maximum amplitude
scale = 1 << (8 * sampwidth - 2)

data_s = struct.unpack('%dh' % segment_len, data)
ys = np.array(data_s) / scale  # normalise

segment = ys[segment_start:segment_end]

# Zero-pad a short segment so the spectrum is evaluated on a denser frequency
# grid (the original note likens this to windowing). Only applied to
# 256-sample segments, which are centred inside a buffer 32 times longer.
padded = True
if padded and segment_len == 256:
    new_len = 256 * 32
    new_start = new_len // 2 - 128
    new_end = new_start + 256
    zerospadded = np.zeros(new_len)
    zerospadded[new_start:new_end] = segment
    segment = zerospadded

hs, fs = my_spectrum(segment, framerate)
hs = np.abs(hs)
print("频率精度:", framerate/len(segment))
peak_index = np.argmax(hs)
print(fs[peak_index])

my_plot_spectrum(segment, framerate, xh=5000)

wave_file.close()

In [None]:
import matplotlib
def my_spectrogram(spec_map, seg_len):
    """
    Arrange per-segment spectra into a 2-D amplitude array.

    spec_map: dict mapping a time value to the (hs, fs) pair of one segment,
              where hs is the complex spectrum and fs its frequencies
    seg_len: segment length; accepted for interface compatibility, not used
    return: (ts, fs, array) where ts is the sorted list of time keys, fs is
            the frequency sequence of the first entry in spec_map, and
            array[i, j] is the magnitude of frequency bin i at time ts[j]
    """
    # Every entry is expected to share the same frequency axis, so take it
    # from whichever entry comes first.
    _, fs = spec_map[next(iter(spec_map))]
    ts = sorted(spec_map)

    # The builtin float replaces the np.float alias, which NumPy 1.24 removed.
    array = np.zeros((len(fs), len(ts)), dtype=float)
    for j, t in enumerate(ts):
        jhs, _ = spec_map[t]
        array[:, j] = np.abs(jhs)
    return (ts, fs, array)

def my_plot_spectrogram(time_sequence, yvalue, segment_len, samplerate):
    """
    Plot a spectrogram from half-overlapping, Hamming-windowed segments.

    time_sequence: time stamp of every sample in yvalue
    yvalue: the audio samples (real data)
    segment_len: number of samples per analysis segment (at least 2)
    samplerate: the sampling rate
    raises: ValueError if segment_len is below 2 or yvalue is too short to
            yield a single segment
    """
    step = int(segment_len / 2)
    if step < 1:
        # A zero step would never advance the window and loop forever.
        raise ValueError("segment_len must be at least 2")

    # The window depends only on segment_len, so build it once.
    win = np.hamming(segment_len)

    spec_map = {}
    i = 0
    j = segment_len
    while j < len(yvalue):
        # Apply the window to the current segment
        segment = yvalue[i:j] * win

        # Key each spectrum by the midpoint of the segment's time span
        t = (time_sequence[i] + time_sequence[j]) / 2
        spec_map[t] = my_spectrum(segment, samplerate)

        i += step
        j += step

    if not spec_map:
        raise ValueError("yvalue must contain more than segment_len samples")

    (ts, fs, array) = my_spectrogram(spec_map, segment_len)
    plt.ylim(0, 2000)
    x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
    axes = plt.gca()
    axes.xaxis.set_major_formatter(x_formatter)
    plt.pcolormesh(ts, fs, array, cmap=matplotlib.cm.Blues)

In [None]:
# Load the whole WAV file and draw its spectrogram.
seg_len = 512 # other sizes tried: 512 1024 4096

# The context manager closes the file even if reading raises.
with wave.open(file, 'r') as wave_file:
    nframes = wave_file.getnframes()
    framerate = wave_file.getframerate()
    sampwidth = wave_file.getsampwidth()
    data = wave_file.readframes(nframes)

# Half of the full-scale value for this sample width
scale = 2 ** (8 * sampwidth - 2)

data_s = struct.unpack('{n}h'.format(n=nframes), data)
ys = np.array(data_s) / scale # normalise

time_start = 0
time_end = nframes / framerate
# endpoint=False gives a sample spacing of exactly 1 / framerate; including
# the endpoint would spread nframes samples over nframes - 1 intervals.
ts = np.linspace(time_start, time_end, nframes, endpoint=False)

my_plot_spectrogram(ts, ys, seg_len, framerate)