In [None]:
# This notebook magic automatically reloads imported modules (such as sine_wave_speech) when they are edited
%load_ext autoreload
%autoreload 2

import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import IPython.display

import sine_wave_speech.synthesis

In [None]:
# Sample rate in Hz shared by loading, plotting and playback in this notebook.
SAMPLE_RATE = 8000


def play(audio, sr=SAMPLE_RATE):
    """Show an inline audio player for ``audio``.

    Args:
        audio: Samples to play; handed unchanged to ``IPython.display.Audio``.
        sr: Playback sample rate in Hz (defaults to ``SAMPLE_RATE``).
    """
    player = IPython.display.Audio(audio, rate=sr)
    IPython.display.display(player)

def spectrogram(audio, n_fft=512, sr=SAMPLE_RATE):
    """Draw a dB-scaled magnitude spectrogram of ``audio``.

    The plot goes to the current matplotlib axes (librosa's default when no
    ``ax`` is given) and uses a linear frequency axis.

    Args:
        audio: Signal to analyse; passed to ``librosa.stft``.
        n_fft: FFT size used for the STFT and for the frequency axis.
        sr: Sample rate in Hz, used to scale the frequency axis.
    """
    magnitude = np.abs(librosa.stft(audio, n_fft=n_fft))
    # ref=np.max expresses every bin in dB relative to the loudest one.
    magnitude_db = librosa.amplitude_to_db(magnitude, ref=np.max)
    librosa.display.specshow(magnitude_db, sr=sr, n_fft=n_fft, y_axis="linear")

In [None]:
# Load the example sentence at SAMPLE_RATE; the sample rate that
# librosa.load returns alongside the signal is discarded.
sentence_path = "./data/sentence.wav"
audio, _ = librosa.load(sentence_path, sr=SAMPLE_RATE)

### Plot and play the original audio

In [None]:
# Spectrogram of the unprocessed recording. The title keeps the figure
# identifiable when it is viewed apart from the surrounding markdown; the
# trailing semicolon hides the Text repr that plt.title() returns.
spectrogram(audio)
plt.title("Original audio");

In [None]:
# Inline audio player for the original recording.
play(audio)

### Convert to sine wave speech

In [None]:
# One-call conversion of the recording; n_waves=4 asks for four sine waves.
resynthesized = sine_wave_speech.synthesis.to_sine_wave_speech(audio, n_waves=4)

### Plot and play the resynthesized audio
As you can tell from the spectrogram, the resynthesized audio consists of four sine waves:

In [None]:
# Spectrogram of the sine wave speech. The title distinguishes this figure
# from the spectrogram of the original recording; the trailing semicolon
# hides the Text repr that plt.title() returns.
spectrogram(resynthesized)
plt.title("Resynthesized audio (sine wave speech)");

In [None]:
# Inline audio player for the resynthesized signal.
play(resynthesized)

### Detailed control

Instead of calling `to_sine_wave_speech()`, you can run the individual steps yourself:

In [None]:
from sine_wave_speech.lpc import fit_lpc, lpc_coefficients_to_frequencies

n_waves = 4
# fit_lpc's `p` is set to twice the number of sine waves
# (presumably the LPC order, two coefficients per wave — confirm against fit_lpc).
lpc_order = n_waves * 2
lpc_coefficients, gain, residual = fit_lpc(audio, p=lpc_order)

# Derive the frequencies and magnitudes that synthesize() consumes further down.
# NOTE(review): `residual` is not used again in the visible cells.
frequencies, magnitudes = lpc_coefficients_to_frequencies(lpc_coefficients, gain)

In [None]:
n_frames = 100  # how many leading entries of the frequency array to draw
frame_hop = 128  # presumably the samples-per-frame step used by fit_lpc — TODO confirm

# The frequencies are in radians/sample, so convert to Hz for the plot.
two_pi = 2 * np.pi
frequencies_hz = frequencies * SAMPLE_RATE / two_pi

# Draw the frequency curves and the spectrogram of the corresponding audio
# slice in the same cell so they share one set of axes.
plt.plot(frequencies_hz[:n_frames])
spectrogram(audio[frame_hop:n_frames * frame_hop], n_fft=512)

In [None]:
# Import the submodule explicitly: a bare `import scipy` only exposes
# `scipy.signal` on SciPy versions with lazy submodule loading, or when some
# other import happens to have loaded it already.
import scipy.signal

# The synthesize() function also allows you to replace the sine wave
# with something more funky, like a sawtooth wave.
resynthesized = sine_wave_speech.synthesis.synthesize(
    frequencies, magnitudes, wave_fn=lambda x: scipy.signal.sawtooth(x, width=1)
)

play(resynthesized, sr=SAMPLE_RATE)