In [1]:
import os
import sys
import argparse
from pathlib import Path
import importlib
import array
import shutil
import glob
import math
import random
import wave


import numpy as np
import librosa
import matplotlib.pyplot as plt

# Root directory holding the locally cloned repositories (machine-specific).
# Alternate checkout location used on another machine:
#repos_dir = r'/home/takkan/repos'
repos_dir = r'/home/akikun/repos'
imgan_dir = os.path.join(repos_dir, 'Intelligibility-MetricGAN')

# Make both the repo root and the iMetricGAN checkout importable, in that order.
for _extra_path in (repos_dir, imgan_dir):
    sys.path.append(_extra_path)

from sak import display as dp

In [9]:
# Target signal-to-noise ratio of the mixture, in dB.
snr = -10.0

# Locate the iMetricGAN sample files.
# (An earlier layout used a 'JR_database' directory with 'Train_001.wav' names.)
wav_dir = os.path.join(imgan_dir, 'database')
clean_file, noise_file = (
    os.path.join(wav_dir, 'Train', subset, 'Train_1.wav')
    for subset in ('Clean', 'Noise')
)

# Open both recordings for reading. The readers are intentionally left open:
# later cells read frames from them and reuse clean_wav's parameters.
clean_wav, noise_wav = wave.open(clean_file, "r"), wave.open(noise_file, "r")

In [10]:
'''
Reference: https://github.com/Sato-Kunihiko/audio-SNR
'''
def cal_amp(wf):
    """Read all frames of an open 16-bit PCM WAV reader as float64 samples.

    The reader is rewound first, so calling this repeatedly on the same
    object (e.g. when a notebook cell is re-run) always returns the whole
    recording instead of an empty array.

    Args:
        wf: An open ``wave.Wave_read`` object, as returned by
            ``wave.open(path, "r")``.

    Returns:
        1-D ``np.float64`` array holding every 16-bit sample value in file
        order. No de-interleaving of channels is done here.

    Raises:
        ValueError: If the file's sample width is not 2 bytes; such data
            cannot be interpreted as int16 samples.
    """
    sample_width = wf.getsampwidth()
    if sample_width != 2:
        raise ValueError(
            "cal_amp expects 16-bit PCM (sample width 2), got sample width %d"
            % sample_width
        )

    # Start from the first frame regardless of any earlier reads.
    wf.rewind()
    buffer = wf.readframes(wf.getnframes())
    amplitude = np.frombuffer(buffer, dtype="int16").astype(np.float64)
    return amplitude

def cal_rms(amp):
    """Return the root-mean-square of ``amp`` taken along its last axis.

    Args:
        amp: Array-like of sample values.

    Returns:
        The RMS value; a scalar for 1-D input, otherwise an array with the
        last axis reduced.
    """
    squared = np.square(amp)
    mean_square = np.mean(squared, axis=-1)
    return np.sqrt(mean_square)

def cal_adjusted_rms(clean_rms, snr):
    """Return the noise RMS that yields ``snr`` dB against ``clean_rms``.

    Uses SNR(dB) = 20 * log10(clean_rms / noise_rms), solved for noise_rms.

    Args:
        clean_rms: RMS of the clean signal.
        snr: Desired signal-to-noise ratio in dB (anything ``float()`` accepts).

    Returns:
        The target RMS for the noise signal.
    """
    exponent = float(snr) / 20
    amplitude_ratio = 10 ** exponent
    return clean_rms / amplitude_ratio


In [11]:
# Calculation of amp
clean_amp = cal_amp(clean_wav)
noise_amp = cal_amp(noise_wav)

print(clean_amp, noise_amp)

[0. 0. 0. ... 5. 3. 1.] [-5625. -5716. -5087. ... -5234. -4326. -3110.]


In [12]:
# Calculation of RMS
start = random.randint(0, len(noise_amp)-len(clean_amp))
clean_rms = cal_rms(clean_amp)
split_noise_amp = noise_amp[start: start + len(clean_amp)]
noise_rms = cal_rms(split_noise_amp)

print(clean_rms, noise_rms)

655.3601012333713 3685.362157923615


In [13]:
# Synthesize waveforms of any size
adjusted_noise_rms = cal_adjusted_rms(clean_rms, snr)
        
adjusted_noise_amp = split_noise_amp * (adjusted_noise_rms / noise_rms) 
mixed_amp = (clean_amp + adjusted_noise_amp)

# Normalized so as not to crack the sound
if (mixed_amp.max(axis=0) > 32767): 
    mixed_amp = mixed_amp * (32767/mixed_amp.max(axis=0))
    clean_amp = clean_amp * (32767/mixed_amp.max(axis=0))
    adjusted_noise_amp = adjusted_noise_amp * (32767/mixed_amp.max(axis=0))


In [14]:
# Save the waveform as a wav file
noisy_wave = wave.Wave_write('output_noisy_file.wav')
noisy_wave.setparams(clean_wav.getparams())
noisy_wave.writeframes(array.array('h', mixed_amp.astype(np.int16)).tostring() )
noisy_wave.close()

clean_wave = wave.Wave_write('output_clean_file.wav')
clean_wave.setparams(clean_wav.getparams())
clean_wave.writeframes(array.array('h', clean_amp.astype(np.int16)).tostring() )
clean_wave.close()

noise_wave = wave.Wave_write('output_noise_file.wav')
noise_wave.setparams(clean_wav.getparams())
noise_wave.writeframes(array.array('h', adjusted_noise_amp.astype(np.int16)).tostring() )
noise_wave.close()

## Listen to the audio.

In [15]:
# Display the noisy mixture file written by the save cell. `disp_wav` comes from
# the project-local `sak.display` module (presumably renders an audio
# player/waveform for the file — TODO confirm against that module).
dp.disp_wav('output_noisy_file.wav')