In [None]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Build a TF1-compat session config with gpu_options.allow_growth switched on
# (GPU memory is then requested on demand rather than reserved up front) and
# open an InteractiveSession that uses it.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

In [None]:
from numpy import load
from numpy import zeros
from numpy import ones
import numpy as np
from numpy.random import randint
from keras.optimizers import Adam
from keras.initializers import RandomNormal
from keras.models import Model
from keras.models import Input
from keras.layers import Conv2D
from keras.layers import Conv2DTranspose
from keras.layers import LeakyReLU
from keras.layers import Activation
from keras.layers import Concatenate
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import LeakyReLU
from matplotlib import pyplot
import keras
from keras.callbacks import EarlyStopping
import tensorflow as tf
# Continue only when TensorFlow reports the first GPU device; otherwise abort
# the notebook right away.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')
 

def define_encoder_block(layer_in, n_filters, batchnorm=True):
    """Apply one downsampling stage to ``layer_in`` and return its output.

    The stage is a 4x4 convolution with stride 2 and 'same' padding whose
    kernel is initialised from a normal distribution (stddev 0.02), optionally
    followed by batch normalization, and always finished by LeakyReLU with
    alpha 0.2.

    Args:
        layer_in: Tensor the stage is applied to.
        n_filters: Number of convolution filters.
        batchnorm: When true, insert a BatchNormalization layer after the
            convolution.

    Returns:
        The tensor produced by the LeakyReLU activation.
    """
    downsample = Conv2D(
        n_filters,
        (4, 4),
        strides=(2, 2),
        padding='same',
        kernel_initializer=RandomNormal(stddev=0.02),
    )
    features = downsample(layer_in)
    if batchnorm:
        # The layer is invoked with training=True explicitly.
        features = BatchNormalization()(features, training=True)
    return LeakyReLU(alpha=0.2)(features)
 

def decoder_block(layer_in, skip_in, n_filters, dropout=True):
    """Apply one upsampling stage and merge it with a skip connection.

    The stage is a 4x4 transposed convolution with stride 2 and 'same'
    padding (kernel initialised from a normal distribution, stddev 0.02),
    batch normalization, optional dropout at rate 0.5, concatenation with
    ``skip_in``, and a ReLU activation.

    Args:
        layer_in: Tensor to upsample.
        skip_in: Tensor concatenated with the upsampled features.
        n_filters: Number of transposed-convolution filters.
        dropout: When true, apply Dropout(0.5) before the concatenation.

    Returns:
        The tensor produced by the ReLU activation.
    """
    upsample = Conv2DTranspose(
        n_filters,
        (4, 4),
        strides=(2, 2),
        padding='same',
        kernel_initializer=RandomNormal(stddev=0.02),
    )
    upsampled = upsample(layer_in)
    # Both normalization and dropout are invoked with training=True explicitly.
    upsampled = BatchNormalization()(upsampled, training=True)
    if dropout:
        upsampled = Dropout(0.5)(upsampled, training=True)
    merged = Concatenate()([upsampled, skip_in])
    return Activation('relu')(merged)
 

def define_generator(image_shape=(128,128,1)):
    """Build the encoder-decoder generator model with skip connections.

    Six encoder stages (64, 128, 256, 512, 512, 512 filters; the first one
    without batch normalization) feed a 128-filter strided-convolution
    bottleneck with ReLU. Six decoder stages (512, 512, 512, 256, 256, 64
    filters; dropout on the first two only) then upsample, each receiving the
    output of the mirrored encoder stage as its skip input. A final
    single-filter transposed convolution followed by tanh produces the output.

    Args:
        image_shape: Shape passed to the model's Input layer.

    Returns:
        A Model mapping the input image to the tanh-activated output image.
    """
    weight_init = RandomNormal(stddev=0.02)
    source = Input(shape=image_shape)

    # Encoder: remember every stage output so the decoder can reuse it.
    skips = []
    x = source
    for depth, width in enumerate((64, 128, 256, 512, 512, 512)):
        x = define_encoder_block(x, width, batchnorm=(depth > 0))
        skips.append(x)

    # Bottleneck.
    x = Conv2D(128, (4,4), strides=(2,2), padding='same', kernel_initializer=weight_init)(x)
    x = Activation('relu')(x)

    # Decoder: (filters, use_dropout) per stage, paired with the encoder
    # outputs taken deepest-first.
    decoder_plan = ((512, True), (512, True), (512, False),
                    (256, False), (256, False), (64, False))
    for (width, use_dropout), skip in zip(decoder_plan, reversed(skips)):
        x = decoder_block(x, skip, width, dropout=use_dropout)

    # Output head.
    x = Conv2DTranspose(1, (4,4), strides=(2,2), padding='same', kernel_initializer=weight_init)(x)
    return Model(source, Activation('tanh')(x))

In [None]:
import keras
# Display the installed Keras version as the cell output.
keras.__version__

In [None]:
# Instantiate the generator with its default input shape and compile it with
# the Adam optimizer, a Huber loss, and mean absolute error as a metric.
model = define_generator()
model.compile(optimizer = 'adam', loss = keras.losses.Huber(), metrics = ['mae'])

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
import os
# os.listdir returns entries in arbitrary order. Sort the file names so the
# sample order, and with it any index-based train/validation split, is the
# same on every run and machine.
np_combined = np.array(sorted(os.listdir("spectrogram_audio")))
np_speech = np.array(sorted(os.listdir("spectrogram_speech")))

In [None]:
# Load, for every file name in np_combined, the array of that name from each
# of the two spectrogram folders; the same name is used for both folders.
# NOTE(review): presumably spectrogram_audio holds the model inputs and
# spectrogram_speech the targets - confirm.
Train_x, Train_y = [], []
j = 0  # running count, printed as a progress indicator
for s in np_combined:
    for folder, bucket in (("spectrogram_audio/", Train_x), ("spectrogram_speech/", Train_y)):
        bucket.append(np.load(folder + s))
    print(j)
    j += 1

In [None]:
# Stack the loaded spectrograms into arrays of shape (N, 128, 128, 1) and
# divide by 80 to rescale the values.
# NOTE(review): 80 presumably corresponds to the dB range of the stored
# spectrograms - confirm against the code that generated the .npy files.
train_x = np.array(Train_x).reshape([len(Train_x),128,128,1])/80
train_y = np.array(Train_y).reshape([len(Train_y),128,128,1])/80

In [None]:
# Early-stopping callback that monitors validation loss (lower is better) and
# logs when it triggers. It only takes effect when handed to model.fit through
# its `callbacks` argument.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

In [None]:
# Show the shape of the training input array.
train_x.shape

In [None]:
# Train on the first 4500 samples and validate on the remainder.
# The EarlyStopping callback `es` is passed in so that training halts once the
# monitored validation loss stops improving; without it the callback is inert.
model.fit(
    train_x[:4500], train_y[:4500],
    validation_data=(train_x[4500:], train_y[4500:]),
    epochs=20, batch_size=32, verbose=1, shuffle=True,
    callbacks=[es],
)

In [None]:
import librosa

In [None]:
# Load combined_street/6.wav resampled to 8 kHz. `sr` is passed by keyword:
# recent librosa releases accept it only as a keyword argument.
audio, sr = librosa.load("combined_street/6.wav", sr=8000)
n_fft = 255
hop_length_fft = 63
frame_length = 8064
chunk_mag = []
chunk_pha = []
i = 0
# Walk over the signal in consecutive, non-overlapping frames of
# `frame_length` samples. `<=` keeps the final frame when the remaining audio
# is exactly one frame long; a shorter trailing remainder is dropped.
while i + frame_length <= len(audio):
    stftaudio = librosa.stft(audio[i:i+frame_length], n_fft=n_fft, hop_length=hop_length_fft)
    stftaudio_magnitude, stftaudio_phase = librosa.magphase(stftaudio)

    # Magnitude in dB relative to this frame's own maximum, then divided by 80
    # (the same divisor applied to the training arrays).
    stftaudio_magnitude_db = librosa.amplitude_to_db(
        stftaudio_magnitude, ref=np.max)
    chunk_mag.append(stftaudio_magnitude_db/80)
    chunk_pha.append(stftaudio_phase)
    i += frame_length



In [None]:
# Stack the per-frame scaled spectrograms into a (n_frames, 128, 128, 1) batch.
test_x = np.array(chunk_mag).reshape([len(chunk_mag),128,128,1])

In [None]:
# Run the model on the test batch.
pred_y = model.predict(test_x)

In [None]:
# Show the shape of the prediction array.
np.shape(pred_y)

In [None]:
def magnitude_db_and_phase_to_audio(frame_length, hop_length_fft, stftaudio_magnitude_db, stftaudio_phase):
    """Rebuild a waveform from a dB magnitude spectrogram and its phase.

    The dB magnitudes are converted back to linear amplitude (reference 1.0),
    multiplied element-wise by the phase to form the STFT matrix, and passed
    through the inverse STFT.

    Args:
        frame_length: Requested length of the reconstructed signal, forwarded
            to the inverse STFT as ``length``.
        hop_length_fft: Hop length forwarded to the inverse STFT.
        stftaudio_magnitude_db: Magnitude spectrogram in dB.
        stftaudio_phase: Phase factors multiplied with the linear magnitudes.

    Returns:
        The signal returned by ``librosa.core.istft``.
    """
    linear_magnitude = librosa.db_to_amplitude(stftaudio_magnitude_db, ref=1.0)
    complex_stft = linear_magnitude * stftaudio_phase
    return librosa.core.istft(complex_stft, hop_length=hop_length_fft, length=frame_length)

In [None]:
def matrix_spectrogram_to_numpy_audio(m_mag_db, m_phase, frame_length, hop_length_fft):
    """Convert a stack of dB spectrograms and phases into a stack of waveforms.

    Each index along the first axis of ``m_mag_db`` is reconstructed with
    ``magnitude_db_and_phase_to_audio`` using the entry of ``m_phase`` at the
    same index.

    Args:
        m_mag_db: Array whose first axis indexes the dB magnitude spectrograms.
        m_phase: Phases, indexed the same way as ``m_mag_db``.
        frame_length: Length forwarded to each reconstruction.
        hop_length_fft: Hop length forwarded to each reconstruction.

    Returns:
        The reconstructed signals stacked row-wise with ``np.vstack``.
    """
    frames = [
        magnitude_db_and_phase_to_audio(frame_length, hop_length_fft, m_mag_db[idx], m_phase[idx])
        for idx in range(m_mag_db.shape[0])
    ]
    return np.vstack(frames)

In [None]:
# Undo the /80 scaling on the predicted spectrograms, pair them with the
# phases kept from the input frames, and reconstruct one waveform per frame.
predicted_db = pred_y[:,:,:,0]*80
phases = np.array(chunk_pha)
audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
    predicted_db, phases, frame_length, hop_length_fft)
# Flatten the per-frame rows into a single row and scale the result by 10.
nb_samples = audio_denoise_recons.shape[0]  # number of reconstructed frames (rows)
denoise_long = audio_denoise_recons.reshape(1, nb_samples * frame_length)*10

In [None]:
import matplotlib.pyplot as plt
# Plot the reconstructed waveform held in row 0 of denoise_long.
plt.plot(denoise_long[0, :])

In [None]:
# Load parallel_speech/6.wav resampled to 8 kHz. `sr` is passed by keyword:
# recent librosa releases accept it only as a keyword argument.
audio,sr = librosa.load("parallel_speech/6.wav", sr=8000)

In [None]:
# Plot the waveform currently held in `audio`.
plt.plot(audio)

In [None]:
import IPython.display as ipd
# Audio player for the reconstructed signal, using the sample rate `sr`
# returned by the most recent librosa.load call.
ipd.Audio(data = denoise_long[0, :], rate = sr )

In [None]:
# Audio player for the waveform currently held in `audio`.
ipd.Audio(data = audio, rate = sr )

In [None]:
# Reload combined_street/6.wav without an explicit `sr`, so librosa's default
# sample rate applies and is returned in `sr`.
audio,sr = librosa.load("combined_street/6.wav")

In [None]:
# Audio player for the waveform currently held in `audio`.
ipd.Audio(data = audio, rate = sr )

In [None]:
# Number of samples in the waveform currently held in `audio`.
len(audio)

In [None]:
# Load combined_street/195.wav resampled to 8 kHz.
audio,sr = librosa.load("combined_street/195.wav", sr = 8000)

In [None]:
# Number of samples in the waveform currently held in `audio`.
len(audio)