In [0]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
from google.colab import files
#!pip install librosa # in colab, you'll need to install this
import librosa

In [0]:
train_clean_vector, train_clean_sr=librosa.load('train_clean_male.wav', sr=None)
train_clean_stft=librosa.stft(train_clean_vector, n_fft=1024, hop_length=512)

train_noisy_vector, train_noisy_sr=librosa.load('train_dirty_male.wav', sr=None)
train_noisy_stft=librosa.stft(train_noisy_vector, n_fft=1024, hop_length=512)

test1_vector, test1_sr=librosa.load('test_x_01.wav', sr=None)
test1_stft=librosa.stft(test1_vector, n_fft=1024, hop_length=512)

test2_vector, test2_sr=librosa.load('test_x_02.wav', sr=None)
test2_stft=librosa.stft(test2_vector, n_fft=1024, hop_length=512)

## Data Transformation

In [0]:
train_Y=np.abs(train_clean_stft).transpose()
train_X=np.abs(train_noisy_stft).transpose()
test1_X=np.abs(test1_stft).transpose()
test2_X=np.abs(test2_stft).transpose()

In [0]:
x = tf.placeholder(tf.float32, [None, 513])
y = tf.placeholder(tf.float32, [None, 513])
y_raw=tf.placeholder(tf.float32, [None, 513])
input_layer = tf.reshape(x, [-1, 1,513])

## 1D CNN Configuration

In [0]:
conv1 = tf.layers.conv1d(inputs=input_layer, filters=16, kernel_size=16, padding="same", activation=tf.nn.relu)
pool1 = tf.layers.max_pooling1d(inputs=conv1, pool_size=2, strides=2, padding="same")
conv2 = tf.layers.conv1d(inputs=pool1, filters=32, kernel_size=8, padding="same", activation=tf.nn.relu)
pool2 = tf.layers.max_pooling1d(inputs=conv2, pool_size=2, strides=2, padding="same")
fc = tf.layers.dense(inputs=pool2,units=2048,activation=tf.nn.relu)
output = tf.layers.dense(inputs=fc,units=513,activation=tf.nn.relu)
output=tf.reshape(output,[-1,513])

In [0]:
mse = tf.losses.mean_squared_error(y,output)*10
numerator=tf.log(tf.reduce_sum(tf.math.pow(y,2))/tf.reduce_sum(tf.math.pow(tf.math.subtract(y,output),2)))
denominator = tf.log(tf.constant(10, dtype=numerator.dtype))
snr=10*(numerator/denominator)
opt=100/snr

## Model Training

In [8]:
learning_rate = 1
epochs = 1000
batch_size = 256
optimiser = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(mse)

with tf.Session() as sess:
  init_op = tf.global_variables_initializer()
  sess.run(init_op)
  total_batch = int(len(train_X) / batch_size)
  
  for epoch in range(epochs):
    a = np.arange(len(train_X))
    index=np.random.shuffle(a)
    X_shuffled, Y_shuffled = train_X[index][0],train_Y[index][0]
    avg_cost = 0
    snr_value=0
    opt_value=0
    
    for i in range(total_batch):
      batch_x, batch_y = X_shuffled[i*batch_size:(i*batch_size)+batch_size],Y_shuffled[i*batch_size:(i*batch_size)+batch_size]
      _ , c, snr1,opt1 = sess.run([optimiser, mse,snr,opt], feed_dict={x: batch_x, y: batch_y})
      avg_cost += c / total_batch
      snr_value += snr1/total_batch
      opt_value += opt1/total_batch
    if(epoch==0 or (epoch+1)%10==0):
      print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost),"SNR: ","{:.3f}".format(snr_value),"opt: ","{:.3f}".format(opt_value))
    if(epoch>=300 and epoch%100==0):
      learning_rate=learning_rate/2
      optimiser = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(mse)
  
  #Reconstruction with the trained model
  train_clean=sess.run(output, feed_dict={x: train_X})
  test1_clean=sess.run(output, feed_dict={x: test1_X})
  test2_clean=sess.run(output, feed_dict={x: test2_X})

Epoch: 1 cost = 0.950 SNR:  0.393 opt:  1548.906
Epoch: 10 cost = 0.772 SNR:  1.298 opt:  98.409
Epoch: 20 cost = 0.473 SNR:  3.163 opt:  33.923
Epoch: 30 cost = 0.401 SNR:  3.884 opt:  26.850
Epoch: 40 cost = 0.315 SNR:  4.848 opt:  21.801
Epoch: 50 cost = 0.248 SNR:  5.832 opt:  17.954
Epoch: 60 cost = 0.228 SNR:  6.189 opt:  16.869
Epoch: 70 cost = 0.222 SNR:  6.283 opt:  16.847
Epoch: 80 cost = 0.189 SNR:  6.987 opt:  14.999
Epoch: 90 cost = 0.187 SNR:  7.031 opt:  14.858
Epoch: 100 cost = 0.173 SNR:  7.358 opt:  14.220
Epoch: 110 cost = 0.156 SNR:  7.833 opt:  13.297
Epoch: 120 cost = 0.153 SNR:  7.932 opt:  13.192
Epoch: 130 cost = 0.141 SNR:  8.249 opt:  12.735
Epoch: 140 cost = 0.129 SNR:  8.639 opt:  12.087
Epoch: 150 cost = 0.143 SNR:  8.240 opt:  12.797
Epoch: 160 cost = 0.130 SNR:  8.629 opt:  12.160
Epoch: 170 cost = 0.117 SNR:  9.058 opt:  11.622
Epoch: 180 cost = 0.109 SNR:  9.402 opt:  11.083
Epoch: 190 cost = 0.106 SNR:  9.486 opt:  10.996
Epoch: 200 cost = 0.113 SNR: 

In [9]:
Phase = np.divide(train_clean_stft, np.abs(train_clean_stft))
train_reconstruction = np.multiply(np.transpose(train_clean),Phase)
train_reconstruction = librosa.istft(train_reconstruction,hop_length=512)
s_reduced = train_clean_vector[:len(train_reconstruction)]
print("SNR of training data:",round(10*np.log10(np.sum(s_reduced**2)/np.sum((s_reduced-train_reconstruction)**2)),2))

SNR of training data: 13.87


## Reconstructing and saving denoised test1 audio

In [0]:
Phase = np.divide(test1_stft, np.abs(test1_stft))
test1_reconstruction = np.multiply(np.transpose(test1_clean),Phase)
test1_reconstruction = librosa.istft(test1_reconstruction,hop_length=512)
librosa.output.write_wav('test1_cleaned_1d.wav', test1_reconstruction, test1_sr)
files.download('test1_cleaned_1d.wav')

## Reconstructing and saving denoised test2 audio

In [0]:
Phase = np.divide(test2_stft, np.abs(test2_stft))
test2_reconstruction = np.multiply(np.transpose(test2_clean),Phase)
test2_reconstruction = librosa.istft(test2_reconstruction,hop_length=512)
librosa.output.write_wav('test2_cleaned_1d.wav', test2_reconstruction, test2_sr)
files.download('test2_cleaned_1d.wav')