In [None]:
import os
import torch
import numpy as np
from pesq import pesq
import torch.nn as nn
from pystoi import stoi
from scipy.signal import get_window
import numpy as np
import matplotlib.pyplot as plt
from models import DCCRN
import librosa

In [None]:
# Configure the CUDA-related environment variables for this process in one step.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

In [None]:
# Build the network on the GPU and restore the trained weights from disk.
model = DCCRN().cuda()
chkpt_path = "/work/hyerim/remixIT/DNN-based-Speech-Enhancement-in-the-frequency-domain/models/EXPERIMENT_NUMBER_6.30_DCCRN_SDR/chkpt_97.pt"
# Remap every stored tensor onto the GPU currently in use. Without map_location,
# torch.load tries to restore tensors onto the device index they were saved from,
# which fails if that index is not visible here (CUDA_VISIBLE_DEVICES exposes one GPU).
checkpoint = torch.load(
    chkpt_path,
    map_location=torch.device("cuda", torch.cuda.current_device()),
)
# The checkpoint is a dict; the network weights are stored under the 'model' key.
model.load_state_dict(checkpoint['model'])

### Scoring tools: PESQ, STOI and SNR

In [None]:
###############################################################################
#                           PESQ (another ref)                                #
###############################################################################
# interface to PESQ evaluation, taking in two waveforms as input
def cal_pesq(fs, dirty_wavs, clean_wavs, mode='nb', fallback=5.0):
    """Score each processed waveform against its clean reference with PESQ.

    Args:
        fs: Sampling rate in Hz, forwarded to ``pesq``.
        dirty_wavs: Sequence of degraded/enhanced waveforms, one per item.
        clean_wavs: Sequence of clean reference waveforms, index-aligned
            with ``dirty_wavs``.
        mode: PESQ mode forwarded to ``pesq``; the default ``'nb'`` is the
            value this function always used.
        fallback: Score recorded for an item when scoring it fails; the
            default ``5.0`` is the value this function always used.

    Returns:
        A list with one score per item of ``dirty_wavs``.

    Notes:
        A failure on one item does not abort the batch: a warning is emitted
        and ``fallback`` is recorded for that item. Interrupts such as
        ``KeyboardInterrupt`` are no longer swallowed.
    """
    import warnings

    scores = []
    for i in range(len(dirty_wavs)):
        try:
            # pesq expects (fs, reference, degraded, mode): the clean waveform
            # is the reference and the processed waveform is the degraded one.
            pesq_score = pesq(fs, clean_wavs[i], dirty_wavs[i], mode)
        except Exception as err:
            # Keep the best-effort behaviour, but make the failure visible
            # instead of silently recording a perfect-looking score.
            warnings.warn(
                f"PESQ failed for item {i} ({err!r}); using fallback score {fallback}",
                stacklevel=2,
            )
            pesq_score = fallback
        scores.append(pesq_score)
    return scores


###############################################################################
#                                     STOI                                    #
###############################################################################
def cal_stoi(fs, estimated_speechs, clean_speechs):
    """Compute one STOI score per (clean, estimated) waveform pair.

    Args:
        fs: Sampling rate, forwarded to ``stoi``.
        estimated_speechs: Sequence of processed waveforms; its length
            determines how many pairs are scored.
        clean_speechs: Sequence of clean waveforms, index-aligned with
            ``estimated_speechs``.

    Returns:
        A list of scores from ``stoi(clean, estimated, fs, extended=False)``,
        in input order.
    """
    return [
        stoi(clean_speechs[k], estimated_speechs[k], fs, extended=False)
        for k in range(len(estimated_speechs))
    ]


###############################################################################
#                                     SNR                                     #
###############################################################################
def cal_snr(s1, s2, eps=1e-8):
    """Return the SNR in dB of ``s1`` measured against the reference ``s2``.

    Signal power is the variance of ``s2``; noise power is the variance of
    the residual ``s1 - s2``.

    Args:
        s1: Noisy/processed signal.
        s2: Reference signal.
        eps: Small constant added to the power ratio before the logarithm.

    Returns:
        ``10 * log10(signal_power / noise_power + eps)``, or ``100`` when the
        residual has zero variance.
    """
    def _power(x):
        # Mean squared deviation of x from its own mean.
        centered = x - np.mean(x)
        return np.sum(np.mean(centered ** 2))

    reference_power = _power(s2)
    residual_power = _power(s1 - s2)

    # A residual without any variation is treated as a clean signal.
    if residual_power == 0:
        return 100
    return (np.log10(reference_power / residual_power + eps)) * 10


def cal_snr_array(estimated_speechs, clean_speechs):
    """Apply ``cal_snr`` to every (estimated, clean) pair.

    Args:
        estimated_speechs: Sequence of processed signals; its length
            determines how many pairs are scored.
        clean_speechs: Sequence of reference signals, index-aligned with
            ``estimated_speechs``.

    Returns:
        A list with one ``cal_snr`` result per pair, in input order.
    """
    return [
        cal_snr(estimated_speechs[k], clean_speechs[k])
        for k in range(len(estimated_speechs))
    ]

### Evaluation on the test set

In [None]:

def preprocess_wav(file_path, sr=16000):
    """Load an audio file with ``librosa.load`` and return only the waveform.

    Args:
        file_path: Path of the audio file to read.
        sr: Sample rate passed to ``librosa.load``; defaults to 16000, the
            value this function previously hard-coded.

    Returns:
        The first element of the ``librosa.load`` result (the waveform); the
        returned sample rate is discarded.
    """
    waveform, _ = librosa.load(file_path, sr=sr)
    return waveform

In [None]:

# Start the file counter and the running PESQ / STOI sums at zero.
batch_num = 0
avg_pesq_score = avg_stoi_score = 0

In [None]:
# Collect the paths of all clean reference .wav files.
folder_path = '/work/hyerim/remixIT/DNN-based-Speech-Enhancement-in-the-frequency-domain/data/wav/test'
# os.listdir returns entries in arbitrary order; sorting makes the listing
# deterministic across runs and machines.
clean_files = sorted(os.path.join(folder_path, f)
                     for f in os.listdir(folder_path) if f.endswith('.wav'))

In [None]:
# Print the list of clean-file paths gathered above.
print(f"{clean_files}")

In [None]:


# Evaluate the restored model on every noise level and report mean PESQ / STOI.
model.eval()
fs = 16000


for db in ["-10db", "-5db", "0db", "5db", "10db"]:

    folder_path = f"/work/hyerim/remixIT/DNN-based-Speech-Enhancement-in-the-frequency-domain/data/noisy/{db}/test"
    # os.listdir gives no ordering guarantee, so sort for a reproducible listing
    # and pair clean/noisy files by file name instead of by list position.
    noisy_files = sorted(os.path.join(folder_path, f)
                         for f in os.listdir(folder_path) if f.endswith('.wav'))
    noisy_by_name = {os.path.basename(path): path for path in noisy_files}

    batch_num = 0  # number of files actually scored at this noise level
    avg_pesq_score = 0
    avg_stoi_score = 0

    with torch.no_grad():
        for clean_file in clean_files:

            file_name = os.path.basename(clean_file)
            noisy_file = noisy_by_name.get(file_name)

            if noisy_file is None:
                # No noisy counterpart with the same file name at this level.
                print(f"Not matched {clean_file} {os.path.join(folder_path, file_name)}")
                continue

            # Count only scored files so skipped ones do not dilute the averages.
            batch_num += 1

            inputs, _ = librosa.load(noisy_file, sr=fs)
            targets, _ = librosa.load(clean_file, sr=fs)

            # Add a leading batch dimension of size 1 and move to the GPU.
            inputs = torch.Tensor(inputs).unsqueeze(dim=0).cuda()
            targets = torch.Tensor(targets).unsqueeze(dim=0).cuda()

            # Only the third value returned by the model is used for scoring.
            _, _, outputs = model(inputs, targets)

            # Score the estimated speech against the clean target.
            estimated_wavs = outputs.cpu().detach().numpy()
            clean_wavs = targets.cpu().detach().numpy()

            pesq_score = cal_pesq(fs, estimated_wavs, clean_wavs)
            stoi_score = cal_stoi(fs, estimated_wavs, clean_wavs)

            # Each batch holds a single utterance, so take the first score.
            avg_stoi_score += stoi_score[0]
            avg_pesq_score += pesq_score[0]

    if batch_num == 0:
        # Nothing was scored; avoid dividing by zero.
        print(f"{db} no matched files, skipping averages")
        continue

    avg_pesq_score /= batch_num
    avg_stoi_score /= batch_num

    print(f"{db} avg_pesq {avg_pesq_score} avg_stoi {avg_stoi_score}")

In [None]:


# Evaluate the restored model on every noise level and report mean PESQ / STOI.
# NOTE(review): this cell repeats the same evaluation sweep as the preceding
# cell; consider keeping only one of them.
model.eval()
fs = 16000


for db in ["-10db", "-5db", "0db", "5db", "10db"]:

    folder_path = f"/work/hyerim/remixIT/DNN-based-Speech-Enhancement-in-the-frequency-domain/data/noisy/{db}/test"
    # os.listdir gives no ordering guarantee, so sort for a reproducible listing
    # and pair clean/noisy files by file name instead of by list position.
    noisy_files = sorted(os.path.join(folder_path, f)
                         for f in os.listdir(folder_path) if f.endswith('.wav'))
    noisy_by_name = {os.path.basename(path): path for path in noisy_files}

    batch_num = 0  # number of files actually scored at this noise level
    avg_pesq_score = 0
    avg_stoi_score = 0

    with torch.no_grad():
        for clean_file in clean_files:

            file_name = os.path.basename(clean_file)
            noisy_file = noisy_by_name.get(file_name)

            if noisy_file is None:
                # No noisy counterpart with the same file name at this level.
                print(f"Not matched {clean_file} {os.path.join(folder_path, file_name)}")
                continue

            # Count only scored files so skipped ones do not dilute the averages.
            batch_num += 1

            inputs, _ = librosa.load(noisy_file, sr=fs)
            targets, _ = librosa.load(clean_file, sr=fs)

            # Add a leading batch dimension of size 1 and move to the GPU.
            inputs = torch.Tensor(inputs).unsqueeze(dim=0).cuda()
            targets = torch.Tensor(targets).unsqueeze(dim=0).cuda()

            # Only the third value returned by the model is used for scoring.
            _, _, outputs = model(inputs, targets)

            # Score the estimated speech against the clean target.
            estimated_wavs = outputs.cpu().detach().numpy()
            clean_wavs = targets.cpu().detach().numpy()

            pesq_score = cal_pesq(fs, estimated_wavs, clean_wavs)
            stoi_score = cal_stoi(fs, estimated_wavs, clean_wavs)

            # Each batch holds a single utterance, so take the first score.
            avg_stoi_score += stoi_score[0]
            avg_pesq_score += pesq_score[0]

    if batch_num == 0:
        # Nothing was scored; avoid dividing by zero.
        print(f"{db} no matched files, skipping averages")
        continue

    avg_pesq_score /= batch_num
    avg_stoi_score /= batch_num

    print(f"{db} avg_pesq {avg_pesq_score} avg_stoi {avg_stoi_score}")