# Prepare Dataset For Pix2Pix GAN Network

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy.stats import norm
import pandas as pd

%matplotlib inline
import IPython.display
from ipywidgets import interact, interactive, fixed

In [2]:
# Packages we're using
import numpy as np
import matplotlib.pyplot as plt
import copy
from scipy.io import wavfile
from scipy.signal import butter, lfilter
import scipy.ndimage

In [3]:
import warnings

# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides numeric warnings (e.g. divide-by-zero) — confirm that is intended.
warnings.simplefilter('ignore')

In [4]:
# Environment flag intended for TensorFlow's GPU allocator.
# NOTE(review): TensorFlow is not imported in the visible cells — confirm this is still needed.
os.environ.update(TF_FORCE_GPU_ALLOW_GROWTH='true')

# Try 
https://github.com/sikora507/elgen/blob/master/src/audio%20analysis.ipynb

In [5]:
import numpy as np
import math
from PIL import Image
import time

In [6]:
# STFT geometry: FFT size, analysis window size and hop (half a window, i.e. 50% overlap).
FFT_LENGTH = 512
WINDOW_LENGTH = 512
WINDOW_STEP = WINDOW_LENGTH // 2

# Running extrema, updated by generateSpectrogramForWave on every call and read back
# when a spectrogram image is decoded. They start at +/-inf so the first observed
# value always replaces them.
magnitudeMin = phaseMin = float("inf")
magnitudeMax = phaseMax = float("-inf")

In [7]:
def amplifyMagnitudeByLog(d):
    """Compress a magnitude value with a scaled base-10 logarithm.

    Returns ``188.301 * log10(d + 1)``. With this factor an input of 0 maps to 0
    and an input of 510 (= 255 * 2) maps to approximately 510.
    weakenAmplifiedMagnitude is the inverse mapping.
    """
    shifted = d + 1  # shift so that d == 0 gives log10(1) == 0
    return 188.301 * math.log10(shifted)

def weakenAmplifiedMagnitude(d):
    """Invert amplifyMagnitudeByLog: return ``10 ** (d / 188.301) - 1``."""
    exponent = d / 188.301
    return math.pow(10, exponent) - 1

def generateLinearScale(magnitudePixels, phasePixels, 
                        magnitudeMin, magnitudeMax, phaseMin, phaseMax):
    """Encode magnitude and phase matrices as an RGB uint8 image array.

    Magnitudes are normalised to 0..510 using ``magnitudeMin``/``magnitudeMax``,
    compressed with the same log curve as amplifyMagnitudeByLog
    (``188.301 * log10(x + 1)``) and split over two channels: red holds the part
    up to 255, green holds whatever exceeds 255. Phases are normalised to 0..255
    using ``phaseMin``/``phaseMax`` and stored in blue. Values are truncated to
    integers.

    Unlike a per-pixel loop, this works on whole arrays and leaves the input
    arrays untouched.

    Parameters:
        magnitudePixels: 2-D array (height, width) of magnitudes.
        phasePixels: 2-D array of the same shape holding phases.
        magnitudeMin, magnitudeMax: range used to normalise magnitudes.
        phaseMin, phaseMax: range used to normalise phases.

    Returns:
        uint8 array of shape (height, width, 3).
    """
    magnitude = np.asarray(magnitudePixels, dtype=float)
    phase = np.asarray(phasePixels, dtype=float)
    height, width = magnitude.shape[0], magnitude.shape[1]
    magnitudeRange = magnitudeMax - magnitudeMin
    phaseRange = phaseMax - phaseMin

    # A zero range (constant input) would divide by zero; encode it as all zeros.
    if magnitudeRange != 0:
        scaledMagnitude = (magnitude - magnitudeMin) / magnitudeRange * 255 * 2
    else:
        scaledMagnitude = np.zeros_like(magnitude)
    if phaseRange != 0:
        scaledPhase = (phase - phaseMin) / phaseRange * 255
    else:
        scaledPhase = np.zeros_like(phase)

    # Keep the log argument >= 1 and the encoded values inside what two uint8
    # channels (magnitude) or one uint8 channel (phase) can hold.
    scaledMagnitude = np.maximum(scaledMagnitude, 0)
    amplified = np.clip(188.301 * np.log10(scaledMagnitude + 1), 0, 255 * 2)
    scaledPhase = np.clip(scaledPhase, 0, 255)

    rgbArray = np.zeros((height, width, 3), 'uint8')
    rgbArray[:, :, 0] = np.minimum(amplified, 255).astype('uint8')
    rgbArray[:, :, 1] = np.maximum(amplified - 255, 0).astype('uint8')
    rgbArray[:, :, 2] = scaledPhase.astype('uint8')
    return rgbArray

def recoverLinearScale(rgbArray, magnitudeMin, magnitudeMax, 
                       phaseMin, phaseMax):
    """Decode an RGB spectrogram array back into magnitude and phase matrices.

    The encoded magnitude is the sum of the red and green channels; it is passed
    through the inverse log curve (``10 ** (x / 188.301) - 1``, the same mapping
    as weakenAmplifiedMagnitude) and rescaled from 0..510 to
    ``magnitudeMin..magnitudeMax``. The blue channel is rescaled from 0..255 to
    ``phaseMin..phaseMax``. The computation is done on whole arrays rather than
    pixel by pixel.

    Parameters:
        rgbArray: array of shape (height, width, >=3); only channels 0..2 are read.
        magnitudeMin, magnitudeMax: range the magnitudes were normalised with.
        phaseMin, phaseMax: range the phases were normalised with.

    Returns:
        (magnitudeVals, phaseVals): two float arrays of shape (height, width).
    """
    rgb = np.asarray(rgbArray)
    magnitudeRange = magnitudeMax - magnitudeMin
    phaseRange = phaseMax - phaseMin

    # Convert to float before adding so the uint8 channels cannot wrap around.
    encodedMagnitude = rgb[:, :, 0].astype(float) + rgb[:, :, 1].astype(float)
    encodedPhase = rgb[:, :, 2].astype(float)

    linearMagnitude = np.power(10.0, encodedMagnitude / 188.301) - 1
    magnitudeVals = (linearMagnitude / (255 * 2) * magnitudeRange) + magnitudeMin
    phaseVals = (encodedPhase / 255 * phaseRange) + phaseMin
    return magnitudeVals, phaseVals

In [8]:
def generateSpectrogramForWave(signal):
    """Turn a 1-D audio signal into an RGB spectrogram image.

    The signal is zero-padded to a multiple of WINDOW_STEP, cut into
    Hann-windowed frames of WINDOW_LENGTH samples taken every WINDOW_STEP
    samples, and each frame is transformed with a real FFT of FFT_LENGTH points.
    Magnitude and phase of every bin are stored with the highest frequency in
    row 0, then encoded to RGB by generateLinearScale.

    Side effects:
        Widens the module-level magnitudeMin/magnitudeMax/phaseMin/phaseMax to
        include the values seen in this signal. Because these extrema persist
        across calls, an image is normalised with the range observed *so far*,
        which can differ from the range in effect when it is decoded later.
        Prints the elapsed time in seconds.

    Parameters:
        signal: 1-D numpy array of samples.

    Returns:
        PIL.Image in 'RGB' mode with FFT_LENGTH / 2 + 1 rows and one column per frame.
    """
    global magnitudeMin, magnitudeMax, phaseMin, phaseMax
    start_time = time.time()

    # Pad with zeros up to the next multiple of the hop size.
    buffer = np.zeros(int(signal.size + WINDOW_STEP - (signal.size % WINDOW_STEP)))
    buffer[0:len(signal)] = signal
    height = int(FFT_LENGTH / 2 + 1)
    width = int(len(buffer) / (WINDOW_STEP) - 1)
    magnitudePixels = np.zeros((height, width))
    phasePixels = np.zeros((height, width))

    # The window is the same for every frame, so build it once.
    window = np.hanning(WINDOW_LENGTH)
    for w in range(width):
        frame = np.zeros(FFT_LENGTH)
        windowed = buffer[w*WINDOW_STEP:w*WINDOW_STEP + WINDOW_LENGTH] * window
        # frame holds the windowed samples, zero-padded up to FFT_LENGTH
        frame[0:len(windowed)] = windowed
        spectrum = np.fft.rfft(frame)
        # Reverse so that bin h lands in row height-h-1 (high frequencies on top).
        magnitudePixels[:, w] = np.abs(spectrum)[::-1]
        phasePixels[:, w] = np.angle(spectrum)[::-1]

    # Fold this signal's extrema into the running global ranges.
    if width > 0:
        magnitudeMin = min(magnitudeMin, float(magnitudePixels.min()))
        magnitudeMax = max(magnitudeMax, float(magnitudePixels.max()))
        phaseMin = min(phaseMin, float(phasePixels.min()))
        phaseMax = max(phaseMax, float(phasePixels.max()))

    rgbArray = generateLinearScale(magnitudePixels, phasePixels,
                                  magnitudeMin, magnitudeMax, phaseMin, phaseMax)
    elapsed_time = time.time() - start_time
    print('%.2f' % elapsed_time, 's', sep='')
    img = Image.fromarray(rgbArray, 'RGB')
    return img

In [9]:
# Make Spectrogram 

In [10]:
# Input folders with WAV recordings for the two domains.
WavPathA, WavPathB = './WAVA', './WAVB'
# Output folders: per-domain spectrogram PNGs and the side-by-side A|B images.
SpecPathA, SpecPathB, SpecPathAB = './SPECA', './SPECB', './SPECAB'

In [11]:
# os.path.join with a single argument returns that path unchanged, so these are
# aliases of the configured folders.
A_wav, B_wav = os.path.join(WavPathA), os.path.join(WavPathB)
A_spec, B_spec, AB_spec = (
    os.path.join(folder) for folder in (SpecPathA, SpecPathB, SpecPathAB)
)

# Check the WAVA and WAVB folders (delete all WAV files shorter than 5 seconds)

# Generate spectrograms from the WAV files

In [12]:
# Keep only WAV files: the conversion cell writes PNGs into this same folder, so a
# plain listing would pick them up on a re-run. Sorting fixes the processing order,
# which matters because the normalisation range accumulates from file to file.
wav_list_A = sorted(
    name for name in os.listdir(A_wav)
    if name.lower().endswith('.wav')
)
num_wavs = len(wav_list_A)

# Take a 5-second sample from each file so that all spectrograms have the same height and width

In [29]:
# Convert every WAV clip in folder A into a spectrogram PNG saved next to it.
# Iterate over the list itself: the shared num_wavs counter is overwritten by the
# folder-B listing cell, so it may not match wav_list_A.
for name_A in wav_list_A:
    # Skip anything that is not a WAV file (e.g. PNGs left by an earlier run).
    if not name_A.lower().endswith('.wav'):
        continue
    path_A = os.path.join(A_wav,name_A)
    rate, data = wavfile.read(path_A)
    # Down-mix multi-channel audio to mono by averaging over the channel axis.
    if len(data.shape) >= 2 and data.size > 0:
        if data.shape[-1] > 1:
            data = data.mean(axis=-1)
        else:
            data = np.reshape(data, data.shape[:-1])
    # Keep at most the first 5 seconds (shorter clips are left as they are).
    data = data[0 : rate * 5]
    img = generateSpectrogramForWave(data)
    # splitext handles extensions of any length, unlike slicing off 4 characters.
    img.save(os.path.splitext(path_A)[0]+'.png')

In [14]:
# Keep only WAV files: the conversion cell writes PNGs into this same folder, so a
# plain listing would pick them up on a re-run. Sorting fixes the processing order,
# which matters because the normalisation range accumulates from file to file.
wav_list_B = sorted(
    name for name in os.listdir(B_wav)
    if name.lower().endswith('.wav')
)
num_wavs = len(wav_list_B)

In [28]:
# Convert every WAV clip in folder B into a spectrogram PNG saved next to it.
# Iterate over the list itself: the shared num_wavs counter is also set by the
# folder-A listing cell, so it may not match wav_list_B.
for name_B in wav_list_B:
    # Skip anything that is not a WAV file (e.g. PNGs left by an earlier run).
    if not name_B.lower().endswith('.wav'):
        continue
    path_B = os.path.join(B_wav,name_B)
    rate, data = wavfile.read(path_B)
    # Down-mix multi-channel audio to mono by averaging over the channel axis.
    if len(data.shape) >= 2 and data.size > 0:
        if data.shape[-1] > 1:
            data = data.mean(axis=-1)
        else:
            data = np.reshape(data, data.shape[:-1])
    # Keep at most the first 5 seconds (shorter clips are left as they are).
    data = data[0 : rate * 5]
    img = generateSpectrogramForWave(data)
    # splitext handles extensions of any length, unlike slicing off 4 characters.
    img.save(os.path.splitext(path_B)[0]+'.png')

In [16]:
import os
from tqdm import tqdm
import glob
import shutil

# Move the generated spectrogram PNGs out of the WAV folders into the SPEC folders.
for wav_dir, spec_dir in ((WavPathA, SpecPathA), (WavPathB, SpecPathB)):
    # The target must exist as a directory: otherwise shutil.copy would write every
    # PNG to a single file named like the folder, and the originals are removed below.
    os.makedirs(spec_dir, exist_ok=True)
    for filename in glob.glob(os.path.join(wav_dir, '*.png')):
        shutil.copy(filename, spec_dir)
        os.remove(filename)

In [17]:
# import os
# import numpy as np
# from PIL import Image
# import cv2

# folderA = './SPECA'
# img_fold_A = os.path.join(folderA)
# img_list = os.listdir(img_fold_A)
# num_imgs = len(img_list)

# for n in range(num_imgs):
#     name_A = img_list[n]
#     name_B = name_A
#     path_A = os.path.join(img_fold_A,name_A)
#     im_A = Image.open(path_A)
#     im_A = im_A.resize((HEIGHT,WIDTH))
#     im_A.save(path_A)

### Pix2Pix specification - Join A(noisy data) and B(clean speech)

In [27]:
# JOIN A+B = AB

import os
import numpy as np
from PIL import Image
import cv2


# define paths for translation from domain A (images in folderA) -> domain B (images in folderB)
# NOTE(review): folderA points at SPECB and folderB at SPECA, so the left half of
# each combined image comes from SPECB — confirm this matches the intended
# translation direction.
folderB = './SPECA'
folderA = './SPECB'
dest_path = './SPECAB'

img_fold_A = os.path.join(folderA)
img_fold_B = os.path.join(folderB)

img_list = os.listdir(img_fold_A)
num_imgs = len(img_list)

img_fold_AB = os.path.join(dest_path)
# cv2.imwrite does not create missing folders; make sure the destination exists.
os.makedirs(img_fold_AB, exist_ok=True)

for name_A in img_list:
    # Pairs are matched by identical file name in both folders.
    name_B = name_A
    path_A = os.path.join(img_fold_A,name_A)
    path_B = os.path.join(img_fold_B,name_B)
#     print(path_A,path_B)
    if os.path.isfile(path_A) and os.path.isfile(path_B):
        name_AB = name_A
        path_AB = os.path.join(img_fold_AB, name_AB)
        # Context managers close the image files once the pixels are copied out.
        with Image.open(path_A) as im_A1:
            im_A = np.array(im_A1)
        with Image.open(path_B) as im_B1:
            im_B = np.array(im_B1)
        # Place the two images side by side (concatenate along the width axis).
        im_AB = np.concatenate([im_A, im_B], 1)
        # imwrite reports failure through its return value rather than an exception.
        if not cv2.imwrite(path_AB, cv2.cvtColor(im_AB, cv2.COLOR_RGB2BGR)):
            raise OSError("Could not write combined image: " + path_AB)
        print("Write AB::",name_AB)

# Recover WAV audio from a spectrogram
* Test recovery of wav

In [19]:
# Sample rate passed to wavfile.write for the recovered audio.
# NOTE(review): presumably matches the source recordings (16 kHz) — verify.
rate = 16_000

In [20]:
def recoverSignalFromSpectrogram(filePath):
    """Rebuild an int16 audio signal from a spectrogram image file.

    The image is decoded to magnitude/phase with recoverLinearScale, using the
    module-level magnitudeMin/magnitudeMax/phaseMin/phaseMax, so those must hold
    the range that was in effect when the image was generated. Each column is
    turned back into a time-domain frame with an inverse real FFT and the frames
    are overlap-added every WINDOW_STEP samples.

    The overlap-add is accumulated in floating point and clipped to the int16
    range once at the end, so overlapping frames cannot wrap around.

    Parameters:
        filePath: path of the spectrogram image.

    Returns:
        1-D numpy int16 array of length WINDOW_LENGTH * width // 2 + WINDOW_STEP,
        where width is the image width in pixels.
    """
    with Image.open(filePath) as img:
        data = np.array( img, dtype='uint8' )
    width = data.shape[1]

    magnitudeVals, phaseVals \
    = recoverLinearScale(data, magnitudeMin, magnitudeMax, phaseMin, phaseMax)

    # Rows are stored highest frequency first; flip so that row index == FFT bin.
    # magnitude * exp(i*phase) == magnitude*cos(phase) + i*magnitude*sin(phase)
    spectrum = (magnitudeVals * np.exp(1j * phaseVals))[::-1, :]

    overlapAdded = np.zeros(WINDOW_LENGTH * width // 2 + WINDOW_STEP, dtype=float)
    for w in range(width):
        frame = np.fft.irfft(spectrum[:, w])
        start = w * WINDOW_STEP
        overlapAdded[start:start + WINDOW_LENGTH] += frame[:WINDOW_LENGTH]

    int16Range = np.iinfo(np.int16)
    recovered = np.clip(overlapAdded, int16Range.min, int16Range.max).astype(np.int16)
    return recovered

In [22]:
# Folder that holds the domain-A spectrograms; specAwave is the same path under
# the name used by the recovery cell.
SpecPathA = './SPECA'
specAwave = os.fspath(SpecPathA)

In [30]:
# Decode one stored spectrogram back to audio and write it next to the image as
# 'recovered.wav', using the sample rate set above.
data = recoverSignalFromSpectrogram('/'.join((specAwave, 'mif_03397_01395384626.png')))
wavfile.write('/'.join((specAwave, 'recovered.wav')), rate, data)