In [1]:
import matplotlib.pyplot as plt
from scipy.fftpack import fft
from scipy.io import wavfile
from scipy import linalg
import numpy as np

# Function that takes the path of a WAV file and returns its spectrogram, obtained by slicing the FFT of the framed data

In [2]:
def spectrogram(path):
    """Return the mean-removed magnitude spectrogram of a WAV file.

    The signal is cut into overlapping frames (0.025 s long, one every
    0.01 s), each frame goes through a 256-point FFT, and the magnitudes of
    the first 128 bins are kept. The mean of every bin over all frames is
    then subtracted, so each row of the result has zero mean.

    No taper (Hamming or otherwise) is applied: the frames are plain
    rectangular slices of the signal.

    Parameters
    ----------
    path
        Location of the WAV file; passed straight to ``wavfile.read``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(128, num_frames)``; rows are FFT bins, columns
        are frames.

    Raises
    ------
    ValueError
        If the recording is too short to yield a single frame.
    """
    sample_rate, data = wavfile.read(path)  # Sample rate (Hz) and amplitude vector

    # Framing parameters, in seconds
    window = 0.025
    shift = 0.01

    # The same parameters expressed as a number of samples
    frame_length, frame_step = int(window * sample_rate), int(shift * sample_rate)
    signal_length = len(data)

    # Number of frames; int() truncates because a frame count must be a whole number.
    num_frames = int((signal_length - frame_length) / frame_step)
    if num_frames < 1:
        raise ValueError(
            "signal too short: %d samples cannot fill a %d-sample frame with a %d-sample step"
            % (signal_length, frame_length, frame_step)
        )

    # The last index used below is (num_frames - 1) * frame_step + frame_length - 1,
    # which is always < signal_length for the frame count above, so no zero
    # padding is required. (The previous padding of
    # num_frames * num_frames + frame_length - signal_length zeros was never
    # indexed and became negative, crashing np.zeros, for short recordings.)
    # The cast to float64 and the flattening reproduce what np.append did.
    # NOTE(review): assumes a mono recording; a multi-channel array would be
    # flattened (channels interleaved) here - confirm inputs are mono.
    signal = np.asarray(data, dtype=np.float64).ravel()

    # Index matrix of shape (num_frames, frame_length): row i holds the sample
    # positions i * frame_step, ..., i * frame_step + frame_length - 1.
    offsets = frame_step * np.arange(num_frames)
    storage = np.arange(frame_length)[np.newaxis, :] + offsets[:, np.newaxis]

    # Gather the samples of every frame
    frames = signal[storage]

    point_FFT = 256

    # Magnitude of the FFT of each frame. np.fft.fft crops frames longer than
    # point_FFT samples to their first point_FFT samples and zero-pads shorter ones.
    magnitudes = np.absolute(np.fft.fft(frames, point_FFT))

    # Keep the first 128 bins of every frame -> shape (num_frames, 128)
    magnitudes = magnitudes[:, 0:128]

    # Remove the per-bin mean taken over all frames
    centred = magnitudes - np.mean(magnitudes, axis=0)

    # Bins along rows, frames along columns
    return centred.T

# Function to input the spectrogram and return the transform matrix

In [3]:
def transform(spectrogram):
    """Build a whitening matrix from a (features x frames) spectrogram.

    With ``S = spectrogram @ spectrogram.T`` (the scatter matrix, not divided
    by the number of frames) and its eigendecomposition ``S = U diag(l) U.T``,
    the returned matrix is ``W = diag(l ** -0.5) @ U.T``, so that
    ``W @ S @ W.T`` is the identity when ``S`` has full rank.

    Eigenvalues that are zero or negligible relative to the largest one
    (rank-deficient input, or round-off producing tiny negative values) are
    not inverted; the corresponding rows of ``W`` are set to zero. This keeps
    the result finite instead of filling it with inf/NaN.

    Parameters
    ----------
    spectrogram : numpy.ndarray
        2-D array with one row per feature and one column per frame.

    Returns
    -------
    numpy.ndarray
        Square matrix with as many rows as ``spectrogram`` has rows.
    """
    # Scatter matrix of the features (unnormalised covariance)
    xcov = spectrogram @ spectrogram.T

    # Eigendecomposition of the symmetric matrix; eigenvalues come back in ascending order
    eigval, eigvec = linalg.eigh(xcov)

    # Anything below this threshold is treated as numerically zero
    tolerance = eigval.max() * eigval.size * np.finfo(eigval.dtype).eps
    usable = eigval > tolerance

    # lambda^(-1/2) for the usable directions, 0 for the null ones
    inv_sqrt = np.zeros_like(eigval)
    inv_sqrt[usable] = 1.0 / np.sqrt(eigval[usable])

    # Scaling row i of U.T by inv_sqrt[i] equals diag(inv_sqrt) @ U.T
    transform_matrix = inv_sqrt[:, np.newaxis] * eigvec.T

    return transform_matrix

# Function to apply it all

In [4]:
def apply_whitening_to_get_avg(transform_from, transform_to):
    """Mean absolute off-diagonal covariance after whitening one file with another's transform.

    Parameters
    ----------
    transform_from : str
        Base name (no extension) of the WAV file under ``Data/speechFiles/``
        from which the whitening transform is derived.
    transform_to : str
        Base name (no extension) of the WAV file under ``Data/speechFiles/``
        to which that transform is applied.

    Returns
    -------
    float
        Average of the absolute values of the off-diagonal entries of the
        covariance matrix of the transformed spectrogram.
    """
    # Learn the whitening matrix from the first recording
    source_wav = 'Data/speechFiles/' + transform_from + '.wav'
    whitener = transform(spectrogram(source_wav))

    # Apply it to the spectrogram of the second recording
    target_wav = 'Data/speechFiles/' + transform_to + '.wav'
    whitened = whitener @ spectrogram(target_wav)

    # Absolute covariance of the whitened features, normalised by the frame count
    abs_cov = np.absolute((whitened @ whitened.T) / (whitened.shape[1]))

    # Sum of everything minus the diagonal, divided by the number of off-diagonal entries
    n_rows, n_cols = abs_cov.shape
    off_diagonal_total = np.sum(abs_cov) - np.trace(abs_cov)
    return off_diagonal_total / ((n_rows * n_cols) - n_rows)

To get the average of the absolute values of the off-diagonal entries, pass two file names as strings: first the data from which the transform
is to be derived, and then the data to which the transform is to be applied.

In [5]:
# Transform derived from 'clean' and applied to 'clean'
apply_whitening_to_get_avg('clean', 'clean')

4.982378518944804e-16

As expected, whitening the clean data with the transform learnt from the clean data gives a value that is almost zero. This shows that, after whitening, the covariance between the different features of the clean file is effectively zero.

In [6]:
# Transform derived from 'clean' and applied to 'noisy'
apply_whitening_to_get_avg('clean', 'noisy')

0.12407493705300103

Whitening the noisy data with the transform learnt from the clean data gives a non-zero value of considerable magnitude. This shows that the covariance between the noisy-file features, when whitened with the clean-file transform, is tangible but not very high. This means that there is a decent amount of noise in the audio.

In [7]:
# Transform derived from 'noisy' and applied to 'noisy'
apply_whitening_to_get_avg('noisy', 'noisy')

5.330518397637393e-18

As expected, whitening the noisy data with the transform learnt from the noisy data gives a value that is almost zero. This shows that the covariance between the noisy-file features is closer to zero under their own transform than under the transform learnt from the clean file.

In [8]:
# Transform derived from 'noisy' and applied to 'clean'
apply_whitening_to_get_avg('noisy', 'clean')

0.00017047872267251726

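Whitening the clean data with the transform learnt from the noisy data gives a very small covariance between features. This suggests that the noisy features are closer to the clean features than in the reverse condition, i.e. the transform learnt from the noisy data fits the clean data better than the transform learnt from the clean data fits the noisy data.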