# Image Generation

This notebook reads the raw HDF5 (`.h5`) file of strain-vs-time data and generates an HDF5 file containing square spectrogram images, one per detector channel of each sample.

In [7]:
import os

import h5py
import numpy as np
from gwpy.timeseries import TimeSeries
from PIL import Image
from tqdm import tqdm

# Load Dataset 

In [8]:
# Read the four event classes out of the raw HDF5 file into in-memory numpy arrays.
data_path = '/home/toomeh/uw/spring-24/phys-417/final-project/data/GW2_Andy.h5'
with h5py.File(data_path, 'r') as f:
    # np.array(...) reads each dataset into memory, so the arrays remain usable
    # once the file has been closed at the end of the `with` block.
    background_array, binaryblackhole_array, ccsn_array, glitch_array = (
        np.array(f[key]) for key in ('background', 'binaryblackhole', 'ccsn', 'glitch')
    )

# Author's note: "H" and "L" stand for Hanford and Livingston, the two LIGO detectors.
# The order of this list matters: generate_output_file pairs these arrays
# positionally with its output dataset names.
array_list = [background_array, binaryblackhole_array, ccsn_array, glitch_array]

In [9]:
def generate_square_spectrogram(strain_input:np.ndarray, frequency_bounds=(30, 500), fduration=0.01, fftlength=0.01, image_size=128, sample_rate=4096):
    """Generate one square spectrogram given a set of time series strain data

    The strain is wrapped in a gwpy ``TimeSeries``, Q-transformed, and the resulting
    time-frequency map is resampled to ``image_size`` x ``image_size`` pixels with PIL.

    Args:
        strain_input (np.ndarray): input strain vs time data for a single detector
        frequency_bounds (tuple): (low, high) frequency range in Hz, passed to the
            q transform as ``frange``. Defaults to (30, 500)
        fduration (float): passed to the q transform as ``fduration``; per the gwpy
            documentation this is the duration in seconds of the time-domain
            whitening filter. Defaults to 0.01
        fftlength (float): passed to the q transform as ``fftlength``; per the gwpy
            documentation this is the FFT length in seconds used for the ASD
            estimate that whitens the data. Defaults to 0.01
        image_size (int): side length in pixels of the square output image. Defaults to 128
        sample_rate (float): sampling rate of ``strain_input`` in Hz. Defaults to 4096

    Returns:
        np.ndarray: square image of the spectrogram for the corresponding strain data,
        with shape (image_size, image_size) and every value divided by 255.
    """
    # Wrap the raw samples in a gwpy TimeSeries so the built-in q transform can be used.
    time_series = TimeSeries(strain_input, sample_rate=sample_rate)
    q_transform = time_series.q_transform(frange=frequency_bounds, fduration=fduration, fftlength=fftlength)

    # gwpy spectrograms are indexed (time, frequency); transpose so that image
    # rows correspond to frequency bins and columns to time bins.
    spectrogram_image = Image.fromarray(q_transform.value.T)
    resized_image = spectrogram_image.resize((image_size, image_size))

    # NOTE(review): the q-transform values are energies, not 8-bit pixel
    # intensities, so dividing by 255 looks like a constant rescale rather than
    # a mapping onto [0, 1]. Kept as-is so previously generated files stay
    # comparable -- confirm this is the intended normalization.
    return np.array(resized_image) / 255.0

# Call the function on each part of the dataset

In [10]:
def process_array(array:np.ndarray, frequency_bounds:tuple, fduration:float, fftlength:float, output_image_size:int) -> np.ndarray:
    """Convert every strain series in a raw data array into a square spectrogram image

    The first two axes of ``array`` are iterated (datapoint, then detector) and the
    remaining axis is handed to ``generate_square_spectrogram`` as one strain series.

    Args:
        array (np.ndarray): raw data input array, indexed as [datapoint, detector, sample]
        frequency_bounds (tuple): frequency range forwarded to generate_square_spectrogram
        fduration (float): fduration forwarded to generate_square_spectrogram
        fftlength (float): fftlength forwarded to generate_square_spectrogram
        output_image_size (int): side length in pixels of each output image

    Returns:
        np.ndarray: array of shape (datapoints, detectors, output_image_size, output_image_size)
        containing the square spectrogram images
    """
    sample_count, detector_count = array.shape[0], array.shape[1]
    images = np.empty((sample_count, detector_count) + (output_image_size,) * 2)

    # One progress-bar tick per datapoint; all detectors of a datapoint are done per tick.
    for sample in tqdm(range(sample_count)):
        for detector in range(detector_count):
            images[sample, detector] = generate_square_spectrogram(
                array[sample, detector, :],
                frequency_bounds=frequency_bounds,
                fduration=fduration,
                fftlength=fftlength,
                image_size=output_image_size,
            )
    return images



In [11]:
def _fraction_digits(value:float) -> str:
    """Return the characters after the decimal point in ``str(value)`` (e.g. 0.03 -> '03')."""
    _, dot, digits = str(value).partition('.')
    if not dot:
        raise ValueError(
            f'expected a float whose string form contains a decimal point, got {value!r}'
        )
    return digits


def generate_output_file(image_size:int, fduration:float, fftlength:float, overwrite:bool=False):
    """creates the output h5 that contains the qtransform of each datapoint in the raw dataset

    Reads the module-level ``array_list`` and writes one dataset per entry
    ('background', 'bbh', 'ccsn', 'glitch') to an HDF5 file whose name encodes
    ``image_size``, ``fduration`` and ``fftlength``.

    The function is safe to re-run: a dataset that is already present in the
    output file is skipped before any spectrograms are computed, so an
    interrupted run resumes where it stopped. Pass ``overwrite=True`` to
    recompute and replace existing datasets instead.

    Args:
        image_size (int): Output image size
        fduration (float): fduration forwarded to the q transform; expected to be a
            float below 1 (e.g. 0.03), since only its fractional digits go into the file name
        fftlength (float): fftlength forwarded to the q transform; same expectation as fduration
        overwrite (bool): recompute and replace datasets that already exist in the
            output file. Defaults to False

    Raises:
        ValueError: if ``str(fduration)`` or ``str(fftlength)`` has no decimal point
            (e.g. an int), so no file-name tag can be derived from it.
    """
    frequency_bounds = (30, 500)
    fduration_str = _fraction_digits(fduration)
    fftlength_str = _fraction_digits(fftlength)
    output_file_path = f'/home/toomeh/uw/spring-24/phys-417/final-project/data/square_spectrograms/{image_size}x{image_size}_0{fduration_str}_0{fftlength_str}_long_images.h5' 
    notes = f'qtransform with params {frequency_bounds}, {fduration_str},{fftlength_str}, {image_size} '
    array_names = ['background', 'bbh', 'ccsn', 'glitch']

    # Create the target directory up front so a missing folder cannot fail the
    # write after the (slow) spectrogram generation has already run.
    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

    # Loop through each array in the raw data and call our image generation function
    for name, array in zip(array_names, array_list):
        # Check for an existing dataset BEFORE the expensive computation; the
        # file is closed again so it is not held open while images are generated.
        with h5py.File(output_file_path, 'a') as hf:
            already_written = name in hf
        if already_written and not overwrite:
            print(f'{name}: already present in {output_file_path}, skipping (pass overwrite=True to regenerate)')
            continue

        reshaped_array = process_array(array, frequency_bounds, fduration, fftlength, image_size)

        # write each image array to the h5 file
        with h5py.File(output_file_path, 'a') as hf:
            if name in hf:
                # create_dataset refuses to reuse an existing name, so drop the old one first.
                del hf[name]
            dataset = hf.create_dataset(name, data=reshaped_array)
            # Record the generation parameters alongside the data
            dataset.attrs['notes'] = notes


In [12]:
# Build the 128x128 spectrogram file with fduration = fftlength = 0.03.
generate_output_file(image_size=128, fduration=0.03, fftlength=0.03)

100%|██████████| 2048/2048 [10:01<00:00,  3.41it/s]
100%|██████████| 2048/2048 [08:39<00:00,  3.94it/s]
100%|██████████| 2048/2048 [09:06<00:00,  3.75it/s]
100%|██████████| 2048/2048 [09:24<00:00,  3.63it/s]
