In [1]:
# Imports
import numpy as np
import os
from astropy.io import fits
from tqdm import tqdm

In [2]:
# Some utility functions
def id_from_filename(filename):
    """
    Parses the integer object ID out of an image file name.

    The ID is the text that comes right after 'ID=' in the name and runs up
    to the next comma (or to the end of that piece if there is no comma).
    """
    after_marker = filename.split('ID=')[1]  # Everything following the 'ID=' marker
    id_text = after_marker.split(',')[0]     # Cut at the first comma
    return int(id_text)
def band_from_filename(filename):
    """
    Returns the band of an image file, taken to be the final character of its name
    """
    last_char = filename[-1]
    return last_char

def grab_numpy(filename, img_dir='./img_files', px=28):
    """
    Extracts the numpy image from the relevant fits file and crops it
    with trim_to_shape.

    Parameters
    ----------
    filename : str
        Name of the fits file inside `img_dir`.
    img_dir : str, optional
        The folder where all the image files are in/extracted to.
    px : int, optional
        Side length handed on to trim_to_shape.

    Returns
    -------
    Whatever trim_to_shape returns for the data that fits.getdata reads
    from the file.
    """
    img = fits.getdata(os.path.join(img_dir, filename))
    return trim_to_shape(img, px=px)

def trim_to_shape(img, px=28):
    """
    Trims an input numpy array to a px*px square about its centre.

    Only the first two axes are cropped. If the array is shorter than px
    along an axis, that axis is returned uncropped (so the result can be
    smaller than px*px; callers should check the output shape).

    Parameters
    ----------
    img : numpy array with at least two axes
    px : int, optional
        Side length of the crop; must be even.

    Returns
    -------
    The central window of `img`, at most px long along each of the first
    two axes.

    Raises
    ------
    AssertionError
        If px is odd.
    """
    assert px % 2 == 0, 'px must be even'
    half = px // 2
    center_x = img.shape[0] // 2
    center_y = img.shape[1] // 2
    # Clamp the start at 0: a negative start would be read by numpy as an
    # offset from the END of the axis and return a wrapped-around slice.
    x_start = max(center_x - half, 0)
    y_start = max(center_y - half, 0)
    # A stop past the end of the axis is already clipped by numpy slicing.
    return img[x_start:center_x + half,
               y_start:center_y + half]

In [3]:
# Build the list of image files, ordered by object ID
img_files_list = os.listdir('./img_files')       # Every entry in the image folder
img_files_list_sorted = list(img_files_list)     # Copy, so the unsorted listing is kept as well
img_files_list_sorted.sort(key=id_from_filename) # Files of the same object end up next to each other

In [4]:
# Count the number of IDs present, needed to initialise later arrays
# Counting the distinct IDs with a set does not rely on the list being sorted,
# and gives 0 for an empty folder instead of raising an IndexError
num_ids = len({id_from_filename(filename) for filename in img_files_list_sorted})
num_ids

2747

In [6]:
IMG_PX = 28  # Side length (in pixels) an image must have after trimming to be usable

band_dict = {'g':0,  # Changes the order from alphabetical to wavelength
             'r':1,
             'i':2,
             'z':3,
             'Y':4}
num_bands = len(band_dict)

all_imgs = np.zeros((num_ids,IMG_PX,IMG_PX,num_bands)) # Will contain all the images

# Per-object bookkeeping, filled in while looping over the files
obj_ids = []                                              # ID of every object, in the order encountered
obj_num_files = np.zeros((num_ids,), dtype=int)           # How many files were seen for each object
obj_band_ok = np.zeros((num_ids,num_bands), dtype=bool)   # Which bands of each object gave an IMG_PX*IMG_PX image

# Initialising utility variables
prev_file_id = None # The ID of the last file looked at
obj_num = -1        # The object we are currently looking at

for filename in tqdm(img_files_list_sorted):
    file_id = id_from_filename(filename)
    if file_id != prev_file_id:                    # First file of a new object (relies on the list being sorted by ID)
        obj_num += 1
        obj_ids.append(file_id)
        prev_file_id = file_id
    obj_num_files[obj_num] += 1
    band_idx = band_dict[band_from_filename(filename)] # A band that is not in band_dict raises a KeyError
    file_img = grab_numpy(filename)                # Extract image from file
    if file_img.shape == (IMG_PX,IMG_PX):
        all_imgs[obj_num,:,:,band_idx] = file_img  # Adding this image to the big array of images
        obj_band_ok[obj_num,band_idx] = True
    # Images of any other shape are not stored; their object is rejected below

# An object is successful only if it has exactly one file per band and every one of them
# was usable. Deciding this once, after the loop, means the last object needs no special
# handling and an empty file list simply gives empty results.
success = obj_band_ok.all(axis=1) & (obj_num_files == num_bands)
all5_2828_mask = success.astype(float)  # Which objects have images in all 5 bands that can be cropped to 28*28? (1 = yes, 0 = no)
all5_2828_ids = [obj_id for obj_id, ok in zip(obj_ids, success) if ok]  # A list of successfully extracted objects' IDs

100%|████████████████████████████| 13735/13735 [02:48<00:00, 81.51it/s]


In [7]:
# Keep only the objects flagged as successful, then report how many there are
success_rows = all5_2828_mask == 1
imgs_all5_2828 = all_imgs[success_rows]
num_success = int(np.sum(all5_2828_mask))
num_objects = len(all5_2828_mask)
print(f'Images successfully collected for {num_success} out of {num_objects} objects')
num_success_ids = len(all5_2828_ids)
print(f'Shape of images array: {imgs_all5_2828.shape}; Number of successful IDs: {num_success_ids}')

Images successfully collected for 2747 out of 2747 objects
Shape of images array: (2747, 28, 28, 5); Number of successful IDs: 2747


In [8]:
# Write the filtered images and the matching object IDs to one compressed archive
arrays_to_save = {'imgs': imgs_all5_2828,
                  'ids': np.array(all5_2828_ids)}
np.savez_compressed('./images.npz', **arrays_to_save)