# Making the training sample from CANDELS GOODS-S

By Shooby, last edited Sep 3 2020



In [1]:
import numpy as np
import astropy.io.fits as pyfits
import matplotlib.pyplot as plt
import astropy.wcs as wcs
from astropy import units as u
from astropy.coordinates import SkyCoord
from scipy import ndimage
from PIL import Image


%matplotlib inline

# High resolution GOODS-S galaxies

In [8]:
# Read the GOODS-S catalogue table and build the sample selection mask.
gs = pyfits.getdata('../../../WFIRST_WPS/CANDELS_fits/gds.fits')

# Each cut is kept as its own boolean mask so it can be inspected separately.
redshift_cut = (gs['zbest'] > 0.05) & (gs['zbest'] < 2.0)
class_star_cut = gs['CLASS_STAR'] < 0.8
hmag_cut = gs['Hmag'] < 25.5
isoarea_cut = gs['ISOAREA_IMAGE_F160W'] >= 60
# B_IMAGE_1 / A_IMAGE_1 above 0.4 (presumably the minor/major axis ratio,
# i.e. drops very elongated sources; confirm against the catalogue README).
axis_ratio_cut = (gs['B_IMAGE_1'] / gs['A_IMAGE_1']) > 0.4

sel1 = redshift_cut & class_star_cut & hmag_cut & isoarea_cut & axis_ratio_cut

# Number of catalogue rows that pass every cut.
print(len(gs[sel1]))

4156


In [9]:
def radec2xy(ra,dec,wc):
    '''Convert a sky position given in degrees to pixel coordinates.

    ra, dec : coordinates handed to ``SkyCoord`` with ``unit='deg'``.
    wc      : WCS object used for the sky-to-pixel transformation.

    Returns the first two pixel coordinates produced by
    ``skycoord_to_pixel`` (zero-based, ``origin=0``) as a pair.
    '''
    sky_position = SkyCoord(ra, dec, unit='deg')
    pixel_position = wcs.utils.skycoord_to_pixel(
        sky_position,
        wc,
        origin=0,
        mode=u'wcs',
    )
    pix_x = pixel_position[0]
    pix_y = pixel_position[1]
    return pix_x, pix_y
    
def cut(ra,dec,andaze,filename):
    '''Return a square cutout (postage stamp) of an image around a sky position.

    Parameters
    ----------
    ra, dec : sky coordinates in degrees (forwarded to ``radec2xy``).
    andaze : half-width of the stamp in pixels; the stamp is ``2*andaze``
        pixels on a side.
    filename : path of the FITS image to cut from; its header supplies the WCS.

    Returns
    -------
    Array whose first two axes have length ``2*andaze``.  When the stamp lies
    entirely inside the image this is exactly
    ``image[y-andaze:y+andaze, x-andaze:x+andaze]``.  When it crosses an image
    edge, the part outside the image is filled with zeros instead of letting
    the negative slice start wrap around (which used to yield an empty or
    undersized array).
    '''
    hdr = pyfits.getheader(filename)
    w = wcs.WCS(hdr)
    x, y = radec2xy(ra, dec, w)
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int truncates toward zero exactly as np.int did.
    x, y = int(x), int(y)

    image = pyfits.getdata(filename)
    n_rows, n_cols = image.shape[0], image.shape[1]
    size = 2 * andaze
    y0, x0 = y - andaze, x - andaze

    # Common case: the stamp is fully inside the image.
    if y0 >= 0 and x0 >= 0 and y0 + size <= n_rows and x0 + size <= n_cols:
        return image[y0:y0 + size, x0:x0 + size]

    # Edge case: copy the overlapping region into a zero-filled stamp.
    stamp = np.zeros((size, size) + image.shape[2:], dtype=image.dtype)
    row_start, row_stop = max(y0, 0), min(y0 + size, n_rows)
    col_start, col_stop = max(x0, 0), min(x0 + size, n_cols)
    if row_start < row_stop and col_start < col_stop:
        stamp[row_start - y0:row_stop - y0, col_start - x0:col_stop - x0] = \
            image[row_start:row_stop, col_start:col_stop]
    return stamp


In [14]:
#GOODS-S all
ra1,dec1=gs['RA_1'][sel1],gs['DEC_1'][sel1]

# Mosaic that every stamp is cut from.
F775W_MOSAIC = '/Users/shemmati/Desktop/GOODS/goodss_all_acs_wfc_f775w_060mas_v1.5_drz.fits'


def _random_view(stamp):
    '''Return a randomly rotated, arcsinh-stretched 8-bit copy of ``stamp``.

    The stamp is rotated by an angle drawn uniformly from [0, 180) degrees,
    18 pixels are trimmed from every edge (100x100 -> 64x64 for the stamps
    cut below), and the arcsinh of the result is rescaled to ``uint8``.
    '''
    angle = np.random.uniform(0,180)
    rotated = ndimage.rotate(stamp,angle,mode='nearest',reshape=False)
    da = np.arcsinh(rotated[18:-18,18:-18])
    scaled = 255.0 / (da.max()+0.1) * (da - da.min())
    # Clip before casting: the scale uses da.max()+0.1 rather than the full
    # range, so strongly negative pixels can push values outside [0, 255],
    # and an unclipped uint8 cast would silently wrap them around.
    return np.clip(scaled, 0, 255).astype(np.uint8)


def _write_image_set(root, count, labels=('1', '2')):
    '''Save one rotated JPEG per label for each of the first ``count`` galaxies.

    Files are written to ``<root>/<label>/<index>.jpg``.  Galaxies whose
    stamp has a zero in its first pixel are skipped (presumably stamps that
    fall on an area without image coverage).
    '''
    # Imported here so this cell does not rely on a later cell having run.
    import os

    for label in labels:
        os.makedirs(os.path.join(root, label), exist_ok=True)

    for boz in range(count):
        # One cutout per galaxy is enough: every label uses the same stamp
        # and differs only in the random rotation applied to it.
        data = cut(ra1[boz], dec1[boz], 50, F775W_MOSAIC)
        if data[0,0] == 0:
            continue
        for label in labels:
            picture = Image.fromarray(_random_view(data))
            picture.save(root + '/' + label + '/' + str(boz) + '.jpg')


# NOTE(review): both label folders are filled from the same galaxies, so the
# two "classes" differ only by a random rotation. Confirm this is intended.
# NOTE(review): the test set reuses galaxies 0..499, which are also part of
# the training set (0..2999), so the two sets are not independent.
_write_image_set('images/training_images', 3000)
_write_image_set('images/test_images', 500)

    

In [15]:
import os
from PIL import Image
from array import *
from random import shuffle
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# Load from and save to
Names = [['images/training_images','train'], ['images/test_images','test']]

# IDX magic numbers: unsigned-byte data with 1 dimension (labels) and with
# 3 dimensions (images).
IDX_LABEL_MAGIC = 0x00000801
IDX_IMAGE_MAGIC = 0x00000803


def _list_labelled_images(root):
    '''Return shuffled ``(path, label)`` pairs for every .jpg in ``root/<label>/``.

    Only sub-directories of ``root`` are visited, so stray files such as
    ``.DS_Store`` are ignored wherever they sort.  The directory name is
    parsed as the integer label; a non-integer name raises ``ValueError``.
    '''
    entries = []
    for dirname in sorted(os.listdir(root)):
        path = os.path.join(root, dirname)
        if not os.path.isdir(path):
            continue
        label = int(dirname)
        for filename in os.listdir(path):
            if filename.endswith(".jpg"):
                entries.append((os.path.join(path, filename), label))
    shuffle(entries) # Useful for further segmenting the validation set
    return entries


for name in Names:

    entries = _list_labelled_images(name[0])
    if not entries:
        raise ValueError('No .jpg images found under ' + name[0])

    data_image = bytearray()
    data_label = bytearray()
    size = None

    for filename, label in entries:
        with Image.open(filename) as Im:
            # 8-bit greyscale copy; independent of the file once converted.
            grey = Im.convert('L')

        # A single header describes every image, so all must share one size.
        if size is None:
            size = grey.size
        elif grey.size != size:
            raise ValueError('Image size %r of %s differs from %r'
                             % (grey.size, filename, size))

        # Raw bytes of an 'L' image are one byte per pixel, row by row,
        # which is the pixel order the IDX image file expects.
        data_image.extend(grey.tobytes())
        data_label.append(label) # labels start (one unsigned byte each)

    width, height = size

    # Headers use 4-byte big-endian fields: magic number, item count and,
    # for images, the number of rows followed by the number of columns.
    count_field = len(entries).to_bytes(4, 'big')
    label_header = IDX_LABEL_MAGIC.to_bytes(4, 'big') + count_field
    image_header = (IDX_IMAGE_MAGIC.to_bytes(4, 'big') + count_field
                    + height.to_bytes(4, 'big') + width.to_bytes(4, 'big'))

    with open(name[1]+'-images-idx3-ubyte', 'wb') as output_file:
        output_file.write(image_header + data_image)

    with open(name[1]+'-labels-idx1-ubyte', 'wb') as output_file:
        output_file.write(label_header + data_label)

# gzip resulting files

import gzip
import shutil

for name in Names:
    for suffix in ('-images-idx3-ubyte', '-labels-idx1-ubyte'):
        raw_path = name[1] + suffix
        # Compress in-process instead of shelling out to the gzip command:
        # no external binary is needed, a failure raises instead of being
        # ignored, and an archive left over from an earlier run is replaced
        # rather than kept.
        with open(raw_path, 'rb') as raw_file, \
                gzip.open(raw_path + '.gz', 'wb') as gz_file:
            shutil.copyfileobj(raw_file, gz_file)
        # The gzip command deletes its input after compressing; do the same.
        os.remove(raw_path)