In [46]:
import numpy as np
from astropy.io import fits
from astropy.io.fits import HDUList
from astropy.wcs import WCS
from numpy import arcsinh
import matplotlib.pyplot as plt
from astropy.visualization import astropy_mpl_style
from reproject import reproject_interp
import aplpy
from aplpy.rgb import make_rgb_cube
import matplotlib.pyplot as pyplot
from PIL import Image
plt.rcParams.update(plt.rcParamsDefault)

import sdss_gz_data as sgd
import os

In [45]:
%ls -l data/

total 1694112
-rw-r--r--@ 1 will  staff  349816837 11 Mar 15:14 astromonical_data.csv.gz
-rw-r--r--@ 1 will  staff   12447360 20 Jan 21:44 frame-g-000756-2-0427.fits
-rw-r--r--@ 1 will  staff   12447360  8 Dec 12:20 frame-g-002141-3-0076.fits
-rw-r--r--@ 1 will  staff   12447360 20 Jan 21:44 frame-i-000756-2-0427.fits
-rw-r--r--@ 1 will  staff   12447360  8 Dec 12:20 frame-i-002141-3-0076.fits
-rw-r--r--@ 1 will  staff   12447360 20 Jan 21:44 frame-r-000756-2-0427.fits
-rw-r--r--@ 1 will  staff   12447360 20 Jan 14:14 frame-r-002141-3-0076.fits
-rw-r--r--@ 1 will  staff  319952110 31 Jan 20:01 input.csv
-rw-r--r--@ 1 will  staff    3409920  9 Dec 12:39 photoObj-002141-3-0076.fits
-rw-r--r--  1 will  staff   36979200 11 Feb 19:42 rgb-000756-2-0427.fits
-rw-r--r--  1 will  staff   12329280 11 Feb 19:42 rgb-000756-2-0427_2d.fits
-rw-r--r--  1 will  staff   36933120 20 Jan 17:16 rgb-002141-3-0076.fits
-rw-r--r--  1 will  staff   12314880 20 Jan 17:16 rgb-002141-3-0076_2d.fits

In [3]:
# Load the gzipped CSV through the project helper module (sdss_gz_data).
orig_data = sgd.load_data('data/astromonical_data.csv.gz')

In [13]:
# Keep object ids as strings; they are interpolated into output file names further down.
orig_data.objid = orig_data.objid.astype(str)

In [14]:
# Narrow the frame to the identifier, position and petroRad_r columns used by the cutout code.
data = orig_data[['objid','run','rerun','camcol','field','obj','ra','dec','petroRad_r']]

In [15]:
# Show the row labelled 0 as a quick sanity check of the selected columns.
data.loc[0]

objid         1237648702982979791
run                           752
rerun                         301
camcol                          1
field                         518
obj                           207
ra                        221.077
dec                      -1.10511
petroRad_r                4.75825
Name: 0, dtype: object

In [51]:
# One row per distinct (run, camcol, field) combination present in the data.
fields = data[['run','camcol','field']].drop_duplicates(['run','camcol','field'])

In [52]:
# Restrict the field list to the single frame run=756, camcol=2, field=427.
selector = (
    (fields.run == 756)
    & (fields.camcol == 2)
    & (fields.field == 427)
).values
fields = fields[selector]
fields

Unnamed: 0,run,camcol,field
1103,756,2,427


In [109]:
def scale_rgb(data, sigma=1/3, gains=(0.9, 1.1, 1.8), gamma=0.1):
    '''
        Converts a three-plane image cube to 8-bit RGB with an arcsinh stretch.

        The stretch is evaluated on the per-pixel mean of the three planes and
        applied to every plane as one common scale factor. Per-plane gains and
        a red/blue shift relative to green are applied afterwards, and the
        result is clipped to 0..255.

        input
        ------
        data: array whose first axis holds the planes in R, G, B order
              (indices 0, 1, 2)
        sigma: divisor applied to the mean before taking arcsinh
        gains: multipliers for the R, G and B planes, applied after the stretch
        gamma: fraction of (plane - green) added to the red and blue planes

        output
        ------
        uint8 array with the same shape as data
    '''
    R_IDX, G_IDX, B_IDX = 0, 1, 2

    data = np.asarray(data)
    peak = np.max(data)
    if not peak > 0:
        # Nothing lies above the zero floor (or the peak is NaN); the stretch
        # slope would divide by arcsinh(0) == 0, so return a black image.
        return np.zeros(data.shape, dtype=np.uint8)

    r = data[R_IDX].copy()
    g = data[G_IDX].copy()
    b = data[B_IDX].copy()

    # Slope chosen so that a pixel whose mean equals the peak maps to 255.
    slope = 255 / arcsinh(peak / sigma)

    mean = (r + g + b) / 3
    lit = mean > 0
    dark = ~lit
    # Pixels with a non-positive mean are forced to black.
    mean[dark] = 0
    r[dark] = 0
    g[dark] = 0
    b[dark] = 0

    # Only divide where the mean is positive; dark pixels keep a scale of 0
    # instead of producing 0/0 = NaN (and a garbage value after the int cast).
    scale = np.zeros_like(mean)
    np.divide(slope * arcsinh(mean / sigma), mean, out=scale, where=lit)

    r = (r * scale).astype(int)
    g = (g * scale).astype(int)
    b = (b * scale).astype(int)

    r = (r * gains[R_IDX]).astype(int)
    g = (g * gains[G_IDX]).astype(int)
    b = (b * gains[B_IDX]).astype(int)

    # Push red and blue away from green by a fraction of their difference.
    r += (gamma * (r - g)).astype(int)
    b += (gamma * (b - g)).astype(int)

    result = np.empty(data.shape, dtype=np.uint8)
    result[R_IDX] = np.clip(r, 0, 255)
    result[G_IDX] = np.clip(g, 0, 255)
    result[B_IDX] = np.clip(b, 0, 255)

    return result


In [111]:
def save_cutout(obj, data):
    '''
        Writes the array as the primary HDU of ./data/obj-<objid>.fits,
        replacing any existing file of that name.

        input
        ------
        obj: record exposing an objid attribute
        data: array to store
    '''
    from astropy.io import fits

    target = f'./data/obj-{obj.objid}.fits'
    fits.PrimaryHDU(data).writeto(target, overwrite=True)

def resize(data, size):
    '''
        Resamples a 2D array to size x size pixels with bicubic
        interpolation and returns the result as a numpy array.
    '''
    image = Image.fromarray(data)
    resampled = image.resize((size, size), Image.BICUBIC)
    return np.array(resampled)

def download_fits(field):
    '''
        Copies the i, r and g band frame files of a field from
        ./data/fits/<run>/<camcol>/<field>/ into ./data/.

        input
        ------
        field: record exposing run, camcol and field attributes

        output
        ------
        object array with the three copied file paths, in i, r, g order
    '''
    from shutil import copy

    bands = ['i', 'r', 'g']
    filenames = np.empty(3, dtype=object)
    file_dir = f'fits/{field.run}/{field.camcol}/{field.field}'

    for idx, band in enumerate(bands):
        filename = f'frame-{band}-{field.run:06d}-{field.camcol}-{field.field:04d}.fits'
        source = f'{file_dir}/{filename}'
        destination = f'./data/{filename}'

        print(f'Downloading {source}')
        copy(f'./data/{source}', destination, follow_symlinks=True)
        filenames[idx] = destination

    return filenames

def isolate_image_extension(fits_file, extension):
    '''
        Overwrites a FITS file in place so that it contains only the
        header and data of one of its extensions.

        input
        ------
        fits_file: file path to FITS image
        extension: Number of HDU extension containing the image data
    '''
    from astropy.io import fits

    # Read both parts first; the file is rewritten at the same path afterwards.
    ext_header = fits.getheader(fits_file, extension)
    ext_data = fits.getdata(fits_file, extension)

    fits.writeto(
        fits_file,
        ext_data,
        ext_header,
        overwrite=True,
    )

def make_data_cube(field, filenames):
    '''
        Reduces each band file to its primary HDU, then combines them into
        one cube with aplpy's make_rgb_cube.

        input
        ------
        field: record exposing run, camcol and field attributes
        filenames: paths of the band files to combine

        output
        ------
        path of the cube file that was written
    '''
    for band_file in filenames:
        isolate_image_extension(band_file, 0)

    cube_path = f'./data/rgb-{field.run:06d}-{field.camcol}-{field.field:04d}.fits'
    make_rgb_cube(filenames, cube_path)

    return cube_path

def save_png(obj, data, vmax):
    '''
        Saves the central half of an image cube as ./data/<objid>.png.

        input
        ------
        obj: record exposing an objid attribute
        data: array of shape (3, height, width)
        vmax: unused; kept so existing callers keep working
    '''
    from PIL import Image
    filename = f'./data/{obj.objid}.png'

    # Crop the image to its central half along both image axes. For the
    # 424-pixel cutouts this is 106:318 on each axis; the previous hard-coded
    # 106:308 on the last axis gave a lopsided 212x202 image.
    _, height, width = data.shape
    row_margin = height // 4
    col_margin = width // 4
    cropped = data[:, row_margin:height - row_margin, col_margin:width - col_margin]

    png_data = scale_rgb(cropped)
    # np.transpose reverses the axes so the channel axis ends up last, as PIL
    # expects; the rotation is kept exactly as in the original export.
    Image.fromarray(np.transpose(png_data)).transpose(Image.ROTATE_90).save(filename)

def cutout_object_img(obj, data_cube_filename, sizing_ratio=7.8567420132):
    '''
        Cuts a square region around one object out of each plane of a data
        cube, resizes it to 424x424 and saves it as a FITS file and a PNG.

        input
        ------
        obj: record exposing objid, run, camcol, field, ra, dec and petroRad_r
             (ra/dec are interpreted in degrees)
        data_cube_filename: path of the FITS cube; its first three planes are used
        sizing_ratio: cutout side length in arcsec, as a multiple of petroRad_r
    '''
    from astropy.io import fits
    from astropy.coordinates import SkyCoord, ICRS
    import astropy.units as u
    from astropy.nddata import Cutout2D

    image_size = 424

    obj_hash = {
        'run': obj.run,
        'camcol': obj.camcol,
        'field': obj.field,
        'ra': obj.ra,
        'dec': obj.dec
    }
    print(f'Creating cutout for {obj.objid}: {obj_hash}')

    angular_size = sizing_ratio * obj.petroRad_r
    position = SkyCoord(ra=obj.ra, dec=obj.dec, frame=ICRS, unit=u.deg)
    cutout_size = u.Quantity((angular_size, angular_size), u.arcsec)
    image_data = np.empty((3, image_size, image_size), dtype=float)

    # The context manager closes the file handle; everything that reads the
    # cube's data stays inside the block because it may be loaded lazily.
    with fits.open(data_cube_filename) as fits_file:
        hdu = fits_file[0]
        data = hdu.data
        wcs = WCS(hdu.header, naxis=2)
        vmax = np.max(data)

        for idx in range(3):
            cutout = Cutout2D(data[idx], position=position, size=cutout_size, wcs=wcs)
            image_data[idx] = resize(cutout.data, image_size)

    # save data as a fits for the data cutout
    save_cutout(obj, image_data)

    save_png(obj, image_data, vmax)

# For every selected field: stage the band files, build the RGB cube, then
# cut out each catalogue object that belongs to that field.
for _, row in fields.iterrows():
    filenames = download_fits(row)
    data_cube_filename = make_data_cube(row, filenames)

    selector = (
        (data.run == row['run'])
        & (data.camcol == row['camcol'])
        & (data.field == row['field'])
    ).values

    curr_field_data = data[selector]
    for _, obj in curr_field_data.iterrows():
        cutout_object_img(obj, data_cube_filename)


Downloading fits/756/2/427/frame-i-000756-2-0427.fits
Downloading fits/756/2/427/frame-r-000756-2-0427.fits
Downloading fits/756/2/427/frame-g-000756-2-0427.fits
Creating cutout for 1237648720693756176: {'run': 756, 'camcol': 2, 'field': 427, 'ra': 179.754212141942, 'dec': -0.545451447409319}




Creating cutout for 1237648720693756035: {'run': 756, 'camcol': 2, 'field': 427, 'ra': 179.80368305327, 'dec': -0.523819565383534}
Creating cutout for 1237648720693756115: {'run': 756, 'camcol': 2, 'field': 427, 'ra': 179.68640985059898, 'dec': -0.6029183904689069}
Creating cutout for 1237648720693755918: {'run': 756, 'camcol': 2, 'field': 427, 'ra': 179.68929342839297, 'dec': -0.454379058425512}
Creating cutout for 1237648720693756163: {'run': 756, 'camcol': 2, 'field': 427, 'ra': 179.71801538001802, 'dec': -0.530204164688646}


In [112]:
# NOTE(review): `input_data`, CONFIDENCE_LEVEL and the *_GALAXY_TYPE constants are
# not defined in the visible part of this notebook (the recorded output is a
# NameError); they must be defined by an earlier cell before this one can run.
data = input_data.copy()
combined_spiral = data.spiralclock + data.spiralanticlock + data.edgeon
data['galaxy_type'] = UNKNOWN_GALAXY_TYPE
data['combined_spiral'] = combined_spiral
# The spiral assignment runs last, so it wins when both thresholds are exceeded.
data.loc[data.debiased_elliptical > CONFIDENCE_LEVEL, 'galaxy_type'] = ELLIPTICAL_GALAXY_TYPE
data.loc[data.debiased_spiral > CONFIDENCE_LEVEL, 'galaxy_type'] = SPIRIAL_GALAXY_TYPE

# Count rows, not cells: DataFrame.size is rows * columns, which made the
# num_of_* values wrong even though the ratios below happened to cancel out.
num_of_elliptical = int((data.galaxy_type == ELLIPTICAL_GALAXY_TYPE).sum())
num_of_spirial = int((data.galaxy_type == SPIRIAL_GALAXY_TYPE).sum())
num_of_unknown = int((data.galaxy_type == UNKNOWN_GALAXY_TYPE).sum())
total_count = len(data)

print(num_of_elliptical / total_count)
print(num_of_spirial / total_count)
print(num_of_unknown / total_count)
print(num_of_spirial / (num_of_elliptical + num_of_spirial))

NameError: name 'input_data' is not defined

In [None]:
# http://skyserver.sdss.org/dr12/SkyserverWS/ImgCutout/getjpeg?ra=224.5941&dec=-1.09&width=512
from urllib.request import urlopen
from PIL import Image

In [None]:
GZ_IMAGE_SIZE = 424
BASE_CUTOUT_SCALE = 0.008

def download_image(row, image_size=GZ_IMAGE_SIZE, padding_scale=1.0):
    '''
        Fetches a square JPEG cutout centred on an object from the SDSS
        SkyServer ImgCutout service and returns it as a PIL image.

        input
        ------
        row: mapping with 'petroRad_r', 'ra' and 'dec' entries
        image_size: width and height of the requested image, in pixels
        padding_scale: extra multiplier on the requested scale

        output
        ------
        PIL image decoded from the downloaded bytes
    '''
    from io import BytesIO

    petroRad = row['petroRad_r']
    ra = row['ra']
    dec = row['dec']
    # The requested scale grows with petroRad_r and shrinks as image_size grows.
    scale = BASE_CUTOUT_SCALE * GZ_IMAGE_SIZE/image_size * petroRad * padding_scale

    url = f'http://skyserver.sdss.org/dr15/SkyserverWS/ImgCutout/getjpeg?ra={ra}&dec={dec}&width={image_size}&height={image_size}&scale={scale}'
    # Image.open decodes lazily, so buffer the whole response in memory and
    # close the connection instead of leaving the HTTP response open.
    with urlopen(url) as response:
        return Image.open(BytesIO(response.read()))


In [None]:
# Fetch the default-size cutout for the row labelled 0 and display it.
img = download_image(data.loc[0])
plt.imshow(img, cmap=plt.get_cmap('gray'))

In [None]:
# Same object requested at 224x224 pixels; download_image adjusts the scale for the smaller size.
img = download_image(data.loc[0], image_size=224)
plt.imshow(img, cmap=plt.get_cmap('gray'))

In [None]:
orig_size = 424
small_size = 64

scale = small_size/float(orig_size)
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
# The target size now follows small_size instead of a second hard-coded 64.
small_img = img.resize((small_size, small_size), Image.LANCZOS)
plt.imshow(small_img, cmap=plt.get_cmap('gray'))

In [None]:
# Resize by a random factor drawn uniformly from [0.9, 1.1).
rand_scale = np.random.uniform(0.9, 1.1)
new_size = int(rand_scale * orig_size)
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
resized_img = img.resize((new_size, new_size), Image.LANCZOS)
plt.imshow(resized_img, cmap=plt.get_cmap('gray'))

In [None]:
# Box of a 212-pixel square centred in an orig_size x orig_size image.
left = top = (orig_size - 212)/2
right = bottom = (orig_size + 212)/2

cropped_image = img.crop(box=(left, top, right, bottom))
plt.imshow(cropped_image, cmap=plt.get_cmap('gray'))

In [None]:
def crop_dimensions(curr_size, new_size, top_offset=0, left_offset=0):
    '''
        Computes the box of a new_size square inside a curr_size square,
        centred by default and shifted by the given offsets.

        input
        ------
        curr_size: side length of the source image
        new_size: side length of the crop
        top_offset: vertical shift added to the centred position
        left_offset: horizontal shift added to the centred position

        output
        ------
        (left, top, right, bottom) tuple
    '''
    margin = (curr_size - new_size)/2
    # int() truncates toward zero, matching the behaviour for negative margins.
    left = int(margin + left_offset)
    top = int(margin + top_offset)

    return (left, top, left + new_size, top + new_size)

def centre_crop(img):
    '''
        Returns the central crop_size x crop_size window of an image array.

        Reads the notebook-level image_size and crop_size values, so both must
        be defined before this is called.

        input
        ------
        img: array indexed as [row, column, channel]
    '''
    (left, top, right, bottom) = crop_dimensions(image_size, crop_size)
    # Axis 0 runs top-to-bottom and axis 1 left-to-right. The previous version
    # sliced them the other way round, which only worked because the centred
    # box has left == top and right == bottom.
    return img[top:bottom, left:right, :]

def create_crops(img, size=224):
    '''
        Produces five square crops of a PIL image: the centre crop followed
        by one randomly placed crop per quadrant. Every quadrant crop is also
        opened with img.show().

        input
        ------
        img: PIL image; only its width is used to position the crops
        size: side length of each crop, in pixels

        output
        ------
        list of the five cropped images
    '''
    width, _ = img.size
    crops = [img.crop(crop_dimensions(width, size))]

    # do the middle third range in the quadrant
    max_offset = (width - size)/3
    min_offset = max_offset / 2

    # (top sign, left sign) for each of the four quadrants, in the original order
    quadrant_signs = ((-1, -1), (-1, 1), (1, -1), (1, 1))
    for top_sign, left_sign in quadrant_signs:
        top_shift, left_shift = np.random.uniform(min_offset, max_offset, 2)
        box = crop_dimensions(
            width,
            size,
            top_offset=int(top_sign * top_shift),
            left_offset=int(left_sign * left_shift),
        )
        quadrant_crop = img.crop(box)
        quadrant_crop.show()
        crops.append(quadrant_crop)

    return crops

In [None]:
def plot_images(imgs, size=None):
    '''
        Plots the first `size` images in a grid three columns wide, each
        rescaled to the 0..1 range for display.

        input
        ------
        imgs: sequence of image arrays
        size: number of images to plot; defaults to all of them. (The old
              default was bound to the global batch_size when the function was
              defined, which raised a NameError on a fresh kernel because
              batch_size is only assigned in a later cell.)
    '''
    if size is None:
        size = len(imgs)

    num_rows = int(np.ceil(size/3.0))
    figsize_y = 5 * num_rows

    fig = plt.figure(figsize=(20,figsize_y))
    for idx in range(0, size):
        img = imgs[idx]
        # make scale between 0 and 1.0 plotting
        img_min = img.min()
        img_range = img.max() - img_min
        if img_range > 0:
            img = (img - img_min) / img_range
        else:
            # A constant image has no range; show it as zeros rather than
            # dividing by zero and plotting NaNs.
            img = np.zeros_like(img, dtype=float)

        fig.add_subplot(num_rows, 3, idx + 1)
        plt.imshow(img, cmap=plt.get_cmap('gray'))

    plt.show()

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
# set_image_dim_ordering('tf') was removed from the Keras backend;
# 'channels_last' is the equivalent data format.
K.set_image_data_format('channels_last')

def augment_images(datagen, X_train, y_train):
    '''
        Optionally augments a batch of images, centre-crops every image to
        crop_size x crop_size, plots the crops and returns them.

        input
        ------
        datagen: generator passed to apply_augmentation, or None to use a
                 plain copy of X_train
        X_train: image batch with three channels on the last axis
        y_train: labels, forwarded to apply_augmentation

        output
        ------
        array of shape (number of images, crop_size, crop_size, 3)
    '''
    if datagen is None:
        imgs = X_train.copy()
    else:
        imgs = apply_augmentation(datagen, X_train, y_train)

    num_imgs = imgs.shape[0]
    result_imgs = np.empty((num_imgs, crop_size, crop_size, 3))
    for idx in range(len(imgs)):
        result_imgs[idx] = centre_crop(imgs[idx])

    plot_images(result_imgs)
    return result_imgs
    
def apply_augmentation(datagen, X_train, y_train):
    '''
        Fits the generator on the images and returns the first batch it
        produces, using the notebook-level batch_size and no shuffling.

        input
        ------
        datagen: object providing fit() and flow()
        X_train: image batch; converted to float32 before use
        y_train: labels passed through to flow()

        output
        ------
        first image batch yielded by datagen.flow, or None if it yields nothing
    '''
    # Convert to float32 in here
    float_imgs = X_train.astype('float32')
    datagen.fit(float_imgs)

    batches = datagen.flow(float_imgs, y_train, shuffle=False, batch_size=batch_size)
    # Only the first batch is needed, so return as soon as it arrives.
    for first_batch, _ in batches:
        return first_batch
    return None


## Load Image Cutouts

In [None]:
batch_size = 9
image_size = 350
crop_size = 224
# Ratio of the downloaded size to the final crop size; derived from image_size
# rather than a second hard-coded 350 so the two cannot drift apart.
padding_scale = image_size / crop_size

X_train = np.empty((batch_size, image_size, image_size, 3), dtype=int)
y_train = []

# Download one padded cutout per object for the rows labelled 0..batch_size-1
# and record each object's galaxy_type as its label.
for idx in range(0, batch_size):
    img = download_image(data.loc[idx], image_size=image_size, padding_scale=padding_scale)
    X_train[idx] = np.asarray(img)
    y_train.append(data.loc[idx, 'galaxy_type'])



# Data Augmentation

In [None]:
# One slot per augmentation variant (indices 0..7 are filled by the cells below);
# slot 0 holds the plain centre crops with no generator applied.
augmented_imgs = np.empty((8, batch_size, crop_size, crop_size, 3))
augmented_imgs[0] = augment_images(None, X_train, y_train)

## Normalise Features

In [None]:
# Mean pixel value over all images and channels, truncated to an int; used
# below as the constant fill value (cval) for the geometric augmentations.
channels = np.moveaxis(X_train, 3, 0)
fill = int(np.mean(channels))
fill

In [None]:
# Feature-wise centring and std normalisation; the generator is fitted on
# X_train inside apply_augmentation (datagen.fit).
datagen = ImageDataGenerator(featurewise_center=True,
                             featurewise_std_normalization=True
                            )
augmented_imgs[1] = augment_images(datagen, X_train, y_train)

## Random Rotations

In [None]:
# Rotation with rotation_range=180; areas exposed by the rotation are filled with the constant `fill`.
datagen = ImageDataGenerator(
                             rotation_range=180,fill_mode='constant',cval=fill
                            )
augmented_imgs[2] = augment_images(datagen, X_train, y_train)

## Random Shifts

In [None]:
# Horizontal and vertical shifts with a range of 0.1; exposed areas are filled with the constant `fill`.
shift = 0.1
datagen = ImageDataGenerator(
                             width_shift_range=shift,
                             height_shift_range=shift,
                             fill_mode='constant',
                             cval=fill
                            )

augmented_imgs[3] = augment_images(datagen, X_train, y_train)

## Random Flips

In [None]:
# Horizontal and vertical flips enabled.
datagen = ImageDataGenerator(horizontal_flip=True,
                             vertical_flip=True
                            )
augmented_imgs[4] = augment_images(datagen, X_train, y_train)

## Random Scaling

In [None]:
# `rescale` multiplies pixel values by a constant and does not change the image
# geometry; random scaling is done with `zoom_range` (0.1 gives zoom factors in
# [0.9, 1.1]). Exposed areas are filled with `fill`, as in the other geometric
# augmentations.
datagen = ImageDataGenerator(zoom_range=0.1, fill_mode='constant', cval=fill)

augmented_imgs[5] = augment_images(datagen, X_train, y_train)

## Samplewise normalisation

In [None]:
# Sample-wise centring and std normalisation enabled.
datagen = ImageDataGenerator(samplewise_center=True,
                             samplewise_std_normalization=True
                            )
augmented_imgs[6] = augment_images(datagen, X_train, y_train)

## Multiple Augmentations

In [None]:
# Combine feature-wise normalisation, flips, rotation and brightness jitter in
# one generator; sample-wise normalisation and shifts are currently disabled.
# NOTE(review): `rescale=0.1` multiplies pixel values by 0.1 rather than zooming
# the image — confirm whether `zoom_range` was intended here.
shift = 0.1
datagen = ImageDataGenerator(featurewise_center=True,
                             featurewise_std_normalization=True,
#                              samplewise_center=True,
#                              samplewise_std_normalization=True,
#                              width_shift_range=shift,
#                              height_shift_range=shift,
                             horizontal_flip=True,
                             vertical_flip=True,
                             fill_mode='constant',
                             rotation_range=180,
                             rescale=0.1,
                             brightness_range=(0.9,1.1),
                             cval=fill
                            )

augmented_imgs[7] = augment_images(datagen, X_train, y_train)

In [None]:
# Swap the first two axes so each iteration yields the 8 augmentation variants
# of one source image, then plot them together.
for augmentations in np.moveaxis(augmented_imgs, 0, 1):
    plot_images(augmentations, size=8)

In [None]:
# Plain centre crops divided by 255, alongside the output of the combined augmentation.
base_img = augmented_imgs[0]/255
mod_img = augmented_imgs[7]

In [None]:
# Smallest value in the combined-augmentation output.
np.min(mod_img)