In [1]:
import csv
import glob
from astropy.io import fits
import matplotlib.pyplot as plt
import os
from PIL import Image
import numpy as np
import sys
import random
import datetime
import traceback

In [2]:
def fits_to_tiff(filepath, img_type, outp_name=None, max_value=225):
    """Convert HDU 1 of a FITS file into an 8-bit greyscale TIFF.

    The image data is rescaled according to `img_type`, normalised by its
    maximum, multiplied by `max_value`, truncated to uint8 and saved as
    ``<outp_name>/<parent dir of filepath>/<base name of filepath>.tiff``.
    The target directory is created when it does not exist yet.

    Parameters
    ----------
    filepath : str
        '/'-separated path of the FITS file. It must contain at least one
        directory component, because the name of the parent directory is
        reused in the output path.
    img_type : str
        'aia' -> ``log10(data - min + 1)``;
        'hmi' -> ``abs(data)`` shifted so that its minimum is 0;
        any other value leaves the data unscaled before normalisation.
    outp_name : str, optional
        Output root directory. When omitted (or empty) it is derived from
        `filepath` without its extension by replacing every occurrence of
        the parent directory name with ``<parent>/tiff``.
    max_value : number, optional
        Grey level assigned to the brightest pixel. The default of 225 is
        the value this function has always used.
        NOTE(review): 255 would use the full uint8 range and 225 may be a
        typo -- confirm before changing, since existing TIFF sets were
        produced with 225. Values above 255 do not fit into uint8.

    Returns
    -------
    str
        Path of the TIFF file that was written.

    Raises
    ------
    IndexError
        If `filepath` contains no '/' (no parent directory component).
    """
    name = os.path.splitext(filepath)[0]
    path_parts = name.split('/')
    parent_dir = path_parts[-2]
    base_name = path_parts[-1]

    if not outp_name:
        # places new files in separate directory
        outp_name = name.replace(parent_dir, parent_dir + '/tiff')

    out_dir = outp_name + '/' + parent_dir
    out_path = out_dir + '/' + base_name + ".tiff"

    # The context manager closes the FITS file even if the conversion fails.
    with fits.open(filepath) as img:
        img.verify('fix')

        data = img[1].data  # has to be img[1]
        height, width = data.shape[0], data.shape[1]

        if img_type == 'aia':
            data = np.log10(data - data.min() + 1)
        elif img_type == 'hmi':
            data = abs(data)
            data = data - data.min()

        data = data.astype(float)
        peak = data.max()
        if peak == 0:
            # Constant image: dividing by the peak would yield NaN, whose
            # cast to uint8 is undefined. Emit an all-black image instead.
            data = np.zeros_like(data)
        else:
            # normalize data and multiply by max_value to map to uint8 values
            data = data / peak * max_value

        outputArray = np.array(data, dtype=np.uint8)
        output = Image.fromarray(outputArray.reshape((height, width)), "L")

        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        output.save(out_path)

    return out_path

In [3]:
#for sorting file lists by time
def timestamp(filename):
    """Return the sort key for a file path: field 2 of its dotted base name.

    The base name is everything after the last '/' in `filename`. It is
    split on '.' and the element at index 2 is returned as a string
    (presumably the observation time stamp -- depends on the file naming
    scheme). Raises IndexError if the base name has fewer than three
    dot-separated fields.
    """
    base_name = filename.rpartition('/')[2]
    fields = base_name.split('.')
    return fields[2]

In [20]:
# Input directories hold one sub-directory of FITS files per run number; the
# TIFF images and the CSV file lists are written underneath tiff_root.
aia_files = '/home/zenalisa/data/smallrun/aia/'
hmi_files = '/home/zenalisa/data/smallrun/hmi/'
tiff_root = '/home/zenalisa/data/smallrun/tiff/'

max_pairs = 32                # only the first 32 time-sorted files of each run are used
test_indices = range(14, 17)  # positions (within the kept pairs) saved for testing;
                              # hard-coded stand-in for "the middle 10% wrt time"


def _contains_nan(fits_path):
    """Return True if the data of HDU 1 in the FITS file holds any NaN."""
    with fits.open(fits_path) as hdul:
        hdul.verify('fix')
        return bool(np.isnan(hdul[1].data).any())


# The CSV lists are opened before fits_to_tiff creates any sub-directory, so
# the output root has to exist up front.
if not os.path.isdir(tiff_root):
    os.makedirs(tiff_root)

for r in [1131, 1140, 1147]:

    aia_file_list = sorted(glob.glob(aia_files + str(r) + '/*.fits'), key=timestamp)[:max_pairs]
    hmi_file_list = sorted(glob.glob(hmi_files + str(r) + '/*.fits'), key=timestamp)[:max_pairs]

    # Keep only the AIA/HMI pairs in which neither image contains NaN.
    # Files are paired by position in the time-sorted lists; zip() stops at
    # the shorter list instead of failing with an IndexError.
    aia_file_list_new = []
    hmi_file_list_new = []
    for aia_path, hmi_path in zip(aia_file_list, hmi_file_list):
        if _contains_nan(aia_path) or _contains_nan(hmi_path):
            continue
        aia_file_list_new.append(aia_path)
        hmi_file_list_new.append(hmi_path)

    print("nan's removed")

    train_csv_path = tiff_root + 'train_list_' + str(r) + '.csv'
    test_csv_path = tiff_root + 'test_list_' + str(r) + '.csv'

    # `with` closes both CSV files even when a conversion raises.
    with open(train_csv_path, 'w') as train_table, open(test_csv_path, 'w') as test_table:
        train_writer = csv.writer(train_table, delimiter=',')
        train_writer.writerow(['path_signal', 'path_target'])
        test_writer = csv.writer(test_table, delimiter=',')
        test_writer.writerow(['path_signal', 'path_target'])

        # Convert the NaN-free pairs only. Iterating the unfiltered lists here
        # would make the filtering above pointless.
        for n, (aia_path, hmi_path) in enumerate(zip(aia_file_list_new, hmi_file_list_new)):
            a = fits_to_tiff(aia_path, 'aia', outp_name=tiff_root + 'aia')
            h = fits_to_tiff(hmi_path, 'hmi', outp_name=tiff_root + 'hmi')

            # save the middle of the sequence (wrt time) for testing
            if n in test_indices:
                test_writer.writerow([a, h])
            else:
                train_writer.writerow([a, h])

    print("file list " + str(r) + " complete")

nan's removed
file list 1131 complete
nan's removed
file list 1140 complete
nan's removed
file list 1147 complete
