# Code to extract slices from hdf5 data files

April 7, 2020: Added slicing along x, y, z axes \
June 9, 2020: Added Gaussian smoothing  

In [1]:
import numpy as np
import h5py
import os

import glob
import argparse
import time

from scipy.ndimage import gaussian_filter   ### For gausian filter

### Read in list of files

In [2]:
### Directory that holds the hdf5 files
data_dir='/global/project/projectdirs/m3363/www/cosmoUniverse_2019_08_const/'
### Collect the path of every hdf5 file found in that directory
f_list=glob.glob(os.path.join(data_dir,'*.hdf5'))
len(f_list)


200

In [9]:

def f_get_slices_all_axes(f_list,smoothing=False,splice_interval=8):
    '''
    Get 2D slices of 512^3 images along all 3 axes.

    For every file, the 'full' dataset is read, optionally Gaussian-smoothed,
    and cut into 128 x 128 tiles taken from planes sampled every
    splice_interval layers along each of the first three array axes.

    Arguments:
        f_list: iterable of hdf5 file paths, each holding a 'full' dataset
            (indexed here with 4 indices; entry 0 of the last axis is used).
        smoothing: if True, apply gaussian_filter(sigma=0.5, mode='wrap') to
            the whole array before slicing.
        splice_interval: the spacing between layers that are sampled.

    Returns:
        Array of shape (samples,128,128,1). Samples are shuffled within each
        tile group using numpy's global random state.

    Raises:
        ValueError: if f_list yields no files.
    '''

    slices = []
    img_dim = 128
    perside = 512//img_dim
    layers = slice(None,None,splice_interval) ### Every splice_interval-th layer

    for fname in f_list:
        with h5py.File(fname, 'r') as inputdata:
            img_arr=np.array(inputdata['full'])
            if smoothing: img_arr=gaussian_filter(img_arr,sigma=0.5,mode='wrap') ### Implement Gaussian smoothing
            for i1 in range(perside):
                tile1 = slice(i1*img_dim,(i1+1)*img_dim)
                for i2 in range(perside):
                    tile2 = slice(i2*img_dim,(i2+1)*img_dim)
                    # Select slices along planes : yz, xy, xz, for redshift=0
                    # (128 * 128 images from 512 x 512 images-> 16 images)
                    # Each entry is (index into img_arr, transpose order that
                    # moves the sampled-layer axis first -> (samples,128,128))
                    for index,order in (
                        ((layers,tile1,tile2,0),(0,1,2)),  ## yz axis
                        ((tile1,tile2,layers,0),(2,0,1)),  ## xy axis
                        ((tile1,layers,tile2,0),(1,0,2)),  ## xz axis
                    ):
                        ### Slicing and transposing give views of img_arr, and
                        ### np.random.shuffle works in place. Shuffle a copy so
                        ### the volume is not scrambled for the later slices.
                        data = np.transpose(img_arr[index],order).copy()
                        np.random.shuffle(data) ### Shuffle samples (along first axis)
                        slices.append(np.expand_dims(data, axis=-1))

        print('Sliced %s'%fname)

    ### np.concatenate fails with an unhelpful message on an empty list
    if not slices:
        raise ValueError('f_list is empty: no hdf5 files to slice')

    slices = np.concatenate(slices)
    print(slices.shape)

    return slices

# slices=f_get_slices_all_axes(f_list[:2],smoothing=False,splice_interval=4)

In [None]:
# np.vsplit(data,data.shape[0])[0].shape

In [22]:
### Time how long it takes to slice the first two files
t1=time.time()
slices=f_get_slices_all_axes(
    f_list[:2],
    splice_interval=8,
    smoothing=False,
)
t2=time.time()
print('Time taken',t2-t1)

Sliced /global/project/projectdirs/m3363/www/cosmoUniverse_2019_08_const/univ_ics_2019-03_a11625921.hdf5
Sliced /global/project/projectdirs/m3363/www/cosmoUniverse_2019_08_const/univ_ics_2019-03_a8101968.hdf5
(6144, 128, 128, 1)
Time taken 6.215606451034546


In [None]:
# ### Save data to files

# ### Location to store the .npy files generated by this code
# dest_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/temp/'
# file_prefix='large_dataset'

# train_index=int(0.90*len(slices))
# train = slices[:train_index]
# val = slices[train_index:]

# train_fname = dest_dir+file_prefix+'_train.npy'
# print('Saving file %s'%train_fname)
# print('shape='+str(train.shape))
# np.save(train_fname, train)

# val_fname = dest_dir+file_prefix+'_val.npy'
# print('Saving file %s'%val_fname)
# print('shape='+str(val.shape))
# np.save(val_fname, val)

In [3]:
### Load a saved .npy file and report its shape and element type
# fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/dataset_2_smooothing_200k/full_with_smoothing_1.npy'
fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/dataset_2_smooothing_200k/val.npy'
a1=np.load(fname)
first_value=a1[0,0,0,0]
print(a1.shape,type(first_value))

(3000, 128, 128, 1) <class 'numpy.int16'>
