# Compare datasets

June 11, 2020

In [1]:
import numpy as np
import matplotlib.pyplot as plt
# import pandas as pd

import subprocess as sp
import sys
import os
import glob

from scipy import fftpack
from ipywidgets import *

In [2]:
%matplotlib widget

In [3]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/lbann_cosmogan/3_analysis')
from modules_image_analysis import *

This application is used to convert notebook files (*.ipynb) to various other
formats.


Options
-------

Arguments that take values are actually convenience aliases to full
Configurables, whose aliases are listed on the help line. For more information
on full configurables, see '--help-all'.

--debug
    set log level to logging.DEBUG (maximize logging output)
--generate-config
    generate default config file
-y
    Answer yes to any questions instead of prompting.
--execute
    Execute the notebook prior to export.
--allow-errors
    Continue notebook execution even if one of the cells throws an error and include the error message in the cell output (the default behaviour is to abort conversion). This flag is only relevant if '--execute' was specified, too.
--stdin
    read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'
--stdout
    Write notebook output to stdout instead of files.
--inplace
    Run nbconvert in place, overwriting 

In [4]:
### Transformation functions for image pixel values
def f_transform(x):
    '''
    Scale pixel values with s = 2*x/(x + 4) - 1.

    x = 0 maps to -1 and the result approaches +1 as x grows.
    Only arithmetic operators are used, so scalars and numpy arrays
    (element-wise) are both accepted.
    '''
    doubled = 2.*x
    shifted = x + 4.
    return doubled/shifted - 1.

def f_invtransform(s):
    '''
    Map scaled values back to pixel values with x = 4*(1 + s)/(1 - s).

    This is the algebraic inverse of s = 2*x/(x + 4) - 1.
    s = 1 makes the denominator zero.
    '''
    numerator = 4.*(1. + s)
    denominator = 1. - s
    return numerator/denominator


## Compare datasets

In [16]:
# Root directory that holds the .npy image datasets.
main_dir = '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/'

# Datasets to compare (paths relative to main_dir).
dataset1 = 'dataset_2_smoothing_200k/train.npy'
dataset2 = 'dataset_2_smoothing_200k/norm_1_train_val.npy'
dataset3 = 'dataset_2_smoothing_200k/full_with_smoothing_1.npy'

# dataset1='dataset_1_no_smoothing_200k/train.npy'
# dataset2='dataset_1_no_smoothing_200k/norm_1_train_val.npy'
# dataset3='dataset_1_no_smoothing_200k/full_1_.npy'

dataset_list = [dataset1, dataset2, dataset3]
print(dataset_list)

# One slot per dataset; each slot is filled with a (3000, H, W) array below.
s_input = [[] for _ in dataset_list]
for count, fname in enumerate(dataset_list):
    print(fname)
    arr = np.load(main_dir + fname)
    num_samples = arr.shape[0]
    print(count, arr.shape)

    # Keep a random subset of 3000 distinct images from this dataset.
    idxs = np.random.choice(np.arange(num_samples), size=3000, replace=False)
    arr = arr[idxs]

    # Dataset 1 is indexed channel-first (N,1,H,W) and used as-is
    # (presumably already normalized, going by its file name - confirm).
    # The others are indexed channel-last (N,H,W,1) and passed through
    # f_transform.
    s_input[count] = arr[:, 0, :, :] if count == 1 else f_transform(arr[:, :, :, 0])

    print(count, s_input[count].shape)


['dataset_2_smoothing_200k/train.npy', 'dataset_2_smoothing_200k/norm_1_train_val.npy', 'dataset_2_smoothing_200k/full_with_smoothing_1.npy']
dataset_2_smoothing_200k/train.npy
0 (200000, 128, 128, 1)
0 (3000, 128, 128)
dataset_2_smoothing_200k/norm_1_train_val.npy
1 (253751, 1, 128, 128)
1 (3000, 128, 128)
dataset_2_smoothing_200k/full_with_smoothing_1.npy
2 (614400, 128, 128, 1)
2 (3000, 128, 128)


### Compare without widgets

In [None]:
# Load an image set and keep its first 6000 images in transformed
# (f_transform) space.
img_new='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/temp_data/full_1_.npy'
a1=np.load(img_new)
print(a1.shape)
# Slice before transforming: f_transform is element-wise, so the values are
# identical to transforming everything and slicing afterwards, but only the
# 6000 retained images are processed.
s_new=f_transform(a1[:6000,:,:,0])

In [None]:
# img_raw='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/dataset_2_smooothing_200k/val.npy'
# Reference image set: only the first 6000 images are kept after loading.
img_raw = '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/dataset_1_no_smoothing_200k/train.npy'
a1 = np.load(img_raw)
a1 = a1[:6000]
print(a1.shape)
# a1 already holds at most 6000 images, so no further slicing is needed
# after dropping the trailing channel axis and transforming.
s_raw = f_transform(a1[:, :, :, 0])

In [18]:
# Image sets to compare and the legend label for each, in matching order.
img_lst = [s_input[0], s_input[1], s_input[2]]
label_lst = ['a', 'b', 'c']
# img_lst=[s_input[0],s_input[1],s_input[2],s_input[3],s_input[4]];label_lst=['a','b','c','d','e']
# img_lst=[s_raw,s_new];label_lst=['raw','new']


In [45]:
# Sanity check before plotting: every image set must be a 3-D stack of
# square images.
for arr in img_lst:
    # `and` short-circuits, so shape[2] is only read once ndim == 3 is known.
    # The previous `&` evaluated both sides and raised IndexError on arrays
    # with fewer than 3 dimensions instead of showing the message below.
    condition = arr.ndim == 3 and arr.shape[1] == arr.shape[2]
    assert condition,"images do not have the right dimension. {0} instead of (:,128,128)".format(arr.shape)

In [46]:
# Compare the image sets in img_lst (labelled by label_lst) with log_scale on.
# f_compare_spectrum is not defined in this notebook; presumably it comes from
# the `modules_image_analysis` star import and plots spectra - confirm.
f_compare_spectrum(img_lst,label_lst,log_scale=True)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [47]:
# Convert every image set back to pixel values before histogramming.
# NOTE: img_lst is rebound here, so re-running this cell applies
# f_invtransform again to data that was already converted.
img_lst = [f_invtransform(img) for img in img_lst]

# Bin edges to use: unit-width bins up to 20.5, then width 5 up to 100.5,
# then width 50 up to 1000.5, and a final edge at 2000.
bins = np.concatenate([
    np.array([-0.5]),
    np.arange(0.5, 20.5, 1),
    np.arange(20.5, 100.5, 5),
    np.arange(100.5, 1000.5, 50),
    np.array([2000]),
])
# bins=200
f_compare_pixel_intensity(img_lst, label_lst, normalize=True, log_scale=True,
                          mode='avg', bins=bins, hist_range=None)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Compare with widgets

In [None]:
def f_widget_compare(sample_names,sample_dict,Fig_type='pixel',rescale=True,log_scale=True,bins=25,mode='avg',normalize=True,bkgnd=[]):
    '''
    Function to make widget plots for pixel intensity or spectrum comparison for multiple sample sets.

    Parameters:
        sample_names : iterable of keys of sample_dict selecting the sample sets to plot;
                       the keys are also used as plot labels.
        sample_dict  : mapping from sample name to image array.
        Fig_type     : 'pixel' calls f_compare_pixel_intensity, 'spectrum' calls f_compare_spectrum.
        rescale      : if True, images (and bkgnd, when non-empty) are passed through
                       f_invtransform; if False, the bin edges are passed through
                       f_transform instead and the images are used as given.
        log_scale    : forwarded to the plotting function.
        bins         : accepted for interface compatibility, but currently ignored:
                       the fixed bin edges defined below always replace it.
        mode         : forwarded to f_compare_pixel_intensity (pixel plots only).
        normalize    : forwarded to f_compare_pixel_intensity (pixel plots only).
        bkgnd        : optional background array forwarded as bkgnd_arr. The default
                       list is never mutated here, only rebound.

    Raises:
        AssertionError if Fig_type is not 'pixel' or 'spectrum'.
    '''
    # Validate up front so a bad selection fails before any rescaling work.
    # The message reports Fig_type (it previously printed `mode` by mistake).
    assert Fig_type in ['pixel','spectrum'],"Invalid Fig_type %s"%(Fig_type,)

#     ### Crop out large pixel values
#     for key in sample_names:
#         print(sample_dict[key].shape)
#         sample_dict[key]=np.array([arr for arr in sample_dict[key] if np.max(arr)<=0.994])
#         print(sample_dict[key].shape)

    img_list=[sample_dict[key] for key in sample_names]
    label_list=list(sample_names)

    # Fixed bin edges in pixel-value units; this overrides the `bins` argument.
    bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use

    if rescale:
        # Work in pixel-value space: convert the images (and background) back.
        img_list=[f_invtransform(img) for img in img_list]
        if len(bkgnd): bkgnd=f_invtransform(bkgnd)
    else:
        # Work in transformed space: move the bin edges there instead.
        bins=f_transform(bins)

    if Fig_type=='pixel':
        f_compare_pixel_intensity(img_lst=img_list,label_lst=label_list,normalize=normalize,log_scale=log_scale, mode=mode,bins=bins,hist_range=None,bkgnd_arr=bkgnd)

    elif Fig_type=='spectrum':
        f_compare_spectrum(img_lst=img_list,label_lst=label_list,log_scale=log_scale,bkgnd_arr=bkgnd)



In [None]:
# dict_samples={'old_data_diff_cosmology': s_input[0][:10000], 'smoothing':s_input[1][:10000],'no_smoothing':s_input[2][:10000],'another_smoothing':s_input[3][:10000],'prenormed_no_smoothing':s_input[4][:10000]}
# Pair each label with the dataset at the same position in s_input.
# zip() stops at the shorter sequence, so a label without a loaded dataset is
# dropped instead of raising IndexError (s_input[3] does not exist when only
# three datasets are loaded, as dataset_list currently specifies).
sample_labels=['smoothing','smoothing_scaled','no_smoothing','no_smoothing_scaled']
dict_samples=dict(zip(sample_labels,s_input))
bkgnd=[]

# Build the interactive controls; the plot is drawn when the button is pressed.
# NOTE(review): f_widget_compare replaces `bins` with fixed bin edges, so the
# bins slider currently has no effect on the plot - confirm whether intended.
interact_manual(f_widget_compare,sample_dict=fixed(dict_samples),
                sample_names=SelectMultiple(options=list(dict_samples)),
                Fig_type=ToggleButtons(options=['pixel','spectrum']),
                bins=SelectionSlider(options=np.arange(10,200,10),value=50),
                mode=['avg','simple'],bkgnd=fixed(bkgnd))
