## Compare results across multiple runs

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import subprocess as sp
import os
import glob
import sys

import itertools
import time

from ipywidgets import *

In [3]:
%matplotlib widget

In [4]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/lbann_cosmogan/3_analysis')
from modules_image_analysis import *

In [5]:
### Transformation functions for image pixel values
def f_transform(x):
    ''' Scale pixel values: returns 2x/(x+4) - 1, with a small epsilon added to the denominator. '''
    denominator = x + 4. + 1e-8
    return 2. * x / denominator - 1.

def f_invtransform(s):
    ''' Undo f_transform: returns 4(1+s)/(1-s), with a small epsilon added to the denominator. '''
    numerator = 4. * (1. + s)
    return numerator / (1. - s + 1e-8)

In [6]:
def f_compute_hist_spect(sample,bins):
    ''' Compute the pixel intensity histogram and the spectrum for a batch of images
    Input : sample (image arrays, passed to f_batch_histogram and f_compute_spectrum) and bins (histogram bins)
    Output: dictionary with 5 entries : 'hist_val', 'hist_err', 'hist_bin_centers', 'spec_val', 'spec_sdev'
    '''
    ### Normalized pixel histogram : values, errors and bin centers
    hist_val,hist_err,bin_centers=f_batch_histogram(sample,bins=bins,norm=True,hist_range=None)
    ### Spectrum : values and standard deviations (plotting switched off)
    spec_val,spec_sdev=f_compute_spectrum(sample,plot=False)

    return {
        'hist_val':hist_val,
        'hist_err':hist_err,
        'hist_bin_centers':bin_centers,
        'spec_val':spec_val,
        'spec_sdev':spec_sdev,
    }

def f_compute_chisqr(dict_val,dict_sample,img_size):
    '''
    Compute chi-square style distances of a sample w.r.t. the reference (input) images.

    Input:
        dict_val    : reference dictionary with keys 'hist_val', 'hist_err', 'spec_val', 'spec_sdev'
        dict_sample : dictionary with the same keys for the sample being scored
        img_size    : image size; only the first int(img_size/2) spectrum entries are used
    Output: dictionary with keys
        'chi_1a','chi_1b','chi_1c' : sum((val-sample)^2/val) over the first 30%, middle 40% and last 30% of histogram bins
        'chi_1'      : the same sum over all histogram bins
        'chi_2'      : sum((val-sample)^2) over all histogram bins (no denominator)
        'chi_imgvar' : sum(sample hist_err)/sum(val hist_err), total spread in histograms w.r.t. input data
        'chi_spec1'  : sum((val-sample)^2/sample^2) over the spectrum
        'chi_spec2'  : sum((val-sample)^2/sample_sdev^2) over the spectrum
        'chi_spec3'  : log(mean squared difference of spectra) + log(mean squared difference of spectrum sdevs)
    If any step raises, the exception is printed and every key above is returned as NaN.
    '''
    ### !!Both pixel histograms MUST have same bins and normalization!
    ### Used in keras code : np.sum(np.divide(np.power(valhist - samphist, 2.0), valhist))
    ###  chi_sqr :: sum((Obs-Val)^2/(Val))

    ### Full list of returned keys; the NaN fallback uses it so both paths return the same schema
    all_keys=['chi_1a','chi_1b','chi_1c','chi_1','chi_2','chi_imgvar','chi_spec1','chi_spec2','chi_spec3']
    chisqr_dict={}

    try:
        hist_val=dict_val['hist_val']
        val_dr=hist_val.copy()
        val_dr[val_dr<=0.]=1.0    ### Avoiding division by zero for zero bins

        sq_diff=(hist_val-dict_sample['hist_val'])**2

        size=len(hist_val)
        l1,l2=int(size*0.3),int(size*0.7)

        ### 4 ranges : small, medium, large pixel values and full
        for key,start,end in zip(['chi_1a','chi_1b','chi_1c','chi_1'],[0,l1,l2,0],[l1,l2,None,None]):
            chisqr_dict[key]=np.sum(np.divide(sq_diff[start:end],val_dr[start:end]))

        idx=None  # Choosing the number of histogram bins to use. Eg : -5 to skip last 5 bins
        chisqr_dict['chi_2']=np.sum(np.divide(sq_diff[:idx],1.0)) ## chi-sqr without denominator division
        chisqr_dict['chi_imgvar']=np.sum(dict_sample['hist_err'][:idx])/np.sum(dict_val['hist_err'][:idx])

        ### Spectral terms only use the first half of the spectrum
        idx=int(img_size/2)
        val_spec,smp_spec=dict_val['spec_val'][:idx],dict_sample['spec_val'][:idx]
        val_sdev,smp_sdev=dict_val['spec_sdev'][:idx],dict_sample['spec_sdev'][:idx]
        spec_diff=(val_spec-smp_spec)**2

        ### spectral chi-square, normalized by the sample spectrum
        chisqr_dict['chi_spec1']=np.sum(spec_diff/smp_spec**2)
        ### spectral chi-square, normalized by the sample spectrum standard deviation
        chisqr_dict['chi_spec2']=np.sum(spec_diff/smp_sdev**2)

        ### spectral loss : log of mean squared differences of spectrum values and of their sdevs
        spec_loss=1.0*np.log(np.mean(spec_diff))+1.0*np.log(np.mean((val_sdev-smp_sdev)**2))
        print(spec_loss)
        chisqr_dict['chi_spec3']=spec_loss

    except Exception as e:
        ### Best-effort : report the problem and return NaN for every metric (including chi_spec3)
        print(e)
        chisqr_dict=dict.fromkeys(all_keys,np.nan)

    return chisqr_dict

In [7]:
# bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
# bins=np.concatenate([np.array([-0.5]),np.arange(0.5,300.5,5),np.arange(300.5,1000.5,50),np.array([2000])]) #bin edges to use
### Histogram bin edges in raw pixel units, then scaled to (-1,1) with f_transform
bins=f_transform(np.concatenate([
    np.array([-0.5]),
    np.arange(0.5,100.5,5),
    np.arange(100.5,300.5,20),
    np.arange(300.5,1000.5,50),
    np.array([2000]),
]))

### Extract validation data
fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_2_smoothing_200k/norm_1_train_val.npy'
# s_val=np.load(fname,mmap_mode='r')[10000:20000][:,0,:,:]
num_bkgnd=4000
### Memory-map the file; keep the first num_bkgnd entries and index 0 of the second axis
s_val=np.load(fname,mmap_mode='r')[:num_bkgnd,0,:,:]
print(s_val.shape)
img_size=s_val.shape[1]
### Compute histogram and spectrum of raw data
dict_val=f_compute_hist_spect(s_val,bins)
del s_val

(4000, 128, 128)


  hist_arr=np.array([np.histogram(arr.flatten(), bins=bins, range=(llim,ulim), density=norm) for arr in img_arr]) ## range is important


In [8]:
### Inspect which quantities f_compute_hist_spect stored for the validation data
dict_val.keys()

dict_keys(['hist_val', 'hist_err', 'hist_bin_centers', 'spec_val', 'spec_sdev'])

In [9]:
### Empty table; the loading cells below add one row per evaluated set of images
df_runs=pd.DataFrame([])

In [10]:
# ### Load raw images ( a different set of input images for comparison)
# dict1={}
# dict1.update({'label':'raw','fname':'/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_2_smoothing_200k/norm_1_train_val.npy'})
# images=np.load(dict1['fname'],mmap_mode='r')[-500:][:,0,:,:]
# dict1['num_imgs']=images.shape[0]
# ### Compute spectrum and histograms
# dict_sample=f_compute_hist_spect(images,bins)
# ### Compute chi squares
# dict_chisqrs=f_compute_chisqr(dict_val,dict_sample,img_size)

# dict1.update(dict_sample)
# dict1.update(dict_chisqrs)
# del(images)
# df_runs=df_runs.append(dict1,ignore_index=True)


In [11]:
### Load keras images
parent_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/'
lst=['run5_fixed_cosmology','run6_fixed_cosmology','run7_no_truncated_normal','run8_no_truncated_normal']
prefix='keras_'

### Collect one row per run and concatenate once at the end
### (DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call)
rows=[]
for count,fldr in enumerate(lst):
    key=prefix+str(count)
    fname=parent_dir+fldr+'/models/gen_imgs.npy'
    dict1={'label':key,'fname':fname}

    images=np.load(fname)[:500]     ### No transform needed here
#     print(np.max(images),np.min(images))

    dict1['num_imgs']=images.shape[0]
    ### Compute spectrum and histograms
    dict_sample=f_compute_hist_spect(images,bins)
    ### Compute chi squares
    dict_chisqrs=f_compute_chisqr(dict_val,dict_sample,img_size)

    dict1.update(dict_sample)
    dict1.update(dict_chisqrs)
    del(images)
    rows.append(dict1)

if rows:
    df_runs=pd.concat([df_runs,pd.DataFrame(rows)],ignore_index=True)


24.147434387872742
22.210071103786824
26.16713963923638
20.457959827363467


In [12]:
# parent_dir='/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210226_175209_bsize256_scale0.1_deterministic_off'
# lst=[(11,10290), (12,10680), (13,11980), (14,12460) ,(19,17570)]

# parent_dir='/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210408_100355_bsize128_layer-norm_200kdata'
# lst=[(13,18300),(18,25850),(19,26800),(19,28100)]

# parent_dir='/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210409_083631_bsize128_with_bnorm_200kdata'
# lst=[(15,21400), (11,16500), (10,14750),(19,28100)]

# parent_dir='/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210422_132856_bsize128_bnorm_new_decay'
# lst=[(13,18850), (17,24250), (18,25800)]

### Results directory of the run to analyse
parent_dir='/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_decay'
### (epoch, step) pairs of the training outputs to load
lst=[
    (9, 13050),
    (11, 15800),
    (11, 16500),
    (16, 22950),
    (17, 24150),
]

### Load LBANN images

In [13]:
## Load best models

prefix='lbann_train_1_'
### Collect one row per (epoch, step) and concatenate once at the end
### (DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call)
rows=[]
for epoch,step in lst:
    key=prefix+'{0}-{1}'.format(epoch,step)
    fname=parent_dir+'/dump_outs/trainer0/model0/'+'sgd.training.epoch.{0}.step.{1}_gen_img_instance1_activation_output0.npy'.format(epoch,step)
    dict1={'label':key,'fname':fname}
    images=np.load(fname)[:,0,:,:]
    dict1['num_imgs']=images.shape[0]
    print(dict1)
    ### Compute spectrum and histograms
    dict_sample=f_compute_hist_spect(images,bins)
    ### Compute chi squares
    dict_chisqrs=f_compute_chisqr(dict_val,dict_sample,img_size)

    dict1.update(dict_sample)
    dict1.update(dict_chisqrs)
    del(images)
    rows.append(dict1)

if rows:
    df_runs=pd.concat([df_runs,pd.DataFrame(rows)],ignore_index=True)

{'label': 'lbann_train_1_9-13050', 'fname': '/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_decay/dump_outs/trainer0/model0/sgd.training.epoch.9.step.13050_gen_img_instance1_activation_output0.npy', 'num_imgs': 128}
31.691541473756814
{'label': 'lbann_train_1_11-15800', 'fname': '/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_decay/dump_outs/trainer0/model0/sgd.training.epoch.11.step.15800_gen_img_instance1_activation_output0.npy', 'num_imgs': 128}
27.097661279983818
{'label': 'lbann_train_1_11-16500', 'fname': '/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_decay/dump_outs/trainer0/model0/sgd.training.epoch.11.step.16500_gen_img_instance1_activation_output0.npy', 'num_imgs': 128}
29.04974889623216
{'label': 'lbann_train_1_16-22950', 'fname': '/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_dec

In [14]:
## LBANN generated images

# # fldr='batch_begin.epoch.11.step.10290_20210302_140615'
fname=parent_dir+'/gen_imgs_chkpt/batch_begin.*'
fldr_lst=glob.glob(fname)
print(fldr_lst)
epochs=4

### Collect one row per (checkpoint folder, test epoch) and concatenate once at the end
### (DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call)
rows=[]
for fldr in fldr_lst:
    ### Folder names look like 'batch_begin.epoch.<ep>.step.<stp>_<suffix>' (see the example above)
    tag=fldr.split('/')[-1].split('.epoch.')[-1]      ### '<ep>.step.<stp>_<suffix>'
    ep=tag.split('.')[0]
    stp=tag.split('_')[0].split('.step.')[-1]
    prefix='lbann_gen_{0}_'.format(ep +'-'+ stp)
#     print(prefix)
    for epoch in range(epochs):
        key=prefix+'{0}'.format(epoch)
        fname=fldr+'/dump_outs/trainer0/model0/sgd.testing.epoch.{0}.step.{0}_gen_img_instance1_activation_output0.npy'.format(epoch)

        dict1={'label':key,'fname':fname}
        images=np.load(fname)[:,0,:,:]
        dict1['num_imgs']=images.shape[0]
#         print(dict1)
        ### Compute spectrum and histograms
        dict_sample=f_compute_hist_spect(images,bins)
        ### Compute chi squares
        dict_chisqrs=f_compute_chisqr(dict_val,dict_sample,img_size)

        dict1.update(dict_sample)
        dict1.update(dict_chisqrs)
    #     del(images)
        rows.append(dict1)

if rows:
    df_runs=pd.concat([df_runs,pd.DataFrame(rows)],ignore_index=True)
    

['/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_decay/gen_imgs_chkpt/batch_begin.epoch.17.step.24150_20210507_113012', '/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_decay/gen_imgs_chkpt/batch_begin.epoch.16.step.22950_20210507_112811', '/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_decay/gen_imgs_chkpt/batch_begin.epoch.11.step.15800_20210507_112711', '/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_decay/gen_imgs_chkpt/batch_begin.epoch.11.step.16500_20210507_112812', '/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210507_084712_bsize128_bnorm_new_decay/gen_imgs_chkpt/batch_begin.epoch.9.step.13050_20210507_112617']
27.554294253457428
27.53559347357933
27.872422566627016
27.088425654142412
27.19683577838344
27.297031834089584
28.18508040945307
26.91463480174292
30.871228759

In [15]:
# ### LBANN test images
# ## Load best models
# lst=[(0,0),(0,50),(0,100),(0,150),(0,500)]

# # lst=[(0,0),(0,100),(0,150),(0,950),(0,1500)]
# prefix='lbann_test_last_1_'
# for count,run in enumerate(lst):
#     epoch,step=run[0],run[1]
#     key=prefix+'{0}-{1}'.format(epoch,step)
#     fname=parent_dir+'/dump_outs/trainer0/model0/'+'sgd.testing.epoch.{0}.step.{1}_gen_img_instance1_activation_output0.npy'.format(epoch,step)
#     dict1={'label':key,'fname':fname}
#     images=np.load(fname)[:,0,:,:]
#     dict1['num_imgs']=images.shape[0]
#     print(dict1)
#     ### Compute spectrum and histograms
#     dict_sample=f_compute_hist_spect(images,bins)
#     ### Compute chi squares
#     dict_chisqrs=f_compute_chisqr(dict_val,dict_sample,img_size)
    
#     dict1.update(dict_sample)
#     dict1.update(dict_chisqrs)
#     del(images)
#     df_runs=df_runs.append(dict1,ignore_index=True)
    

In [16]:
# ## LBANN generated images

# parent_dir='/global/cscratch1/sd/vpa/proj/cosmogan/results_dir/128square/20210226_175209_bsize256_scale0.1_deterministic_off/gen_imgs_chkpt/'
# fldr='batch_begin.epoch.10.step.9570_20210203_112317'
# epochs=5

# prefix='lbann_gen_2_'
# for count,epoch in enumerate(range(epochs)):
#     key=prefix+'{0}'.format(epoch)
#     fname=parent_dir+fldr+'/dump_outs/trainer0/model0/sgd.testing.epoch.{0}.step.{0}_gen_img_instance1_activation_output0.npy'.format(epoch)
    
#     dict1={'label':key,'fname':fname}
#     images=np.load(fname)[:,0,:,:]
#     dict1['num_imgs']=images.shape[0]
#     print(dict1)
#     ### Compute spectrum and histograms
#     dict_sample=f_compute_hist_spect(images,bins)
#     ### Compute chi squares
#     dict_chisqrs=f_compute_chisqr(dict_val,dict_sample,img_size)
    
#     dict1.update(dict_sample)
#     dict1.update(dict_chisqrs)
# #     del(images)
#     df_runs=df_runs.append(dict1,ignore_index=True)
    

In [17]:
### Put the columns of the summary table in a fixed order
col_list=[
    'label','fname',
    'hist_bin_centers','hist_err','hist_val',
    'spec_sdev','spec_val',
    'chi_1','chi_1a','chi_1b','chi_1c','chi_2',
    'chi_imgvar','chi_spec1','chi_spec2','chi_spec3',
    'num_imgs',
]
df_runs=df_runs.reindex(columns=col_list)


### Compare chi-squares

In [18]:
### Columns to show when comparing chi-squares across runs
chi_keys=[
    'label',
    'chi_1','chi_spec1','chi_spec3',
    'num_imgs',
    'chi_1a','chi_1b','chi_1c','chi_2',
    'chi_imgvar','chi_spec2',
    'fname',
]
df_runs[chi_keys]

Unnamed: 0,label,chi_1,chi_spec1,chi_spec3,num_imgs,chi_1a,chi_1b,chi_1c,chi_2,chi_imgvar,chi_spec2,fname
0,keras_0,0.001519,0.054939,24.147434,500.0,9.3e-05,0.000626,0.0008,2e-05,2.877194,5.637779,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...
1,keras_1,0.001419,0.070767,22.210071,500.0,0.000113,0.000377,0.000929,2e-05,2.901052,6.21285,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...
2,keras_2,0.002432,0.020358,26.16714,500.0,0.001166,0.000528,0.000738,0.000864,2.841467,1.29368,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...
3,keras_3,0.002574,0.051291,20.45796,500.0,0.000821,0.000659,0.001094,0.000566,2.81116,5.499685,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...
4,lbann_train_1_9-13050,0.01787,0.819024,31.691541,128.0,0.010735,0.005053,0.002082,0.002436,4.076537,73.77199,/global/cscratch1/sd/vpa/proj/cosmogan/results...
5,lbann_train_1_11-15800,0.011075,0.452704,27.097661,128.0,0.004408,0.005258,0.001409,0.001559,4.523655,71.810523,/global/cscratch1/sd/vpa/proj/cosmogan/results...
6,lbann_train_1_11-16500,0.01005,0.419401,29.049749,128.0,0.001227,0.00783,0.000993,0.000309,3.798865,53.081418,/global/cscratch1/sd/vpa/proj/cosmogan/results...
7,lbann_train_1_16-22950,0.025185,0.186721,26.998066,128.0,0.014517,0.008921,0.001746,0.009739,3.920227,24.786521,/global/cscratch1/sd/vpa/proj/cosmogan/results...
8,lbann_train_1_17-24150,0.01336,0.624515,27.05627,128.0,0.007084,0.004637,0.001638,0.005262,4.021161,71.97947,/global/cscratch1/sd/vpa/proj/cosmogan/results...
9,lbann_gen_17-24150_0,0.01123,0.631575,27.554294,500.0,0.002841,0.006425,0.001965,0.001811,2.139474,68.734702,/global/cscratch1/sd/vpa/proj/cosmogan/results...


In [19]:

def f_plot_hist_spec(df,dict_bkg,plot_type,img_size=128):
    '''
    Plot histograms, spectra or image grids for the runs in df, together with the background (input) data.

    Input:
        df        : DataFrame with one row per run. Columns used: label, hist_bin_centers, hist_val, hist_err,
                    spec_val, spec_sdev, num_imgs and, for 'grid', fname
        dict_bkg  : dictionary for the background data with keys
                    'hist_bin_centers', 'hist_val', 'hist_err', 'spec_val', 'spec_sdev'
        plot_type : one of 'hist', 'spec', 'spec_relative', 'grid'
        img_size  : image size; the 'spec_relative' x-axis is limited to (0, img_size/2). Default 128
    Note: the 'spec' mode reads the module-level variable num_bkgnd to scale the background error band.
    '''
    assert plot_type in ['hist','spec','spec_relative','grid'],"Invalid mode %s"%(plot_type)

    if plot_type=='grid':
        ### f_plot_grid does the drawing; there are no labelled artists, so the shared legend/scale code is skipped
        for _,row in df.iterrows():
            images=np.load(row.fname)[:,0,:,:]
            f_plot_grid(images[:18],cols=6,fig_size=(10,5))
        return

    plt.figure(figsize=(6,6))

    ### Plot each run with its own marker
    for (_,row),marker in zip(df.iterrows(),itertools.cycle('>^*sDHPdpx_')):
        label=row.label
        if plot_type=='hist':
            ### Bin centers are stored in transformed units; convert back to pixel values
            x1=f_invtransform(row.hist_bin_centers)
            plt.errorbar(x1,row.hist_val,row.hist_err,marker=marker,markersize=5,linestyle='',label=label)

        elif plot_type=='spec':
            y2=row.spec_val
            yerr2=row.spec_sdev/np.sqrt(row.num_imgs)
            x2=np.arange(len(y2))

            plt.fill_between(x2, y2 - yerr2, y2 + yerr2, alpha=0.4)
            plt.plot(x2, y2, marker=marker, linestyle=':',label=label)

        elif plot_type=='spec_relative':
            y2=row.spec_val
            yerr2=row.spec_sdev
            x2=np.arange(len(y2))

            ### Reference spectrum
            y1,yerr1=dict_bkg['spec_val'],dict_bkg['spec_sdev']
            y=y2/(1.0*y1)
            ## Variance is sum of variance of both variables, since they are uncorrelated
            # delta_r= |r| * sqrt((delta_a/a)^2 + (delta_b/b)^2) / sqrt(N)
            yerr=(np.abs(y))*np.sqrt((yerr1/y1)**2+(yerr2/y2)**2)/np.sqrt(row.num_imgs)

            plt.fill_between(x2, y - yerr, y + yerr, alpha=0.4)
            plt.plot(x2, y, marker=marker, linestyle=':',label=label)

    ### Plot input data and apply per-mode axis settings (done once, after all runs are drawn)
    if plot_type=='hist':
        x,y,yerr=dict_bkg['hist_bin_centers'],dict_bkg['hist_val'],dict_bkg['hist_err']
        x=f_invtransform(x)
        plt.errorbar(x, y,yerr,color='k',linestyle='-',label='bkgnd')
        plt.title('Pixel Intensity Histogram')
        ### 'linthresh' replaces the axis-specific 'linthreshx' keyword (deprecated in Matplotlib 3.3, later removed)
        plt.xscale('symlog',linthresh=300)

    elif plot_type=='spec':
        y,yerr=dict_bkg['spec_val'],dict_bkg['spec_sdev']/np.sqrt(num_bkgnd)
        x=np.arange(len(y))
        plt.fill_between(x, y - yerr, y + yerr, color='k',alpha=0.8)
        plt.title('Spectrum')

    elif plot_type=='spec_relative':
        plt.axhline(y=1.0,color='k',linestyle='-.')
        plt.xlim(0,img_size/2)
        plt.ylim(0.5,2)

#     plt.legend(bbox_to_anchor=(0.3, 0.75),ncol=2, fancybox=True, shadow=True,prop={'size':6})
    plt.legend(loc='best')

    if plot_type in ['hist','spec']:
        plt.yscale('log')
    

# f_plot_hist_spec(df_runs,dict_val,'hist')
# f_plot_hist_spec(df_runs,dict_val,'spec')
# f_plot_hist_spec(df_best,dict_val,'grid')



In [20]:
def f_widget_compare(df,labels_list,bkgnd,plot_type):
    '''
    Plot the selected runs against the background data.

    Input:
        df          : DataFrame of runs with a 'label' column
        labels_list : labels of the rows to plot
        bkgnd       : background dictionary passed on to f_plot_hist_spec
        plot_type   : plot mode passed on to f_plot_hist_spec
    '''
    df_temp=df[df.label.isin(labels_list)]
    ### Use the bkgnd argument (previously ignored in favour of the global dict_val)
    f_plot_hist_spec(df_temp,bkgnd,plot_type)
    
### Widget : choose run labels and a plot type; df and bkgnd are fixed (not shown as controls)
interact_manual(
    f_widget_compare,
    df=fixed(df_runs),
    labels_list=SelectMultiple(options=df_runs.label.values),
    bkgnd=fixed(dict_val),
    plot_type=ToggleButtons(options=['hist','spec','grid','spec_relative']),
)

interactive(children=(SelectMultiple(description='labels_list', options=('keras_0', 'keras_1', 'keras_2', 'ker…

<function __main__.f_widget_compare(df, labels_list, bkgnd, plot_type)>

In [None]:
### Short summary table : main chi-square values per run
col_list=[
    'label',
    'chi_1',
    'chi_spec1',
    'num_imgs',
    'chi_2',
    'chi_spec2',
]
df_runs[col_list]

In [None]:
# f_plot_intensity_grid(images[:20],cols=5)
# f_pixel_intensity(f_transform(images),bins=100)