## Compare results for multiple results

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import subprocess as sp
import os
import glob
import sys

import itertools
import time

from ipywidgets import *

In [2]:
%matplotlib notebook

In [3]:
# sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/cosmogan_pytorch/code/modules_image_analysis/')
from modules_img_analysis import *

In [4]:
### Transformation functions for image pixel values
def f_transform(x,a):
    return 2.*x/(x + float(a)) - 1.

def f_invtransform(s,a):
    return float(a)*(1. + s)/(1. - s)


In [5]:
def f_compute_hist_spect(sample,bins):
    ''' Compute pixel intensity histograms and radial spectrum for 2D arrays
    Input : Image arrays and bins
    Output: dictionary with 5 arrays : Histogram values, errors and bin centers, Spectrum values and errors.
    '''
    ### Compute pixel histogram for row
    gen_hist,gen_err,hist_bins=f_batch_histogram(sample,bins=bins,norm=True,hist_range=None)
    ### Compute spectrum for row
    spec,spec_sdev=f_plot_spectrum_3d(sample,plot=False)

    dict1={'hist_val':gen_hist,'hist_err':gen_err,'hist_bin_centers':hist_bins,'spec_val':spec,'spec_sdev':spec_sdev }
    return dict1


def f_compute_chisqr(dict_val,dict_sample,img_size):
    '''
    Compute chi-square values for sample w.r.t input images
    Input: 2 dictionaries with 4 keys for histogram and spectrum values and errors
    '''
    ### !!Both pixel histograms MUST have same bins and normalization!
    ### Compute chi-sqr
    ### Used in keras code : np.sum(np.divide(np.power(valhist - samphist, 2.0), valhist))
    ###  chi_sqr :: sum((Obs-Val)^2/(Val))
    
    chisqr_dict={}
    
    try: 
        val_dr=dict_val['hist_val'].copy()
        val_dr[val_dr<=0.]=1.0    ### Avoiding division by zero for zero bins

        sq_diff=(dict_val['hist_val']-dict_sample['hist_val'])**2

        size=len(dict_val['hist_val'])
        l1,l2=int(size*0.3),int(size*0.7)
        keys=['chi_1a','chi_1b','chi_1c','chi_1']
        
        for (key,start,end) in zip(keys,[0,l1,l2,0],[l1,l2,None,None]):  # 4 lists : small, medium, large pixel values and full 
            chisqr_dict.update({key:np.sum(np.divide(sq_diff[start:end],val_dr[start:end]))})

        idx=None  # Choosing the number of histograms to use. Eg : -5 to skip last 5 bins
    #     chisqr_dict.update({'chi_sqr1':})

        chisqr_dict.update({'chi_2':np.sum(np.divide(sq_diff[:idx],1.0))}) ## chi-sqr without denominator division
        chisqr_dict.update({'chi_imgvar':np.sum(dict_sample['hist_err'][:idx])/np.sum(dict_val['hist_err'][:idx])}) ## measures total spread in histograms wrt to input data

        idx=img_size
        spec_diff=(dict_val['spec_val']-dict_sample['spec_val'])**2
        ### computing the spectral loss chi-square
        chisqr_dict.update({'chi_spec1':np.sum(spec_diff[:idx]/dict_sample['spec_val'][:idx]**2)})

        ### computing the spectral loss chi-square
        chisqr_dict.update({'chi_spec2':np.sum(spec_diff[:idx]/dict_sample['spec_sdev'][:idx]**2)})
        
        spec_loss=1.0*np.log(np.mean((dict_val['spec_val'][:idx]-dict_sample['spec_val'][:idx])**2))+1.0*np.log(np.mean((dict_val['spec_sdev'][:idx]-dict_sample['spec_sdev'][:idx])**2))
        chisqr_dict.update({'chi_spec3':spec_loss})
    
    except Exception as e: 
        print(e)
        
        keys=['chi_1a','chi_1b','chi_1c','chi_1','chi_2','chi_imgvar','chi_spec1','chi_spec2']
        chisqr_dict=dict.fromkeys(keys,np.nan)
        pass
    
    return chisqr_dict
    

In [6]:
img_size=64
# img_size=128

kappa=4

In [7]:
val_data_dict={'64':'/p/vast1/lbann/datasets/exagan/portal.nersc.gov/project/m3363/transfer_data_livermore/64cube_dataset/norm_1_train_val.npy',
              '128':''}


In [8]:
### Read validation data
# bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
bins=np.concatenate([np.array([-0.5]),np.arange(0.5,100.5,5),np.arange(100.5,300.5,20),np.arange(300.5,1000.5,50),np.array([2000])]) #bin edges to use
bins=f_transform(bins,kappa)   ### scale to (-1,1)

bkgnd_dict={}
num_bkgnd=100

### Extract validation data   
samples=np.load(val_data_dict[str(img_size)],mmap_mode='r')[500:600].copy()[:,0,:,:,:]
dict_val=f_compute_hist_spect(samples,bins)
del samples


In [9]:
# a1=f_invtransform(samples[0],4.0)
# np.max(a1),np.min(a1)

### Add pytorch runs

In [33]:
# parent_dir='/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_feb1_2023/ExaGAN/'
parent_dir='/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_June19_2023/ExaGAN/'

fldr_lst=glob.glob(parent_dir+'/202306*')

print(fldr_lst)
for i in fldr_lst: 
    print(i.split('/')[-1])

run_fldr=fldr_lst[1]
print(run_fldr)


output_fldr=run_fldr+'/outputs/trainer0/model0/'
file_lst=glob.glob(output_fldr+'sgd.validation.epoch*_gen_img*.npy')
len(file_lst)

['/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_June19_2023/ExaGAN/20230621_055125_lbann_cosmo3DGAN_n32_D16_bs64_specloss', '/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_June19_2023/ExaGAN/20230621_055144_lbann_cosmo3DGAN_n32_D1_bs64_specloss', '/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_June19_2023/ExaGAN/20230621_055226_lbann_cosmo3DGAN_n32_D2_bs64_specloss', '/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_June19_2023/ExaGAN/20230620_185238_lbann_cosmo3DGAN_n32_D8_bs64_specloss']
20230621_055125_lbann_cosmo3DGAN_n32_D16_bs64_specloss
20230621_055144_lbann_cosmo3DGAN_n32_D1_bs64_specloss
20230621_055226_lbann_cosmo3DGAN_n32_D2_bs64_specloss
20230620_185238_lbann_cosmo3DGAN_n32_D8_bs64_specloss
/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_June19_2023/ExaGAN/20230621_055144_lbann_cosmo3DGAN_n32_D1_bs64_specloss


210

In [34]:

### Load images
df_runs=pd.DataFrame([])

parent_dir='/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_feb1_2023/ExaGAN/'


lst=file_lst[::10]
# lst=file_lst[::3]

# lst=file_lst[80:200][::10]
# lst=file_lst[85:95] + file_lst[175:185]
# lst=file_lst[-10:]

# Prediction images 
for count,fname in enumerate(lst[:]):
#     print(fname)
    epoch=int(fname.split('/')[-1].split('.')[3])
    step=int(fname.split('/')[-1].split('.')[5].split('_')[0])
    
    max_epoch=60;min_epoch=30;
    if ((epoch<=max_epoch) & (epoch>=min_epoch)):
        
        prefix='run_%s_%s'%(epoch,step)
        key=prefix
        print(key)

        dict1={'label':key,'fname':fname}
        images=np.load(dict1['fname'])     ### No transform needed here

        images=images[:,0,:,:,:]
        print(images.shape)
        print(np.max(images),np.min(images))

        dict1['num_imgs']=images.shape[0]
        print(dict1)
        ### Compute spectrum and histograms
        dict_sample=f_compute_hist_spect(images,bins)
        ### Compute chi squares
        dict_chisqrs=f_compute_chisqr(dict_val,dict_sample,img_size)

        dict1.update(dict_sample)
        dict1.update(dict_chisqrs)
        del(images)
    #     df_runs=pd.concat([df_runs,pd.DataFrame(dict1,index=[df_runs.shape[0]+1])])
        df_runs=df_runs.append(dict1,ignore_index=True)




run_42_1500
(64, 64, 64, 64)
0.9895401 -0.9999988
{'label': 'run_42_1500', 'fname': '/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_June19_2023/ExaGAN/20230621_055144_lbann_cosmo3DGAN_n32_D1_bs64_specloss/outputs/trainer0/model0/sgd.validation.epoch.42.step.1500_gen_img_instance1_activation_output0.npy', 'num_imgs': 64}


  df_runs=df_runs.append(dict1,ignore_index=True)


run_57_2000
(64, 64, 64, 64)
0.985936 -0.99999994
{'label': 'run_57_2000', 'fname': '/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_June19_2023/ExaGAN/20230621_055144_lbann_cosmo3DGAN_n32_D1_bs64_specloss/outputs/trainer0/model0/sgd.validation.epoch.57.step.2000_gen_img_instance1_activation_output0.npy', 'num_imgs': 64}


  df_runs=df_runs.append(dict1,ignore_index=True)


In [35]:
## Add a reference set of images

prefix='run_ref'
key=prefix
print(key)

dict1={'label':key,'fname':fname}

images=np.load(val_data_dict[str(img_size)],mmap_mode='r')[16:80].copy()[:,0,:,:,:]  ### No transform needed here
print(images.shape)
print(np.max(images),np.min(images))

dict1['num_imgs']=images.shape[0]
print(dict1)
### Compute spectrum and histograms
dict_sample=f_compute_hist_spect(images,bins)
### Compute chi squares
dict_chisqrs=f_compute_chisqr(dict_val,dict_sample,img_size)

dict1.update(dict_sample)
dict1.update(dict_chisqrs)
del(images)
#     df_runs=pd.concat([df_runs,pd.DataFrame(dict1,index=[df_runs.shape[0]+1])])
df_runs=df_runs.append(dict1,ignore_index=True)



run_ref
(64, 64, 64, 64)
0.9872242 -0.99946105
{'label': 'run_ref', 'fname': '/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_June19_2023/ExaGAN/20230621_055144_lbann_cosmo3DGAN_n32_D1_bs64_specloss/outputs/trainer0/model0/sgd.validation.epoch.285.step.10000_gen_img_instance1_activation_output0.npy', 'num_imgs': 64}


  df_runs=df_runs.append(dict1,ignore_index=True)


In [36]:
dict1.keys()

dict_keys(['label', 'fname', 'num_imgs', 'hist_val', 'hist_err', 'hist_bin_centers', 'spec_val', 'spec_sdev', 'chi_1a', 'chi_1b', 'chi_1c', 'chi_1', 'chi_2', 'chi_imgvar', 'chi_spec1', 'chi_spec2', 'chi_spec3'])

In [37]:
df_runs.columns

Index(['label', 'fname', 'num_imgs', 'hist_val', 'hist_err',
       'hist_bin_centers', 'spec_val', 'spec_sdev', 'chi_1a', 'chi_1b',
       'chi_1c', 'chi_1', 'chi_2', 'chi_imgvar', 'chi_spec1', 'chi_spec2',
       'chi_spec3'],
      dtype='object')

In [38]:
col_list=['label', 'chi_1', 'chi_1a', 'chi_spec1', 'chi_spec3','chi_1b', 'chi_1c', 'chi_2','chi_imgvar', 'chi_spec2', 'hist_bin_centers', 'hist_err', 'hist_val','spec_sdev', 'spec_val','num_imgs','fname']

df_runs=df_runs.reindex(columns=col_list)


In [39]:
df_runs

Unnamed: 0,label,chi_1,chi_1a,chi_spec1,chi_spec3,chi_1b,chi_1c,chi_2,chi_imgvar,chi_spec2,hist_bin_centers,hist_err,hist_val,spec_sdev,spec_val,num_imgs,fname
0,run_42_1500,0.034114,0.010306,11.242231,46.131924,0.021983,0.001825,0.000922,0.918818,842.613898,"[-1.0317460317460316, -0.30994152046783624, 0....","[0.00530730300939742, 0.002295740327568192, 0....","[1.129969093948603, 0.42726281024515633, 0.057...","[2576181.238744982, 742977.5034487662, 441669....","[10365672.104389768, 4767106.650790955, 252045...",64,/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_...
1,run_57_2000,0.061704,0.026372,15.697793,50.028728,0.033816,0.001516,0.011576,0.603247,4170.046041,"[-1.0317460317460316, -0.30994152046783624, 0....","[0.002534534398129289, 0.0011621817037410307, ...","[1.2009654566645624, 0.3873539809137582, 0.060...","[1629698.1947711387, 582062.4759182694, 319483...","[7783063.813670559, 4293477.673962602, 2405424...",64,/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_...
2,run_ref,0.004049,0.001507,0.037544,46.354776,0.001157,0.001385,0.000399,1.262765,2.682891,"[-1.0317460317460316, -0.30994152046783624, 0....","[0.008657245242647028, 0.003950801014530303, 0...","[1.1280816905200484, 0.43271128199994563, 0.04...","[2604600.872121751, 1018029.2442484308, 504096...","[9114044.08325639, 4314889.871858945, 2269621....",64,/usr/WS2/ayyar1/cosmogan/lbann_work/runs/runs_...


### Compare chi-squares

In [40]:
chi_keys=['label','chi_1','chi_spec1', 'num_imgs', 'chi_1a', 'chi_1b', 'chi_1c', 'chi_2','chi_imgvar', 'chi_spec2','chi_spec3']
# df_runs[chi_keys].sort_values(by=['chi_1'])
# df_runs[chi_keys].sort_values(by=['chi_spec1'])
df_runs[chi_keys]

Unnamed: 0,label,chi_1,chi_spec1,num_imgs,chi_1a,chi_1b,chi_1c,chi_2,chi_imgvar,chi_spec2,chi_spec3
0,run_42_1500,0.034114,11.242231,64,0.010306,0.021983,0.001825,0.000922,0.918818,842.613898,46.131924
1,run_57_2000,0.061704,15.697793,64,0.026372,0.033816,0.001516,0.011576,0.603247,4170.046041,50.028728
2,run_ref,0.004049,0.037544,64,0.001507,0.001157,0.001385,0.000399,1.262765,2.682891,46.354776


## Plot

In [41]:
def f_plot_1d_sum(arr):
    
    # Ensure 4D array: samples,x,y,z
    assert len(arr.shape)==4, "Array must have only 4 dimensions %s. You have "%(arr.shape[0])
    assert np.min(arr)>=0, "Array has negative values. Need to be all positive for sum. min:%s\t max:%s"%(np.min(arr),np.max(arr))

    num_images=arr.shape[0]

    if num_images> 30:
        print("Too many images. Need less than or equal to 15",arr.shape)
        raise SystemExit
        
    # Get array summed along y and z axes
    a1=np.sum(arr,axis=(2,3))
    
    fig=plt.figure()
    fig.add_subplot(1,1,1)
    for count,(i,marker) in enumerate(zip(arr,itertools.cycle('>^*sDHPdpx_'))):
        plt.plot(a1[count],label=str(count),marker=marker)
        
    
    plt.legend()
    plt.show()
    
def f_print_sdev_1d(arr):
    
    # c1=np.sum(arr,axis=(2,3))
    # sd=np.std(c1,axis=0)
    
    # Compute Std deviation among images. img[idx,x,y,z]
    ## axis=0 because you need variation over image index 
    sd1=np.std(arr,axis=0)[:,0,0]/np.sum(arr)
    sd2=np.std(arr,axis=0)[0,:,0]/np.sum(arr)
    sd3=np.std(arr,axis=0)[0,0,:]/np.sum(arr)
    
    return np.sum(sd1),np.sum(sd2),np.sum(sd3)



In [42]:
def f_plot_hist_spec(df,dict_val,plot_type,img_size):    
    
    assert plot_type in ['hist','spec','spec_relative','grid_2dslice','grid_zsum','1d_sum','1d_sdev'],"Invalid mode %s"%(plot_type)

    
    if plot_type in ['hist','spec','spec_relative']:     fig=plt.figure(figsize=(6,6))
    
    for (i,row),marker in zip(df.iterrows(),itertools.cycle('>^*sDHPdpx_')):
        label=row.label
        if plot_type=='hist':
            x1=row.hist_bin_centers
            y1=row.hist_val
            yerr1=row.hist_err
            x1=f_invtransform(x1,kappa)
            
            plt.errorbar(x1,y1,yerr1,marker=marker,markersize=5,linestyle='',label=label)
            
        if plot_type=='spec':
            
            y=row.spec_val
            yerr=row.spec_sdev
            x=np.arange(len(y))
            # y=x**2*y; yerr=x**2*yerr ## Plot k^2 P(y)
            
            plt.fill_between(x, y - yerr, y + yerr, alpha=0.4)
            plt.plot(x, y, marker=marker, linestyle=':',label=label)

        if plot_type=='spec_relative':
            
            y2=row.spec_val
            yerr2=row.spec_sdev
            x2=np.arange(len(y2))
            
            dict_bkg=dict_val


            ### Reference spectrum
            y1,yerr1=dict_bkg['spec_val'],dict_bkg['spec_sdev']
            y=y2/(1.0*y1)
            ## Variance is sum of variance of both variables, since they are uncorrelated
            
            # delta_r=sqrt(\delta_c2 ^ 2/ c1^2 + \delta_c1^2 * c_2^2 / c_1^4)
            yerr=(np.abs(y))*np.sqrt((yerr1/y1)**2+(yerr2/y2)**2)/np.sqrt(row.num_imgs)
            
            plt.fill_between(x2, y - yerr, y + yerr, alpha=0.4)
            plt.plot(x2, y, marker=marker, linestyle=':',label=label)
            plt.xlim(0,img_size)
            plt.legend()
            plt.title("relative spectrum")
        
        if plot_type=='grid_2dslice':
            images=np.load(row.fname)[:,0,:,:,img_size//2]
            f_plot_grid(images[:8],cols=4,fig_size=(8,4))
              

        if plot_type=='grid_zsum':
            images=np.load(row.fname)[:,0,:,:,:]
            images=np.sum(f_invtransform(images,kappa),axis=3)
            f_plot_grid(images[:8],cols=4,fig_size=(8,4))

        if plot_type=='1d_sum':
            images=np.load(row.fname)[:,0,:,:,:]
            f_plot_1d_sum(f_invtransform(images[:8],kappa))

        if plot_type=='1d_sdev':
            images=np.load(row.fname)[:,0,:,:,:]
            print(f_print_sdev_1d(f_invtransform(images[:8],kappa)))               
            
            
    ### Plot input data
    dict_bkg=dict_val
    if plot_type=='hist':
        x,y,yerr=dict_bkg['hist_bin_centers'],dict_bkg['hist_val'],dict_bkg['hist_err']
        x=f_invtransform(x,kappa)
        plt.errorbar(x, y,yerr,color='k',linestyle='-',label='bkgnd')   
        plt.title('Pixel Intensity Histogram')
#         plt.xscale('symlog',linthreshx=50)
        plt.xscale('log')

    if plot_type=='spec':
        y,yerr=dict_bkg['spec_val'],dict_bkg['spec_sdev']

        x=np.arange(len(y))
        # y=x**2*y; yerr=x**2*yerr ## Plot k^2 P(y)
        plt.fill_between(x, y - yerr, y + yerr, color='k',alpha=0.8)
        plt.title('Spectrum')
        plt.xlim(0,img_size/2)


    if plot_type=='spec_relative':
        plt.axhline(y=1.0,color='k',linestyle='-.')
        plt.title("relative spectrum")
        plt.xlim(0,img_size/2)
        plt.ylim(0.5,2)    

    if plot_type in ['hist','spec']:     
        plt.yscale('log')
    plt.legend(bbox_to_anchor=(0.5, 0.75),ncol=2, fancybox=True, shadow=True,prop={'size':6})

    
    plt.show()
    
# f_plot_hist_spec(df_runs,dict_val,'hist',img_size)
# f_plot_hist_spec(df_runs,dict_val,'spec',img_size)
# f_plot_hist_spec(df_runs,dict_val,'spec_relative',img_size)
# f_plot_hist_spec(df_runs,dict_val,'grid',img_size)


## Widget

In [43]:
def f_widget_compare(df,labels_list,bkgnd,plot_type):
    df_temp=df[df.label.isin(labels_list)]
    f_plot_hist_spec(df_temp,bkgnd,plot_type,img_size)

# df_runs=df_runs.sort_values(by=['chi_1'])
# df_runs=df_runs.sort_values(by=['chi_spec3'])

interact_manual(f_widget_compare,df=fixed(df_runs),
                labels_list=SelectMultiple(options=df_runs.label.values), 
                img_size=fixed(img_size),
                bkgnd=fixed(dict_val),plot_type=ToggleButtons(options=['hist','spec','spec_relative','grid_2dslice','grid_zsum','1d_sum','1d_sdev']))


interactive(children=(SelectMultiple(description='labels_list', options=('run_42_1500', 'run_57_2000', 'run_re…

<function __main__.f_widget_compare(df, labels_list, bkgnd, plot_type)>

In [44]:
col_list=['label','chi_1','chi_spec1','num_imgs','chi_2','chi_spec2','chi_spec3']
df_runs[col_list]

Unnamed: 0,label,chi_1,chi_spec1,num_imgs,chi_2,chi_spec2,chi_spec3
0,run_42_1500,0.034114,11.242231,64,0.000922,842.613898,46.131924
1,run_57_2000,0.061704,15.697793,64,0.011576,4170.046041,50.028728
2,run_ref,0.004049,0.037544,64,0.000399,2.682891,46.354776


In [23]:

# label_lst=['run_114_4000']
# df=df_runs[df_runs.label.isin(label_lst)]
# df=df_runs[4:10]

# f_plot_hist_spec(df,dict_val,'spec_relative',img_size)
# f_plot_hist_spec(df,dict_val,'hist',img_size)
# f_plot_hist_spec(df,dict_val,'grid',img_size)
