# Analyze results for 3D images
Jan 21, 2021

In [1]:
import glob
import itertools
import os
import pickle
import subprocess as sp
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import LogNorm, PowerNorm, Normalize

from ipywidgets import *

In [2]:
# Select the interactive matplotlib backend (figures are rendered as widgets)
%matplotlib widget

In [3]:
# Make the project's image-analysis module importable from its absolute location
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/cosmogan_pytorch/code/modules_image_analysis/')
# NOTE(review): the star import hides where names come from; f_batch_histogram, f_plot_spectrum_3d
# and f_plot_grid used further down are presumably provided by this module — confirm.
from modules_3d_image_analysis import *

# sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/cosmogan_pytorch/code/1_basic_GAN/1_main_code/')
# import post_analysis_pandas as post

In [4]:
### Transformation functions for image pixel values
def f_transform(x):
    ''' Rescale pixel values: s = 2x/(x+4) - 1.
    x=0 maps to -1, x=4 maps to 0, and s approaches +1 as x grows.
    Works element-wise on numpy arrays as well as on scalars.
    '''
    scaled = 2.*x/(x + 4.)
    return scaled - 1.

def f_invtransform(s):
    ''' Inverse of f_transform: x = 4(1+s)/(1-s).
    s=-1 maps back to 0 and s=0 maps back to 4; s=1 is a division by zero.
    Works element-wise on numpy arrays as well as on scalars.
    '''
    numerator = 4.*(1. + s)
    return numerator/(1. - s)


## Read data

In [5]:
# Image side length; str(img_size) is used as the key into the dictionaries below
img_size=64
# Parent directory of the training-run results, keyed by image size
dict1={'64':'/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/3d/',
      '512':'/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/512sq/'}


### Read validation data and compute metrics

In [6]:
# Get location for validation data
raw_data_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/'
# Validation file per image size; only the '64' entry exists, so other values of img_size raise KeyError below
input_files_dict={'64':raw_data_dir+'3d_data/dataset1_smoothing_const_params_64cube_100k/val.npy'}

In [7]:
def f_compute_hist_spect(sample,bins):
    ''' Compute the pixel intensity histogram and the spectrum for a batch of images
    Input : image array (sample) and histogram bin edges (bins)
    Output: dictionary with 5 entries : hist_val, hist_err, hist_bin_centers, spec_val, spec_sdev
    '''
    ### Normalised histogram over the batch (helper presumably returns values, errors and bin centers — confirm)
    hist_val,hist_err,hist_centers=f_batch_histogram(sample,bins=bins,norm=True,hist_range=None)
    ### Spectrum over the batch, with plotting switched off
    spec_val,spec_sdev=f_plot_spectrum_3d(sample,plot=False)

    return {
        'hist_val':hist_val,
        'hist_err':hist_err,
        'hist_bin_centers':hist_centers,
        'spec_val':spec_val,
        'spec_sdev':spec_sdev,
    }

### Extract validation data
num_bkgnd=1000   # number of validation samples used as the reference set (also read by f_plot_hist_spec)
fname=input_files_dict[str(img_size)]
# Memory-map the file so it is not loaded whole, keep the first num_bkgnd samples and take index 0
# along axis 1 (presumably a singleton channel axis, given the printed shape — confirm)
s_val=np.load(fname,mmap_mode='r')[:num_bkgnd][:,0,:,:]
s_val=f_transform(s_val)
print(s_val.shape)

# Bin edges are defined in raw pixel units and get coarser towards large values
bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
bins=f_transform(bins)   ### scale to (-1,1) 
### Compute histogram and spectrum of raw data 
dict_val=f_compute_hist_spect(s_val,bins)

# del(s_val)

(1000, 64, 64, 64)


  hist_arr=np.array([np.histogram(arr.flatten(), bins=bins, range=(llim,ulim), density=norm) for arr in img_arr]) ## range is important


### Read run data

In [8]:
# main_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/128sq/'
# results_dir=main_dir+'20201002_064327'

In [9]:
# Selection widget over the keys of dict1; its result would be the matching results directory.
# It is not displayed, and the next cell reads dict1 via img_size instead of u.result.
u=interactive(lambda x: dict1[x], x=Select(options=dict1.keys()))
# display(u)

In [13]:
# parent_dir=u.result
parent_dir=dict1[str(img_size)]

# Names of the run directories under parent_dir that start with '202105'
dir_lst=[i.split('/')[-1] for i in glob.glob(parent_dir+'202105*')]
# Dropdown to pick one run; the choice is read back from q.result in the next cell
q=interactive(lambda x: x, x=Dropdown(options=dir_lst))
display(q)

interactive(children=(Dropdown(description='x', options=('20210514_120637_128cube_bs4_lr0.002_nodes16', '20210…

In [14]:
# NOTE(review): result_dir depends on the dropdown state, so everything below is not reproducible
# from code alone; consider assigning the run name explicitly.
result=q.result
result_dir=parent_dir+result
print(result_dir)

/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/3d/20210520_160514_bs8_lr0.0003_nodes8_spec0.1


## Plot Losses

In [15]:
# Per-step training metrics of the selected run, cast to float64.
# NOTE: read_pickle can execute arbitrary code; only load files from trusted runs.
df_metrics=pd.read_pickle(result_dir+'/df_metrics.pkle').astype(np.float64)


In [16]:
# Last 10 rows of the metrics table
df_metrics.tail(10)

Unnamed: 0,step,epoch,Dreal,Dfake,Dfull,G_adv,G_full,spec_loss,hist_loss,spec_chi,hist_chi,gp_loss,fm_loss,D(x),D_G_z1,D_G_z2,time,lr_d,lr_g
20271,20271.0,129.0,0.428968,0.619423,1.048392,3.371859,13.97438,10.602522,-1.536588,10.593342,-1.474523,,,2.462882,-0.499293,-3.333269,0.208633,0.0003,0.0003
20272,20272.0,129.0,0.495261,0.316096,0.811357,1.120684,12.245335,11.124651,-1.540152,11.30213,-1.575233,,,1.377573,-2.441894,-0.650785,0.206193,0.0003,0.0003
20273,20273.0,129.0,0.363567,0.540944,0.904511,3.344988,14.672888,11.3279,-1.333884,11.390173,-2.01274,,,2.490647,-0.563386,-3.305193,0.221536,0.0003,0.0003
20274,20274.0,129.0,0.453146,0.334961,0.788106,1.1688,11.870712,10.701913,-1.118842,10.331396,-1.054985,,,0.960598,-2.433017,-0.711088,0.207787,0.0003,0.0003
20275,20275.0,129.0,0.373776,0.813609,1.187385,2.949316,14.124215,11.174899,-1.203689,10.792004,-1.22346,,,2.73711,-0.114001,-2.891088,0.205564,0.0003,0.0003
20276,20276.0,129.0,0.496138,0.410957,0.907095,1.180687,11.778049,10.597363,-1.470014,10.621691,-0.987059,,,0.699479,-2.246506,-0.783776,0.208628,0.0003,0.0003
20277,20277.0,129.0,0.394582,0.401143,0.795725,3.713576,14.679605,10.966029,-1.17112,10.951794,-1.520035,,,2.401865,-0.937757,-3.682337,0.211184,0.0003,0.0003
20278,20278.0,129.0,0.478134,0.376891,0.855024,1.151755,11.924525,10.77277,-1.378241,10.891121,-1.528887,,,0.834123,-2.383873,-0.691001,0.206419,0.0003,0.0003
20279,20279.0,129.0,0.400159,0.530656,0.930814,3.580388,14.229033,10.648644,-1.813033,10.894617,-0.966315,,,2.239819,-0.34848,-3.549086,0.20979,0.0003,0.0003
20280,,,,,,,,,,10.699663,-1.846734,,,,,,,,


In [17]:
def f_plot_metrics(df,col_list):
    ''' Plot the selected columns of a metrics dataframe on one figure.
    Input : df (dataframe holding the metrics), col_list (iterable of column names to draw)
    Output: None. Each column is drawn as unconnected '*' markers against the dataframe index.
    '''
    plt.figure()
    for key in col_list:
        # Read from the dataframe that was passed in, not from the notebook-level df_metrics
        plt.plot(df[key],label=key,marker='*',linestyle='')
    plt.legend()
    
#     col_list=list(col_list)
#     df.plot(kind='line',x='step',y=col_list)
    
# f_plot_metrics(df_metrics,['spec_chi','hist_chi'])

# Manual widget: choose any set of metric columns, then press the button to draw them
interact_manual(f_plot_metrics,df=fixed(df_metrics), col_list=SelectMultiple(options=df_metrics.columns.values))

interactive(children=(SelectMultiple(description='col_list', options=('step', 'epoch', 'Dreal', 'Dfake', 'Dful…

<function __main__.f_plot_metrics(df, col_list)>

In [19]:

# 0.5% quantile of hist_chi, used as a threshold to list the steps with the lowest hist_chi
chi=df_metrics.quantile(q=0.005,axis=0)['hist_chi']
print(chi)
# Rows at or below the threshold, lowest hist_chi first
df_metrics[df_metrics['hist_chi']<=chi].sort_values(by=['hist_chi']).head(10)

-1.9113263028860092


Unnamed: 0,step,epoch,Dreal,Dfake,Dfull,G_adv,G_full,spec_loss,hist_loss,spec_chi,hist_chi,gp_loss,fm_loss,D(x),D_G_z1,D_G_z2,time,lr_d,lr_g
271,271.0,1.0,0.387601,0.352876,0.740477,1.336675,12.173975,10.837299,-2.491022,11.02037,-2.929108,,,1.592813,-2.668342,-1.004593,0.208002,0.0003,0.0003
269,269.0,1.0,0.364427,0.488687,0.853113,1.38505,13.059587,11.674537,-2.533821,11.365255,-2.765854,,,2.253974,-3.474521,-1.089299,0.209395,0.0003,0.0003
274,274.0,1.0,0.499277,0.606604,1.105881,4.213061,16.193663,11.980601,-2.289834,12.587029,-2.552146,,,3.234594,-0.270072,-4.197946,0.209071,0.0003,0.0003
266,266.0,1.0,0.608839,0.625593,1.234432,4.950359,16.390629,11.440269,-2.570421,11.06296,-2.500271,,,3.258773,-0.189043,-4.943007,0.209395,0.0003,0.0003
272,272.0,1.0,0.489977,0.623133,1.11311,3.282914,14.395551,11.112637,-2.463273,11.143387,-2.470031,,,3.087348,-0.160827,-3.233571,0.207109,0.0003,0.0003
197,197.0,1.0,0.587374,0.872213,1.459587,5.268948,16.47319,11.204243,-2.056571,11.261269,-2.401237,,,3.848173,0.443298,-5.263782,0.237236,0.0003,0.0003
273,273.0,1.0,0.372385,0.389949,0.762334,0.992643,11.849177,10.856534,-2.24322,11.116541,-2.320076,,,1.634055,-2.039962,-0.528018,0.208776,0.0003,0.0003
18297,18297.0,117.0,0.323642,0.344675,0.668317,4.393013,14.833994,10.440981,-2.045204,10.587193,-2.313997,,,2.171846,-1.846855,-4.280635,0.21155,0.0003,0.0003
265,265.0,1.0,0.503984,0.388284,0.892268,1.109241,11.912251,10.80301,-2.170639,11.052377,-2.302068,,,1.624729,-2.138512,-0.702841,0.208093,0.0003,0.0003
198,198.0,1.0,0.33899,0.497866,0.836855,1.274011,12.460126,11.186115,-2.391535,11.250569,-2.297518,,,2.083364,-3.77189,-0.945675,0.208253,0.0003,0.0003


In [20]:
# display(df_metrics.sort_values(by=['hist_chi']).head(8))
# display(df_metrics.sort_values(by=['spec_chi']).head(8))

## Read stored chi-squares for images

In [21]:
# Table of stored chi-squares for the selected run.
# NOTE: read_pickle can execute arbitrary code; only load files from trusted runs.
df=pd.read_pickle(result_dir+'/df_processed.pkle')
df[['epoch','step']]=df[['epoch','step']].astype(int)
df['label']=df.epoch.astype(str)+'-'+df.step.astype(str) # Add label column ('<epoch>-<step>') for plotting

# Columns shown when displaying the table in later cells
col_list=['label','chi_1','chi_spec1', 'chi_spec3', 'chi_1a', 'chi_1b', 'chi_1c', 'chi_2','chi_imgvar', 'chi_spec2','epoch','step']


In [22]:
def f_slice_df(df,cutoff=0.2,sort_col='chi_1',col_mode='all',head=10,epoch_range=[0,None],display_flag=False):
    ''' View dataframe after slicing

    Keeps the rows whose epoch lies inside epoch_range, drops the rows that are not strictly
    below the `cutoff` quantile in both chi_1 and chi_spec1, and returns the first `head`
    rows (epoch>0 only) after an ascending sort on `sort_col`.

    Input :
        df           : dataframe holding 'label', 'epoch', 'step' and the chi-square columns listed below
        cutoff       : quantile in [0,1] applied to chi_1 and chi_spec1
        sort_col     : column used for the sort
        col_mode     : 'short' returns only label, chi_1 and chi_spec1; any other value returns the full list
        head         : number of rows to return
        epoch_range  : first and last epoch to keep (both inclusive); a last value of None means the largest epoch in df
        display_flag : if True, also display the result
    Output: the sliced dataframe. The two print calls (epoch range and quantile thresholds) are kept from the original.
    '''
    col_list=['label','chi_1','chi_spec1', 'chi_spec3', 'chi_1a', 'chi_1b', 'chi_1c', 'chi_2','chi_imgvar', 'chi_spec2','epoch','step']
    if (col_mode=='short'): col_list=['label','chi_1','chi_spec1']

    print(epoch_range)
    # Resolve the limits into locals instead of writing into epoch_range: the default list is shared
    # between calls, so storing the resolved limit in it pins every later default call to this df's last epoch.
    epoch_min,epoch_max=epoch_range[0],epoch_range[1]
    if epoch_max is None: epoch_max=df['epoch'].max()
    df=df[(df.epoch<=epoch_max)&(df.epoch>=epoch_min)]

    ## Apply cutoff to keep reasonable chi1 and chispec1
    best_keys=[ 'chi_1', 'chi_spec1']
    # Take the quantile of these two columns only; df also holds non-numeric columns such as 'label'
    q_dict=dict(df[best_keys].quantile(q=cutoff,axis=0))
    print(q_dict)
    # Boolean masks instead of a formatted query string; a NaN threshold (empty df) now simply selects nothing
    df=df[(df['chi_1']<q_dict['chi_1'])&(df['chi_spec1']<q_dict['chi_spec1'])]

    # Sort dataframe
    df1=df[df.epoch>0].sort_values(by=sort_col)

    df2=df1.head(head)[col_list]

    if display_flag:   # Display df
        display(df2)

    return df2

In [23]:
# Interactive front-end for f_slice_df: every keyword below becomes a control, df itself is held fixed.
# The epoch slider spans 0 .. largest epoch present in df.
w=interactive(f_slice_df,df=fixed(df),
cutoff=widgets.FloatSlider(value=0.2, min=0, max=1.0, step=0.01), 
col_mode=['all','short'], display_flag=widgets.Checkbox(value=False),
head=widgets.IntSlider(value=10,min=1,max=20,step=1),
epoch_range=widgets.IntRangeSlider(value=[0,np.max(df.epoch.values)],min=0,max=np.max(df.epoch.values),step=1),
sort_col=['chi_1','chi_spec1', 'chi_spec3', 'chi_1a', 'chi_1b', 'chi_1c', 'chi_2','chi_imgvar', 'chi_spec2'],
)
display(w)

interactive(children=(FloatSlider(value=0.2, description='cutoff', max=1.0, step=0.01), Dropdown(description='…

In [24]:
# Dataframe returned by the most recent run of the slicing widget (depends on the widget state)
df_sliced=w.result
# df_sliced

In [41]:
best_step=[]
# Only epochs after 30 are considered; the boolean filter already yields a new frame,
# so no separate df.copy() is needed beforehand.
df_test=df[df.epoch>30]
# Collect the top steps under three different (cutoff, sort column) criteria
best_step.append(f_slice_df(df_test,cutoff=0.9,sort_col='chi_1',head=4,display_flag=False).step.values)
best_step.append(f_slice_df(df_test,cutoff=0.8,sort_col='chi_1c',head=2,display_flag=False).step.values)
best_step.append(f_slice_df(df_test,cutoff=0.4,sort_col='chi_spec1',head=2,display_flag=False).step.values)
print(best_step)
# Merge the three selections into one sorted array without duplicates
best_step=np.unique(np.concatenate(best_step))
print(best_step)

[0, 130]
{'chi_1': 1.7867359786356234, 'chi_spec1': 2.7900369934198794}
[0, 130]
{'chi_1': 1.1870547918490852, 'chi_spec1': 2.0149943869380964}
[0, 130]
{'chi_1': 0.37570629050397747, 'chi_spec1': 0.8467621560386938}
[array([20350, 12680, 15600, 16440]), array([12630, 11360]), array([19160, 19730])]
[11360 12630 12680 15600 16440 19160 19730 20350]


In [42]:
# best_step=[58800]
# best_step=np.arange(32800,32900,10).astype(int)

In [43]:
# Rows of df belonging to the selected steps
df_best=df[df.step.isin(best_step)]
print(df_best.shape)
# (epoch, step) of the first matching row for every selected step; raises IndexError if a step is missing from df
print([(df_best[df_best.step==step].epoch.values[0],df_best[df_best.step==step].step.values[0]) for step in best_step])
# print([(df_best.loc[idx].epoch,df_best.loc[idx].step) for idx in best_idx])

(8, 22)
[(72, 11360), (80, 12630), (81, 12680), (99, 15600), (105, 16440), (122, 19160), (126, 19730), (130, 20350)]


In [44]:
# Chi-square summary of the selected steps
df_best[col_list]


Unnamed: 0,label,chi_1,chi_spec1,chi_spec3,chi_1a,chi_1b,chi_1c,chi_2,chi_imgvar,chi_spec2,epoch,step
1135,72-11360,0.275874,1.621129,50.744595,0.02603,0.249844,0.0,0.000181,9.742078,129.350986,72,11360
1262,80-12630,0.550143,1.123945,49.816907,0.032205,0.517939,0.0,0.000227,11.46139,94.217016,80,12630
1267,81-12680,0.063231,1.298404,46.1659,0.056665,0.006555,1.1e-05,0.00042,4.983218,126.130779,81,12680
1559,99-15600,0.066061,0.539161,46.601562,0.041332,0.024723,6e-06,0.00029,6.920225,62.83526,99,15600
1643,105-16440,0.074339,1.3205,46.508411,0.069915,0.004403,2.1e-05,0.000523,6.037074,84.626558,105,16440
1915,122-19160,0.37129,0.169786,40.521535,0.020852,0.350438,0.0,0.000123,8.675865,16.756051,122,19160
1972,126-19730,0.275077,0.174873,48.292263,0.021613,0.253433,3.1e-05,9.4e-05,14.083271,14.454367,126,19730
2034,130-20350,0.050111,0.618193,44.763954,0.030728,0.019379,3e-06,0.00024,7.23637,30.640469,130,20350


### Plots 2: stored data

In [45]:

def _f_ratio_with_err(y_num,yerr_num,y_den,yerr_den,num_imgs):
    ''' Ratio y_num/y_den and the error band drawn around it.
    The relative errors of numerator and denominator are added in quadrature (treated as
    uncorrelated) and the result is divided by sqrt(num_imgs).
    '''
    ratio=y_num/(1.0*y_den)
    # delta_r = |r| * sqrt( (delta_den/den)^2 + (delta_num/num)^2 )
    ratio_err=(np.abs(ratio))*np.sqrt((yerr_den/y_den)**2+(yerr_num/y_num)**2)/np.sqrt(num_imgs)
    return ratio,ratio_err


def f_plot_hist_spec(df,dict_bkg,plot_type,img_size):
    ''' Compare stored histograms / spectra of the rows of df with the reference data.

    Input :
        df        : dataframe, one curve (or image grid) per row. Columns read: 'label' always;
                    'hist_bin_centers','hist_val','hist_err' for 'hist' and 'hist_relative';
                    'spec_val','spec_sdev' for 'spec' and 'spec_relative';
                    'num_imgs' for 'spec','spec_relative' and 'hist_relative'; 'fname' for 'grid'.
        dict_bkg  : reference dictionary with keys hist_val, hist_err, hist_bin_centers, spec_val, spec_sdev
        plot_type : one of 'hist','spec','grid','spec_relative','hist_relative'
        img_size  : image side length; the x-axis is limited to (0, img_size/2)
    Output: None, the curves are drawn with pyplot.

    Relies on notebook-level names: f_invtransform, f_plot_grid and, for 'spec', num_bkgnd.
    '''
    assert plot_type in ['hist','spec','grid','spec_relative','hist_relative'],"Invalid mode %s"%(plot_type)

    # 'grid' gets its figure from f_plot_grid; every other mode draws on one new figure
    if plot_type!='grid': plt.figure(figsize=(6,6))

    for (i,row),marker in zip(df.iterrows(),itertools.cycle('>^*sDHPdpx_')):
        label=row.label

        if plot_type=='hist':
            # Bin centers are stored in transformed units; plot them in raw pixel units
            x1=f_invtransform(row.hist_bin_centers)
            plt.errorbar(x1,row.hist_val,row.hist_err,marker=marker,markersize=5,linestyle='',label=label)

        elif plot_type=='spec':
            y2=row.spec_val
            yerr2=row.spec_sdev/np.sqrt(row.num_imgs)
            x2=np.arange(len(y2))

            plt.fill_between(x2, y2 - yerr2, y2 + yerr2, alpha=0.4)
            plt.plot(x2, y2, marker=marker, linestyle=':',label=label)

        elif plot_type=='spec_relative':
            y2=row.spec_val
            x2=np.arange(len(y2))
            ### Ratio to the reference spectrum
            y,yerr=_f_ratio_with_err(y2,row.spec_sdev,dict_bkg['spec_val'],dict_bkg['spec_sdev'],row['num_imgs'])

            plt.fill_between(x2, y - yerr, y + yerr, alpha=0.4)
            plt.plot(x2, y, marker=marker, linestyle=':',label=label)

        elif plot_type=='hist_relative':
            x2=f_invtransform(row.hist_bin_centers)
            ### Ratio to the reference histogram
            y,yerr=_f_ratio_with_err(row.hist_val,row.hist_err,dict_bkg['hist_val'],dict_bkg['hist_err'],row['num_imgs'])

            plt.fill_between(x2, y - yerr, y + yerr, alpha=0.4)
            plt.plot(x2, y, marker=marker, linestyle=':',label=label)

        elif plot_type=='grid':
            images=np.load(row.fname)[:,0,:,:,0]
            print(images.shape)
            f_plot_grid(images[:8],cols=4,fig_size=(8,4))

    ### Plot input data and set axes
    if plot_type=='hist':
        x,y,yerr=dict_bkg['hist_bin_centers'],dict_bkg['hist_val'],dict_bkg['hist_err']
        x=f_invtransform(x)
        plt.errorbar(x, y,yerr,color='k',linestyle='-',label='bkgnd')
        plt.title('Pixel Intensity Histogram')
        # matplotlib >= 3.3 names this keyword `linthresh`; the axis-specific `linthreshx` was removed in 3.5
        try:
            plt.xscale('symlog',linthresh=50)
        except (TypeError,ValueError):
            plt.xscale('symlog',linthreshx=50)

    if plot_type=='spec':
        y,yerr=dict_bkg['spec_val'],dict_bkg['spec_sdev']/np.sqrt(num_bkgnd)
        x=np.arange(len(y))
        plt.fill_between(x, y - yerr, y + yerr, color='k',alpha=0.8)
        plt.xlim(0,img_size/2)
        plt.title('Spectrum')

    if plot_type=='spec_relative':
        plt.axhline(y=1.0,color='k',linestyle='-.')
        plt.xlim(0,img_size/2)
        plt.ylim(0.5,2)

    if plot_type=='hist_relative':
        # NOTE(review): this x-limit is in units of img_size although the axis shows pixel intensity;
        # it looks copied from the spectrum branch — kept as in the original, please confirm.
        plt.xlim(0,img_size/2)
        plt.ylim(0,5)
        plt.axhline(y=1.0,color='k',linestyle='-.')

    # The image grid has no labelled curves, so it gets no legend
    if plot_type!='grid':
        plt.legend(bbox_to_anchor=(0.3, 0.75),ncol=2, fancybox=True, shadow=True,prop={'size':6})
    if plot_type in ['hist','spec']:     plt.yscale('log')

# f_plot_hist_spec(df_runs,dict_val,'hist')
# f_plot_hist_spec(df_runs,dict_val,'spec')
# f_plot_hist_spec(df_best,dict_val,'grid')


In [46]:
def f_widget_compare(df,labels_list,bkgnd,plot_type):
    ''' Widget callback: plot the rows of df whose label is in labels_list against the reference data.
    Input : df (dataframe with a 'label' column), labels_list (labels to keep),
            bkgnd (reference dictionary handed to f_plot_hist_spec), plot_type (mode of f_plot_hist_spec)
    The image size is read from the notebook-level img_size.
    '''
    df_temp=df[df.label.isin(labels_list)]
    # Use the reference passed by the caller rather than the notebook-level dict_val
    f_plot_hist_spec(df_temp,bkgnd,plot_type,img_size)
    
# Manual widget: pick any of the selected runs and a plot type, then press the button to draw.
# df_best and dict_val are passed as fixed (non-interactive) arguments.
interact_manual(f_widget_compare,df=fixed(df_best),
                labels_list=SelectMultiple(options=df_best.label.values), 
                bkgnd=fixed(dict_val),plot_type=ToggleButtons(options=['hist','spec','grid','spec_relative','hist_relative']))

interactive(children=(SelectMultiple(description='labels_list', options=('72-11360', '80-12630', '81-12680', '…

<function __main__.f_widget_compare(df, labels_list, bkgnd, plot_type)>

## Plot images

In [94]:
# Shape of the transformed validation samples loaded earlier
print(s_val.shape)


(1000, 64, 64, 64)


In [95]:
# Generated images stored by one specific run.
# NOTE(review): this rebinds fname (previously the validation file) and points at a run other than result_dir.
fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/3d/20210123_050147_3d_l0.5_80k/images/gen_img_epoch-28_step-71620.npy'
a1=np.load(fname)

In [96]:
# Compare array shapes of validation and generated samples
s_val.shape,a1.shape

((1000, 64, 64, 64), (32, 64, 64, 64))

In [97]:
# 8 generated samples (indices 6-13), each shown as the 2D slice at index 0 of the last axis
f_plot_grid(a1[6:14,:,:,0],cols=4,fig_size=(4,2))

2 4


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [98]:
# 8 validation samples (indices 100-107), each shown as the 2D slice at index 0 of the last axis
f_plot_grid(s_val[100:108,:,:,0],cols=4,fig_size=(4,2))

2 4


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [99]:
# NOTE(review): the stored output of this cell lists different labels than the earlier display of
# df_best, so it appears to come from another kernel state — re-run to refresh.
df_best[col_list]

Unnamed: 0,label,chi_1,chi_spec1,chi_spec3,chi_1a,chi_1b,chi_1c,chi_2,chi_imgvar,chi_spec2,epoch,step
655,2-6560,0.246885,10.027608,49.063214,0.192645,0.053953,0.000288,0.115025,3.904391,17956.06961,2,6560
2819,11-28200,0.036677,6.01482,49.570766,0.019308,0.014379,0.00299,0.010212,9.884761,1447.230356,11,28200
2920,11-29210,0.248065,6.946046,59.081433,0.241302,0.006456,0.000306,0.089805,33.648975,50.338523,11,29210
3378,13-33790,0.032535,6.31783,50.120442,0.028484,0.002187,0.001864,0.013548,6.696044,1461.858703,13,33790
4015,16-40160,0.032709,5.377288,50.452657,0.027667,0.003498,0.001544,0.013563,10.93214,240.525028,16,40160
4024,16-40250,0.100561,3.0858,43.490086,0.071879,0.0257,0.002983,0.051874,7.51061,71.322752,16,40250
4107,16-41080,0.15936,3.133683,47.506667,0.103007,0.053081,0.003272,0.063929,15.40241,45.373778,16,41080
4489,17-44900,0.023849,4.55496,53.793261,0.017225,0.005775,0.000849,0.004996,17.831019,80.620578,17,44900


In [100]:
# Scratch: random float32 array of shape (2,3,4,7); not used by any other cell shown here.
# NOTE(review): drawn without a fixed seed, so the values differ on every run.
dims = [2,3,4,7]
np_x = np.random.uniform(size=dims).astype(np.float32)

In [31]:
# Scratch arithmetic (NOTE(review): purpose of 90000 and 32*32 is not stated — add context or remove)
90000/(32*32)

87.890625

In [30]:
# Shape of the combined train+val file, read via memory-mapping so the data itself is not loaded
fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/3d_data/dataset1_smoothing_const_params_64cube_100k/norm_1_train_val.npy'
np.load(fname,mmap_mode='r').shape


(101251, 1, 64, 64, 64)

In [32]:
# Size on disk of the combined train+val file (shell command)
! du -hs /global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/3d_data/dataset1_smoothing_const_params_64cube_100k/norm_1_train_val.npy



99G	/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/3d_data/dataset1_smoothing_const_params_64cube_100k/norm_1_train_val.npy
