# Analyze results
Aug 28, 2020

In [1]:
import     numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import sys
import os
import glob
import pickle 

from matplotlib.colors import LogNorm, PowerNorm, Normalize
import seaborn as sns

In [2]:
from ipywidgets import *

In [3]:
%matplotlib widget

In [4]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/cosmogan_pytorch/code/modules_image_analysis/')
from modules_img_analysis import *

In [5]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/cosmogan_pytorch/code/1_basic_GAN/1_main_code/')
import post_analysis_pandas as post


In [6]:
### Transformation functions for image pixel values
def f_transform(x):
    return 2.*x/(x + 4.) - 1.

def f_invtransform(s):
    return 4.*(1. + s)/(1. - s)


In [7]:
# main_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/128sq/'
# results_dir=main_dir+'20201002_064327'

In [8]:
img_size=128

### Get validation data

In [9]:
# Get location for validation data
raw_data_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/'
input_files_dict={'128':raw_data_dir+'128_square/dataset_2_smoothing_200k/norm_1_train_val.npy',
                 '512':raw_data_dir+'512_square/dataset1_smoothing_single_universe/norm_1_train_val.npy'}

In [10]:
def f_compute_hist_spect(sample,bins):
    ''' Compute pixel intensity histograms and radial spectrum for 2D arrays
    Input : Image arrays and bins
    Output: dictionary with 5 arrays : Histogram values, errors and bin centers, Spectrum values and errors.
    '''
    ### Compute pixel histogram for row
    gen_hist,gen_err,hist_bins=f_batch_histogram(sample,bins=bins,norm=True,hist_range=None)
    ### Compute spectrum for row
    spec,spec_sdev=f_compute_spectrum(sample,plot=False)

    dict1={'hist_val':gen_hist,'hist_err':gen_err,'hist_bin_centers':hist_bins,'spec_val':spec,'spec_sdev':spec_sdev }
    return dict1

### Extract validation data
num_bkgnd=1000
fname=input_files_dict[str(img_size)]
s_val=np.load(fname,mmap_mode='r')[:num_bkgnd][:,0,:,:]
print(s_val.shape)

bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
bins=f_transform(bins)   ### scale to (-1,1) 
### Compute histogram and spectrum of raw data 
dict_val=f_compute_hist_spect(s_val,bins)

(1000, 128, 128)


## Read data

In [11]:
dict1={'128':'/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/128sq/',
      '512':'/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/512sq/'}

u=interactive(lambda x: dict1[x], x=Select(options=dict1.keys()))
# display(u)


In [12]:
# parent_dir=u.result
parent_dir=dict1[str(img_size)]

dir_lst=[i.split('/')[-1] for i in glob.glob(parent_dir+'20*')]
w=interactive(lambda x: x, x=Dropdown(options=dir_lst))
display(w)

interactive(children=(Dropdown(description='x', options=('20210415_140527_gclip1.0', '20210415_180935_cgan_opt…

In [13]:
result=w.result
result_dir=parent_dir+result
print(result_dir)

/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/128sq/20210419_131244_2dgan


## Plot Losses

In [14]:
df_metrics=pd.read_pickle(result_dir+'/df_metrics.pkle').astype(np.float64)


In [15]:
df_metrics.tail(10)

Unnamed: 0,step,epoch,Dreal,Dfake,Dfull,G_adv,G_full,spec_loss,hist_loss,spec_chi,hist_chi,gp_loss,fm_loss,D(x),D_G_z1,D_G_z2,time
9611,9611.0,36.0,0.094781,0.205753,0.300534,3.367013,84.038254,80.659744,-0.778411,84.469597,-0.957107,,0.011498,3.516963,-2.800541,-3.315242,0.319989
9612,9612.0,36.0,0.146899,0.14991,0.296809,3.937065,86.159134,82.206635,-0.797706,84.584946,-0.610035,,0.015433,3.569234,-3.576746,-3.900607,0.320215
9613,9613.0,36.0,0.170224,0.183177,0.353401,3.902407,82.306206,78.381416,-0.787425,84.968895,-0.752601,,0.022385,4.170608,-3.93361,-3.865974,0.319893
9614,9614.0,36.0,0.172574,0.212011,0.384584,4.109015,88.154381,84.035568,-0.685999,85.999809,-0.767048,,0.009795,4.465695,-3.099623,-4.077849,0.347633
9615,9615.0,36.0,0.152319,0.170248,0.322567,3.820232,87.730637,83.894592,-0.750823,85.246086,-0.690179,,0.015805,3.762444,-3.33974,-3.787165,0.320646
9616,9616.0,36.0,0.197281,0.149218,0.3465,3.494235,83.992386,80.458778,-0.768591,84.975052,-0.550982,,0.039378,2.946738,-3.480665,-3.440476,0.32001
9617,9617.0,36.0,0.269565,0.196981,0.466546,3.965628,83.000427,79.017067,-0.848403,84.019646,-0.762404,,0.017731,3.131191,-2.634433,-3.933103,0.319901
9618,9618.0,36.0,0.154533,0.176233,0.330766,3.609898,82.292664,78.623657,-0.777622,84.462929,-0.637185,,0.059107,3.451602,-3.589351,-3.566309,0.320061
9619,9619.0,36.0,0.195663,0.140917,0.33658,3.141366,84.102867,80.936607,-0.848025,83.381958,-0.934803,,0.024891,3.114254,-3.116623,-3.064414,0.320366
9620,,,,,,,,,,85.331314,-0.931842,,,,,,


In [16]:
def f_plot_metrics(df,col_list):
    
    plt.figure()
    for key in col_list:
        plt.plot(df_metrics[key],label=key,marker='*',linestyle='')
    plt.legend()
    
#     col_list=list(col_list)
#     df.plot(kind='line',x='step',y=col_list)
    
# f_plot_metrics(df_metrics,['spec_chi','hist_chi'])

interact_manual(f_plot_metrics,df=fixed(df_metrics), col_list=SelectMultiple(options=df_metrics.columns.values))

interactive(children=(SelectMultiple(description='col_list', options=('step', 'epoch', 'Dreal', 'Dfake', 'Dful…

<function __main__.f_plot_metrics(df, col_list)>

In [17]:
# import matplotlib
# import ipympl
# ipympl.__version__,matplotlib.__version__

In [18]:

chi=df_metrics.quantile(q=0.005,axis=0)['hist_chi']
print(chi)
df_metrics[df_metrics['hist_chi']<=chi].sort_values(by=['hist_chi']).head(10)

-0.7845160141587257


Unnamed: 0,step,epoch,Dreal,Dfake,Dfull,G_adv,G_full,spec_loss,hist_loss,spec_chi,hist_chi,gp_loss,fm_loss,D(x),D_G_z1,D_G_z2,time
9365,9365.0,36.0,0.19539,0.641016,0.836406,11.492018,93.650291,82.153015,-1.083248,84.595879,-1.258723,,0.005253,4.304197,-0.396628,-11.491996,0.320507
9527,9527.0,36.0,0.184432,0.130057,0.314489,3.365751,83.02771,79.633148,-0.642789,84.4049,-1.198606,,0.02881,2.904001,-3.365993,-3.3013,0.320474
9370,9370.0,36.0,0.371221,0.763903,1.135124,7.973608,92.787796,84.81263,-1.078289,84.549675,-1.181164,,0.00156,10.484652,-1.169419,-7.972972,0.3198
9356,9356.0,35.0,0.185262,0.297468,0.48273,3.94678,87.448616,83.492729,-0.953964,85.422134,-1.179091,,0.009106,2.469711,-2.687454,-3.91981,0.320806
9369,9369.0,36.0,0.240891,0.155975,0.396866,3.238089,86.41861,83.109131,-1.07302,82.878769,-1.124571,,0.071389,6.964556,-5.023929,-3.110277,0.320179
9371,9371.0,36.0,0.217614,0.164694,0.382308,7.378116,89.323051,81.892075,-0.942388,84.480179,-1.101773,,0.052861,6.07032,-4.347625,-7.374741,0.338324
9367,9367.0,36.0,0.335894,4.396216,4.73211,14.239371,94.986954,80.739243,-0.987974,82.074203,-1.09959,,0.008339,11.424485,4.589842,-14.239365,0.319945
119,119.0,0.0,0.183446,0.306032,0.489478,5.408072,92.560768,87.134033,-0.004542,87.243546,-1.090259,,0.018659,2.30266,-1.587834,-5.40176,0.318244
9355,9355.0,35.0,0.135629,0.242011,0.37764,4.340821,83.154678,78.812157,-0.962928,84.007339,-1.089805,,0.001703,2.875203,-2.420212,-4.316606,0.320501
99,99.0,0.0,0.387929,0.405484,0.793413,6.329274,93.511597,87.166138,0.955763,87.286995,-1.082433,,0.016181,1.48196,-1.034299,-6.32682,0.317945


In [19]:
display(df_metrics.sort_values(by=['hist_chi']).head(8))
display(df_metrics.sort_values(by=['spec_chi']).head(8))

Unnamed: 0,step,epoch,Dreal,Dfake,Dfull,G_adv,G_full,spec_loss,hist_loss,spec_chi,hist_chi,gp_loss,fm_loss,D(x),D_G_z1,D_G_z2,time
9365,9365.0,36.0,0.19539,0.641016,0.836406,11.492018,93.650291,82.153015,-1.083248,84.595879,-1.258723,,0.005253,4.304197,-0.396628,-11.491996,0.320507
9527,9527.0,36.0,0.184432,0.130057,0.314489,3.365751,83.02771,79.633148,-0.642789,84.4049,-1.198606,,0.02881,2.904001,-3.365993,-3.3013,0.320474
9370,9370.0,36.0,0.371221,0.763903,1.135124,7.973608,92.787796,84.81263,-1.078289,84.549675,-1.181164,,0.00156,10.484652,-1.169419,-7.972972,0.3198
9356,9356.0,35.0,0.185262,0.297468,0.48273,3.94678,87.448616,83.492729,-0.953964,85.422134,-1.179091,,0.009106,2.469711,-2.687454,-3.91981,0.320806
9369,9369.0,36.0,0.240891,0.155975,0.396866,3.238089,86.41861,83.109131,-1.07302,82.878769,-1.124571,,0.071389,6.964556,-5.023929,-3.110277,0.320179
9371,9371.0,36.0,0.217614,0.164694,0.382308,7.378116,89.323051,81.892075,-0.942388,84.480179,-1.101773,,0.052861,6.07032,-4.347625,-7.374741,0.338324
9367,9367.0,36.0,0.335894,4.396216,4.73211,14.239371,94.986954,80.739243,-0.987974,82.074203,-1.09959,,0.008339,11.424485,4.589842,-14.239365,0.319945
119,119.0,0.0,0.183446,0.306032,0.489478,5.408072,92.560768,87.134033,-0.004542,87.243546,-1.090259,,0.018659,2.30266,-1.587834,-5.40176,0.318244


Unnamed: 0,step,epoch,Dreal,Dfake,Dfull,G_adv,G_full,spec_loss,hist_loss,spec_chi,hist_chi,gp_loss,fm_loss,D(x),D_G_z1,D_G_z2,time
7736,7736.0,29.0,0.237041,0.218125,0.455166,4.598262,86.429573,81.827034,-0.013113,76.317108,0.090414,,0.00428,5.541838,-3.061221,-4.580764,0.320756
7092,7092.0,27.0,0.159562,0.161832,0.321394,3.25527,91.43293,88.138107,0.193399,76.664986,0.704638,,0.039549,3.589399,-3.72874,-3.189087,0.320607
7125,7125.0,27.0,0.340456,0.135513,0.475969,2.554361,83.993698,81.432213,0.108783,76.752625,0.309127,,0.007125,1.700259,-4.133356,-2.417646,0.320209
7130,7130.0,27.0,0.190389,0.204833,0.395222,5.292545,89.707703,84.408463,0.110825,76.846245,0.166334,,0.006699,3.989639,-2.358533,-5.281581,0.319875
6440,6440.0,24.0,0.16834,0.150764,0.319104,3.656404,88.155121,84.418945,0.228582,76.869576,0.415523,,0.079771,3.244243,-3.454065,-3.609424,0.320537
7757,7757.0,29.0,0.255494,0.128293,0.383787,4.783778,87.828239,83.020592,0.213736,77.04612,0.243027,,0.023869,2.088441,-5.311906,-4.761963,0.319964
8530,8530.0,32.0,0.29396,0.171942,0.465902,4.163918,85.80481,81.598381,0.082361,77.098511,0.665942,,0.04251,2.690846,-2.564223,-4.127306,0.319916
7400,7400.0,28.0,0.171987,0.182566,0.354554,3.685335,84.363274,80.67054,0.17456,77.196091,0.489405,,0.007401,2.571205,-3.521571,-3.647216,0.320096


## Read stored chi-squares for images

In [20]:
df=pd.read_pickle(result_dir+'/df_processed.pkle')
df[['epoch','step']]=df[['epoch','step']].astype(int)
df['label']=df.epoch.astype(str)+'-'+df.step.astype(str) # Add label column for plotting

col_list=['label','chi_1','chi_spec1', 'chi_spec3', 'chi_1a', 'chi_1b', 'chi_1c', 'chi_2','chi_imgvar', 'chi_spec2','epoch','step']


In [21]:
def f_slice_df(df,cutoff=0.2,sort_col='chi_1',col_mode='all',head=10,epoch_range=[0,None],display_flag=False):
    ''' View dataframe after slicing
    '''
    col_list=['label','chi_1','chi_spec1', 'chi_spec3', 'chi_1a', 'chi_1b', 'chi_1c', 'chi_2','chi_imgvar', 'chi_spec2','epoch','step']
    if (col_mode=='short'): col_list=['label','chi_1','chi_spec1']
        
    print(epoch_range)
    if epoch_range[1]==None: epoch_range[1]=df.max()['epoch']
    df=df[(df.epoch<=epoch_range[1])&(df.epoch>=epoch_range[0])]

    ## Apply cutoff to keep reasonable chi1 and chispec1
    best_keys=[ 'chi_1', 'chi_spec1']
    q_dict=dict(df.quantile(q=cutoff,axis=0)[best_keys])   
    print(q_dict)
    df=df.query('chi_1 < {0} & chi_spec1 < {1}'.format(q_dict['chi_1'],q_dict['chi_spec1']))
    
    # Sort dataframe
    df1=df[df.epoch>0].sort_values(by=sort_col)
        
    df2=df1.head(head)[col_list]

    if display_flag:   # Display df
        display_cols=['label','chi_1','chi_spec1', 'chi_spec3', 'chi_1a', 'chi_1b', 'chi_1c', 'chi_2','chi_imgvar', 'chi_spec2']
        display(df2)
    
    return df2

In [22]:
w=interactive(f_slice_df,df=fixed(df),
cutoff=widgets.FloatSlider(value=0.2, min=0, max=1.0, step=0.01), 
col_mode=['all','short'], display_flag=widgets.Checkbox(value=False),
head=widgets.IntSlider(value=10,min=1,max=20,step=1),
epoch_range=widgets.IntRangeSlider(value=[0,np.max(df.epoch.values)],min=0,max=np.max(df.epoch.values),step=1),
sort_col=['chi_1','chi_spec1', 'chi_spec3', 'chi_1a', 'chi_1b', 'chi_1c', 'chi_2','chi_imgvar', 'chi_spec2'],
)
display(w)

interactive(children=(FloatSlider(value=0.2, description='cutoff', max=1.0, step=0.01), Dropdown(description='…

In [23]:
df_sliced=w.result
# df_sliced

In [24]:
best_step=[]
best_step.append(f_slice_df(df,cutoff=0.4,sort_col='chi_1',head=4,display_flag=False).step.values)
best_step.append(f_slice_df(df,cutoff=0.8,sort_col='chi_1c',head=2,display_flag=False).step.values)
best_step.append(f_slice_df(df,cutoff=0.4,sort_col='chi_spec1',head=2,display_flag=False).step.values)
print(best_step)
best_step=np.unique([i for j in best_step for i in j])
print(best_step)

[0, None]
{'chi_1': 0.11736039841596267, 'chi_spec1': 4.8352293345977335}
[0, 37]
{'chi_1': 0.7877911538413561, 'chi_spec1': 12.229480308735878}
[0, 37]
{'chi_1': 0.11736039841596267, 'chi_spec1': 4.8352293345977335}
[array([7950, 6530, 8980, 3630]), array([ 680, 7040]), array([8950, 9200])]
[ 680 3630 6530 7040 7950 8950 8980 9200]


In [25]:
# best_step=[58800]
# best_step=np.arange(32800,32900,10).astype(int)

In [26]:
df_best=df[df.step.isin(best_step)]
print(df_best.shape)
print([(df_best[df_best.step==step].epoch.values[0],df_best[df_best.step==step].step.values[0]) for step in best_step])
# print([(df_best.loc[idx].epoch,df_best.loc[idx].step) for idx in best_idx])

(8, 22)
[(2, 680), (13, 3630), (25, 6530), (27, 7040), (30, 7950), (34, 8950), (34, 8980), (35, 9200)]


In [27]:
df_best[col_list]

Unnamed: 0,label,chi_1,chi_spec1,chi_spec3,chi_1a,chi_1b,chi_1c,chi_2,chi_imgvar,chi_spec2,epoch,step
67,2-680,0.254132,10.876071,36.796163,0.249326,0.004806,0.0,0.003537,4.214662,651.898836,2,680
362,13-3630,0.041306,3.772322,30.701756,0.035785,0.005521,0.0,0.000799,11.948332,232.654322,13,3630
652,25-6530,0.040459,3.588964,29.810063,0.035653,0.004806,0.0,0.0001,7.933197,200.50569,25,6530
703,27-7040,0.047788,2.764903,32.389032,0.042577,0.005211,0.0,0.000514,9.466738,112.441466,27,7040
794,30-7950,0.036399,1.678053,30.543225,0.031594,0.004806,0.0,9.6e-05,8.094815,77.799306,30,7950
894,34-8950,0.051711,1.421375,28.253772,0.046905,0.004806,0.0,0.000174,4.714734,83.412643,34,8950
897,34-8980,0.040794,2.259233,33.969505,0.035988,0.004806,0.0,9.6e-05,7.623771,79.95425,34,8980
919,35-9200,0.065019,1.506052,26.545558,0.060213,0.004806,0.0,0.000361,5.501958,78.084978,35,9200


### Plots 2: stored data

In [28]:
def f_plot_hist_spec(df,dict_bkg,plot_type):
    
    img_size=128
    assert plot_type in ['hist','spec','spec_relative','grid'],"Invalid mode %s"%(plot_type)

    if plot_type in ['hist','spec','spec_relative']:     fig=plt.figure(figsize=(6,6))
    
    for (i,row),marker in zip(df.iterrows(),itertools.cycle('>^*sDHPdpx_')):
        label=row.label
        if plot_type=='hist':
            x1=row.hist_bin_centers
            y1=row.hist_val
            yerr1=row.hist_err
            x1=f_invtransform(x1)
            
            plt.errorbar(x1,y1,yerr1,marker=marker,markersize=5,linestyle='',label=label)
        if plot_type=='spec':
            
            y2=row.spec_val
            yerr2=row.spec_sdev/np.sqrt(row.num_imgs)
            x2=np.arange(len(y2))
            
            plt.fill_between(x2, y2 - yerr2, y2 + yerr2, alpha=0.4)
            plt.plot(x2, y2, marker=marker, linestyle=':',label=label)

        if plot_type=='spec_relative':
            
            y2=row.spec_val
            yerr2=row.spec_sdev
            x2=np.arange(len(y2))
            
            ### Reference spectrum
            y1,yerr1=dict_bkg['spec_val'],dict_bkg['spec_sdev']
            y=y2/(1.0*y1)
            ## Variance is sum of variance of both variables, since they are uncorrelated
            
            # delta_r= |r| * sqrt(delta_a/a)^2 +(\delta_b/b)^2) / \sqrt(N)
            yerr=(np.abs(y))*np.sqrt((yerr1/y1)**2+(yerr2/y2)**2)/np.sqrt(row.num_imgs)

            plt.fill_between(x2, y - yerr, y + yerr, alpha=0.4)
            plt.plot(x2, y, marker=marker, linestyle=':',label=label)
            plt.xlim(0,img_size/2)
            plt.ylim(0.5,2)
            plt.legend()
            
        if plot_type=='grid':
            images=np.load(row.fname)[:,0,:,:]
            print(images.shape)
            f_plot_grid(images[:18],cols=6,fig_size=(10,5))
            
    ### Plot input data
    if plot_type=='hist':
        x,y,yerr=dict_bkg['hist_bin_centers'],dict_bkg['hist_val'],dict_bkg['hist_err']
        x=f_invtransform(x)
        plt.errorbar(x, y,yerr,color='k',linestyle='-',label='bkgnd')   
        plt.title('Pixel Intensity Histogram')
        plt.xscale('symlog',linthreshx=50)
    
    if plot_type=='spec':
        y,yerr=dict_bkg['spec_val'],dict_bkg['spec_sdev']/np.sqrt(num_bkgnd)
        x=np.arange(len(y))
        plt.fill_between(x, y - yerr, y + yerr, color='k',alpha=0.8)
        plt.title('Spectrum')
        
    if plot_type=='spec_relative':
        plt.axhline(y=1.0,color='k',linestyle='-.')
    
    plt.legend(bbox_to_anchor=(0.3, 0.75),ncol=2, fancybox=True, shadow=True,prop={'size':6})
    if plot_type in ['hist','spec']:     plt.yscale('log')

# f_plot_hist_spec(df_runs,dict_val,'hist')
# f_plot_hist_spec(df_runs,dict_val,'spec')
# f_plot_hist_spec(df_runs,dict_val,'spec_relative')
# f_plot_hist_spec(df_best,dict_val,'grid')


In [29]:
def f_widget_compare(df,labels_list,bkgnd,plot_type):
    df_temp=df[df.label.isin(labels_list)]
    f_plot_hist_spec(df_temp,dict_val,plot_type)

interact_manual(f_widget_compare,df=fixed(df_best),
                labels_list=SelectMultiple(options=df_best.label.values), 
                bkgnd=fixed(dict_val),plot_type=ToggleButtons(options=['hist','spec','grid','spec_relative']))

interactive(children=(SelectMultiple(description='labels_list', options=('2-680', '13-3630', '25-6530', '27-70…

<function __main__.f_widget_compare(df, labels_list, bkgnd, plot_type)>

## Plot images

In [35]:
ip_fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_2_smoothing_200k/norm_1_train_val.npy'
s_val=np.load(ip_fname,mmap_mode='r')[:1000,0,:,:]
print(s_val.shape)
f_plot_grid(s_val[100:118],cols=6,fig_size=(6,3))


(1000, 128, 128)
3 6


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [31]:
epoch,step=20,1960
fname='/images/gen_img_epoch-{0}_step-{1}.npy'.format(epoch,step)
img=np.load(result_dir+fname,mmap_mode='r')[:1000,0,:,:]
print(img.shape)
f_plot_grid(img[100:118],cols=6,fig_size=(6,3))

FileNotFoundError: [Errno 2] No such file or directory: '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/128sq/20210407_174739_new_loss_noddpcode/images/gen_img_epoch-20_step-1960.npy'