# Extract data from output files
### Analyze the output from a single LBANN run
March 9, 2020 \
April 6, 2020 : to store files in order of epochs \
April 21, 2020: added jupyter widgets to compare pixel intensity plots \
July 30, 2020: Perform analysis with stored histograms.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import subprocess as sp
import os
import glob
import sys

import itertools
import time

from ipywidgets import *

In [4]:
%matplotlib widget

In [5]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/lbann_cosmogan/3_analysis')
from modules_image_analysis import *

In [6]:
### Transformation functions for image pixel values
def f_transform(x):
    '''
    Map pixel values x to the scaled value s = 2x/(x+4) - 1.
    A small constant (1e-8) is added to the denominator.
    Works on scalars and numpy arrays alike.
    '''
    numerator = 2.*x
    denominator = x + 4. + 1e-8
    return numerator/denominator - 1.

def f_invtransform(s):
    '''
    Map scaled values s back to pixel values x = 4(1+s)/(1-s).
    A small constant (1e-8) is added to the denominator, which keeps it
    non-zero at s = 1.
    Works on scalars and numpy arrays alike.
    '''
    numerator = 4.*(1. + s)
    denominator = 1. - s + 1e-8
    return numerator/denominator

## Extract image data 

In [7]:
### Load the validation images
fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_2_smoothing_200k/norm_1_train_val.npy'
# The file is memory-mapped (mmap_mode='r') instead of being loaded in full.
# Keep the first 8000 entries and index 0 along axis 1.
s_val=np.load(fname,mmap_mode='r')[:8000,0,:,:]
print(s_val.shape)

(8000, 128, 128)


## Read dataframe

In [8]:
### Select the run folder to analyze: keep exactly one line active.
### (Earlier versions assigned fldr_name several times in a row, so only the last assignment had any effect.)
# fldr_name='20200725_204329_batchsize_256/'
# fldr_name='20200725_172458_batchsize_64/'
# fldr_name='20200803_055550_batchsize_256/'
# fldr_name='20200804_152954_batchsize_256/'
fldr_name='20200805_124242_batchsize_64/'

main_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/{0}'.format(fldr_name)
### Load data
# NOTE: read_pickle unpickles the file, so only point it at trusted results.
df=pd.read_pickle(main_dir+'df_processed.pkle')
# Store epoch and step as integers
df[['epoch','step']]=df[['epoch','step']].astype(int)

In [9]:
# Only the last expression of a notebook cell is echoed, so this cell shows
# df.shape; the df.head() result is computed but not displayed.
df.head()
df.shape

(2379, 18)

### View best epochs

#### Locations with best chi_sqr

In [10]:
chi_sqr_keys=['epoch','step','chi_1a','chi_1b','chi_1c','chi_1','chi_2','chi_imgvar','chi_spec1','chi_spec2']
# Index label of the row holding the minimum of each chi-square column.
# The first two keys (epoch, step) are identifiers, not chi-squares, so they are skipped.
# DataFrame.idxmin works column by column; Series.idxmin(axis=1) is not a valid call.
inds=df[chi_sqr_keys[2:]].idxmin().values
# One row per chi-square column, in the order of chi_sqr_keys[2:]
df.loc[inds,chi_sqr_keys]

Unnamed: 0,epoch,step,chi_1a,chi_1b,chi_1c,chi_1,chi_2,chi_imgvar,chi_spec1,chi_spec2
1573,19,62920,0.000404,0.004823,0.039651,0.044878,0.000117,10.908815,0.580928,3469.822
1257,15,50280,0.037258,0.00094,0.015134,0.053332,0.032309,11.449739,0.423692,2430.602
1565,19,62600,0.031476,0.005177,0.001047,0.0377,0.027021,11.064089,0.591548,4150.938
2202,27,88080,0.00167,0.001417,0.001283,0.004369,0.000545,10.118731,0.84673,6571.624
557,7,22280,0.000457,0.003804,0.007119,0.01138,8.6e-05,14.406475,0.59196,2233.916
2236,28,89440,1.321232,0.29428,0.013186,1.628698,0.901496,1.143094,2047.775792,59500640.0
496,6,19840,0.007926,0.010697,0.097006,0.115628,0.006816,12.910574,0.108318,620.1232
653,8,26120,0.032204,0.003503,0.063605,0.099312,0.017716,15.363506,0.17064,537.2725


In [11]:
def f_get_best_chisqr_models(df,cutoff=0.2):
    '''
    Pick the rows that are simultaneously in the lowest `cutoff` fraction
    of both chi_1 and chi_spec1.

    Arguments:
    df     : dataframe containing the columns 'chi_1' and 'chi_spec1'
    cutoff : quantile (between 0 and 1) used as the upper threshold for both columns. Default 0.2, i.e. the lowest 20%
    Returns:
    dataframe with the rows of df that lie strictly below both thresholds
    '''
    selection_keys=['chi_1','chi_spec1']
    # Compute quantiles on the selection columns only: the full dataframe may
    # hold non-numeric columns, on which DataFrame.quantile can fail.
    thresholds=df[selection_keys].quantile(q=cutoff,axis=0)

    below_chi_1=df['chi_1'] < thresholds['chi_1']
    below_chi_spec1=df['chi_spec1'] < thresholds['chi_spec1']
    df_sliced=df[below_chi_1 & below_chi_spec1]

    return df_sliced

In [12]:
# Shortlist the rows below the 10% quantile in both chi_1 and chi_spec1
df_sliced=f_get_best_chisqr_models(df,cutoff=0.1)
print(df_sliced.shape)

(21, 18)


In [13]:
# Top 5 shortlisted rows ranked by chi_1, with chi_spec1 as the secondary sort key
df_sliced.sort_values(by=['chi_1','chi_spec1'])[['epoch','step','chi_1','chi_spec1']].head(5)

Unnamed: 0,epoch,step,chi_1,chi_spec1
423,5,16920,0.009885,0.303728
505,6,20200,0.011645,0.292486
368,4,14720,0.012351,0.20401
378,4,15120,0.012522,0.218324
562,7,22480,0.012686,0.212147


In [14]:
# Top 5 shortlisted rows ranked by chi_spec1, with chi_1 as the secondary sort key
df_sliced.sort_values(by=['chi_spec1','chi_1'])[['epoch','step','chi_1','chi_spec1']].head(5)

Unnamed: 0,epoch,step,chi_1,chi_spec1
592,7,23680,0.014951,0.145187
368,4,14720,0.012351,0.20401
489,6,19560,0.015864,0.210282
562,7,22480,0.012686,0.212147
378,4,15120,0.012522,0.218324


In [13]:
### Plot chi-sqr values of the shortlisted rows against epoch
# Markers only. linestyle='' replaces style='.', which specified a second marker
# that conflicted with marker='*'.
df_sliced.plot(x="epoch", y=["chi_1", "chi_imgvar", "chi_spec1"],linestyle='',marker='*')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x2aaade3941d0>

### High Pixel images

In [None]:
### Plot num_large against step for the rows with img_type 'train_gen'
df_train_gen=df[df.img_type=='train_gen']   # filter once, reuse for both axes
plt.figure()
plt.plot(df_train_gen.step,df_train_gen.num_large,linestyle='',marker='*')
plt.xlabel('Steps in Epochs')
plt.ylabel('Number of large pixel images from a batch of images')

In [None]:
# df[df.num_large>10]

## Compare samples

In [30]:
# All columns of the 5 shortlisted rows with the lowest chi_spec1 (chi_1 as secondary sort key)
df_sliced.sort_values(by=['chi_spec1','chi_1']).head(5)
# Alternative ranking: chi_1 first, then chi_spec1
# df_sliced.sort_values(by=['chi_1','chi_spec1']).head(5)

Unnamed: 0,epoch,step,img_type,num_large,chi_1a,chi_1b,chi_1c,chi_1,chi_2,chi_imgvar,chi_spec1,chi_spec2
592,7,23680,train_gen,0,0.00731,0.004488,0.003154,0.014951,0.002972,13.006232,0.145187,631.703864
368,4,14720,train_gen,1,0.006209,0.001852,0.004291,0.012351,0.001063,11.166003,0.20401,1180.028713
489,6,19560,train_gen,0,0.00469,0.007349,0.003826,0.015864,0.003007,9.902317,0.210282,1043.306889
562,7,22480,train_gen,0,0.006897,0.003311,0.002477,0.012686,0.004576,10.616311,0.212147,1229.070121
378,4,15120,train_gen,1,0.001914,0.004525,0.006083,0.012522,0.000439,9.611897,0.218324,1597.291966


## Plot pixel intensity and spectrum

In [38]:
def f_plot_hist_spec_best(df,dict_bkg):
    '''
    Compare the stored histograms and spectra of selected rows with a reference.

    Left panel : histogram of every row of df drawn as error bars, reference drawn as a black line.
    Right panel: spectrum of every row of df drawn as a dotted line with a +/- error band, reference drawn as a black band.

    Arguments:
    df       : dataframe whose rows provide hist_bin_centers, hist_val, hist_err, spec_val, spec_err, epoch and step
    dict_bkg : dictionary with keys 'hist_bin_centers', 'hist_val', 'hist_err', 'spec_val', 'spec_err' for the reference data
    Returns:
    None. The figure is drawn through pyplot.
    '''
    import matplotlib   # local import: only needed for the version check below

    fig=plt.figure(figsize=(6,6))
    ax1=fig.add_subplot(121)
    ax2=fig.add_subplot(122)
    # One marker per row; the marker list is cycled if there are more rows than markers
    for (i,row),marker in zip(df.iterrows(),itertools.cycle('>^*sDHPdpx_')):

        # Histogram: bin centers are mapped back with f_invtransform before plotting
        x1=f_invtransform(row.hist_bin_centers)
        y1=row.hist_val
        yerr1=row.hist_err

        # Spectrum: plotted against the position of each value in the array
        y2=row.spec_val
        yerr2=row.spec_err
        x2=np.arange(len(y2))

        # Label is <dataframe index>_<epoch>_<step>
        label='{0}_{1}_{2}'.format(i,row.epoch,row.step)
        ax1.errorbar(x1,y1,yerr1,marker=marker,markersize=5,linestyle='',label=label)

        ax2.fill_between(x2, y2 - yerr2, y2 + yerr2, alpha=0.4)
        ax2.plot(x2, y2, marker=marker, linestyle=':',label=label)

    ### Plot input data
    x,y,yerr=dict_bkg['hist_bin_centers'],dict_bkg['hist_val'],dict_bkg['hist_err']
    x=f_invtransform(x)
    ax1.errorbar(x, y,yerr,color='k',linestyle='-',label='bkgnd')

    y,yerr=dict_bkg['spec_val'],dict_bkg['spec_err']
    x=np.arange(len(y))
    ax2.fill_between(x, y - yerr, y + yerr, color='k',alpha=0.8)

    # Legend goes on the spectrum panel (the axes pyplot treats as current here)
    ax2.legend()

    # The symlog threshold keyword was renamed from 'linthreshx' to 'linthresh'
    # in matplotlib 3.3 and the old name was later removed; pick the one this
    # installation understands.
    mpl_version=tuple(int(v) for v in matplotlib.__version__.split('.')[:2])
    linthresh_key='linthresh' if mpl_version >= (3,3) else 'linthreshx'
    ax1.set_xscale('symlog',**{linthresh_key:50})
    ax1.set_yscale('log')
    ax2.set_yscale('log')

In [None]:
def f_compute_hist_spect(sample,bins):
    '''
    Summarize a set of images by their pixel-intensity histogram and their spectrum.

    Arguments:
    sample : image array(s), handed unchanged to f_batch_histogram and f_compute_spectrum
    bins   : histogram bins, forwarded to f_batch_histogram
    Returns:
    dictionary with the keys 'hist_val', 'hist_err', 'hist_bin_centers', 'spec_val', 'spec_err'
    '''
    # Pixel histogram (norm=True, no explicit range): values, errors and the bins returned by the helper
    hist_val,hist_err,hist_centers=f_batch_histogram(sample,bins=bins,norm=True,hist_range=None)
    # Spectrum values and errors, without plotting
    spec_val,spec_err=f_compute_spectrum(sample,plot=False)

    return {
        'hist_val':hist_val,
        'hist_err':hist_err,
        'hist_bin_centers':hist_centers,
        'spec_val':spec_val,
        'spec_err':spec_err,
    }

### Bin edges in pixel units: unit steps up to 20, steps of 5 up to 100,
### steps of 50 up to 1000, and a final edge at 2000
bins=np.concatenate([
    np.array([-0.5]),
    np.arange(0.5,20.5,1),
    np.arange(20.5,100.5,5),
    np.arange(100.5,1000.5,50),
    np.array([2000]),
])
### Move the edges to the scaled domain of f_transform. Non-negative pixel values
### land in [-1,1); the -0.5 edge maps slightly below -1.
bins=f_transform(bins)
### Histogram and spectrum of the validation images, used as the reference
dict_val=f_compute_hist_spect(s_val,bins)



In [26]:
# Take the top 2 index labels under each ranking (chi_1 first, then chi_spec1 first)
# and flatten them into one list. Duplicates are kept.
sort_orders=(['chi_1','chi_spec1'],['chi_spec1','chi_1'])
best_idx=[idx for order in sort_orders for idx in df_sliced.sort_values(by=order).head(2).index]
best_idx

[423, 505, 592, 368]

In [39]:
# df_best is not assigned in any visible cell; build it from the index labels
# shortlisted in best_idx before plotting.
df_best=df.loc[best_idx]
f_plot_hist_spec_best(df_best,dict_val)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## View image block

In [None]:
def f_get_img(df,epoch,step):
    '''
    Load the image array stored for a given epoch and step.

    Arguments:
    df    : dataframe with the columns epoch, step and fname (path to a .npy file)
    epoch : epoch value to match
    step  : step value to match
    Returns:
    array obtained by taking index 0 along axis 1 of the stored array
    Raises:
    IndexError if no row of df matches the requested epoch and step
    '''
    df_temp=df[(df.epoch==epoch)&(df.step==step)]
    # Fail with an explicit message instead of the bare out-of-bounds error from values[0]
    if df_temp.empty:
        raise IndexError('No entry found for epoch={0}, step={1}'.format(epoch,step))
    # If several rows match, the first one is used
    images=np.load(df_temp.fname.values[0])[:,0,:,:]
    return images

# Load the images stored for epoch 7, step 22680
img_arr=f_get_img(df,7,22680)

In [None]:
# Inspect the dimensions of the loaded image array
img_arr.shape

In [None]:
# Show the first 18 loaded images in a grid with 6 columns.
# f_plot_grid is not defined in this notebook (presumably it comes from modules_image_analysis; verify).
f_plot_grid(img_arr[:18],cols=6,fig_size=(10,6))

In [None]:
# Show 18 validation images (indices 100 to 117) in the same grid layout, for visual comparison
f_plot_grid(s_val[100:118],cols=6,fig_size=(10,6))