# Extract data from output files
### Analyze the output from a single LBANN run
March 9, 2020 \
April 6, 2020 : Major edit to store files in order of epochs \
April 21, 2020: Major edit, added jupyter widgets to compare pixel intensity plots \
May 8, 2020: Major edit, using all images for a given batch

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import sys

import time
from scipy import fftpack
from ipywidgets import *

In [3]:
%matplotlib widget

In [4]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/LBANN/lbann_cosmogan/3_analysis/')
from modules_image_analysis import *

In [5]:
### Transformation functions for image pixel values
def f_transform(x):
    '''
    Scale pixel intensities : s = 2*x/(x+4) - 1
    x=0 maps to -1 and large x approaches +1.
    The 1e-8 term keeps the denominator non-zero at x=-4.
    Only elementwise arithmetic is used, so scalars and numpy arrays both work.
    '''
    numerator = 2.*x
    denominator = x + 4. + 1e-8
    return numerator/denominator - 1.


def f_invtransform(s):
    '''
    Undo the pixel scaling : x = 4*(1+s)/(1-s)
    s=-1 maps back to 0 and s close to +1 gives very large intensities.
    The 1e-8 term keeps the denominator non-zero at s=1.
    Only elementwise arithmetic is used, so scalars and numpy arrays both work.
    '''
    numerator = 4.*(1. + s)
    denominator = 1. - s + 1e-8
    return numerator/denominator

## Compare lbann images with input and keras code images

In [6]:
### Load input samples : first n_raw images of the raw dataset, scaled with f_transform
img_raw='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_2_smoothing_200k/train.npy'
n_raw=10000
a1=np.load(img_raw)[:n_raw]
### Keep index 0 of the last axis; a1 already holds at most n_raw images, so no further slicing is needed
s_raw=f_transform(a1[:,:,:,0])

print(s_raw.shape)


(10000, 128, 128)


In [7]:
### Load images generated by the keras code
### s_keras holds one image array per run, in this order :
###   run5_fixed_cosmology, run6_fixed_cosmology, run7_no_truncated_normal, run8_no_truncated_normal
# Earlier result files, not loaded :
# img_keras='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/run5/models/gen_imgs.npy'
# img_keras='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/run6/models/gen_imgs.npy'

s_keras=[]
for i in [5,6]:
    img_keras='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/run{0}_fixed_cosmology/models/gen_imgs.npy'.format(str(i))
    a1=np.load(img_keras)
    s_keras.append(a1[:,:,:])

### The remaining two runs also report their shape right after loading
for img_keras in ['/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/run7_no_truncated_normal/models/gen_imgs.npy',
                  '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/exagan1/run8_no_truncated_normal/models/gen_imgs.npy']:
    a1=np.load(img_keras)
    s_keras.append(a1[:,:,:])
    print(s_keras[-1].shape)



(5000, 128, 128)
(5000, 128, 128)


In [35]:
# ### Extract a few images generated by Lbann
# parent_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200702_074157_gen_img_070005_batchsize_256/'
parent_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/'

# fldr='20200806_065947_gen_img_exagan/'

fldr='20200806_070111_gen_img_exagan/'


### Gather every dumped generator output of this run
### (glob gives no ordering guarantee, so files are stacked in whatever order it returns)
f_strg=parent_dir+fldr+'dump_outs/trainer0/model0/sgd.testing.epoch.*.step.*_gen_img_instance1_activation_output0.npy'
f_list=glob.glob(f_strg)
print(f_list)

arr=[]
for fname in f_list:
    arr.append(np.load(fname)[:,0,:,:])   # keep index 0 along axis 1 of each dump
### Join all dumps along the image axis
s_lbann=np.concatenate(arr,axis=0)
print(s_lbann.shape,np.max(s_lbann))





['/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200806_070111_gen_img_exagan/dump_outs/trainer0/model0/sgd.testing.epoch.3.step.3_gen_img_instance1_activation_output0.npy', '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200806_070111_gen_img_exagan/dump_outs/trainer0/model0/sgd.testing.epoch.0.step.0_gen_img_instance1_activation_output0.npy', '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200806_070111_gen_img_exagan/dump_outs/trainer0/model0/sgd.testing.epoch.1.step.1_gen_img_instance1_activation_output0.npy', '/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200806_070111_gen_img_exagan/dump_outs/trainer0/model0/sgd.testing.epoch.2.step.2_gen_img_instance1_activation_output0.npy']
(10023, 128, 128) 0.99845964


In [36]:
def f_compute_chisqr(dict_val,dict_sample):
    '''
    Chi-square style comparison of a sample against the reference (input) images.
    Input: 2 dictionaries (reference, sample) holding histogram and spectrum values and errors.
           Keys read : 'hist_val','hist_err','spec_val' from both and 'spec_err' from the sample.
    Output: dictionary with keys chi_1a, chi_1b, chi_1c, chi_1, chi_2, chi_imgvar, chi_spec1, chi_spec2
    NOTE: an identical definition appears again further down in this notebook and overrides this one.
    '''
    ### !!Both pixel histograms MUST have same bins and normalization!
    ### Histogram chi-square follows the keras code : sum((Obs-Val)^2/(Val))
    ref_hist=dict_val['hist_val']
    
    ### Denominator : reference histogram with non-positive bins replaced by 1 (avoids division by zero)
    denom=np.where(ref_hist<=0.,1.0,ref_hist)
    hist_sq_diff=(ref_hist-dict_sample['hist_val'])**2
    
    ### Split the bins at 30% and 70% : small, medium, large pixel values, plus the full range
    n_bins=len(ref_hist)
    lo_cut,hi_cut=int(n_bins*0.3),int(n_bins*0.7)
    ranges=[('chi_1a',slice(0,lo_cut)),('chi_1b',slice(lo_cut,hi_cut)),('chi_1c',slice(hi_cut,None)),('chi_1',slice(0,None))]
    
    chisqr_dict={}
    for key,sl in ranges:
        chisqr_dict[key]=np.sum(hist_sq_diff[sl]/denom[sl])
    
    ### chi-sqr without denominator division (the /1.0 keeps the result dtype unchanged)
    chisqr_dict['chi_2']=np.sum(hist_sq_diff/1.0)
    ### total spread in the sample histograms relative to the reference histograms
    chisqr_dict['chi_imgvar']=np.sum(dict_sample['hist_err'])/np.sum(dict_val['hist_err'])
    
    ### Spectral terms use only the first n_spec spectrum bins
    n_spec=60
    spec_sq_diff=((dict_val['spec_val']-dict_sample['spec_val'])**2)[:n_spec]
    sample_spec=dict_sample['spec_val'][:n_spec]
    sample_spec_err=dict_sample['spec_err'][:n_spec]
    
    chisqr_dict['chi_spec1']=np.sum(spec_sq_diff/sample_spec**2)      # relative to the sample spectrum values
    chisqr_dict['chi_spec2']=np.sum(spec_sq_diff/sample_spec_err**2)  # relative to the sample spectrum errors
    
    return chisqr_dict

In [37]:
## Get LBANN training run data
# prefix='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200701_065330_batchsize_512/dump_outs/trainer0/model0/'
# lst=[(58,23120),(58,23360),(3,1200),(51,20640),(49,19760),(47,18880)]

# prefix='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200701_070005_batchsize_256/dump_outs/trainer0/model0/'
# lst=[(30,24400),(29,23680),(48,38160),(42,33760),(36,29120)]

# prefix='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200718_135530_batchsize_256/dump_outs/trainer0/model0/'
# lst=[(12,10220),(21,17380),(25,20340),(24,19720)]

# prefix='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200725_204329_batchsize_256/dump_outs/trainer0/model0/'
# lst=[(17,14207),(16,13380),(18,14922),(16,13089),(18,14730)]

# prefix='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200725_172458_batchsize_64/dump_outs/trainer0/model0/'
# lst=[(5,18209),(10,34243),(4,15055),(5,18209),(7,23293)]

# prefix='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200725_172458_batchsize_64/dump_outs/trainer0/model0/'
# lst=[(5,18209),(10,34243),(4,15055),(5,18209),(7,23293)]

prefix='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200805_124242_batchsize_64/dump_outs/trainer0/model0/'
lst=[(5,16920),(6,20200),(7,23680),(4,14720)]
### s_new gets one image array per (epoch, step) pair, in the order given in lst
s_new=[]
for epoch,step in lst:
    print(epoch,step)
    fname=prefix+'sgd.training.epoch.{0}.step.{1}_gen_img_instance1_activation_output0.npy'.format(epoch,step)
    s_new.append(np.load(fname)[:,0,:,:])   # keep index 0 along axis 1


5 16920
6 20200
7 23680
4 14720


In [38]:
### Collect all sample sets under short names : input images, the 4 keras runs, lbann generated images
dict_samples=dict(raw=s_raw[:3000],
                  keras1=s_keras[0],keras2=s_keras[1],keras3=s_keras[2],keras4=s_keras[3],
                  lbann1=s_lbann)
### Training-run dumps are added as new_0, new_1, ...
dict_samples.update({'new_'+str(count):i for count,i in enumerate(s_new)})
    

In [39]:
### Cropping out large pixel values

# for key in dict_samples.keys():
#     print(key)
# # for key in ['new_0','new_1']:
#     print(key,dict_samples[key].shape)
#     dict_samples[key]=np.array([n for n in dict_samples[key] if np.max(n)<0.98])[:256] ### pixel intensity < 400
#     print(key,dict_samples[key].shape)


In [40]:
### Largest pixel value of each sample set, passed through f_invtransform
for key,images in dict_samples.items():
    print(key,f_invtransform(np.max(images)))

raw 773.7213676777742
keras1 1167.816620448088
keras2 816.8223718776532
keras3 982.2991666608887
keras4 724.1099644374552
lbann1 5189.547859704607
new_0 955.9997683274008
new_1 568.8696596504692
new_2 516.0702173692437
new_3 1008.0614009814757


### Compute chi-square values

In [41]:

def f_compute_chisqr(dict_val,dict_sample,spec_idx=60):
    '''
    Compute chi-square values for sample w.r.t input images
    Input:
        dict_val, dict_sample : dictionaries for the input (reference) images and for the sample.
            Keys read : 'hist_val','hist_err','spec_val' from both, and 'spec_err' from dict_sample.
            The values are used as numpy arrays.
        spec_idx : number of leading spectrum bins used for chi_spec1 and chi_spec2.
            Default 60 (the value that used to be hard-coded); None uses all bins.
    Output: dictionary with keys
        chi_1a, chi_1b, chi_1c : histogram chi-square over the first 30%, middle 40% and last 30% of bins
        chi_1      : histogram chi-square over all bins
        chi_2      : sum of squared histogram differences (no denominator)
        chi_imgvar : sum of sample histogram errors / sum of reference histogram errors
        chi_spec1  : squared spectrum difference relative to the sample spectrum values squared
        chi_spec2  : squared spectrum difference relative to the sample spectrum errors squared
    The input dictionaries are not modified.
    '''
    ### !!Both pixel histograms MUST have same bins and normalization!
    ### Compute chi-sqr
    ### Used in keras code : np.sum(np.divide(np.power(valhist - samphist, 2.0), valhist))
    ###  chi_sqr :: sum((Obs-Val)^2/(Val))
    
    chisqr_dict={}
    
    val_dr=dict_val['hist_val'].copy()   ### work on a copy so the caller's histogram stays untouched
    val_dr[val_dr<=0.]=1.0    ### Avoiding division by zero for zero bins
    
    sq_diff=(dict_val['hist_val']-dict_sample['hist_val'])**2
    
    size=len(dict_val['hist_val'])
    l1,l2=int(size*0.3),int(size*0.7)
    keys=['chi_1a','chi_1b','chi_1c','chi_1']
    
    for (key,start,end) in zip(keys,[0,l1,l2,0],[l1,l2,None,None]):  # 4 lists : small, medium, large pixel values and full 
        chisqr_dict[key]=np.sum(np.divide(sq_diff[start:end],val_dr[start:end]))
    
    chisqr_dict['chi_2']=np.sum(np.divide(sq_diff,1.0)) ## chi-sqr without denominator division
    chisqr_dict['chi_imgvar']=np.sum(dict_sample['hist_err'])/np.sum(dict_val['hist_err']) ## measures total spread in histograms w.r.t. input data
    
    spec_diff=(dict_val['spec_val']-dict_sample['spec_val'])**2
    ### spectral chi-square relative to the sample spectrum values
    chisqr_dict['chi_spec1']=np.sum(spec_diff[:spec_idx]/dict_sample['spec_val'][:spec_idx]**2)
    
    ### spectral chi-square relative to the sample spectrum errors
    chisqr_dict['chi_spec2']=np.sum(spec_diff[:spec_idx]/dict_sample['spec_err'][:spec_idx]**2)
    
    return chisqr_dict


def f_compute_hist_spect(sample,bins):
    '''
    Build the histogram + spectrum summary of one batch of images.
    Input : sample (image array) and bins (handed unchanged to f_batch_histogram)
    Output: dictionary with 5 entries : hist_val, hist_err, hist_bin_centers, spec_val, spec_err
    '''
    ### Pixel intensity histogram of the batch (normalized, no explicit range)
    hist_val,hist_err,hist_bin_centers=f_batch_histogram(sample,bins=bins,norm=True,hist_range=None)
    ### Spectrum of the batch, without plotting
    spec_val,spec_err=f_compute_spectrum(sample,plot=False)

    return dict(hist_val=hist_val,hist_err=hist_err,hist_bin_centers=hist_bin_centers,
                spec_val=spec_val,spec_err=spec_err)

In [42]:
### Compute spectrum for keras and lbann image batches
s_input=s_raw[:]
transform=False  # If true, it computes histogram in the original scale of pixels ie. 0-2000 
bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
# bins=np.array([-0.5,0.5,1.5,2.5,3.5,4.5,5.5,15.5,25.5,75.5,125.5,500.5]).astype(np.float64)

if not transform: bins=f_transform(bins)   ### scale to (-1,1)
# bins=100
### Get pixel histogram and spectrum of all input data
dict_val=f_compute_hist_spect(s_input,bins)
del s_input

### One row of chi-square values per sample set.
### Rows are collected in a list and the DataFrame is built once at the end :
### DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call.
rows=[]
for name,images in dict_samples.items():
    print(name,images.shape)
    ### Compute spectrum and histograms
    dict_img=f_compute_hist_spect(images,bins) ## dictionary of 5 numpy arrays
    ### Compute chi squares
    dict1=f_compute_chisqr(dict_val,dict_img)
    dict1.update({'name':name})
#     print(dict1)
    rows.append(dict1)

### Columns sorted by name, the same column order the append-based version produced
df1=pd.DataFrame(rows).sort_index(axis=1)


raw (3000, 128, 128)
keras1 (5000, 128, 128)
keras2 (5000, 128, 128)
keras3 (5000, 128, 128)
keras4 (5000, 128, 128)
lbann1 (10023, 128, 128)
new_0 (64, 128, 128)
new_1 (64, 128, 128)
new_2 (64, 128, 128)
new_3 (64, 128, 128)


In [43]:
# Display the chi-square table (one row per sample set in dict_samples)
df1

Unnamed: 0,chi_1,chi_1a,chi_1b,chi_1c,chi_2,chi_imgvar,chi_spec1,chi_spec2,name
0,0.000238,2.5e-05,8.4e-05,0.000129,3e-06,1.838426,0.000973,64.630671,raw
1,0.000311,7.4e-05,3.9e-05,0.000198,5.8e-05,1.421143,0.03828,16780.929221,keras1
2,0.000928,0.000474,0.000269,0.000185,0.000295,1.477085,0.045803,20541.431537,keras2
3,0.002907,0.002594,5.6e-05,0.000257,0.001918,1.463584,0.021846,5646.684223,keras3
4,0.002724,0.001654,0.000642,0.000428,0.001215,1.421945,0.033614,18804.677014,keras4
5,0.160468,0.126459,0.025951,0.008058,0.031252,1.512923,0.908326,405758.57437,lbann1
6,0.010095,0.004923,0.002598,0.002574,0.000426,13.80641,0.304695,1812.721447,new_0
7,0.012598,0.002225,0.002188,0.008185,0.001568,12.018862,0.289086,1817.789213,new_1
8,0.015469,0.007496,0.004476,0.003497,0.003064,14.556343,0.144588,630.842789,new_2
9,0.013199,0.006341,0.001892,0.004966,0.001104,12.496791,0.205869,1189.958431,new_3


In [31]:
# f_pixel_intensity(s_lbann[330],label='',normalize=False,log_scale=True,mode='simple')
# f_pixel_intensity(f_invtransform(s_lbann[330]),label='',normalize=False,log_scale=True,mode='simple')

In [32]:
def f_widget_compare(sample_names,sample_dict,Fig_type='pixel',rescale=True,log_scale=True,bins=25,mode='avg',normalize=True,bkgnd=[]):
    '''
    Module to make widget plots for pixel intensity or spectrum comparison for multiple sample sets
    Input:
        sample_names : keys of sample_dict to compare; also used as plot labels
        sample_dict  : dictionary of image arrays (first axis = image index). It is NOT modified.
        Fig_type     : 'pixel' (calls f_compare_pixel_intensity) or 'spectrum' (calls f_compare_spectrum)
        rescale      : if True, images and bkgnd are passed through f_invtransform before plotting.
                       if False, an array of bin edges is passed through f_transform instead;
                       a scalar bins (number of bins) is left unchanged.
        log_scale, mode, normalize : handed to the plotting functions unchanged
        bins         : bin edges or number of bins for the pixel intensity plot
        bkgnd        : background image array handed to the plotting functions (empty = none).
                       The default list is never mutated; it stays a list so that widgets
                       built from this signature are not affected.
    Raises AssertionError for an unknown Fig_type.
    '''
    ### Validate first, so a wrong choice fails before any cropping/rescaling work
    if Fig_type not in ['pixel','spectrum']:
        raise AssertionError("Invalid Fig_type %s"%(Fig_type))
    
    ### Crop out images with large pixel values (max above 0.994).
    ### The cropped arrays are kept local : writing them back into sample_dict
    ### would permanently change the caller's data on every call.
    img_list=[]
    for key in sample_names:
        imgs=np.asarray(sample_dict[key])
        print(imgs.shape)
        ### max of every image = max over all axes except the first
        keep=np.max(imgs,axis=tuple(range(1,imgs.ndim)))<=0.994
        imgs=imgs[keep]
        print(imgs.shape)
        img_list.append(imgs)
    
    label_list=list(sample_names)
    
    if rescale: 
        img_list=[f_invtransform(img) for img in img_list]
        if len(bkgnd): bkgnd=f_invtransform(bkgnd)
#         hist_range=(0,2000)
    elif np.ndim(bins)>0:
        ### Only bin edges can be moved to the transformed scale; a bin count must stay as it is
        bins=f_transform(np.asarray(bins))
#         hist_range=(-1,0.996)
    
    if Fig_type=='pixel':
#         f_compare_pixel_intensity(img_lst=img_list,label_lst=label_list,normalize=normalize,log_scale=log_scale, mode=mode,bins=bins,hist_range=hist_range)
        f_compare_pixel_intensity(img_lst=img_list,label_lst=label_list,normalize=normalize,log_scale=log_scale, mode=mode,bins=bins,hist_range=None,bkgnd_arr=bkgnd)

    elif Fig_type=='spectrum':
        f_compare_spectrum(img_lst=img_list,label_lst=label_list,log_scale=log_scale,bkgnd_arr=bkgnd)


In [33]:
### Bin edges handed to the comparison widget
# Coarser alternative :
# bins=np.array([-0.5,0.5,1.5,2.5,3.5,4.5,5.5,15.5,25.5,75.5,125.5,500.5]).astype(np.float64)
edges_step1=np.arange(0.5,20.5,1)        # unit steps
edges_step5=np.arange(20.5,100.5,5)      # steps of 5
edges_step50=np.arange(100.5,1000.5,50)  # steps of 50
bins=np.concatenate([np.array([-0.5]),edges_step1,edges_step5,edges_step50,np.array([2000])]) #bin edges to use


In [34]:
# dict_samples={'raw': s_raw,'keras1':s_keras[0],'keras2':s_keras[1],'s_new1':s_new1,'s_new2':s_new2,'s_new3':s_new3}

### Background set for the comparison plots
bkgnd=s_raw
# bkgnd=[]

### Selection widgets : which sample sets to compare and which figure to draw
sample_picker=SelectMultiple(options=dict_samples.keys())
fig_picker=ToggleButtons(options=['pixel','spectrum'])

### Arguments wrapped in fixed(...) are handed to f_widget_compare as they are
interact_manual(f_widget_compare,
                sample_names=sample_picker,
                sample_dict=fixed(dict_samples),
                Fig_type=fig_picker,
                mode=['avg','simple'],
                bins=fixed(bins),
                bkgnd=fixed(bkgnd))

interactive(children=(SelectMultiple(description='sample_names', options=('raw', 'keras1', 'keras2', 'keras3',…

<function __main__.f_widget_compare(sample_names, sample_dict, Fig_type='pixel', rescale=True, log_scale=True, bins=25, mode='avg', normalize=True, bkgnd=[])>

### View images

In [None]:
# Plot the first 18 images of the first keras run (f_plot_grid is not defined in this notebook; presumably cols=6 gives a 6-column grid)
f_plot_grid(s_keras[0][:18],cols=6,fig_size=(10,5))


In [38]:
# Plot images 18-35 of the first training-run dump in s_new (f_plot_grid is not defined in this notebook; presumably cols=6 gives a 6-column grid)
f_plot_grid(s_new[0][18:36],cols=6,fig_size=(10,5))


3 6


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [36]:
### List the images of the first training-run dump whose largest pixel exceeds 0.9898,
### together with that pixel value passed through f_invtransform
for img_idx,img in enumerate(s_new[0]):
    peak=np.max(img)
    if peak>0.9898:
        print(img_idx,f_invtransform(peak))


24 951.1016419024327
58 865.6115300727208
166 974.3120460778867
