# Extract data from output files
### Analyze the output from a single LBANN run
March 9, 2020 \
April 6, 2020 : to store files in order of epochs \
April 21, 2020: added jupyter widgets to compare pixel intensity plots \
May 8, 2020: using all images for a given batch \
May 29, 2020: Modified for new update of LBANN. File names of images changed, so new extraction code. Also added code for computing chi-squared. \
June 17, 2020: Removed train_inp, train_gen and val_inp to reduce memory overhead. From now on, the code only analyzes val_gen \
June 26, 2020: Added gathering of steps and new chi-square quantities.\
July 1, 2020: Switched back to storing mainly train_gen with large steps (10 steps saved for 256 batchsize).

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import sys

import itertools
import time
from scipy import fftpack
# from ipywidgets import interact, interact_manual,fixed, SelectMultiple, IntText, IntSlider, FloatSlider,SelectionSlider,BoundedIntText
from ipywidgets import *

In [2]:
%matplotlib widget

In [3]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/lbann_cosmogan/3_analysis')
from modules_image_analysis import *

[NbConvertApp] Converting notebook modules_image_analysis.ipynb to script
[NbConvertApp] Writing 17167 bytes to modules_image_analysis.py


In [4]:
### Transformation functions for image pixel values
def f_transform(x):
    return 2.*x/(x + 4. + 1e-8) - 1.

def f_invtransform(s):
    return 4.*(1. + s)/(1. - s + 1e-8)

In [5]:
# ### Other transformatino functinos
# ### Transformation functions for image pixel values

# def f_transform_new(x):
#     if x<=50:
#         a=0.03; b=-1.0
#         return a*x+b
#     elif x>50: 
#         a=0.5/np.log(300)
#         b=0.5-a*np.log(50)
#         return a*np.log(x)+b

# def f_invtransform_new(y):
#     if y<=0.5:
#         a=0.03;b=-1.0
#         return (y-b)/a
#     elif y>0.5: 
#         a=0.5/np.log(300)
#         b=0.5-a*np.log(50)
#         return np.exp((y-b)/a)
    

# def f_transform(x):
#     return np.vectorize(f_transform_new)(x)

# def f_invtransform(s):
#     return np.vectorize(f_invtransform_new)(s)

# f_transform_new(2000)

### Modules for Extraction

In [6]:
def f_get_files_df_sorted(main_dir):
    '''
    Module to create Dataframe with filenames for each epoch and step
    Sorts by step and epoch
    '''
    
    ## Get images files and .npy arrays for each image in dump_outs folder
    t1=time.time()
    files_dict={}
#     keys=['train_gen','train_input','val_gen','val_input']
#     file_strg_lst=['model0-training*-gen_img*-output0.npy','model0-training*-inp_img*-output0.npy','model0-validation*-gen_img*-output0.npy','model0-validation*-inp_img*-output0.npy']
#     file_strg_lst=['sgd.training*_gen_img*_output0.npy','sgd.training*_inp_img*_output0.npy','sgd.validation*_gen_img*_output0.npy','sgd.validation*_inp_img*_output0.npy']

#     keys=['val_gen']
#     file_strg_lst=['sgd.validation*_gen_img*_output0.npy']
    keys=['train_gen']
    file_strg_lst=['sgd.training*_gen_img*_output0.npy']
    
    for key,file_strg in zip(keys,file_strg_lst):
        files_dict[key]=np.array(glob.glob(main_dir+file_strg))
        if files_dict[key].shape[0]>1000 : 
            print('Warning the number of files is very large. Possibility of memory overload')
    
    df_files=pd.DataFrame([])
    dict1={}
    t1=time.time()
    ### First get sorted Dataframe with file names
    for key in keys:
        files_arr=files_dict[key]  # Get array of files
        print(key,len(files_arr))
        for fname in files_arr:
            ### Extract the Epoch number and step number from the file name
            dict1['img_type']=key
            dict1['epoch']=np.int32(fname.split('epoch')[-1].split('.')[1])
            dict1['step']=np.int64(fname.split('step')[-1].split('.')[1].split('_')[0])
            dict1['fname']=fname
            
            df_files=df_files.append(dict1,ignore_index=True)
    ## Sort values
    df_files=df_files.sort_values(by=['img_type','epoch','step']).reset_index(drop=True)
    # df_files
    t2=time.time()
    print("Time for Sorting",t2-t1)
    
    return df_files


def f_filter_epoch(df_input,num_sliced=1):
    '''
    Get just equally spaced steps for each epoch
    '''
    print('Extracting %s steps of each epoch'%(num_sliced))
    df_output=pd.DataFrame([])
#     for key in ['train_gen','train_input','val_gen','val_input']: 
    for key in ['train_gen']: 
        ### For each type of images, get list of epochs
        df1=df_input[df_input.img_type==key]
        epochs=np.unique(df1.epoch.values).astype(int)

        for epoch in epochs:### Extract the last few steps in each epoch
            arr_step=df1[df1.epoch==epoch].step.values   ## Get all steps
            idxs=np.round(np.linspace(0,len(arr_step)-1,num_sliced).astype(int)) ## Get indices with equal spacing 
            df2=df1[df1.step.isin(arr_step[idxs])]        ## Get dataframe with these steps
            df_output=df_output.append(df2)  
    
    return df_output.reset_index(drop=True)

def f_get_images_df(df_files):
    '''
    Read dataframe with file names, read files and create new dataframe with images as numpy arrays
    Also computes number of images with intensity beyond a cutoff
    '''
    
    def f_row(df_row):
        '''
        Extract image
        '''
        fname,key=df_row.fname,df_row.img_type
        a1=np.load(fname)
        if key.endswith('input'): 
            size=np.int(np.sqrt(a1.shape[-1])) ### Extract size of images (=128)
            batch_size=a1.shape[0] ### Number of batches
            samples=a1.reshape(batch_size,size,size)
        elif key.endswith('gen') : samples=a1[:,0,:,:]
        else : raise SystemError

        return samples
    
    def f_high_pixel(df_row,cutoff=0.9966):
        '''
        Get number of images with a pixel about max cut-off value
        '''
        max_arr=np.amax(df_row.images,axis=(1,2))
        num_large=max_arr[max_arr>cutoff].shape[0]

        return num_large
    
    t1=time.time()
    ##### Create new Dataframe with sorted images
    df=df_files.copy()
    df['images']=df.apply(lambda row: f_row(row), axis=1)
    t2=time.time()
    print("Time for Reading images",t2-t1)
    
    ### Store the number of images with large pixel value
    cutoff=0.9966
    df['num_large']=df.apply(lambda row: f_high_pixel(row,cutoff), axis=1)
    
    return df
    


In [7]:
def f_get_sample_epochs(df,img_type,start_epoch=None,end_epoch=None):
    '''
    Module to extract images for a range of epochs given a dataframe
    '''
    if start_epoch==None and end_epoch==None:
        max_epoch=np.int(np.max(df.epoch.values))
#         print(max_epoch)
        start_epoch=0; end_epoch=max_epoch
#     if end_epoch==None: end_epoch=start_epoch+1
    
    arr=df[(df.epoch>=start_epoch) & (df.epoch<=end_epoch) & (df.img_type==img_type)].images.values
    arr=np.vstack(arr)
    
    return arr


def f_get_step(df,img_type,epoch,step):
    '''
    Module to extract images for a specific step and epoch
    '''
    
    arr=df[(df.epoch==epoch) & (df.step==step) & (df.img_type==img_type)].images.values
    arr=np.vstack(arr)
    
    return arr

def f_get_step_group(df,img_type,step_list):
    '''
    Module to extract images for a range of epochs given a dataframe
    '''
    arr=df[(df.step.isin(step_list)) & (df.img_type==img_type)].images.values
    arr=np.vstack(arr)
    
    return arr

## Extract image data 

In [None]:
# fldr_name='20200529_111342_seed3273_80epochs'
# fldr_name='20200718_114324_batchsize_512/'
# fldr_name='20200718_135530_batchsize_256/'
fldr_name='20200725_204329_batchsize_256/'
# fldr_name='20200725_172458_batchsize_64/'

main_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/{0}dump_outs/trainer0/model0/'.format(fldr_name)
print(main_dir)

In [22]:
### Extract validation data
fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_2_smoothing_200k/norm_1_train_val.npy'
s_val=np.load(fname,mmap_mode='r')[:8000][:,0,:,:]
print(s_val.shape)

(8000, 128, 128)


In [10]:
### Get dataframe with file names, sorted by epoch and step
df_files=f_get_files_df_sorted(main_dir)

### Slice out epochs
df_files=df_files[(df_files.epoch>3) & (df_files.epoch<20)]

### Slice out rows to keep a few steps for each epoch.
df_files=f_filter_epoch(df_files,num_sliced=200)

#############################################################
### Read images one by one into a numpy array and create a new DataFrame
df_full=f_get_images_df(df_files)
print(df_full.shape)
# ### Filter to keep just one step per epoch
# df_full=f_filter_epoch(df_full,1)

train_gen 15860
Time for Sorting 80.43227028846741
Extracting 200 steps of each epoch
Time for Reading images 239.3215959072113
(3200, 6)


In [11]:
# df_files.head(20)

## Chi-square

In [12]:
def f_compute_chisqr(df,s_input):
    ''' Compute chi-sqr values of pixel intensity histogram and spectrum for each row
    Uses the module f_pixel_intensity to compute histograms and f_compute_spectrum for spectrum
    '''
    
    def f_chisqr(images,val_hist,val_err,val_spec,val_spec_err,bins,transform):
        ''' Compute chi-sqr of rows wrt to input data'''
        
        val_dr=val_hist.copy()
        val_dr[val_dr<=0.]=1.0    ### Avoiding division by zero for zero bins
        
        ### Get all images in a batch
        sample=images if not transform else f_invtransform(images)

        ### Compute pixel histogram for row   ### !!Both pixel histograms MUST have same bins and normalization!
        gen_hist,gen_err=f_pixel_intensity(sample,plot=False,normalize=True,bins=bins,mode='avg')
        spec,spec_err=f_compute_spectrum(sample,plot=False)

        ### Compute chi-sqr
        ### Used in keras code : np.sum(np.divide(np.power(valhist - samphist, 2.0), valhist))
        ###  chi_sqr :: sum((Obs-Val)^2/(Val))
        sq_diff=(gen_hist-val_hist)**2        
        chi_sqr_list=[]
        
        size=len(bins)
        l1,l2=int(size*0.3),int(size*0.6)
        for count,(start,end) in enumerate(zip([0,l1,l2,0],[l1,l2,None,None])):  # 4 lists : small, medium, large pixel values and full 
            chi_sqr_list.append(np.sum(np.divide(sq_diff[start:end],val_dr[start:end])))
        
        idx=None  # Choosing the number of histograms to use. Eg : -5 to skip last 5 bins
        
        chi_sqr_list.append(np.sum(np.divide(sq_diff[:idx],1.0))) ## chi-sqr without denominator division
        chi_sqr_list.append(np.sum(gen_err[:idx])/np.sum(val_err[:idx])) ## measures total spread in histograms wrt to input data
        
        ### computing the spectral loss chi-square
        chi_sqr_list.append(np.sum((spec[:60]-val_spec[:60])**2/(val_spec[:60]**2)))
        
        ### computing the spectral loss chi-square
        chi_sqr_list.append(np.sum((spec[:60]-val_spec[:60])**2/(spec_err[:60]**2))) 
        
        return chi_sqr_list
    
    
    ########################
    ###### Code starts ########
    transform=False  # If true, it computes histogram in the orignal scale of pixels ie. 0-2000 
    
    ## Get bins (bin-edges) for histograms
    bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
    bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,5),np.arange(20.5,100.5,20),np.arange(100.5,1000.5,100),np.array([2000])]) #bin edges to use
    if not transform: bins=f_transform(bins)   ### scale to (-1,1)
#     bins=100
#     print(bins)
    
    ### Get pixel histogram of all input data
    val_hist,val_err=f_pixel_intensity(s_input,plot=False,normalize=True,bins=bins,mode='avg')    
    ### Computing spectrum ###
    val_spec,val_spec_err=f_compute_spectrum(s_input,plot=False)
    del s_input


    ### Get chi-sqr for each row (step-epoch) for generated data
    chi_sqrs=df.apply(lambda row: f_chisqr(row.images,val_hist=val_hist,val_err=val_err,val_spec=val_spec,val_spec_err=val_spec_err,bins=bins,transform=transform), axis=1).values
    chi_vals=np.array(list(zip(*chi_sqrs)))  ## transposing list of list
        
    chi_sqr_keys=['chi_sqr1a','chi_sqr1b','chi_sqr1c','chi_sqr1','chi_sqr2','chi_img_var','chi_spec','chi_spec2']
    for key,chi_val in zip(chi_sqr_keys,chi_vals):
        df[key]=chi_val
    
    return df

def f_get_best_chisqr_models(df):
    
    chi_sqr_keys=['chi_sqr1a','chi_sqr1b','chi_sqr1c','chi_sqr1','chi_sqr2','chi_img_var','chi_spec','chi_spec2']
    q_dict=dict(df.quantile(q=0.2,axis=0)[chi_sqr_keys])
    print(q_dict)
    
    df_sliced=df.query('chi_sqr1 < {0} & chi_spec < {1}'.format(q_dict['chi_sqr1'],q_dict['chi_spec']))[['epoch','step','img_type','num_large']+chi_sqr_keys]
    
    return df_sliced


In [13]:
t1=time.time()
# df1=f_compute_chisqr(df_full.loc[[0,1,2,3]],s_val) # Test on small df
df_full=f_compute_chisqr(df_full,s_val)
t2=time.time()
print("Time to compute chi-sqr",t2-t1)

Time to compute chi-sqr 2273.340839624405


In [14]:
df=df_full.copy()
# df1

In [15]:
df_sliced=f_get_best_chisqr_models(df_full)
# df_sliced=f_get_best_chisqr_models(df_full[df_full.epoch<30])
print(df_sliced.shape)

{'chi_sqr1a': 0.0025967644638182686, 'chi_sqr1b': 0.0007747741943072596, 'chi_sqr1c': 0.001270111504136592, 'chi_sqr1': 0.010144148981058343, 'chi_sqr2': 0.0008955959391827487, 'chi_img_var': 5.662798087658625, 'chi_spec': 0.5618603976900896, 'chi_spec2': 9249.28382997146}
(323, 12)


In [16]:
df_sliced

Unnamed: 0,epoch,step,img_type,num_large,chi_sqr1a,chi_sqr1b,chi_sqr1c,chi_sqr1,chi_sqr2,chi_img_var,chi_spec,chi_spec2
1513,11.0,9172.0,train_gen,0,0.001811,0.005636,0.000422,0.007868,0.001472,7.748514,0.489039,5544.370277
1674,12.0,9810.0,train_gen,0,0.003944,0.003626,0.000296,0.007866,0.002981,6.332546,0.535150,8471.060910
1693,12.0,9886.0,train_gen,0,0.006046,0.002520,0.001551,0.010116,0.004949,7.444880,0.471151,8571.190063
1695,12.0,9894.0,train_gen,0,0.004661,0.001854,0.001306,0.007821,0.002481,6.733334,0.548956,7083.946569
1732,12.0,10041.0,train_gen,0,0.000239,0.008168,0.000586,0.008993,0.000077,5.461594,0.437789,8567.402808
1848,13.0,10500.0,train_gen,0,0.003384,0.002223,0.003565,0.009172,0.002814,6.142944,0.525138,10268.912392
1862,13.0,10555.0,train_gen,0,0.000931,0.004348,0.002231,0.007510,0.000792,6.323101,0.406086,6924.464447
1896,13.0,10691.0,train_gen,0,0.000397,0.003866,0.000965,0.005228,0.000192,6.542157,0.540485,7915.098692
1905,13.0,10726.0,train_gen,0,0.000776,0.001420,0.007351,0.009548,0.000520,7.377083,0.502697,13019.970926
1910,13.0,10746.0,train_gen,0,0.002802,0.003177,0.002280,0.008259,0.002050,5.260420,0.361552,9084.684912


#### View best epochs

#### Locations with best chi_sqr

In [17]:
chi_sqr_keys=['epoch','step','chi_sqr1a','chi_sqr1b','chi_sqr1c','chi_sqr1','chi_sqr2','chi_img_var','chi_spec']
# index location of min/max values of chi squares
inds=[]
for key in ['chi_spec','chi_sqr1','chi_sqr1a','chi_sqr1b','chi_sqr1c','chi_sqr2']:
    inds.append(df[key].idxmin(axis=1))
inds=np.array(inds)
df.loc[inds][chi_sqr_keys]

Unnamed: 0,epoch,step,chi_sqr1a,chi_sqr1b,chi_sqr1c,chi_sqr1,chi_sqr2,chi_img_var,chi_spec
2963,18.0,14922.0,0.000852,0.000464,0.009129,0.010446,0.00054,6.786766,0.12316
3159,19.0,15699.0,9.1e-05,0.000831,0.000221,0.001143,9e-06,6.504053,0.732944
3013,19.0,15118.0,1.6e-05,0.001382,0.004827,0.006224,2.5e-05,6.120184,0.26101
2859,18.0,14508.0,0.005171,5.2e-05,0.02486,0.030083,0.004084,5.753047,0.664042
2961,18.0,14914.0,0.006517,0.003932,9.2e-05,0.010541,0.004749,5.112631,0.452514
3078,19.0,15377.0,1.7e-05,0.000508,0.001494,0.002019,5e-06,8.269145,0.412543


### Sorted dataframe by best chi-squares

In [27]:
# df_full.sort_values(by=['chi_sqr1','chi_spec'])[['epoch','step','chi_sqr1','chi_spec']].head(40)
df_full[df_full.epoch<10].sort_values(by=['chi_spec','chi_sqr1'])[['epoch','step','chi_sqr1','chi_spec']].head(40)

Unnamed: 0,epoch,step,chi_sqr1,chi_spec
1100,9.0,7534.0,0.065587,0.499226
893,8.0,6714.0,0.067489,0.565326
1148,9.0,7726.0,0.03556,0.567051
1097,9.0,7523.0,0.025853,0.580705
977,8.0,7048.0,0.03036,0.582066
1050,9.0,7335.0,0.02286,0.592426
1105,9.0,7554.0,0.032251,0.597466
1103,9.0,7546.0,0.026668,0.618567
890,8.0,6702.0,0.156789,0.630819
1104,9.0,7550.0,0.024887,0.634489


In [19]:
df_full[chi_sqr_keys].describe()

Unnamed: 0,epoch,step,chi_sqr1a,chi_sqr1b,chi_sqr1c,chi_sqr1,chi_sqr2,chi_img_var,chi_spec
count,3200.0,3200.0,3200.0,3200.0,3200.0,3200.0,3200.0,3200.0,3200.0
mean,11.5,9515.005,0.016644,0.005011,0.051282,0.072937,0.011941,6.802728,1.867861
std,4.610493,3663.340594,0.018102,0.010257,0.323776,0.335633,0.014311,1.467027,2.102318
min,4.0,3172.0,1.6e-05,5.2e-05,9.2e-05,0.001143,5e-06,2.978238,0.12316
25%,7.75,6343.75,0.00345,0.000966,0.001532,0.011841,0.001355,5.825944,0.637918
50%,11.5,9515.5,0.009965,0.002487,0.006201,0.025058,0.006247,6.531575,1.139921
75%,15.25,12687.25,0.024714,0.006056,0.023871,0.055694,0.017787,7.462841,2.322152
max,19.0,15859.0,0.148014,0.302883,14.436897,14.788363,0.111361,17.626901,30.133988


In [20]:
df_sliced

Unnamed: 0,epoch,step,img_type,num_large,chi_sqr1a,chi_sqr1b,chi_sqr1c,chi_sqr1,chi_sqr2,chi_img_var,chi_spec,chi_spec2
1513,11.0,9172.0,train_gen,0,0.001811,0.005636,0.000422,0.007868,0.001472,7.748514,0.489039,5544.370277
1674,12.0,9810.0,train_gen,0,0.003944,0.003626,0.000296,0.007866,0.002981,6.332546,0.535150,8471.060910
1693,12.0,9886.0,train_gen,0,0.006046,0.002520,0.001551,0.010116,0.004949,7.444880,0.471151,8571.190063
1695,12.0,9894.0,train_gen,0,0.004661,0.001854,0.001306,0.007821,0.002481,6.733334,0.548956,7083.946569
1732,12.0,10041.0,train_gen,0,0.000239,0.008168,0.000586,0.008993,0.000077,5.461594,0.437789,8567.402808
1848,13.0,10500.0,train_gen,0,0.003384,0.002223,0.003565,0.009172,0.002814,6.142944,0.525138,10268.912392
1862,13.0,10555.0,train_gen,0,0.000931,0.004348,0.002231,0.007510,0.000792,6.323101,0.406086,6924.464447
1896,13.0,10691.0,train_gen,0,0.000397,0.003866,0.000965,0.005228,0.000192,6.542157,0.540485,7915.098692
1905,13.0,10726.0,train_gen,0,0.000776,0.001420,0.007351,0.009548,0.000520,7.377083,0.502697,13019.970926
1910,13.0,10746.0,train_gen,0,0.002802,0.003177,0.002280,0.008259,0.002050,5.260420,0.361552,9084.684912


In [21]:
### Plot chi-sqr values
df_sliced.plot(x="epoch", y=["chi_sqr1", "chi_img_var", "chi_spec"],style='.',marker='*')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x2aaadd05e048>

In [25]:
# df_sliced.plot(x="step", y=["chi_sqr1", "chi_img_var", "chi_spec"],style='.',marker='*')
df_sliced.plot(x="step", y=["chi_sqr1"],style='-',marker='*')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x2aaaddb85978>

### High Pixel images

In [None]:
### Plot number of high pixel images
plt.figure()
plt.plot(df[df.img_type=='val_gen'].epoch,df[df.img_type=='val_gen'].num_large,linestyle='',marker='*')
plt.xlabel('Steps in Epochs')
plt.ylabel('Number of large pixel images from a batch of images')

## Compare samples

In [None]:
def f_widget_compare(sample_names,sample_dict,Fig_type='pixel',rescale=True,log_scale=True,bins=25,mode='avg',normalize=True,bkgnd=[]):
    '''
    Module to make widget plots for pixel intensity or spectrum comparison for multiple sample sets
    '''
#     ### Crop out large pixel values
#     for key in sample_names:
#         print(sample_dict[key].shape)
#         sample_dict[key]=np.array([arr for arr in sample_dict[key] if np.max(arr)<=0.994])
#         print(sample_dict[key].shape)
    
    img_list=[sample_dict[key] for key in sample_names]
    label_list=list(sample_names)
    
    bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
    
    if rescale: 
        for count,img in enumerate(img_list):
            img_list[count]=f_invtransform(img)
        if len(bkgnd): bkgnd=f_invtransform(bkgnd)
#         hist_range=(0,2000)
    else:
        bins=f_transform(bins)
#         hist_range=(-1,0.996)
    assert Fig_type in ['pixel','spectrum'],"Invalid mode %s"%(mode)
    
    if Fig_type=='pixel':
#         f_compare_pixel_intensity(img_lst=img_list,label_lst=label_list,normalize=normalize,log_scale=log_scale, mode=mode,bins=bins,hist_range=hist_range)
        f_compare_pixel_intensity(img_lst=img_list,label_lst=label_list,normalize=normalize,log_scale=log_scale, mode=mode,bins=bins,hist_range=None,bkgnd_arr=bkgnd)

    elif Fig_type=='spectrum':
        f_compare_spectrum(img_lst=img_list,label_lst=label_list,log_scale=log_scale,bkgnd_arr=bkgnd)


### Compare different steps

In [None]:
# img_list,labels_list=f_get_sample_epochs(df,'train_gen',10)

img_list,labels_list=[],[]
for a,b in df_sliced.iterrows():
    epoch,step=int(b.epoch),int(b.step)
    img_list.append(f_get_step(df,'train_gen',epoch,step))
    labels_list.append('%s:%s'%(str(epoch),str(step)))

dict_samples=dict.fromkeys(labels_list)
for key,val in zip(labels_list,img_list): dict_samples[key]=val

dict_samples.keys()
# ### Compare with input
# # dict_samples['keras']=s_keras
# dict_samples['input']=s_val
bkgnd=[]
bkgnd=s_val
interact_manual(f_widget_compare,sample_dict=fixed(dict_samples),
                sample_names=SelectMultiple(options=dict_samples.keys()),
                Fig_type=ToggleButtons(options=['pixel','spectrum']),bins=IntText(value=50),mode=['avg','simple'],bkgnd=fixed(bkgnd))



### Plot step groups in best epochs

In [None]:
print(np.unique(df_sliced.epoch.values))
step_list=df_sliced[df_sliced.epoch==26].step.values
print(step_list)

In [None]:
img_list,labels_list=[],[]
for epoch in np.unique(df_sliced.epoch.values).astype(int):
    step_list=df_sliced[df_sliced.epoch==epoch].step.values
    print(epoch,step_list)
    img_list.append(f_get_step_group(df,'train_gen',step_list))
    labels_list.append('%s'%(str(epoch)))

dict_samples=dict.fromkeys(labels_list)
for key,val in zip(labels_list,img_list): dict_samples[key]=val

dict_samples.keys()
# # ### Compare with input
# # # dict_samples['keras']=s_keras
# # dict_samples['input']=s_val
# bkgnd=[]
bkgnd=s_val
interact_manual(f_widget_compare,sample_dict=fixed(dict_samples),
                sample_names=SelectMultiple(options=dict_samples.keys()),
                Fig_type=ToggleButtons(options=['pixel','spectrum']),bins=IntText(value=50),mode=['avg','simple'],bkgnd=fixed(bkgnd))



In [None]:
# img_lst=[f_invtransform(i) for i in img_list]
# bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])]) #bin edges to use
# # bins=200
# f_compare_pixel_intensity(img_list,labels_list,normalize=True,log_scale=True, mode='avg',bins=bins,hist_range=None)
# f_compare_spectrum(img_list,labels_list,log_scale=True)


## View image block

In [None]:
f_plot_grid(img_arr,cols=6,fig_size=(10,5))


In [None]:
fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200701_054823_exagan/dump_outs/trainer0/model0/sgd.training.epoch.21.step.8480_gen_img_instance1_activation_output0.npy'
# fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/generate_images/20200629_145233_gen_img_exagan/dump_outs/trainer0/model0/sgd.testing.epoch.0.step.0_gen_img_instance1_activation_output0.npy'
s_new=np.load(fname)[:,0,:,:]
print(s_new.shape)

In [None]:
f_plot_grid(s_new[100:118],cols=6,fig_size=(10,5))

In [None]:
f_plot_grid(s_val[100:118],cols=6,fig_size=(10,5))