# post analysis pandas test
Dec 4, 2020

In [2]:

import numpy as np
import pandas as pd
import argparse

import subprocess as sp
import os
import glob
import sys

import time

from pandarallel import pandarallel

sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/repositories/lbann_cosmogan/3_analysis')
from modules_image_analysis import *

In [3]:

def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to sys.argv[1:], which is the
            original behaviour. Passing an explicit list makes the function
            usable from a notebook or a test.

    Returns:
        argparse.Namespace with attributes val_data, folder, cores and bins_type.
    """
    parser = argparse.ArgumentParser(description="Analyze output data from LBANN run")
    add_arg = parser.add_argument

    add_arg('--val_data','-v', type=str, default='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_2_smoothing_200k/norm_1_train_val.npy',help='The .npy file with input data to compare with')
    add_arg('--folder','-f', type=str,help='The full path of the folder containing the data to analyze.')
    add_arg('--cores','-c', type=int, default=64,help='Number of cores to use for parallelization')
    # The help text used to be a copy of the --cores help; describe the option itself.
    add_arg('--bins_type','-bin', type=str, default='uneven',help="Histogram binning scheme: 'uneven' for variable-width bins, any other value for evenly spaced bins")

    return parser.parse_args(argv)

### Transformation functions for image pixel values
def f_transform(x):
    ''' Map pixel values x to s = 2x/(x+4) - 1.
    The denominator carries a small 1e-8 offset (presumably to avoid an exact zero at x=-4).
    '''
    denominator = x + 4. + 1e-8
    return 2.*x/denominator - 1.

def f_invtransform(s):
    ''' Map transformed values s back to x = 4(1+s)/(1-s).
    The denominator carries a small 1e-8 offset (presumably to avoid an exact zero at s=1).
    '''
    numerator = 4.*(1. + s)
    denominator = 1. - s + 1e-8
    return numerator/denominator

# ### Modules for Extraction
def f_get_sorted_df(main_dir,label):
    '''
    Build a DataFrame listing the generated-image files of one label.

    Looks in <main_dir>/images/ for files matching
    '*gen_img_label-<label>_epoch*_step*.npy', reads the epoch and step numbers
    out of each file name and returns the rows sorted by img_type, epoch and step.

    Returns a DataFrame with columns ['epoch','step','img_type','fname'].
    Prints the number of files found and the elapsed time.
    Raises SystemExit (after printing 'No files') when nothing matches.
    '''
    def _epoch_and_step(path):
        ''' Parse the epoch and step numbers embedded in a file name '''
        epoch_txt=path.split('epoch-')[-1].split('_')[0]
        step_txt=path.split('step-')[-1].split('.')[0]
        return {'epoch':np.int32(epoch_txt),'step':np.int64(step_txt)}

    t_start=time.time()

    ### Collect the matching file names (only the last literal is formatted with the label)
    matches=glob.glob(main_dir+'/images/'+'*gen_img_label-{0}_epoch*_step*.npy'.format(label))
    n_files=len(matches)

    print('Number of files',n_files)
    if n_files<1:
        print('No files')
        raise SystemExit

    ### Assemble the dataframe; every file is tagged with img_type 'train'
    df_files=pd.DataFrame()
    df_files['img_type']=np.array(['train']*n_files)
    df_files['fname']=np.array(matches).astype(str)

    ### One record per file, then one column per parsed field
    records=[_epoch_and_step(path) for path in df_files['fname']]
    for field in ('epoch','step'):
        df_files[field]=[rec[field] for rec in records]

    ### Order rows by image type, then epoch, then step
    ordered=df_files.sort_values(by=['img_type','epoch','step']).reset_index(drop=True)

    t_end=time.time()
    print("time for sorting",t_end-t_start)

    return ordered[['epoch','step','img_type','fname']]


def f_compute_hist_spect(sample,bins):
    ''' Gather the pixel-intensity histogram and the spectrum of a batch of images
    Input : image array and histogram bins
    Output: dictionary with 5 entries : hist_val, hist_err, hist_bin_centers (from f_batch_histogram,
            called with norm=True and hist_range=None) and spec_val, spec_sdev (from f_compute_spectrum,
            called with plot=False).
    '''
    ### Pixel histogram statistics of the batch
    hist_mean,hist_err,bin_centers=f_batch_histogram(sample,bins=bins,norm=True,hist_range=None)
    ### Spectrum statistics of the batch
    spec_mean,spec_sdev=f_compute_spectrum(sample,plot=False)

    return dict(hist_val=hist_mean,
                hist_err=hist_err,
                hist_bin_centers=bin_centers,
                spec_val=spec_mean,
                spec_sdev=spec_sdev)

def f_get_images(fname,img_type):
    '''
    Load the image array stored in the .npy file `fname`.
    `img_type` is accepted but not used.
    Returns a full-slice view of the loaded array.
    '''
    loaded=np.load(fname)
    return loaded[:]
    

def f_high_pixel(images,cutoff=0.9966):
    '''
    Count the images that contain at least one pixel strictly above the cut-off value.
    The maximum of each image is taken over axes 1 and 2.
    '''
    per_image_max=np.amax(images,axis=(1,2))
    return int(np.count_nonzero(per_image_max>cutoff))


def f_compute_chisqr(dict_val,dict_sample):
    '''
    Compute chi-square style distances of a sample w.r.t. the input (validation) images.

    Input: 2 dictionaries, each providing the keys 'hist_val', 'hist_err', 'spec_val' and 'spec_sdev'
           (dict_val for the reference data, dict_sample for the sample being scored).
    Output: dictionary with the keys
           chi_1a, chi_1b, chi_1c : histogram chi-square over the first 30%, middle 40% and last 30% of the bins
           chi_1                  : histogram chi-square over all bins
           chi_2                  : sum of squared histogram differences (no denominator)
           chi_imgvar             : ratio of summed histogram errors, sample / reference
           chi_spec1, chi_spec2   : spectrum differences over the first 64 entries, scaled by the sample
                                    spectrum squared and by the sample spread squared respectively
           chi_spec3              : log of mean squared spectrum difference + log of mean squared spread difference

    If any step raises, the exception is printed and EVERY key above is returned as NaN, so that
    failed and successful rows always expose the same set of keys.
    '''
    ### !!Both pixel histograms MUST have same bins and normalization!
    ### Used in keras code : np.sum(np.divide(np.power(valhist - samphist, 2.0), valhist))
    ###  chi_sqr :: sum((Obs-Val)^2/(Val))

    ### Single source of truth for the output keys (same order as the success path)
    all_keys=['chi_1a','chi_1b','chi_1c','chi_1','chi_2','chi_imgvar','chi_spec1','chi_spec2','chi_spec3']

    chisqr_dict={}

    try:
        val_dr=dict_val['hist_val'].copy()
        val_dr[val_dr<=0.]=1.0    ### Avoiding division by zero for zero bins

        sq_diff=(dict_val['hist_val']-dict_sample['hist_val'])**2

        size=len(dict_val['hist_val'])
        l1,l2=int(size*0.3),int(size*0.7)

        # 4 ranges : small, medium, large pixel values and the full histogram
        for (key,start,end) in zip(all_keys[:4],[0,l1,l2,0],[l1,l2,None,None]):
            chisqr_dict[key]=np.sum(np.divide(sq_diff[start:end],val_dr[start:end]))

        idx=None  # Choosing the number of histogram bins to use. Eg : -5 to skip last 5 bins

        chisqr_dict['chi_2']=np.sum(np.divide(sq_diff[:idx],1.0)) ## chi-sqr without denominator division
        chisqr_dict['chi_imgvar']=np.sum(dict_sample['hist_err'][:idx])/np.sum(dict_val['hist_err'][:idx]) ## measures total spread in histograms wrt to input data

        idx=64  # only the first 64 spectrum entries are compared
        spec_diff=(dict_val['spec_val']-dict_sample['spec_val'])**2
        ### spectral chi-square, scaled by the sample spectrum
        chisqr_dict['chi_spec1']=np.sum(spec_diff[:idx]/dict_sample['spec_val'][:idx]**2)

        ### spectral chi-square, scaled by the sample spread
        chisqr_dict['chi_spec2']=np.sum(spec_diff[:idx]/dict_sample['spec_sdev'][:idx]**2)

        ### log-based spectral loss on both the spectrum and its spread
        spec_loss=1.0*np.log(np.mean((dict_val['spec_val'][:idx]-dict_sample['spec_val'][:idx])**2))+1.0*np.log(np.mean((dict_val['spec_sdev'][:idx]-dict_sample['spec_sdev'][:idx])**2))
        chisqr_dict['chi_spec3']=spec_loss

    except Exception as e:
        # Deliberate best-effort: report the problem and hand back NaNs for this sample.
        print(e)
        # The fallback used to omit 'chi_spec3', giving failed rows a different key set.
        chisqr_dict=dict.fromkeys(all_keys,np.nan)

    return chisqr_dict
    
    
def f_get_computed_dict(fname,img_type,bins,dict_val):
    '''
    Compute every summary quantity for one file of images.

    Loads the images from `fname`, counts images with very bright pixels, computes the
    histogram/spectrum statistics and scores them against `dict_val`.

    Returns a single dictionary holding, in this order: the chi-square entries,
    'num_imgs', 'num_large', 'num_vlarge', and the histogram/spectrum entries.
    '''
    ### Load the images stored in the file
    images=f_get_images(fname,img_type)
    ### Count images with a pixel above each threshold (thresholds are in transformed space)
    num_large=f_high_pixel(images,cutoff=0.9898) # pixels over 780
    num_vlarge=f_high_pixel(images,cutoff=0.9973) # pixels over 3000
    ### Histogram and spectrum statistics of the sample
    dict_sample=f_compute_hist_spect(images,bins)
    ### Chi-squares of the sample against the reference statistics
    dict_chisqrs=f_compute_chisqr(dict_val,dict_sample)

    counts={'num_imgs':images.shape[0],'num_large':num_large,'num_vlarge':num_vlarge}
    return {**dict_chisqrs,**counts,**dict_sample}




In [26]:
## Extract image data
## Notebook configuration cell: builds the argument namespace by hand (instead of
## reading the command line), validates the results folder and defines the histogram bins.
# args=parse_args()

# Hand-built stand-in for the parsed command-line arguments
args=argparse.Namespace()
args.folder='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/128sq/20201202_094018_cgan_model1/'
# NOTE(review): 'unenven' is not equal to 'uneven', so the comparison below is False and the
# evenly spaced bins of the else-branch are used — confirm whether this is intended.
args.bins_type='unenven'
args.cores=1
# Here val_data is a directory prefix; the per-sigma file name is appended to it in the loop cell
args.val_data='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_5_4univ_cgan/'

print(args)
fldr_name=args.folder

# Drop a single trailing '/' so later path concatenation does not produce '//'
main_dir=fldr_name
if main_dir.endswith('/'): main_dir=main_dir[:-1]

assert os.path.exists(main_dir), "Directory doesn't exist"
print("Analyzing data in",main_dir)
num_cores=args.cores

## Define bin-edges for histogram
if args.bins_type=='uneven':
    # Variable-width edges: unit steps up to 20.5, steps of 5 up to 100.5, steps of 50 up to 1000.5, then one edge at 2000
    bins=np.concatenate([np.array([-0.5]),np.arange(0.5,20.5,1),np.arange(20.5,100.5,5),np.arange(100.5,1000.5,50),np.array([2000])])
else : bins=np.arange(0,1510,10)  # evenly spaced edges 0,10,...,1500
print("Bins",bins)
transform=False ## Images are in transformed space (-1,1), convert bins to the same space

# With transform=False the bin edges themselves are mapped through f_transform
if not transform: bins=f_transform(bins)   ### scale to (-1,1)
print(bins)

Namespace(bins_type='unenven', cores=1, folder='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/128sq/20201202_094018_cgan_model1/', val_data='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_5_4univ_cgan/')
Analyzing data in /global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/128sq/20201202_094018_cgan_model1
Bins [   0   10   20   30   40   50   60   70   80   90  100  110  120  130
  140  150  160  170  180  190  200  210  220  230  240  250  260  270
  280  290  300  310  320  330  340  350  360  370  380  390  400  410
  420  430  440  450  460  470  480  490  500  510  520  530  540  550
  560  570  580  590  600  610  620  630  640  650  660  670  680  690
  700  710  720  730  740  750  760  770  780  790  800  810  820  830
  840  850  860  870  880  890  900  910  920  930  940  950  960  970
  980  990 1000 1010 1020 1030 1040 1050 1060 1070 1080 1090 1100 1110
 1120 1130 1140 1150 

In [14]:
# Show the last two entries of label_list (label_list is assigned in the loop cell further down)
label_list[-2:]

[2, 3]

In [28]:

# Driver loop: each sigma value is paired (by position) with a label. For every pair the
# reference data is loaded, the generated-image files of that label are listed, and the
# histogram/spectrum/chi-square quantities are computed per file and added as dataframe columns.
sigma_list=[0.5,0.65,0.8,1.1];label_list=[0,1,2,3];
for count,(sigma,label) in enumerate(zip(sigma_list,label_list)):

    ### Extract validation data
    fname=args.val_data+'norm_1_sig_%s_train_val.npy'%(sigma)
    print("Using validation data from ",fname)
    # Memory-map the file, keep the first 8000 entries and index 0 along axis 1
    s_val=np.load(fname,mmap_mode='r')[:8000][:,0,:,:]
    print(s_val.shape)

    ### Get dataframe with file names, sorted by epoch and step
    # .head(20) restricts the run to the first 20 files
    df_files=f_get_sorted_df(main_dir,label).head(20)
    ### Compute 
    t1=time.time()

    ### Compute histogram and spectrum of raw data 
    dict_val=f_compute_hist_spect(s_val,bins)

    ### Parallel CPU test
#   ##Using pandarallel : https://stackoverflow.com/questions/26784164/pandas-multiprocessing-apply

    df=df_files.copy()
    # NOTE(review): initialize is called without nb_workers, so num_cores is not used here
    # (the variant that passes it is commented out below); it is also re-run on every iteration.
    pandarallel.initialize(progress_bar=True)
    # pandarallel.initialize(nb_workers=num_cores,progress_bar=True)

    t2=time.time()
    # One dictionary of computed quantities per file (row)
    dict1=df.parallel_apply(lambda row: f_get_computed_dict(fname=row.fname,img_type='train_gen',bins=bins,dict_val=dict_val),axis=1)
    # The column names are taken from the first row's dictionary
    keys=dict1[0].keys()
    ### Convert list of dicts to dict of lists
    dict_list={key:[k[key] for k in dict1] for key in keys}
    ### Add columns to Dataframe
    for key in dict_list.keys():
        df[key]=dict_list[key]

    t3=time.time()
    print("Time ",t3-t2)
    # display is not defined in this file; presumably the IPython/Jupyter builtin
    display(df.head(5))

    ### Save to file
#     fname='/df_processed_{0}.pkle'.format(label)
#     df.to_pickle(main_dir+fname)
#     print("Saved file at ",main_dir+fname)



Using validation data from  /global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_5_4univ_cgan/norm_1_sig_0.5_train_val.npy
(8000, 128, 128)
Number of files 828
time for sorting 0.04817366600036621


  hist_arr=np.array([np.histogram(arr.flatten(), bins=bins, range=(llim,ulim), density=norm) for arr in img_arr]) ## range is important


INFO: Pandarallel will run on 64 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1), Label(value='0 / 1'))), HBox(c…

Time  1.3961498737335205


Unnamed: 0,epoch,step,img_type,fname,chi_1a,chi_1b,chi_1c,chi_1,chi_2,chi_imgvar,...,chi_spec2,chi_spec3,num_imgs,num_large,num_vlarge,hist_val,hist_err,hist_bin_centers,spec_val,spec_sdev
0,0,50,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.065435,0.0,0.0,0.065435,0.000565,0.01139081,...,157717.752598,33.569193,64,0,0,"[0.6999919896357361, 4.806518548107147e-05, 0....","[2.4754233096466883e-06, 1.4852539826945609e-0...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[89576.60516897636, 28697.78469135861, 18538.5...","[10430.73215551764, 3293.4063665626263, 2231.3..."
1,0,100,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.065531,0.0,0.0,0.065531,0.000567,3.649114e-14,...,541260.333944,35.185946,64,0,0,"[0.7000000004999996, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.551115123125783e-17, 0.0, 0.0, 0.0, 0.0, 0....","[-0.2857142862244898, 0.5476190467616213, 0.71...","[14228.27129673725, 6107.935841116228, 5419.86...","[1261.380232430351, 572.8288998070738, 493.789..."
2,0,150,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.050843,0.0,0.0,0.050843,0.0003,1.805601,...,250922.396624,36.07399,64,0,0,"[0.6951040272909362, 0.028871154745630262, 0.0...","[0.00035479015429220294, 0.002027733941240742,...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[86643.52490022779, 29253.6656764917, 17356.64...","[14425.988648297976, 4829.303051450552, 3241.9..."
3,0,200,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.06417,0.0,0.0,0.06417,0.000541,0.1505331,...,54573.205041,35.576153,64,0,0,"[0.6998845105402005, 0.0006929397573521136, 0....","[3.2713477032414136e-05, 0.0001962808617855665...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[175141.0837815106, 38197.53267521793, 16679.7...","[51321.69928327323, 11062.825673203688, 4661.1..."
4,0,250,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.064753,0.0,0.0,0.064753,0.000552,0.08578933,...,7634.955365,35.394299,64,0,0,"[0.6999345784418476, 0.000392532348095417, 0.0...","[1.8643524941353936e-05, 0.0001118611494150843...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[127203.37602999806, 66659.70887230635, 27372....","[62403.61536831027, 30434.482410179535, 12230...."


Using validation data from  /global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_5_4univ_cgan/norm_1_sig_0.65_train_val.npy
(8000, 128, 128)
Number of files 828
time for sorting 0.03362679481506348


  hist_arr=np.array([np.histogram(arr.flatten(), bins=bins, range=(llim,ulim), density=norm) for arr in img_arr]) ## range is important


INFO: Pandarallel will run on 64 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1), Label(value='0 / 1'))), HBox(c…

Time  1.2640659809112549


Unnamed: 0,epoch,step,img_type,fname,chi_1a,chi_1b,chi_1c,chi_1,chi_2,chi_imgvar,...,chi_spec2,chi_spec3,num_imgs,num_large,num_vlarge,hist_val,hist_err,hist_bin_centers,spec_val,spec_sdev
0,0,50,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.133525,0.000918,0.0,0.134444,0.001319,0.004067792,...,203126.226308,34.522563,64,0,0,"[0.6999926572077582, 4.4059753357648846e-05, 0...","[2.2250662195300864e-06, 1.3350397289378043e-0...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[90927.18164037727, 28088.132446299234, 17881....","[10912.663570750283, 3398.103195346871, 2326.5..."
1,0,100,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.133613,0.000918,0.0,0.134532,0.001322,1.449766e-14,...,681793.619055,36.117919,64,0,0,"[0.7000000004999996, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.551115123125783e-17, 0.0, 0.0, 0.0, 0.0, 0....","[-0.2857142862244898, 0.5476190467616213, 0.71...","[13962.110983791994, 6065.300878071599, 5388.5...","[1258.208252760973, 576.6017511514799, 516.768..."
2,0,150,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.108312,0.000918,0.0,0.10923,0.000706,0.7186021,...,318639.311726,36.51668,64,0,0,"[0.6954318051537196, 0.02699260707974506, 0.00...","[0.00035961170394414833, 0.0020694923150555646...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[88654.65215328336, 28741.318625770607, 16414....","[15428.509775547884, 5034.378222275879, 3294.0..."
3,0,200,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.132818,0.000918,0.0,0.133736,0.001302,0.03664856,...,58484.427081,34.313187,64,0,0,"[0.6999332432978037, 0.0004005432123422622, 0....","[2.0046622053670727e-05, 0.0001202797320714430...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[139350.46328428015, 33598.826201422526, 16267...","[44026.320714662616, 10434.960123177983, 5000...."
4,0,250,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.132441,0.000918,0.0,0.133359,0.001293,0.05102154,...,8094.482789,34.817323,64,0,0,"[0.6999011998407494, 0.000592803954266548, 0.0...","[2.7908582729588156e-05, 0.0001674514960286730...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[124621.12496832013, 66683.65831346512, 32816....","[65310.79498408747, 33979.44834678707, 17257.2..."


Using validation data from  /global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_5_4univ_cgan/norm_1_sig_0.8_train_val.npy
(8000, 128, 128)
Number of files 828
time for sorting 0.030637502670288086


  hist_arr=np.array([np.histogram(arr.flatten(), bins=bins, range=(llim,ulim), density=norm) for arr in img_arr]) ## range is important


INFO: Pandarallel will run on 64 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1), Label(value='0 / 1'))), HBox(c…

Time  1.0534305572509766


Unnamed: 0,epoch,step,img_type,fname,chi_1a,chi_1b,chi_1c,chi_1,chi_2,chi_imgvar,...,chi_spec2,chi_spec3,num_imgs,num_large,num_vlarge,hist_val,hist_err,hist_bin_centers,spec_val,spec_sdev
0,0,50,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.208848,0.004969,0.0,0.213818,0.00221,0.00273188,...,219038.518494,35.237302,64,0,0,"[0.6999906544916922, 5.6076049727916714e-05, 0...","[2.7474056128106594e-06, 1.6484433642528343e-0...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[91724.5262658149, 28978.384943462344, 18891.6...","[10242.236324017562, 3241.260578371686, 2278.3..."
1,0,100,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.208961,0.004969,0.0,0.21393,0.002213,7.885352e-15,...,789465.49442,36.761672,64,0,0,"[0.7000000004999996, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.551115123125783e-17, 0.0, 0.0, 0.0, 0.0, 0....","[-0.2857142862244898, 0.5476190467616213, 0.71...","[14458.456927964464, 6318.466407467797, 5574.5...","[1168.917050545498, 553.3920217459556, 508.686..."
2,0,150,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.180779,0.004969,0.0,0.185749,0.00145,0.3956416,...,327908.033498,37.032138,64,0,0,"[0.6945519452287748, 0.032051467851627824, 0.0...","[0.0003549072958479182, 0.0020092101042914847,...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[90114.56414166838, 30474.96209737472, 18255.2...","[14284.5012681425, 4753.891699396369, 3214.066..."
3,0,200,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.205398,0.004969,0.0,0.210368,0.002115,0.06653164,...,66786.082795,35.182654,64,0,0,"[0.6996929173698978, 0.0018424987767744062, 0....","[6.690973503579329e-05, 0.00040145840937838204...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[197308.06338629872, 43316.24796358541, 20460....","[48874.49729812784, 10970.031384165928, 5114.2..."
4,0,250,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.204565,0.004969,0.0,0.209535,0.002093,0.08337473,...,7372.854023,36.722055,64,0,0,"[0.69962215473557, 0.002251052853363513, 3.890...","[8.128066324357122e-05, 0.0004823334893526516,...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[197642.3197889924, 89556.02324528691, 46561.5...","[88378.96441276264, 41009.75049888433, 22665.3..."


Using validation data from  /global/cfs/cdirs/m3363/vayyar/cosmogan_data/raw_data/128_square/dataset_5_4univ_cgan/norm_1_sig_1.1_train_val.npy
(8000, 128, 128)
Number of files 828
time for sorting 0.026781082153320312


  hist_arr=np.array([np.histogram(arr.flatten(), bins=bins, range=(llim,ulim), density=norm) for arr in img_arr]) ## range is important


INFO: Pandarallel will run on 64 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1), Label(value='0 / 1'))), HBox(c…

Time  1.197822093963623


Unnamed: 0,epoch,step,img_type,fname,chi_1a,chi_1b,chi_1c,chi_1,chi_2,chi_imgvar,...,chi_spec2,chi_spec3,num_imgs,num_large,num_vlarge,hist_val,hist_err,hist_bin_centers,spec_val,spec_sdev
0,0,50,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.355493,0.028677,0.000115,0.384285,0.004333,0.00171599,...,231240.228641,36.463829,64,0,0,"[0.6999899869196703, 6.0081481851339336e-05, 0...","[3.499782536826079e-06, 2.0998695177213308e-05...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[91757.82961613405, 28397.035377204353, 18351....","[10587.039695948022, 3314.2120789665264, 2203...."
1,0,100,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.355613,0.028677,0.000115,0.384405,0.004336,3.888264e-15,...,776578.773233,37.809946,64,0,0,"[0.7000000004999996, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.551115123125783e-17, 0.0, 0.0, 0.0, 0.0, 0....","[-0.2857142862244898, 0.5476190467616213, 0.71...","[13741.638382116798, 6066.483937240207, 5390.8...","[1236.1010778367004, 554.982202881101, 493.461..."
2,0,150,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.325007,0.028677,0.000115,0.3538,0.00344,0.1919933,...,329391.658064,37.773245,64,0,0,"[0.6950439458089599, 0.029215621908244612, 0.0...","[0.000353786719683728, 0.0020194765042881786, ...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[90831.41109382361, 28490.942314088352, 16767....","[14699.914619637682, 4911.776636177021, 3245.5..."
3,0,200,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.354626,0.028677,0.000115,0.383419,0.004307,0.01173681,...,62181.009942,35.472687,64,0,0,"[0.6999172215692765, 0.0004966735833044051, 0....","[2.3937367839693666e-05, 0.0001436242067389440...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[164996.02975438535, 39737.268643446456, 16087...","[48668.30956846613, 11601.681345969753, 4672.5..."
4,0,250,train,/global/cfs/cdirs/m3363/vayyar/cosmogan_data/r...,0.354887,0.028677,0.000115,0.38368,0.004315,0.007961396,...,10230.435035,33.558721,64,0,0,"[0.6999392514460011, 0.00036449432323145856, 0...","[1.6237368644476938e-05, 9.742421166390054e-05...","[-0.2857142862244898, 0.5476190467616213, 0.71...","[123238.64544549584, 66804.17211416065, 26641....","[55433.65600520117, 29988.370687470255, 11295...."


In [19]:
def f_batch_histogram(img_arr,bins,norm,hist_range):
    ''' Compute histogram statistics for a batch of images

    Input :
        img_arr    : array of images, iterated along the first axis (one histogram per image)
        bins       : passed to np.histogram (number of bins or sequence of bin edges)
        norm       : passed to np.histogram as `density`
        hist_range : (lower, upper) pair, or None to use the min and max of the whole batch
    Output: tuple (mean, err, centers)
        mean    : mean over images of the per-image histograms
        err     : standard deviation over images divided by sqrt(number of images)
        centers : mid-points of the bin edges
    '''

    ## Extracting the range. This is important to ensure that the different histograms are compared correctly
    ## `is None` (not `== None`) so that an array-valued hist_range does not trigger an elementwise comparison
    if hist_range is None : ulim,llim=np.max(img_arr),np.min(img_arr)
    else: ulim,llim=hist_range[1],hist_range[0]

    ### Histogram of each image. The (counts, edges) pairs are kept in a plain list: the two arrays have
    ### different lengths, so packing them into one np.array would create a ragged array.
    results=[np.histogram(arr.flatten(), bins=bins, range=(llim,ulim), density=norm) for arr in img_arr] ## range is important
    hist=np.stack([counts for counts,_ in results]) # one row of histogram values per image

    ### Compute statistics over histograms of individual images
    mean,err=np.mean(hist,axis=0),np.std(hist,axis=0)/np.sqrt(hist.shape[0])
    bin_edges=results[0][1] # edges of the first image's histogram, as before
    centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    return mean,err,centers



In [21]:
# Ad-hoc check: load one generated-image file and print the shape of the stored array
fname='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_from_other_code/pytorch/results/128sq/20201202_094018_cgan_model1/images/gen_img_label-0_epoch-8_step-35650.npy'
a1=np.load(fname)
print(a1.shape)

(64, 128, 128)
