# Extract data from output files
### Analyze the output from a single LBANN run
March 9, 2020

April 6, 2020 : Major edit to store files in order of epochs 

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import glob
import sys

import time
from scipy import fftpack

In [2]:
%matplotlib widget

In [3]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/LBANN/lbann_cosmogan/3_analysis/')
from modules_image_analysis import *

[NbConvertApp] Converting notebook modules_image_analysis.ipynb to script
[NbConvertApp] Writing 11079 bytes to modules_image_analysis.py


In [4]:
### Transformation functions for image pixel values
def f_transform(x):
    '''
    Rescale pixel values as 2*x/(x+4) - 1, with 1e-5 added to the denominator
    x=0 gives -1 and large x approaches +1
    Only arithmetic operators are used, so scalars and numpy arrays are both accepted
    '''
    denominator=x + 4. + 1e-5
    scaled=2.*x/denominator
    return scaled - 1.

def f_invtransform(s):
    '''
    Map transformed values back to pixel values as 4*(1+s)/(1-s), with 1e-5 added to the denominator
    s=-1 gives 0
    Only arithmetic operators are used, so scalars and numpy arrays are both accepted
    '''
    numerator=4.*(1. + s)
    denominator=1. - s + 1e-5
    return numerator/denominator

In [5]:

def f_get_samples(df,key):
    '''
    Extract array of samples from the DataFrame with images
    Images are of two types:
    1. *_gen have shape (64,1,128,128)
    2. *_input have shape (64,16384)
    Only the first image of each stored array is used.

    Arguments:
        df  : DataFrame with a 'type' column and an 'image' column holding one numpy array per row
        key : one of 'train_gen','train_input','val_gen','val_input'
    Returns:
        numpy array of shape (number of rows of this type, size, size)
        An empty array when df has no row of this type
    Raises:
        AssertionError if key is not one of the four allowed values
    '''

    keys=['train_gen','train_input','val_gen','val_input']
    assert key in keys,"Given key %s is not in the list of keys %s"%(key,keys)

    lst=df[df.type==key]['image'].values
    if len(lst)==0: ### No image of this type: same empty result for both image types
        return np.array([])

    if key.endswith('input'):
        ### Inputs are stored flattened: recover the side of the square image (=128)
        ### Built-in int is used because the np.int alias was removed from numpy
        size=int(np.sqrt(lst[0].shape[-1]))
        samples=np.array([ii[0,:].reshape(size,size) for ii in lst])
    else:
        samples=np.array([ii[0,0,:,:] for ii in lst])

    return samples

## Extract image data 

In [17]:
### Output folders of the runs looked at so far
### Kept in one list so that the selected run is a single explicit choice instead of a chain of overwritten assignments
run_folders=['20200316_112134_exagan',
             '20200406_080207_exagan_with_mcr',
             '20200407_093719_exagan_no_mcr',
             '20200409_084926_exagan_no_mcr',
             '20200409_083646_exagan_with_mcr',
             '20200413_095840_exagan',
             # '20200415_093035_exagan',
            ]
### Run to analyse: the last entry of the list. Change the index to pick another run
fldr_name=run_folders[-1]

### Code for set of runs
# f_list=['20200401_125919_exagan_0.1_1','20200401_130321_exagan_0.1_4',
#         '20200401_130907_exagan_0.3_1','20200401_130646_exagan_0.3_4']
# fldr_name=f_list[0]


### Folder holding the dumped .npy outputs of the selected run
main_dir='/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/{0}/dump_outs/'.format(fldr_name)
print(main_dir)


/global/cfs/cdirs/m3363/vayyar/cosmogan_data/results_data/20200413_095840_exagan/dump_outs/


In [18]:

## Get images files and .npy arrays for each image in dump_outs folder
files_dict={}
keys=['train_gen','train_input','val_gen','val_input']
file_strg_lst=['model0-training*-gen_img*-output0.npy','model0-training*-inp_img*-output0.npy','model0-validation*-gen_img*-output0.npy','model0-validation*-inp_img*-output0.npy']
for key,file_strg in zip(keys,file_strg_lst):
    files_dict[key]=np.array(glob.glob(main_dir+file_strg))
    if files_dict[key].shape[0]>1000:
        print('Warning the number of files is very large. Possibility of memory overload')

t1=time.time()
### First get sorted Dataframe with file names
### Rows are collected in a list and converted to a DataFrame once:
### DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call
records=[]
for key in keys:
    files_arr=files_dict[key]  # Get array of files
    print(key,len(files_arr))
    for fname in files_arr:
        ### Extract the Epoch number and step number from the file name
        records.append({'type':key,
                        'epoch':int(fname.split('epoch')[-1].split('-')[0]),
                        'step':int(fname.split('step')[-1].split('-')[0]),
                        'fname':fname})
### Explicit columns so that sorting also works when no file was found
df_files=pd.DataFrame(records,columns=['type','epoch','step','fname'])
## Sort values
df_files=df_files.sort_values(by=['type','epoch','step']).reset_index(drop=True)
# df_files
print("Sorting done")

t2=time.time()
### Then read images one by one into a numpy array and create a new DataFrame
sorted_fnames=df_files.fname.values
### Read images one by one. This is time-consuming.
### Deliberately kept as list because some of the input arrays have different dimensions, causing creation of array of arrays in some cases
images=[np.load(fname) for fname in sorted_fnames]

##### Create new Dataframe with sorted images
df_full=pd.DataFrame([])
df_full['image']=images
t3=time.time()
### Same row order as df_files, so the metadata columns line up with the images
for col in ['epoch','step','type','fname']: df_full[col]=df_files[col].values
print("Extraction done")

print("Time for Sorting",t2-t1)
print("Time for Reading images",t3-t2)

df=df_full.copy()
print(df.shape)


train_gen 1095
train_input 1095
val_gen 121
val_input 121
Sorting done
Extraction done
Time for Sorting 8.469172954559326
Time for Reading images 99.09673547744751
(2432, 5)


In [19]:
## Slice DataFrame before getting samples. Get 1 image per epoch (choose the last step)

def f_filter_epoch(df_input):
    '''
    Get just the last stored step image for each epoch

    For each image type, the rows of every epoch are selected and the last one (in row order) is kept,
    so df_input is expected to be sorted by step within each epoch.

    Arguments:
        df_input : DataFrame with at least the columns 'type' and 'epoch'
    Returns:
        DataFrame with one row per (type, epoch) pair found in df_input, ordered by type
        ('train_gen','train_input','val_gen','val_input') and then by epoch, with a fresh 0..n-1 index.
        An empty DataFrame with the columns of df_input when no row has one of the four types.
    '''
    ### Selected rows are gathered in a list and joined once with pd.concat
    ### (DataFrame.append was removed in pandas 2.0)
    selected=[]
    for key in ['train_gen','train_input','val_gen','val_input']:
        ### For each type of images, get list of epochs
        df1=df_input[df_input.type==key]
        for epoch in np.unique(df1.epoch.values):
            ### Extract the last step in each epoch. iloc[[-1]] keeps it as a one-row DataFrame
            selected.append(df1[df1.epoch==epoch].iloc[[-1]])

    if not selected: ### pd.concat raises on an empty list
        return df_input.iloc[0:0].reset_index(drop=True)

    return pd.concat(selected).reset_index(drop=True)

### Overwrite df with the filtered DataFrame (last stored step of every epoch)
df=f_filter_epoch(df_full)
df.shape

(242, 5)

## Extract samples 

In [20]:
### Available options : keys=['train_gen','train_input','val_gen','val_input']
### samples1 / samples3 : training input / generated images
### samples2 / samples4 : validation generated / input images
samples1,samples2=f_get_samples(df,'train_input'),f_get_samples(df,'val_gen')
samples3,samples4=f_get_samples(df,'train_gen'),f_get_samples(df,'val_input')

### One shape per line, in the order samples1 ... samples4
print(samples1.shape,samples2.shape,samples3.shape,samples4.shape,sep='\n')

(61, 128, 128)
(60, 128, 128)
(61, 128, 128)
(60, 128, 128)


## Find the region without very high pixel values


In [22]:
def f_plot_max_values(samples,cutoff=0.994):
    '''
    Plot the maximum pixel value of every image in samples against its position in samples
    Images with a maximum below cutoff are drawn as red stars, the others as blue diamonds
    A dashed horizontal line marks the cutoff and the y axis is limited to (0.9,1.0)
    '''
    ### Largest pixel value of each image
    peaks=np.array([np.max(img) for img in samples])
    ### Positions of the images below / at-or-above the cutoff
    below=np.flatnonzero(peaks<cutoff)
    above=np.flatnonzero(peaks>=cutoff)

    plt.figure()
    for positions,marker,color in ((below,'*','r'),(above,'D','b')):
        plt.plot(positions,peaks[positions],linestyle='',marker=marker,color=color)

    plt.axhline(y=cutoff,linestyle='--',color='k')
    plt.ylim(0.9,1.0)
    
### samples2 holds the 'val_gen' images; 0.9945 is the cutoff drawn as the dashed line
f_plot_max_values(samples2,0.9945)
# f_plot_max_values(samples4,0.992)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Compare images

In [23]:
### samples4 : 'val_input' images, samples2 : 'val_gen' images (pixel values as stored, no inverse transform)
### f_pixel_intensity and f_compare_pixel_intensity are not defined in this notebook;
### presumably they come from modules_image_analysis (imported with *) -- TODO confirm
f_pixel_intensity(samples4,normalize=False)
f_pixel_intensity(samples2,normalize=False)

f_compare_pixel_intensity(samples4,samples2,label1='input',label2='generated',normalize=True)
# plt.savefig('comparison_intensity.png')


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  


inf

In [28]:
### Restrict the comparison to the slice [start:end] of the samples
### start and end stay defined and are reused by the cells further down
start,end=20,32
# start,end=17,24

f_compare_pixel_intensity(samples4[start:end],samples2[start:end],label1='input',label2='generated',normalize=True)

  plt.xlabel('Pixel value')


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  


inf

In [25]:
# start,end=23,52

In [29]:
### Same kind of plot after mapping the stored values back with f_invtransform
### samples1 : 'train_input' images, samples2 : 'val_gen' images
f_pixel_intensity(f_invtransform(samples1),normalize=False)
# f_pixel_intensity(f_invtransform(samples2),normalize=True)
# f_pixel_intensity(f_invtransform(samples2[:30]),normalize=False)
f_pixel_intensity(f_invtransform(samples2[start:end]),bins=100,normalize=False)
# f_pixel_intensity(f_invtransform(samples2[end:]),normalize=False)


  row,col=int(i/cols),i%cols


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  row,col=int(i/cols),i%cols


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

array([194707,   1116,    304,    147,     98,     51,     31,     28,
           24,     13,     11,      8,     10,      5,      5,      2,
            3,      5,      3,      0,      6,      3,      3,      2,
            2,      2,      0,      2,      1,      1,      0,      0,
            1,      0,      0,      1,      0,      2,      0,      0,
            1,      0,      0,      1,      1,      0,      1,      0,
            0,      0,      1,      0,      1,      0,      0,      1,
            0,      0,      0,      0,      0,      0,      0,      1,
            0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      0,      1,      0,      0,      0,
            0,      0,      0,      0,      0,      0,      1,      0,
            0,      0,      0,      0,      0,      0,      0,      0,
            0,      0,      0,      1])

In [30]:
### Inverse-transformed values: all 'train_input' samples against the [start:end] slice of the 'val_gen' samples
f_compare_pixel_intensity(f_invtransform(samples1),f_invtransform(samples2[start:end]),label1='input',label2='generated',bins=50,normalize=True)


  plt.xlabel('Pixel value')


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  
  


nan

In [31]:
### NOTE(review): the input uses the slice [start:end] while the generated images use [end:] -- confirm the different ranges are intended
f_compare_pixel_intensity(f_invtransform(samples1[start:end]),f_invtransform(samples2[end:]),label1='input',label2='generated',normalize=False)


  plt.xlabel('Pixel value')


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  
  


nan

### Plot grid of intensity histograms

In [None]:

### Every 3rd sample of the slice 22:52 of the 'val_gen' images, after f_invtransform
### f_plot_intensity_grid is not defined in this notebook; presumably from modules_image_analysis -- TODO confirm
# f_plot_intensity_grid(samples2[40:80][::5],cols=6)
f_plot_intensity_grid(f_invtransform(samples2[22:52][::3]),cols=6)

### Spectrum

In [None]:
# f_compute_spectrum(samples1)
# f_compute_spectrum(f_invtransform(samples2[51:80]))

In [32]:
### Compare 'val_input' (samples4) and 'val_gen' (samples2) over [start:end]:
### first on the stored values, then on the values mapped back with f_invtransform
### start,end keep the values set in an earlier cell unless one of the lines below is uncommented
# start,end=22,52
# start,end=23,33

f_compare_spectrum(samples4[start:end],samples2[start:end],label1='input',label2='generated')
f_compare_spectrum(f_invtransform(samples4[start:end]),f_invtransform(samples2[start:end]),label1='input',label2='generated')

(12, 128, 128) (12, 128, 128) (12, 88) (12, 88)


  def f_compute_spectrum(img_arr):


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(12, 128, 128) (12, 128, 128) (12, 88) (12, 88)


  def f_compute_spectrum(img_arr):


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

90333129.54537441

In [None]:
### end=None makes the slice run from start to the last sample
start,end=33,None
f_compare_spectrum(samples4[start:end],samples2[start:end],label1='input',label2='generated')
f_compare_spectrum(f_invtransform(samples4[start:end]),f_invtransform(samples2[start:end]),label1='input',label2='generated')