# Code to view image details
April 23, 2020



In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import sys
import time

In [2]:
### Select the interactive 'widget' backend for matplotlib figures in this notebook
%matplotlib widget

In [3]:
### Add the directory holding the analysis modules to the module search path
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/LBANN/lbann_cosmogan/3_analysis/')
### NOTE(review): wildcard import; presumably this is where f_pixel_intensity (used further down) comes from -- confirm
from modules_image_analysis import *

[NbConvertApp] Converting notebook modules_image_analysis.ipynb to script
[NbConvertApp] Writing 13913 bytes to modules_image_analysis.py


In [4]:
### Directory holding the gathered data files (the trailing '/' is needed: file names are appended directly)
data_dir = '/global/project/projectdirs/dasrepo/vpa/supernova_cnn/data/gathered_data/'

In [5]:
### Read the summary table of labelled files; lines starting with '#' are skipped as comments
fname1 = '{0}summary_label_files.csv'.format(data_dir)
df1 = pd.read_csv(fname1, comment='#', sep=',')
print(df1.shape)

(2696889, 4)


In [6]:
### Number of rows per class: Label==1 is counted as signal, Label==0 as background
num_sig = len(df1[df1.Label == 1])
num_bkgnd = len(df1[df1.Label == 0])
print(
    "Proportion of Signal-Background: {0}-{1}.\nProportion of Signal: {2}".format(
        num_sig, num_bkgnd, num_sig*1.0/(num_sig+num_bkgnd)
    )
)

Proportion of Signal-Background: 1334613-1362276.
Proportion of Signal: 0.49487131283489977


### Extract a slice of data

In [7]:
### Keep only the first `size` rows of signal (Label==1) and of background (Label==0)
size = 3000
df_sig = df1.loc[df1.Label == 1].head(size)
df_bkg = df1.loc[df1.Label == 0].head(size)

### Drop the name of the full table; note that this cell cannot be re-run without reloading df1
del df1

In [8]:
#df_sig
# df_bkg

## Extract image arrays

In [9]:
def f_get_image_arr(df,mode='type',ftype='diff',idx=5):
    '''
    Module to get image arrays from dataframe with filenames
    Input:
        df: dataframe with the columns 'file path', 'filename' (read in mode 'type')
            and 'ID' (read in mode 'index')
        mode: one of
            'type': Gives all the images whose filename contains ftype
            'index': Gives the images of all file types that share a single ID
        ftype: type of files to extract : srch, temp, diff (only used in mode 'type')
        idx: position, in the sorted array of unique IDs of df, of the ID to extract
             (only used in mode 'index')
    Returns:
        New dataframe holding the selected rows (index reset) plus a column 'image'
        with the array read from each 'file path'. The input dataframe is not modified.
    Raises:
        ValueError: if mode is neither 'type' nor 'index'
    '''
    
    if mode=='type': ### Pick all images of type=ftype
        selection=df.filename.str.contains(ftype)
        
    elif mode=='index': ### Pick 'srch','temp','diff' for a single ID
        ### The IDs are taken from the dataframe that was passed in, so the function
        ### works for any input and does not rely on a notebook-level variable
        index=np.unique(df.ID.values)[idx]
        selection=df.ID==index
        
    else:
        raise ValueError("mode must be 'type' or 'index', got {0!r}".format(mode))
    
    ### reset_index returns a new dataframe, so adding a column below leaves df untouched
    df2=df[selection].reset_index(drop=True)
    ### Read .gif files and store them in an array
    imgs=[plt.imread(fle) for fle in df2['file path']]
    
    df2['image']=imgs
    return df2
    
# df=f_get_image_arr(df_sig,mode='index',idx=0)
# df=f_get_image_arr(df_sig,mode='type',ftype='diff')


In [10]:
### Stack the 'temp' images of the signal sample into one array and histogram their pixel values
df = f_get_image_arr(df_sig, ftype='temp', mode='type')
img_arr = np.stack(df['image'].values)
f_pixel_intensity(img_arr, normalize=False)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(1000, 25)


(array([798.269, 368.98 , 172.588, 113.127,  87.747,  85.881,  75.259,
         75.061,  75.039,  75.158,  80.794,  69.594,  64.968,  59.656,
         52.888,  49.035,  36.766,  29.692,  23.503,  18.121,  15.131,
         10.784,   9.455,   7.583, 145.921]),
 array([28.92149447, 15.44668131,  8.17300974,  5.64064029,  3.90776343,
         3.51852566,  3.00426196,  2.86808216,  2.88649744,  2.98946501,
         3.24046934,  2.87520002,  2.77020306,  2.62313013,  2.402059  ,
         2.27022285,  1.73953075,  1.43718653,  1.14525455,  0.89853679,
         0.76295468,  0.66894345,  1.25331081,  1.14814856,  5.8262958 ]))

In [11]:

def f_compare_pixel_intensity_images(df_input,title,mode='normal'):
    '''
    Compare pixel intensity histogram of all 3 file types ('srch','temp','diff')
    by drawing them on one new figure.
    Input:
        df_input: dataframe with filenames, passed on to f_get_image_arr
        title: string appended to the plot title
        mode: one of
            'normal': takes all values of all images and computes a single histogram
            'avg' (alias 'averaged'): takes the histogram of each image on a common
                set of bins and plots the per-bin mean with the standard error of
                the mean as error bar
    Raises:
        ValueError: if mode is not one of the values listed above
    '''
    
    if mode not in ('normal','avg','averaged'):
        ### Fail before opening a figure instead of silently drawing an empty plot
        raise ValueError("mode must be 'normal' or 'avg', got {0!r}".format(mode))
    
    num_bins=25
    norm=True  ### Histograms are normalized to a density
    
    plt.figure()

    for ftype in ['srch','temp','diff']:
        df=f_get_image_arr(df_input,mode='type',ftype=ftype)
        img_arr=np.stack(df.image.values)  ### Extract the image array samples
        
        if mode=='normal':
            hist, bin_edges = np.histogram(img_arr.flatten(), bins=num_bins, density=norm)
            centers = (bin_edges[:-1] + bin_edges[1:]) / 2
            plt.errorbar(centers, hist, fmt='o-', label=ftype)

        else:
            ### Use one set of edges, spanning the pixel range of all images of this type,
            ### for every image. Averaging bin by bin is only meaningful when the bins agree.
            bin_edges=np.histogram_bin_edges(img_arr, bins=num_bins)
            centers = (bin_edges[:-1] + bin_edges[1:]) / 2
            ### Keep only the counts of each histogram so the stacked array is rectangular
            hist=np.stack([np.histogram(arr.flatten(), bins=bin_edges, density=norm)[0] for arr in img_arr])
            ### Compute statistics of histogram of each image
            mean,err=np.mean(hist,axis=0),np.std(hist,axis=0)/np.sqrt(hist.shape[0])

            plt.errorbar(centers,mean,yerr=err,fmt='o-',label=ftype)
        
        
    plt.xlabel('Pixel value')
    ### NOTE: with norm=True the plotted values are densities rather than raw counts
    plt.ylabel('Counts')
    plt.title('Pixel Intensity Histogram of '+title)
    plt.legend(loc='best')
    
    
### Per-image averaged pixel histograms of the three file types, signal sample
f_compare_pixel_intensity_images(df_sig, title='signal', mode='avg')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
### Per-image averaged pixel histograms of the three file types, background sample
f_compare_pixel_intensity_images(df_bkg, mode='avg', title='bkgnd')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Plot power spectrum

In [13]:
# f_compute_spectrum(img_arr)

In [14]:
# df=f_get_image_arr(df_sig,mode='index',idx=0)
# df

In [15]:
### Array of the image arrays stored in the 'image' column of the current dataframe
imgs = df['image'].values