# Code to normalize the images
April 7, 2020



In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import subprocess as sp
import os
import sys
import time

In [2]:
%matplotlib widget

In [3]:
sys.path.append('/global/u1/v/vpa/project/jpt_notebooks/Cosmology/Cosmo_GAN/LBANN/lbann_cosmogan/3_analysis/')
from modules_image_analysis import *

[NbConvertApp] Converting notebook modules_image_analysis.ipynb to script
[NbConvertApp] Writing 10903 bytes to modules_image_analysis.py


### Implementing MAD: Median Absolute Deviation
https://en.wikipedia.org/wiki/Median_absolute_deviation

$ MAD = \mathrm{Median}\left( \left| X-\tilde{X} \right| \right) \ \ $   where $ \tilde{X} $ is the median of the array $ X $

$ \sigma = k \cdot MAD $, with $ k = 1.4826 $ for normally distributed data

## Implement normalization with actual data

Procedure for normalization : 

For each sample (3 image types)
- Compute sigma using the MAD method on the diff image
- Divide the entire sample by that value
- Some images give zero sigma; skip normalization for these


In [6]:
# Directory that holds the gathered input .npy files (trailing '/' is required
# because the file name is appended by plain string concatenation below).
save_location='/global/project/projectdirs/dasrepo/vpa/supernova_cnn/data/gathered_data/input_npy_files/'
# Name of the input file that is loaded in this cell.
f1='full_x.npy'
# Name intended for the renormalized output; only referenced by the
# commented-out np.save call at the end of this cell.
f2='renorm_full_x.npy'

### Read data from .npy file
ip_arr=np.load(save_location+f1)
# The recorded output of this cell shows the shape (898963, 51, 51, 3).
print(ip_arr.shape)

def f_rescale_samples(samples):
    ''' Rescale individual images with the MAD-based sigma of their diff image.

    For every sample, sigma = k * MAD (k = 1.4826) is computed on channel
    index 2 and all channels of that sample are divided by it. Samples whose
    sigma is below 1e-10 are copied unscaled and their index is recorded.

    Arguments:
        samples : array indexed as samples[i][:, :, 2], i.e. with layout
                  (num_samples, height, width, channels) and >= 3 channels.

    Returns:
        scaled_samples : array with the same shape as samples. Floating-point
                         (and complex) inputs keep their dtype; any other
                         dtype (e.g. integers) is returned as float64 so the
                         scaled values are not truncated.
        lst_zeros      : list of sample indices that were left unscaled.
    '''
    def f_mad(arr):
        '''
        Compute MAD and the corresponding sigma (k * MAD) of all values in arr
        '''
        # np.median reduces over the flattened array by default,
        # so no explicit flatten (and extra copy) is needed.
        MD=np.median(arr)
        mad=np.median(np.abs(arr-MD))
        k=1.4826 ### For normal distribution
        sigma=mad*k

        return mad,sigma

    samples=np.asarray(samples)

    # Allocating the output with the input dtype would silently truncate the
    # scaled values for integer images, so fall back to float64 in that case.
    if np.issubdtype(samples.dtype,np.inexact):
        out_dtype=samples.dtype
    else:
        out_dtype=np.float64
    # Every row is overwritten in the loop below, so no initial fill is needed.
    scaled_samples=np.empty(samples.shape,dtype=out_dtype)

    lst_zeros=[] # List to store indices where the MAD value is zero
    ### For every row, compute sigma for the diff image (idx = 2) and multiply its inverse to the whole sample
    for i,row in enumerate(samples):
        scale=f_mad(row[:,:,2])[1]
        if scale<1e-10:
            # Dividing by a (near-)zero sigma would blow up: keep the sample as is
            print("Small value",i,scale)
            lst_zeros.append(i)
            scale=1.0
        scaled_samples[i]=row*(1.0/scale)

    return scaled_samples,lst_zeros

# Time the rescaling. Note that only the first 10000 samples of ip_arr are
# processed here, not the full array.
t1=time.time()
rescaled_arr,zero_lst=f_rescale_samples(ip_arr[:10000])
t2=time.time()
print(t2-t1)  # elapsed wall-clock time in seconds
# zero_lst holds the indices that f_rescale_samples left unscaled because the
# sigma of their diff image was below its 1e-10 threshold.
print('Number of zero median images',len(zero_lst))
print(rescaled_arr.shape)
# Writing the rescaled array to disk is currently disabled.
# np.save(save_location+f2,rescaled_arr)

(898963, 51, 51, 3)
Small value 1808 0.0
Small value 1856 0.0
Small value 3176 0.0
Small value 5066 0.0
1.830139398574829
Number of zero median images 4
(10000, 51, 51, 3)


### Analyzing the zero NMAD valued points
There are a few images (761, about 0.08% of all samples) for which the diff image has a zero NMAD value. The reason is that most pixels in these images have the same value.

In [7]:
# Display the indices of the samples that were left unscaled by f_rescale_samples.
zero_lst
# Disabled: gather channel index 2 of all those samples into one array.
# zero_imgs=ip_arr[zero_lst][:,:,:,2]
# print(zero_imgs.shape)

[1808, 1856, 3176, 5066]

In [8]:
# Stack channel index 2 (the diff image) of four consecutive samples; index
# 1808 is one of the entries reported in zero_lst, the others are its neighbours.
img_arr=np.array([ip_arr[idx][:,:,2] for idx in [1806,1807,1808,1809]])
# Alternative selections kept for reference:
# img_arr=np.array([ip_arr[idx][:,:,2] for idx in [10168,10167,10169,10170]])
# img_arr=np.array([ip_arr[idx][:,:,2] for idx in zero_lst[-13:-1]])

# NOTE(review): f_plot_grid is not defined in this file; presumably it comes
# from the star import of modules_image_analysis - confirm.
f_plot_grid(img_arr,cols=4)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
# NOTE(review): f_plot_intensity_grid is not defined in this file; presumably
# it comes from the star import of modules_image_analysis - confirm what it plots.
f_plot_intensity_grid(img_arr,cols=2)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …