In [None]:
import numpy as np
import pywt
import matplotlib.pyplot as plt
import random
from scipy import stats
from scipy import fft
import random
import pywt.data
from PIL import Image
import pandas as pd
import seaborn as sns
import os
import pickle

In [None]:
# Dataset location.
# NOTE(review): ROOT_DIR is an absolute path on one machine; adjust it before
# running this notebook anywhere else.
ROOT_DIR = '/Users/brandonmarks/Desktop/Research Materials/hierarchical-bayesian-model-validation/'
data_dir = ROOT_DIR + 'Data/Panoptic Argiculture 2/Toy dataset - Near Infra Red'

# List the directory once and sort it: os.listdir returns entries in arbitrary
# order, so deriving both lists from a single sorted listing guarantees that
# file_names[i] and file_list[i] always refer to the same file.
file_names = sorted(os.listdir(data_dir))
file_list = [os.path.join(data_dir, filename) for filename in file_names]


In [None]:
def getIndexDF(image):
    """Build a lookup table of the non-negative-frequency FFT coordinates of ``image``.

    Parameters
    ----------
    image : array-like with at least two dimensions
        Only ``image.shape[0]`` and ``image.shape[1]`` are used.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``index_coords`` (the ``(x, y)`` tuple) with columns
        ``x_index`` / ``y_index`` (positions along axis 0 / axis 1),
        ``x_freq`` / ``y_freq`` (the matching ``fft.fftfreq`` values) and
        ``magnitude`` (Euclidean norm of the two frequencies). Only rows with
        both frequencies >= 0 are kept.
    """
    n_x, n_y = image.shape[0], image.shape[1]
    x_freqs = fft.fftfreq(n_x)
    y_freqs = fft.fftfreq(n_y)

    # Row-major enumeration of every (x, y) pair. The y axis must run over
    # image.shape[1]; using shape[0] for both axes is only valid for square images.
    x_index = np.repeat(np.arange(n_x), n_y)
    y_index = np.tile(np.arange(n_y), n_x)

    coord_df = pd.DataFrame({
        "index_coords": list(zip(x_index, y_index)),
        "x_index": x_index,
        "y_index": y_index,
        "x_freq": x_freqs[x_index],
        "y_freq": y_freqs[y_index],
    })
    coord_df["magnitude"] = np.sqrt(coord_df["x_freq"] ** 2 + coord_df["y_freq"] ** 2)
    coord_df = coord_df.set_index(["index_coords"])

    # Keep only the quadrant where both frequencies are non-negative.
    non_negative = (coord_df["x_freq"] >= 0) & (coord_df["y_freq"] >= 0)
    return coord_df[non_negative]

In [None]:
def convert_to_fourier_basis(folder_dir, k, grey_scale_only=False):
    """FFT every image in ``folder_dir`` and split the coefficients into ``k`` radial bands.

    The band edges are ``k + 1`` evenly spaced frequency magnitudes between 0
    and the largest magnitude in the non-negative-frequency quadrant of the
    first image (see ``getIndexDF``). All images are therefore required to
    have the same shape as the first one.

    Parameters
    ----------
    folder_dir : str
        Directory containing the images. Hidden entries (names starting with
        ".") and sub-directories are ignored.
    k : int
        Number of frequency bands.
    grey_scale_only : bool, default False
        If true, only the grayscale ("Gray") channel is processed; otherwise
        "Red", "Green", "Blue" and "Gray" are all processed.

    Returns
    -------
    pandas.DataFrame
        One row per (image, channel, band) with columns ``"Image ID"`` (file
        name up to its first "."), ``"Band"`` (1..k), ``"Channel"`` and
        ``"Data"`` (1-D complex array of the FFT coefficients in that band).
        An empty frame with these columns is returned if the folder holds no
        files.

    Raises
    ------
    ValueError
        If an image's shape differs from the first image's shape.
    """
    columns = ["Image ID", "Band", "Channel", "Data"]

    # List the directory once, sorted: os.listdir order is arbitrary, and a
    # single listing keeps names and paths aligned. Hidden files (for example
    # macOS ".DS_Store") are not images and would make Image.open fail.
    file_names = sorted(
        entry for entry in os.listdir(folder_dir)
        if not entry.startswith(".") and os.path.isfile(os.path.join(folder_dir, entry))
    )
    if not file_names:
        return pd.DataFrame(columns=columns)
    file_list = [os.path.join(folder_dir, entry) for entry in file_names]

    # The first image defines the frequency grid shared by all images.
    with Image.open(file_list[0]) as first_image:
        reference = np.array(first_image.convert('L'))
    coord_df = getIndexDF(reference)
    magnitude = coord_df["magnitude"]
    cutoffs = np.linspace(0, magnitude.max(), k + 1)
    # Nudge the last edge up so the maximum magnitude falls inside the last
    # (half-open) band instead of being dropped.
    cutoffs[-1] += 0.0001

    # Pre-compute, for every band, the (axis-0, axis-1) indices it contains.
    band_coords = []
    for j in range(k):
        in_band = coord_df[(magnitude >= cutoffs[j]) & (magnitude < cutoffs[j + 1])]
        band_coords.append((in_band["x_index"].to_numpy(), in_band["y_index"].to_numpy()))

    colors = ["Red", "Green", "Blue", "Gray"]
    colors_id = [3] if grey_scale_only else [0, 1, 2, 3]

    # Collect rows in a list and build the frame once; appending with
    # df.loc[len(df)] copies the frame on every insertion.
    records = []
    for path, file_name in zip(file_list, file_names):
        name = file_name.split(".")[0]
        # Open each file once (the file at position i, not position k) and
        # release the handle as soon as the pixel data has been copied out.
        with Image.open(path) as img:
            gray = np.array(img.convert('L'))
            rgb = None if grey_scale_only else np.array(img.convert('RGB'))
        if gray.shape != reference.shape:
            raise ValueError(
                "Image {!r} has shape {} but the band layout was built for shape {}".format(
                    path, gray.shape, reference.shape
                )
            )

        for c in colors_id:
            channel = gray if c == 3 else rgb[:, :, c]
            transformed = fft.fft2(channel)
            for j, (x_ind, y_ind) in enumerate(band_coords):
                # x_index runs along axis 0 and y_index along axis 1 in
                # getIndexDF, so the indices must be applied in that order.
                records.append((name, j + 1, colors[c], transformed[x_ind, y_ind]))

    return pd.DataFrame(records, columns=columns)

In [None]:
# Transform the dataset into 7 frequency bands, grayscale channel only,
# then display the resulting frame (one row per image/band).
converted = convert_to_fourier_basis(data_dir, 7, grey_scale_only=True)
converted

In [None]:
# NOTE(review): `test` is not defined in any cell visible in this notebook, so
# this line depends on leftover kernel state and will raise NameError after
# Restart & Run All — TODO: define `test` above or remove this cell.
sum(test["Size"])

# ANALYSIS OF COMPLEX COMPONENTS

In [None]:
def ComplexStudy(convertedDF, band, bound = 1e7, bw = 0.02):
    """Compare the real and imaginary parts of the coefficients in one band.

    All ``"Data"`` arrays of rows whose ``"Band"`` equals ``band`` are
    concatenated, then three panels are drawn side by side: a KDE on a linear
    y-axis, a KDE on a log y-axis, and the empirical CDF.

    Parameters
    ----------
    convertedDF : pandas.DataFrame
        Frame with ``"Band"`` and ``"Data"`` columns.
    band : value compared against the ``"Band"`` column.
    bound : float
        Currently unused; kept so existing calls keep working.
    bw : float
        Passed to seaborn as ``bw_method``.
    """
    band_data = convertedDF.loc[convertedDF["Band"] == band, "Data"]
    coefficients = np.concatenate(band_data.to_numpy())
    components = (("Real", coefficients.real), ("Imaginary", coefficients.imag))

    fig, (linear_ax, log_ax, cdf_ax) = plt.subplots(1, 3, figsize=(24, 6))
    panels = (linear_ax, log_ax, cdf_ax)

    # Fix the visible density range of the log panel before anything is drawn on it.
    log_ax.set_ylim(bottom=10**-6, top=10)

    for label, values in components:
        sns.kdeplot(ax=linear_ax, x=values, bw_method=bw, label=label)
    for label, values in components:
        sns.kdeplot(ax=log_ax, x=values, bw_method=bw, log_scale=[False, True], label=label)

    # Empirical CDF: sorted values against their rank fraction.
    for label, values in components:
        count = len(values)
        cdf_ax.plot(np.sort(values), np.arange(1, count + 1) / count, label=label + " CDF")

    for panel, title in zip(panels, ("Non Log Scale Pdf", "Log Scale Pdf", "CDF")):
        panel.set_title(title)
    fig.suptitle(f"Band {band!s}")
    for panel in panels:
        panel.legend()

In [None]:
# Real vs. imaginary distributions for band 2, using the default KDE bandwidth.
ComplexStudy(converted, 2)

In [None]:
# Repeat the study for each of the 7 bands in `converted` (default bw = 0.02).
for i in range(1, 8):
    ComplexStudy(converted, i)

In [None]:
# Same 7 bands again with a larger KDE bandwidth (bw = 0.2) for comparison.
for i in range(1, 8):
    ComplexStudy(converted, i, bw = 0.2)

# Color Analysis

In [None]:
def LayerAnalysis(convertedDF, channel, band, bound = 1e7, bw = 0.02):
    """Compare the two bands that make up one layer of a single channel.

    Layer ``band`` is made of bands ``2 * band - 1`` ("First Half") and
    ``2 * band`` ("Second Half"). For each of them the real and imaginary
    parts of all coefficients are pooled into one sample, and the two samples
    are drawn on three panels: KDE on a linear y-axis, KDE on a log y-axis,
    and the empirical CDF.

    Parameters
    ----------
    convertedDF : pandas.DataFrame
        Frame with ``"Channel"``, ``"Band"`` and ``"Data"`` columns.
    channel : value compared against the ``"Channel"`` column.
    band : int
        Layer number (1-based).
    bound : float
        Currently unused; kept so existing calls keep working.
    bw : float
        Passed to seaborn as ``bw_method``.
    """
    channel_rows = convertedDF.loc[convertedDF["Channel"] == channel]
    fig, (linear_ax, log_ax, cdf_ax) = plt.subplots(1, 3, figsize=(24, 6))
    panels = (linear_ax, log_ax, cdf_ax)

    def pooled_parts(band_number):
        # Real parts followed by imaginary parts of every coefficient in the band.
        selected = channel_rows.loc[channel_rows["Band"] == band_number, "Data"]
        coefficients = np.concatenate(selected.to_numpy())
        return np.concatenate((np.ravel(coefficients.real), np.ravel(coefficients.imag)))

    halves = [
        ("First Half", pooled_parts(2 * band - 1)),
        ("Second Half", pooled_parts(2 * band)),
    ]

    # Fix the visible density range of the log panel before anything is drawn on it.
    log_ax.set_ylim(bottom=10**-6, top=10)

    for label, sample in halves:
        sns.kdeplot(ax=linear_ax, x=sample, bw_method=bw, label=label)
    for label, sample in halves:
        sns.kdeplot(ax=log_ax, x=sample, bw_method=bw, log_scale=[False, True], label=label)

    # Empirical CDF: sorted values against their rank fraction.
    for label, sample in halves:
        size = len(sample)
        cdf_ax.plot(np.sort(sample), np.arange(1, size + 1) / size, label=label)

    for panel, title in zip(panels, ("Non Log Scale Pdf", "Log Scale Pdf", "CDF")):
        panel.set_title(title)
    fig.suptitle(f"Band {band!s}")
    for panel in panels:
        panel.legend()

## Gray

### 8 Layers

In [None]:
# 8 layers: the spectrum is split into 2 * layer_num = 16 bands because
# LayerAnalysis compares bands (2i - 1) and (2i) for layer i.
layer_num = 8
channel_color = "Gray"
converted_layer = convert_to_fourier_basis(data_dir, layer_num*2, grey_scale_only=True)

In [None]:
# One figure per layer, using the `layer_num` and `converted_layer` set in the previous cell.
for i in range(1, layer_num+1):
    LayerAnalysis(converted_layer, channel_color, i, bw = 0.2)

### 16 Layers

In [None]:
# 16 layers -> 32 bands. Overwrites `layer_num` and `converted_layer` from the
# 8-layer section, so cells below must be run after this one.
layer_num = 16
channel_color = "Gray"
converted_layer = convert_to_fourier_basis(data_dir, layer_num*2, grey_scale_only=True)

In [None]:
# One figure per layer, using the `layer_num` and `converted_layer` set in the previous cell.
for i in range(1, layer_num+1):
    LayerAnalysis(converted_layer, channel_color, i, bw = 0.2)

### 32 Layers

In [None]:
# 32 layers -> 64 bands. Overwrites `layer_num` and `converted_layer` from the
# 16-layer section, so cells below must be run after this one.
layer_num = 32
channel_color = "Gray"
converted_layer = convert_to_fourier_basis(data_dir, layer_num*2, grey_scale_only=True)

In [None]:
# Layers 1-16 of the 32-layer split; the remaining layers are plotted in the next cell.
for i in range(1, 17):
    LayerAnalysis(converted_layer, channel_color, i, bw = 0.2)

In [None]:
# Layers 17 through layer_num (32) of the 32-layer split.
for i in range(17, layer_num+1):
    LayerAnalysis(converted_layer, channel_color, i, bw = 0.2)