In [None]:
import numpy as np
import pywt
import matplotlib.pyplot as plt
import random
from scipy import stats
from scipy import fft
import random
import pywt.data
from PIL import Image
import pandas as pd
import seaborn as sns
import os
import pickle

In [None]:
# Project root on the local machine and the image folder analysed in this notebook.
ROOT_DIR = '/Users/brandonmarks/Desktop/Research Materials/hierarchical-bayesian-model-validation/'
data_dir = ROOT_DIR + 'Data/Panoptic Argiculture 2/Toy dataset - rgb cleaned'

# Entry names found in data_dir, plus the same entries expressed as full paths.
file_names = os.listdir(data_dir)
file_list = [os.path.join(data_dir, entry) for entry in file_names]


In [None]:
def getIndexDF(image, no_zero =False):
    """Tabulate the non-negative FFT frequency coordinates of a 2-D image.

    Parameters
    ----------
    image : array-like with a ``shape`` of at least two dimensions
        Only ``image.shape[0]`` (x / row axis) and ``image.shape[1]``
        (y / column axis) are read; pixel values are not used.
    no_zero : bool, default False
        When True, the row whose x and y frequencies are both zero is removed.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(x, y)`` tuples (index name ``"index_coords"``) with
        columns ``x_index``, ``y_index``, ``x_freq``, ``y_freq`` and
        ``magnitude`` (Euclidean norm of the two frequencies). Only positions
        where both frequencies are >= 0 are kept, in row-major order.
    """
    n_rows, n_cols = image.shape[0], image.shape[1]
    x_freqs = fft.fftfreq(n_rows)
    y_freqs = fft.fftfreq(n_cols)

    # Row-major enumeration of every (x, y) position. The y axis must span
    # shape[1]; spanning shape[0] breaks for non-square images.
    x_index = np.repeat(np.arange(n_rows), n_cols)
    y_index = np.tile(np.arange(n_cols), n_rows)

    coord_df = pd.DataFrame()
    coord_df["index_coords"] = list(zip(x_index, y_index))
    coord_df["x_index"] = x_index
    coord_df["y_index"] = y_index
    coord_df["x_freq"] = x_freqs[x_index]
    coord_df["y_freq"] = y_freqs[y_index]
    coord_df["magnitude"] = np.sqrt(coord_df["x_freq"] ** 2 + coord_df["y_freq"] ** 2)
    coord_df = coord_df.set_index(["index_coords"])

    # Keep only the quadrant where both frequencies are non-negative.
    coord_df = coord_df[(coord_df["x_freq"] >= 0) & (coord_df["y_freq"] >= 0)]
    if no_zero:
        # Drop the single row where both frequencies are exactly zero.
        coord_df = coord_df[(coord_df["x_freq"] != 0) | (coord_df["y_freq"] != 0)]
    return coord_df

In [None]:
def convert_fourier_list(folder_dir, c):
    """Collect the 2-D FFT coefficients of every image in a folder.

    Parameters
    ----------
    folder_dir : str
        Directory whose entries are all opened with ``PIL.Image.open``.
    c : int
        Channel selector. ``3`` converts each image to grayscale (mode 'L');
        any other value is used as the index into the last axis of the
        loaded image array.

    Returns
    -------
    sample : numpy.ndarray
        Flat array of complex coefficients, ordered by ascending frequency
        magnitude; for each frequency position the values of all images are
        adjacent.
    mag_flat : numpy.ndarray
        Frequency magnitude of each entry of ``sample`` (same length).

    Raises
    ------
    ValueError
        If ``folder_dir`` contains no entries.

    Notes
    -----
    The frequency positions are derived from the first image only.
    # assumes every image has the same height/width as the first — TODO confirm
    """
    # os.listdir returns entries in arbitrary order; sort so repeated runs
    # stack the images in the same order.
    file_list = [os.path.join(folder_dir, filename) for filename in sorted(os.listdir(folder_dir))]
    if not file_list:
        raise ValueError(f"no files found in {folder_dir!r}")

    def load_channel(path):
        # Context manager closes the underlying file once the pixels are copied.
        with Image.open(path) as img:
            if c == 3:
                return np.array(img.convert('L'))
            return np.array(img)[:, :, c]

    with Image.open(file_list[0]) as first_img:
        reference = np.array(first_img.convert('L'))
    coord_df = getIndexDF(reference, no_zero =False).sort_values(["magnitude"])
    x = coord_df["x_index"].to_numpy()
    y = coord_df["y_index"].to_numpy()
    magnitudes = coord_df["magnitude"].to_numpy()

    # One row of selected coefficients per image, in magnitude order.
    freq_arr = [fft.fft2(load_channel(path))[x, y] for path in file_list]

    # Interleave so all images' values for one frequency position are adjacent.
    sample = np.stack(freq_arr, axis=1).ravel()
    mag_flat = np.repeat(magnitudes, len(file_list))
    return sample, mag_flat

In [None]:
# Channel code 3 makes convert_fourier_list read every image as grayscale ('L').
freqs, mags = convert_fourier_list(data_dir, 3)


In [None]:
# Midpoint between the smallest and largest magnitude, and the first position
# whose magnitude lies above it.
n = (mags[0] + mags[-1])/2
idx = np.argmax(mags > n)

# For each side of the split, stack the real parts followed by the imaginary parts.
first_sample, second_sample = (
    np.concatenate([np.real(part), np.imag(part)])
    for part in (freqs[:idx], freqs[idx:])
)
first_sample,second_sample

In [None]:
# Keep only the part at and above position idx (the upper side of the split above).
# NOTE: mags/freqs are rebound in place, so re-running this cell slices the
# already-sliced arrays again with whatever idx currently holds.
mags,freqs = mags[idx:], freqs[idx:]


In [None]:
# Repeat the midpoint split on the current mags/freqs.
n = (mags[0] + mags[-1])/2
idx = np.argmax(mags > n)

# Real parts followed by imaginary parts, separately for each side of idx.
below, above = freqs[:idx], freqs[idx:]
first_sample = np.concatenate([np.real(below), np.imag(below)])
second_sample = np.concatenate([np.real(above), np.imag(above)])
del below, above
first_sample,second_sample

In [None]:
# Two-sample Kolmogorov-Smirnov statistic between the two sides of the split.
stats.ks_2samp(first_sample, second_sample).statistic

In [None]:
def recursive_split(freqs, mags, threshold =0.05, max_depth = 5):
    """Recursively bisect a magnitude range wherever the two halves differ.

    Each segment is cut at the midpoint between its first and last magnitude.
    The real and imaginary parts of the coefficients on either side are
    compared with a two-sample Kolmogorov-Smirnov test; if the statistic
    exceeds ``threshold`` the midpoint is recorded and both halves are
    processed again with one less level of depth.

    Parameters
    ----------
    freqs : array-like of complex
        Coefficients, aligned element-wise with ``mags``.
    mags : array-like of float
        Magnitude of each coefficient.
        # assumes mags is sorted ascending, as produced by convert_fourier_list — TODO confirm for other callers
    threshold : float, default 0.05
        Minimum KS statistic for a split to be kept.
    max_depth : int, default 5
        Maximum number of nested splits; ``0`` disables splitting.

    Returns
    -------
    list
        Recorded midpoint magnitudes in pre-order (parent, lower half, upper
        half). Empty when no split qualifies, including for empty input.
    """
    freqs = np.asarray(freqs)
    mags = np.asarray(mags)
    magnitude_splits = []

    def recursive_helper(freqs, mags, depth):
        # No depth budget left, or too few points to form two samples.
        if depth <= 0 or len(mags) < 2:
            return
        n = (mags[-1] + mags[0])/2
        idx = np.argmax(mags>n)
        # argmax yields 0 when nothing exceeds the midpoint (e.g. all magnitudes
        # equal); the lower sample would be empty, so the segment cannot be split.
        if idx == 0:
            return
        first_sample = np.concatenate([np.real(freqs[:idx]),np.imag(freqs[:idx])])
        second_sample = np.concatenate([np.real(freqs[idx:]),np.imag(freqs[idx:])])
        stat = stats.ks_2samp(first_sample, second_sample).statistic
        if stat > threshold:
            magnitude_splits.append(n)
            recursive_helper(freqs[:idx], mags[:idx], depth-1)
            recursive_helper(freqs[idx:], mags[idx:], depth-1)

    recursive_helper(freqs, mags, max_depth)
    return magnitude_splits





In [None]:
# Split magnitudes found on the current freqs/mags, using a KS threshold of 0.05
# and recursive_split's default max_depth.
mag_splits = recursive_split(freqs, mags, threshold =0.05)
mag_splits

In [None]:
# One row per magnitude band for the grayscale channel, walking the split
# magnitudes in ascending order.
df = pd.DataFrame(columns=["Band", "Channel", "Magnitudes", "Data"])
sorted_mag_split = np.sort(mag_splits)
prev = 0
for i, split_value in enumerate(sorted_mag_split):
    # The first position whose magnitude exceeds the split closes the band.
    next_idx = np.argmax(mags>split_value)
    band = slice(prev, next_idx)
    next_freqs, next_mags = freqs[band], mags[band]
    df.loc[len(df.index)] = [i+1, "Gray", next_mags, next_freqs]
    prev = next_idx

df

In [None]:
def convert_to_fourier_basis(folder_dir, color, threshold =0.05, max_depth = 5):
    """Build a per-band table of Fourier coefficients for one colour channel.

    Parameters
    ----------
    folder_dir : str
        Image folder passed to ``convert_fourier_list``.
    color : {"Red", "Green", "Blue", "Gray"}
        Channel name; mapped to the channel code ``convert_fourier_list``
        expects. Any other value raises ``KeyError``.
    threshold : float, default 0.05
        KS-statistic threshold forwarded to ``recursive_split``.
    max_depth : int, default 5
        Recursion limit forwarded to ``recursive_split``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Band`` (1-based, ascending magnitude), ``Channel``
        (the ``color`` argument), ``Magnitudes`` and ``Data``. ``Data`` holds
        the real parts of the band's coefficients followed by their imaginary
        parts; ``Magnitudes`` repeats the band's magnitudes twice so it lines
        up element-wise with ``Data``. Rows are labelled 0, 1, 2, ...
    """
    color_dict = {"Red":0, "Green":1, "Blue":2, "Gray":3}
    c = color_dict[color]
    freqs, mags = convert_fourier_list(folder_dir, c)

    mag_splits = recursive_split(freqs, mags, threshold, max_depth)
    sorted_mag_split = np.sort(mag_splits)

    # Collect the rows first and build the frame once, instead of enlarging a
    # DataFrame row by row.
    records = []
    prev = 0
    for band, split in enumerate(sorted_mag_split, start=1):
        # First position whose magnitude exceeds this split closes the band.
        next_idx = np.argmax(mags>split)
        band_freqs = freqs[prev:next_idx]
        band_mags = mags[prev:next_idx]
        records.append({
            "Band": band,
            "Channel": color,
            "Magnitudes": np.concatenate([band_mags, band_mags]),
            "Data": np.concatenate([np.real(band_freqs), np.imag(band_freqs)]),
        })
        prev = next_idx
    # NOTE(review): coefficients above the largest split (freqs[prev:]) are not
    # emitted as a band — confirm this is intended.

    return pd.DataFrame(records, columns=["Band", "Channel", "Magnitudes", "Data"])

In [None]:
# Band table for the grayscale channel of every image in data_dir
# (KS threshold 0.05, default max_depth).
converted = convert_to_fourier_basis(data_dir, "Gray", threshold =0.05)
converted

In [None]:
# Number of values stored for the row labelled 7 (Band 8); real and imaginary
# parts are concatenated, so this is twice the number of coefficients.
# NOTE(review): assumes at least eight bands were produced — otherwise this lookup fails.
len(converted["Data"][7])