In [1]:
import git
from pathlib import Path
import os
import numpy as np
import tqdm
import scipy.ndimage as ndi
import pandas as pd
from skimage.filters import gabor_kernel
from scipy.signal import fftconvolve

ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from learned import *

np.random.seed(0)

In [2]:
DATASET = "syntheticMRI2D"
RAW_DATA_SUFFIX = "full-cleaned 2"
FINAL_DATA_NAME = "syntheticMRI2D-coronal-full"
PARAM_CSV = "gabor_new.csv"
CONSTANT_SAMPLE_SIZE = int(1e5)

num_images=1000
jitter=False 
normalize=False 

#data_dir = os.path.join(ROOT_DIR, 'raw-data', DATASET, RAW_DATA_SUFFIX)
data_dir = os.path.join(ROOT_DIR, 'raw-data', RAW_DATA_SUFFIX)

In [3]:
def _generate_gabor_kernel_skimage_cached(frequency: float,
                                          wave_number: int,
                                          theta: float,
                                          aspect_ratio: float,
                                          dtype_str: str) -> np.ndarray:
    if frequency <= 0:
        raise ValueError("frequency must be positive (cycles/pixel).")
    
    # choose n_stds explicitly (same value you pass to gabor_kernel; default is 3)
    n_stds = 3
    sigma_x = wave_number / (2.0 * n_stds * frequency)   # NOT 4*frequency
    sigma_y = aspect_ratio * sigma_x
    k_complex = gabor_kernel(frequency=frequency, theta=theta, sigma_x=sigma_x, sigma_y=sigma_y, n_stds=n_stds)

    k = np.real(k_complex).astype(np.dtype(dtype_str), copy=False)
    k -= k.mean()
    nrm = np.linalg.norm(k.ravel())
    if nrm > 1e-12:
        k = k / nrm
    return k

def generate_gabor_kernel_skimage(frequency: float,
                                  wave_number: int,
                                  theta: float = 0.0,
                                  aspect_ratio: float = 1.0,
                                  dtype=np.float64) -> np.ndarray:
    """Public wrapper returning a copy to avoid accidental mutation of cached data."""
    arr = _generate_gabor_kernel_skimage_cached(
        float(frequency), int(wave_number), float(theta),
        float(aspect_ratio), np.dtype(dtype).name
    )
    if arr is not None:
        return arr.copy()
    return None

In [None]:
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))
# param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", "gabor_test.csv"))

orientations = [0, np.pi/6, np.pi/3, np.pi/2, 2*np.pi/3, 5*np.pi/6]

filters = []
for i in range(len(param_df)):
    fil_orients = []

    for j in range(len(orientations)):
        fil = generate_gabor_kernel_skimage(param_df["frequency"][i], param_df["wave_number"][i], orientations[j], param_df["aspect_ratio"][i])
        if fil is not None:
            fil_orients.append(fil)
    
    filters.append(fil_orients)

#TODO
filters = [filters[0]]

print(filters[0][0].shape)
print(len(filters), len(filters[0]))

(25, 47)
1 6


In [5]:
for i in range(len(filters)):
    for j in range(len(filters[i])):
        print(f"index: {i}, params: {tuple(param_df.loc[i])[1:]}, orientation: {orientations[j]:.3f}, size: {filters[i][j].shape}")

index: 0, params: (2.0, 0.5, 0.044), orientation: 0.000, size: (25, 47)
index: 0, params: (2.0, 0.5, 0.044), orientation: 0.524, size: (25, 41)
index: 0, params: (2.0, 0.5, 0.044), orientation: 1.047, size: (41, 25)
index: 0, params: (2.0, 0.5, 0.044), orientation: 1.571, size: (47, 25)
index: 0, params: (2.0, 0.5, 0.044), orientation: 2.094, size: (41, 25)
index: 0, params: (2.0, 0.5, 0.044), orientation: 2.618, size: (25, 41)


In [6]:
#TODO
num_images = 10

images = load_images_from_directory(data_dir, n=num_images, jitter=jitter, normalize=normalize)
print("num_images, H, W, channels:", images.shape, "\nnum_filters", len(filters))

Loading images:   0%|          | 0/10 [00:00<?, ?it/s]

num_images, H, W, channels: (10, 369, 369) 
num_filters 1


In [15]:
# FFT doesn't work with NaNs
def apply_filter(image, filter):
    """
    Applies Gabor filter to the input RGB image.
    """
    # print(image.shape)
    # print(filter.shape)
    out = np.zeros_like(image)

    if image.ndim == 2:
        out = ndi.convolve(
            image,
            filter,
            mode='reflect'
        )
    
    else:
        C = image.shape[2]
        for ch in range(C):
            out[:, :, ch] = ndi.convolve(
                image[:, :, ch],
                filter,
                mode='reflect'
            )
    
    return out

In [18]:
filter_data_map = dict()
images_per_orient = 166 # ~= 1000 / 6

#TODO
images_per_orient = 5

for i in tqdm(range(len(filters))):
    transformed = []
    for j in range(len(filters[i])):   
        curr_transformed = []    
        fil = filters[i][j]

        indices = np.random.choice(len(images), size=images_per_orient, replace=False)
        image_orient_subset = images[indices]

        for k, image in enumerate(image_orient_subset):
            convolved = apply_filter(image, fil)

            #handle NaNs
            print(f"NaN proportion: {np.sum(np.isnan(convolved.flatten())) / len(convolved.flatten())}")
            convolved = convolved[~np.isnan(convolved)]

            curr_transformed.append(convolved.flatten())

        #handle NaNs
        if len(curr_transformed) == 0:
            continue

        curr_transformed = np.hstack(curr_transformed)

        # intermediate subsample to limit size
        curr_transformed = np.sort(curr_transformed)[np.round(np.linspace(0, curr_transformed.size - 1, min(curr_transformed.size, CONSTANT_SAMPLE_SIZE))).astype(int)]
        transformed = np.concatenate((transformed, curr_transformed))

    # append the negation of all coefficients to account for other orientations
    neg_transformed = -1 * transformed
    transformed = np.concatenate((transformed, neg_transformed))
    
    # select max of CONSTANT_SAMPLE_SIZE coefs
    transformed = np.sort(transformed)[np.round(np.linspace(0, transformed.size - 1, min(transformed.size, CONSTANT_SAMPLE_SIZE))).astype(int)]
    filter_data_map[i] = transformed

filter_data_map[0]

  0%|          | 0/1 [00:00<?, ?it/s]

NaN proportion: 0.3695918802006448
NaN proportion: 0.3875705965731744
NaN proportion: 0.34377685240267036
NaN proportion: 0.37886766401539357
NaN proportion: 0.3843685049316618
NaN proportion: 0.34959349593495936
NaN proportion: 0.3648695294541021
NaN proportion: 0.3760254404712069
NaN proportion: 0.37012066597630744
NaN proportion: 0.36130756971526357
NaN proportion: 0.37709770051630054
NaN proportion: 0.3514883116310838
NaN proportion: 0.39104442534940254
NaN proportion: 0.3857271906052394
NaN proportion: 0.3916466535939072
NaN proportion: 0.4059679350181036
NaN proportion: 0.3944521559036729
NaN proportion: 0.3963690043404499
NaN proportion: 0.4112778255153825
NaN proportion: 0.3659931992273852
NaN proportion: 0.3511945417557157
NaN proportion: 0.3967729379190811
NaN proportion: 0.3916466535939072
NaN proportion: 0.39104442534940254
NaN proportion: 0.37709770051630054
NaN proportion: 0.33541909944844706
NaN proportion: 0.33432480666270076
NaN proportion: 0.36130756971526357
NaN prop

array([-1459.3129873 , -1358.77534372, -1305.00933066, ...,
        1305.00933066,  1358.77534372,  1459.3129873 ])

# Saving Data

In [None]:
data_output_file = os.path.join(ROOT_DIR, 'transformed-data', f"{FINAL_DATA_NAME}-gabor.pickle") 
size_output_file = os.path.join(ROOT_DIR, 'transformed-data', f"{FINAL_DATA_NAME}-gabor-size.pickle")

total_samples = np.prod(images.shape)
print("Without subsampling size:", total_samples)
print("With subsampling, per-filter:", len(filter_data_map[0]))
pd.to_pickle(filter_data_map, data_output_file)
pd.to_pickle({i : total_samples for i in range(len(filter_data_map))}, size_output_file)

Without subsampling size: 786432000
With subsampling, per-filter: 100000
