In [1]:
import os
from functools import lru_cache
from pathlib import Path

import git
import numpy as np
import pandas as pd
import scipy.ndimage as ndi
import tqdm
from scipy.signal import fftconvolve
from skimage.filters import gabor_kernel
from tqdm.auto import tqdm as progress_bar

# Resolve the repository root by searching upward from the current directory
# for the enclosing git working tree.
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
# Change the process working directory to <repo>/utilities before the import
# below -- presumably so that `learned` resolves by bare module name (confirm).
# Side effect: the working directory stays there for every later cell.
os.chdir(os.path.join(ROOT_DIR, "utilities"))
# NOTE(review): star import -- the names it provides are not visible here.
# `load_images_from_directory` (used later) is not defined in this notebook and
# presumably comes from this module; confirm and prefer explicit imports.
from learned import *

# Seed NumPy's legacy global RNG; the later np.random.choice calls draw from it.
np.random.seed(0)

In [None]:
# --- Dataset selection ------------------------------------------------------
DATASET = "coco"
RAW_DATA_SUFFIX = "coco/coco-indoor-cropped"   # sub-path joined under <repo>/raw-data
FINAL_DATA_NAME = "coco-indoor"                # prefix of the pickles written at the end
PARAM_CSV = "gabor_new.csv"

# Maximum number of coefficients retained per filter after subsampling.
CONSTANT_SAMPLE_SIZE = 100_000

# --- Image loading options (forwarded to load_images_from_directory) --------
num_images = 1000
jitter = True
normalize = True

# DATASET is not part of the path: RAW_DATA_SUFFIX already starts with "coco/".
data_dir = os.path.join(ROOT_DIR, "raw-data", RAW_DATA_SUFFIX)

In [3]:
def _generate_gabor_kernel_skimage_cached(frequency: float,
                                          wave_number: int,
                                          theta: float,
                                          aspect_ratio: float,
                                          dtype_str: str) -> np.ndarray:
    if frequency <= 0:
        raise ValueError("frequency must be positive (cycles/pixel).")
    
    # choose n_stds explicitly (same value you pass to gabor_kernel; default is 3)
    n_stds = 3
    sigma_x = wave_number / (2.0 * n_stds * frequency)   # NOT 4*frequency
    sigma_y = aspect_ratio * sigma_x
    k_complex = gabor_kernel(frequency=frequency, theta=theta, sigma_x=sigma_x, sigma_y=sigma_y, n_stds=n_stds)

    k = np.real(k_complex).astype(np.dtype(dtype_str), copy=False)
    k -= k.mean()
    nrm = np.linalg.norm(k.ravel())
    if nrm > 1e-12:
        k = k / nrm
    return k

def generate_gabor_kernel_skimage(frequency: float,
                                  wave_number: int,
                                  theta: float = 0.0,
                                  aspect_ratio: float = 1.0,
                                  dtype=np.float64) -> np.ndarray:
    """Return a private copy of the Gabor kernel for the given parameters.

    The arguments are coerced to plain ``float``/``int``/``str`` values and
    passed to ``_generate_gabor_kernel_skimage_cached``. The caller receives
    an independent copy, so editing it cannot affect any array the helper
    retains. ``None`` is passed through unchanged if the helper returns it.
    """
    normalized_args = (
        float(frequency),
        int(wave_number),      # note: int() truncates non-integral values
        float(theta),
        float(aspect_ratio),
        np.dtype(dtype).name,
    )
    kernel = _generate_gabor_kernel_skimage_cached(*normalized_args)
    return None if kernel is None else kernel.copy()

In [4]:
# NOTE(review): the config cell defines PARAM_CSV, but this cell reads the
# literal "gabor_test.csv" instead -- confirm which parameter file is intended.
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", "gabor_test.csv"))

# Six orientations, 30 degrees apart, covering [0, pi).
orientations = [0, np.pi/6, np.pi/3, np.pi/2, 2*np.pi/3, 5*np.pi/6]

# filters[row][k] is the kernel for parameter row `row` at orientations[k].
filters = []
for row in range(len(param_df)):
    freq = param_df.loc[row, "frequency"]
    n_waves = param_df.loc[row, "wave_number"]
    ratio = param_df.loc[row, "aspect_ratio"]

    candidates = (
        generate_gabor_kernel_skimage(freq, n_waves, angle, ratio)
        for angle in orientations
    )
    filters.append([kernel for kernel in candidates if kernel is not None])

print(filters[0][0].shape)
print(len(filters), len(filters[0]))

(35, 69)
20 6


In [5]:
# One line per (parameter row, orientation): the row's parameters (all columns
# after the first), the orientation in radians, and the kernel's shape.
for i, bank in enumerate(filters):
    for j, _ in enumerate(bank):
        print(f"index: {i}, params: {tuple(param_df.loc[i])[1:]}, orientation: {orientations[j]:.3f}, size: {filters[i][j].shape}")

index: 0, params: (2.0, 0.5, 0.03), orientation: 0.000, size: (35, 69)
index: 0, params: (2.0, 0.5, 0.03), orientation: 0.524, size: (35, 59)
index: 0, params: (2.0, 0.5, 0.03), orientation: 1.047, size: (59, 35)
index: 0, params: (2.0, 0.5, 0.03), orientation: 1.571, size: (69, 35)
index: 0, params: (2.0, 0.5, 0.03), orientation: 2.094, size: (59, 35)
index: 0, params: (2.0, 0.5, 0.03), orientation: 2.618, size: (35, 59)
index: 1, params: (2.0, 0.5, 0.054), orientation: 0.000, size: (21, 39)
index: 1, params: (2.0, 0.5, 0.054), orientation: 0.524, size: (21, 35)
index: 1, params: (2.0, 0.5, 0.054), orientation: 1.047, size: (35, 21)
index: 1, params: (2.0, 0.5, 0.054), orientation: 1.571, size: (39, 21)
index: 1, params: (2.0, 0.5, 0.054), orientation: 2.094, size: (35, 21)
index: 1, params: (2.0, 0.5, 0.054), orientation: 2.618, size: (21, 35)
index: 2, params: (2.0, 0.5, 0.096), orientation: 0.000, size: (13, 23)
index: 2, params: (2.0, 0.5, 0.096), orientation: 0.524, size: (13, 21

In [6]:
# Load the image stack from data_dir. load_images_from_directory is not defined
# in this notebook -- presumably it comes from `from learned import *`, with
# n limiting the image count and jitter/normalize toggling preprocessing (confirm).
images = load_images_from_directory(data_dir, n=num_images, jitter=jitter, normalize=normalize)
# Sanity check: the recorded output shows a 4-D shape (count, H, W, channels).
print("num_images, H, W, channels:", images.shape, "\nnum_filters", len(filters))

Loading images:   0%|          | 0/1000 [00:00<?, ?it/s]

num_images, H, W, channels: (1000, 256, 256, 3) 
num_filters 20


In [7]:
def apply_filter(image, filter):
    """Convolve every channel of an image with a 2-D kernel.

    Parameters
    ----------
    image : array_like
        Either an (H, W, C) image with any number of channels (RGB is C == 3)
        or a single-channel (H, W) image.
    filter : array_like
        2-D convolution kernel, e.g. a Gabor kernel.

    Returns
    -------
    np.ndarray
        Array with the same shape as ``image``. Floating-point inputs keep
        their dtype. Integer or boolean inputs are promoted to float64 first,
        so negative or fractional responses are not truncated or wrapped.

    Raises
    ------
    ValueError
        If ``image`` is neither 2-D nor 3-D.

    Notes
    -----
    Borders are handled with ``mode='reflect'``.
    """
    image = np.asarray(image)
    kernel = np.asarray(filter)

    # An integer image would force an integer output buffer and silently
    # discard the fractional/negative part of the filter response.
    if not np.issubdtype(image.dtype, np.inexact):
        image = image.astype(np.float64)

    if image.ndim == 2:
        return ndi.convolve(image, kernel, mode='reflect')
    if image.ndim != 3:
        raise ValueError(
            f"image must be 2-D (H, W) or 3-D (H, W, C); got shape {image.shape}"
        )

    out = np.empty_like(image)
    # Iterate over the actual channel count instead of assuming exactly 3.
    for ch in range(image.shape[2]):
        out[:, :, ch] = ndi.convolve(
            image[:, :, ch],
            kernel,
            mode='reflect'
        )
    return out

In [8]:
def quantile_subsample(values, max_size):
    """Return at most ``max_size`` evenly spaced order statistics of ``values``.

    The values are sorted, then picked at evenly spaced ranks from the minimum
    to the maximum. If there are no more than ``max_size`` values, all of them
    are returned, sorted.
    """
    ordered = np.sort(np.asarray(values).ravel())
    ranks = np.round(
        np.linspace(0, ordered.size - 1, min(ordered.size, max_size))
    ).astype(int)
    return ordered[ranks]


if len(images) == 0:
    raise ValueError("No images loaded; cannot compute filter responses.")

filter_data_map = dict()
# Each orientation is applied to its own random subset of the images, sized
# len(images) / len(orientations) (166 for 1000 images and 6 orientations).
# Deriving it keeps np.random.choice(replace=False) from failing on small sets.
images_per_orient = min(len(images), max(1, len(images) // len(orientations)))

# progress_bar is imported explicitly: `import tqdm` binds the module, which
# is not callable unless another import happens to rebind the name.
for i in progress_bar(range(len(filters))):
    per_orientation = []
    for fil in filters[i]:
        indices = np.random.choice(len(images), size=images_per_orient, replace=False)

        # Pool every coefficient (all pixels, all channels) of the subset.
        coefs = np.concatenate(
            [apply_filter(image, fil).ravel() for image in images[indices]]
        )
        # intermediate subsample to limit size
        per_orientation.append(quantile_subsample(coefs, CONSTANT_SAMPLE_SIZE))

    # Concatenate once per filter; keep float64 regardless of the image dtype.
    if per_orientation:
        pooled = np.concatenate(per_orientation).astype(np.float64, copy=False)
    else:
        pooled = np.array([], dtype=np.float64)

    # select max of CONSTANT_SAMPLE_SIZE coefs
    filter_data_map[i] = quantile_subsample(pooled, CONSTANT_SAMPLE_SIZE)

filter_data_map[0]

  0%|          | 0/20 [00:00<?, ?it/s]

array([-68.50176031, -43.6876815 , -39.96166699, ...,  49.14177839,
        55.67562525,  68.81354836])

# Saving Data

In [9]:
# Both pickles are written under <repo>/transformed-data.
output_dir = os.path.join(ROOT_DIR, 'transformed-data')
data_output_file = os.path.join(output_dir, f"{FINAL_DATA_NAME}-gabor.pickle")
size_output_file = os.path.join(output_dir, f"{FINAL_DATA_NAME}-gabor-size.pickle")

# Total number of scalar values in the loaded image stack.
# NOTE(review): each filter's coefficients come from random per-orientation
# image subsets, so the true pre-subsampling count per filter may differ from
# this total -- confirm which number downstream code expects.
total_samples = np.prod(images.shape)
print("Without subsampling size:", total_samples)
print("With subsampling, per-filter:", len(filter_data_map[0]))

# Every filter index maps to the same pre-subsampling size.
size_by_filter = dict.fromkeys(range(len(filter_data_map)), total_samples)

pd.to_pickle(filter_data_map, data_output_file)
pd.to_pickle(size_by_filter, size_output_file)

Without subsampling size: 196608000
With subsampling, per-filter: 100000
