In [3]:
import git
from pathlib import Path
import os
import numpy as np
import tqdm
import scipy.ndimage as ndi
import pandas as pd
from skimage.filters import gabor_kernel
from scipy.signal import fftconvolve

# Repository root: found by searching upward from the current directory
# for the enclosing git working tree.
ROOT_DIR = Path(
    git.Repo(".", search_parent_directories=True).working_tree_dir
)

# Move into <repo>/utilities before importing `learned`. Note that the
# working directory stays changed for every later cell.
os.chdir(ROOT_DIR / "utilities")
from learned import *  # star import; presumably supplies load_images_from_directory -- verify

# Seed NumPy's legacy global random state.
np.random.seed(0)

In [4]:
def _generate_gabor_kernel_skimage_cached(frequency: float,
                                          wave_number: int,
                                          theta: float,
                                          aspect_ratio: float,
                                          dtype_str: str) -> np.ndarray:
    if frequency <= 0:
        raise ValueError("frequency must be positive (cycles/pixel).")

    sigma_x = float(wave_number) / (4.0 * float(frequency)) # wave_number ≈ 4σ·f
    if sigma_x <= 0:
        raise ValueError("Computed sigma_x must be positive; check wave_number and frequency.")
    sigma_y = float(aspect_ratio) * sigma_x  

    k_complex = gabor_kernel(
        frequency=float(frequency),  # cycles/pixel
        theta=float(theta),
        sigma_x=float(sigma_x),
        sigma_y=float(sigma_y)
    )
    k = np.real(k_complex).astype(np.dtype(dtype_str), copy=False)
    k -= k.mean()
    nrm = np.linalg.norm(k.ravel())
    if nrm > 1e-12:
        k = k / nrm
    return k

def generate_gabor_kernel_skimage(frequency: float,
                                  wave_number: int,
                                  theta: float = 0.0,
                                  aspect_ratio: float = 1.0,
                                  dtype=np.float64) -> np.ndarray:
    """Return an independent copy of the Gabor kernel for these parameters.

    The arguments are coerced to plain Python scalars (and ``dtype`` to its
    canonical NumPy name) and forwarded positionally to
    ``_generate_gabor_kernel_skimage_cached``. The result is copied so the
    caller can modify it without touching the array the helper returned.
    """
    normalized_args = (
        float(frequency),
        int(wave_number),
        float(theta),
        float(aspect_ratio),
        np.dtype(dtype).name,
    )
    return _generate_gabor_kernel_skimage_cached(*normalized_args).copy()

In [5]:
# One row per Gabor parameter set (columns used here: frequency,
# wave_number, aspect_ratio).
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", "gabor_full.csv"))

# Number of parameter rows (taken from the top of the CSV) to turn into
# kernels for now.
NUM_FILTERS = 3

# Build only the kernels that are actually kept: each kernel can be large,
# so generating every row and then discarding most of them wastes time and
# memory. Orientation is fixed at theta = 0.
selected_params = param_df.head(NUM_FILTERS)
filters = [
    generate_gabor_kernel_skimage(frequency, wave_number, 0, aspect_ratio)
    for frequency, wave_number, aspect_ratio in zip(
        selected_params["frequency"],
        selected_params["wave_number"],
        selected_params["aspect_ratio"],
    )
]

print(filters[0].shape)
# Total number of parameter sets available in the CSV.
print(len(param_df))

(1501, 3001)
90


In [6]:
# --- Dataset identifiers and output naming ---
DATASET = "pastis"
RAW_DATA_SUFFIX = "full-pastis-RGB"
FINAL_DATA_NAME = "pastis-full"

# Upper bound on the number of coefficients kept per filter.
CONSTANT_SAMPLE_SIZE = 100_000

# --- Image-loading options ---
num_images = 10
jitter = False
normalize = False
n_bootstrap = 10  # not used by the cells visible here

# Raw images are read from raw-data/<suffix>. The alternative layout with a
# per-dataset folder is kept for reference:
# data_dir = os.path.join(ROOT_DIR, 'raw-data', DATASET, RAW_DATA_SUFFIX)
data_dir = os.path.join(ROOT_DIR, "raw-data", RAW_DATA_SUFFIX)

In [7]:
# Load `num_images` images from `data_dir`. The loader is not defined in the
# visible cells (presumably it comes from the `learned` star import).
images = load_images_from_directory(
    data_dir,
    n=num_images,
    jitter=jitter,
    normalize=normalize,
)
# Report the image-stack shape alongside the number of kernels in use.
print(
    "num_images, H, W, channels:", images.shape,
    "\nnum_filters", len(filters),
)

Loading images:   0%|          | 0/10 [00:00<?, ?it/s]

num_images, H, W, channels: (10, 128, 128, 3) 
num_filters 3


In [8]:
# def apply_filter(image, filter):
#     """
#     Applies Gabor filter to the input RGB image.
#     """
#     # print(image.shape)
#     # print(filter.shape)
#     out = np.zeros_like(image)
#     for ch in range(3):
#         out[:, :, ch] = ndi.convolve(
#             image[:, :, ch],
#             filter,
#             mode='reflect'
#         )
#     return out

def apply_filter(image, kernel, out=None, fft_when_large=True, fft_threshold=31*31):
    """Convolve each channel of an image with a 2-D kernel.

    Both inputs are converted to C-ordered float32 arrays before filtering.

    Parameters
    ----------
    image : array-like
        Either a 2-D grayscale image ``(H, W)`` or a 3-D image whose last
        axis indexes channels ``(H, W, C)``.
    kernel : array-like
        2-D convolution kernel.
    out : np.ndarray, optional
        Buffer that receives the result and is returned. If omitted, a new
        float32 array shaped like the image is allocated.
    fft_when_large : bool
        If true, kernels with more than ``fft_threshold`` elements are
        applied with ``scipy.signal.fftconvolve`` instead of
        ``scipy.ndimage.convolve``.
    fft_threshold : int
        Kernel element count above which the FFT path is used.

    Returns
    -------
    np.ndarray
        ``out``, holding the filtered image (same shape as the image).

    Raises
    ------
    ValueError
        If the kernel is not 2-D or the image is neither 2-D nor 3-D.

    Notes
    -----
    The two paths treat image borders differently: the ndimage path uses
    ``mode="reflect"``, whereas ``fftconvolve(..., mode="same")`` is a plain
    linear convolution cropped to the image size, with no reflection. Values
    near the border can therefore differ depending on which path is taken.
    """
    # Ensure contiguous float32 so no float64 temporaries are created.
    img = np.asarray(image, dtype=np.float32, order="C")
    ker = np.asarray(kernel, dtype=np.float32, order="C")

    # Fail early with a clear message instead of an IndexError or an opaque
    # SciPy error further down.
    if ker.ndim != 2:
        raise ValueError(f"kernel must be 2-D, got a {ker.ndim}-D array.")
    if img.ndim not in (2, 3):
        raise ValueError(
            f"image must be 2-D (H, W) or 3-D (H, W, C), got a {img.ndim}-D array."
        )

    # Prepare output buffer (re-usable across calls).
    if out is None:
        out = np.empty_like(img)

    use_fft = fft_when_large and ker.size > fft_threshold

    # Pair every 2-D input plane with the output view it is written into, so
    # grayscale and multi-channel images share one code path.
    if img.ndim == 2:
        planes = [(img, out)]
    else:
        planes = [(img[..., ch], out[..., ch]) for ch in range(img.shape[2])]

    for plane, target in planes:
        if use_fft:
            target[...] = fftconvolve(plane, ker, mode="same").astype(np.float32, copy=False)
        else:
            # Write straight into the preallocated slice to avoid temporaries.
            ndi.convolve(plane, ker, output=target, mode="reflect", cval=0.0)
    return out

In [9]:
# Import the progress-bar callable explicitly: the notebook's `import tqdm`
# only binds the module (which is not callable), so calling `tqdm(...)`
# would otherwise depend on whatever `from learned import *` exports.
from tqdm.auto import tqdm

# Maps filter index -> sorted, subsampled convolution coefficients.
filter_data_map = {}
print(filters[0].shape)

for i, fil in enumerate(tqdm(filters)):
    # Convolve every image with this filter and pool all coefficients into
    # one flat array.
    transformed = np.concatenate(
        [apply_filter(image, fil).ravel() for image in images]
    )

    # Keep at most CONSTANT_SAMPLE_SIZE coefficients: sort, then take evenly
    # spaced order statistics.
    n_keep = min(transformed.size, CONSTANT_SAMPLE_SIZE)
    keep_idx = np.round(np.linspace(0, transformed.size - 1, n_keep)).astype(int)
    transformed = np.sort(transformed)[keep_idx]
    filter_data_map[i] = transformed

filter_data_map[0]

(1501, 3001)


  0%|          | 0/3 [00:00<?, ?it/s]

array([-60.491257, -60.489914, -60.48882 , ...,  32.312542,  32.313217,
        32.31383 ], dtype=float32)

# Saving Data

In [10]:
# Make sure the output directory exists; on a fresh checkout it may not,
# and pd.to_pickle does not create missing parent directories.
output_dir = os.path.join(ROOT_DIR, 'transformed-data')
os.makedirs(output_dir, exist_ok=True)

data_output_file = os.path.join(output_dir, f"{FINAL_DATA_NAME}-gabor.pickle")
size_output_file = os.path.join(output_dir, f"{FINAL_DATA_NAME}-gabor-size.pickle")

# Number of coefficients each filter produced before subsampling: one per
# element of the image stack.
total_samples = np.prod(images.shape)
print("Without subsampling size:", total_samples)

# Subsampled coefficients per filter, plus the pre-subsampling count under
# the same keys.
pd.to_pickle(filter_data_map, data_output_file)
pd.to_pickle({i: total_samples for i in filter_data_map}, size_output_file)

Without subsampling size: 491520
