In [1]:
DATA_NAME = 'pastis-full' 
TRANSFORM = 'gabor'
CHANNEL = 'red'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_red_gabor_full_pastis.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,45.68,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,...,-0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,0.0
1,0.0,18.02,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,...,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0
2,-0.0,0.0,6.86,-0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,...,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0
3,0.0,-0.0,-0.0,2.61,-0.0,0.0,-0.0,-0.0,-0.0,0.0,...,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0
4,0.0,-0.0,0.0,-0.0,0.96,-0.0,0.0,0.0,-0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
5,-0.0,-0.0,0.0,0.0,-0.0,0.28,0.0,-0.0,0.0,-0.0,...,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0
6,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.16,0.0,0.0,0.0,...,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0
7,-0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,59.56,0.0,0.0,...,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0
8,0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,23.81,0.0,...,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0
9,-0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,9.41,...,0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,4e-05,-1e-05,4e-05,1e-05,-4e-05,-4e-05,-4e-05,2e-05,-2e-05,...,-3e-05,8e-05,-1e-05,3e-05,0.0,-3e-05,2e-05,-0.0,-0.0,2e-05
1,4e-05,1.0,5e-05,-6e-05,-1e-05,-4e-05,-1e-05,5e-05,-1e-05,-3e-05,...,-0.0,0.0,-0.0,3e-05,-1e-05,-0.0,-3e-05,2e-05,2e-05,4e-05
2,-1e-05,5e-05,1.0,-3e-05,0.0,4e-05,3e-05,-0.0,4e-05,-1e-05,...,1e-05,-2e-05,3e-05,-4e-05,5e-05,-0.0,-8e-05,-1e-05,4e-05,-1e-05
3,4e-05,-6e-05,-3e-05,1.0,-2e-05,3e-05,-2e-05,-5e-05,-3e-05,0.0,...,-2e-05,-5e-05,2e-05,2e-05,-2e-05,3e-05,2e-05,3e-05,-4e-05,-0.0
4,1e-05,-1e-05,0.0,-2e-05,1.0,-2e-05,1e-05,0.0,-1e-05,7e-05,...,0.0,2e-05,1e-05,2e-05,3e-05,-4e-05,4e-05,2e-05,5e-05,-4e-05
5,-4e-05,-4e-05,4e-05,3e-05,-2e-05,1.0,1e-05,-6e-05,3e-05,-1e-05,...,-3e-05,-2e-05,4e-05,-3e-05,0.0,1e-05,3e-05,1e-05,0.0,-1e-05
6,-4e-05,-1e-05,3e-05,-2e-05,1e-05,1e-05,1.0,2e-05,0.0,5e-05,...,3e-05,-1e-05,-2e-05,-2e-05,-0.0,2e-05,-3e-05,4e-05,-3e-05,1e-05
7,-4e-05,5e-05,-0.0,-5e-05,0.0,-6e-05,2e-05,1.0,0.0,3e-05,...,-0.0,3e-05,-3e-05,1e-05,-4e-05,-1e-05,-4e-05,4e-05,3e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,-1e-05,3e-05,0.0,0.0,1.0,3e-05,...,-2e-05,1e-05,0.0,3e-05,3e-05,-6e-05,2e-05,3e-05,-2e-05,-2e-05
9,-2e-05,-3e-05,-1e-05,0.0,7e-05,-1e-05,5e-05,3e-05,3e-05,1.0,...,2e-05,2e-05,-4e-05,0.0,-2e-05,3e-05,0.0,1e-05,-4e-05,-0.0


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

0.014064583003730528

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[8.48160019e+01 6.04860393e+01 5.08217239e+01 4.88233230e+01
 4.33921542e+01 3.58358472e+01 1.34955374e+01 8.87496245e+00
 7.89873482e+00 7.56346759e+00 6.85020004e+00 6.18360918e+00
 2.09513604e+00 1.37603139e+00 1.12774770e+00 1.04174948e+00
 1.00948013e+00 8.83262689e-01 3.15975842e-01 2.01754403e-01
 1.64813707e-01 1.55461783e-01 1.38742326e-01 1.17541107e-01
 4.24211303e-02 2.75507981e-02 2.22424816e-02 2.07540163e-02
 1.80859039e-02 1.40802622e-02 4.64967083e-03 2.75432912e-03
 2.12459597e-03 1.87819471e-03 1.83441463e-03 1.23719492e-03
 6.32844844e-04 4.47056761e-04 2.15776096e-04 1.74456058e-04
 1.35413811e-04 1.25378918e-29]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.03884,-0.006701,-0.002357,-0.000896,-0.000325,-9.9e-05,-5.2e-05,0.995654,-0.009639,-0.003256,...,-0.000281,-9.1e-05,-2.5e-05,-0.038447,-0.006745,-0.002233,-0.000887,-0.000317,-0.0001,-2.844298e-05
1,-0.105395,-0.007532,-0.002553,-0.000968,-0.000352,-0.0001,-5.9e-05,0.04997,-0.011238,-0.003544,...,-0.000298,-9.6e-05,-2.5e-05,-0.103063,-0.00751,-0.002383,-0.00094,-0.000353,-0.000111,-2.688817e-05
2,0.729272,-0.000462,-0.000148,-5.5e-05,-2.2e-05,-6e-06,-4e-06,0.002012,-0.000734,-0.000228,...,-2.4e-05,-3e-06,-2e-06,-0.684133,-0.000471,-0.000159,-4.4e-05,-2.6e-05,-6e-06,-5.388636e-08
3,0.637778,-0.0127,-0.004185,-0.001547,-0.000567,-0.00017,-9.5e-05,0.045775,-0.019643,-0.005847,...,-0.000489,-0.000156,-4.2e-05,0.68544,-0.012716,-0.003942,-0.001544,-0.00056,-0.000178,-4.77113e-05
4,0.194076,-0.016181,-0.005199,-0.001915,-0.000702,-0.000203,-0.000118,0.044055,-0.025698,-0.007261,...,-0.000607,-0.0002,-5.4e-05,0.19687,-0.016099,-0.004902,-0.001917,-0.000701,-0.000224,-5.973142e-05
5,0.099027,-0.021042,-0.006446,-0.002377,-0.000873,-0.000255,-0.000139,0.038234,-0.035321,-0.0091,...,-0.000758,-0.000253,-6.3e-05,0.099783,-0.020914,-0.006065,-0.002365,-0.000862,-0.000272,-7.077091e-05
6,0.016989,-0.045065,-0.007714,-0.002735,-0.000992,-0.000285,-0.000161,0.011357,0.993799,-0.011522,...,-0.000858,-0.000282,-7.3e-05,0.017008,-0.044358,-0.007303,-0.002694,-0.000975,-0.000312,-8.29998e-05
7,0.008809,-0.15495,-0.007234,-0.002422,-0.000899,-0.000257,-0.000142,0.006209,0.039211,-0.011373,...,-0.000772,-0.000245,-6.7e-05,0.008881,-0.145263,-0.0067,-0.00241,-0.000874,-0.000272,-7.573988e-05
8,0.001035,0.748786,-0.001015,-0.000355,-0.000129,-4e-05,-2e-05,0.000713,0.004159,-0.001666,...,-0.000108,-3.9e-05,-8e-06,0.001048,-0.662139,-0.000926,-0.000352,-0.000115,-3.8e-05,-7.660978e-06
9,0.012263,0.585833,-0.01241,-0.004142,-0.001483,-0.000438,-0.000241,0.008802,0.044334,-0.020358,...,-0.001285,-0.000419,-0.000113,0.012307,0.681372,-0.011627,-0.004088,-0.001476,-0.00046,-0.0001216365


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.0043462556467452895,
 0.014954930682800405,
 0.2707281216261568,
 0.31456005527872277,
 0.06624544887677042,
 0.026953707941334892,
 0.006200676799531291,
 0.028706418521374122,
 0.2512136420339577,
 0.31862784512554665,
 0.1938584087644828,
 0.20645826501168707,
 0.00790505481149728,
 0.021715921139428884,
 0.06222333712917616,
 0.20176430349013597,
 0.27416515742246916,
 0.3475964961004102,
 0.011167304562558211,
 0.02924115526148008,
 0.2185603365436556,
 0.30669790630745253,
 0.23528692866310108,
 0.23693796780499565,
 0.018028237308804318,
 0.04124834121226317,
 0.23298474850804873,
 0.3623278880277436,
 0.2901096887710356,
 0.23317111199454277,
 0.026667810680646054,
 0.0537419110494175,
 0.10360506639616007,
 0.18956989294252968,
 0.2542281254903862,
 0.5203073443268209,
 0.21785160928251757,
 0.2981325815311866,
 0.19090459169886387,
 0.34732329705421483,
 0.1982289281236861,
 0.48216286458271473]