In [1]:
DATA_NAME = 'coco-indoor' 
TRANSFORM = 'gabor'
CHANNEL = 'red'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_red_gabor_indoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,55330.05,1.54,-0.23,0.48,0.13,-0.18,-0.1,-2.79,0.91,-0.51,...,-0.24,0.39,-0.0,0.89,-0.16,-0.43,0.17,0.11,0.06,0.06
1,1.54,19050.11,0.73,-0.43,-0.12,-0.11,-0.01,1.65,-0.24,-0.41,...,-0.0,-0.0,0.03,1.1,-0.34,-0.17,-0.29,0.05,0.05,0.07
2,-0.23,0.73,6874.58,-0.08,-0.0,0.08,0.04,0.13,0.5,-0.22,...,0.05,-0.04,0.03,-0.7,0.57,0.0,-0.26,-0.0,0.08,-0.02
3,0.48,-0.43,-0.08,2517.91,-0.05,0.02,-0.01,-0.66,-0.24,0.02,...,-0.04,-0.04,0.02,0.41,0.09,0.11,0.06,0.02,-0.03,-0.01
4,0.13,-0.12,-0.0,-0.05,960.44,-0.01,0.0,0.04,0.01,0.21,...,-0.0,0.01,0.0,-0.02,0.11,-0.06,0.07,0.01,0.02,-0.02
5,-0.18,-0.11,0.08,0.02,-0.01,337.9,-0.0,-0.27,0.07,-0.02,...,-0.01,-0.01,0.0,-0.11,0.01,-0.01,0.04,-0.0,0.0,0.0
6,-0.1,-0.01,0.04,-0.01,0.0,-0.0,228.02,0.09,-0.0,0.08,...,0.02,-0.0,-0.01,-0.05,0.0,0.02,-0.02,0.02,-0.01,0.0
7,-2.79,1.65,0.13,-0.66,0.04,-0.27,0.09,75053.82,-0.18,0.62,...,-0.0,0.12,-0.14,0.57,-1.66,-0.58,-0.57,0.29,0.23,-0.15
8,0.91,-0.24,0.5,-0.24,0.01,0.07,-0.0,-0.18,27107.09,0.34,...,-0.1,0.04,0.01,0.65,1.02,-0.72,0.19,0.12,-0.01,-0.03
9,-0.51,-0.41,-0.22,0.02,0.21,-0.02,0.08,0.62,0.34,9416.34,...,0.03,0.03,-0.05,0.25,-0.32,0.22,0.01,0.0,-0.1,0.02


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-3e-05,-4e-05,2e-05,-2e-05,...,-4e-05,9e-05,-0.0,2e-05,-0.0,-2e-05,1e-05,1e-05,1e-05,2e-05
1,5e-05,1.0,6e-05,-6e-05,-3e-05,-4e-05,-0.0,4e-05,-1e-05,-3e-05,...,-0.0,-0.0,2e-05,3e-05,-2e-05,-1e-05,-4e-05,1e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-2e-05,-0.0,5e-05,3e-05,1e-05,4e-05,-3e-05,...,2e-05,-3e-05,3e-05,-4e-05,5e-05,0.0,-6e-05,-0.0,5e-05,-1e-05
3,4e-05,-6e-05,-2e-05,1.0,-4e-05,2e-05,-1e-05,-5e-05,-3e-05,0.0,...,-3e-05,-4e-05,3e-05,3e-05,1e-05,3e-05,2e-05,1e-05,-3e-05,-1e-05
4,2e-05,-3e-05,-0.0,-4e-05,1.0,-2e-05,1e-05,0.0,0.0,7e-05,...,-0.0,1e-05,1e-05,-0.0,3e-05,-2e-05,4e-05,1e-05,4e-05,-4e-05
5,-4e-05,-4e-05,5e-05,2e-05,-2e-05,1.0,-0.0,-5e-05,2e-05,-1e-05,...,-2e-05,-4e-05,2e-05,-3e-05,1e-05,-1e-05,5e-05,-1e-05,1e-05,0.0
6,-3e-05,-0.0,3e-05,-1e-05,1e-05,-0.0,1.0,2e-05,-0.0,5e-05,...,4e-05,-1e-05,-4e-05,-2e-05,0.0,2e-05,-3e-05,5e-05,-4e-05,1e-05
7,-4e-05,4e-05,1e-05,-5e-05,0.0,-5e-05,2e-05,1.0,-0.0,2e-05,...,-0.0,2e-05,-4e-05,1e-05,-4e-05,-3e-05,-4e-05,3e-05,4e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,2e-05,-0.0,-0.0,1.0,2e-05,...,-2e-05,1e-05,1e-05,2e-05,5e-05,-5e-05,2e-05,2e-05,-0.0,-2e-05
9,-2e-05,-3e-05,-3e-05,0.0,7e-05,-1e-05,5e-05,2e-05,2e-05,1.0,...,1e-05,2e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,0.0,-5e-05,2e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

15.918676892004195

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.34546986e+08 8.60178444e+07 7.42397880e+07 7.07316719e+07
 5.76024156e+07 5.04377013e+07 1.74722378e+07 1.08877268e+07
 8.73829917e+06 8.28574403e+06 7.62765285e+06 6.75699636e+06
 2.10050346e+06 1.38134236e+06 1.17540931e+06 1.10128102e+06
 9.45661172e+05 8.10996359e+05 2.93412560e+05 1.72106457e+05
 1.55091677e+05 1.44628855e+05 1.23145990e+05 9.54244087e+04
 4.09897139e+04 2.40617568e+04 2.20442620e+04 2.05897372e+04
 1.84126140e+04 1.44676389e+04 5.48231410e+03 4.09747696e+03
 3.68574800e+03 2.87633979e+03 2.81605446e+03 2.07966202e+03
 1.34694643e+03 1.04856290e+03 6.94514325e+02 6.45469951e+02
 6.07260496e+02 1.49130589e-23]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.035057,-0.005699,-0.001945,-0.000717,-0.000269,-9.9e-05,-6.3e-05,0.9969886,-0.008786,-0.002679,...,-0.000236,-9.6e-05,-4.6e-05,-0.034104,-0.005624,-0.001988,-0.000717,-0.000259,-0.000111,-4.7e-05
1,-0.124255,-0.006262,-0.002063,-0.000757,-0.000287,-9.7e-05,-7e-05,0.03678848,-0.010118,-0.002848,...,-0.000245,-9.8e-05,-4.5e-05,-0.113279,-0.0061,-0.002073,-0.000744,-0.000284,-0.000122,-4.6e-05
2,0.790626,-0.001761,-0.000569,-0.000205,-7.7e-05,-2.9e-05,-2e-05,0.007370664,-0.002923,-0.000809,...,-7.4e-05,-2.4e-05,-1.3e-05,-0.61114,-0.001733,-0.000592,-0.000197,-7.9e-05,-3.2e-05,-1.1e-05
3,0.583131,-0.012723,-0.004118,-0.001473,-0.000564,-0.000202,-0.000136,0.04676644,-0.021262,-0.005704,...,-0.000492,-0.000195,-9.1e-05,0.769949,-0.012454,-0.004169,-0.001493,-0.000545,-0.00024,-9.6e-05
4,0.09289,-0.011202,-0.003504,-0.001249,-0.000475,-0.000165,-0.000116,0.02699742,-0.019622,-0.004862,...,-0.000415,-0.000169,-8e-05,0.097511,-0.010887,-0.003565,-0.001264,-0.000471,-0.000207,-8.3e-05
5,0.092686,-0.018556,-0.00565,-0.00202,-0.000773,-0.000272,-0.00018,0.03493805,-0.033825,-0.007898,...,-0.00068,-0.00028,-0.000124,0.095827,-0.018007,-0.00573,-0.002038,-0.000751,-0.000327,-0.000128
6,0.015939,-0.036489,-0.006931,-0.002409,-0.000908,-0.000317,-0.000216,0.01033496,0.995297,-0.010132,...,-0.000798,-0.000324,-0.000147,0.016109,-0.03417,-0.007115,-0.002399,-0.000881,-0.000386,-0.000154
7,0.009699,-0.105335,-0.007914,-0.002619,-0.001009,-0.000348,-0.000235,0.006624837,0.043313,-0.012095,...,-0.000879,-0.000343,-0.000164,0.009858,-0.087197,-0.008045,-0.002635,-0.000963,-0.000417,-0.00017
8,0.004456,0.902521,-0.004819,-0.001598,-0.0006,-0.000214,-0.000141,0.003088832,0.015909,-0.007704,...,-0.000519,-0.000214,-9.3e-05,0.004509,-0.407892,-0.004954,-0.001596,-0.000572,-0.000248,-9.6e-05
9,0.007722,0.313257,-0.008866,-0.002877,-0.001075,-0.000384,-0.000256,0.005412338,0.026028,-0.014321,...,-0.000948,-0.000381,-0.000179,0.007793,0.838799,-0.009171,-0.002889,-0.001056,-0.000453,-0.00018


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.0030113981909440923,
 0.01680433264567127,
 0.20937436478599758,
 0.23005069907320919,
 0.04544183217921549,
 0.04084099024853949,
 0.0047026177674722724,
 0.014551758348685495,
 0.09747910295448903,
 0.16120084795039358,
 0.1707295502754239,
 0.12671668404696868,
 0.007580445682748627,
 0.026082462418272723,
 0.16024265516663572,
 0.20470281145826463,
 0.11552625354836232,
 0.09932133643029428,
 0.009826862876729736,
 0.04933113522080124,
 0.27207226572525434,
 0.3711773158171139,
 0.14391918180325924,
 0.06803411296261075,
 0.01755378720740708,
 0.07409411134282629,
 0.17843696911594253,
 0.30362337732074207,
 0.34538288723721144,
 0.2755811762302265,
 0.04198909277574692,
 0.25652943600988964,
 0.33766982240064747,
 0.2837259666073897,
 0.19697330277921976,
 0.5154205323515373,
 0.22449714930296316,
 0.32875664236799373,
 0.16496565641953342,
 0.2988322866786087,
 0.15164983016182532,
 0.5831744724838421]