In [1]:
DATA_NAME = 'coco-indoor' 
TRANSFORM = 'gabor'
CHANNEL = 'gray'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gray_gabor_indoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,48146.82,1.36,-0.2,0.43,0.11,-0.17,-0.08,-2.47,0.81,-0.44,...,-0.22,0.36,-0.0,0.81,-0.15,-0.38,0.14,0.11,0.05,0.05
1,1.36,16783.98,0.64,-0.39,-0.11,-0.1,-0.01,1.48,-0.2,-0.36,...,-0.0,-0.0,0.03,0.98,-0.31,-0.15,-0.26,0.04,0.04,0.06
2,-0.2,0.64,5987.39,-0.07,0.0,0.08,0.04,0.12,0.44,-0.2,...,0.04,-0.04,0.03,-0.62,0.52,0.0,-0.24,-0.0,0.07,-0.01
3,0.43,-0.39,-0.07,2323.21,-0.05,0.02,-0.01,-0.6,-0.21,0.02,...,-0.04,-0.03,0.01,0.37,0.09,0.1,0.06,0.02,-0.03,-0.01
4,0.11,-0.11,0.0,-0.05,898.38,-0.01,0.0,0.03,0.0,0.2,...,-0.0,0.01,0.0,-0.02,0.11,-0.06,0.06,0.01,0.02,-0.01
5,-0.17,-0.1,0.08,0.02,-0.01,336.72,-0.0,-0.26,0.07,-0.02,...,-0.01,-0.01,0.0,-0.11,0.02,-0.01,0.04,-0.0,0.0,0.0
6,-0.08,-0.01,0.04,-0.01,0.0,-0.0,204.23,0.08,-0.0,0.07,...,0.02,-0.0,-0.01,-0.05,0.0,0.02,-0.02,0.02,-0.01,0.0
7,-2.47,1.48,0.12,-0.6,0.03,-0.26,0.08,67164.46,-0.14,0.54,...,-0.0,0.11,-0.12,0.55,-1.53,-0.52,-0.53,0.27,0.21,-0.14
8,0.81,-0.2,0.44,-0.21,0.0,0.07,-0.0,-0.14,23793.42,0.29,...,-0.1,0.04,0.01,0.57,0.94,-0.63,0.18,0.12,-0.01,-0.03
9,-0.44,-0.36,-0.2,0.02,0.2,-0.02,0.07,0.54,0.29,8489.77,...,0.03,0.03,-0.04,0.23,-0.29,0.19,0.01,0.0,-0.09,0.02


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-3e-05,-4e-05,2e-05,-2e-05,...,-3e-05,9e-05,-0.0,2e-05,-1e-05,-2e-05,1e-05,2e-05,1e-05,2e-05
1,5e-05,1.0,6e-05,-6e-05,-3e-05,-4e-05,-0.0,4e-05,-1e-05,-3e-05,...,-0.0,-0.0,2e-05,3e-05,-2e-05,-1e-05,-4e-05,1e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-2e-05,0.0,5e-05,4e-05,1e-05,4e-05,-3e-05,...,2e-05,-3e-05,3e-05,-4e-05,5e-05,0.0,-6e-05,-0.0,5e-05,-1e-05
3,4e-05,-6e-05,-2e-05,1.0,-4e-05,2e-05,-1e-05,-5e-05,-3e-05,0.0,...,-3e-05,-4e-05,2e-05,3e-05,1e-05,3e-05,2e-05,1e-05,-3e-05,-1e-05
4,2e-05,-3e-05,0.0,-4e-05,1.0,-2e-05,1e-05,0.0,0.0,7e-05,...,-0.0,1e-05,1e-05,-0.0,3e-05,-3e-05,4e-05,1e-05,4e-05,-4e-05
5,-4e-05,-4e-05,5e-05,2e-05,-2e-05,1.0,-1e-05,-5e-05,2e-05,-1e-05,...,-2e-05,-4e-05,2e-05,-3e-05,1e-05,-0.0,5e-05,-1e-05,1e-05,0.0
6,-3e-05,-0.0,4e-05,-1e-05,1e-05,-1e-05,1.0,2e-05,-0.0,5e-05,...,4e-05,-1e-05,-4e-05,-2e-05,0.0,2e-05,-3e-05,5e-05,-4e-05,1e-05
7,-4e-05,4e-05,1e-05,-5e-05,0.0,-5e-05,2e-05,1.0,-0.0,2e-05,...,-0.0,2e-05,-4e-05,1e-05,-4e-05,-3e-05,-4e-05,3e-05,4e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,2e-05,-0.0,-0.0,1.0,2e-05,...,-2e-05,1e-05,1e-05,2e-05,5e-05,-5e-05,2e-05,3e-05,-0.0,-2e-05
9,-2e-05,-3e-05,-3e-05,0.0,7e-05,-1e-05,5e-05,2e-05,2e-05,1.0,...,1e-05,2e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,0.0,-5e-05,2e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

14.128728730183205

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.07733878e+08 6.56066955e+07 5.96095013e+07 5.50445045e+07
 4.86417540e+07 3.81830051e+07 1.34632990e+07 8.36738617e+06
 7.34156302e+06 6.65639055e+06 5.73449466e+06 4.50248830e+06
 1.70663467e+06 1.11112458e+06 8.95546271e+05 8.42679657e+05
 7.52558950e+05 6.71757925e+05 2.54889140e+05 1.45750079e+05
 1.37605153e+05 1.23835950e+05 9.98628068e+04 8.64669270e+04
 3.45076904e+04 2.34975148e+04 2.19139806e+04 1.88123652e+04
 1.66694639e+04 1.34316375e+04 5.35897099e+03 3.89352099e+03
 3.39633946e+03 2.74267020e+03 2.64437481e+03 1.93435619e+03
 1.19419312e+03 9.13370098e+02 6.96460066e+02 5.73905429e+02
 5.23161903e+02 9.92252350e-24]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.03213,-0.005634,-0.001904,-0.000744,-0.000283,-0.00011,-6.3e-05,0.997211,-0.008623,-0.002717,...,-0.000258,-0.000104,-4.7e-05,-0.036027,-0.005984,-0.001951,-0.00077,-0.000304,-0.000119,-5e-05
1,-0.103587,-0.005545,-0.001801,-0.000702,-0.000269,-9.7e-05,-6.3e-05,0.029098,-0.008933,-0.00258,...,-0.000238,-9.5e-05,-4.2e-05,-0.196295,-0.005848,-0.001814,-0.000711,-0.000298,-0.000116,-4.2e-05
2,-0.310761,-0.006295,-0.002036,-0.000773,-0.000301,-0.000113,-6.9e-05,0.026325,-0.010293,-0.002903,...,-0.000267,-0.000111,-4.7e-05,0.932654,-0.006678,-0.002053,-0.000817,-0.000321,-0.00013,-5.3e-05
3,0.91544,-0.009847,-0.00315,-0.001196,-0.000463,-0.000178,-0.000108,0.0348,-0.016351,-0.004539,...,-0.000425,-0.000164,-7.4e-05,0.251801,-0.010491,-0.003205,-0.001247,-0.000501,-0.000198,-7.7e-05
4,0.213328,-0.014054,-0.0044,-0.001667,-0.000643,-0.00024,-0.000149,0.038325,-0.023939,-0.006331,...,-0.000585,-0.000238,-0.000106,0.144644,-0.014908,-0.004486,-0.001744,-0.000707,-0.000281,-0.000111
5,0.084355,-0.0172,-0.005148,-0.001952,-0.000757,-0.000284,-0.000168,0.029972,-0.031373,-0.007476,...,-0.000692,-0.000285,-0.000119,0.071258,-0.018324,-0.005233,-0.002036,-0.000819,-0.000323,-0.000125
6,0.016083,-0.036713,-0.006834,-0.002521,-0.000962,-0.000358,-0.000219,0.01003,0.995207,-0.010435,...,-0.000879,-0.000357,-0.000153,0.015203,-0.042213,-0.007037,-0.002599,-0.001042,-0.000414,-0.000163
7,0.008801,-0.099502,-0.007058,-0.002481,-0.000969,-0.000355,-0.000216,0.005806,0.038745,-0.011337,...,-0.000877,-0.000342,-0.000155,0.008465,-0.177915,-0.007188,-0.002585,-0.00103,-0.000404,-0.000163
8,0.008236,-0.30027,-0.007748,-0.002693,-0.001025,-0.000384,-0.000234,0.005531,0.030784,-0.012795,...,-0.000939,-0.000375,-0.000169,0.007873,0.937956,-0.007994,-0.002804,-0.001127,-0.00044,-0.000175
9,0.011291,0.91997,-0.012079,-0.004203,-0.001596,-0.000603,-0.000361,0.007608,0.039072,-0.020403,...,-0.001449,-0.000587,-0.000254,0.010818,0.244716,-0.012476,-0.004361,-0.001728,-0.000677,-0.000264


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.0027886220132145034,
 0.02768233775341966,
 0.06734560794091826,
 0.08455958312566059,
 0.053309983192887156,
 0.017032352182601573,
 0.004792972460005429,
 0.024484209624098474,
 0.06204389739723071,
 0.08002956669061767,
 0.0545497754358133,
 0.02372163779213643,
 0.007415951430234435,
 0.019799708597279975,
 0.13128225936516136,
 0.17891836245418447,
 0.23935983728392896,
 0.28080306535824606,
 0.010196577580695698,
 0.11593536399791393,
 0.21221470444221713,
 0.1850066457385917,
 0.17775947046654839,
 0.20095720862574662,
 0.021680532165309296,
 0.216746525469799,
 0.2895470284481574,
 0.1581897968722683,
 0.2760656580725651,
 0.34021421148720943,
 0.04091221215597107,
 0.11878969269089135,
 0.17043804871764423,
 0.23316174772851805,
 0.2133920714111346,
 0.4958188654679442,
 0.15747832124610062,
 0.22146273641874425,
 0.23473612274160982,
 0.2772334875074861,
 0.1622611726152049,
 0.5692714035354667]