In [1]:
DATA_NAME = 'coco-outdoor' 
TRANSFORM = 'gabor'
CHANNEL = 'blue'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_blue_gabor_outdoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,53291.67,1.53,-0.24,0.53,0.15,-0.2,-0.14,-2.81,0.93,-0.53,...,-0.24,0.46,-0.04,0.84,-0.14,-0.48,0.21,0.08,0.05,0.05
1,1.53,19038.55,0.76,-0.47,-0.11,-0.13,-0.01,1.64,-0.24,-0.44,...,0.01,-0.01,0.02,1.06,-0.34,-0.17,-0.33,0.06,0.05,0.08
2,-0.24,0.76,7753.39,-0.11,-0.0,0.1,0.05,0.16,0.56,-0.23,...,0.06,-0.05,0.04,-0.74,0.64,0.0,-0.33,-0.0,0.11,-0.02
3,0.53,-0.47,-0.11,3045.25,-0.06,0.03,-0.02,-0.72,-0.25,0.01,...,-0.05,-0.06,0.02,0.41,0.05,0.13,0.07,0.03,-0.04,-0.01
4,0.15,-0.11,-0.0,-0.06,1302.13,-0.01,0.01,-0.0,0.01,0.26,...,-0.0,0.01,0.01,-0.01,0.14,-0.08,0.09,0.01,0.04,-0.03
5,-0.2,-0.13,0.1,0.03,-0.01,512.79,0.0,-0.32,0.1,-0.04,...,-0.02,-0.01,0.01,-0.16,0.02,0.0,0.05,-0.0,0.0,-0.0
6,-0.14,-0.01,0.05,-0.02,0.01,0.0,354.39,0.09,-0.0,0.11,...,0.03,-0.01,-0.01,-0.06,-0.01,0.03,-0.03,0.03,-0.02,0.0
7,-2.81,1.64,0.16,-0.72,-0.0,-0.32,0.09,72789.11,-0.18,0.59,...,-0.02,0.18,-0.17,0.59,-1.8,-0.55,-0.62,0.36,0.24,-0.19
8,0.93,-0.24,0.56,-0.25,0.01,0.1,-0.0,-0.18,27265.87,0.35,...,-0.14,0.05,0.01,0.62,1.06,-0.77,0.23,0.16,-0.03,-0.05
9,-0.53,-0.44,-0.23,0.01,0.26,-0.04,0.11,0.59,0.35,10545.94,...,0.06,0.03,-0.06,0.27,-0.36,0.25,0.01,0.01,-0.12,0.02


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-3e-05,-5e-05,2e-05,-2e-05,...,-3e-05,8e-05,-1e-05,2e-05,-0.0,-2e-05,2e-05,1e-05,1e-05,1e-05
1,5e-05,1.0,6e-05,-6e-05,-2e-05,-4e-05,-0.0,4e-05,-1e-05,-3e-05,...,0.0,-0.0,1e-05,3e-05,-2e-05,-1e-05,-4e-05,1e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-2e-05,-0.0,5e-05,3e-05,1e-05,4e-05,-3e-05,...,2e-05,-2e-05,3e-05,-4e-05,5e-05,0.0,-7e-05,-0.0,5e-05,-1e-05
3,4e-05,-6e-05,-2e-05,1.0,-3e-05,2e-05,-2e-05,-5e-05,-3e-05,0.0,...,-2e-05,-5e-05,3e-05,3e-05,1e-05,3e-05,2e-05,2e-05,-3e-05,-1e-05
4,2e-05,-2e-05,-0.0,-3e-05,1.0,-2e-05,1e-05,-0.0,0.0,7e-05,...,-0.0,1e-05,1e-05,-0.0,3e-05,-3e-05,4e-05,1e-05,4e-05,-4e-05
5,-4e-05,-4e-05,5e-05,2e-05,-2e-05,1.0,0.0,-5e-05,3e-05,-2e-05,...,-3e-05,-3e-05,3e-05,-3e-05,1e-05,0.0,4e-05,-0.0,0.0,-1e-05
6,-3e-05,-0.0,3e-05,-2e-05,1e-05,0.0,1.0,2e-05,-0.0,6e-05,...,4e-05,-1e-05,-3e-05,-1e-05,-0.0,2e-05,-3e-05,5e-05,-3e-05,1e-05
7,-5e-05,4e-05,1e-05,-5e-05,-0.0,-5e-05,2e-05,1.0,-0.0,2e-05,...,-0.0,3e-05,-4e-05,1e-05,-5e-05,-2e-05,-4e-05,4e-05,4e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,3e-05,-0.0,-0.0,1.0,2e-05,...,-2e-05,1e-05,0.0,2e-05,4e-05,-5e-05,2e-05,3e-05,-1e-05,-2e-05
9,-2e-05,-3e-05,-3e-05,0.0,7e-05,-2e-05,6e-05,2e-05,2e-05,1.0,...,2e-05,1e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,0.0,-5e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

16.259306624996476

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.26571298e+08 8.21358933e+07 6.95725468e+07 6.67859369e+07
 5.92165159e+07 5.11043372e+07 1.76865674e+07 1.12741942e+07
 1.02904164e+07 8.66935171e+06 8.02833932e+06 7.25341759e+06
 2.63326497e+06 1.60686495e+06 1.46174062e+06 1.38448681e+06
 1.22668812e+06 1.07642205e+06 4.04020867e+05 2.62271963e+05
 2.32836815e+05 2.18301302e+05 1.96669651e+05 1.64562258e+05
 6.76786769e+04 4.67900965e+04 4.04359932e+04 3.77848307e+04
 3.56020261e+04 2.81475290e+04 1.22344863e+04 9.63402978e+03
 8.61019655e+03 7.49671573e+03 6.79692168e+03 5.12260606e+03
 3.34818337e+03 2.74129763e+03 2.14466306e+03 2.03186403e+03
 1.77822564e+03 7.00151584e-24]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.033944,-0.005848,-0.002249,-0.000886,-0.000374,-0.000152,-0.0001,0.996814,-0.009142,-0.003086,...,-0.000342,-0.000157,-7.8e-05,-0.034244,-0.006559,-0.002254,-0.000905,-0.000355,-0.000172,-8.7e-05
1,-0.104192,-0.006502,-0.002416,-0.000947,-0.000401,-0.000154,-0.000111,0.03843,-0.010688,-0.003325,...,-0.000359,-0.000164,-8e-05,-0.109001,-0.00728,-0.002381,-0.00095,-0.000391,-0.000188,-8.6e-05
2,-0.64093,-0.00116,-0.000429,-0.000163,-7.1e-05,-2.7e-05,-1.8e-05,0.004381,-0.001959,-0.000569,...,-5.7e-05,-3.3e-05,-1.3e-05,0.767067,-0.001302,-0.000409,-0.00018,-6.2e-05,-3.4e-05,-1.9e-05
3,0.727294,-0.011076,-0.004038,-0.001543,-0.00066,-0.000266,-0.000183,0.039567,-0.018939,-0.005587,...,-0.000604,-0.000271,-0.000134,0.5953,-0.012564,-0.004011,-0.001591,-0.000631,-0.000309,-0.00015
4,0.178642,-0.012733,-0.004542,-0.001731,-0.000738,-0.000289,-0.000206,0.035089,-0.022405,-0.006284,...,-0.000674,-0.000312,-0.000154,0.169312,-0.014425,-0.004527,-0.001787,-0.000718,-0.000352,-0.000169
5,0.123104,-0.018859,-0.006541,-0.002497,-0.001069,-0.000422,-0.000287,0.038992,-0.034678,-0.009123,...,-0.000981,-0.000457,-0.000214,0.119745,-0.021497,-0.006501,-0.00257,-0.001025,-0.0005,-0.000238
6,0.016769,-0.035011,-0.007748,-0.002846,-0.001199,-0.000469,-0.000326,0.010616,0.994621,-0.011429,...,-0.0011,-0.000507,-0.000242,0.016657,-0.047414,-0.00778,-0.002894,-0.001148,-0.000563,-0.000271
7,0.008147,-0.069973,-0.007038,-0.002442,-0.001053,-0.000405,-0.00028,0.005457,0.035519,-0.010951,...,-0.000957,-0.000425,-0.000211,0.008176,-0.26354,-0.006965,-0.002508,-0.000989,-0.000479,-0.000236
8,0.011571,-0.168519,-0.011208,-0.003865,-0.001624,-0.000638,-0.000442,0.007869,0.044394,-0.01785,...,-0.00149,-0.000676,-0.000334,0.011524,0.944569,-0.011232,-0.003961,-0.00157,-0.000758,-0.000368
9,0.007238,0.921576,-0.008834,-0.003015,-0.001256,-0.000501,-0.00034,0.004978,0.023896,-0.014725,...,-0.001146,-0.000529,-0.000254,0.007219,0.090809,-0.008888,-0.003083,-0.001202,-0.00058,-0.000276


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.0031855344940083796,
 0.014991113666682931,
 0.2329327256784186,
 0.2727064741708535,
 0.07132044715265085,
 0.043474571126299266,
 0.005379028975491917,
 0.041505607397672284,
 0.0554311114178162,
 0.07842414906105066,
 0.17673316768154912,
 0.1725571577112487,
 0.007554504800000972,
 0.041804800318567814,
 0.2626651469472523,
 0.3422831533552918,
 0.18047821910181472,
 0.17913304706492383,
 0.012408697041959904,
 0.04513438583180218,
 0.14574247137754137,
 0.24293176476790124,
 0.2572847424482986,
 0.1892701467165736,
 0.02245874562423633,
 0.05365105625460309,
 0.11461982788582548,
 0.2578830467883738,
 0.2458604577614769,
 0.3878601046657373,
 0.04995778997607636,
 0.13096981536773067,
 0.19757567070091697,
 0.27269006904893534,
 0.25964425976273653,
 0.40652016456378604,
 0.1805663754396769,
 0.26011732103927154,
 0.21985502439020244,
 0.22430948823512042,
 0.12268138675069706,
 0.5911252566222317]