In [1]:
DATA_NAME = 'coco-outdoor' 
TRANSFORM = 'gabor'
CHANNEL = 'green'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_green_gabor_outdoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,53963.17,1.6,-0.24,0.54,0.16,-0.21,-0.15,-2.77,0.91,-0.55,...,-0.24,0.46,-0.04,0.96,-0.15,-0.49,0.2,0.08,0.05,0.05
1,1.6,20823.9,0.77,-0.52,-0.11,-0.13,-0.01,1.77,-0.27,-0.46,...,0.0,-0.01,0.02,1.17,-0.33,-0.17,-0.35,0.06,0.05,0.09
2,-0.24,0.77,7666.8,-0.11,-0.0,0.1,0.05,0.12,0.57,-0.23,...,0.06,-0.05,0.04,-0.77,0.64,-0.0,-0.34,-0.0,0.11,-0.02
3,0.54,-0.52,-0.11,3192.95,-0.07,0.03,-0.02,-0.76,-0.26,0.01,...,-0.05,-0.06,0.02,0.43,0.05,0.13,0.07,0.03,-0.04,-0.01
4,0.16,-0.11,-0.0,-0.07,1371.68,-0.01,0.01,0.0,0.02,0.28,...,-0.0,0.01,0.01,-0.01,0.15,-0.09,0.09,0.01,0.04,-0.03
5,-0.21,-0.13,0.1,0.03,-0.01,537.37,0.0,-0.33,0.1,-0.04,...,-0.02,-0.02,0.01,-0.17,0.02,0.01,0.06,-0.0,0.0,-0.0
6,-0.15,-0.01,0.05,-0.02,0.01,0.0,364.26,0.09,-0.0,0.11,...,0.03,-0.01,-0.01,-0.07,-0.01,0.03,-0.03,0.04,-0.02,0.0
7,-2.77,1.77,0.12,-0.76,0.0,-0.33,0.09,76423.44,-0.15,0.68,...,-0.01,0.18,-0.17,0.52,-1.85,-0.58,-0.67,0.39,0.26,-0.2
8,0.91,-0.27,0.57,-0.26,0.02,0.1,-0.0,-0.15,28467.98,0.38,...,-0.14,0.05,0.01,0.66,1.11,-0.81,0.24,0.19,-0.04,-0.05
9,-0.55,-0.46,-0.23,0.01,0.28,-0.04,0.11,0.68,0.38,11276.55,...,0.06,0.04,-0.06,0.27,-0.38,0.27,0.01,0.01,-0.13,0.02


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-3e-05,-4e-05,2e-05,-2e-05,...,-3e-05,8e-05,-1e-05,2e-05,-0.0,-2e-05,2e-05,1e-05,1e-05,1e-05
1,5e-05,1.0,6e-05,-6e-05,-2e-05,-4e-05,-0.0,4e-05,-1e-05,-3e-05,...,0.0,-0.0,1e-05,3e-05,-2e-05,-1e-05,-4e-05,1e-05,1e-05,4e-05
2,-1e-05,6e-05,1.0,-2e-05,-0.0,5e-05,3e-05,0.0,4e-05,-2e-05,...,2e-05,-2e-05,3e-05,-4e-05,5e-05,-0.0,-7e-05,-0.0,5e-05,-1e-05
3,4e-05,-6e-05,-2e-05,1.0,-3e-05,2e-05,-2e-05,-5e-05,-3e-05,0.0,...,-2e-05,-5e-05,3e-05,3e-05,1e-05,3e-05,2e-05,1e-05,-3e-05,-1e-05
4,2e-05,-2e-05,-0.0,-3e-05,1.0,-2e-05,1e-05,0.0,0.0,7e-05,...,-0.0,1e-05,1e-05,-0.0,3e-05,-3e-05,4e-05,1e-05,4e-05,-4e-05
5,-4e-05,-4e-05,5e-05,2e-05,-2e-05,1.0,0.0,-5e-05,2e-05,-2e-05,...,-3e-05,-3e-05,3e-05,-3e-05,1e-05,0.0,4e-05,-0.0,0.0,-1e-05
6,-3e-05,-0.0,3e-05,-2e-05,1e-05,0.0,1.0,2e-05,-0.0,6e-05,...,4e-05,-1e-05,-3e-05,-2e-05,-0.0,2e-05,-3e-05,5e-05,-3e-05,1e-05
7,-4e-05,4e-05,0.0,-5e-05,0.0,-5e-05,2e-05,1.0,-0.0,2e-05,...,-0.0,3e-05,-4e-05,1e-05,-5e-05,-2e-05,-4e-05,4e-05,4e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,2e-05,-0.0,-0.0,1.0,2e-05,...,-2e-05,1e-05,0.0,2e-05,4e-05,-5e-05,2e-05,3e-05,-1e-05,-2e-05
9,-2e-05,-3e-05,-2e-05,0.0,7e-05,-2e-05,6e-05,2e-05,2e-05,1.0,...,2e-05,1e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,0.0,-5e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

17.09750967397214

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.39525127e+08 8.98270881e+07 7.92980662e+07 6.97316838e+07
 6.44704473e+07 5.91778232e+07 1.92880905e+07 1.24724762e+07
 1.15192388e+07 1.03518241e+07 9.51226415e+06 8.25586786e+06
 3.00973874e+06 1.98148806e+06 1.51046731e+06 1.41480424e+06
 1.30444548e+06 1.07340063e+06 4.52853868e+05 2.81360969e+05
 2.57298722e+05 2.35430741e+05 1.99356493e+05 1.70961213e+05
 7.27621144e+04 5.14787713e+04 4.86547347e+04 4.32422903e+04
 3.77152513e+04 3.06424858e+04 1.29421502e+04 1.01749933e+04
 9.17209266e+03 7.60018690e+03 7.22425116e+03 5.38816885e+03
 3.44481670e+03 2.89191814e+03 2.30023991e+03 2.00594920e+03
 1.80808632e+03 8.56316831e-24]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.030194,-0.006134,-0.002117,-0.000886,-0.000375,-0.000151,-9.8e-05,0.996843,-0.009077,-0.003145,...,-0.000342,-0.00015,-7.5e-05,-0.03747,-0.006605,-0.002204,-0.000908,-0.00039,-0.000171,-8.2e-05
1,-0.07244,-0.006598,-0.002187,-0.000911,-0.000387,-0.000147,-0.000105,0.036207,-0.010225,-0.003266,...,-0.000345,-0.00015,-7.4e-05,-0.162212,-0.007072,-0.002241,-0.000917,-0.000414,-0.00018,-7.8e-05
2,-0.174781,-0.008102,-0.002658,-0.001081,-0.000466,-0.000184,-0.000125,0.032388,-0.012809,-0.003964,...,-0.000417,-0.000186,-8.9e-05,0.965682,-0.008743,-0.002731,-0.001129,-0.000483,-0.000217,-0.000101
3,0.93484,-0.007833,-0.002523,-0.001023,-0.000439,-0.000178,-0.00012,0.023652,-0.012715,-0.003808,...,-0.000404,-0.000169,-8.6e-05,0.115462,-0.008516,-0.002616,-0.001055,-0.000462,-0.000203,-9.2e-05
4,0.167718,-0.00788,-0.002496,-0.001008,-0.00043,-0.000167,-0.00012,0.020053,-0.013016,-0.003744,...,-0.000391,-0.000172,-8.8e-05,0.071074,-0.008515,-0.002594,-0.001047,-0.000463,-0.000205,-9.4e-05
5,0.244772,-0.022979,-0.007128,-0.002896,-0.001245,-0.000488,-0.000328,0.049227,-0.038808,-0.010786,...,-0.001137,-0.000506,-0.000241,0.142283,-0.024881,-0.007377,-0.002995,-0.00131,-0.000577,-0.000261
6,0.017393,-0.039948,-0.00715,-0.002811,-0.001189,-0.000463,-0.000316,0.010369,0.994213,-0.011614,...,-0.001088,-0.00048,-0.000231,0.015588,-0.048808,-0.007507,-0.002872,-0.001248,-0.000553,-0.000252
7,0.007802,-0.093864,-0.005909,-0.002209,-0.00096,-0.000366,-0.000248,0.004962,0.033315,-0.010213,...,-0.000868,-0.000368,-0.000185,0.007196,-0.273605,-0.006127,-0.002281,-0.000985,-0.00043,-0.000201
8,0.010388,-0.253466,-0.008667,-0.003232,-0.001365,-0.000534,-0.000363,0.006716,0.039381,-0.01536,...,-0.001249,-0.000541,-0.000271,0.00952,0.925448,-0.00913,-0.00333,-0.001448,-0.00063,-0.00029
9,0.008464,0.887975,-0.008146,-0.00305,-0.001277,-0.000508,-0.000338,0.005513,0.028954,-0.014935,...,-0.001164,-0.000513,-0.000249,0.007785,0.146387,-0.008627,-0.003132,-0.001342,-0.000584,-0.000264


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.0031567448569140844,
 0.019339467318620907,
 0.03431817303129836,
 0.06515988490883351,
 0.15261704086600225,
 0.15157538201863507,
 0.005787439195321764,
 0.04692463048087603,
 0.07455176658025764,
 0.11202534698075395,
 0.1472913663930212,
 0.08533901952472411,
 0.007582855952205869,
 0.01625724562624198,
 0.07297513583208637,
 0.22053737837191,
 0.2750173509280952,
 0.07262230795711955,
 0.011957794271539757,
 0.060858054002562945,
 0.16314188219375103,
 0.20251734033946833,
 0.1792216338293421,
 0.1936869087178622,
 0.02479115815938482,
 0.2011099585877517,
 0.3100291558246594,
 0.22012266877708364,
 0.28621008590431585,
 0.3927405920983442,
 0.04888844207456167,
 0.1837248086246459,
 0.2597025565433855,
 0.23687106083432208,
 0.22047718048519127,
 0.4826052298595762,
 0.194728315313919,
 0.3074908443628801,
 0.2442317518481999,
 0.291664209477134,
 0.13681220872844135,
 0.5882459650439185]