In [1]:
DATA_NAME = 'coco-outdoor' 
TRANSFORM = 'learned'
CHANNEL = ''

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_learned_outdoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,5.72,0.00,-0.00,-0.00,0.00,0.0,0.0,0.0,0.0,-0.0,...,-0.0,0.0,0.0,-0.0,0.0,-0.00,-0.00,-0.00,0.00,-0.00
1,0.00,6.53,-0.00,-0.00,-0.00,0.0,0.0,0.0,-0.0,0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,0.00,-0.00,0.00,0.00,-0.00
2,-0.00,-0.00,7.16,0.00,0.00,-0.0,0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,0.0,0.0,-0.00,0.00,-0.00,0.00,0.00
3,-0.00,-0.00,0.00,4.84,-0.00,-0.0,-0.0,0.0,0.0,-0.0,...,-0.0,0.0,0.0,0.0,-0.0,0.00,0.00,-0.00,-0.00,-0.00
4,0.00,-0.00,0.00,-0.00,8.76,-0.0,-0.0,0.0,0.0,0.0,...,-0.0,0.0,0.0,0.0,0.0,-0.00,-0.00,0.00,0.00,-0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,-0.00,0.00,-0.00,0.00,-0.00,0.0,0.0,-0.0,0.0,0.0,...,0.0,-0.0,0.0,0.0,-0.0,7.33,0.00,-0.00,-0.00,-0.00
60,-0.00,-0.00,0.00,0.00,-0.00,-0.0,-0.0,-0.0,0.0,0.0,...,-0.0,-0.0,0.0,0.0,-0.0,0.00,0.91,-0.00,0.00,0.00
61,-0.00,0.00,-0.00,-0.00,0.00,-0.0,0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,0.0,-0.00,-0.00,2.83,0.00,0.00
62,0.00,0.00,0.00,-0.00,0.00,-0.0,-0.0,0.0,0.0,-0.0,...,-0.0,-0.0,-0.0,0.0,-0.0,-0.00,0.00,0.00,5.81,-0.00


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,1.00000,0.00001,-0.00002,-0.00002,0.00003,0.00000,0.00001,0.00003,0.00001,-0.00003,...,-0.00001,0.00003,0.00001,-0.00006,0.00001,-0.00000,-0.00002,-0.00002,0.00002,-0.00000
1,0.00001,1.00000,-0.00002,-0.00002,-0.00000,0.00000,0.00004,0.00005,-0.00000,0.00002,...,-0.00002,-0.00005,-0.00001,-0.00006,-0.00002,0.00005,-0.00001,0.00003,0.00004,-0.00002
2,-0.00002,-0.00002,1.00000,0.00004,0.00004,-0.00003,0.00005,-0.00005,-0.00002,-0.00003,...,-0.00000,-0.00000,-0.00002,0.00003,0.00001,-0.00003,0.00000,-0.00002,0.00005,0.00002
3,-0.00002,-0.00002,0.00004,1.00000,-0.00003,-0.00005,-0.00000,0.00002,0.00003,-0.00007,...,-0.00002,0.00001,0.00004,0.00002,-0.00002,0.00002,0.00001,-0.00005,-0.00000,-0.00003
4,0.00003,-0.00000,0.00004,-0.00003,1.00000,-0.00004,-0.00002,0.00000,0.00003,0.00000,...,-0.00002,0.00002,0.00002,0.00000,0.00002,-0.00002,-0.00002,0.00002,0.00005,-0.00001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,-0.00000,0.00005,-0.00003,0.00002,-0.00002,0.00000,0.00002,-0.00001,0.00001,0.00002,...,0.00004,-0.00003,0.00002,0.00002,-0.00007,1.00000,0.00001,-0.00000,-0.00002,-0.00001
60,-0.00002,-0.00001,0.00000,0.00001,-0.00002,-0.00003,-0.00001,-0.00003,0.00002,0.00001,...,-0.00004,-0.00000,0.00001,0.00004,-0.00004,0.00001,1.00000,-0.00001,0.00004,0.00002
61,-0.00002,0.00003,-0.00002,-0.00005,0.00002,-0.00002,0.00000,-0.00002,-0.00002,-0.00004,...,-0.00002,-0.00003,-0.00000,-0.00000,0.00000,-0.00000,-0.00001,1.00000,0.00002,0.00001
62,0.00002,0.00004,0.00005,-0.00000,0.00005,-0.00007,-0.00005,0.00004,0.00005,-0.00000,...,-0.00004,-0.00004,-0.00001,0.00000,-0.00001,-0.00002,0.00004,0.00002,1.00000,-0.00001


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

0.009709729678010627

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[4.49665916e+00 3.61631378e+00 2.99678068e+00 1.22689852e+00
 1.19668084e+00 9.32586885e-01 8.58342939e-01 8.42750184e-01
 8.05890204e-01 7.74437904e-01 6.75569398e-01 6.63297693e-01
 6.29835491e-01 6.14702564e-01 5.94616065e-01 5.32632439e-01
 5.15638021e-01 5.00296515e-01 4.94386783e-01 4.86058500e-01
 4.83105768e-01 4.59793912e-01 4.31851208e-01 3.96509525e-01
 3.66570254e-01 3.49569098e-01 3.37071398e-01 3.17235611e-01
 2.95949279e-01 2.75806062e-01 2.65767184e-01 2.53319139e-01
 2.36371601e-01 2.14699837e-01 2.11533445e-01 2.05682352e-01
 1.96848906e-01 1.89096218e-01 1.79219537e-01 1.72120922e-01
 1.65762160e-01 1.58145201e-01 1.55527019e-01 1.51256669e-01
 1.44082407e-01 1.43140977e-01 1.42390630e-01 1.39790632e-01
 1.37305001e-01 1.35904841e-01 1.30891383e-01 1.27888900e-01
 1.24339901e-01 1.21701444e-01 1.17244886e-01 1.12641847e-01
 1.08994558e-01 1.06676813e-01 9.71229472e-02 8.13417371e-02
 4.19049640e-02 1.67531745e-02 5.15566071e-03 5

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,-0.005063,-0.006097,-0.006815,-0.004151,-0.009404,-0.004967,-0.002292,-0.002893,-0.000333,-0.002443,...,-0.003817,-0.002449,-0.005563,-0.002562,-0.002389,-0.007124,-0.000723,-0.002273,-0.005211,-0.002120
1,-0.006187,-0.007488,-0.008614,-0.004987,-0.012253,-0.006100,-0.002729,-0.003478,-0.000390,-0.002886,...,-0.004624,-0.002867,-0.006907,-0.003064,-0.002861,-0.008877,-0.000845,-0.002726,-0.006321,-0.002546
2,-0.007800,-0.009501,-0.011081,-0.006218,-0.016660,-0.007595,-0.003361,-0.004313,-0.000481,-0.003549,...,-0.005732,-0.003531,-0.008751,-0.003777,-0.003497,-0.011565,-0.001023,-0.003351,-0.007978,-0.003080
3,-0.004251,-0.006452,-0.009056,-0.002909,-0.521648,-0.003960,-0.001356,-0.001837,-0.000182,-0.001468,...,-0.002624,-0.001413,-0.005192,-0.001563,-0.001423,-0.010450,-0.000375,-0.001339,-0.004638,-0.001224
4,-0.017459,-0.026275,-0.038562,-0.012189,0.847327,-0.016715,-0.005550,-0.007388,-0.000730,-0.005918,...,-0.010765,-0.005851,-0.021731,-0.006325,-0.005766,-0.044548,-0.001592,-0.005473,-0.018254,-0.005049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,-0.009927,-0.008389,-0.007505,-0.012755,-0.005875,-0.010304,-0.045637,-0.022906,0.004103,-0.037272,...,-0.014223,-0.037521,-0.009001,-0.031060,-0.039366,-0.007230,0.010131,-0.047111,-0.009732,-0.067351
60,-0.022790,-0.019525,-0.017603,-0.027893,-0.014148,-0.023312,-0.062392,-0.042723,0.020560,-0.056493,...,-0.030467,-0.056706,-0.020763,-0.051342,-0.058158,-0.017190,0.059770,-0.063049,-0.022347,-0.072461
61,-0.038151,-0.033159,-0.030101,-0.045760,-0.024465,-0.039005,-0.085309,-0.065029,0.101971,-0.079724,...,-0.049291,-0.080052,-0.035103,-0.074630,-0.081389,-0.029396,0.832789,-0.085881,-0.037548,-0.094023
62,-0.043096,-0.037653,-0.034276,-0.051187,-0.028006,-0.044027,-0.089326,-0.070669,0.707770,-0.084351,...,-0.054922,-0.084657,-0.039762,-0.079681,-0.085887,-0.033493,-0.443279,-0.089810,-0.042411,-0.096717


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine')
cos_dist = np.sort(np.concatenate(cos_dist))[:len(GROUPS)]
cos_dist


array([0.0032197 , 0.00502546, 0.0059796 , 0.01693941, 0.03538142,
       0.04240422, 0.05940562, 0.06588931, 0.08702928, 0.09500614,
       0.1001798 , 0.10173431, 0.10496473, 0.10528653, 0.10683012,
       0.10755258, 0.12390444, 0.12643571, 0.13110985, 0.13591914,
       0.13729919, 0.14606594, 0.14622992, 0.14669682, 0.14743811,
       0.15267264, 0.16721066, 0.17088523, 0.17117522, 0.17119786,
       0.17199803, 0.1760124 , 0.17721306, 0.17822881, 0.18534873,
       0.18910706, 0.19145579, 0.19694083, 0.19866505, 0.20495598,
       0.20856304, 0.20862671, 0.21465499, 0.21815505, 0.22137838,
       0.22213006, 0.22946091, 0.22967996, 0.24010803, 0.24148467,
       0.24749638, 0.25328897, 0.27857208, 0.28208923, 0.2922302 ,
       0.29654868, 0.29855837, 0.30132119, 0.30353525, 0.33671165,
       0.3518631 , 0.35675436, 0.3609725 , 0.41114454])