In [1]:
DATA_NAME = 'coco-indoor' 
TRANSFORM = 'learned'
CHANNEL = ''

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_learned_indoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,4.72,-0.00,0.0,0.00,0.00,-0.0,-0.0,0.0,-0.0,0.0,...,-0.0,-0.0,-0.0,-0.0,0.0,0.00,0.00,0.00,0.00,0.00
1,-0.00,6.79,0.0,-0.00,-0.00,-0.0,0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,0.0,0.0,0.0,-0.00,-0.00,0.00,-0.00,0.00
2,0.00,0.00,6.0,0.00,-0.00,0.0,0.0,-0.0,-0.0,0.0,...,0.0,0.0,-0.0,0.0,-0.0,0.00,-0.00,-0.00,0.00,-0.00
3,0.00,-0.00,0.0,2.82,-0.00,-0.0,-0.0,-0.0,-0.0,0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.00,0.00,0.00,-0.00,0.00
4,0.00,-0.00,-0.0,-0.00,8.53,-0.0,-0.0,0.0,0.0,0.0,...,-0.0,0.0,0.0,-0.0,-0.0,0.00,0.00,0.00,0.00,-0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0.00,-0.00,0.0,-0.00,0.00,0.0,-0.0,0.0,0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,6.15,-0.00,0.00,0.00,-0.00
60,0.00,-0.00,-0.0,0.00,0.00,-0.0,0.0,0.0,-0.0,0.0,...,-0.0,0.0,-0.0,-0.0,0.0,-0.00,0.45,0.00,0.00,-0.00
61,0.00,0.00,-0.0,0.00,0.00,0.0,0.0,-0.0,-0.0,0.0,...,-0.0,-0.0,0.0,0.0,0.0,0.00,0.00,1.84,0.00,-0.00
62,0.00,-0.00,0.0,-0.00,0.00,-0.0,-0.0,0.0,-0.0,0.0,...,0.0,-0.0,-0.0,-0.0,0.0,0.00,0.00,0.00,4.97,-0.00


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,1.00000,-0.00002,0.00002,0.00005,0.00000,-0.00000,-0.00000,0.00002,-0.00005,0.00003,...,-0.00009,-0.00003,-0.00003,-0.00004,0.00003,0.00002,0.00004,0.00005,0.00003,0.00004
1,-0.00002,1.00000,0.00001,-0.00008,-0.00004,-0.00002,0.00000,-0.00002,-0.00002,-0.00005,...,-0.00001,-0.00005,0.00004,0.00000,0.00003,-0.00002,-0.00001,0.00004,-0.00001,0.00007
2,0.00002,0.00001,1.00000,0.00003,-0.00003,0.00003,0.00003,-0.00004,-0.00002,0.00003,...,0.00002,0.00004,-0.00004,0.00001,-0.00002,0.00003,-0.00002,-0.00002,0.00003,-0.00005
3,0.00005,-0.00008,0.00003,1.00000,-0.00000,-0.00002,-0.00002,-0.00002,-0.00004,0.00001,...,-0.00001,-0.00006,-0.00001,-0.00001,-0.00001,-0.00002,0.00001,0.00002,-0.00002,0.00005
4,0.00000,-0.00004,-0.00003,-0.00000,1.00000,-0.00005,-0.00000,0.00003,0.00004,0.00003,...,-0.00002,0.00001,0.00001,-0.00002,-0.00006,0.00000,0.00004,0.00002,0.00003,-0.00002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0.00002,-0.00002,0.00003,-0.00002,0.00000,0.00001,-0.00004,0.00002,0.00002,-0.00001,...,-0.00005,-0.00001,-0.00002,-0.00004,-0.00000,1.00000,-0.00002,0.00004,0.00005,-0.00000
60,0.00004,-0.00001,-0.00002,0.00001,0.00004,-0.00004,0.00005,0.00000,-0.00002,0.00002,...,-0.00008,0.00006,-0.00000,-0.00002,0.00002,-0.00002,1.00000,0.00002,0.00003,-0.00001
61,0.00005,0.00004,-0.00002,0.00002,0.00002,0.00004,0.00002,-0.00000,-0.00002,0.00000,...,-0.00002,-0.00001,0.00001,0.00006,0.00003,0.00004,0.00002,1.00000,0.00001,-0.00001
62,0.00003,-0.00001,0.00003,-0.00002,0.00003,-0.00001,-0.00001,0.00001,-0.00001,0.00000,...,0.00000,-0.00001,-0.00001,-0.00003,0.00000,0.00005,0.00003,0.00001,1.00000,-0.00001


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

0.00761040350189202

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[5.23300587e+00 4.21568575e+00 1.46030002e+00 1.17858719e+00
 1.01654083e+00 7.48837372e-01 6.41339126e-01 6.17197878e-01
 6.06638779e-01 5.84187366e-01 5.63773989e-01 5.46927971e-01
 5.41081065e-01 5.31228588e-01 5.07442374e-01 4.76615889e-01
 4.45098807e-01 3.96732894e-01 3.58386730e-01 3.30535884e-01
 3.07874466e-01 2.86602890e-01 1.87865168e-01 1.69344933e-01
 1.48125049e-01 1.40658858e-01 1.35068096e-01 1.32744324e-01
 1.28747021e-01 1.23142746e-01 1.08582863e-01 1.06810624e-01
 1.03034405e-01 1.00390906e-01 9.79991356e-02 9.41366147e-02
 9.21477170e-02 8.96363484e-02 8.78501826e-02 8.39845412e-02
 8.01844102e-02 7.80674658e-02 7.42291184e-02 7.03528180e-02
 6.82668162e-02 6.63292762e-02 6.41632244e-02 6.05870397e-02
 5.71434666e-02 5.35927728e-02 5.03784894e-02 4.99244899e-02
 4.92411531e-02 4.51013702e-02 4.10184031e-02 3.87031605e-02
 3.77912464e-02 3.03620024e-02 2.65502332e-02 5.52551279e-03
 1.75510216e-03 4.35217270e-34]

Principal comp

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,52,53,54,55,56,57,58,59,60,61
0,-0.004098,-0.006393,-0.005457,-0.002322,-0.008813,-0.004681,-0.001592,-0.002678,-0.000197,-0.001626,...,-0.001934,-0.001424,-0.005144,-0.001706,-0.002307,-0.005649,-0.000356,-0.001500,-0.004346,-0.000998
1,-0.005288,-0.008450,-0.007055,-0.002984,-0.012135,-0.006064,-0.002020,-0.003432,-0.000244,-0.002066,...,-0.002424,-0.001808,-0.006654,-0.002168,-0.002923,-0.007329,-0.000456,-0.001906,-0.005605,-0.001279
2,-0.008582,-0.019344,-0.013609,-0.004242,-0.063536,-0.010446,-0.002817,-0.005011,-0.000330,-0.002869,...,-0.003411,-0.002446,-0.012425,-0.002968,-0.004156,-0.014610,-0.000612,-0.002598,-0.009466,-0.001723
3,-0.010255,-0.028662,-0.018072,-0.004754,0.991298,-0.013118,-0.003115,-0.005658,-0.000358,-0.003167,...,-0.003816,-0.002724,-0.015781,-0.003327,-0.004731,-0.019435,-0.000664,-0.002892,-0.011324,-0.001933
4,-0.012889,-0.046954,-0.025028,-0.005675,0.086030,-0.016991,-0.003652,-0.006814,-0.000434,-0.003781,...,-0.004476,-0.003216,-0.021243,-0.003972,-0.005518,-0.027640,-0.000778,-0.003424,-0.014517,-0.002255
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,-0.010712,-0.007103,-0.008138,-0.021484,-0.005561,-0.009340,-0.045260,-0.017344,0.006265,-0.042611,...,-0.029853,-0.067652,-0.008564,-0.038734,-0.021795,-0.007917,0.012468,-0.055024,-0.010060,0.205733
58,-0.008223,-0.005503,-0.006226,-0.016042,-0.004288,-0.007222,-0.031461,-0.013169,0.005608,-0.030039,...,-0.021724,-0.043860,-0.006570,-0.027616,-0.016311,-0.006097,0.011304,-0.037205,-0.007706,0.933970
59,-0.044212,-0.030466,-0.034575,-0.076134,-0.024207,-0.039272,-0.114706,-0.065624,0.179621,-0.111937,...,-0.093447,-0.132406,-0.036262,-0.107083,-0.077260,-0.033733,0.673181,-0.123764,-0.041894,-0.208691
60,-0.027678,-0.019193,-0.021751,-0.046770,-0.015268,-0.024678,-0.068128,-0.040626,0.658836,-0.066692,...,-0.056680,-0.077204,-0.022798,-0.064127,-0.047422,-0.021233,-0.623654,-0.072856,-0.026262,-0.111384


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine')
cos_dist = np.sort(np.concatenate(cos_dist))[:len(GROUPS)]
cos_dist


array([0.00302381, 0.00325007, 0.00431403, 0.00870171, 0.00933641,
       0.01091944, 0.02236969, 0.02884052, 0.03477556, 0.04117383,
       0.06602983, 0.06697525, 0.07869805, 0.07949506, 0.08554983,
       0.10132119, 0.10388685, 0.1083259 , 0.12213963, 0.12623261,
       0.13869851, 0.15871523, 0.16656944, 0.17439659, 0.17831168,
       0.18469498, 0.18548526, 0.19571559, 0.21032075, 0.22147465,
       0.22795328, 0.23260485, 0.24149013, 0.24207866, 0.24474974,
       0.24508369, 0.25380141, 0.25418002, 0.25990248, 0.26709237,
       0.26914544, 0.26958509, 0.27316373, 0.27380149, 0.27526871,
       0.28696844, 0.28742603, 0.28755659, 0.28953601, 0.31005458,
       0.3268191 , 0.34116424, 0.35004935, 0.35849507, 0.3642844 ,
       0.37499141, 0.38597557, 0.39664309, 0.40205579, 0.43819129,
       0.43969933, 0.44546216])