In [1]:
DATA_NAME = 'coco-outdoor' 
TRANSFORM = 'learned'
CHANNEL = ''

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_learned_outdoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,5.59,0.00,-0.00,-0.00,0.0,0.0,0.0,0.0,0.0,-0.0,...,-0.0,0.0,0.0,-0.0,0.0,0.00,-0.0,-0.0,0.00,-0.00
1,0.00,6.54,-0.00,-0.00,-0.0,0.0,0.0,0.0,0.0,0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,0.00,-0.0,0.0,0.00,-0.00
2,-0.00,-0.00,6.84,0.00,0.0,-0.0,0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,0.0,0.0,-0.00,-0.0,-0.0,0.00,0.00
3,-0.00,-0.00,0.00,4.69,-0.0,-0.0,-0.0,0.0,0.0,-0.0,...,-0.0,0.0,0.0,0.0,-0.0,0.00,0.0,-0.0,-0.00,-0.00
4,0.00,-0.00,0.00,-0.00,8.8,-0.0,-0.0,0.0,0.0,0.0,...,-0.0,0.0,0.0,0.0,0.0,-0.00,-0.0,0.0,0.00,-0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0.00,0.00,-0.00,0.00,-0.0,-0.0,0.0,-0.0,0.0,0.0,...,0.0,-0.0,0.0,0.0,-0.0,6.99,0.0,-0.0,-0.00,-0.00
60,-0.00,-0.00,-0.00,0.00,-0.0,-0.0,-0.0,-0.0,0.0,0.0,...,-0.0,0.0,0.0,0.0,-0.0,0.00,0.9,-0.0,0.00,0.00
61,-0.00,0.00,-0.00,-0.00,0.0,-0.0,0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.00,-0.0,2.7,0.00,0.00
62,0.00,0.00,0.00,-0.00,0.0,-0.0,-0.0,0.0,0.0,-0.0,...,-0.0,-0.0,-0.0,0.0,-0.0,-0.00,0.0,0.0,5.88,-0.00


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,1.00000,0.00001,-0.00001,-0.00003,0.00003,0.00001,0.00001,0.00003,0.00001,-0.00003,...,-0.00001,0.00002,0.00001,-0.00006,0.00001,0.00000,-0.00002,-0.00002,0.00002,-0.00000
1,0.00001,1.00000,-0.00002,-0.00002,-0.00001,0.00000,0.00004,0.00005,0.00000,0.00002,...,-0.00002,-0.00005,-0.00001,-0.00006,-0.00002,0.00004,-0.00001,0.00003,0.00004,-0.00002
2,-0.00001,-0.00002,1.00000,0.00004,0.00004,-0.00003,0.00005,-0.00005,-0.00002,-0.00003,...,-0.00000,-0.00000,-0.00001,0.00003,0.00001,-0.00003,-0.00000,-0.00002,0.00005,0.00002
3,-0.00003,-0.00002,0.00004,1.00000,-0.00003,-0.00005,-0.00001,0.00001,0.00003,-0.00007,...,-0.00002,0.00002,0.00004,0.00002,-0.00002,0.00002,0.00001,-0.00005,-0.00000,-0.00002
4,0.00003,-0.00001,0.00004,-0.00003,1.00000,-0.00004,-0.00002,0.00000,0.00003,0.00000,...,-0.00002,0.00001,0.00002,0.00001,0.00002,-0.00002,-0.00002,0.00002,0.00005,-0.00001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0.00000,0.00004,-0.00003,0.00002,-0.00002,-0.00000,0.00002,-0.00001,0.00001,0.00002,...,0.00004,-0.00003,0.00002,0.00002,-0.00007,1.00000,0.00001,-0.00001,-0.00002,-0.00001
60,-0.00002,-0.00001,-0.00000,0.00001,-0.00002,-0.00003,-0.00001,-0.00003,0.00003,0.00001,...,-0.00004,0.00000,0.00001,0.00004,-0.00004,0.00001,1.00000,-0.00001,0.00004,0.00002
61,-0.00002,0.00003,-0.00002,-0.00005,0.00002,-0.00002,0.00000,-0.00002,-0.00002,-0.00004,...,-0.00002,-0.00003,-0.00000,-0.00000,-0.00000,-0.00001,-0.00001,1.00000,0.00002,0.00001
62,0.00002,0.00004,0.00005,-0.00000,0.00005,-0.00007,-0.00005,0.00004,0.00005,-0.00000,...,-0.00004,-0.00004,-0.00001,0.00000,-0.00001,-0.00002,0.00004,0.00002,1.00000,-0.00001


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

0.009574784300730504

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[4.28264225e+00 3.53609406e+00 2.87202714e+00 1.22010804e+00
 1.17298477e+00 8.72326398e-01 8.44738710e-01 7.84333189e-01
 7.67262056e-01 7.33999083e-01 6.76844895e-01 6.66697424e-01
 6.48946750e-01 6.14385567e-01 5.91231256e-01 5.43017809e-01
 5.03513768e-01 4.95268190e-01 4.90696620e-01 4.85170302e-01
 4.66278564e-01 4.46234114e-01 4.21715583e-01 3.54224205e-01
 3.50073845e-01 3.43462546e-01 3.25020816e-01 2.75223448e-01
 2.65153399e-01 2.58436490e-01 2.46694459e-01 2.33881377e-01
 2.27237820e-01 2.13197354e-01 2.04376387e-01 1.98748918e-01
 1.89656285e-01 1.84652819e-01 1.76107055e-01 1.71584956e-01
 1.64743065e-01 1.56531130e-01 1.53257608e-01 1.51744279e-01
 1.48035690e-01 1.43217687e-01 1.38970244e-01 1.35414337e-01
 1.31015007e-01 1.30063914e-01 1.29295547e-01 1.24128779e-01
 1.20433084e-01 1.17475937e-01 1.15727055e-01 1.11757164e-01
 1.09508883e-01 1.08998416e-01 9.76631127e-02 7.88113593e-02
 4.19443639e-02 1.63346090e-02 5.05388818e-03 2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,-0.005000,-0.006236,-0.006528,-0.004070,-0.009778,-0.005100,-0.002350,-0.002849,-0.000335,-0.002448,...,-0.003441,-0.002357,-0.005638,-0.002585,-0.002338,-0.006803,-0.000721,-0.002191,-0.005389,-0.002129
1,-0.006189,-0.007759,-0.008307,-0.004954,-0.012874,-0.006354,-0.002843,-0.003476,-0.000398,-0.002937,...,-0.004217,-0.002803,-0.007090,-0.003141,-0.002846,-0.008527,-0.000857,-0.002669,-0.006634,-0.002598
2,-0.007697,-0.009762,-0.010533,-0.006093,-0.017587,-0.007835,-0.003449,-0.004248,-0.000483,-0.003560,...,-0.005135,-0.003399,-0.008893,-0.003815,-0.003424,-0.010946,-0.001021,-0.003231,-0.008294,-0.003097
3,-0.007964,-0.012537,-0.014818,-0.005666,0.950942,-0.008313,-0.002764,-0.003472,-0.000354,-0.002854,...,-0.004437,-0.002722,-0.010356,-0.003077,-0.002711,-0.016406,-0.000784,-0.002541,-0.008932,-0.002467
4,-0.015644,-0.025535,-0.030135,-0.010830,0.295120,-0.016106,-0.005275,-0.006710,-0.000679,-0.005490,...,-0.008536,-0.005155,-0.020457,-0.005914,-0.005206,-0.033825,-0.001459,-0.004844,-0.018097,-0.004680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,-0.010776,-0.008805,-0.008351,-0.014004,-0.006158,-0.010711,-0.043769,-0.025226,0.004440,-0.039277,...,-0.017827,-0.045715,-0.009516,-0.032173,-0.045076,-0.008066,0.010892,-0.058770,-0.010065,-0.068847
60,-0.022594,-0.018783,-0.017844,-0.027936,-0.013565,-0.022276,-0.058880,-0.042912,0.019607,-0.055558,...,-0.033749,-0.059925,-0.020115,-0.049849,-0.059582,-0.017458,0.055967,-0.066917,-0.021243,-0.071130
61,-0.039106,-0.033086,-0.031544,-0.047257,-0.024339,-0.038674,-0.083801,-0.066956,0.103921,-0.080554,...,-0.055306,-0.084858,-0.035251,-0.074796,-0.084488,-0.030864,0.831177,-0.090963,-0.037055,-0.094633
62,-0.043497,-0.037032,-0.035353,-0.052018,-0.027458,-0.043048,-0.086893,-0.071560,0.708562,-0.084009,...,-0.060244,-0.087794,-0.039350,-0.078799,-0.087508,-0.034611,-0.445675,-0.093010,-0.041279,-0.096036


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine')
cos_dist = np.sort(np.concatenate(cos_dist))[:len(GROUPS)]
cos_dist


array([0.00383738, 0.00448952, 0.00602681, 0.03448376, 0.03710411,
       0.04569146, 0.04905799, 0.0520866 , 0.05370046, 0.05748953,
       0.0634061 , 0.06453648, 0.0764255 , 0.0823958 , 0.10983608,
       0.11005109, 0.11718658, 0.12066436, 0.12571845, 0.13118379,
       0.14051551, 0.14384982, 0.14475699, 0.14848204, 0.16260221,
       0.16882291, 0.17851774, 0.17963561, 0.18726076, 0.19958439,
       0.20613892, 0.21289862, 0.24847972, 0.25091352, 0.2521674 ,
       0.25728601, 0.25933855, 0.26038412, 0.270854  , 0.27336926,
       0.27536867, 0.27538722, 0.27724477, 0.27787628, 0.28181187,
       0.28393421, 0.28618566, 0.28640509, 0.29143777, 0.30238395,
       0.30888349, 0.30957412, 0.32530252, 0.33371384, 0.33680347,
       0.35002565, 0.36819192, 0.37869287, 0.38625251, 0.39438525,
       0.40850058, 0.41259937, 0.44630465, 0.45418704])