In [1]:
DATA_NAME = 'coco-outdoor' 
TRANSFORM = 'gabor'
CHANNEL = 'red'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_red_gabor_outdoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,56498.91,1.69,-0.25,0.55,0.16,-0.22,-0.15,-2.88,0.96,-0.56,...,-0.24,0.48,-0.04,0.98,-0.13,-0.5,0.21,0.06,0.05,0.05
1,1.69,22148.32,0.82,-0.54,-0.12,-0.13,-0.01,1.85,-0.29,-0.47,...,0.0,-0.01,0.02,1.21,-0.32,-0.17,-0.36,0.07,0.06,0.09
2,-0.25,0.82,7966.12,-0.12,-0.0,0.1,0.05,0.12,0.59,-0.23,...,0.06,-0.05,0.04,-0.78,0.65,-0.01,-0.34,-0.0,0.11,-0.02
3,0.55,-0.54,-0.12,3231.15,-0.07,0.03,-0.02,-0.77,-0.28,0.01,...,-0.05,-0.06,0.02,0.43,0.04,0.14,0.07,0.03,-0.05,-0.01
4,0.16,-0.12,-0.0,-0.07,1328.87,-0.01,0.01,0.01,0.02,0.27,...,-0.0,0.01,0.01,-0.0,0.15,-0.09,0.09,0.01,0.04,-0.02
5,-0.22,-0.13,0.1,0.03,-0.01,550.41,0.0,-0.34,0.1,-0.04,...,-0.02,-0.02,0.01,-0.18,0.02,0.01,0.06,-0.0,-0.0,-0.0
6,-0.15,-0.01,0.05,-0.02,0.01,0.0,346.76,0.09,-0.0,0.11,...,0.03,-0.01,-0.01,-0.07,-0.01,0.03,-0.03,0.04,-0.02,0.0
7,-2.88,1.85,0.12,-0.77,0.01,-0.34,0.09,78933.32,-0.18,0.71,...,-0.02,0.19,-0.18,0.6,-1.86,-0.6,-0.68,0.39,0.27,-0.19
8,0.96,-0.29,0.59,-0.28,0.02,0.1,-0.0,-0.18,30041.92,0.41,...,-0.14,0.05,0.01,0.74,1.09,-0.87,0.24,0.18,-0.04,-0.05
9,-0.56,-0.47,-0.23,0.01,0.27,-0.04,0.11,0.71,0.41,11469.78,...,0.06,0.04,-0.07,0.25,-0.38,0.28,0.01,0.02,-0.13,0.02


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-3e-05,-4e-05,2e-05,-2e-05,...,-3e-05,8e-05,-1e-05,2e-05,-0.0,-2e-05,2e-05,1e-05,1e-05,1e-05
1,5e-05,1.0,6e-05,-6e-05,-2e-05,-4e-05,-0.0,4e-05,-1e-05,-3e-05,...,0.0,-0.0,1e-05,3e-05,-1e-05,-1e-05,-4e-05,1e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-2e-05,-0.0,5e-05,3e-05,0.0,4e-05,-2e-05,...,2e-05,-2e-05,3e-05,-4e-05,5e-05,-0.0,-7e-05,-0.0,5e-05,-1e-05
3,4e-05,-6e-05,-2e-05,1.0,-3e-05,2e-05,-2e-05,-5e-05,-3e-05,0.0,...,-2e-05,-5e-05,3e-05,3e-05,0.0,3e-05,2e-05,2e-05,-3e-05,-1e-05
4,2e-05,-2e-05,-0.0,-3e-05,1.0,-2e-05,1e-05,0.0,0.0,7e-05,...,-0.0,1e-05,1e-05,-0.0,3e-05,-3e-05,4e-05,1e-05,4e-05,-4e-05
5,-4e-05,-4e-05,5e-05,2e-05,-2e-05,1.0,0.0,-5e-05,2e-05,-2e-05,...,-3e-05,-3e-05,3e-05,-3e-05,1e-05,0.0,4e-05,-0.0,-0.0,-0.0
6,-3e-05,-0.0,3e-05,-2e-05,1e-05,0.0,1.0,2e-05,-0.0,6e-05,...,4e-05,-1e-05,-3e-05,-2e-05,-0.0,2e-05,-3e-05,5e-05,-3e-05,1e-05
7,-4e-05,4e-05,0.0,-5e-05,0.0,-5e-05,2e-05,1.0,-0.0,2e-05,...,-0.0,3e-05,-4e-05,1e-05,-5e-05,-2e-05,-4e-05,4e-05,4e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,2e-05,-0.0,-0.0,1.0,2e-05,...,-2e-05,1e-05,0.0,2e-05,4e-05,-5e-05,2e-05,3e-05,-1e-05,-2e-05
9,-2e-05,-3e-05,-2e-05,0.0,7e-05,-2e-05,6e-05,2e-05,2e-05,1.0,...,2e-05,2e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,0.0,-5e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

17.697213372845862

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.48857766e+08 9.97245721e+07 8.16755442e+07 7.63698101e+07
 7.09418109e+07 6.43314890e+07 2.14709980e+07 1.42500963e+07
 1.17551562e+07 1.08601138e+07 9.54251704e+06 8.48442156e+06
 3.11392958e+06 1.91487574e+06 1.71471801e+06 1.50594236e+06
 1.36346982e+06 1.21515914e+06 4.68711906e+05 3.03462284e+05
 2.63462108e+05 2.44170692e+05 2.17743639e+05 1.86367669e+05
 7.71028912e+04 5.05104115e+04 4.38800578e+04 4.15145460e+04
 3.76984261e+04 3.00649230e+04 1.35164859e+04 1.06259438e+04
 9.46385387e+03 7.90225330e+03 7.56302437e+03 5.50562281e+03
 3.37682037e+03 2.67277242e+03 2.10293035e+03 1.97917254e+03
 1.85134693e+03 1.02676206e-23]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.031314,-0.00631,-0.002117,-0.000862,-0.00035,-0.000149,-9e-05,0.996615,-0.009286,-0.003076,...,-0.000319,-0.000148,-7.7e-05,-0.034593,-0.006131,-0.002273,-0.000883,-0.000351,-0.000173,-7.4e-05
1,-0.075481,-0.007335,-0.002369,-0.00096,-0.000391,-0.000158,-0.000104,0.043656,-0.01126,-0.003456,...,-0.00035,-0.000161,-8.2e-05,-0.10068,-0.007033,-0.002506,-0.000967,-0.000403,-0.000197,-7.6e-05
2,-0.286239,-0.006138,-0.001942,-0.000767,-0.000317,-0.000133,-8.3e-05,0.021695,-0.009741,-0.002827,...,-0.000282,-0.000135,-6.6e-05,0.945647,-0.005896,-0.002054,-0.000804,-0.000315,-0.00016,-6.8e-05
3,0.897328,-0.008065,-0.002523,-0.000997,-0.00041,-0.000176,-0.00011,0.024764,-0.012998,-0.003718,...,-0.000377,-0.000167,-8.8e-05,0.211704,-0.007775,-0.002697,-0.001028,-0.000415,-0.000206,-8.4e-05
4,0.227256,-0.010448,-0.003212,-0.001265,-0.000518,-0.000213,-0.000141,0.027188,-0.017118,-0.004711,...,-0.000473,-0.00022,-0.000115,0.135369,-0.00998,-0.003441,-0.001312,-0.000535,-0.000267,-0.000109
5,0.230759,-0.023315,-0.007002,-0.00277,-0.00114,-0.000473,-0.000295,0.049662,-0.039202,-0.010347,...,-0.001044,-0.000491,-0.000241,0.173444,-0.022217,-0.007479,-0.002861,-0.001157,-0.000574,-0.000232
6,0.017933,-0.041786,-0.007145,-0.00274,-0.00111,-0.000457,-0.00029,0.010849,0.994455,-0.011236,...,-0.001018,-0.000474,-0.000236,0.016988,-0.037213,-0.00777,-0.002796,-0.001124,-0.000561,-0.000229
7,0.011328,-0.12446,-0.008019,-0.00294,-0.001216,-0.000494,-0.000311,0.0073,0.049608,-0.013276,...,-0.001105,-0.000499,-0.000258,0.010909,-0.089327,-0.008669,-0.003029,-0.001214,-0.000597,-0.000249
8,0.007483,0.93218,-0.006838,-0.002504,-0.00101,-0.00042,-0.000261,0.00492,0.025909,-0.011863,...,-0.000914,-0.00043,-0.000211,0.007194,-0.334214,-0.007491,-0.002566,-0.001012,-0.0005,-0.000201
9,0.011712,0.279331,-0.011826,-0.004244,-0.001705,-0.00071,-0.000445,0.007801,0.037383,-0.020905,...,-0.001562,-0.000722,-0.000368,0.011253,0.905094,-0.013034,-0.00436,-0.001743,-0.000854,-0.000349


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.0033847807078016823,
 0.012025268963791191,
 0.0543532854006753,
 0.10267236503280774,
 0.14792985102949574,
 0.12194634382743108,
 0.0055448835361093796,
 0.016494197126211585,
 0.06781979831496665,
 0.09490555709197035,
 0.10850998716921556,
 0.10131586356933353,
 0.007252431663718029,
 0.03931099263858495,
 0.06621660266628648,
 0.08931939142984913,
 0.21839089369861042,
 0.24654439024251495,
 0.01219579480397448,
 0.03921850415748751,
 0.13360266333655235,
 0.1967236849263143,
 0.23670523113710806,
 0.2625466845531744,
 0.020656649336660338,
 0.05296298015412426,
 0.186546038821518,
 0.3310711872366433,
 0.3137783826538403,
 0.3154408801982601,
 0.05098355604354832,
 0.14696681177766435,
 0.21132495603858548,
 0.2758790736080188,
 0.20522053864969003,
 0.49567550636853297,
 0.14850457636465442,
 0.19982224796880976,
 0.18183436933439756,
 0.2838433462219372,
 0.13643647113622448,
 0.5993083345179837]