In [1]:
DATA_NAME = 'coco-indoor' 
TRANSFORM = 'gabor'
CHANNEL = 'green'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_green_gabor_indoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,49235.48,1.4,-0.21,0.44,0.12,-0.17,-0.09,-2.54,0.82,-0.47,...,-0.23,0.37,-0.0,0.82,-0.14,-0.39,0.15,0.1,0.05,0.05
1,1.4,17540.13,0.67,-0.4,-0.11,-0.1,-0.01,1.53,-0.22,-0.39,...,-0.01,-0.0,0.03,1.0,-0.31,-0.15,-0.27,0.04,0.04,0.07
2,-0.21,0.67,6263.85,-0.07,0.0,0.08,0.04,0.13,0.46,-0.21,...,0.05,-0.04,0.03,-0.63,0.55,0.0,-0.24,-0.0,0.08,-0.01
3,0.44,-0.4,-0.07,2366.98,-0.05,0.02,-0.01,-0.61,-0.22,0.02,...,-0.04,-0.04,0.01,0.38,0.09,0.11,0.06,0.02,-0.03,-0.01
4,0.12,-0.11,0.0,-0.05,947.32,-0.01,0.0,0.03,0.0,0.21,...,-0.0,0.01,0.0,-0.02,0.11,-0.06,0.06,0.01,0.02,-0.02
5,-0.17,-0.1,0.08,0.02,-0.01,332.07,-0.0,-0.26,0.07,-0.02,...,-0.01,-0.01,0.0,-0.11,0.02,-0.01,0.04,-0.0,0.01,0.0
6,-0.09,-0.01,0.04,-0.01,0.0,-0.0,206.07,0.08,-0.0,0.07,...,0.02,-0.0,-0.01,-0.05,0.0,0.02,-0.02,0.02,-0.01,0.0
7,-2.54,1.53,0.13,-0.61,0.03,-0.26,0.08,69679.32,-0.14,0.57,...,-0.0,0.11,-0.12,0.57,-1.63,-0.53,-0.54,0.28,0.21,-0.15
8,0.82,-0.22,0.46,-0.22,0.0,0.07,-0.0,-0.14,24894.65,0.3,...,-0.1,0.04,0.01,0.58,0.98,-0.67,0.18,0.13,-0.01,-0.03
9,-0.47,-0.39,-0.21,0.02,0.21,-0.02,0.07,0.57,0.3,9170.76,...,0.03,0.03,-0.04,0.24,-0.31,0.21,0.02,-0.0,-0.09,0.02


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-3e-05,-4e-05,2e-05,-2e-05,...,-4e-05,9e-05,-0.0,2e-05,-0.0,-2e-05,1e-05,2e-05,1e-05,2e-05
1,5e-05,1.0,6e-05,-6e-05,-3e-05,-4e-05,-1e-05,4e-05,-1e-05,-3e-05,...,-0.0,-0.0,2e-05,3e-05,-2e-05,-1e-05,-4e-05,1e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-2e-05,0.0,5e-05,4e-05,1e-05,4e-05,-3e-05,...,2e-05,-3e-05,3e-05,-4e-05,5e-05,0.0,-6e-05,-0.0,5e-05,-1e-05
3,4e-05,-6e-05,-2e-05,1.0,-4e-05,2e-05,-1e-05,-5e-05,-3e-05,0.0,...,-3e-05,-4e-05,2e-05,3e-05,1e-05,3e-05,2e-05,1e-05,-3e-05,-1e-05
4,2e-05,-3e-05,0.0,-4e-05,1.0,-2e-05,1e-05,0.0,0.0,7e-05,...,-0.0,1e-05,1e-05,-0.0,3e-05,-3e-05,4e-05,1e-05,4e-05,-4e-05
5,-4e-05,-4e-05,5e-05,2e-05,-2e-05,1.0,-1e-05,-5e-05,2e-05,-1e-05,...,-2e-05,-4e-05,2e-05,-3e-05,1e-05,-1e-05,5e-05,-0.0,1e-05,0.0
6,-3e-05,-1e-05,4e-05,-1e-05,1e-05,-1e-05,1.0,2e-05,-0.0,5e-05,...,4e-05,-1e-05,-4e-05,-2e-05,0.0,2e-05,-3e-05,5e-05,-4e-05,1e-05
7,-4e-05,4e-05,1e-05,-5e-05,0.0,-5e-05,2e-05,1.0,-0.0,2e-05,...,-0.0,2e-05,-4e-05,1e-05,-5e-05,-3e-05,-4e-05,3e-05,4e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,2e-05,-0.0,-0.0,1.0,2e-05,...,-2e-05,1e-05,1e-05,2e-05,5e-05,-5e-05,2e-05,3e-05,-0.0,-2e-05
9,-2e-05,-3e-05,-3e-05,0.0,7e-05,-1e-05,5e-05,2e-05,2e-05,1.0,...,1e-05,2e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,-0.0,-5e-05,2e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

14.738437959466001

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.15947895e+08 7.25208022e+07 5.91162548e+07 5.69085395e+07
 5.09303137e+07 4.58547005e+07 1.47385617e+07 9.00935755e+06
 7.95989624e+06 7.27593415e+06 6.31189675e+06 5.68293948e+06
 1.98988284e+06 1.24000777e+06 1.00320772e+06 9.21707134e+05
 7.92570544e+05 6.94708036e+05 2.73471849e+05 1.55214455e+05
 1.40292642e+05 1.30647429e+05 1.16540475e+05 9.91592657e+04
 3.57938860e+04 2.36506044e+04 2.23210143e+04 2.05202949e+04
 1.80253878e+04 1.42261833e+04 5.23432600e+03 3.45369321e+03
 3.20828754e+03 2.81199102e+03 2.62627619e+03 1.90136700e+03
 1.11774605e+03 9.12029078e+02 6.91637448e+02 6.48034027e+02
 5.10320584e+02 1.18885659e-23]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.030771,-0.005693,-0.001924,-0.000732,-0.0002885417,-0.000105,-6.161475e-05,0.997265,-0.008736,-0.002839,...,-0.000255,-0.000104,-4.423247e-05,-0.030609,-0.006017,-0.002002,-0.000746,-0.00029,-0.0001110248,-5.1e-05
1,-0.091082,-0.006652,-0.002167,-0.00082,-0.0003258208,-0.000111,-7.241411e-05,0.037198,-0.010718,-0.00321,...,-0.000282,-0.000114,-4.640466e-05,-0.090649,-0.006985,-0.002219,-0.000822,-0.000337,-0.0001288704,-5.3e-05
2,0.709133,-2.5e-05,-4e-06,-3e-06,3.886357e-07,-1e-06,-8.800344e-07,0.000225,-5e-05,-2.9e-05,...,-7e-06,3e-06,-5.998377e-07,-0.705074,-3.7e-05,-2e-05,8e-06,-4e-06,8.844776e-07,3e-06
3,0.661266,-0.010519,-0.003349,-0.001235,-0.0004956817,-0.000178,-0.0001103581,0.033794,-0.017692,-0.004987,...,-0.000439,-0.000173,-7.211812e-05,0.665746,-0.011153,-0.003453,-0.001275,-0.0005,-0.0001953117,-8.5e-05
4,0.149932,-0.010084,-0.003145,-0.001157,-0.0004610815,-0.00016,-0.0001040081,0.025832,-0.017389,-0.004681,...,-0.000407,-0.000166,-7.103947e-05,0.149604,-0.010644,-0.003259,-0.001194,-0.000478,-0.000186824,-8.2e-05
5,0.164482,-0.020236,-0.006177,-0.002282,-0.0009150963,-0.000322,-0.0001965461,0.042476,-0.035893,-0.009262,...,-0.000813,-0.000336,-0.0001328422,0.164422,-0.021389,-0.006379,-0.002345,-0.00093,-0.0003606434,-0.000155
6,0.016693,-0.036565,-0.006831,-0.002453,-0.0009695712,-0.000337,-0.0002111506,0.010135,0.995252,-0.010876,...,-0.000861,-0.000352,-0.0001416433,0.016697,-0.041404,-0.007153,-0.00249,-0.000983,-0.0003832707,-0.000166
7,0.008648,-0.103374,-0.006871,-0.002345,-0.0009499845,-0.000325,-0.0002020875,0.005595,0.036121,-0.011631,...,-0.000835,-0.000328,-0.0001388502,0.008721,-0.180871,-0.007126,-0.002406,-0.000945,-0.0003641539,-0.000162
8,0.007893,-0.315754,-0.007297,-0.002464,-0.0009718645,-0.00034,-0.0002120539,0.005203,0.028365,-0.012733,...,-0.000866,-0.000348,-0.0001474229,0.007899,0.932488,-0.007675,-0.002525,-0.001001,-0.0003837828,-0.000168
9,0.011228,0.91477,-0.011694,-0.003959,-0.001557536,-0.00055,-0.0003371778,0.007425,0.037717,-0.020902,...,-0.001374,-0.000562,-0.000227577,0.011251,0.25696,-0.012314,-0.004043,-0.00158,-0.0006070733,-0.00026


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.002734925014482137,
 0.011783468673814856,
 0.2908671811787533,
 0.334254191602538,
 0.10299472418469713,
 0.08972408686404709,
 0.004747645137636258,
 0.025798649120408412,
 0.06751218197475484,
 0.08522967961392958,
 0.1250081962097147,
 0.12908718084336823,
 0.006717486418424956,
 0.01906472872298437,
 0.0844664810027893,
 0.11505525742073952,
 0.12981854840851725,
 0.12939541127192866,
 0.010005862373211771,
 0.05053815495196634,
 0.16302645595350285,
 0.2335928288302379,
 0.23945438270555897,
 0.25594813210737477,
 0.020427131718101954,
 0.10145887002147147,
 0.2477436382799978,
 0.3527704243649149,
 0.2985764229018648,
 0.295761149393291,
 0.034280516293890284,
 0.2278086327030041,
 0.3263589467281729,
 0.21825013183803432,
 0.24529259543609128,
 0.48879871926709295,
 0.18132799784794362,
 0.27223000812149567,
 0.21353469285185633,
 0.22411747200174925,
 0.1277423987881321,
 0.5526499673303127]