In [1]:
DATA_NAME = 'coco-outdoor' 
TRANSFORM = 'gabor'
CHANNEL = 'gray'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gray_gabor_outdoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,52147.91,1.52,-0.24,0.52,0.15,-0.21,-0.14,-2.74,0.88,-0.52,...,-0.23,0.46,-0.04,0.9,-0.12,-0.49,0.19,0.06,0.05,0.04
1,1.52,19448.22,0.75,-0.49,-0.11,-0.12,-0.01,1.69,-0.25,-0.43,...,0.01,-0.01,0.02,1.13,-0.3,-0.17,-0.33,0.06,0.06,0.08
2,-0.24,0.75,7714.32,-0.11,-0.0,0.1,0.05,0.13,0.55,-0.22,...,0.06,-0.05,0.04,-0.76,0.63,0.0,-0.32,-0.0,0.11,-0.02
3,0.52,-0.49,-0.11,3096.13,-0.06,0.03,-0.02,-0.74,-0.26,0.01,...,-0.05,-0.06,0.02,0.42,0.04,0.14,0.07,0.03,-0.04,-0.01
4,0.15,-0.11,-0.0,-0.06,1261.25,-0.01,0.0,0.0,0.01,0.26,...,-0.0,0.01,0.01,-0.01,0.14,-0.08,0.08,0.01,0.04,-0.02
5,-0.21,-0.12,0.1,0.03,-0.01,525.8,0.0,-0.32,0.09,-0.04,...,-0.02,-0.02,0.01,-0.17,0.02,0.0,0.05,-0.0,0.0,-0.0
6,-0.14,-0.01,0.05,-0.02,0.0,0.0,351.44,0.09,-0.0,0.11,...,0.03,-0.01,-0.01,-0.07,-0.01,0.03,-0.03,0.04,-0.02,0.0
7,-2.74,1.69,0.13,-0.74,0.0,-0.32,0.09,74528.7,-0.18,0.65,...,-0.01,0.19,-0.17,0.59,-1.79,-0.61,-0.63,0.37,0.26,-0.19
8,0.88,-0.25,0.55,-0.26,0.01,0.09,-0.0,-0.18,27890.8,0.38,...,-0.14,0.05,0.01,0.71,1.02,-0.82,0.22,0.17,-0.04,-0.05
9,-0.52,-0.43,-0.22,0.01,0.26,-0.04,0.11,0.65,0.38,10626.3,...,0.06,0.04,-0.06,0.25,-0.37,0.26,0.01,0.01,-0.12,0.02


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-3e-05,-4e-05,2e-05,-2e-05,...,-3e-05,8e-05,-1e-05,2e-05,-0.0,-2e-05,2e-05,1e-05,1e-05,1e-05
1,5e-05,1.0,6e-05,-6e-05,-2e-05,-4e-05,-0.0,4e-05,-1e-05,-3e-05,...,0.0,-0.0,1e-05,3e-05,-2e-05,-1e-05,-4e-05,1e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-2e-05,-0.0,5e-05,3e-05,1e-05,4e-05,-2e-05,...,2e-05,-2e-05,3e-05,-4e-05,5e-05,0.0,-7e-05,-0.0,5e-05,-1e-05
3,4e-05,-6e-05,-2e-05,1.0,-3e-05,2e-05,-2e-05,-5e-05,-3e-05,0.0,...,-2e-05,-5e-05,3e-05,3e-05,1e-05,3e-05,2e-05,2e-05,-3e-05,-1e-05
4,2e-05,-2e-05,-0.0,-3e-05,1.0,-2e-05,1e-05,0.0,0.0,7e-05,...,-0.0,1e-05,1e-05,-0.0,3e-05,-3e-05,4e-05,1e-05,4e-05,-4e-05
5,-4e-05,-4e-05,5e-05,2e-05,-2e-05,1.0,0.0,-5e-05,2e-05,-2e-05,...,-3e-05,-3e-05,3e-05,-3e-05,1e-05,0.0,4e-05,-1e-05,0.0,-1e-05
6,-3e-05,-0.0,3e-05,-2e-05,1e-05,0.0,1.0,2e-05,-0.0,6e-05,...,4e-05,-1e-05,-3e-05,-2e-05,-0.0,2e-05,-3e-05,5e-05,-3e-05,1e-05
7,-4e-05,4e-05,1e-05,-5e-05,0.0,-5e-05,2e-05,1.0,-0.0,2e-05,...,-0.0,3e-05,-4e-05,1e-05,-5e-05,-2e-05,-4e-05,4e-05,4e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,2e-05,-0.0,-0.0,1.0,2e-05,...,-2e-05,1e-05,0.0,2e-05,4e-05,-5e-05,2e-05,3e-05,-1e-05,-2e-05
9,-2e-05,-3e-05,-2e-05,0.0,7e-05,-2e-05,6e-05,2e-05,2e-05,1.0,...,2e-05,1e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,0.0,-5e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

16.555172717770667

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.32678275e+08 8.08833223e+07 7.78378528e+07 6.51955657e+07
 6.06239132e+07 5.54161236e+07 1.85073963e+07 1.24532512e+07
 9.97125135e+06 9.05486243e+06 8.44128704e+06 7.54374669e+06
 2.67487744e+06 1.71853806e+06 1.63291401e+06 1.40121389e+06
 1.18977227e+06 1.07381723e+06 3.92324372e+05 2.75795687e+05
 2.32755413e+05 2.17476633e+05 1.86849007e+05 1.62630879e+05
 7.20242891e+04 4.77406831e+04 4.08656636e+04 3.84786237e+04
 3.66630761e+04 2.89445976e+04 1.25970905e+04 1.10552676e+04
 9.56060223e+03 7.80686625e+03 7.10222573e+03 5.36522692e+03
 3.48769213e+03 2.72241033e+03 2.05905679e+03 1.97936905e+03
 1.95689687e+03 8.06262853e-24]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.029535,-0.005866,-0.002198,-0.000885,-0.000356,-0.000153,-9.8e-05,0.997029,-0.009184,-0.003051,...,-0.000342,-0.00016,-8.2e-05,-0.041759,-0.006269,-0.002414,-0.000879,-0.000364,-0.00019,-8.3e-05
1,-0.007771,-0.000601,-0.000225,-7.3e-05,-3.3e-05,-1.9e-05,-8e-06,0.003239,-0.000997,-0.000306,...,-3.4e-05,-1.8e-05,-8e-06,0.746137,-0.000665,-0.000252,-9.9e-05,-2.6e-05,-1.9e-05,-1.3e-05
2,-0.163748,-0.010209,-0.003663,-0.001448,-0.000588,-0.000244,-0.000166,0.046467,-0.017097,-0.005104,...,-0.000555,-0.000261,-0.00013,0.646555,-0.010888,-0.003973,-0.001442,-0.000609,-0.000321,-0.000132
3,0.932799,-0.00698,-0.002456,-0.000959,-0.00039,-0.000169,-0.000112,0.02156,-0.012158,-0.003459,...,-0.000379,-0.000168,-8.8e-05,0.073503,-0.007517,-0.00269,-0.000956,-0.000404,-0.000212,-8.7e-05
4,0.202093,-0.008368,-0.0029,-0.001128,-0.000457,-0.000189,-0.000133,0.022089,-0.014842,-0.004059,...,-0.000439,-0.000205,-0.000107,0.062069,-0.008951,-0.003183,-0.001133,-0.000483,-0.000255,-0.000106
5,0.244896,-0.021529,-0.007317,-0.002859,-0.001165,-0.000486,-0.000322,0.047606,-0.039203,-0.010319,...,-0.001124,-0.000532,-0.000259,0.115026,-0.02306,-0.008005,-0.002861,-0.001208,-0.000634,-0.00026
6,0.018073,-0.034798,-0.007485,-0.002823,-0.001132,-0.000469,-0.000316,0.010583,0.994379,-0.011175,...,-0.001093,-0.000513,-0.000253,0.015271,-0.040464,-0.008365,-0.00279,-0.00117,-0.000618,-0.000257
7,0.011945,-0.074876,-0.008675,-0.003125,-0.001278,-0.000523,-0.000349,0.007466,0.053071,-0.013552,...,-0.001223,-0.000557,-0.000285,0.010443,-0.110097,-0.009672,-0.003116,-0.001302,-0.000679,-0.000287
8,0.008292,-0.233827,-0.008065,-0.002837,-0.00113,-0.000472,-0.000316,0.005353,0.027611,-0.013208,...,-0.001094,-0.000506,-0.000259,0.00727,0.955852,-0.009175,-0.002825,-0.001186,-0.000616,-0.000257
9,0.007039,0.889197,-0.007809,-0.002758,-0.001092,-0.000463,-0.000303,0.004566,0.021897,-0.013177,...,-0.001051,-0.000493,-0.000245,0.006203,0.143684,-0.008948,-0.002738,-0.00113,-0.000586,-0.00024


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.0029710743366305747,
 0.2538626388521905,
 0.2714090715255252,
 0.06720139094805089,
 0.14573046910942378,
 0.1347393673749111,
 0.005620741374229743,
 0.014300257836151076,
 0.04414792287208613,
 0.11080330075736966,
 0.18453242933457725,
 0.14660665036792364,
 0.008209169835671215,
 0.22707511489762877,
 0.2638337959113739,
 0.07079298106861398,
 0.25987977607948587,
 0.29659221552564274,
 0.013774066858276313,
 0.035061164112040855,
 0.25470702113001653,
 0.3236159348874824,
 0.26525177819627577,
 0.32603990259756155,
 0.021881499774422997,
 0.05013635786419712,
 0.11240644931229404,
 0.2629374755980779,
 0.22535439879029662,
 0.4157360137564189,
 0.08292822246724507,
 0.1692908582280731,
 0.2034983129794481,
 0.22524059616488168,
 0.2708416878987341,
 0.4087219016375683,
 0.16188445512202143,
 0.20396744885017948,
 0.15063471477310308,
 0.23603269889625578,
 0.20227659115163177,
 0.6134960725522054]