In [1]:
DATA_NAME = 'standardTesting-full' 
TRANSFORM = 'gabor'
CHANNEL = 'gray'
PARAM_CSV = "gabor_new.csv"# only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gray_gabor_full_standardTesting.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,38919.0,1.09,-0.17,0.35,0.09,-0.1,-0.07,-2.11,0.66,-0.4,...,-0.15,0.22,-0.01,0.49,-0.07,-0.33,0.15,0.04,0.02,0.03
1,1.09,13813.01,0.51,-0.32,-0.06,-0.07,-0.0,1.24,-0.18,-0.29,...,0.0,-0.0,0.01,0.79,-0.19,-0.08,-0.19,0.04,0.04,0.04
2,-0.17,0.51,5212.21,-0.07,-0.0,0.05,0.02,0.11,0.41,-0.13,...,0.03,-0.02,0.02,-0.51,0.42,-0.02,-0.21,-0.01,0.06,-0.01
3,0.35,-0.32,-0.07,1797.82,-0.03,0.01,-0.01,-0.48,-0.17,0.01,...,-0.02,-0.02,0.01,0.28,0.01,0.08,0.04,0.02,-0.02,-0.0
4,0.09,-0.06,-0.0,-0.03,583.21,-0.01,0.0,-0.01,0.0,0.14,...,-0.0,0.0,0.0,-0.03,0.09,-0.05,0.04,0.01,0.02,-0.01
5,-0.1,-0.07,0.05,0.01,-0.01,181.5,0.0,-0.18,0.05,-0.01,...,-0.01,-0.0,0.0,-0.09,0.01,0.0,0.02,-0.0,0.0,0.0
6,-0.07,-0.0,0.02,-0.01,0.0,0.0,111.35,0.04,0.0,0.05,...,0.01,-0.0,-0.0,-0.01,-0.0,0.01,-0.02,0.01,-0.0,0.0
7,-2.11,1.24,0.11,-0.48,-0.01,-0.18,0.04,52155.31,-0.17,0.39,...,-0.01,0.08,-0.08,0.39,-1.14,-0.25,-0.4,0.21,0.11,-0.07
8,0.66,-0.18,0.41,-0.17,0.0,0.05,0.0,-0.17,19633.82,0.28,...,-0.07,0.03,-0.0,0.44,0.66,-0.53,0.13,0.09,-0.02,-0.02
9,-0.4,-0.29,-0.13,0.01,0.14,-0.01,0.05,0.39,0.28,7266.84,...,0.03,0.02,-0.03,0.19,-0.23,0.16,0.02,0.01,-0.06,0.01


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-3e-05,-5e-05,2e-05,-2e-05,...,-3e-05,9e-05,-0.0,1e-05,-0.0,-2e-05,2e-05,1e-05,1e-05,2e-05
1,5e-05,1.0,6e-05,-6e-05,-2e-05,-4e-05,-0.0,5e-05,-1e-05,-3e-05,...,0.0,-0.0,1e-05,3e-05,-1e-05,-1e-05,-4e-05,1e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-2e-05,-0.0,5e-05,3e-05,1e-05,4e-05,-2e-05,...,2e-05,-2e-05,3e-05,-4e-05,5e-05,-0.0,-7e-05,-0.0,5e-05,-1e-05
3,4e-05,-6e-05,-2e-05,1.0,-3e-05,2e-05,-2e-05,-5e-05,-3e-05,0.0,...,-2e-05,-4e-05,2e-05,3e-05,0.0,3e-05,2e-05,1e-05,-3e-05,-0.0
4,2e-05,-2e-05,-0.0,-3e-05,1.0,-2e-05,1e-05,-0.0,0.0,7e-05,...,-0.0,1e-05,1e-05,-1e-05,3e-05,-3e-05,4e-05,1e-05,4e-05,-5e-05
5,-4e-05,-4e-05,5e-05,2e-05,-2e-05,1.0,0.0,-6e-05,3e-05,-1e-05,...,-3e-05,-3e-05,3e-05,-3e-05,0.0,0.0,4e-05,-0.0,0.0,1e-05
6,-3e-05,-0.0,3e-05,-2e-05,1e-05,0.0,1.0,2e-05,0.0,5e-05,...,4e-05,-1e-05,-3e-05,-1e-05,-0.0,1e-05,-3e-05,5e-05,-3e-05,0.0
7,-5e-05,5e-05,1e-05,-5e-05,-0.0,-6e-05,2e-05,1.0,-1e-05,2e-05,...,-0.0,3e-05,-4e-05,1e-05,-4e-05,-2e-05,-4e-05,4e-05,3e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,3e-05,0.0,-1e-05,1.0,2e-05,...,-2e-05,1e-05,-0.0,2e-05,4e-05,-6e-05,2e-05,3e-05,-1e-05,-1e-05
9,-2e-05,-3e-05,-2e-05,0.0,7e-05,-1e-05,5e-05,2e-05,2e-05,1.0,...,2e-05,2e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,0.0,-4e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

11.223961048827064

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[6.50108922e+07 4.79403808e+07 3.94520804e+07 3.58022926e+07
 2.58569692e+07 1.98536324e+07 9.16153787e+06 5.31613617e+06
 4.95909461e+06 4.47508807e+06 3.36106771e+06 3.02178568e+06
 1.24962256e+06 8.44296097e+05 6.45400972e+05 5.49996937e+05
 5.10974366e+05 4.56926776e+05 1.79678953e+05 1.04212590e+05
 7.84906521e+04 7.38010015e+04 6.45250894e+04 5.58909322e+04
 2.30777676e+04 1.47991983e+04 1.20713785e+04 9.71685373e+03
 8.17460692e+03 6.65558915e+03 2.13174355e+03 1.37021728e+03
 1.09429345e+03 8.02599499e+02 7.34600813e+02 5.24968972e+02
 2.66283886e+02 1.62422871e+02 1.31934067e+02 1.01701180e+02
 8.77251818e+01 9.39403181e-24]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.035475,-0.005799,-0.00206,-0.000715,-0.000229,-7.5e-05,-4.3e-05,0.995501,-0.008999,-0.002896,...,-0.000225,-6.5e-05,-2.6e-05,-0.041655,-0.006388,-0.00181,-0.00071,-0.00025,-9.3e-05,-2.4e-05
1,-0.076931,-0.006902,-0.002395,-0.000826,-0.000266,-8e-05,-5.2e-05,0.060955,-0.011046,-0.003371,...,-0.000256,-7.3e-05,-2.8e-05,-0.113777,-0.007568,-0.002069,-0.000807,-0.0003,-0.000111,-2.5e-05
2,-0.275215,-0.007027,-0.0024,-0.000808,-0.000264,-8.3e-05,-5e-05,0.034332,-0.011589,-0.003372,...,-0.000254,-7.6e-05,-2.8e-05,0.95542,-0.007764,-0.002076,-0.00082,-0.000287,-0.000111,-2.8e-05
3,0.953034,-0.012338,-0.004157,-0.001398,-0.000454,-0.000145,-8.9e-05,0.047845,-0.020743,-0.005896,...,-0.000451,-0.000125,-5e-05,0.257409,-0.013684,-0.003625,-0.001399,-0.000504,-0.00019,-4.5e-05
4,0.074016,-0.013697,-0.004345,-0.001461,-0.000476,-0.000149,-8.8e-05,0.027053,-0.025124,-0.006219,...,-0.000473,-0.00014,-4.9e-05,0.059629,-0.015282,-0.003771,-0.001461,-0.000524,-0.000196,-4.6e-05
5,0.047785,-0.019067,-0.005708,-0.001898,-0.000614,-0.000189,-0.000119,0.023547,-0.039491,-0.008198,...,-0.000607,-0.000179,-6.8e-05,0.041727,-0.021557,-0.004963,-0.001905,-0.000688,-0.00026,-6.3e-05
6,0.01705,-0.037378,-0.007453,-0.002422,-0.000776,-0.000239,-0.000148,0.011126,0.995205,-0.011228,...,-0.000769,-0.000224,-8.4e-05,0.01592,-0.04805,-0.006473,-0.002395,-0.000858,-0.000325,-7.8e-05
7,0.006401,-0.108139,-0.005778,-0.001781,-0.000559,-0.000176,-0.000109,0.004476,0.024783,-0.009386,...,-0.000557,-0.000162,-6.4e-05,0.006032,0.939803,-0.004977,-0.001768,-0.000639,-0.000238,-5.7e-05
8,0.009319,-0.348282,-0.009302,-0.002808,-0.000916,-0.00028,-0.000172,0.00651,0.033913,-0.015174,...,-0.000901,-0.000251,-0.000101,0.008882,0.259195,-0.007856,-0.002802,-0.001005,-0.000374,-9.3e-05
9,0.014201,0.920669,-0.016183,-0.004884,-0.001561,-0.000488,-0.000297,0.009992,0.047416,-0.027162,...,-0.001534,-0.000445,-0.000169,0.013493,0.191655,-0.013706,-0.004853,-0.001723,-0.000643,-0.000153


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.004499460795929888,
 0.012375705009162408,
 0.04458004108740876,
 0.04696586120853208,
 0.013083465954475515,
 0.00996197692519829,
 0.004794985342040703,
 0.060196978939614176,
 0.1038739623812387,
 0.07933099191653414,
 0.07965339854542164,
 0.08374711682132063,
 0.008129793565833765,
 0.015861585845975745,
 0.034096810060811444,
 0.08463370924471936,
 0.2817555277466336,
 0.3454158287243695,
 0.00942502569083603,
 0.020558904536748135,
 0.2506787851374097,
 0.3165049337335649,
 0.2654199184695675,
 0.32527128917449877,
 0.01869767284604751,
 0.046410257636845254,
 0.07612545366396306,
 0.09996550794190628,
 0.26221214246444213,
 0.3219706786939076,
 0.02925142855638707,
 0.07503354200441326,
 0.10611741251242157,
 0.28792343185432123,
 0.2772200564535877,
 0.3671534328256256,
 0.09830972085209144,
 0.20189043885671165,
 0.32139330804809463,
 0.2803277665212768,
 0.16685043950076217,
 0.5268838976292732]