In [1]:
DATA_NAME = 'spaceNet-full' 
TRANSFORM = 'fourier'
CHANNEL = 'green'

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_green_fourier_full_spaceNet.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,15,16,17,18,19,20,21,22,23,24
2,1477.55,-0.01,0.02,0.03,0.0,0.0,0.02,0.04,-0.0,0.01,...,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0
3,-0.01,1066.17,0.0,-0.02,0.02,-0.03,0.0,0.01,-0.01,-0.0,...,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0
4,0.02,0.0,830.46,0.03,0.01,-0.0,0.01,-0.01,0.02,0.0,...,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0
5,0.03,-0.02,0.03,613.37,-0.02,0.02,-0.0,-0.01,0.0,0.0,...,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0
6,0.0,0.02,0.01,-0.02,423.4,-0.0,-0.0,-0.0,-0.01,0.0,...,0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0
7,0.0,-0.03,-0.0,0.02,-0.0,272.09,-0.0,0.0,0.0,-0.0,...,-0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0
8,0.02,0.0,0.01,-0.0,-0.0,-0.0,177.0,0.0,-0.01,0.0,...,0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0
9,0.04,0.01,-0.01,-0.01,-0.0,0.0,0.0,105.52,-0.0,-0.0,...,0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0
10,-0.0,-0.01,0.02,0.0,-0.01,0.0,-0.01,-0.0,67.8,0.0,...,0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0
11,0.01,-0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,42.98,...,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,15,16,17,18,19,20,21,22,23,24
2,1.0,-1e-05,2e-05,3e-05,0.0,1e-05,4e-05,0.0001,-1e-05,3e-05,...,1e-05,1e-05,-2e-05,7e-05,5e-05,0.0,2e-05,-1e-05,3e-05,-2e-05
3,-1e-05,1.0,0.0,-2e-05,3e-05,-6e-05,1e-05,3e-05,-2e-05,-2e-05,...,-1e-05,4e-05,0.0,3e-05,3e-05,-1e-05,3e-05,-4e-05,-4e-05,-1e-05
4,2e-05,0.0,1.0,4e-05,2e-05,-0.0,3e-05,-4e-05,7e-05,0.0,...,-4e-05,1e-05,3e-05,-4e-05,0.0,1e-05,0.0,-2e-05,2e-05,0.0
5,3e-05,-2e-05,4e-05,1.0,-4e-05,5e-05,-1e-05,-3e-05,1e-05,1e-05,...,-2e-05,-2e-05,1e-05,-1e-05,4e-05,0.0,0.0,-2e-05,5e-05,3e-05
6,0.0,3e-05,2e-05,-4e-05,1.0,-0.0,-0.0,-0.0,-4e-05,3e-05,...,2e-05,-1e-05,2e-05,0.0,3e-05,4e-05,-3e-05,2e-05,-4e-05,-1e-05
7,1e-05,-6e-05,-0.0,5e-05,-0.0,1.0,-0.0,0.0,3e-05,-2e-05,...,-4e-05,2e-05,-3e-05,8e-05,-3e-05,5e-05,1e-05,-5e-05,0.0,-3e-05
8,4e-05,1e-05,3e-05,-1e-05,-0.0,-0.0,1.0,1e-05,-5e-05,5e-05,...,1e-05,3e-05,-1e-05,0.0,-0.0,0.0,-3e-05,-7e-05,-3e-05,-0.0
9,0.0001,3e-05,-4e-05,-3e-05,-0.0,0.0,1e-05,1.0,-3e-05,-2e-05,...,1e-05,-0.0,-1e-05,5e-05,-2e-05,2e-05,1e-05,2e-05,-3e-05,-6e-05
10,-1e-05,-2e-05,7e-05,1e-05,-4e-05,3e-05,-5e-05,-3e-05,1.0,6e-05,...,1e-05,-1e-05,0.0,-2e-05,1e-05,3e-05,-3e-05,2e-05,1e-05,2e-05
11,3e-05,-2e-05,0.0,1e-05,3e-05,-2e-05,5e-05,-2e-05,6e-05,1.0,...,2e-05,1e-05,5e-05,-1e-05,4e-05,6e-05,-4e-05,-1e-05,-6e-05,1e-05


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

0.12880114903400317

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[9.52713246e+04 4.94775562e+04 2.98637455e+04 1.62331493e+04
 7.71281542e+03 3.17864968e+03 1.33962678e+03 4.75141764e+02
 1.95204151e+02 7.81127903e+01 3.28991972e+01 1.42224917e+01
 6.30623129e+00 2.58068761e+00 1.02946590e+00 3.67517927e-01
 1.27469880e-01 3.32363860e-02 9.12646642e-03 2.27751039e-03
 9.48994647e-04 4.89123377e-04 1.45235578e-33]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,0.9969,-0.065391,-0.034705,-0.020943,-0.012989,-0.007909,-0.005028,-0.002947,-0.001908,-0.001201,...,-0.000221,-0.000141,-8.6e-05,-4.8e-05,-2.5e-05,-1.4e-05,-7e-06,-5e-06,-3e-06,-3e-06
1,0.06062,0.992685,-0.093455,-0.038699,-0.020872,-0.012077,-0.00751,-0.004383,-0.002814,-0.001779,...,-0.000328,-0.000206,-0.000125,-7.3e-05,-3.8e-05,-2.1e-05,-1e-05,-7e-06,-5e-06,-4e-06
2,0.037714,0.086605,0.991031,-0.084994,-0.034503,-0.0182,-0.011004,-0.006382,-0.004029,-0.002557,...,-0.000475,-0.000299,-0.000178,-0.000109,-5.5e-05,-2.9e-05,-1.5e-05,-1e-05,-7e-06,-5e-06
3,0.02498,0.042311,0.077067,0.992425,-0.073692,-0.029643,-0.016792,-0.009439,-0.005937,-0.003737,...,-0.000689,-0.000439,-0.000262,-0.000157,-7.9e-05,-4.3e-05,-2.1e-05,-1.4e-05,-1e-05,-7e-06
4,0.016505,0.024774,0.035898,0.066916,0.994011,-0.063991,-0.028779,-0.014973,-0.009257,-0.005749,...,-0.00105,-0.00067,-0.0004,-0.00024,-0.000122,-6.5e-05,-3.4e-05,-2.1e-05,-1.7e-05,-1.2e-05
5,0.010487,0.015029,0.020103,0.02998,0.058096,0.994552,-0.068787,-0.026918,-0.015547,-0.009479,...,-0.001712,-0.001081,-0.000655,-0.000382,-0.000201,-0.000105,-5.3e-05,-3.6e-05,-2.6e-05,-2e-05
6,0.007143,0.010042,0.013101,0.018453,0.029489,0.063699,0.994764,-0.060002,-0.028482,-0.016194,...,-0.002815,-0.001782,-0.001077,-0.00064,-0.00033,-0.000176,-8.9e-05,-5.9e-05,-4.3e-05,-3.2e-05
7,0.004333,0.006064,0.007861,0.01077,0.01609,0.027449,0.05437,0.994316,-0.074351,-0.03206,...,-0.004897,-0.003104,-0.001868,-0.001104,-0.000575,-0.000304,-0.000152,-9.8e-05,-7.5e-05,-5.7e-05
8,0.002999,0.004168,0.005337,0.007289,0.010721,0.017233,0.029004,0.068289,0.992982,-0.077516,...,-0.008291,-0.005228,-0.003134,-0.001867,-0.00096,-0.00051,-0.000257,-0.000165,-0.000123,-9.1e-05
9,0.002004,0.002791,0.00358,0.004856,0.007066,0.011168,0.017704,0.033279,0.069782,0.991554,...,-0.014216,-0.008839,-0.005257,-0.003133,-0.001605,-0.000852,-0.000433,-0.000278,-0.000211,-0.000154


In [13]:
cos_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine'))
cos_dist = cos_dist = [np.min(row) for row in cos_dist]
cos_dist


[0.003099963108556203,
 0.007315093802359729,
 0.008969133820149189,
 0.007575396464834072,
 0.005989431020114178,
 0.005448065889725795,
 0.005235856959398544,
 0.005684102593976892,
 0.0070176832912504095,
 0.008446176672784023,
 0.010634503422465968,
 0.013243373014968918,
 0.0150424109514542,
 0.01631236691220672,
 0.017503778401152026,
 0.01906003009052437,
 0.019382902384938006,
 0.021513830873236772,
 0.029900424237029655,
 0.06656075011531415,
 0.160152726001893,
 0.3151336329032667,
 0.2992406041017607]