In [1]:
DATA_NAME = 'spaceNet-full' 
TRANSFORM = 'fourier'
CHANNEL = 'red'

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_red_fourier_full_spaceNet.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
        cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,15,16,17,18,19,20,21,22,23,24
2,27123.27,-0.21,0.26,0.42,0.06,0.06,0.24,0.48,-0.02,0.09,...,0.01,0.01,-0.01,0.02,0.01,-0.0,0.0,-0.0,0.0,-0.0
3,-0.21,17761.03,-0.01,-0.27,0.24,-0.39,0.06,0.12,-0.05,-0.03,...,-0.01,0.02,0.0,0.01,0.0,-0.0,0.0,-0.0,-0.0,-0.0
4,0.26,-0.01,12791.8,0.45,0.13,-0.02,0.15,-0.15,0.18,0.0,...,-0.03,0.0,0.01,-0.01,0.0,0.0,-0.0,-0.0,0.0,0.0
5,0.42,-0.27,0.45,8740.15,-0.3,0.28,-0.02,-0.07,0.02,0.01,...,-0.01,-0.01,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0
6,0.06,0.24,0.13,-0.3,5668.05,-0.01,-0.02,-0.01,-0.07,0.05,...,0.01,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0
7,0.06,-0.39,-0.02,0.28,-0.01,3391.38,-0.01,0.01,0.04,-0.02,...,-0.02,0.01,-0.01,0.01,-0.0,0.0,0.0,-0.0,0.0,-0.0
8,0.24,0.06,0.15,-0.02,-0.02,-0.01,2145.86,0.01,-0.06,0.05,...,0.0,0.01,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0
9,0.48,0.12,-0.15,-0.07,-0.01,0.01,0.01,1222.54,-0.03,-0.01,...,0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0
10,-0.02,-0.05,0.18,0.02,-0.07,0.04,-0.06,-0.03,763.56,0.03,...,0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0
11,0.09,-0.03,0.0,0.01,0.05,-0.02,0.05,-0.01,0.03,466.71,...,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,15,16,17,18,19,20,21,22,23,24
2,1.0,-1e-05,1e-05,3e-05,0.0,1e-05,3e-05,8e-05,-0.0,3e-05,...,1e-05,1e-05,-1e-05,4e-05,3e-05,-0.0,1e-05,-0.0,1e-05,-0.0
3,-1e-05,1.0,-0.0,-2e-05,2e-05,-5e-05,1e-05,3e-05,-1e-05,-1e-05,...,-1e-05,2e-05,0.0,2e-05,1e-05,-1e-05,1e-05,-1e-05,-1e-05,-0.0
4,1e-05,-0.0,1.0,4e-05,2e-05,-0.0,3e-05,-4e-05,6e-05,0.0,...,-3e-05,0.0,2e-05,-2e-05,0.0,1e-05,-0.0,-1e-05,1e-05,0.0
5,3e-05,-2e-05,4e-05,1.0,-4e-05,5e-05,-1e-05,-2e-05,1e-05,1e-05,...,-1e-05,-2e-05,1e-05,-0.0,2e-05,0.0,0.0,-1e-05,1e-05,1e-05
6,0.0,2e-05,2e-05,-4e-05,1.0,-0.0,-1e-05,-0.0,-3e-05,3e-05,...,1e-05,-0.0,1e-05,0.0,1e-05,2e-05,-1e-05,1e-05,-1e-05,-0.0
7,1e-05,-5e-05,-0.0,5e-05,-0.0,1.0,-0.0,0.0,2e-05,-1e-05,...,-3e-05,1e-05,-2e-05,5e-05,-1e-05,2e-05,1e-05,-2e-05,0.0,-1e-05
8,3e-05,1e-05,3e-05,-1e-05,-1e-05,-0.0,1.0,1e-05,-5e-05,5e-05,...,1e-05,3e-05,-1e-05,0.0,-0.0,0.0,-1e-05,-3e-05,-1e-05,-0.0
9,8e-05,3e-05,-4e-05,-2e-05,-0.0,0.0,1e-05,1.0,-3e-05,-2e-05,...,1e-05,0.0,-1e-05,3e-05,-1e-05,1e-05,0.0,1e-05,-1e-05,-2e-05
10,-0.0,-1e-05,6e-05,1e-05,-3e-05,2e-05,-5e-05,-3e-05,1.0,5e-05,...,1e-05,-1e-05,0.0,-1e-05,1e-05,1e-05,-1e-05,1e-05,0.0,1e-05
11,3e-05,-1e-05,0.0,1e-05,3e-05,-1e-05,5e-05,-2e-05,5e-05,1.0,...,1e-05,1e-05,4e-05,-1e-05,3e-05,3e-05,-2e-05,-1e-05,-2e-05,0.0


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

1.7022395715535086

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[3.20637035e+07 1.37156957e+07 7.08514791e+06 3.29767419e+06
 1.38274462e+06 4.94046286e+05 1.96920865e+05 6.37804798e+04
 2.47533568e+04 9.20711009e+03 3.60024564e+03 1.40706375e+03
 5.37799730e+02 1.81113608e+02 5.93801389e+01 1.65571780e+01
 4.32946721e+00 8.33343727e-01 1.68379737e-01 2.44068297e-02
 5.22803871e-03 9.80886374e-04 3.61120422e-35]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,0.998178,-0.050758,-0.026284,-0.015454,-0.009375,-0.005441,-0.003402,-0.001916,-0.001208,-0.000734,...,-0.000104,-6e-05,-3.3e-05,-1.6e-05,-7e-06,-3e-06,-1e-06,-6.490142e-07,-2.872857e-07,-1.22891e-07
1,0.048065,0.995663,-0.071183,-0.029839,-0.016135,-0.009008,-0.005546,-0.00312,-0.001952,-0.001192,...,-0.00017,-9.7e-05,-5.2e-05,-2.6e-05,-1.2e-05,-6e-06,-2e-06,-1.095037e-06,-5.502995e-07,-1.922308e-07
2,0.028198,0.067115,0.994609,-0.066194,-0.027596,-0.014172,-0.008539,-0.004786,-0.00295,-0.001809,...,-0.000259,-0.000148,-7.8e-05,-4.1e-05,-1.8e-05,-8e-06,-3e-06,-1.604794e-06,-7.519588e-07,-2.847959e-07
3,0.01787,0.032014,0.061334,0.995113,-0.061436,-0.024248,-0.013817,-0.007536,-0.004636,-0.002822,...,-0.000402,-0.000232,-0.000123,-6.4e-05,-2.8e-05,-1.3e-05,-5e-06,-2.501341e-06,-1.130587e-06,-4.25947e-07
4,0.011526,0.01866,0.028766,0.057091,0.995826,-0.053738,-0.024923,-0.012673,-0.007685,-0.004623,...,-0.000654,-0.000378,-0.0002,-0.000104,-4.6e-05,-2.1e-05,-8e-06,-3.90631e-06,-2.049208e-06,-7.387363e-07
5,0.006948,0.010856,0.015551,0.024721,0.049518,0.99579,-0.063618,-0.024215,-0.013767,-0.008144,...,-0.001142,-0.000654,-0.00035,-0.000178,-8e-05,-3.6e-05,-1.4e-05,-7.108026e-06,-3.420559e-06,-1.28948e-06
6,0.00468,0.007215,0.010137,0.015335,0.025786,0.05981,0.995685,-0.054451,-0.025792,-0.014311,...,-0.001942,-0.001116,-0.000596,-0.000308,-0.000137,-6.3e-05,-2.5e-05,-1.222357e-05,-5.949397e-06,-2.159219e-06
7,0.002731,0.004203,0.005883,0.008687,0.013736,0.025,0.049882,0.995218,-0.069388,-0.029333,...,-0.003538,-0.002036,-0.001084,-0.000557,-0.000249,-0.000114,-4.4e-05,-2.134125e-05,-1.078244e-05,-4.049574e-06
8,0.001846,0.002825,0.003911,0.005764,0.008998,0.015474,0.026498,0.064464,0.994059,-0.071368,...,-0.006161,-0.003534,-0.001876,-0.000971,-0.00043,-0.000197,-7.7e-05,-3.706988e-05,-1.835605e-05,-6.753388e-06
9,0.001194,0.001832,0.002541,0.003723,0.005757,0.00975,0.015801,0.030748,0.065069,0.992975,...,-0.010896,-0.006189,-0.003268,-0.001693,-0.000747,-0.000342,-0.000135,-6.511127e-05,-3.264108e-05,-1.185606e-05


In [13]:
cos_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine'))
cos_dist = cos_dist = [np.min(row) for row in cos_dist]
cos_dist


[0.0018222436469367276,
 0.0043372241414161294,
 0.005391262383739548,
 0.004887269330701938,
 0.004174399874012358,
 0.004209700029546015,
 0.004314721942803801,
 0.004781645267976642,
 0.0059412219995408355,
 0.007025443539442566,
 0.008513643320643194,
 0.009769392239332042,
 0.010187108685868251,
 0.010537390801483748,
 0.010922163637015703,
 0.011243298822376069,
 0.011202880671479565,
 0.012612637032805574,
 0.015347409114835453,
 0.029597256870567024,
 0.06569551162785414,
 0.1129243295507748,
 0.07892593907191536]