In [1]:
DATA_NAME = 'spaceNet-full' 
TRANSFORM = 'gabor'
CHANNEL = 'red'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_red_gabor_full_spaceNet.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,96.51,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,...,-0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0
1,0.0,39.65,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,...,-0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0
2,-0.0,0.0,17.82,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,...,0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,0.0
3,0.0,-0.0,-0.0,7.13,-0.0,0.0,-0.0,-0.0,-0.0,0.0,...,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0
4,0.0,-0.0,0.0,-0.0,2.83,-0.0,0.0,0.0,-0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
5,-0.0,-0.0,0.0,0.0,-0.0,0.85,0.0,-0.0,0.0,-0.0,...,-0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
6,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.51,0.0,0.0,0.0,...,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0
7,-0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,122.05,-0.0,0.0,...,0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0
8,0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,50.51,0.0,...,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0
9,-0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,23.15,...,0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,0.0


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-2e-05,4e-05,1e-05,-4e-05,-2e-05,-4e-05,2e-05,-2e-05,...,-3e-05,8e-05,-0.0,2e-05,0.0,-3e-05,1e-05,2e-05,1e-05,2e-05
1,5e-05,1.0,7e-05,-6e-05,-2e-05,-5e-05,-1e-05,4e-05,-1e-05,-3e-05,...,-0.0,-1e-05,0.0,3e-05,-2e-05,-1e-05,-3e-05,0.0,1e-05,3e-05
2,-2e-05,7e-05,1.0,-1e-05,0.0,4e-05,4e-05,1e-05,4e-05,-2e-05,...,2e-05,-3e-05,4e-05,-4e-05,5e-05,1e-05,-7e-05,-1e-05,6e-05,0.0
3,4e-05,-6e-05,-1e-05,1.0,-3e-05,3e-05,-1e-05,-4e-05,-3e-05,1e-05,...,-3e-05,-5e-05,1e-05,4e-05,0.0,3e-05,3e-05,2e-05,-3e-05,-0.0
4,1e-05,-2e-05,0.0,-3e-05,1.0,-2e-05,0.0,0.0,-1e-05,6e-05,...,0.0,2e-05,0.0,0.0,2e-05,-3e-05,4e-05,2e-05,4e-05,-5e-05
5,-4e-05,-5e-05,4e-05,3e-05,-2e-05,1.0,0.0,-5e-05,3e-05,-2e-05,...,-2e-05,-3e-05,5e-05,-3e-05,1e-05,-0.0,4e-05,1e-05,2e-05,-2e-05
6,-2e-05,-1e-05,4e-05,-1e-05,0.0,0.0,1.0,2e-05,0.0,5e-05,...,4e-05,-1e-05,-3e-05,-1e-05,-1e-05,2e-05,-2e-05,4e-05,-2e-05,1e-05
7,-4e-05,4e-05,1e-05,-4e-05,0.0,-5e-05,2e-05,1.0,-0.0,2e-05,...,0.0,2e-05,-4e-05,1e-05,-5e-05,-2e-05,-4e-05,4e-05,3e-05,-2e-05
8,2e-05,-1e-05,4e-05,-3e-05,-1e-05,3e-05,0.0,-0.0,1.0,2e-05,...,-3e-05,1e-05,1e-05,1e-05,4e-05,-5e-05,3e-05,4e-05,-1e-05,-2e-05
9,-2e-05,-3e-05,-2e-05,1e-05,6e-05,-2e-05,5e-05,2e-05,2e-05,1.0,...,1e-05,0.0,-3e-05,1e-05,-2e-05,3e-05,0.0,1e-05,-4e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

0.030853601260743962

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[3.56564990e+02 2.93620689e+02 2.23699560e+02 2.06101468e+02
 1.96822505e+02 1.79919597e+02 6.07773852e+01 4.42734730e+01
 3.76225541e+01 3.41790854e+01 3.26403099e+01 2.92805632e+01
 1.27191066e+01 9.78432748e+00 7.76385977e+00 7.48340492e+00
 6.81752876e+00 5.72422623e+00 2.05045596e+00 1.66156255e+00
 1.37025565e+00 1.19893372e+00 1.06834436e+00 8.96530640e-01
 3.47173228e-01 2.16579473e-01 1.97334072e-01 1.84466409e-01
 1.68551754e-01 1.30495728e-01 3.98128826e-02 2.72996159e-02
 2.05374606e-02 1.71798722e-02 1.51812862e-02 1.09405330e-02
 6.01057007e-03 4.54171441e-03 2.89608893e-03 2.33540533e-03
 1.44054638e-03 4.60621443e-29]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.04107,-0.006799,-0.002796,-0.00111,-0.000435,-0.000135,-7.7e-05,0.991733,-0.00941,-0.00369,...,-0.000408,-0.000127,-3.6e-05,-0.034245,-0.006448,-0.002819,-0.001186,-0.000433,-0.000142,-4.7e-05
1,-0.081225,-0.008639,-0.003476,-0.001372,-0.000541,-0.000159,-9.9e-05,0.096943,-0.012175,-0.004594,...,-0.0005,-0.000152,-4.1e-05,-0.060608,-0.008086,-0.003458,-0.00145,-0.000551,-0.00018,-5.5e-05
2,0.960962,-0.007317,-0.002844,-0.001096,-0.000435,-0.000134,-8e-05,0.03027,-0.010736,-0.003803,...,-0.000415,-0.000121,-3.4e-05,-0.210766,-0.006848,-0.002865,-0.001171,-0.000439,-0.000143,-4.3e-05
3,0.10811,-0.005555,-0.00214,-0.000818,-0.000327,-0.000102,-5.9e-05,0.01836,-0.008268,-0.002846,...,-0.000301,-9.1e-05,-2.5e-05,0.86992,-0.005204,-0.002131,-0.000894,-0.000319,-0.000108,-3.5e-05
4,0.125337,-0.009855,-0.003746,-0.001429,-0.000564,-0.000169,-0.000107,0.028989,-0.014817,-0.004962,...,-0.000525,-0.000162,-4.7e-05,0.307544,-0.009145,-0.003752,-0.001547,-0.000578,-0.000191,-6.2e-05
5,0.198539,-0.02715,-0.010059,-0.003856,-0.001525,-0.00046,-0.000273,0.064555,-0.041682,-0.013461,...,-0.001435,-0.000449,-0.000117,0.312652,-0.025091,-0.010056,-0.004151,-0.001543,-0.000509,-0.000159
6,0.016356,-0.049909,-0.009463,-0.003394,-0.001319,-0.000391,-0.000237,0.011397,0.992348,-0.013678,...,-0.001242,-0.000385,-0.000101,0.017636,-0.0403,-0.009568,-0.003609,-0.001324,-0.000439,-0.000139
7,0.011149,-0.142354,-0.01034,-0.003493,-0.001381,-0.000408,-0.000244,0.008083,0.059809,-0.015716,...,-0.001291,-0.000386,-0.000108,0.011981,-0.081506,-0.010336,-0.003759,-0.001374,-0.000448,-0.000144
8,0.008744,0.952237,-0.010245,-0.003419,-0.001317,-0.0004,-0.000236,0.006439,0.035605,-0.016377,...,-0.00123,-0.000383,-0.000101,0.009319,-0.209223,-0.010352,-0.003658,-0.001321,-0.000431,-0.000133
9,0.003487,0.067499,-0.004712,-0.001535,-0.000589,-0.000177,-0.000105,0.002661,0.012659,-0.00782,...,-0.000555,-0.000171,-4.9e-05,0.003735,0.81538,-0.004799,-0.001647,-0.000606,-0.000195,-6e-05


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.008266655474891471,
 0.01285918304070699,
 0.03903825756085433,
 0.13007950200665086,
 0.22970806406929445,
 0.17771372417236797,
 0.007651657060628425,
 0.020802088306486555,
 0.0477629391196589,
 0.18461995235352624,
 0.3036999700473586,
 0.19385851294165657,
 0.013786637887424624,
 0.024661688218184596,
 0.25681162189704465,
 0.3901386677531318,
 0.24193885863571585,
 0.10430713032299399,
 0.02290226367470083,
 0.041488468486008845,
 0.07001003433778186,
 0.1267380931344657,
 0.20051269187710719,
 0.1723527494553182,
 0.018963238359986034,
 0.06910498430287926,
 0.23316519661558688,
 0.3626219603765415,
 0.28219211941153577,
 0.4087182433013433,
 0.03326888987480359,
 0.06011766124451823,
 0.09862028037609827,
 0.23497984220790213,
 0.3129978568822722,
 0.33481306385463927,
 0.2055337851199993,
 0.3001744311038762,
 0.23014565347265614,
 0.24988111374765276,
 0.1921159423762454,
 0.4367750499591554]