In [1]:
DATA_NAME = 'segmentAnything-full' 
TRANSFORM = 'gabor'
CHANNEL = 'green'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_green_gabor_full_segmentAnything.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,41250.32,1.27,-0.22,0.46,0.12,-0.15,-0.13,-2.06,0.65,-0.45,...,-0.19,0.35,-0.02,0.78,-0.1,-0.46,0.21,0.04,0.03,0.05
1,1.27,17333.69,0.63,-0.47,-0.06,-0.1,-0.01,1.42,-0.24,-0.37,...,0.0,-0.0,0.01,0.9,-0.23,-0.11,-0.28,0.07,0.05,0.08
2,-0.22,0.63,7014.1,-0.13,-0.0,0.08,0.04,0.08,0.5,-0.15,...,0.04,-0.04,0.03,-0.64,0.56,-0.02,-0.34,-0.02,0.09,-0.02
3,0.46,-0.47,-0.13,2995.33,-0.05,0.03,-0.02,-0.62,-0.22,0.0,...,-0.05,-0.05,0.02,0.32,-0.03,0.13,0.06,0.04,-0.04,-0.0
4,0.12,-0.06,-0.0,-0.05,1252.77,-0.01,0.01,-0.02,0.0,0.24,...,0.0,0.01,0.01,0.02,0.13,-0.09,0.09,0.01,0.03,-0.02
5,-0.15,-0.1,0.08,0.03,-0.01,453.94,0.0,-0.27,0.09,-0.03,...,-0.02,-0.01,0.01,-0.15,0.02,0.01,0.04,-0.0,0.0,-0.0
6,-0.13,-0.01,0.04,-0.02,0.01,0.0,289.51,0.07,0.0,0.09,...,0.02,-0.0,-0.01,-0.05,-0.01,0.02,-0.03,0.03,-0.01,0.0
7,-2.06,1.42,0.08,-0.62,-0.02,-0.27,0.07,56198.16,-0.09,0.58,...,-0.02,0.14,-0.13,0.42,-1.48,-0.4,-0.56,0.33,0.18,-0.14
8,0.65,-0.24,0.5,-0.22,0.0,0.09,0.0,-0.09,22585.05,0.36,...,-0.12,0.04,0.0,0.61,0.87,-0.74,0.19,0.17,-0.04,-0.04
9,-0.45,-0.37,-0.15,0.0,0.24,-0.03,0.09,0.58,0.36,9643.34,...,0.06,0.04,-0.05,0.15,-0.3,0.25,0.02,0.03,-0.1,0.01


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-4e-05,-4e-05,2e-05,-2e-05,...,-3e-05,8e-05,-1e-05,2e-05,-0.0,-3e-05,2e-05,1e-05,1e-05,2e-05
1,5e-05,1.0,6e-05,-6e-05,-1e-05,-4e-05,-0.0,5e-05,-1e-05,-3e-05,...,0.0,-0.0,0.0,3e-05,-1e-05,-1e-05,-4e-05,1e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-3e-05,-0.0,4e-05,3e-05,0.0,4e-05,-2e-05,...,2e-05,-2e-05,3e-05,-4e-05,5e-05,-0.0,-7e-05,-1e-05,5e-05,-1e-05
3,4e-05,-6e-05,-3e-05,1.0,-3e-05,2e-05,-2e-05,-5e-05,-3e-05,0.0,...,-2e-05,-5e-05,3e-05,3e-05,-0.0,3e-05,2e-05,2e-05,-3e-05,-0.0
4,2e-05,-1e-05,-0.0,-3e-05,1.0,-2e-05,1e-05,-0.0,0.0,7e-05,...,0.0,2e-05,1e-05,0.0,3e-05,-3e-05,4e-05,1e-05,4e-05,-4e-05
5,-4e-05,-4e-05,4e-05,2e-05,-2e-05,1.0,1e-05,-5e-05,3e-05,-2e-05,...,-3e-05,-2e-05,3e-05,-3e-05,1e-05,1e-05,3e-05,-0.0,0.0,-1e-05
6,-4e-05,-0.0,3e-05,-2e-05,1e-05,1e-05,1.0,2e-05,0.0,5e-05,...,4e-05,-1e-05,-2e-05,-2e-05,-1e-05,1e-05,-3e-05,5e-05,-3e-05,1e-05
7,-4e-05,5e-05,0.0,-5e-05,-0.0,-5e-05,2e-05,1.0,-0.0,3e-05,...,-0.0,3e-05,-4e-05,1e-05,-5e-05,-2e-05,-4e-05,4e-05,3e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,3e-05,0.0,-0.0,1.0,2e-05,...,-2e-05,1e-05,0.0,2e-05,4e-05,-6e-05,2e-05,3e-05,-1e-05,-2e-05
9,-2e-05,-3e-05,-2e-05,0.0,7e-05,-2e-05,5e-05,3e-05,2e-05,1.0,...,2e-05,2e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,1e-05,-5e-05,0.0


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

13.799596199336905

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[7.55166932e+07 5.70432715e+07 4.53678904e+07 4.06406932e+07
 3.66206094e+07 3.39108464e+07 1.21624360e+07 9.19563591e+06
 8.57144843e+06 7.18648857e+06 6.69247788e+06 6.11464215e+06
 2.20483385e+06 1.57903591e+06 1.33778812e+06 1.16909176e+06
 1.06543259e+06 9.55287199e+05 3.91769763e+05 2.76070351e+05
 2.45193266e+05 2.13364969e+05 1.93814119e+05 1.62055466e+05
 6.65354860e+04 4.77956495e+04 4.39324170e+04 3.75279443e+04
 3.45782162e+04 2.71967588e+04 1.01367234e+04 7.06863725e+03
 6.32177829e+03 5.09247092e+03 4.83917959e+03 3.58513028e+03
 2.24047867e+03 1.76007411e+03 1.24090772e+03 1.14848749e+03
 1.06316564e+03 5.79767787e-24]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.032613,-0.006774,-0.002525,-0.001077,-0.000445,-0.000166,-0.000101,0.994944,-0.009593,-0.003514,...,-0.000435,-0.000154,-7.6e-05,-0.039723,-0.007684,-0.002719,-0.001158,-0.000483,-0.000189,-7.8e-05
1,-0.062935,-0.008222,-0.002975,-0.001262,-0.000523,-0.000185,-0.000123,0.06596,-0.011968,-0.004154,...,-0.000502,-0.000177,-8.5e-05,-0.094788,-0.009313,-0.003161,-0.001338,-0.000581,-0.000227,-8.5e-05
2,-0.197143,-0.008405,-0.002957,-0.001225,-0.000512,-0.000188,-0.000119,0.032775,-0.012668,-0.004135,...,-0.000493,-0.000178,-8.4e-05,0.970226,-0.009648,-0.003149,-0.00134,-0.000556,-0.000224,-9e-05
3,0.947401,-0.010237,-0.003532,-0.001458,-0.000608,-0.000226,-0.000145,0.030692,-0.015802,-0.00499,...,-0.000599,-0.000204,-0.000102,0.154744,-0.011857,-0.003796,-0.001577,-0.00067,-0.000264,-0.000103
4,0.101277,-0.007053,-0.002353,-0.000981,-0.000412,-0.000152,-9e-05,0.016543,-0.011149,-0.003356,...,-0.000406,-0.000147,-6.3e-05,0.054612,-0.00817,-0.002515,-0.001056,-0.000444,-0.000174,-6.6e-05
5,0.215837,-0.025867,-0.008522,-0.003513,-0.001465,-0.000529,-0.000339,0.05173,-0.04181,-0.012084,...,-0.001437,-0.000517,-0.000243,0.140373,-0.030094,-0.009151,-0.00381,-0.001618,-0.000639,-0.000248
6,0.017151,-0.04383,-0.007791,-0.003075,-0.001261,-0.000453,-0.000291,0.01059,0.990545,-0.011879,...,-0.001239,-0.000442,-0.000207,0.015588,-0.069585,-0.008544,-0.003299,-0.001382,-0.000549,-0.000214
7,0.008595,-0.06315,-0.005944,-0.002245,-0.000948,-0.000335,-0.000212,0.005559,0.047297,-0.009399,...,-0.00092,-0.000316,-0.000154,0.007997,-0.354285,-0.006438,-0.002434,-0.001015,-0.000397,-0.000159
8,0.01574,-0.175614,-0.011921,-0.004502,-0.001846,-0.000666,-0.000426,0.010352,0.073207,-0.019256,...,-0.00181,-0.000635,-0.000309,0.014559,0.913223,-0.013133,-0.004865,-0.002039,-0.000796,-0.000313
9,0.009023,0.925359,-0.008789,-0.003283,-0.001331,-0.000487,-0.000306,0.006055,0.032552,-0.014883,...,-0.001301,-0.000465,-0.000219,0.0084,0.087532,-0.009762,-0.00354,-0.00146,-0.00057,-0.000219


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.005055529223811583,
 0.010974213353034279,
 0.029774495123392697,
 0.05259944527109861,
 0.1566971109193923,
 0.16812300642693045,
 0.009454691690012118,
 0.07074302666137633,
 0.08677677846150789,
 0.074641374892912,
 0.29098560431665244,
 0.35193598286767513,
 0.010062741119513774,
 0.026557135224072215,
 0.050026449951668495,
 0.084005986537952,
 0.28336919978943986,
 0.3449845714496892,
 0.01526405334289982,
 0.05383796259878326,
 0.08682215883279532,
 0.1538592979736474,
 0.24157756426217725,
 0.17143283812576315,
 0.02499659578004676,
 0.14149394226231393,
 0.19626018909030096,
 0.210509593857388,
 0.3089136176217543,
 0.31741640102667723,
 0.037116489283385734,
 0.1667856309990029,
 0.23136610841039185,
 0.23468605678008803,
 0.2253046546152322,
 0.46955513018879336,
 0.18850476180943931,
 0.2690994804105983,
 0.23216121684882796,
 0.35546840331602647,
 0.19956220378888756,
 0.584591272943916]