In [1]:
DATA_NAME = 'segmentAnything-full' 
TRANSFORM = 'gabor'
CHANNEL = 'gray'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gray_gabor_full_segmentAnything.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,39241.18,1.19,-0.21,0.44,0.12,-0.15,-0.13,-1.98,0.64,-0.43,...,-0.17,0.34,-0.02,0.77,-0.07,-0.45,0.2,0.03,0.02,0.05
1,1.19,16581.21,0.62,-0.45,-0.06,-0.1,-0.01,1.37,-0.23,-0.36,...,0.0,-0.0,0.01,0.9,-0.2,-0.11,-0.29,0.06,0.05,0.07
2,-0.21,0.62,6968.56,-0.12,0.0,0.07,0.04,0.08,0.49,-0.14,...,0.04,-0.04,0.03,-0.64,0.54,-0.02,-0.34,-0.01,0.09,-0.02
3,0.44,-0.45,-0.12,2952.46,-0.05,0.03,-0.02,-0.61,-0.22,0.0,...,-0.04,-0.06,0.02,0.32,-0.04,0.13,0.06,0.04,-0.04,-0.0
4,0.12,-0.06,0.0,-0.05,1229.97,-0.01,0.01,-0.02,0.0,0.24,...,-0.0,0.01,0.01,0.01,0.13,-0.09,0.09,0.01,0.04,-0.02
5,-0.15,-0.1,0.07,0.03,-0.01,452.41,0.0,-0.27,0.09,-0.03,...,-0.02,-0.01,0.01,-0.15,0.02,0.01,0.04,-0.0,0.0,-0.0
6,-0.13,-0.01,0.04,-0.02,0.01,0.0,276.68,0.06,0.0,0.09,...,0.02,-0.0,-0.01,-0.05,-0.01,0.02,-0.03,0.03,-0.01,0.0
7,-1.98,1.37,0.08,-0.61,-0.02,-0.27,0.06,53970.94,-0.09,0.56,...,-0.02,0.15,-0.13,0.44,-1.51,-0.39,-0.56,0.32,0.17,-0.13
8,0.64,-0.23,0.49,-0.22,0.0,0.09,0.0,-0.09,22259.4,0.36,...,-0.12,0.04,0.0,0.6,0.83,-0.73,0.21,0.17,-0.04,-0.04
9,-0.43,-0.36,-0.14,0.0,0.24,-0.03,0.09,0.56,0.36,9502.77,...,0.06,0.04,-0.05,0.16,-0.29,0.25,0.01,0.03,-0.1,0.01


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-1e-05,4e-05,2e-05,-4e-05,-4e-05,-4e-05,2e-05,-2e-05,...,-3e-05,8e-05,-1e-05,2e-05,-0.0,-3e-05,2e-05,0.0,1e-05,2e-05
1,5e-05,1.0,6e-05,-6e-05,-1e-05,-4e-05,-0.0,5e-05,-1e-05,-3e-05,...,0.0,-0.0,0.0,3e-05,-1e-05,-1e-05,-4e-05,1e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-3e-05,0.0,4e-05,3e-05,0.0,4e-05,-2e-05,...,1e-05,-2e-05,3e-05,-4e-05,5e-05,-0.0,-7e-05,-0.0,5e-05,-1e-05
3,4e-05,-6e-05,-3e-05,1.0,-3e-05,2e-05,-2e-05,-5e-05,-3e-05,0.0,...,-2e-05,-5e-05,3e-05,3e-05,-1e-05,3e-05,2e-05,2e-05,-3e-05,-1e-05
4,2e-05,-1e-05,0.0,-3e-05,1.0,-2e-05,1e-05,-0.0,0.0,7e-05,...,-0.0,2e-05,1e-05,0.0,3e-05,-3e-05,4e-05,1e-05,5e-05,-4e-05
5,-4e-05,-4e-05,4e-05,2e-05,-2e-05,1.0,0.0,-5e-05,3e-05,-2e-05,...,-3e-05,-2e-05,3e-05,-3e-05,1e-05,1e-05,4e-05,-0.0,0.0,-1e-05
6,-4e-05,-0.0,3e-05,-2e-05,1e-05,0.0,1.0,2e-05,0.0,5e-05,...,4e-05,-1e-05,-3e-05,-1e-05,-1e-05,1e-05,-3e-05,5e-05,-3e-05,1e-05
7,-4e-05,5e-05,0.0,-5e-05,-0.0,-5e-05,2e-05,1.0,-0.0,2e-05,...,-0.0,3e-05,-4e-05,1e-05,-5e-05,-2e-05,-4e-05,4e-05,3e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,0.0,3e-05,0.0,-0.0,1.0,2e-05,...,-2e-05,1e-05,0.0,2e-05,4e-05,-6e-05,2e-05,3e-05,-1e-05,-2e-05
9,-2e-05,-3e-05,-2e-05,0.0,7e-05,-2e-05,5e-05,2e-05,2e-05,1.0,...,2e-05,2e-05,-4e-05,1e-05,-2e-05,3e-05,0.0,1e-05,-4e-05,0.0


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

13.506271674104834

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[6.96793854e+07 5.24259852e+07 4.75118726e+07 3.78654870e+07
 3.75587734e+07 3.45429084e+07 1.18065497e+07 8.54073650e+06
 7.63155137e+06 6.74384400e+06 6.60060677e+06 5.93618578e+06
 2.13987986e+06 1.40286705e+06 1.30423496e+06 1.15405610e+06
 1.06123787e+06 9.46297195e+05 3.70485577e+05 2.60678795e+05
 2.44773901e+05 2.06866642e+05 1.90486217e+05 1.62869875e+05
 6.14075826e+04 4.56048547e+04 3.70051399e+04 3.61375657e+04
 3.31823331e+04 2.58436350e+04 1.00332512e+04 6.63535515e+03
 6.10099316e+03 5.13985942e+03 4.95953827e+03 3.60341476e+03
 2.25189930e+03 1.67206747e+03 1.25708271e+03 1.12100345e+03
 1.01677126e+03 5.30669804e-24]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.030865,-0.00659,-0.002557,-0.001081,-0.0004445396,-0.000168,-9.850052e-05,0.994376,-0.009726,-0.003531,...,-0.000412,-0.000165,-7.5e-05,-0.053378,-0.007313,-0.002741,-0.001188,-0.00044,-0.000184,-8e-05
1,-0.048867,-0.006673,-0.002512,-0.00106,-0.0004361474,-0.000156,-0.0001003502,0.05298,-0.010177,-0.003483,...,-0.000397,-0.000157,-7e-05,-0.21789,-0.007375,-0.002654,-0.001142,-0.000444,-0.000184,-7.2e-05
2,-0.096584,-0.00993,-0.003708,-0.001529,-0.0006361653,-0.000237,-0.0001447226,0.056115,-0.01538,-0.005138,...,-0.000582,-0.000236,-0.000103,0.96329,-0.011048,-0.003932,-0.001702,-0.000631,-0.00027,-0.000114
3,-0.370857,-0.001541,-0.000529,-0.000227,-9.712028e-05,-3.5e-05,-1.486285e-05,0.004531,-0.002495,-0.000759,...,-9e-05,-4.3e-05,-1.1e-05,0.012214,-0.00169,-0.000552,-0.000248,-8.9e-05,-3.7e-05,-1.4e-05
4,0.709388,5.5e-05,6e-06,2e-06,2.762564e-07,-6e-06,-8.856155e-07,4.5e-05,7.9e-05,-1.5e-05,...,-3e-06,8e-06,1e-06,6.9e-05,-3e-06,3e-06,1e-05,5e-06,3e-06,2e-06
5,0.587151,-0.026812,-0.009428,-0.003859,-0.001603987,-0.000591,-0.0003623433,0.066676,-0.044705,-0.013242,...,-0.001493,-0.000603,-0.000261,0.142556,-0.030123,-0.010072,-0.004276,-0.001612,-0.000679,-0.00028
6,0.01887,-0.04034,-0.00811,-0.003169,-0.001295212,-0.000472,-0.0002910599,0.011305,0.992102,-0.012244,...,-0.001206,-0.000486,-0.00021,0.015003,-0.055146,-0.008842,-0.003479,-0.001293,-0.000549,-0.000227
7,0.011186,-0.075423,-0.00787,-0.002931,-0.00122336,-0.000441,-0.0002686177,0.007132,0.052324,-0.012438,...,-0.001129,-0.000441,-0.000198,0.009288,-0.199828,-0.008513,-0.003251,-0.001203,-0.000502,-0.000213
8,0.011645,-0.159447,-0.009562,-0.003534,-0.001435774,-0.000527,-0.0003232724,0.007606,0.04427,-0.015585,...,-0.001337,-0.000531,-0.000239,0.009685,0.948641,-0.010519,-0.003909,-0.001451,-0.000604,-0.000252
9,0.00188,-0.611054,-0.001763,-0.000605,-0.0002432416,-8.9e-05,-5.818139e-05,0.001187,0.005746,-0.002867,...,-0.000237,-9e-05,-4.1e-05,0.001525,0.023313,-0.00195,-0.000674,-0.000254,-0.000105,-4.8e-05


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.005623845810334371,
 0.029417361661789876,
 0.036709802314139006,
 0.15237926817031489,
 0.29061156742525385,
 0.412566460169302,
 0.007897646358434929,
 0.03054308289316865,
 0.051359105745791056,
 0.21532152450137398,
 0.2700645258636327,
 0.34382098679994877,
 0.008949850740203491,
 0.07143543347277359,
 0.10348345323693708,
 0.10113662061656015,
 0.28262693348033807,
 0.35318809026446085,
 0.01562765506716668,
 0.1952014427897466,
 0.23885445370995373,
 0.11965549918167939,
 0.2687746018319107,
 0.3151384988271768,
 0.025314656943897895,
 0.04841535968724331,
 0.25409711462085727,
 0.257287934969751,
 0.2623897023246927,
 0.3513241485633146,
 0.035647356377384365,
 0.25490096749413604,
 0.34368725844375103,
 0.1845624167993387,
 0.26016337133964185,
 0.5280984927101976,
 0.15730203978637458,
 0.19414258058199507,
 0.1886950967203963,
 0.29226345688062005,
 0.13960221576154996,
 0.5753689721388716]