In [1]:
DATA_NAME = 'syntheticMRI2D-sagittal' 
TRANSFORM = 'gabor'
CHANNEL = ''
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gabor_sagittal_syntheticMRI2D.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)

In [8]:
cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,17817.42,0.55,-0.09,0.2,0.03,-0.04,-0.03,-0.81,0.27,-0.23,...,-0.05,0.03,-0.0,0.5,-0.01,-0.21,0.06,0.02,0.0,0.01
1,0.55,9615.43,0.43,-0.23,-0.03,-0.04,-0.01,0.63,-0.1,-0.24,...,-0.0,-0.0,0.0,0.38,-0.11,-0.04,-0.11,0.02,0.01,0.02
2,-0.09,0.43,5046.44,-0.03,0.02,0.03,0.02,0.07,0.3,-0.14,...,0.02,-0.01,0.01,-0.43,0.26,-0.01,-0.13,-0.0,0.02,-0.0
3,0.2,-0.23,-0.03,1729.88,-0.02,0.01,-0.0,-0.28,-0.12,0.0,...,-0.01,-0.01,0.0,0.21,-0.03,0.06,0.03,0.01,-0.01,-0.0
4,0.03,-0.03,0.02,-0.02,404.04,-0.0,0.0,0.02,-0.01,0.09,...,-0.0,0.0,0.0,0.04,0.05,-0.03,0.03,0.01,0.0,-0.0
5,-0.04,-0.04,0.03,0.01,-0.0,70.0,0.0,-0.07,0.03,-0.01,...,-0.0,-0.0,0.0,-0.04,0.0,0.0,0.01,0.0,0.0,-0.0
6,-0.03,-0.01,0.02,-0.0,0.0,0.0,33.08,0.02,0.0,0.02,...,0.0,-0.0,-0.0,-0.02,-0.0,0.0,-0.0,0.0,-0.0,0.0
7,-0.81,0.63,0.07,-0.28,0.02,-0.07,0.02,22268.66,0.01,0.34,...,-0.0,0.0,-0.02,0.22,-0.51,-0.09,-0.19,0.07,0.01,-0.02
8,0.27,-0.1,0.3,-0.12,-0.01,0.03,0.0,0.01,10992.44,0.2,...,-0.03,0.01,0.0,0.33,0.3,-0.31,0.08,0.05,-0.01,-0.01
9,-0.23,-0.24,-0.14,0.0,0.09,-0.01,0.02,0.34,0.2,6058.41,...,0.02,0.0,-0.01,0.08,-0.11,0.1,-0.01,0.01,-0.01,0.0


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,4e-05,-1e-05,4e-05,1e-05,-4e-05,-3e-05,-4e-05,2e-05,-2e-05,...,-3e-05,6e-05,-1e-05,3e-05,-0.0,-3e-05,2e-05,1e-05,0.0,2e-05
1,4e-05,1.0,6e-05,-6e-05,-2e-05,-5e-05,-2e-05,4e-05,-1e-05,-3e-05,...,-0.0,-0.0,0.0,3e-05,-1e-05,-1e-05,-4e-05,2e-05,2e-05,4e-05
2,-1e-05,6e-05,1.0,-1e-05,1e-05,4e-05,4e-05,1e-05,4e-05,-2e-05,...,2e-05,-4e-05,4e-05,-4e-05,4e-05,-0.0,-6e-05,-0.0,6e-05,-1e-05
3,4e-05,-6e-05,-1e-05,1.0,-2e-05,3e-05,-2e-05,-5e-05,-3e-05,0.0,...,-2e-05,-3e-05,2e-05,4e-05,-1e-05,3e-05,2e-05,1e-05,-2e-05,-1e-05
4,1e-05,-2e-05,1e-05,-2e-05,1.0,-2e-05,1e-05,1e-05,-1e-05,6e-05,...,-0.0,4e-05,1e-05,2e-05,3e-05,-3e-05,5e-05,2e-05,4e-05,-4e-05
5,-4e-05,-5e-05,4e-05,3e-05,-2e-05,1.0,0.0,-6e-05,3e-05,-2e-05,...,-2e-05,-3e-05,4e-05,-3e-05,0.0,0.0,4e-05,1e-05,3e-05,-1e-05
6,-3e-05,-2e-05,4e-05,-2e-05,1e-05,0.0,1.0,2e-05,0.0,5e-05,...,4e-05,-1e-05,-3e-05,-2e-05,-0.0,2e-05,-2e-05,4e-05,-2e-05,1e-05
7,-4e-05,4e-05,1e-05,-5e-05,1e-05,-6e-05,2e-05,1.0,0.0,3e-05,...,-0.0,1e-05,-4e-05,1e-05,-4e-05,-1e-05,-4e-05,4e-05,2e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,-1e-05,3e-05,0.0,0.0,1.0,2e-05,...,-2e-05,1e-05,0.0,2e-05,4e-05,-6e-05,3e-05,3e-05,-1e-05,-1e-05
9,-2e-05,-3e-05,-2e-05,0.0,6e-05,-2e-05,5e-05,3e-05,2e-05,1.0,...,2e-05,1e-05,-3e-05,1e-05,-2e-05,3e-05,-0.0,1e-05,-3e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

5.6830544909508385

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.18699158e+07 9.12976907e+06 8.64126144e+06 7.54438961e+06
 6.25255148e+06 5.86978184e+06 2.87325522e+06 2.19211114e+06
 1.34276001e+06 1.10549204e+06 1.03105461e+06 8.99224066e+05
 8.30387136e+05 5.95097895e+05 2.71175641e+05 2.50827347e+05
 1.59183400e+05 1.40311892e+05 1.24604295e+05 6.97982386e+04
 4.37247289e+04 3.48963909e+04 1.63150315e+04 1.48901838e+04
 1.11067803e+04 3.76957022e+03 2.34353155e+03 1.29357040e+03
 7.25171428e+02 5.52420536e+02 4.35085927e+02 1.10652060e+02
 6.34728314e+01 3.37781475e+01 3.00644305e+01 2.27071913e+01
 1.39651992e+01 9.69934869e+00 7.16876250e+00 6.43909267e+00
 5.97630899e+00 2.22020936e-24]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.043495,-0.009988,-0.004495,-0.001485,-0.000341,-6.2e-05,-2.7e-05,0.993215,-0.01236,-0.005518,...,-0.000131,-1.8e-05,-1.5e-05,-0.063728,-0.006167,-0.002084,-0.000701,-0.000147,-2.1e-05,-1.4e-05
1,-0.067581,-0.00728,-0.003091,-0.001016,-0.000234,-3.8e-05,-2e-05,0.038747,-0.009299,-0.003832,...,-8.7e-05,-1e-05,-9e-06,-0.382822,-0.004264,-0.00139,-0.000462,-0.000107,-1.5e-05,-7e-06
2,-0.214747,-0.016271,-0.006849,-0.002187,-0.000506,-8.9e-05,-4.3e-05,0.069625,-0.020886,-0.008463,...,-0.00019,-2.5e-05,-2e-05,0.902008,-0.009446,-0.003087,-0.001039,-0.000221,-3.2e-05,-2e-05
3,0.960023,-0.019362,-0.007814,-0.002468,-0.000572,-0.000102,-4.9e-05,0.052485,-0.025523,-0.009766,...,-0.000221,-2.7e-05,-2.3e-05,0.153551,-0.010911,-0.003523,-0.001161,-0.00025,-3.6e-05,-2.1e-05
4,0.018913,-0.003825,-0.001441,-0.000441,-9.9e-05,-1.5e-05,-1.1e-05,0.006149,-0.005314,-0.001792,...,-3.6e-05,-5e-06,-6e-06,0.011268,-0.002022,-0.000653,-0.000214,-4.8e-05,-7e-06,-5e-06
5,0.1528,-0.042666,-0.015436,-0.00477,-0.001103,-0.000191,-9e-05,0.05736,-0.06043,-0.019525,...,-0.000425,-5.9e-05,-4.3e-05,0.10188,-0.022024,-0.006797,-0.002248,-0.000484,-6.9e-05,-4.2e-05
6,0.024345,-0.103644,-0.014887,-0.004125,-0.000939,-0.00016,-7.7e-05,0.01609,0.990865,-0.020355,...,-0.000362,-4.8e-05,-3.7e-05,0.02114,-0.024246,-0.005965,-0.0019,-0.000407,-5.9e-05,-3.7e-05
7,0.020727,0.989447,-0.020712,-0.005318,-0.001199,-0.000211,-9.9e-05,0.014517,0.094384,-0.030322,...,-0.000458,-6.2e-05,-4.7e-05,0.018447,-0.037959,-0.007693,-0.002448,-0.000521,-7.3e-05,-4.4e-05
8,0.011002,0.042018,-0.027698,-0.005391,-0.001217,-0.000209,-9.8e-05,0.008179,0.027244,-0.053703,...,-0.000466,-5.8e-05,-4.9e-05,0.010065,-0.09391,-0.008006,-0.002445,-0.000524,-7.3e-05,-4.7e-05
9,0.006391,0.019961,-0.025108,-0.004002,-0.000892,-0.000145,-7.1e-05,0.004826,0.01432,-0.068789,...,-0.000329,-4.4e-05,-3.3e-05,0.005869,-0.381991,-0.00603,-0.001796,-0.00038,-5.6e-05,-3.4e-05


In [13]:
cos_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine'))
cos_dist = cos_dist = [np.min(row) for row in cos_dist]
cos_dist


[1.0434952664149753,
 1.0072804651148222,
 1.0068493931231473,
 1.0024677573621992,
 1.0000991112723394,
 1.0001909300395366,
 1.0000766977444382,
 0.9854828127809305,
 0.9727556352711116,
 1.06878856880618,
 1.0133967337216565,
 1.0003530776922125,
 1.0012805476415672,
 1.0003132025680186,
 0.9972411292152134,
 0.972483566486586,
 0.9547184327009681,
 1.0157405843041183,
 1.0065514896919114,
 1.00095959189929,
 1.0004786438687179,
 0.9965791032495089,
 0.9998823954557323,
 0.9845648913556452,
 0.9522719404739866,
 1.1301051627526972,
 1.010924989942683,
 1.0047786992156973,
 0.9994407342462697,
 0.9988555798959874,
 0.9931149726871451,
 0.993759600400526,
 0.9698865706039388,
 1.0049830542779934,
 1.0379135073749066,
 0.9997971837478964,
 0.9996614709786796,
 0.9992993083010082,
 0.9992725710761501,
 0.9977882608755847,
 1.020169705000572,
 0.5636091715150655]