In [1]:
DATA_NAME = 'syntheticMRI2D-coronal' 
TRANSFORM = 'gabor'
CHANNEL = ''
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gabor_coronal_syntheticMRI2D.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,24108.47,0.63,-0.09,0.17,0.02,-0.04,-0.02,-0.96,0.38,-0.22,...,-0.04,0.05,-0.0,0.65,-0.02,-0.26,0.04,0.03,0.0,0.01
1,0.63,11879.04,0.44,-0.19,-0.02,-0.04,-0.01,0.8,-0.07,-0.28,...,-0.0,0.0,0.0,0.42,-0.15,-0.06,-0.11,0.0,0.01,0.01
2,-0.09,0.44,4272.79,-0.03,0.01,0.02,0.01,0.02,0.34,-0.14,...,0.01,-0.01,0.01,-0.43,0.26,-0.0,-0.09,-0.0,0.02,-0.0
3,0.17,-0.19,-0.03,1078.94,-0.01,0.01,-0.0,-0.27,-0.09,0.01,...,-0.01,-0.0,0.0,0.16,-0.02,0.04,0.02,-0.0,-0.0,-0.0
4,0.02,-0.02,0.01,-0.01,218.22,-0.0,0.0,0.02,-0.02,0.07,...,0.0,0.0,0.0,0.03,0.04,-0.02,0.02,0.0,0.0,-0.0
5,-0.04,-0.04,0.02,0.01,-0.0,39.82,0.0,-0.06,0.02,-0.01,...,0.0,-0.0,0.0,-0.03,0.0,-0.0,0.01,0.0,0.0,-0.0
6,-0.02,-0.01,0.01,-0.0,0.0,0.0,19.31,0.02,0.0,0.02,...,0.0,-0.0,-0.0,-0.01,-0.0,0.0,-0.0,0.0,-0.0,0.0
7,-0.96,0.8,0.02,-0.27,0.02,-0.06,0.02,29662.07,0.05,0.47,...,0.02,0.01,-0.02,0.27,-0.7,-0.13,-0.16,0.05,0.03,-0.02
8,0.38,-0.07,0.34,-0.09,-0.02,0.02,0.0,0.05,14517.2,0.2,...,-0.02,0.01,-0.0,0.42,0.38,-0.33,0.07,0.04,-0.0,-0.0
9,-0.22,-0.28,-0.14,0.01,0.07,-0.01,0.02,0.47,0.2,6166.91,...,0.01,0.0,-0.01,0.13,-0.13,0.09,-0.01,0.0,-0.01,0.0


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,4e-05,-1e-05,3e-05,1e-05,-4e-05,-3e-05,-4e-05,2e-05,-2e-05,...,-3e-05,7e-05,-1e-05,3e-05,-0.0,-4e-05,1e-05,2e-05,1e-05,2e-05
1,4e-05,1.0,6e-05,-5e-05,-1e-05,-5e-05,-2e-05,4e-05,-1e-05,-3e-05,...,-0.0,0.0,0.0,3e-05,-2e-05,-1e-05,-4e-05,0.0,2e-05,4e-05
2,-1e-05,6e-05,1.0,-1e-05,1e-05,5e-05,4e-05,0.0,4e-05,-3e-05,...,2e-05,-3e-05,3e-05,-4e-05,5e-05,-0.0,-6e-05,-0.0,6e-05,-1e-05
3,3e-05,-5e-05,-1e-05,1.0,-3e-05,3e-05,-2e-05,-5e-05,-2e-05,0.0,...,-3e-05,-3e-05,2e-05,3e-05,-1e-05,3e-05,2e-05,-1e-05,-2e-05,-1e-05
4,1e-05,-1e-05,1e-05,-3e-05,1.0,-2e-05,1e-05,1e-05,-1e-05,6e-05,...,0.0,3e-05,1e-05,1e-05,3e-05,-3e-05,5e-05,2e-05,3e-05,-4e-05
5,-4e-05,-5e-05,5e-05,3e-05,-2e-05,1.0,0.0,-6e-05,3e-05,-2e-05,...,0.0,-4e-05,4e-05,-3e-05,1e-05,-0.0,4e-05,1e-05,3e-05,-1e-05
6,-3e-05,-2e-05,4e-05,-2e-05,1e-05,0.0,1.0,2e-05,0.0,5e-05,...,4e-05,-1e-05,-3e-05,-2e-05,-0.0,2e-05,-2e-05,4e-05,-2e-05,1e-05
7,-4e-05,4e-05,0.0,-5e-05,1e-05,-6e-05,2e-05,1.0,0.0,4e-05,...,1e-05,2e-05,-3e-05,1e-05,-5e-05,-2e-05,-4e-05,3e-05,3e-05,-5e-05
8,2e-05,-1e-05,4e-05,-2e-05,-1e-05,3e-05,0.0,0.0,1.0,2e-05,...,-3e-05,1e-05,-0.0,2e-05,4e-05,-6e-05,3e-05,4e-05,-1e-05,-1e-05
9,-2e-05,-3e-05,-3e-05,0.0,6e-05,-2e-05,5e-05,4e-05,2e-05,1.0,...,1e-05,1e-05,-3e-05,1e-05,-2e-05,2e-05,-1e-05,0.0,-4e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

6.728023496981773

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[2.10369158e+07 1.44089540e+07 1.39026210e+07 1.29527559e+07
 9.97586447e+06 9.33658094e+06 4.99949617e+06 3.34462438e+06
 1.89284990e+06 1.69780862e+06 1.23628795e+06 1.06755605e+06
 8.87439555e+05 4.30808561e+05 2.91163678e+05 2.41218531e+05
 1.14078087e+05 1.05434863e+05 7.68178702e+04 2.72752164e+04
 1.85667091e+04 1.09217863e+04 5.80598976e+03 4.53558943e+03
 3.74519264e+03 1.09509059e+03 4.87902355e+02 2.29527081e+02
 1.45993222e+02 9.48421359e+01 6.49687840e+01 3.57113934e+01
 2.13482462e+01 1.29965758e+01 1.12541390e+01 9.77775666e+00
 8.70115125e+00 5.25249421e+00 2.24846913e+00 1.73848922e+00
 1.32979975e+00 4.02580253e-24]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.049992,-0.00953,-0.002941,-0.000738,-0.000147,-2.9e-05,-1.2e-05,0.995499,-0.012945,-0.004328,...,-3.6e-05,-1.2e-05,-5e-06,-0.043844,-0.005205,-0.001458,-0.000341,-4.2e-05,-1.3e-05,-6e-06
1,-0.456638,-0.004719,-0.001337,-0.00034,-6.8e-05,-1e-05,-6e-06,0.017978,-0.006861,-0.001982,...,-1.5e-05,-5e-06,-2e-06,-0.106821,-0.002383,-0.000635,-0.000149,-2.3e-05,-7e-06,-2e-06
2,0.804065,-0.010276,-0.002882,-0.000708,-0.000143,-2.7e-05,-1.3e-05,0.035715,-0.015043,-0.004328,...,-3.7e-05,-1e-05,-5e-06,-0.469605,-0.005184,-0.001417,-0.000319,-4.3e-05,-1.3e-05,-4e-06
3,0.361531,-0.022858,-0.006296,-0.001528,-0.000308,-5.8e-05,-2.9e-05,0.063961,-0.034046,-0.00941,...,-7.5e-05,-2.4e-05,-1e-05,0.866641,-0.011337,-0.003066,-0.000712,-9e-05,-2.9e-05,-1.1e-05
4,0.025248,-0.008003,-0.001992,-0.000475,-9.5e-05,-1.6e-05,-1e-05,0.011448,-0.013262,-0.002998,...,-2.2e-05,-8e-06,-4e-06,0.02942,-0.003644,-0.000974,-0.000223,-3e-05,-1e-05,-4e-06
5,0.097521,-0.039377,-0.009399,-0.002257,-0.000456,-8.3e-05,-4e-05,0.047784,-0.067713,-0.014328,...,-0.000114,-4e-05,-1.5e-05,0.111581,-0.017451,-0.004536,-0.001046,-0.000135,-4.3e-05,-1.5e-05
6,0.025266,-0.073423,-0.009006,-0.002095,-0.000421,-7.5e-05,-3.7e-05,0.017349,0.993358,-0.014554,...,-0.000106,-3.6e-05,-1.4e-05,0.026706,-0.018555,-0.004252,-0.000954,-0.000121,-4e-05,-1.5e-05
7,0.018032,0.993243,-0.011913,-0.002661,-0.000532,-0.0001,-4.8e-05,0.013257,0.065712,-0.020773,...,-0.000131,-4.5e-05,-1.8e-05,0.018834,-0.028249,-0.005399,-0.001221,-0.000156,-4.9e-05,-1.7e-05
8,0.008293,0.03271,-0.012487,-0.002446,-0.0005,-9.1e-05,-4.3e-05,0.006401,0.018991,-0.027104,...,-0.000126,-3.9e-05,-1.7e-05,0.008639,-0.048922,-0.005086,-0.001119,-0.000144,-4.5e-05,-1.8e-05
9,0.01255,0.044347,-0.022272,-0.004204,-0.000848,-0.000148,-7.4e-05,0.009761,0.027494,-0.052097,...,-0.000203,-6.9e-05,-2.7e-05,0.013006,-0.109349,-0.008807,-0.001915,-0.000248,-8e-05,-3e-05


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.0045014829955334346,
 0.11746744281242216,
 0.19593480080378156,
 0.13335878317269956,
 0.1685078679243701,
 0.17908326483005832,
 0.006642044866791541,
 0.006757120859179233,
 0.03857518914008673,
 0.044214317732694575,
 0.03400730345903191,
 0.050105974473495074,
 0.03463010368389208,
 0.00922260116465401,
 0.02708103691252506,
 0.02790425293672283,
 0.1844781674518342,
 0.1991168655547445,
 0.021395276110985795,
 0.010225255737656758,
 0.013266571598859134,
 0.01111040539056185,
 0.027885846374526624,
 0.08072714177736806,
 0.07880936548482176,
 0.006866581201749988,
 0.009469678370164103,
 0.01845349621299852,
 0.029942346334916103,
 0.043109013449386446,
 0.04485668785436214,
 0.03660524876952198,
 0.045338114917163885,
 0.10239882048170657,
 0.18849065061704984,
 0.26497115421354656,
 0.24022634109119556,
 0.48061855544860665,
 0.21975600677548868,
 0.3746265092242773,
 0.2616897194967358,
 0.4906121043615681]