In [1]:
DATA_NAME = 'pastis-full' 
TRANSFORM = 'gabor'
CHANNEL = 'blue'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_blue_gabor_full_pastis.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,66.02,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,...,-0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,-0.0,0.0
1,0.0,26.31,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,...,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0
2,-0.0,0.0,9.57,-0.0,0.0,0.0,0.0,-0.0,0.0,-0.0,...,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0
3,0.0,-0.0,-0.0,3.65,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0
4,0.0,-0.0,0.0,-0.0,1.25,-0.0,0.0,-0.0,-0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
5,-0.0,-0.0,0.0,0.0,-0.0,0.31,0.0,-0.0,0.0,-0.0,...,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,-0.0
6,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.17,0.0,0.0,0.0,...,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0
7,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,87.43,0.0,0.0,...,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0
8,0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,35.73,0.0,...,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0
9,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,13.97,...,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,4e-05,-1e-05,4e-05,2e-05,-4e-05,-4e-05,-4e-05,2e-05,-2e-05,...,-2e-05,7e-05,-1e-05,3e-05,0.0,-3e-05,2e-05,-1e-05,-0.0,1e-05
1,4e-05,1.0,5e-05,-7e-05,-0.0,-3e-05,-0.0,5e-05,-1e-05,-3e-05,...,-0.0,0.0,-0.0,3e-05,-0.0,-0.0,-3e-05,2e-05,2e-05,4e-05
2,-1e-05,5e-05,1.0,-3e-05,0.0,3e-05,2e-05,-0.0,4e-05,-1e-05,...,1e-05,-2e-05,2e-05,-4e-05,4e-05,-1e-05,-8e-05,-1e-05,4e-05,-2e-05
3,4e-05,-7e-05,-3e-05,1.0,-2e-05,2e-05,-3e-05,-5e-05,-3e-05,-0.0,...,-2e-05,-5e-05,3e-05,2e-05,-3e-05,3e-05,2e-05,3e-05,-4e-05,-0.0
4,2e-05,-0.0,0.0,-2e-05,1.0,-2e-05,1e-05,-0.0,-0.0,7e-05,...,0.0,2e-05,1e-05,2e-05,3e-05,-4e-05,4e-05,2e-05,5e-05,-4e-05
5,-4e-05,-3e-05,3e-05,2e-05,-2e-05,1.0,1e-05,-5e-05,3e-05,-1e-05,...,-3e-05,-2e-05,3e-05,-4e-05,0.0,1e-05,3e-05,0.0,-0.0,-1e-05
6,-4e-05,-0.0,2e-05,-3e-05,1e-05,1e-05,1.0,2e-05,0.0,5e-05,...,3e-05,-1e-05,-2e-05,-2e-05,-1e-05,1e-05,-3e-05,4e-05,-3e-05,1e-05
7,-4e-05,5e-05,-0.0,-5e-05,-0.0,-5e-05,2e-05,1.0,0.0,3e-05,...,-1e-05,3e-05,-3e-05,1e-05,-4e-05,-1e-05,-4e-05,4e-05,3e-05,-4e-05
8,2e-05,-1e-05,4e-05,-3e-05,-0.0,3e-05,0.0,0.0,1.0,3e-05,...,-2e-05,1e-05,0.0,3e-05,3e-05,-6e-05,2e-05,4e-05,-2e-05,-2e-05
9,-2e-05,-3e-05,-1e-05,-0.0,7e-05,-1e-05,5e-05,3e-05,3e-05,1.0,...,2e-05,2e-05,-4e-05,-0.0,-2e-05,3e-05,0.0,2e-05,-4e-05,-0.0


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

0.02070433641075783

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.82747075e+02 1.24867182e+02 1.12424731e+02 1.06885393e+02
 1.01536954e+02 8.99799340e+01 3.03832494e+01 2.01809684e+01
 1.68268347e+01 1.61140370e+01 1.44890935e+01 1.28969229e+01
 4.61342591e+00 2.74903294e+00 2.20140955e+00 2.08822177e+00
 1.98051367e+00 1.74314499e+00 6.18807530e-01 3.79849848e-01
 3.16244907e-01 2.78723383e-01 2.46528597e-01 2.10787090e-01
 7.68198469e-02 4.34868344e-02 3.65890647e-02 3.20481423e-02
 2.91581339e-02 2.29578961e-02 6.92538953e-03 3.65494382e-03
 2.77055427e-03 2.32774233e-03 2.11666333e-03 1.48923726e-03
 7.77564983e-04 5.29276064e-04 2.14417951e-04 1.56123978e-04
 1.11243236e-04 3.09225304e-29]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.03637,-0.006623,-0.002224,-0.000851,-0.000287,-7.5e-05,-3.9e-05,0.99572,-0.009891,-0.003277,...,-0.000248,-6.4e-05,-1.6e-05,-0.041645,-0.006659,-0.002176,-0.0008,-0.000264,-7.7e-05,-1.7e-05
1,-0.093546,-0.006349,-0.002043,-0.000783,-0.000263,-6.4e-05,-3.8e-05,0.036776,-0.009957,-0.003031,...,-0.000223,-5.7e-05,-1.3e-05,-0.163113,-0.006327,-0.001967,-0.000716,-0.00025,-7.4e-05,-1.3e-05
2,-0.219797,-0.005599,-0.001787,-0.000668,-0.000225,-5.9e-05,-3.2e-05,0.024038,-0.008929,-0.00265,...,-0.000191,-5.1e-05,-1.1e-05,0.929573,-0.005606,-0.001725,-0.000638,-0.000207,-6.3e-05,-1.4e-05
3,-0.659379,-0.001697,-0.000525,-0.000195,-6.6e-05,-1.3e-05,-9e-06,0.006193,-0.002733,-0.000761,...,-5.7e-05,-1.9e-05,-4e-06,0.054549,-0.001633,-0.000505,-0.000194,-6.3e-05,-2e-05,-5e-06
4,0.660631,-0.014823,-0.004628,-0.001719,-0.000584,-0.000147,-8.6e-05,0.049358,-0.024257,-0.006904,...,-0.000508,-0.000131,-3.2e-05,0.262647,-0.014777,-0.004517,-0.001631,-0.000552,-0.000165,-3.4e-05
5,0.262688,-0.023355,-0.007078,-0.002638,-0.000899,-0.000226,-0.000125,0.058779,-0.039471,-0.010636,...,-0.000784,-0.000209,-4.6e-05,0.185269,-0.023211,-0.006894,-0.002493,-0.00084,-0.000249,-5e-05
6,0.018404,-0.041343,-0.007175,-0.002584,-0.000871,-0.000215,-0.000122,0.011873,0.994167,-0.011515,...,-0.000757,-0.0002,-4.5e-05,0.017286,-0.040646,-0.007057,-0.002417,-0.000808,-0.000243,-5e-05
7,0.011086,-0.11661,-0.00774,-0.002658,-0.000915,-0.000226,-0.000125,0.0076,0.047422,-0.013125,...,-0.000789,-0.000201,-4.8e-05,0.010592,-0.110818,-0.007485,-0.002506,-0.000842,-0.000245,-5.3e-05
8,0.001273,0.759534,-0.001152,-0.000407,-0.000137,-3.6e-05,-1.8e-05,0.000865,0.004522,-0.002064,...,-0.000116,-3.4e-05,-6e-06,0.001222,-0.649732,-0.001099,-0.000383,-0.000117,-3.6e-05,-5e-06
9,0.012501,0.582617,-0.011731,-0.003991,-0.001326,-0.000337,-0.000186,0.008791,0.040703,-0.021113,...,-0.001154,-0.000302,-7.1e-05,0.011945,0.700187,-0.011475,-0.003732,-0.001248,-0.000364,-7.5e-05


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.004279511863340191,
 0.025195727349802133,
 0.07042685845221197,
 0.2510475167389934,
 0.33936932865182157,
 0.09956861349423052,
 0.005833374447340889,
 0.018578881738303132,
 0.24046607988837843,
 0.299812659149289,
 0.14527348864318013,
 0.12901290772798668,
 0.0065418600179458775,
 0.01917335960119171,
 0.0979534910704184,
 0.17522789126922367,
 0.29174827132883496,
 0.35670351443997594,
 0.010148988515115143,
 0.030668108149822015,
 0.07047037325029426,
 0.11330060396072583,
 0.21514606012457183,
 0.2340514685968188,
 0.014580163409078861,
 0.046525815613155275,
 0.09541953705456441,
 0.16129961601312748,
 0.2995689169324318,
 0.35120450683248194,
 0.02181920520120617,
 0.04948319442839533,
 0.09502640138755569,
 0.23353144963402983,
 0.26399608110275774,
 0.39033841376259526,
 0.24207973819989226,
 0.3234147400697124,
 0.18264098546094631,
 0.3151781227414251,
 0.24346022726405792,
 0.45075256729770563]