In [1]:
# Notebook parameters; the cells below derive file names and grouping from them.
DATA_NAME = "pastis-full"  # dataset identifier, split on "-" when names are built
TRANSFORM = "fourier"      # transform identifier, also split on "-"
CHANNEL = "red"            # appended to the name parts only when truthy

In [2]:
# Name parts in order: dataset tokens, transform tokens, then the channel (if set).
path_list = [*DATA_NAME.split("-"), *TRANSFORM.split("-"), *([CHANNEL] if CHANNEL else [])]
# The suggested notebook filename lists the same parts in reverse order.
print(f"Name the notebook:\nindependence_{'_'.join(reversed(path_list))}.ipynb")
# Hyphen-joined identifier built from the same parts.
FULL_DATA_NAME = '-'.join(path_list)


import git
from pathlib import Path
import os
# Remember the launch directory and locate the root of the enclosing git work tree.
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

# Output folders live under the launch directory; create any that are missing.
for _subdir in ("CSVs", "plots", "cache"):
    Path(CWD, _subdir).mkdir(exist_ok=True)

# Grouping label keyed on the first "-" token of TRANSFORM; anything else maps to 'error'.
GROUP = {'wavelet': 'layer', 'fourier': 'band'}.get(TRANSFORM.split("-")[0], 'error')
# Run-control flags (their consumers are not visible in this section).
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_red_fourier_full_pastis.ipynb


In [3]:
# The helper modules are imported from inside <ROOT_DIR>/utilities, so switch
# there for the duration of the imports only.
os.chdir(os.path.join(ROOT_DIR, "utilities"))
try:
    from testing import * # If MATLAB is not installed, open utilities and set to False
    from plotting import *
finally:
    # Always return to the notebook directory, even when an import fails;
    # otherwise the kernel would stay in utilities/ and later relative paths
    # (and a re-run of os.getcwd()) would point at the wrong place.
    os.chdir(CWD)
# NOTE(review): `np` is not imported explicitly in the visible cells; presumably
# it arrives through the star imports above — confirm.
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
# Both pickles sit in <ROOT_DIR>/transformed-data and share the FULL_DATA_NAME stem.
# NOTE(review): unpickling can execute arbitrary code, so these files must come
# from a trusted source.
group_data_map = pd.read_pickle(Path(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(Path(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
# Choose which groups enter the analysis, depending on the transform family.
if 'fourier' in TRANSFORM or 'wavelet' in TRANSFORM:
    # Every integer label from 2 up to the largest entry obtained by iterating group_data_map.
    GROUPS = np.arange(2, max(group_data_map) + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    # Take the entry for this dataset with the largest 'num_images'.
    # NOTE(review): eval() executes whatever text the CSV cell contains. If the
    # cell is always a plain list literal, ast.literal_eval would be the safe
    # replacement — confirm the stored format before switching.
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0])
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
else:
    # Fail here rather than leave GROUPS undefined (or stale from a previous
    # kernel state) for the cells that follow.
    raise ValueError(
        f"Unsupported TRANSFORM {TRANSFORM!r}: expected it to contain "
        "'fourier', 'wavelet' or 'learned'"
    )

In [7]:
# Bootstrap configuration.
n_bootstrap = 100_000    # number of resampling rounds
bootstrap_size = 10_000  # values drawn per group in each round


In [8]:

# Average the sample covariance matrix over n_bootstrap resampling rounds.
# NOTE(review): every column of X is filled by its own np.random.choice call, so
# the columns are drawn independently of one another — confirm this is intended,
# as it would leave the off-diagonal entries reflecting sampling noise only.
cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    # One round: a (bootstrap_size, number-of-groups) sample, one column per group.
    # Every column is overwritten below, so the buffer needs no initial fill.
    X = np.empty((bootstrap_size, len(GROUPS)))
    for i, group in enumerate(GROUPS):
        X[:, i] = np.random.choice(group_data_map[group], size=bootstrap_size, replace=True)
    # rowvar=False: columns are the variables, rows the observations.
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Label the covariance matrix with the group ids on both axes, persist it as
# CSV under <CWD>/CSVs, and display a copy rounded to two decimals.
cov_df = pd.DataFrame(data=cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(Path(CWD, "CSVs", 'covariance_matrix.csv'))
cov_df.round(decimals=2)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,9.6,-0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,...,0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
3,-0.0,7.42,0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0
4,0.0,0.0,6.38,0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,...,0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,0.0
5,-0.0,0.0,0.0,5.09,0.0,-0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0
6,-0.0,0.0,-0.0,0.0,4.23,0.0,0.0,-0.0,-0.0,-0.0,...,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,0.0
7,0.0,0.0,-0.0,-0.0,0.0,3.36,0.0,-0.0,0.0,0.0,...,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0
8,0.0,-0.0,0.0,0.0,0.0,0.0,2.78,0.0,0.0,0.0,...,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0
9,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,2.32,-0.0,0.0,...,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0
10,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,1.9,0.0,...,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,0.0
11,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,0.0,0.0,1.57,...,0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0


In [10]:
# Normalise covariance to correlation: entry (i, j) is divided by sqrt(var_i * var_j),
# where var_k is the k-th diagonal entry of cov_matrix.
variances = np.diag(cov_matrix)
corr_matrix = cov_matrix / np.sqrt(variances[:, None] * variances[None, :])
corr_df = pd.DataFrame(data=corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(decimals=5)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,1.0,-7e-05,2e-05,-3e-05,-2e-05,4e-05,3e-05,-4e-05,3e-05,-0.0,...,4e-05,3e-05,4e-05,-5e-05,0.0,-0.0,6e-05,4e-05,7e-05,-2e-05
3,-7e-05,1.0,0.0,2e-05,3e-05,6e-05,-2e-05,-3e-05,4e-05,-0.0,...,-3e-05,-4e-05,-3e-05,-2e-05,-2e-05,0.0,1e-05,3e-05,-2e-05,-6e-05
4,2e-05,0.0,1.0,7e-05,-3e-05,-1e-05,2e-05,-1e-05,2e-05,0.0,...,1e-05,-1e-05,-2e-05,-3e-05,3e-05,-3e-05,-0.0,5e-05,1e-05,5e-05
5,-3e-05,2e-05,7e-05,1.0,4e-05,-2e-05,2e-05,6e-05,1e-05,0.0,...,3e-05,3e-05,-1e-05,-3e-05,-4e-05,4e-05,-0.0,4e-05,-1e-05,1e-05
6,-2e-05,3e-05,-3e-05,4e-05,1.0,0.0,3e-05,-1e-05,-2e-05,-0.0,...,-0.0,1e-05,-0.0,-3e-05,2e-05,-0.0,-0.0,2e-05,1e-05,1e-05
7,4e-05,6e-05,-1e-05,-2e-05,0.0,1.0,2e-05,-0.0,1e-05,0.0,...,-6e-05,-1e-05,-0.0,-1e-05,4e-05,2e-05,-7e-05,-5e-05,-3e-05,8e-05
8,3e-05,-2e-05,2e-05,2e-05,3e-05,2e-05,1.0,7e-05,4e-05,0.0,...,-5e-05,0.0,2e-05,0.0,-7e-05,-1e-05,-0.0,1e-05,-4e-05,-4e-05
9,-4e-05,-3e-05,-1e-05,6e-05,-1e-05,-0.0,7e-05,1.0,-7e-05,2e-05,...,1e-05,-8e-05,-1e-05,-1e-05,-4e-05,-2e-05,-1e-05,1e-05,3e-05,-0.0
10,3e-05,4e-05,2e-05,1e-05,-2e-05,1e-05,4e-05,-7e-05,1.0,5e-05,...,1e-05,-1e-05,1e-05,-6e-05,2e-05,-1e-05,2e-05,-1e-05,0.0,1e-05
11,-0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,2e-05,5e-05,1.0,...,2e-05,-3e-05,1e-05,-1e-05,-3e-05,2e-05,-5e-05,1e-05,-3e-05,-2e-05


In [11]:
# Frobenius norm of the covariance matrix with its diagonal zeroed out,
# i.e. the overall magnitude of the off-diagonal entries.
off_diagonal = cov_matrix - np.diag(np.diag(cov_matrix))
np.linalg.norm(off_diagonal)

0.001627953169529512

In [12]:
# NOTE(review): PCA.fit treats each row of its input as one observation and
# centres the columns first, so the components below are principal axes of the
# *rows of cov_matrix*, not an eigen-decomposition of cov_matrix itself
# (np.linalg.eigh(cov_matrix) would give that). Confirm which one is intended.
pca = PCA()
pca.fit(cov_matrix)

# explained_variance_ holds variances (eigenvalues), not singular values;
# scikit-learn exposes the latter separately as pca.singular_values_.
print("Explained variance (eigenvalues):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_  # one component per row
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[2.88866480e+00 1.72302862e+00 1.26942382e+00 8.09131424e-01
 5.55711153e-01 3.51056753e-01 2.40850436e-01 1.67238595e-01
 1.12016154e-01 7.66123756e-02 5.12854948e-02 3.76513690e-02
 2.26553271e-02 1.63046058e-02 1.07835906e-02 7.10431858e-03
 4.58396183e-03 2.93650617e-03 1.84233966e-03 1.21461520e-03
 7.22503434e-04 4.30186915e-04 2.51954839e-04 1.47577155e-04
 7.97373089e-05 4.14774027e-05 2.11153066e-05 1.00551780e-05
 2.40524868e-06 6.77744797e-07 1.41713765e-07 5.67244180e-31]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,0.997142,-0.058077,-0.035133,-0.021619,-0.01591,-0.011528,-0.009146,-0.00746,-0.00595,-0.004874,...,-0.000275,-0.000213,-0.000157,-0.000119,-8.4e-05,-6.1e-05,-2.8e-05,-1.6e-05,-7e-06,-4e-06
1,0.052281,0.991634,-0.105186,-0.038867,-0.02489,-0.016659,-0.012822,-0.010187,-0.007994,-0.006494,...,-0.000368,-0.000285,-0.000211,-0.000152,-0.000111,-7.9e-05,-3.8e-05,-2.1e-05,-1.1e-05,-6e-06
2,0.037489,0.097892,0.989848,-0.077952,-0.040614,-0.024675,-0.018156,-0.014129,-0.010963,-0.00881,...,-0.000486,-0.000377,-0.00028,-0.000204,-0.000144,-0.000107,-5.1e-05,-2.8e-05,-1.4e-05,-7e-06
3,0.023573,0.040722,0.06693,0.990386,-0.095853,-0.040017,-0.026372,-0.019327,-0.014551,-0.011448,...,-0.000605,-0.000466,-0.000349,-0.000255,-0.000185,-0.00013,-6.4e-05,-3.5e-05,-1.8e-05,-9e-06
4,0.018818,0.028738,0.039908,0.085597,0.989594,-0.082687,-0.042994,-0.028795,-0.020508,-0.015655,...,-0.000789,-0.000607,-0.000452,-0.00033,-0.000236,-0.000171,-8.3e-05,-4.6e-05,-2.3e-05,-1.1e-05
5,0.013616,0.019346,0.024733,0.039132,0.070002,0.988634,-0.102527,-0.048904,-0.030216,-0.021635,...,-0.000995,-0.00076,-0.000563,-0.00041,-0.000291,-0.000211,-0.000108,-6e-05,-3e-05,-1.3e-05
6,0.011371,0.0157,0.019284,0.02771,0.040835,0.088553,0.985703,-0.112298,-0.049553,-0.031749,...,-0.001262,-0.000963,-0.000712,-0.000518,-0.000381,-0.000272,-0.000133,-7.4e-05,-3.8e-05,-1.9e-05
7,0.009859,0.013296,0.016018,0.021807,0.029733,0.049133,0.096335,0.984613,-0.108511,-0.051835,...,-0.001599,-0.001245,-0.000913,-0.000663,-0.000484,-0.000348,-0.00017,-9.4e-05,-4.6e-05,-2.3e-05
8,0.008071,0.010728,0.012789,0.016925,0.021972,0.032128,0.048485,0.090603,0.983746,-0.118039,...,-0.002001,-0.001544,-0.00114,-0.000837,-0.000596,-0.000434,-0.00021,-0.000119,-5.8e-05,-2.9e-05
9,0.006942,0.009153,0.010808,0.014034,0.017733,0.024489,0.033606,0.049946,0.099183,0.982231,...,-0.002544,-0.00197,-0.001449,-0.001057,-0.000766,-0.000548,-0.000272,-0.00015,-7.6e-05,-3.8e-05


In [13]:
# Cosine distance between the i-th principal component and the i-th coordinate
# axis: the diagonal of the full component-vs-axis distance matrix.
axis_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine'))
# The sign of a component is arbitrary, so v and -v are treated alike:
# min(d, 2 - d) equals 1 - |cosine similarity| and leaves distances below 1 unchanged.
cos_dist = list(np.minimum(axis_dist, 2 - axis_dist))
cos_dist


[0.0028576882510686907,
 0.008366147855379147,
 0.010151602433843832,
 0.00961386196550873,
 0.010405877847912803,
 0.011366093577111713,
 0.014296561920583928,
 0.015386982736639743,
 0.016253895255673,
 0.01776905516371763,
 0.02297268089787985,
 0.023484306759236406,
 0.022842675599592743,
 0.027912732045147326,
 0.026324608782103853,
 0.02751093465266452,
 0.02894369793601259,
 0.030953695626092403,
 0.036634215933850967,
 0.03993691039161473,
 0.038086759708615636,
 0.042364038623973754,
 0.04888871440404441,
 0.05402695869085339,
 0.056496341577832454,
 0.06576443110778252,
 0.08436468986880907,
 0.0773954609373928,
 0.05664529937937046,
 0.08480137114925335,
 0.15796187175659127,
 0.13541221563903783]