In [1]:
DATA_NAME = 'standardTesting-full' 
TRANSFORM = 'learned'
CHANNEL = ''

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_learned_full_standardTesting.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,52,54,55,56,57,59,60,61,62,63
0,4.92,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,...,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0
2,-0.0,5.83,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,...,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0
3,-0.0,-0.0,2.4,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,...,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0
4,0.0,0.0,0.0,8.37,-0.0,0.0,0.0,-0.0,0.0,-0.0,...,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,0.0
5,-0.0,0.0,-0.0,-0.0,6.43,0.0,-0.0,-0.0,-0.0,-0.0,...,0.0,-0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,0.0
6,-0.0,0.0,0.0,0.0,0.0,2.96,-0.0,-0.0,-0.0,-0.0,...,0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0
7,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,2.42,0.0,0.0,-0.0,...,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0
8,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.11,-0.0,0.0,...,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0
9,0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,0.87,0.0,...,-0.0,0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0
10,0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,2.69,...,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,52,54,55,56,57,59,60,61,62,63
0,1.0,-3e-05,-0.0,2e-05,-0.0,-2e-05,-2e-05,-4e-05,4e-05,3e-05,...,-0.0,-3e-05,-2e-05,3e-05,-6e-05,-0.0,-2e-05,-2e-05,0.0,2e-05
2,-3e-05,1.0,-3e-05,2e-05,4e-05,1e-05,-2e-05,-1e-05,-1e-05,-3e-05,...,5e-05,-6e-05,1e-05,-1e-05,-1e-05,-1e-05,1e-05,-1e-05,-0.0,-1e-05
3,-0.0,-3e-05,1.0,0.0,-1e-05,1e-05,-3e-05,-1e-05,-1e-05,-4e-05,...,2e-05,2e-05,5e-05,-1e-05,-2e-05,-2e-05,-2e-05,-2e-05,-5e-05,3e-05
4,2e-05,2e-05,0.0,1.0,-7e-05,3e-05,9e-05,-2e-05,7e-05,-3e-05,...,-2e-05,4e-05,0.0,3e-05,-1e-05,8e-05,3e-05,-1e-05,7e-05,5e-05
5,-0.0,4e-05,-1e-05,-7e-05,1.0,1e-05,-5e-05,-2e-05,-0.0,-5e-05,...,0.0,-1e-05,3e-05,3e-05,-1e-05,2e-05,4e-05,-3e-05,1e-05,4e-05
6,-2e-05,1e-05,1e-05,3e-05,1e-05,1.0,-1e-05,-0.0,-0.0,-2e-05,...,3e-05,-1e-05,-3e-05,2e-05,-1e-05,2e-05,0.0,-3e-05,2e-05,-3e-05
7,-2e-05,-2e-05,-3e-05,9e-05,-5e-05,-1e-05,1.0,0.0,2e-05,-2e-05,...,2e-05,-3e-05,1e-05,-2e-05,-3e-05,-4e-05,4e-05,-3e-05,3e-05,-2e-05
8,-4e-05,-1e-05,-1e-05,-2e-05,-2e-05,-0.0,0.0,1.0,-0.0,0.0,...,-0.0,1e-05,-3e-05,2e-05,-1e-05,1e-05,-3e-05,2e-05,3e-05,-4e-05
9,4e-05,-1e-05,-1e-05,7e-05,-0.0,-0.0,2e-05,-0.0,1.0,1e-05,...,-5e-05,3e-05,-2e-05,2e-05,-7e-05,-1e-05,3e-05,-2e-05,-4e-05,2e-05
10,3e-05,-3e-05,-4e-05,-3e-05,-5e-05,-2e-05,-2e-05,0.0,1e-05,1.0,...,-1e-05,-1e-05,2e-05,-1e-05,3e-05,-2e-05,-3e-05,-2e-05,4e-05,-1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

0.005401541757087389

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.30399465e+00 7.72432317e-01 7.31440236e-01 6.38237320e-01
 6.22527216e-01 6.07073188e-01 5.72097724e-01 5.06772807e-01
 4.81305746e-01 4.62518845e-01 4.46895710e-01 4.15024771e-01
 4.10147609e-01 3.90972352e-01 3.63173673e-01 3.42739504e-01
 2.97365247e-01 2.31195978e-01 2.20996271e-01 2.11620881e-01
 1.90578262e-01 1.65899398e-01 1.63401855e-01 1.58117046e-01
 1.51567309e-01 1.45054684e-01 1.35092536e-01 1.30356648e-01
 1.23600242e-01 1.17366979e-01 1.09588913e-01 1.07466401e-01
 1.02126643e-01 9.47219754e-02 9.26777087e-02 8.99815073e-02
 8.86055254e-02 8.17235916e-02 7.55825228e-02 7.53895369e-02
 7.17945782e-02 6.97807107e-02 6.42289865e-02 5.65881406e-02
 5.23486298e-02 3.19484398e-02 2.22234976e-02 1.98306934e-02
 1.72838602e-02 1.49268900e-02 1.23595870e-02 3.23021761e-03
 5.85491354e-04 6.13305021e-32]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,53
0,-0.013347,-0.020291,-0.004637,0.997368,-0.028572,-0.005976,-0.004604,-0.000204,-0.001531,-0.005335,...,-0.010821,-0.001389,-0.004267,-0.025575,-0.005629,-0.017613,-0.00041,-0.003399,-0.014189,-0.001626
1,-0.016802,-0.047842,-0.003906,0.016693,0.972106,-0.005249,-0.00397,-0.000161,-0.001239,-0.004606,...,-0.011603,-0.001115,-0.003531,-0.212397,-0.00486,-0.031506,-0.000321,-0.002793,-0.019081,-0.001288
2,-0.025802,-0.094813,-0.005606,0.020512,0.193028,-0.007573,-0.005665,-0.000223,-0.001752,-0.006585,...,-0.017309,-0.001594,-0.005079,0.965303,-0.007081,-0.054097,-0.000454,-0.003989,-0.030018,-0.001868
3,-0.017768,0.935893,-0.002977,0.007898,0.02916,-0.004053,-0.002994,-0.000115,-0.000914,-0.003534,...,-0.010267,-0.000835,-0.002665,0.039622,-0.003739,-0.065626,-0.000233,-0.002078,-0.021637,-0.000969
4,-0.004334,0.03606,-0.000617,0.001628,0.005738,-0.000831,-0.000559,-2.7e-05,-0.000193,-0.000739,...,-0.002332,-0.000168,-0.000566,0.00695,-0.000737,-0.016614,-5.6e-05,-0.000464,-0.004944,-0.000179
5,-0.052662,0.269385,-0.007804,0.019085,0.06032,-0.01089,-0.007854,-0.00031,-0.002359,-0.009235,...,-0.029093,-0.002154,-0.007051,0.076244,-0.00997,-0.372165,-0.000628,-0.005441,-0.068364,-0.002504
6,-0.075415,0.147797,-0.009228,0.0197,0.054856,-0.012902,-0.00933,-0.000353,-0.00277,-0.010998,...,-0.037438,-0.002513,-0.008281,0.065869,-0.011885,0.908687,-0.000735,-0.006356,-0.105255,-0.002917
7,-0.093455,0.041433,-0.005785,0.009802,0.02238,-0.008347,-0.005844,-0.000221,-0.001681,-0.006939,...,-0.029741,-0.001508,-0.005246,0.0257,-0.007557,0.06953,-0.000449,-0.003919,-0.234576,-0.001775
8,-0.174707,0.032138,-0.005792,0.008708,0.018996,-0.00828,-0.005728,-0.000205,-0.001678,-0.006868,...,-0.032976,-0.00147,-0.005099,0.021424,-0.007465,0.048359,-0.000423,-0.003867,0.914163,-0.001745
9,-0.515114,0.021533,-0.004506,0.006412,0.013479,-0.006617,-0.004527,-0.000165,-0.001273,-0.005455,...,-0.029317,-0.001126,-0.00399,0.015087,-0.005879,0.03081,-0.000332,-0.00298,0.14207,-0.001364


In [13]:
cos_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine'))
cos_dist = cos_dist = [np.min(row) for row in cos_dist]
cos_dist


[1.0133465651330347,
 1.0478419692926606,
 1.0056062820137306,
 0.9921024198642558,
 0.9942620912620646,
 1.0108903191060392,
 1.0093300919904236,
 1.000220697804514,
 1.0016775011401406,
 1.0054551800623677,
 1.0128414693161534,
 1.000012209199065,
 0.9956339028284726,
 1.033693663764501,
 0.9892448609654177,
 1.0206918783036447,
 0.9215883377700925,
 1.0060733058227649,
 0.983659452196308,
 1.0244962284762618,
 1.104181449664576,
 1.0025355008316674,
 1.0032958980397835,
 0.9617087858705846,
 1.0037973606560664,
 1.1090727138612357,
 1.0116258456969809,
 1.0262264027626515,
 0.9787287759431637,
 0.9447552875767752,
 1.0025486762983253,
 0.9794366148822644,
 0.9884043241910551,
 0.9981956745044266,
 1.0384091659405674,
 0.9970161944593666,
 1.0308461919800538,
 1.0149620438220188,
 0.9985980869573726,
 1.0012061257260996,
 1.0069395541255608,
 0.9987245842240314,
 1.0092045806390526,
 1.021906277112098,
 1.0078663205542515,
 0.8676580335808031,
 0.973768202759105,
 0.9996208672823863,