In [1]:
DATA_NAME = 'agriVision-full' 
TRANSFORM = 'fourier'
CHANNEL = 'gray'

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gray_fourier_full_agriVision.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,1020.3,0.03,-0.07,-0.01,0.03,0.0,-0.0,-0.01,0.0,-0.02,...,-0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0
3,0.03,1306.59,0.01,0.0,0.05,0.02,0.01,0.02,0.01,0.01,...,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,0.0
4,-0.07,0.01,1131.26,-0.02,-0.02,-0.0,0.03,-0.01,0.01,0.01,...,-0.01,0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
5,-0.01,0.0,-0.02,612.9,-0.02,0.01,-0.02,-0.0,-0.01,-0.01,...,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0
6,0.03,0.05,-0.02,-0.02,434.06,-0.01,-0.0,-0.01,-0.01,-0.01,...,-0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0
7,0.0,0.02,-0.0,0.01,-0.01,277.95,-0.0,0.0,0.01,0.0,...,0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0
8,-0.0,0.01,0.03,-0.02,-0.0,-0.0,207.91,-0.0,0.0,0.01,...,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0
9,-0.01,0.02,-0.01,-0.0,-0.01,0.0,-0.0,187.78,-0.0,0.01,...,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0
10,0.0,0.01,0.01,-0.01,-0.01,0.01,0.0,-0.0,135.48,-0.0,...,0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0
11,-0.02,0.01,0.01,-0.01,-0.01,0.0,0.01,0.01,-0.0,119.43,...,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,1.0,3e-05,-6e-05,-2e-05,4e-05,1e-05,-1e-05,-3e-05,1e-05,-5e-05,...,-1e-05,-1e-05,-7e-05,7e-05,3e-05,0.0,-1e-05,-6e-05,-1e-05,-1e-05
3,3e-05,1.0,1e-05,0.0,6e-05,4e-05,2e-05,4e-05,3e-05,1e-05,...,-4e-05,-0.0,-3e-05,-1e-05,5e-05,2e-05,-1e-05,2e-05,-1e-05,3e-05
4,-6e-05,1e-05,1.0,-2e-05,-3e-05,-0.0,6e-05,-1e-05,2e-05,3e-05,...,-7e-05,2e-05,5e-05,-2e-05,0.0,-7e-05,2e-05,6e-05,2e-05,-1e-05
5,-2e-05,0.0,-2e-05,1.0,-3e-05,1e-05,-4e-05,-1e-05,-3e-05,-5e-05,...,3e-05,2e-05,3e-05,0.0,-1e-05,-2e-05,-1e-05,-2e-05,0.00011,2e-05
6,4e-05,6e-05,-3e-05,-3e-05,1.0,-2e-05,-1e-05,-3e-05,-3e-05,-2e-05,...,-2e-05,2e-05,4e-05,-4e-05,2e-05,-0.0,3e-05,-3e-05,-3e-05,-2e-05
7,1e-05,4e-05,-0.0,1e-05,-2e-05,1.0,-1e-05,1e-05,4e-05,2e-05,...,1e-05,6e-05,-1e-05,5e-05,-5e-05,2e-05,1e-05,3e-05,-3e-05,-1e-05
8,-1e-05,2e-05,6e-05,-4e-05,-1e-05,-1e-05,1.0,-2e-05,2e-05,4e-05,...,-2e-05,-4e-05,-0.0,0.0,-1e-05,-2e-05,3e-05,-2e-05,-2e-05,-5e-05
9,-3e-05,4e-05,-1e-05,-1e-05,-3e-05,1e-05,-2e-05,1.0,-1e-05,3e-05,...,3e-05,-1e-05,-4e-05,-1e-05,-1e-05,1e-05,-0.0,5e-05,3e-05,-1e-05
10,1e-05,3e-05,2e-05,-3e-05,-3e-05,4e-05,2e-05,-1e-05,1.0,-3e-05,...,2e-05,-0.0,3e-05,-6e-05,-1e-05,5e-05,2e-05,-3e-05,-5e-05,3e-05
11,-5e-05,1e-05,3e-05,-5e-05,-2e-05,2e-05,4e-05,3e-05,-3e-05,1.0,...,1e-05,-2e-05,-2e-05,3e-05,-1e-05,3e-05,-2e-05,1e-05,-3e-05,-3e-05


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

0.18447363510073617

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[5.36043060e+04 4.00803913e+04 3.22920442e+04 1.17087980e+04
 5.86347146e+03 2.40829444e+03 1.35325430e+03 1.09902476e+03
 9.12186800e+02 5.68858387e+02 4.36855493e+02 2.35957354e+02
 1.89275352e+02 7.90189620e+01 5.20420671e+01 4.66215505e+01
 2.80312157e+01 2.02331863e+01 1.40734284e+01 9.05220331e+00
 6.82893193e+00 5.37453473e+00 2.78672578e+00 1.22848442e+00
 4.70647747e-01 3.20835955e-01 6.50717190e-03 4.12989276e-03
 5.07359320e-04 1.89478940e-04 3.93626031e-06 1.05713622e-26]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,-0.056603,0.992915,-0.102148,-0.016442,-0.010115,-0.006033,-0.004429,-0.003965,-0.002836,-0.002498,...,-0.000208,-0.000451,-8.4e-05,-7.4e-05,-9e-06,-8e-06,-3e-06,-2e-06,-3.214483e-07,-6.868177e-08
1,-0.164478,0.090967,0.98173,-0.022903,-0.013355,-0.007717,-0.005575,-0.005034,-0.003574,-0.003131,...,-0.000264,-0.000561,-0.0001,-9.4e-05,-1.2e-05,-1.2e-05,-3e-06,-2e-06,-2.913187e-07,-2.044218e-07
2,0.983921,0.071182,0.156823,-0.037781,-0.02052,-0.011585,-0.008357,-0.007504,-0.005303,-0.004679,...,-0.000383,-0.000834,-0.000157,-0.000133,-1.7e-05,-1.6e-05,-5e-06,-4e-06,-5.906565e-07,-2.961228e-07
3,0.031064,0.020045,0.02548,0.997156,-0.051355,-0.020041,-0.013442,-0.011824,-0.008123,-0.007089,...,-0.000557,-0.001229,-0.000223,-0.000201,-2.7e-05,-2.4e-05,-8e-06,-6e-06,-2.75859e-07,-3.460193e-07
4,0.018055,0.013009,0.015743,0.048277,0.996809,-0.040583,-0.022888,-0.019571,-0.012669,-0.010892,...,-0.000832,-0.001816,-0.000329,-0.000301,-3.8e-05,-3.5e-05,-1.1e-05,-9e-06,-1.279886e-06,-6.764268e-07
5,0.009818,0.007424,0.008739,0.018925,0.03553,0.99493,-0.06157,-0.044312,-0.022336,-0.01838,...,-0.001229,-0.002695,-0.000495,-0.000437,-6e-05,-5.1e-05,-1.7e-05,-1.2e-05,-1.929304e-06,-9.526135e-07
6,0.006134,0.004707,0.005459,0.011062,0.017799,0.047319,0.979347,-0.168758,-0.034413,-0.025839,...,-0.001421,-0.003146,-0.000568,-0.000507,-6.8e-05,-6.1e-05,-1.9e-05,-1.5e-05,-2.17399e-06,-1.334538e-06
7,0.006183,0.004733,0.005532,0.010928,0.017135,0.039126,0.138171,0.957204,-0.05249,-0.036611,...,-0.00176,-0.003926,-0.000714,-0.000635,-8.5e-05,-7.4e-05,-2.5e-05,-1.7e-05,-2.255902e-06,-1.448167e-06
8,0.007922,0.006124,0.007111,0.013898,0.021363,0.044752,0.109904,0.212015,-0.107442,-0.066996,...,-0.002757,-0.006117,-0.00111,-0.000989,-0.000129,-0.000117,-3.8e-05,-2.6e-05,-3.899363e-06,-1.827689e-06
9,0.005165,0.004004,0.004647,0.008899,0.013217,0.024154,0.04214,0.055348,0.974998,-0.184102,...,-0.002914,-0.006544,-0.001164,-0.001056,-0.000138,-0.00012,-4e-05,-3e-05,-4.585331e-06,-1.852083e-06


In [13]:
cos_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine'))
cos_dist = cos_dist = [np.min(row) for row in cos_dist]
cos_dist


[1.0566032489976813,
 0.9090330573058893,
 0.8431773258574845,
 0.002844069813893846,
 0.003191406363617033,
 0.005069813768808551,
 0.020652773374305156,
 0.04279586502973998,
 1.107441951926659,
 1.1841024072874962,
 1.0637357999829158,
 1.0350577078701917,
 1.0431576322443177,
 1.1021278828094343,
 1.0144150892391757,
 1.0926057006512058,
 1.0364654460298837,
 1.0805933340603908,
 1.0813870595793484,
 0.9942614037409322,
 0.9921832416925753,
 1.0845206015613065,
 0.042142732800395266,
 0.963175061738689,
 0.12037266545671588,
 0.12600130351111372,
 0.12815253158652384,
 0.13493572382720664,
 0.08005503400277514,
 0.0829632963480249,
 0.11637497233949279,
 0.11561542407514735]