In [1]:
DATA_NAME = 'agriVision-full' 
TRANSFORM = 'fourier'
CHANNEL = 'gray'

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gray_fourier_full_agriVision.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
        cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,31519.5,1.0,-1.81,-0.35,0.73,0.06,-0.12,-0.3,0.08,-0.37,...,-0.01,-0.01,-0.03,0.03,0.0,-0.0,-0.0,-0.0,-0.0,-0.0
3,1.0,41241.21,0.17,0.13,1.31,0.6,0.22,0.51,0.29,0.16,...,-0.05,0.0,-0.02,-0.0,0.01,0.0,-0.0,0.0,-0.0,0.0
4,-1.81,0.17,31630.22,-0.61,-0.51,-0.1,0.72,-0.19,0.17,0.24,...,-0.07,0.03,0.03,-0.01,0.0,-0.01,0.0,0.0,0.0,-0.0
5,-0.35,0.13,-0.61,16909.06,-0.38,0.15,-0.35,-0.08,-0.19,-0.28,...,0.02,0.02,0.01,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0
6,0.73,1.31,-0.51,-0.38,11327.16,-0.13,-0.04,-0.18,-0.18,-0.13,...,-0.02,0.02,0.01,-0.01,0.0,-0.0,0.0,-0.0,-0.0,-0.0
7,0.06,0.6,-0.1,0.15,-0.13,7069.8,-0.06,0.07,0.18,0.1,...,0.01,0.04,-0.0,0.01,-0.0,0.0,0.0,0.0,-0.0,-0.0
8,-0.12,0.22,0.72,-0.35,-0.04,-0.06,5081.87,-0.08,0.11,0.15,...,-0.01,-0.02,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0
9,-0.3,0.51,-0.19,-0.08,-0.18,0.07,-0.08,4521.61,-0.05,0.11,...,0.01,-0.0,-0.01,-0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0
10,0.08,0.29,0.17,-0.19,-0.18,0.18,0.11,-0.05,3215.99,-0.09,...,0.01,-0.0,0.01,-0.01,-0.0,0.0,0.0,-0.0,-0.0,0.0
11,-0.37,0.16,0.24,-0.28,-0.13,0.1,0.15,0.11,-0.09,2725.84,...,0.0,-0.01,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,1.0,3e-05,-6e-05,-2e-05,4e-05,0.0,-1e-05,-3e-05,1e-05,-4e-05,...,-1e-05,-0.0,-3e-05,3e-05,1e-05,-0.0,-0.0,-2e-05,-0.0,-0.0
3,3e-05,1.0,0.0,1e-05,6e-05,4e-05,2e-05,4e-05,3e-05,1e-05,...,-2e-05,0.0,-2e-05,-0.0,2e-05,1e-05,-0.0,1e-05,-0.0,1e-05
4,-6e-05,0.0,1.0,-3e-05,-3e-05,-1e-05,6e-05,-2e-05,2e-05,3e-05,...,-4e-05,1e-05,3e-05,-1e-05,0.0,-2e-05,1e-05,2e-05,1e-05,-0.0
5,-2e-05,1e-05,-3e-05,1.0,-3e-05,1e-05,-4e-05,-1e-05,-3e-05,-4e-05,...,2e-05,1e-05,2e-05,0.0,-0.0,-1e-05,-0.0,-1e-05,3e-05,0.0
6,4e-05,6e-05,-3e-05,-3e-05,1.0,-1e-05,-1e-05,-3e-05,-3e-05,-2e-05,...,-1e-05,1e-05,2e-05,-2e-05,1e-05,-0.0,1e-05,-1e-05,-1e-05,-0.0
7,0.0,4e-05,-1e-05,1e-05,-1e-05,1.0,-1e-05,1e-05,4e-05,2e-05,...,1e-05,3e-05,-0.0,3e-05,-2e-05,1e-05,0.0,1e-05,-1e-05,-0.0
8,-1e-05,2e-05,6e-05,-4e-05,-1e-05,-1e-05,1.0,-2e-05,3e-05,4e-05,...,-1e-05,-2e-05,-0.0,0.0,-1e-05,-1e-05,1e-05,-1e-05,-1e-05,-1e-05
9,-3e-05,4e-05,-2e-05,-1e-05,-3e-05,1e-05,-2e-05,1.0,-1e-05,3e-05,...,2e-05,-0.0,-2e-05,-0.0,-0.0,1e-05,-0.0,2e-05,1e-05,-0.0
10,1e-05,3e-05,2e-05,-3e-05,-3e-05,4e-05,3e-05,-1e-05,1.0,-3e-05,...,2e-05,-0.0,2e-05,-3e-05,-0.0,3e-05,1e-05,-1e-05,-1e-05,1e-05
11,-4e-05,1e-05,3e-05,-4e-05,-2e-05,2e-05,4e-05,3e-05,-3e-05,1.0,...,1e-05,-1e-05,-1e-05,2e-05,-1e-05,2e-05,-1e-05,0.0,-1e-05,-0.0


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

4.596482736190178

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[5.33155514e+07 3.21697200e+07 3.00450175e+07 8.90967064e+06
 3.99247174e+06 1.55608904e+06 8.06021766e+05 6.31320842e+05
 3.21208908e+05 2.29386025e+05 1.51063328e+05 9.55234012e+04
 3.75117372e+04 3.31597959e+04 1.99404590e+04 1.57399802e+04
 7.70363755e+03 5.23645307e+03 1.92781758e+03 1.69695741e+03
 1.23519822e+03 9.48577274e+02 3.08226833e+02 1.47687262e+02
 2.98524855e+01 1.75745420e+01 2.41419725e-01 1.12300804e-01
 8.20338835e-03 1.85891454e-03 1.73241640e-05 1.05146373e-23]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,-0.05538771,0.9967056,-0.05626867,-0.01435429,-0.008582893,-0.005104,-0.003621,-0.003201,-0.002266,-0.001919,...,-7.3e-05,-0.000142,-2.3e-05,-1.8e-05,-1.880196e-06,-1.435391e-06,-4.168598e-07,-1.949164e-07,-2.392375e-08,-3.153998e-09
1,-0.6701829,0.004613746,0.742178,-0.001862528,-0.00105597,-0.000585,-0.000384,-0.000358,-0.000251,-0.000199,...,-9e-06,-1.5e-05,-1e-06,-3e-06,-2.958139e-07,-3.131272e-07,-7.847048e-09,5.477762e-08,4.468074e-09,-1.151568e-09
2,0.7394269,0.07787814,0.667068,-0.03814007,-0.02049426,-0.011665,-0.008144,-0.00723,-0.00507,-0.004294,...,-0.000163,-0.000314,-5e-05,-4e-05,-4.458342e-06,-3.440456e-06,-8.839846e-07,-4.66253e-07,-5.052603e-08,-1.343695e-08
3,0.02522754,0.01659944,0.02508727,0.997912,-0.04398675,-0.017907,-0.011668,-0.010144,-0.006951,-0.005836,...,-0.000212,-0.000418,-6.6e-05,-5.3e-05,-6.123728e-06,-4.486664e-06,-1.215304e-06,-6.485797e-07,-2.702718e-08,-1.380961e-08
4,0.01445026,0.01041022,0.01444802,0.04174372,0.9975846,-0.038329,-0.020745,-0.017519,-0.011357,-0.009382,...,-0.000334,-0.00065,-0.000102,-8.4e-05,-9.286312e-06,-6.864451e-06,-1.768832e-06,-1.014278e-06,-1.114791e-07,-2.874929e-08
5,0.00818735,0.006112822,0.008167347,0.01745921,0.03477686,0.99627,-0.055726,-0.039936,-0.0208,-0.016391,...,-0.000524,-0.001021,-0.000163,-0.000129,-1.543159e-05,-1.063405e-05,-2.875445e-06,-1.459789e-06,-1.783394e-07,-4.43638e-08
6,0.00528878,0.00399305,0.005241527,0.01055407,0.01781043,0.045988,0.984171,-0.161892,-0.035608,-0.025165,...,-0.000672,-0.001317,-0.000208,-0.000167,-1.926686e-05,-1.405118e-05,-3.609266e-06,-2.021049e-06,-2.223709e-07,-6.676493e-08
7,0.006157879,0.004643887,0.006129662,0.01206645,0.0198104,0.044141,0.15445,0.982702,-0.066212,-0.042567,...,-0.000997,-0.001962,-0.000313,-0.00025,-2.865071e-05,-2.050212e-05,-5.549846e-06,-2.756724e-06,-2.92154e-07,-8.545973e-08
8,0.003785875,0.002875678,0.003771617,0.007258287,0.0113391,0.020844,0.037819,0.05103,0.987262,-0.127739,...,-0.001221,-0.002404,-0.000379,-0.000308,-3.504608e-05,-2.455608e-05,-6.699656e-06,-3.65012e-06,-4.225658e-07,-8.597217e-08
9,0.003691862,0.002797408,0.003657934,0.007004104,0.01077752,0.018959,0.031185,0.038984,0.114739,0.983076,...,-0.001668,-0.003288,-0.00052,-0.000415,-4.79357e-05,-3.377911e-05,-9.368213e-06,-4.835123e-06,-5.419296e-07,-1.431406e-07


In [13]:
cos_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine'))
cos_dist = cos_dist = [np.min(row) for row in cos_dist]
cos_dist


[1.0553877066890338,
 0.9953862537060566,
 0.3329320185333048,
 0.0020880353844012767,
 0.0024153521824146207,
 0.0037304089422824926,
 0.015829069532521056,
 0.01729805416216923,
 0.012737983688208665,
 0.016924351234914647,
 1.1125965429119218,
 1.0490928175583445,
 1.0392995869606114,
 1.0892951900008945,
 1.0478705317662453,
 1.0653549269283282,
 1.0367360306137947,
 1.0720888492496607,
 1.035627070117861,
 0.9923534339024616,
 0.9906603183715998,
 1.0698458247706062,
 0.022863334845540817,
 0.9516503250484686,
 0.0468206755405568,
 0.04707347572226128,
 0.036813065519708976,
 0.03954099878572226,
 0.029295742290199156,
 0.028683091523597493,
 0.034005036900533026,
 0.03256804507469868]