In [1]:
DATA_NAME = 'agriVision-full' 
TRANSFORM = 'fourier'
CHANNEL = 'blue'

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_blue_fourier_full_agriVision.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,1291.57,0.03,-0.07,-0.01,0.03,0.0,-0.0,-0.02,0.01,-0.02,...,-0.0,-0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0
3,0.03,1479.52,0.01,0.01,0.05,0.02,0.01,0.03,0.02,0.01,...,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,0.0
4,-0.07,0.01,1233.06,-0.03,-0.02,-0.01,0.03,-0.01,0.01,0.01,...,-0.01,0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
5,-0.01,0.01,-0.03,662.01,-0.02,0.01,-0.02,-0.0,-0.01,-0.01,...,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0
6,0.03,0.05,-0.02,-0.02,512.73,-0.0,-0.0,-0.01,-0.01,-0.01,...,-0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0
7,0.0,0.02,-0.01,0.01,-0.0,352.3,-0.0,0.0,0.01,0.0,...,-0.0,0.01,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0
8,-0.0,0.01,0.03,-0.02,-0.0,-0.0,256.58,-0.0,0.0,0.01,...,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0
9,-0.02,0.03,-0.01,-0.0,-0.01,0.0,-0.0,238.63,-0.0,0.01,...,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0
10,0.01,0.02,0.01,-0.01,-0.01,0.01,0.0,-0.0,158.74,-0.0,...,0.0,-0.0,0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0
11,-0.02,0.01,0.01,-0.01,-0.01,0.0,0.01,0.01,-0.0,122.42,...,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,1.0,2e-05,-6e-05,-1e-05,4e-05,0.0,-0.0,-3e-05,1e-05,-4e-05,...,-2e-05,-1e-05,-6e-05,6e-05,3e-05,0.0,-1e-05,-6e-05,-2e-05,-1e-05
3,2e-05,1.0,1e-05,1e-05,6e-05,3e-05,1e-05,4e-05,3e-05,1e-05,...,-3e-05,-0.0,-3e-05,-0.0,6e-05,1e-05,-1e-05,3e-05,-1e-05,3e-05
4,-6e-05,1e-05,1.0,-3e-05,-2e-05,-1e-05,6e-05,-2e-05,2e-05,4e-05,...,-8e-05,1e-05,5e-05,-2e-05,0.0,-6e-05,2e-05,6e-05,2e-05,-1e-05
5,-1e-05,1e-05,-3e-05,1.0,-3e-05,2e-05,-4e-05,-1e-05,-3e-05,-5e-05,...,3e-05,1e-05,3e-05,0.0,-0.0,-1e-05,-1e-05,-2e-05,0.00011,2e-05
6,4e-05,6e-05,-2e-05,-3e-05,1.0,-1e-05,-1e-05,-2e-05,-3e-05,-3e-05,...,-2e-05,2e-05,4e-05,-3e-05,2e-05,-0.0,3e-05,-3e-05,-3e-05,-2e-05
7,0.0,3e-05,-1e-05,2e-05,-1e-05,1.0,-1e-05,0.0,4e-05,2e-05,...,-0.0,7e-05,-1e-05,5e-05,-5e-05,2e-05,1e-05,2e-05,-3e-05,-1e-05
8,-0.0,1e-05,6e-05,-4e-05,-1e-05,-1e-05,1.0,-2e-05,2e-05,4e-05,...,-2e-05,-4e-05,-0.0,1e-05,-1e-05,-2e-05,3e-05,-2e-05,-1e-05,-5e-05
9,-3e-05,4e-05,-2e-05,-1e-05,-2e-05,0.0,-2e-05,1.0,-1e-05,3e-05,...,4e-05,-0.0,-4e-05,-1e-05,-1e-05,1e-05,-0.0,5e-05,2e-05,-0.0
10,1e-05,3e-05,2e-05,-3e-05,-3e-05,4e-05,2e-05,-1e-05,1.0,-3e-05,...,3e-05,-1e-05,3e-05,-6e-05,-1e-05,5e-05,2e-05,-3e-05,-5e-05,3e-05
11,-4e-05,1e-05,4e-05,-5e-05,-3e-05,2e-05,4e-05,3e-05,-3e-05,1.0,...,2e-05,-2e-05,-2e-05,3e-05,-1e-05,4e-05,-2e-05,1e-05,-3e-05,-3e-05


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

0.20791947550064938

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[6.87847270e+04 5.25600603e+04 4.68394345e+04 1.36790917e+04
 8.18304279e+03 3.87276859e+03 2.58533727e+03 2.05304878e+03
 1.73693982e+03 7.79831493e+02 4.65574840e+02 3.65323185e+02
 2.87668700e+02 1.11679577e+02 7.25516207e+01 5.84827807e+01
 3.78001501e+01 2.52892408e+01 1.73662109e+01 1.06306049e+01
 7.94054242e+00 5.82541023e+00 4.45115058e+00 1.18127965e+00
 4.79069470e-01 3.11423196e-01 6.58403426e-03 4.12036483e-03
 5.23476411e-04 2.12845584e-04 3.97580553e-06 1.35651813e-26]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,-0.105355,0.991265,-0.076358,-0.014811,-0.010355,-0.006636,-0.004705,-0.004338,-0.002847,-0.002189,...,-0.000234,-0.000429,-7.3e-05,-6.2e-05,-7e-06,-7e-06,-3e-06,-2e-06,-2.778801e-07,-5.267545e-08
1,0.943278,0.074916,-0.322675,-0.015768,-0.010598,-0.006636,-0.004667,-0.004317,-0.002805,-0.002168,...,-0.000227,-0.000421,-7.5e-05,-5.7e-05,-8e-06,-7e-06,-3e-06,-2e-06,-3.1785e-07,-1.382452e-07
2,0.31304,0.10486,0.942737,-0.034174,-0.022537,-0.013871,-0.009637,-0.008954,-0.005803,-0.004444,...,-0.000478,-0.000865,-0.000144,-0.000125,-1.7e-05,-1.6e-05,-5e-06,-3e-06,-4.663864e-07,-2.90347e-07
3,0.022219,0.017923,0.024096,0.996182,-0.068136,-0.025088,-0.015354,-0.01391,-0.00853,-0.006425,...,-0.000653,-0.001213,-0.000201,-0.000175,-2.4e-05,-2.1e-05,-7e-06,-6e-06,-1.947803e-07,-2.876136e-07
4,0.01631,0.013636,0.017453,0.064293,0.995161,-0.048699,-0.024467,-0.021744,-0.012469,-0.009202,...,-0.000923,-0.001697,-0.000281,-0.000248,-3.3e-05,-2.9e-05,-9e-06,-8e-06,-1.076009e-06,-5.748084e-07
5,0.009529,0.00815,0.01007,0.023734,0.04102,0.991998,-0.054081,-0.043182,-0.019072,-0.013296,...,-0.001242,-0.00228,-0.000385,-0.000325,-4.7e-05,-3.9e-05,-1.3e-05,-1e-05,-1.465562e-06,-7.551349e-07
6,0.00722,0.00625,0.007626,0.016456,0.024999,0.07145,-0.160377,-0.091748,-0.025702,-0.016718,...,-0.001451,-0.002678,-0.00045,-0.000385,-5.1e-05,-4.6e-05,-1.5e-05,-1.1e-05,-1.638743e-06,-5.751252e-07
7,0.006496,0.005624,0.006803,0.014312,0.020801,0.047132,0.94703,-0.288194,-0.033298,-0.020284,...,-0.001667,-0.003091,-0.000511,-0.000439,-6e-05,-5.4e-05,-1.7e-05,-1.4e-05,-1.921626e-06,-1.157446e-06
8,0.00988,0.008523,0.010374,0.021255,0.030279,0.061851,0.264182,0.94924,-0.068333,-0.038781,...,-0.002986,-0.005572,-0.00093,-0.000794,-0.000109,-9.5e-05,-3.2e-05,-2.3e-05,-3.075432e-06,-1.736332e-06
9,0.005015,0.004356,0.005259,0.010238,0.013756,0.02249,0.039325,0.046572,0.990545,-0.085255,...,-0.00346,-0.006519,-0.001062,-0.000926,-0.000125,-0.000107,-3.6e-05,-2.8e-05,-4.183572e-06,-1.696387e-06


In [13]:
cos_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine'))
cos_dist = cos_dist = [np.min(row) for row in cos_dist]
cos_dist


[1.1053550087653579,
 0.9250843385205051,
 0.05726342927633532,
 0.0038183224998802334,
 0.004839273522764254,
 0.008001920189077194,
 1.1603765513984823,
 1.2881944753840415,
 1.0683333503657015,
 1.0852545327189802,
 1.0875871429793365,
 1.0356598382425908,
 1.0429174078928098,
 1.0771671590089855,
 1.0671204151363338,
 1.0766388115488281,
 1.0358115613032743,
 1.0731604673361355,
 1.0633038295343848,
 0.9963469130010032,
 0.9896945202821389,
 1.02239110342328,
 0.22277438687025752,
 0.970114081044472,
 0.08811213840099918,
 0.09048897802397737,
 0.11611542101719274,
 0.12307783723971943,
 0.1013057741084804,
 0.10532290460007154,
 0.11755768379923404,
 0.11690935822578574]