In [1]:
DATA_NAME = 'agriVision-full' 
TRANSFORM = 'gabor'
CHANNEL = 'red'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_red_gabor_full_agriVision.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,104.94,0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,...,-0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0
1,0.0,38.61,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,...,0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0
2,-0.0,0.0,30.1,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,...,0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,0.0
3,0.0,-0.0,0.0,24.73,-0.0,0.0,-0.0,0.0,-0.0,0.0,...,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0,-0.0,0.0
4,-0.0,-0.0,0.0,-0.0,7.21,-0.0,-0.0,-0.0,-0.0,0.0,...,0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
5,-0.0,-0.0,0.0,0.0,-0.0,1.07,-0.0,-0.0,0.0,-0.0,...,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,0.0
6,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.62,0.0,0.0,0.0,...,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,-0.0
7,-0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,114.98,0.0,-0.0,...,-0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0
8,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,62.59,0.0,...,-0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0
9,-0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,22.53,...,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-3e-05,5e-05,-2e-05,-4e-05,0.0,-2e-05,4e-05,-1e-05,...,-5e-05,5e-05,1e-05,2e-05,4e-05,-6e-05,-1e-05,3e-05,3e-05,4e-05
1,5e-05,1.0,6e-05,-3e-05,-3e-05,-6e-05,-1e-05,4e-05,1e-05,-3e-05,...,0.0,-3e-05,2e-05,4e-05,-3e-05,-0.0,1e-05,-1e-05,1e-05,1e-05
2,-3e-05,6e-05,1.0,3e-05,1e-05,3e-05,5e-05,2e-05,5e-05,-0.0,...,0.0,-2e-05,4e-05,-0.0,4e-05,1e-05,-6e-05,-4e-05,3e-05,2e-05
3,5e-05,-3e-05,3e-05,1.0,-2e-05,3e-05,-1e-05,1e-05,-3e-05,3e-05,...,-5e-05,-3e-05,-1e-05,3e-05,2e-05,1e-05,4e-05,5e-05,-2e-05,2e-05
4,-2e-05,-3e-05,1e-05,-2e-05,1.0,-1e-05,-0.0,-2e-05,-4e-05,4e-05,...,0.0,5e-05,-1e-05,-3e-05,1e-05,-4e-05,6e-05,3e-05,1e-05,-3e-05
5,-4e-05,-6e-05,3e-05,3e-05,-1e-05,1.0,-0.0,-2e-05,2e-05,-1e-05,...,2e-05,-5e-05,3e-05,-1e-05,2e-05,-3e-05,4e-05,3e-05,7e-05,1e-05
6,0.0,-1e-05,5e-05,-1e-05,-0.0,-0.0,1.0,1e-05,1e-05,3e-05,...,3e-05,-1e-05,-3e-05,-0.0,0.0,2e-05,3e-05,2e-05,-1e-05,-1e-05
7,-2e-05,4e-05,2e-05,1e-05,-2e-05,-2e-05,1e-05,1.0,2e-05,-3e-05,...,-2e-05,1e-05,-4e-05,-1e-05,-5e-05,0.0,-6e-05,2e-05,2e-05,-2e-05
8,4e-05,1e-05,5e-05,-3e-05,-4e-05,2e-05,1e-05,2e-05,1.0,1e-05,...,-2e-05,-0.0,3e-05,1e-05,1e-05,-3e-05,4e-05,2e-05,-2e-05,-2e-05
9,-1e-05,-3e-05,-0.0,3e-05,4e-05,-1e-05,3e-05,-3e-05,1e-05,1.0,...,0.0,-2e-05,-1e-05,0.0,-1e-05,2e-05,-3e-05,-0.0,-2e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

0.034245266099151864

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[3.16702063e+02 2.64441054e+02 2.45362931e+02 1.91916067e+02
 9.93241457e+01 9.27425446e+01 6.09417616e+01 5.62083834e+01
 4.78764731e+01 4.68899430e+01 4.42271144e+01 3.68763297e+01
 3.51875117e+01 3.17960664e+01 2.97868747e+01 2.85219494e+01
 2.55948469e+01 2.15247881e+01 1.90389939e+01 1.42677541e+01
 1.20071394e+01 1.01918914e+01 2.15762272e+00 1.41058730e+00
 1.33379589e+00 1.13575625e+00 5.68006163e-01 3.75714171e-01
 2.86013883e-01 2.34332372e-01 1.69882612e-01 6.48723356e-02
 4.35341948e-02 2.79558230e-02 2.74401797e-02 1.91308901e-02
 9.48667925e-03 6.09266729e-03 1.59881540e-03 7.84824767e-04
 4.35338409e-04 5.84352444e-29]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.108219,-0.006794,-0.005044,-0.004058,-0.001137,-0.000169,-9.6e-05,0.989746,-0.013982,-0.003684,...,-0.000552,-0.000166,-1.9e-05,-0.037286,-0.008256,-0.00806,-0.008082,-0.000772,-0.000215,-2.7e-05
1,0.940907,-0.006266,-0.004659,-0.00367,-0.001024,-0.000154,-8.7e-05,0.074117,-0.013724,-0.00334,...,-0.000505,-0.000147,-1.5e-05,-0.049671,-0.007619,-0.007606,-0.007491,-0.000691,-0.000192,-2.2e-05
2,0.306564,-0.012487,-0.009118,-0.007253,-0.002004,-0.000294,-0.00017,0.100841,-0.028311,-0.006544,...,-0.000975,-0.000284,-3.1e-05,-0.126238,-0.015273,-0.014993,-0.014948,-0.001383,-0.000382,-4.5e-05
3,0.078776,-0.014246,-0.01022,-0.008031,-0.00219,-0.000322,-0.000187,0.050791,-0.037404,-0.007232,...,-0.001049,-0.000312,-3.3e-05,0.987702,-0.017863,-0.017516,-0.017389,-0.001487,-0.000418,-5.1e-05
4,0.013025,-0.012775,-0.00818,-0.00611,-0.001532,-0.000228,-0.000124,0.010757,-0.344842,-0.005434,...,-0.000742,-0.000227,-2.3e-05,0.019333,-0.018249,-0.017494,-0.017499,-0.001053,-0.000293,-3.4e-05
5,0.02477,-0.028433,-0.017677,-0.013246,-0.003296,-0.000477,-0.000274,0.020777,0.931806,-0.011659,...,-0.001588,-0.000476,-4.8e-05,0.035837,-0.041533,-0.040084,-0.039461,-0.002235,-0.000627,-7.5e-05
6,0.008946,-0.027825,-0.013609,-0.009522,-0.00213,-0.000314,-0.000179,0.007766,0.031897,-0.008151,...,-0.001009,-0.000304,-3.2e-05,0.011656,-0.061862,-0.055617,-0.054184,-0.00146,-0.000399,-4.6e-05
7,0.011587,-0.045405,-0.020552,-0.014071,-0.003054,-0.000443,-0.000255,0.010068,0.037164,-0.012059,...,-0.001462,-0.000437,-4.8e-05,0.014833,-0.130063,-0.113471,-0.107204,-0.002095,-0.000588,-6.7e-05
8,0.003236,-0.023483,-0.007984,-0.005206,-0.001069,-0.000152,-9.1e-05,0.002941,0.009083,-0.004421,...,-0.000523,-0.000157,-1.7e-05,0.004189,0.883832,-0.386868,-0.254847,-0.000737,-0.000205,-2.4e-05
9,0.001585,-0.012064,-0.003818,-0.00257,-0.000549,-8.2e-05,-4.4e-05,0.001323,0.004323,-0.002086,...,-0.000255,-7.7e-05,-9e-06,0.001965,0.107537,0.709334,-0.695925,-0.000346,-0.0001,-1.2e-05


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.01025392935319347,
 0.05909341433027859,
 0.0641843720055989,
 0.012298447008823432,
 0.0635755492191109,
 0.06819403707046556,
 0.03763113571165566,
 0.04985747682906194,
 0.11616801468716353,
 0.2906662119066046,
 0.36525441092576794,
 0.1666096469806464,
 0.2229566407535628,
 0.11602894053763635,
 0.28433345881723726,
 0.3155532340484497,
 0.33291604560732413,
 0.19621819709841914,
 0.24396237370794183,
 0.08430300626042009,
 0.19320246670921115,
 0.21777473354367127,
 0.014561602272063157,
 0.23037670154308953,
 0.3662182762515318,
 0.21106049071880828,
 0.026216250814481423,
 0.04617168525920623,
 0.10203847321454107,
 0.14070977359228531,
 0.08988739223856423,
 0.03837592094986064,
 0.05908012588165201,
 0.19942274675357674,
 0.2243544968997967,
 0.46837365379792506,
 0.2743446287872595,
 0.3455548308511255,
 0.08916507282575703,
 0.22454650476100824,
 0.29970851024109135,
 0.3036369453669592]