In [1]:
DATA_NAME = 'agriVision-full' 
TRANSFORM = 'wavelet-diagonal'
CHANNEL = 'gray'

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gray_diagonal_wavelet_full_agriVision.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,2,3,4,5,6,7,8,9,10
2,50291.16,0.01,0.04,-0.21,0.02,0.01,0.02,-0.01,-0.0
3,0.01,8967.65,0.46,-0.02,0.04,-0.0,0.0,0.0,0.0
4,0.04,0.46,1822.25,0.01,0.02,-0.0,-0.0,-0.0,0.0
5,-0.21,-0.02,0.01,297.36,0.01,0.0,-0.0,-0.0,-0.0
6,0.02,0.04,0.02,0.01,75.4,0.0,-0.0,0.0,-0.0
7,0.01,-0.0,-0.0,0.0,0.0,23.2,0.0,-0.0,-0.0
8,0.02,0.0,-0.0,-0.0,-0.0,0.0,9.47,-0.0,0.0
9,-0.01,0.0,-0.0,-0.0,0.0,-0.0,-0.0,1.65,-0.0
10,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.17


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,2,3,4,5,6,7,8,9,10
2,1.0,0.0,0.0,-5e-05,1e-05,1e-05,3e-05,-4e-05,-4e-05
3,0.0,1.0,0.00011,-1e-05,5e-05,-0.0,1e-05,1e-05,4e-05
4,0.0,0.00011,1.0,1e-05,6e-05,-1e-05,-0.0,-2e-05,4e-05
5,-5e-05,-1e-05,1e-05,1.0,3e-05,6e-05,-4e-05,-5e-05,-4e-05
6,1e-05,5e-05,6e-05,3e-05,1.0,2e-05,-4e-05,0.0,-3e-05
7,1e-05,-0.0,-1e-05,6e-05,2e-05,1.0,2e-05,-1e-05,-3e-05
8,3e-05,1e-05,-0.0,-4e-05,-4e-05,2e-05,1.0,-3e-05,1e-05
9,-4e-05,1e-05,-2e-05,-5e-05,0.0,-1e-05,-3e-05,1.0,-5e-05
10,-4e-05,4e-05,4e-05,-4e-05,-3e-05,-3e-05,1e-05,-5e-05,1.0


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

0.7208126585846113

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[2.81172526e+08 8.79911620e+06 3.55751399e+05 9.23116454e+03
 5.71200243e+02 5.12759065e+01 7.36832088e+00 1.70777880e-01
 9.18084177e-35]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.999725,-0.022999,-0.004513,-0.000739,-0.000186,-5.7e-05,-2.3e-05,-4e-06,-5.1301e-07
1,0.022849,0.999268,-0.030284,-0.004727,-0.001192,-0.000368,-0.00015,-2.6e-05,-2.589174e-06
2,0.005179,0.030019,0.999116,-0.028056,-0.006896,-0.002124,-0.000866,-0.000152,-1.547607e-05
3,0.000977,0.00546,0.027444,0.998005,-0.054068,-0.015459,-0.006288,-0.001098,-0.0001158161
4,0.000292,0.001635,0.00807,0.052135,0.994513,-0.084619,-0.031114,-0.005317,-0.000560783
5,0.000108,0.000605,0.002979,0.018318,0.077566,0.983576,-0.160399,-0.021993,-0.002302337
6,6.3e-05,0.000355,0.001752,0.010748,0.0428,0.154522,0.98249,-0.09379,-0.009415882
7,1.6e-05,9.1e-05,0.000448,0.002746,0.010817,0.035237,0.087428,0.989815,-0.1060935
8,4e-06,1.9e-05,9.5e-05,0.000582,0.002294,0.007451,0.018243,0.104672,0.9943087


In [13]:

cos_dist = spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine')
min_cos_dist = [np.min(row) for row in cos_dist]
arg_min_cos_dist = [np.argmin(row) for row in cos_dist]
min_cos_dist

[0.0002750043813556369,
 0.000731820373234271,
 0.000884358770165794,
 0.001995493631274381,
 0.0054865545967491824,
 0.016424025458795954,
 0.01750970276103425,
 0.01018480219557516,
 0.00569125580864227]

In [14]:
arg_min_cos_dist

[0, 1, 2, 3, 4, 5, 6, 7, 8]