In [6]:
DATA_NAME = 'agriVision-full' 
TRANSFORM = 'wavelet-horizVert'
CHANNEL = 'gray'

In [7]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_gray_horizVert_wavelet_full_agriVision.ipynb


In [8]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [9]:
from scipy import spatial
from sklearn.decomposition import PCA

In [10]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [11]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [12]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [13]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
        cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [14]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,2,3,4,5,6,7,8,9,10
2,1351947.78,-15.82,-10.25,-6.43,-0.87,-0.69,0.19,-0.13,-0.0
3,-15.82,268045.83,-3.72,0.6,0.19,-0.06,-0.02,0.01,0.01
4,-10.25,-3.72,60844.97,0.16,0.41,-0.02,-0.04,-0.01,-0.0
5,-6.43,0.6,0.16,9613.41,-0.04,0.0,0.01,-0.01,-0.0
6,-0.87,0.19,0.41,-0.04,1698.79,0.02,0.0,0.0,0.0
7,-0.69,-0.06,-0.02,0.0,0.02,349.29,0.0,0.0,0.0
8,0.19,-0.02,-0.04,0.01,0.0,0.0,89.87,0.0,-0.0
9,-0.13,0.01,-0.01,-0.01,0.0,0.0,0.0,17.87,-0.0
10,-0.0,0.01,-0.0,-0.0,0.0,0.0,-0.0,-0.0,1.35


In [15]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,2,3,4,5,6,7,8,9,10
2,1.0,-3e-05,-4e-05,-6e-05,-2e-05,-3e-05,2e-05,-3e-05,-0.0
3,-3e-05,1.0,-3e-05,1e-05,1e-05,-1e-05,-0.0,0.0,2e-05
4,-4e-05,-3e-05,1.0,1e-05,4e-05,-0.0,-2e-05,-1e-05,-0.0
5,-6e-05,1e-05,1e-05,1.0,-1e-05,0.0,1e-05,-3e-05,-2e-05
6,-2e-05,1e-05,4e-05,-1e-05,1.0,3e-05,1e-05,1e-05,0.0
7,-3e-05,-1e-05,-0.0,0.0,3e-05,1.0,1e-05,4e-05,0.0
8,2e-05,-0.0,-2e-05,1e-05,1e-05,1e-05,1.0,0.0,-0.0
9,-3e-05,0.0,-1e-05,-3e-05,1e-05,4e-05,0.0,1.0,-3e-05
10,-0.0,2e-05,-0.0,-2e-05,0.0,0.0,-0.0,-3e-05,1.0


In [16]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

28.71743968858178

In [17]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[2.03222475e+11 7.86242066e+09 3.96490012e+08 9.63269991e+06
 2.89058468e+05 1.14992539e+04 6.74754349e+02 1.98550573e+01
 2.40672936e-32]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.999652,-0.025777,-0.00561,-0.000888,-0.000157,-3.3e-05,-8e-06,-2e-06,-1.28491e-07
1,0.025565,0.999071,-0.034304,-0.005103,-0.0009,-0.000185,-4.8e-05,-9e-06,-6.74329e-07
2,0.006458,0.033993,0.99902,-0.027171,-0.004659,-0.000959,-0.000247,-4.9e-05,-3.724514e-06
3,0.001185,0.005957,0.02678,0.998926,-0.03655,-0.007244,-0.00186,-0.000372,-2.825031e-05
4,0.00025,0.001256,0.005532,0.035938,0.997805,-0.053686,-0.013132,-0.002602,-0.0001969891
5,6.4e-05,0.000319,0.001404,0.008894,0.05192,0.994273,-0.09147,-0.016638,-0.001255924
6,2.2e-05,0.00011,0.000486,0.00307,0.017408,0.088436,0.990467,-0.103864,-0.007402407
7,7e-06,3.3e-05,0.000146,0.000927,0.005239,0.025508,0.101035,0.991578,-0.07671672
8,1e-06,5e-06,2.2e-05,0.000141,0.000794,0.003861,0.01501,0.075505,0.9970246


In [18]:
cos_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine'))
cos_dist


array([0.00034844, 0.00092904, 0.00097959, 0.00107361, 0.00219504,
       0.00572744, 0.00953349, 0.00842168, 0.00297536])