In [1]:
DATA_NAME = 'agriVision-full' 
TRANSFORM = 'fourier'
CHANNEL = 'blue'

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_blue_fourier_full_agriVision.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
        cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,41330.19,0.96,-2.09,-0.25,0.82,0.05,-0.02,-0.47,0.12,-0.36,...,-0.03,-0.01,-0.04,0.03,0.0,0.0,-0.0,-0.0,-0.0,-0.0
3,0.96,45865.11,0.42,0.24,1.41,0.61,0.22,0.66,0.39,0.14,...,-0.04,-0.01,-0.02,-0.0,0.01,0.0,-0.0,0.0,-0.0,0.0
4,-2.09,0.42,36991.68,-0.77,-0.5,-0.18,0.89,-0.32,0.23,0.31,...,-0.1,0.02,0.03,-0.01,0.0,-0.01,0.0,0.0,0.0,-0.0
5,-0.25,0.24,-0.77,19198.37,-0.43,0.24,-0.44,-0.12,-0.25,-0.31,...,0.03,0.01,0.01,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0
6,0.82,1.41,-0.5,-0.43,14356.56,-0.11,-0.13,-0.21,-0.24,-0.15,...,-0.01,0.02,0.01,-0.01,0.0,-0.0,0.0,-0.0,-0.0,-0.0
7,0.05,0.61,-0.18,0.24,-0.11,9512.0,-0.11,0.02,0.23,0.11,...,-0.0,0.06,-0.0,0.01,-0.0,0.0,0.0,0.0,-0.0,-0.0
8,-0.02,0.22,0.89,-0.44,-0.13,-0.11,6671.13,-0.1,0.12,0.17,...,-0.01,-0.03,-0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0
9,-0.47,0.66,-0.32,-0.12,-0.21,0.02,-0.1,5965.86,-0.05,0.13,...,0.02,-0.0,-0.01,-0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0
10,0.12,0.39,0.23,-0.25,-0.24,0.23,0.12,-0.05,3809.79,-0.08,...,0.01,-0.0,0.01,-0.01,-0.0,0.0,0.0,-0.0,-0.0,0.0
11,-0.36,0.14,0.31,-0.31,-0.15,0.11,0.17,0.13,-0.08,2815.68,...,0.01,-0.01,-0.0,0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,...,24,25,26,27,28,29,30,31,32,33
2,1.0,2e-05,-5e-05,-1e-05,3e-05,0.0,-0.0,-3e-05,1e-05,-3e-05,...,-1e-05,-0.0,-3e-05,3e-05,1e-05,0.0,-0.0,-2e-05,-0.0,-0.0
3,2e-05,1.0,1e-05,1e-05,5e-05,3e-05,1e-05,4e-05,3e-05,1e-05,...,-2e-05,-0.0,-1e-05,-0.0,3e-05,1e-05,-0.0,1e-05,-0.0,1e-05
4,-5e-05,1e-05,1.0,-3e-05,-2e-05,-1e-05,6e-05,-2e-05,2e-05,3e-05,...,-5e-05,1e-05,3e-05,-1e-05,0.0,-2e-05,1e-05,2e-05,1e-05,-0.0
5,-1e-05,1e-05,-3e-05,1.0,-3e-05,2e-05,-4e-05,-1e-05,-3e-05,-4e-05,...,2e-05,1e-05,2e-05,0.0,-0.0,-1e-05,-0.0,-1e-05,3e-05,0.0
6,3e-05,5e-05,-2e-05,-3e-05,1.0,-1e-05,-1e-05,-2e-05,-3e-05,-2e-05,...,-1e-05,1e-05,2e-05,-2e-05,1e-05,-0.0,1e-05,-1e-05,-1e-05,-0.0
7,0.0,3e-05,-1e-05,2e-05,-1e-05,1.0,-1e-05,0.0,4e-05,2e-05,...,-0.0,4e-05,-1e-05,3e-05,-2e-05,1e-05,0.0,1e-05,-1e-05,-0.0
8,-0.0,1e-05,6e-05,-4e-05,-1e-05,-1e-05,1.0,-2e-05,2e-05,4e-05,...,-1e-05,-2e-05,-0.0,0.0,-0.0,-1e-05,1e-05,-1e-05,-0.0,-1e-05
9,-3e-05,4e-05,-2e-05,-1e-05,-2e-05,0.0,-2e-05,1.0,-1e-05,3e-05,...,3e-05,-0.0,-2e-05,-0.0,-0.0,1e-05,-0.0,2e-05,1e-05,-0.0
10,1e-05,3e-05,2e-05,-3e-05,-3e-05,4e-05,2e-05,-1e-05,1.0,-3e-05,...,2e-05,-0.0,2e-05,-3e-05,-0.0,2e-05,1e-05,-1e-05,-2e-05,1e-05
11,-3e-05,1e-05,3e-05,-4e-05,-2e-05,2e-05,4e-05,3e-05,-3e-05,1.0,...,1e-05,-1e-05,-1e-05,2e-05,-1e-05,2e-05,-1e-05,0.0,-1e-05,-1e-05


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

5.278971529037717

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[6.61362619e+07 5.34160786e+07 4.24249695e+07 1.14970228e+07
 6.41143363e+06 2.81489751e+06 1.38909932e+06 1.09788191e+06
 4.61393015e+05 4.21611411e+05 2.43898005e+05 1.41192832e+05
 5.37915296e+04 4.80077391e+04 2.87696928e+04 2.10750787e+04
 1.21887460e+04 7.23084775e+03 2.44014602e+03 2.02630444e+03
 1.40972388e+03 1.04668080e+03 4.84947919e+02 1.41743744e+02
 3.00278321e+01 1.58327630e+01 2.25764891e-01 1.07014869e-01
 7.86579780e-03 2.05038209e-03 1.33489897e-05 1.30430770e-23]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,-0.1386532,0.9882167,-0.06228878,-0.01311218,-0.008908219,-0.005564004,-0.003819,-0.003385,-0.002143,-0.001581,...,-7.4e-05,-0.000122,-1.8e-05,-1.4e-05,-1e-06,-1e-06,-3.288818e-07,-1.529247e-07,-1.739401e-08,-1.594458e-09
1,0.9786353,0.1267745,-0.1599365,-0.01847974,-0.01222799,-0.007524118,-0.005133,-0.004571,-0.002872,-0.002127,...,-9.8e-05,-0.000162,-2.5e-05,-1.7e-05,-2e-06,-2e-06,-4.401833e-07,-2.910898e-07,-2.518713e-08,-5.843405e-09
2,0.149287,0.08231974,0.9846314,-0.02875834,-0.0183389,-0.0110022,-0.007402,-0.006611,-0.004139,-0.003042,...,-0.000143,-0.000233,-3.4e-05,-2.6e-05,-3e-06,-2e-06,-5.804294e-07,-2.926715e-07,-2.676517e-08,-8.718976e-09
3,0.01934107,0.01658885,0.02314878,0.9970805,-0.0604665,-0.0225848,-0.013552,-0.011763,-0.007059,-0.005127,...,-0.00023,-0.000384,-5.7e-05,-4.3e-05,-5e-06,-4e-06,-1.01523e-06,-5.847108e-07,-1.443787e-08,-1.016418e-08
4,0.01395168,0.01225151,0.01617389,0.05780511,0.9964205,-0.04487847,-0.022099,-0.018695,-0.010583,-0.00755,...,-0.000336,-0.000555,-8.2e-05,-6.3e-05,-7e-06,-5e-06,-1.376629e-06,-8.618866e-07,-7.860663e-08,-2.085882e-08
5,0.008585683,0.007644744,0.009735475,0.02296036,0.04070376,0.9961917,-0.05259,-0.038884,-0.017602,-0.011942,...,-0.000503,-0.000829,-0.000125,-9.2e-05,-1.1e-05,-8e-06,-2.128281e-06,-1.181786e-06,-1.184011e-07,-3.001361e-08
6,0.005273471,0.004727276,0.005903369,0.01256636,0.01872417,0.0426783,0.983152,-0.169892,-0.028317,-0.016995,...,-0.000646,-0.001074,-0.000159,-0.00012,-1.4e-05,-1e-05,-2.628699e-06,-1.619965e-06,-1.471332e-07,-4.661662e-08
7,0.006339847,0.005661747,0.007112461,0.01471949,0.02140624,0.04319359,0.16341,0.982026,-0.050031,-0.02759,...,-0.000978,-0.001634,-0.000245,-0.000183,-2.2e-05,-1.6e-05,-4.195346e-06,-2.259434e-06,-1.97968e-07,-5.794946e-08
8,0.001133226,0.001010432,0.001263146,0.002526805,0.003487915,0.005752264,0.010165,0.012996,0.836913,-0.020585,...,-0.00042,-0.00071,-0.000103,-8.1e-05,-1e-05,-6e-06,-1.760946e-06,-1.168744e-06,-1.170774e-07,-2.403548e-08
9,0.004954903,0.004460238,0.005552818,0.01100301,0.01515804,0.02501334,0.043259,0.053984,0.536681,-0.111476,...,-0.00203,-0.003379,-0.000503,-0.00038,-4.4e-05,-3.2e-05,-8.571573e-06,-4.857289e-06,-4.687162e-07,-9.480103e-08


In [13]:
cos_dist = np.diag(spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine'))
cos_dist = cos_dist = [np.min(row) for row in cos_dist]
cos_dist


[1.1386531898593384,
 0.8732255432933953,
 0.015368639055459488,
 0.0029195063189429993,
 0.00357951824546876,
 0.0038082660732202678,
 0.016848219758969663,
 0.01797370844022994,
 0.1630871348307361,
 1.1114763669492438,
 1.0936510589418666,
 1.047258510325879,
 1.0339670193909745,
 1.0805212629326824,
 1.060525584875347,
 1.0616140091280102,
 1.0328797518486577,
 1.0662371068377468,
 1.0522770946511324,
 0.9951760530434562,
 0.9896006086523231,
 1.055024611348737,
 0.025026890017243453,
 0.959218238095926,
 0.031385533275193556,
 0.030836969496620847,
 0.03887336178401268,
 0.04169358706600823,
 0.03652336265440481,
 0.03585608556073827,
 0.03395855560460459,
 0.032936992658890274]