In [1]:
DATA_NAME = 'agriVision-full' 
TRANSFORM = 'gabor'
CHANNEL = 'blue'
PARAM_CSV = 'gabor_new.csv' # only use for Gabor

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_blue_gabor_full_agriVision.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))
param_df = pd.read_csv(os.path.join(ROOT_DIR, "gabor", PARAM_CSV))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters
elif 'gabor' in TRANSFORM:
    GROUPS = param_df['index']

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        if len(group_data_map[group]) < 100:
            X[:, i] = np.full(bootstrap_size, np.nan)
        else:
            X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
    cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df = cov_df.dropna(how='all').dropna(axis=1, how='all')
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,122.43,0.0,-0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,...,-0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0
1,0.0,41.23,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,...,0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0
2,-0.0,0.0,31.38,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,...,-0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,0.0
3,0.0,-0.0,0.0,22.12,-0.0,0.0,-0.0,0.0,-0.0,0.0,...,-0.0,-0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,0.0
4,-0.0,-0.0,0.0,-0.0,5.82,-0.0,-0.0,-0.0,-0.0,0.0,...,0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
5,-0.0,-0.0,0.0,0.0,-0.0,0.95,-0.0,-0.0,0.0,-0.0,...,0.0,-0.0,0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0
6,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.54,0.0,0.0,0.0,...,0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0
7,-0.0,0.0,0.0,0.0,-0.0,-0.0,0.0,96.85,0.0,-0.0,...,-0.0,0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,-0.0
8,0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,51.27,0.0,...,-0.0,0.0,0.0,0.0,0.0,-0.0,0.0,0.0,-0.0,-0.0
9,-0.0,-0.0,-0.0,0.0,0.0,-0.0,0.0,-0.0,0.0,31.01,...,0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,0.0


In [10]:
corr_matrix = cov_df / np.sqrt(np.outer(np.diag(cov_df), np.diag(cov_df)))
corr_matrix.round(5)

index,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,5e-05,-3e-05,5e-05,-1e-05,-4e-05,0.0,-3e-05,3e-05,-1e-05,...,-5e-05,5e-05,1e-05,2e-05,3e-05,-6e-05,-1e-05,4e-05,3e-05,4e-05
1,5e-05,1.0,7e-05,-4e-05,-3e-05,-6e-05,-1e-05,4e-05,0.0,-3e-05,...,0.0,-2e-05,1e-05,4e-05,-3e-05,-1e-05,-1e-05,0.0,1e-05,2e-05
2,-3e-05,7e-05,1.0,3e-05,1e-05,2e-05,4e-05,2e-05,3e-05,-0.0,...,-0.0,-1e-05,4e-05,-1e-05,5e-05,1e-05,-8e-05,-4e-05,4e-05,2e-05
3,5e-05,-4e-05,3e-05,1.0,-3e-05,4e-05,-1e-05,0.0,-4e-05,2e-05,...,-4e-05,-3e-05,1e-05,3e-05,2e-05,-0.0,3e-05,1e-05,-3e-05,0.0
4,-1e-05,-3e-05,1e-05,-3e-05,1.0,-1e-05,-0.0,-1e-05,-4e-05,4e-05,...,0.0,4e-05,-0.0,-3e-05,1e-05,-6e-05,6e-05,3e-05,2e-05,-3e-05
5,-4e-05,-6e-05,2e-05,4e-05,-1e-05,1.0,-0.0,-3e-05,4e-05,-1e-05,...,1e-05,-4e-05,4e-05,-2e-05,1e-05,-0.0,5e-05,2e-05,5e-05,-1e-05
6,0.0,-1e-05,4e-05,-1e-05,-0.0,-0.0,1.0,1e-05,1e-05,4e-05,...,3e-05,-1e-05,-3e-05,-1e-05,1e-05,2e-05,3e-05,0.0,-2e-05,0.0
7,-3e-05,4e-05,2e-05,0.0,-1e-05,-3e-05,1e-05,1.0,1e-05,-2e-05,...,-1e-05,1e-05,-4e-05,-2e-05,-6e-05,1e-05,-4e-05,2e-05,2e-05,-3e-05
8,3e-05,0.0,3e-05,-4e-05,-4e-05,4e-05,1e-05,1e-05,1.0,2e-05,...,-2e-05,1e-05,2e-05,1e-05,2e-05,-5e-05,4e-05,3e-05,-2e-05,-2e-05
9,-1e-05,-3e-05,-0.0,2e-05,4e-05,-1e-05,4e-05,-2e-05,2e-05,1.0,...,0.0,-3e-05,-2e-05,1e-05,-1e-05,4e-05,-2e-05,-1e-05,-4e-05,1e-05


In [11]:
np.linalg.norm(cov_df - np.diag(np.diag(cov_df)))  # Remove diagonal elements for visualization

0.03824361510538293

In [12]:
pca = PCA()
pca.fit(cov_df)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[4.83234318e+02 3.57930037e+02 2.71162822e+02 2.23791779e+02
 1.92490211e+02 1.24785342e+02 1.14642492e+02 1.02185353e+02
 8.49513631e+01 6.26644053e+01 5.55716189e+01 4.30925638e+01
 3.95098163e+01 2.55782906e+01 2.37593132e+01 2.17677598e+01
 1.45261450e+01 1.16198734e+01 1.03618976e+01 7.73251949e+00
 6.19626785e+00 4.93476685e+00 2.53037787e+00 2.29165239e+00
 1.56418920e+00 1.38436709e+00 7.81468495e-01 5.49690634e-01
 4.89570105e-01 3.70792262e-01 9.29000568e-02 4.07612708e-02
 3.56953337e-02 2.56180982e-02 2.16909055e-02 1.93484354e-02
 1.25135014e-02 5.83837089e-03 2.88706265e-03 1.24346719e-03
 2.44801643e-04 9.14838139e-29]

Principal components (eigenvectors):


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,-0.071712,-0.006418,-0.004721,-0.003227,-0.000839,-0.000137,-7.8e-05,-0.02633,-0.008437,-0.004652,...,-0.000676,-0.000131,-1.1e-05,0.995712,-0.007884,-0.002639,-0.003681,-0.001141,-0.000152,-1.8e-05
1,0.99193,-0.008047,-0.005858,-0.003947,-0.001016,-0.000168,-9.4e-05,-0.04672,-0.010809,-0.005768,...,-0.000842,-0.000158,-1.4e-05,0.065211,-0.010001,-0.003254,-0.0045,-0.001386,-0.00018,-1.9e-05
2,0.068982,-0.009515,-0.006811,-0.004511,-0.001138,-0.000186,-0.000109,-0.121513,-0.013146,-0.006624,...,-0.000933,-0.000185,-1.7e-05,0.033997,-0.012059,-0.00367,-0.005155,-0.001595,-0.000207,-2.5e-05
3,0.043512,-0.011322,-0.007893,-0.005246,-0.001313,-0.000217,-0.000122,0.975672,-0.01613,-0.007801,...,-0.00108,-0.000211,-2e-05,0.026574,-0.014762,-0.004238,-0.006003,-0.001807,-0.000234,-2.8e-05
4,0.041732,-0.016036,-0.010948,-0.007204,-0.00179,-0.000289,-0.000167,0.15653,-0.023531,-0.010781,...,-0.001466,-0.000279,-2.5e-05,0.027874,-0.021163,-0.005777,-0.008248,-0.002483,-0.000323,-3.6e-05
5,0.014261,-0.01374,-0.008691,-0.005463,-0.001301,-0.000213,-0.000115,0.025913,-0.023566,-0.008547,...,-0.001078,-0.000216,-1.7e-05,0.010792,-0.020094,-0.004313,-0.006345,-0.001815,-0.000237,-2.5e-05
6,0.01814,-0.021036,-0.012853,-0.007987,-0.001916,-0.000313,-0.000177,0.031539,-0.03783,-0.012665,...,-0.00158,-0.000296,-2.8e-05,0.014112,-0.031659,-0.006331,-0.009275,-0.002634,-0.000338,-3.9e-05
7,0.021084,-0.030849,-0.018347,-0.011127,-0.002629,-0.000415,-0.000243,0.034709,-0.061368,-0.017929,...,-0.002134,-0.000416,-3.5e-05,0.016561,-0.049454,-0.008788,-0.012966,-0.003636,-0.000467,-5.3e-05
8,0.019201,-0.041745,-0.022586,-0.013329,-0.003038,-0.000493,-0.000281,0.029599,-0.108183,-0.022123,...,-0.002474,-0.000482,-4.3e-05,0.01532,-0.077858,-0.010374,-0.015592,-0.004248,-0.000544,-5.9e-05
9,0.010982,-0.052903,-0.022081,-0.011916,-0.00258,-0.000408,-0.000236,0.015841,0.957827,-0.021483,...,-0.002104,-0.000405,-3.5e-05,0.008983,-0.254157,-0.009099,-0.014125,-0.003561,-0.000459,-5.2e-05


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(cov_df.shape[0]), metric='cosine')
cos_dist =  [np.min(row) for row in cos_dist]
cos_dist


[0.004288152311995752,
 0.008069770891632744,
 0.01397171868044611,
 0.024328111544978714,
 0.022746107784474834,
 0.042859606436250086,
 0.06838296933984522,
 0.056992072590777765,
 0.031559606927627826,
 0.04217316004137284,
 0.05016476119581381,
 0.10578356504840636,
 0.11771858881902109,
 0.06478617183629265,
 0.23199948430252892,
 0.26901920098094123,
 0.031824577201037596,
 0.09320534629080968,
 0.10965424005568125,
 0.04985169653105437,
 0.06392436128668644,
 0.05246151040445479,
 0.17790298287389172,
 0.2006665267002684,
 0.2781700145285918,
 0.31136901628474756,
 0.036381077742456025,
 0.21006761318890455,
 0.3018753601243078,
 0.13784840111823182,
 0.01611334065115655,
 0.16772886084639715,
 0.21232411457592748,
 0.11165638416609291,
 0.2675142653330004,
 0.24672252098957959,
 0.4123061057658858,
 0.1347291870112176,
 0.1880921756360514,
 0.20987351903042906,
 0.217391844779239,
 0.22621085231032456]