In [1]:
DATA_NAME = 'coco-indoor' 
TRANSFORM = 'learned'
CHANNEL = ''

In [2]:
path_list = DATA_NAME.split("-") + TRANSFORM.split("-")
if CHANNEL:
    path_list.append(CHANNEL)
print(f"Name the notebook:\nindependence_{'_'.join(path_list[::-1])}.ipynb")
FULL_DATA_NAME='-'.join(path_list)


import git
from pathlib import Path
import os
CWD = os.getcwd()
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

Path(os.path.join(CWD, "CSVs")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "plots")).mkdir(exist_ok=True)
Path(os.path.join(CWD, "cache")).mkdir(exist_ok=True)

GROUP = 'layer' if TRANSFORM.split("-")[0] == 'wavelet' else ('band' if TRANSFORM.split("-")[0] == 'fourier' else 'error')
RERUN = False
SKIP_OPTIMIZE_STEP = False

Name the notebook:
independence_learned_indoor_coco.ipynb


In [3]:
os.chdir(os.path.join(ROOT_DIR, "utilities"))
from testing import * # If MATLAB is not installed, open utilities and set to False
from plotting import *
os.chdir(CWD)
np.random.seed(0)

In [4]:
from scipy import spatial
from sklearn.decomposition import PCA

In [5]:
group_data_map = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}.pickle'))
group_total_samples = pd.read_pickle(os.path.join(ROOT_DIR, "transformed-data", f'{FULL_DATA_NAME}-size.pickle'))

In [6]:
if 'fourier' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)#[::3]
elif 'wavelet' in TRANSFORM:
    GROUPS = np.arange(2, sorted(group_data_map)[-1] + 1)
elif 'learned' in TRANSFORM:
    nonskewed_df = pd.read_csv(os.path.join(ROOT_DIR, 'learned-filters', 'nonskewed_filter_idxs_df.csv')).set_index(['dataset', 'num_images', 'num_bootstrap'])
    nonskewed_filter_idxs = eval(nonskewed_df.loc[DATA_NAME].sort_values('num_images', ascending=False)['nonskewed_filter_idxs'].iloc[0]) 
    GROUPS = nonskewed_filter_idxs # can set to filter_group_map.keys() to include all prepared filters

In [7]:
n_bootstrap = int(1e5) 
bootstrap_size = int(1e4)


In [8]:

cov_matrix = np.zeros((len(GROUPS), len(GROUPS)))
for _ in tqdm(range(n_bootstrap)):
    X = np.zeros((bootstrap_size, len(GROUPS)))
    for i in range(len(GROUPS)):
        group = GROUPS[i]
        X[:, i] = np.random.choice(group_data_map[group], size=(bootstrap_size), replace=True)
        cov_matrix += np.cov(X, rowvar=False)
cov_matrix /= n_bootstrap

  0%|          | 0/100000 [00:00<?, ?it/s]

In [9]:



# Convert covariance matrix to DataFrame for better formatting
cov_df = pd.DataFrame(cov_matrix, index=GROUPS, columns=GROUPS)
cov_df.to_csv(os.path.join(CWD, "CSVs", f'covariance_matrix.csv'))
cov_df.round(2)

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,54,55,56,57,58,59,60,61,62,63
0,288.91,-0.00,0.00,0.02,0.00,-0.00,-0.0,0.0,-0.01,0.01,...,-0.0,-0.0,-0.0,-0.0,0.0,0.00,0.00,0.00,0.00,0.00
2,-0.00,372.18,0.00,-0.02,-0.01,-0.01,-0.0,-0.0,-0.00,-0.01,...,0.0,-0.0,0.0,0.0,0.0,-0.00,-0.00,0.00,0.00,0.00
3,0.00,0.00,160.71,0.01,-0.01,0.00,0.0,-0.0,-0.00,0.00,...,0.0,0.0,-0.0,0.0,-0.0,0.00,-0.00,-0.00,0.00,-0.00
4,0.02,-0.02,0.01,495.39,-0.00,-0.01,-0.0,-0.0,-0.01,0.00,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.00,0.00,0.00,-0.00,0.00
5,0.00,-0.01,-0.01,-0.00,299.32,-0.01,-0.0,0.0,0.01,0.01,...,-0.0,0.0,0.0,-0.0,-0.0,0.00,0.00,0.00,0.00,-0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0.00,-0.00,0.00,-0.00,0.00,0.00,-0.0,0.0,0.00,-0.00,...,-0.0,-0.0,-0.0,-0.0,-0.0,30.82,-0.00,0.00,0.00,-0.00
60,0.00,-0.00,-0.00,0.00,0.00,-0.00,0.0,-0.0,-0.00,0.00,...,-0.0,0.0,-0.0,-0.0,0.0,-0.00,1.77,0.00,0.00,-0.00
61,0.00,0.00,-0.00,0.00,0.00,0.00,0.0,0.0,-0.00,0.00,...,-0.0,-0.0,0.0,0.0,0.0,0.00,0.00,5.33,0.00,-0.00
62,0.00,0.00,0.00,-0.00,0.00,-0.00,-0.0,0.0,-0.00,-0.00,...,0.0,-0.0,-0.0,-0.0,0.0,0.00,0.00,0.00,9.64,-0.00


In [10]:
corr_matrix = cov_matrix / np.sqrt(np.outer(np.diag(cov_matrix), np.diag(cov_matrix)))
corr_df = pd.DataFrame(corr_matrix, index=GROUPS, columns=GROUPS)
corr_df.round(5)

Unnamed: 0,0,2,3,4,5,6,7,8,9,10,...,54,55,56,57,58,59,60,61,62,63
0,1.00000,-0.00001,0.00002,0.00005,0.00000,-0.00000,-0.00000,0.00003,-0.00004,0.00003,...,-0.00004,-0.00001,-0.00001,-0.00001,0.00001,0.00001,0.00001,0.00001,0.00000,0.00000
2,-0.00001,1.00000,0.00001,-0.00005,-0.00004,-0.00004,-0.00000,-0.00004,-0.00000,-0.00004,...,0.00000,-0.00002,0.00001,0.00000,0.00000,-0.00001,-0.00001,0.00000,0.00000,0.00001
3,0.00002,0.00001,1.00000,0.00003,-0.00002,0.00002,0.00003,-0.00004,-0.00002,0.00002,...,0.00001,0.00002,-0.00001,0.00000,-0.00000,0.00001,-0.00001,-0.00000,0.00001,-0.00001
4,0.00005,-0.00005,0.00003,1.00000,-0.00001,-0.00003,-0.00001,-0.00003,-0.00004,0.00000,...,-0.00000,-0.00002,-0.00000,-0.00000,-0.00000,-0.00001,0.00001,0.00001,-0.00001,0.00001
5,0.00000,-0.00004,-0.00002,-0.00001,1.00000,-0.00005,-0.00001,0.00002,0.00004,0.00003,...,-0.00001,0.00000,0.00000,-0.00001,-0.00002,0.00000,0.00001,0.00000,0.00001,-0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0.00001,-0.00001,0.00001,-0.00001,0.00000,0.00000,-0.00001,0.00001,0.00000,-0.00001,...,-0.00004,-0.00001,-0.00001,-0.00003,-0.00000,1.00000,-0.00002,0.00003,0.00003,-0.00000
60,0.00001,-0.00001,-0.00001,0.00001,0.00001,-0.00001,0.00001,-0.00000,-0.00000,0.00001,...,-0.00005,0.00004,-0.00000,-0.00002,0.00001,-0.00002,1.00000,0.00001,0.00002,-0.00001
61,0.00001,0.00000,-0.00000,0.00001,0.00000,0.00001,0.00001,0.00000,-0.00000,0.00000,...,-0.00001,-0.00001,0.00000,0.00004,0.00002,0.00003,0.00001,1.00000,0.00001,-0.00001
62,0.00000,0.00000,0.00001,-0.00001,0.00001,-0.00000,-0.00000,0.00000,-0.00000,-0.00000,...,0.00000,-0.00000,-0.00001,-0.00002,0.00000,0.00003,0.00002,0.00001,1.00000,-0.00001


In [11]:
np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))  # Remove diagonal elements for visualization

0.19658497105907335

In [12]:
pca = PCA()
pca.fit(cov_matrix)

print("Singular values (explained variance):")
print(pca.explained_variance_)

print("\nPrincipal components (eigenvectors):")
eigenvectors = pca.components_
eigenvectors_df = pd.DataFrame(eigenvectors)
eigenvectors_df

Singular values (explained variance):
[1.00764661e+04 3.96973175e+03 3.33363984e+03 3.11511033e+03
 2.23420342e+03 1.45253925e+03 1.35707132e+03 1.30198414e+03
 1.05150301e+03 6.91781978e+02 6.65257037e+02 5.47334107e+02
 4.17622704e+02 3.59066889e+02 3.39061344e+02 3.15409909e+02
 3.03336544e+02 2.35284468e+02 2.24922046e+02 1.95837588e+02
 1.86395303e+02 1.80652978e+02 1.76355774e+02 1.72162705e+02
 1.62102440e+02 1.60568402e+02 1.60016064e+02 1.56567202e+02
 1.53010714e+02 1.41098474e+02 1.29563434e+02 1.23274207e+02
 1.15368715e+02 1.08166983e+02 9.78958870e+01 8.42997246e+01
 8.02962444e+01 7.46053129e+01 7.33210785e+01 6.96314111e+01
 6.52654586e+01 6.11747332e+01 4.89327906e+01 3.26445059e+01
 3.02085138e+01 2.67279843e+01 2.50748835e+01 2.20971092e+01
 1.90144617e+01 1.68559754e+01 1.36946630e+01 1.04058564e+01
 7.39194913e+00 4.81343881e+00 4.30254884e+00 3.64108379e+00
 2.99261533e+00 1.95313780e+00 9.36148067e-01 2.78192824e-01
 3.81538157e-02 1.48596874e-27]

Principal comp

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,52,53,54,55,56,57,58,59,60,61
0,-0.006581,-0.009533,-0.003285,-0.016308,-0.006882,-0.002183,-0.003861,-0.000256,-0.002071,-0.002386,...,-0.000445,-0.000307,-0.000893,-0.000278,-0.000344,-0.000609,-0.000035,-0.000104,-0.000191,-0.000024
1,-0.011792,-0.023615,-0.004833,0.994076,-0.012824,-0.003133,-0.005794,-0.000356,-0.002947,-0.003376,...,-0.000610,-0.000428,-0.001226,-0.000382,-0.000471,-0.000836,-0.000047,-0.000143,-0.000261,-0.000033
2,-0.012780,-0.030508,-0.004807,0.062337,-0.013968,-0.003070,-0.005815,-0.000342,-0.002882,-0.003355,...,-0.000589,-0.000405,-0.001188,-0.000374,-0.000458,-0.000810,-0.000048,-0.000140,-0.000252,-0.000033
3,-0.022408,-0.059787,-0.008037,0.073951,-0.024609,-0.005097,-0.009740,-0.000564,-0.004768,-0.005537,...,-0.000976,-0.000684,-0.001970,-0.000620,-0.000759,-0.001344,-0.000077,-0.000232,-0.000419,-0.000054
4,-0.032683,0.995139,-0.008675,0.027267,-0.038464,-0.005307,-0.010760,-0.000579,-0.004934,-0.005798,...,-0.000989,-0.000693,-0.001998,-0.000621,-0.000765,-0.001363,-0.000078,-0.000233,-0.000423,-0.000054
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,-0.006455,-0.005012,-0.011642,-0.003766,-0.006228,-0.017123,-0.010144,-0.481112,-0.018155,-0.015907,...,-0.108009,-0.229288,-0.043924,-0.323641,-0.175774,-0.069105,0.028489,0.109500,0.686633,0.019765
58,0.008562,0.006646,0.015410,0.004997,0.008261,0.022619,0.013440,0.286213,0.023965,0.021016,...,0.123614,0.204760,0.056471,0.243963,0.174640,0.085312,-0.081272,-0.460089,0.664060,-0.055547
59,0.006385,0.004957,0.011483,0.003727,0.006162,0.016821,0.010016,0.157397,0.017819,0.015638,...,0.084705,0.126113,0.041297,0.142217,0.112068,0.060912,-0.236777,0.856413,0.233922,-0.149412
60,0.001273,0.000989,0.002289,0.000743,0.001228,0.003353,0.001996,0.028648,0.003550,0.003115,...,0.016419,0.023635,0.008178,0.026284,0.021265,0.011968,0.794272,0.075087,0.039115,-0.597626


In [13]:
cos_dist = spatial.distance.cdist(eigenvectors, np.eye(len(GROUPS)), metric='cosine')
cos_dist = np.sort(np.concatenate(cos_dist))[:len(GROUPS)]
cos_dist


array([0.00052424, 0.00486145, 0.00592376, 0.01143513, 0.01299401,
       0.01384904, 0.02708178, 0.03374381, 0.03479699, 0.04015138,
       0.04094864, 0.05744657, 0.05966597, 0.0685469 , 0.07465679,
       0.10853526, 0.11275392, 0.11373731, 0.11571864, 0.14017556,
       0.14358665, 0.14407174, 0.15099477, 0.15425202, 0.15493839,
       0.16014157, 0.17353101, 0.18441112, 0.19445354, 0.20181248,
       0.20196339, 0.20572806, 0.21038137, 0.21457999, 0.22149172,
       0.22176252, 0.23094828, 0.23496332, 0.24124888, 0.24156388,
       0.24424054, 0.246008  , 0.24948247, 0.25404592, 0.26810065,
       0.27054563, 0.2718783 , 0.27784511, 0.28519084, 0.30028618,
       0.31336738, 0.31930982, 0.32473402, 0.33593951, 0.36183751,
       0.3666447 , 0.36859495, 0.37239763, 0.37797668, 0.39966145,
       0.44743503, 0.45301537])