In [1]:
import os
import git
from pathlib import Path
from typing import List
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display
import numpy as np
import plotly.graph_objects as go
from IPython.display import clear_output
import scipy
from scipy import stats
from scipy.spatial import ConvexHull
import pylustrator
from scipy.spatial import Delaunay
from scipy.spatial import distance
from sklearn.decomposition import PCA

# Root of the enclosing git working tree, located by searching upward from the
# current working directory, so paths below do not depend on where the notebook
# is launched from inside the repository.
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

In [2]:
def find_cov_matrix(root_dir: str) -> List[str]:
    """Recursively collect every ``covariance_matrix.csv`` below ``root_dir``.

    Args:
        root_dir: Directory to search. Sub-directories are walked with
            ``os.walk`` (symlinked directories are not followed).

    Returns:
        Absolute paths (as strings) of all files named exactly
        ``covariance_matrix.csv``, sorted lexicographically so the result is
        reproducible across filesystems and runs. Empty if nothing matches,
        including when ``root_dir`` exists but is not a directory.

    Raises:
        FileNotFoundError: If ``root_dir`` does not exist.
    """
    root_path = Path(root_dir)
    if not root_path.exists():
        raise FileNotFoundError(f"Directory not found: {root_dir}")

    cov_paths = [
        str((Path(current_dir) / 'covariance_matrix.csv').absolute())
        for current_dir, _, files in os.walk(root_path)
        if 'covariance_matrix.csv' in files
    ]
    # os.walk lists directory entries in arbitrary (filesystem-dependent) order;
    # sorting keeps downstream row order stable under Restart & Run All.
    return sorted(cov_paths)


# Build one summary row per covariance matrix found under results/case-studies.
# Each row carries metadata parsed from the directory layout plus two scalar/array
# summaries of the matrix (off-diagonal Frobenius norm, per-component cosine distance).
all_cov_paths = find_cov_matrix(os.path.join(ROOT_DIR, "results", "case-studies"))
all_cov_dfs = []

# Dataset name -> broad imaging domain. Datasets not listed get NaN.
# 'standardTesting' is grouped with the natural-image datasets (a separate
# 'classical' category was considered at some point but is not used).
DATASET_TYPES = {
    'pastis': 'remote sensing',
    'agriVision': 'remote sensing',
    'spaceNet': 'remote sensing',
    'syntheticMRI2D': 'medical',
    'syntheticMRI3D': 'medical',
    'coco': 'natural',
    'segmentAnything': 'natural',
    'standardTesting': 'natural',
}

for path in all_cov_paths:
    # NOTE(review): the substring checks below run against the full absolute
    # path, so a checkout directory containing 'scaleTesting', 'MRI' or
    # 'learned' would affect every file — confirm this is acceptable.
    if 'scaleTesting' in path:
        continue

    # Keep the trailing path components and strip the leading
    # 'case-studies' / 'results/case-studies' so parts[0] is the dataset name.
    parts = Path(path).parts[-7:]
    if parts[0] == 'case-studies':
        parts = parts[1:]
    elif parts[0] == 'results':
        parts = parts[2:]

    # Fields absent from a given directory layout stay NaN.
    channel = np.nan
    orientation = np.nan

    # The last two components (folder + file name) are ignored in every layout.
    # An unexpected depth raises ValueError from the tuple unpacking.
    if "MRI" in path:
        # <dataset>/<slice>/<transform>/<orientation>/...; the slice is stored as 'subset'.
        dataset, subset, transform, orientation, _, _ = parts
    elif len(parts) > 6:
        # <dataset>/<subset>/<transform>/<orientation>/<channel>/...
        dataset, subset, transform, orientation, channel, _, _ = parts
    elif "learned" in path:
        # <dataset>/<subset>/<transform>/...; no orientation or channel level.
        dataset, subset, transform, _, _ = parts
    else:
        # <dataset>/<size>/<transform>/<channel>/...; the size is stored as 'subset'.
        dataset, subset, transform, channel, _, _ = parts

    cov_master_df = pd.DataFrame(
        {
            'dataset': dataset,
            'transform': transform,
            'subset': subset,
            'channel': channel,
            'orientation': orientation,
            'dataset_type': DATASET_TYPES.get(dataset, np.nan),
        },
        index=[0],
    )

    # Read the matrix from the 'CSVs' folder next to the discovered file's
    # parent (the same file whenever that parent is itself named 'CSVs').
    cov_csv_path = Path(path).parent.parent / "CSVs" / "covariance_matrix.csv"
    cov_matrix = pd.read_csv(cov_csv_path).drop(columns=['Unnamed: 0']).to_numpy()

    # Frobenius norm of the matrix with its diagonal zeroed out.
    cov_master_df["fro_norm"] = np.linalg.norm(cov_matrix - np.diag(np.diag(cov_matrix)))

    # NOTE(review): PCA.fit treats the rows of cov_matrix as samples, so
    # components_ are the principal axes of those rows rather than an
    # eigendecomposition of cov_matrix itself — confirm this is intended.
    pca = PCA()
    pca.fit(cov_matrix)
    eigenvectors = pca.components_

    # Cosine distance between the i-th component and the i-th standard basis
    # vector; wrapped in a list so the whole array lands in the single row's cell.
    cov_master_df["cosine_dist"] = [np.diag(distance.cdist(eigenvectors, np.eye(eigenvectors.shape[0]), metric='cosine'))]
    all_cov_dfs.append(cov_master_df)

if not all_cov_dfs:
    raise ValueError("No covariance_matrix.csv files found under results/case-studies")

# NOTE(review): every per-file frame has index 0, so the concatenated frame has
# a duplicated index; call reset_index(drop=True) downstream if unique labels are needed.
cov_main_df = pd.concat(all_cov_dfs)
