# Compute Connectome Harmonics

In [1]:
%load_ext autoreload
%autoreload 2

import sys
import os
from pathlib import Path

# Dynamically set the working directory to the project root
def find_project_root(start: Path, marker: Path = Path('src') / 'ch') -> Path:
    """Return the project root for a notebook started from ``start``.

    Walks upwards from ``start`` (inclusive) and returns the first directory
    that contains ``marker``. Because ``start`` itself is checked first, calling
    this again after the working directory has already been moved to the root
    returns the same directory, so re-running the cell is harmless.

    Args:
        start: Directory to begin the search from (normally ``Path.cwd()``).
        marker: Relative path that identifies the root; defaults to the
            ``src/ch`` package directory this notebook imports from.

    Returns:
        The first ancestor (or ``start``) containing ``marker``. If none does,
        falls back to ``start.parent.parent``, i.e. the notebook's previous
        fixed "two levels up" rule.
    """
    for candidate in (start, *start.parents):
        if (candidate / marker).is_dir():
            return candidate
    # No marker found: keep the historical behaviour rather than failing here.
    return start.parent.parent


project_root = find_project_root(Path.cwd())
os.chdir(project_root)

# Guarded append so repeated runs do not pile up duplicate sys.path entries
src_dir = os.path.join(os.getcwd(), 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

import ch
from ch.settings import load_settings
from ch.data_handling.io import load_matlab_file, get_nets_and_ages, backup_dataframe
from ch.data_handling.metadata import load_metadata

# Load settings
# The resulting `settings` object is the first argument of the ch.data_handling
# helpers called throughout this notebook.
settings = load_settings()

# Load metadata - pass settings object
# Reads the 'subjects' table; the loader logs the source file and subject count.
subject_metadata = load_metadata(settings, table='subjects')


15:02:03 - ch.data_handling.metadata - INFO - Loading subjects metadata from metadata/subject_metadata.parquet
15:02:03 - ch.data_handling.metadata - INFO - Loaded metadata for 594 subjects


In [3]:

# Reload the 'subjects' table and preview its first rows.
# Disabled one-off maintenance call, kept for reference (presumably removes the
# 'atlas' column from the subjects table — confirm before re-enabling):
# ch.data_handling.metadata.delete_metadata_column(settings, 'atlas', 'subjects')
subject_metadata = load_metadata(settings, table='subjects')
subject_metadata.head(20)

15:02:06 - ch.data_handling.metadata - INFO - Loading subjects metadata from metadata/subject_metadata.parquet
15:02:06 - ch.data_handling.metadata - INFO - Loaded metadata for 594 subjects


Unnamed: 0_level_0,age,dataset,scan_type,n_regions,has_yeo_labels,created_at,updated_at
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,24,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620
2,28,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620
3,28,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620
4,23,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620
5,22,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620
6,25,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620
7,18,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620
8,25,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620
9,28,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620
10,25,camcan,DTI,376,True,2025-09-05 14:10:52.072141,2025-09-05 15:01:31.558620


In [None]:
# Example: Using instantiate_metadata (will show warning since metadata exists)
print("🏗️ Example: Using instantiate_metadata function")

try:
    # This will fail because metadata already exists.
    # `subject_metadata` (loaded in the setup cell) is passed as the frame: the
    # `metadata` name is only assigned much later in the notebook, so using it
    # here raised NameError on a fresh kernel — which the handler below does
    # not catch.
    ch.data_handling.metadata.instantiate_metadata(settings, subject_metadata)
except FileExistsError as e:
    print(f"❌ Expected error: {e}")
    print("✅ Safety check working correctly!")

print("\n📝 To use instantiate_metadata, you would need to delete the existing file first:")
print("   settings.metadata_parquet.unlink()  # Delete existing file")
print("   ch.data_handling.metadata.instantiate_metadata(settings, new_df)")


In [None]:
# Example: Using the simplified update_metadata function
print("📝 Example: Using the simplified update_metadata function")

# Extract metadata (done once; the cell previously repeated this three times)
camcan_metadata = ch.data_handling.metadata.extract_camcan_metadata(settings)

# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() — that only appended a stray "None" line.
camcan_metadata.info()
# Preview the extracted frame (the old code referenced an undefined `metadata`)
print(camcan_metadata.head())

# Update metadata with the DataFrame - simple and clean!
# Note: This would now use instantiate_metadata() for creating new metadata
# or update_metadata() for adding/updating columns
ch.data_handling.metadata.update_metadata(settings, camcan_metadata, 'age')

print("✅ Metadata updated successfully!")
print(f"   Shape: {camcan_metadata.shape}")
print(f"   Columns: {list(camcan_metadata.columns)}")



In [None]:
import ch

# Run the metadata summary helper with the shared settings object.
# NOTE(review): what it prints/returns is not visible from this notebook — confirm.
ch.data_handling.metadata.create_metadata_summary(settings)


In [None]:
# Example: Adding and then deleting a column
import pandas as pd
import numpy as np

print("🗑️ Example: Adding and deleting a column")

# Seeded generator so the example scores are identical on every run
rng = np.random.default_rng(42)

# First, add a test column
test_data = pd.DataFrame({
    'subject_id': [1, 2, 3, 4, 5],
    'test_score': rng.normal(50, 10, 5)
})

print("Adding test column...")
ch.data_handling.metadata.update_metadata(settings, test_data, 'test_score')

try:
    # Check it was added
    metadata_with_test = ch.data_handling.metadata.load_metadata(settings)
    print(f"✅ Columns after adding: {list(metadata_with_test.columns)}")
finally:
    # Now delete it. Done in `finally` so the throwaway column never lingers in
    # the stored metadata if the check above raises.
    print("\nDeleting test column...")
    ch.data_handling.metadata.delete_metadata_column(settings, 'test_score')

# Check it was deleted
metadata_after_delete = ch.data_handling.metadata.load_metadata(settings)
print(f"✅ Columns after deleting: {list(metadata_after_delete.columns)}")

# Try to delete a critical column (should fail)
print("\nTrying to delete critical column 'age'...")
try:
    ch.data_handling.metadata.delete_metadata_column(settings, 'age')
except ValueError as e:
    print(f"❌ Expected error: {e}")
    print("✅ Safety check working correctly!")


In [None]:
# Example: Using utility functions for metadata management
print("🔧 Example: Using utility functions")


def _report_column_stats(heading, column):
    """Print `heading`, fetch the stats for `column`, list them, and return them."""
    print(heading)
    stats = ch.data_handling.metadata.get_column_stats(settings, column)
    for stat_name, stat_value in stats.items():
        print(f"  {stat_name}: {stat_value}")
    return stats


# Snapshot the metadata before anything is modified
print("Creating metadata backup...")
backup_path = ch.data_handling.metadata.backup_metadata(settings)
print(f"✅ Backup created: {backup_path}")

# Look up a single subject
print("\nGetting info for subject 1:")
subject_info = ch.data_handling.metadata.get_subject_info(settings, 1)
print(subject_info)

# Per-column statistics: 'age' first, then the categorical 'dataset' column
age_stats = _report_column_stats("\nAge column statistics:", 'age')
dataset_stats = _report_column_stats("\nDataset column statistics:", 'dataset')


In [None]:
# Extract the CamCAN metadata and print its column/dtype report
camcan_metadata = ch.data_handling.metadata.extract_camcan_metadata(settings)
camcan_metadata.info()

# Preview the stored subjects table. This is the cell's last expression so the
# notebook actually renders it; the original `metadata.head()` sat mid-cell
# (nothing was displayed) and `metadata` is not defined this early on a fresh
# kernel.
subject_metadata.head()


In [None]:
# Fix the column alignment issue with metadata.info()
import pandas as pd

# Set pandas display options for better formatting
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# `subject_metadata` is the table loaded by the setup cell. The `metadata` name
# used here before is only assigned further down the notebook, so the cell
# failed with NameError under Restart & Run All.

# Now display the info with proper alignment
print("Metadata Info:")
subject_metadata.info()

print("\n" + "="*50)
print("Metadata Shape:", subject_metadata.shape)
print("="*50)

# Also show a preview of the data
print("\nFirst few rows:")
subject_metadata.head()

In [None]:
# Inspect the raw MATLAB file: list its variable names, then report the shape
# (or, failing that, the type) of the entries we expect to find.
# The path comes from `settings.camcan_raw`; a bare `camcan_raw` name is never
# defined in this notebook.
d = load_matlab_file(settings.camcan_raw)
print(sorted(k for k in d.keys() if not k.startswith("__")))
for k in ["nets", "age", "eigs", "laplacian", "connectivity", "harmonics"]:
    if k in d:
        v = d[k]
        # Explicit attribute check instead of a blanket `except Exception`,
        # which would also have hidden unrelated errors.
        if hasattr(v, "shape"):
            print(k, "shape:", v.shape)
        else:
            print(k, type(v))

In [None]:
# Import through the `ch` package (its parent `src` directory is on sys.path).
# Importing it as `src.ch...` would load a second copy of the package under a
# different module name.
from ch.viz.viz import plot_heatmap

# Index along the third axis of `nets` to plot (presumably the subject axis — confirm)
slice_index = 500

# The raw-file path lives on the settings object; `camcan_raw` alone is undefined
nets, ages = get_nets_and_ages(settings.camcan_raw)
print("nets shape:", getattr(nets, "shape", None))
print("ages shape:", getattr(ages, "shape", None))
assert getattr(nets, "ndim", 0) >= 2, "Expected 2D-ish nets array"
assert getattr(ages, "ndim", 0) == 1, "Expected 1D ages array"

# Fail with a clear message instead of an IndexError inside the slicing below
assert nets.ndim == 3 and nets.shape[2] > slice_index, (
    f"Expected a 3D nets array with more than {slice_index} slices on the last axis"
)

plot_heatmap(nets[:, :, slice_index])


In [None]:
import matplotlib.pyplot as plt
import numpy as np


def visualize_matrix(matrix, annotate=False):
    """Show `matrix` as two side-by-side 'hot' heatmaps with a shared colorbar.

    The left panel is the plain heatmap. The right panel draws the same image
    and, when `annotate` is true, overlays each cell's value rounded to an
    integer — the behaviour its title advertises. Annotation is opt-in because
    writing one text label per cell is only legible (and fast) for small
    matrices.

    Args:
        matrix: 2D array-like of numeric values to display.
        annotate: If True, write the rounded value of every cell on the right
            panel. Defaults to False, which reproduces the previous output.

    Returns:
        None. The figure is displayed with `plt.show()`.
    """
    fig, axs = plt.subplots(1, 3, figsize=(15, 6), gridspec_kw={'width_ratios': [1, 1, 0.05]})

    # Original heatmap
    im = axs[0].imshow(matrix, cmap='hot', interpolation='nearest')
    axs[0].set_title('Original Heatmap')
    axs[0].set_ylabel('Harmonic Number')

    # Heatmap with rounded integer values
    axs[1].imshow(matrix, cmap='hot', interpolation='nearest')
    axs[1].set_title('Heatmap with Rounded Integer Values')
    axs[1].set_ylabel('Harmonic Number')

    if annotate:
        values = np.asarray(matrix)
        n_rows, n_cols = values.shape[:2]
        # Annotate each cell with the rounded integer value
        for i in range(n_rows):
            for j in range(n_cols):
                # Cyan text stays readable on the 'hot' colormap
                axs[1].text(j, i, f'{values[i, j]:.0f}', ha='center', va='center', color='c')

    # Position the colorbar to the right of the heatmaps; both panels show the
    # same data, so the first image's scale applies to both.
    fig.colorbar(im, cax=axs[2], orientation='vertical')

    plt.tight_layout()
    plt.show()


# Plot the [:, :, 500] slice of `nets` with the helper defined above in this cell
visualize_matrix(nets[:,:,500])

In [None]:
# Load your processed data
# The module is imported instead of its `load_metadata` function: a bare
# `from ch.data_handling.data_utils import load_metadata` rebinds the
# `load_metadata` name that the setup cell imported from
# ch.data_handling.metadata, so re-running earlier cells afterwards would
# silently call a different function.
from ch.data_handling import data_utils
from ch.data_handling.data_utils import load_connectivity_simple

metadata = data_utils.load_metadata(settings)
connectivity = load_connectivity_simple(settings)

print(f"✅ Metadata: {metadata.shape}")
print(f"✅ Connectivity: {connectivity.shape}")
print(f"✅ Data type: {connectivity.dtype}")

# Get a subject's connectivity matrix
subject_conn = connectivity[0]  # First subject
print(f"✅ Subject 1 matrix: {subject_conn.shape}")

In [None]:

# Render the `metadata` frame as this cell's output (bare last expression)
metadata

In [None]:
# FIX METADATA: Restore all original columns
# NOTE(review): the stored table previewed near the top of this notebook also
# has `created_at` / `updated_at` columns, which this frame does not recreate.
from ch.data_handling.io import get_nets_and_ages
import pandas as pd

print("🔄 Restoring full metadata...")

# Load raw data to get the correct number of subjects
nets, ages = get_nets_and_ages(settings.camcan_raw)
n_subjects = len(ages)

# Create full metadata DataFrame with all original columns.
# set_index is chained (no inplace=True) so re-running the cell always starts
# from a freshly built frame.
metadata = pd.DataFrame({
    'subject_id': range(1, n_subjects + 1),
    'age': ages.flatten(),
    'dataset': 'camcan',
    'scan_type': 'DTI',
    'n_regions': 376,
    'has_yeo_labels': True
}).set_index('subject_id')

# Save metadata
metadata_path = settings.metadata_parquet
if Path(metadata_path).exists():
    # The write below replaces the stored table wholesale, so snapshot it first.
    # NOTE(review): assumes backup_metadata(settings) backs up the file at
    # settings.metadata_parquet — confirm.
    backup_path = ch.data_handling.metadata.backup_metadata(settings)
    print(f"✅ Backup created: {backup_path}")
metadata.to_parquet(metadata_path)
print(f"✅ Restored full metadata: {metadata_path}")
print(f"   Columns: {list(metadata.columns)}")
print(f"   Shape: {metadata.shape}")

# Show the restored metadata
print("\nRestored metadata:")
metadata.head()
