In [2]:
import os
import numpy as np
import pandas as pd
import random
from glob import glob
from sklearn.metrics import silhouette_score
from tqdm import tqdm

import matplotlib.pyplot as plt

In [3]:
# config
preprocessed_dir = '../data/nili_fossae_preprocessed_spectra'
label_root = '../results/clustering'
scores_csv_path = '../results/labelmaps/k_silhouette_scores.csv'
random_number = 28
k_values = list(range(5, 13))
num_cubes = 15
silhouette_sample_size = 10000  # rows subsampled by silhouette_score for each k
random.seed(random_number)

# samples: one cube per '*_indices.npy' file, drawn reproducibly via the seed above
index_suffix = '_indices.npy'
index_paths = sorted(glob(os.path.join(preprocessed_dir, '*' + index_suffix)))
cube_names = [os.path.basename(p)[:-len(index_suffix)] for p in index_paths]
if not cube_names:
    raise FileNotFoundError(f"No '*{index_suffix}' files found in {preprocessed_dir}")
# Cap the sample size so a directory with fewer cubes than requested still works.
sampled = random.sample(cube_names, min(num_cubes, len(cube_names)))
print(f"Using {len(sampled)} sampled cubes:")
for name in sampled:
    print(">", name)


# stack full spectra and labels per k
print("\n Stack full spectra and labels per k")
spectra_all = [
    np.load(os.path.join(preprocessed_dir, f"{name}_spectra.npy"))
    for name in sampled
]
X = np.vstack(spectra_all)

# Row range [start, stop) that each sampled cube occupies inside X.
cube_bounds = np.concatenate(([0], np.cumsum([len(s) for s in spectra_all])))
offset = int(cube_bounds[-1])  # total number of stacked rows

# NOTE(review): the label slices below are taken at offsets accumulated over the
# *sampled* cubes, in sample order. That is only correct if labels_k{k}.npy was
# produced for exactly this stack of cubes in exactly this order. If the label
# file covers a different set/order of cubes (e.g. all cubes in sorted order),
# every slice is misaligned with its spectra -- the uniformly negative scores in
# the recorded output would be consistent with that. Confirm against the
# clustering step that wrote these files.
labels_dict = {}
for k in k_values:
    # Load each label file once per k rather than once per (cube, k) pair.
    labels_path = os.path.join(label_root, f'k{k}', f'labels_k{k}.npy')
    labels_full = np.load(labels_path)
    if len(labels_full) != offset:
        print(f"[k={k}] Warning: {labels_path} has {len(labels_full)} labels "
              f"but the sampled spectra have {offset} rows; "
              f"labels may not line up with the spectra")
    labels_dict[k] = [
        labels_full[start:stop]
        for start, stop in zip(cube_bounds[:-1], cube_bounds[1:])
    ]

# compute silhouette scores
print("\n Compute silhouette scores")
scores = {}
for k in tqdm(k_values, desc="Silhouette Evaluation"):
    y = np.concatenate(labels_dict[k])
    if len(y) != len(X):
        # Label file too short for the stacked spectra: report it explicitly
        # instead of letting it surface as a generic error from scikit-learn.
        print(f"[k={k}] Silhouette failed: {len(y)} labels for {len(X)} spectra")
        scores[k] = np.nan
        continue
    try:
        score = silhouette_score(
            X, y, sample_size=silhouette_sample_size, random_state=random_number
        )
    except Exception as e:
        # Best effort: keep evaluating the remaining k values and record NaN here.
        print(f"[k={k}] Silhouette failed: {e}")
        score = np.nan
    scores[k] = score

# results
print("\n Silhouette scores (higher = better separation):")
for k in k_values:
    s = scores[k]
    print(f"k = {k}: {s:.4f}" if not np.isnan(s) else f"k = {k}: failed")

# Optional: Save results
df = pd.DataFrame({'k': list(scores.keys()), 'silhouette': list(scores.values())})
os.makedirs(os.path.dirname(scores_csv_path), exist_ok=True)  # fresh checkouts may lack it
df.to_csv(scores_csv_path, index=False)
print("[done] Saved: k_silhouette_scores.csv")


Using 15 sampled cubes:
> frt00009d44_07_if165j_mtr3
> frt0000a4fc_07_if166j_mtr3
> frt0000bec0_07_if165j_mtr3
> frt0001182a_07_if165j_mtr3
> hrl00011a72_07_if183j_mtr3
> hrl0000b8c2_07_if183j_mtr3
> frt0000c968_07_if166j_mtr3
> frt0000cbe5_07_if166j_mtr3
> frt0000abcb_07_if166j_mtr3
> hrl000040ff_07_if183j_mtr3
> frt0000b573_07_if166j_mtr3
> frt0000c202_07_if165j_mtr3
> frt0000c62b_07_if166j_mtr3
> frt00005850_07_if167j_mtr3
> hrl0000b404_07_if183j_mtr3

 Stack full spectra and labels per k

 Compute silhouette scores


Silhouette Evaluation: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:10<00:00,  1.32s/it]



 Silhouette scores (higher = better separation):
k = 5: -0.0905
k = 6: -0.1516
k = 7: -0.1715
k = 8: -0.1212
k = 9: -0.1344
k = 10: -0.1597
k = 11: -0.1637
k = 12: -0.2395
[done] Saved: k_silhouette_scores.csv


In [4]:
# config
labelmap_root = '../results/labelmaps'
output_dir = '../results/labelmaps/k_panels'
os.makedirs(output_dir, exist_ok=True)

# Report the k range actually configured in k_values rather than a fixed "5–12".
print(f"Generating panel images for {len(sampled)} cubes across k={min(k_values)}–{max(k_values)}...")

# One figure per sampled cube: a single row of panels, one panel per k.
for cube in sampled:
    # squeeze=False always returns a 2-D Axes array, so indexing also works
    # when k_values holds a single entry.
    fig, axs = plt.subplots(
        1, len(k_values), figsize=(4 * len(k_values), 4), squeeze=False
    )

    for ax, k in zip(axs[0], k_values):
        labelmap_path = os.path.join(labelmap_root, f'k{k}', f"{cube}_labelmap.npy")
        # Title every panel so a "Missing" placeholder still shows which k it is.
        ax.set_title(f"k = {k}")

        if os.path.exists(labelmap_path):
            labelmap = np.load(labelmap_path)
            # 'tab10' has only 10 colours, so with more than 10 distinct values
            # (k = 11, 12) different clusters were drawn in the same colour.
            # 'tab20' keeps the values distinct for the configured range of k.
            ax.imshow(labelmap, cmap='tab20', interpolation='nearest')
        else:
            ax.text(0.5, 0.5, "Missing", ha='center', va='center',
                    transform=ax.transAxes)

        ax.axis('off')

    fig.suptitle(f"Labelmap Comparison: {cube}", fontsize=14)
    fig.tight_layout()
    save_path = os.path.join(output_dir, f"{cube}_panel.png")
    fig.savefig(save_path)
    plt.close(fig)  # release this figure explicitly; many are created in the loop
    print(f"Saved: {save_path}")

Generating panel images for 15 cubes across k=5–12...
Saved: ../results/labelmaps/k_panels/frt00009d44_07_if165j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/frt0000a4fc_07_if166j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/frt0000bec0_07_if165j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/frt0001182a_07_if165j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/hrl00011a72_07_if183j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/hrl0000b8c2_07_if183j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/frt0000c968_07_if166j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/frt0000cbe5_07_if166j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/frt0000abcb_07_if166j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/hrl000040ff_07_if183j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/frt0000b573_07_if166j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/frt0000c202_07_if165j_mtr3_panel.png
Saved: ../results/labelmaps/k_panels/frt0000c62b_07_if166j