In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import pandas as pd

In [2]:
# Paths are relative to notebooks/, where this notebook currently lives.
project_root = '..'

kmeans_dir = os.path.join(project_root, 'results/labelmaps/k8')
cnn_dir = os.path.join(project_root, 'outputs/cnn_labelmaps_20_epochs')
preprocessed_dir = os.path.join(project_root, 'data/nili_fossae_preprocessed_spectra')

# The CNN output folder defines the set of test cubes: each
# <cube>_cnn_labelmap.npy file contributes one cube name.
labelmap_files = glob(os.path.join(cnn_dir, '*_cnn_labelmap.npy'))
labelmap_files.sort()
cube_names = [
    os.path.basename(path).replace('_cnn_labelmap.npy', '')
    for path in labelmap_files
]

print(f"Found {len(cube_names)} test cubes")

Found 28 test cubes


In [3]:
def show_comparison(cube_name):
    """Show the KMeans and CNN label maps for one cube beside their disagreement map.

    Loads ``<cube_name>_labelmap.npy`` from ``kmeans_dir`` and
    ``<cube_name>_cnn_labelmap.npy`` from ``cnn_dir``, then draws three
    panels: the KMeans map, the CNN map, and a boolean map of the pixels
    where the two label maps differ.

    Parameters
    ----------
    cube_name : str
        Cube identifier used to build both label-map file names.

    Raises
    ------
    FileNotFoundError
        Propagated from ``np.load`` if either label-map file is missing.
    ValueError
        If the two label maps do not have the same shape.
    """
    print(f"Comparing: {cube_name}")

    # load labelmaps
    kmeans_map = np.load(os.path.join(kmeans_dir, f"{cube_name}_labelmap.npy"))
    cnn_map = np.load(os.path.join(cnn_dir, f"{cube_name}_cnn_labelmap.npy"))

    # Fail loudly instead of letting NumPy broadcast (or choke on) maps
    # that do not cover the same pixel grid.
    if kmeans_map.shape != cnn_map.shape:
        raise ValueError(
            f"Shape mismatch for {cube_name}: "
            f"KMeans {kmeans_map.shape} vs CNN {cnn_map.shape}"
        )

    # Disagreement is only counted where both maps carry a non-negative label.
    # NOTE(review): negative labels presumably mark masked/invalid pixels — confirm.
    both_valid = (kmeans_map >= 0) & (cnn_map >= 0)
    diff = (kmeans_map != cnn_map) & both_valid

    # (image, colormap, title) for each of the three panels, left to right.
    # The KMeans title says k=8 to match the labelmaps/k8 directory the maps come from.
    panels = [
        (kmeans_map, 'tab10', "KMeans (k=8)"),
        (cnn_map, 'tab10', "CNN"),
        (diff, 'Reds', "Disagreement Map"),
    ]

    fig, axs = plt.subplots(1, 3, figsize=(12, 4))
    for ax, (image, cmap, title) in zip(axs, panels):
        ax.imshow(image, cmap=cmap, interpolation='nearest')
        ax.set_title(title)
        ax.axis('off')

    fig.suptitle(cube_name)
    fig.tight_layout()
    plt.show()


In [None]:
# Batch comparison: for every cube, measure how often the KMeans and CNN
# label maps disagree, save a three-panel comparison figure, and write a
# per-cube CSV summary.

# Build output locations from project_root so they follow the same root as
# the input directories configured at the top of the notebook.
map_tests_dir = os.path.join(project_root, 'outputs/map_tests')
cube_fig_dir = os.path.join(map_tests_dir, 'individual_cubes')
os.makedirs(cube_fig_dir, exist_ok=True)

# One (cube name, valid pixels, disagreeing pixels, disagreement %) tuple per cube.
results = []

for cube_name in cube_names:
    print(f"Processing {cube_name}")

    # load maps
    kmeans_path = os.path.join(kmeans_dir, f"{cube_name}_labelmap.npy")
    cnn_path = os.path.join(cnn_dir, f"{cube_name}_cnn_labelmap.npy")

    if not (os.path.exists(kmeans_path) and os.path.exists(cnn_path)):
        print(f"Missing labelmap for {cube_name}")
        continue

    kmeans_map = np.load(kmeans_path)
    cnn_map = np.load(cnn_path)

    # Skip (rather than crash on) a cube whose maps cover different grids,
    # so one bad cube does not discard the results gathered so far.
    if kmeans_map.shape != cnn_map.shape:
        print(f"Shape mismatch for {cube_name}: "
              f"KMeans {kmeans_map.shape} vs CNN {cnn_map.shape}")
        continue

    # disagreement map and stats, restricted to pixels labelled (>= 0) in both maps
    valid_mask = (kmeans_map >= 0) & (cnn_map >= 0)
    total = np.count_nonzero(valid_mask)
    diff_map = (kmeans_map != cnn_map) & valid_mask
    diff_count = np.count_nonzero(diff_map)
    # A cube with no valid pixels is reported as 0% to avoid dividing by zero.
    disagreement_pct = 100 * diff_count / total if total > 0 else 0

    print(f"{cube_name}: {diff_count}/{total} disagree ({disagreement_pct:.2f}%)")
    results.append((cube_name, total, diff_count, disagreement_pct))

    # plot and save comparison image: (image, colormap, title) per panel
    panels = [
        (kmeans_map, 'tab10', "KMeans (k=8)"),
        (cnn_map, 'tab10', "CNN (20 epochs)"),
        (diff_map, 'Reds', "Disagreement"),
    ]
    fig, axs = plt.subplots(1, 3, figsize=(12, 4))
    for ax, (image, cmap, title) in zip(axs, panels):
        ax.imshow(image, cmap=cmap, interpolation='nearest')
        ax.set_title(title)
        ax.axis('off')

    # Cube name as a row label to the left of the first panel (axes coordinates).
    axs[0].text(-0.1, 0.5, cube_name, va='center', ha='right',
                fontsize=10, transform=axs[0].transAxes)

    fig.tight_layout()
    out_path = os.path.join(cube_fig_dir, f"{cube_name}_cnn_vs_kmeans.png")
    fig.savefig(out_path, dpi=300)
    # Close each figure so the loop does not accumulate open figures in memory.
    plt.close(fig)

# save CSV summary
df = pd.DataFrame(results, columns=['Cube Name', 'Total Pixels', 'Disagreeing Pixels', 'Disagreement (%)'])
csv_path = os.path.join(map_tests_dir, 'disagreement_summary.csv')
df.to_csv(csv_path, index=False)

# Print average disagreement (unweighted mean of the per-cube percentages)
if results:
    avg_disagreement = np.mean([r[3] for r in results])
    print(f"\nAverage disagreement: {avg_disagreement:.2f}% across {len(results)} cubes")
    print(f"[OK] CSV saved to: {csv_path}")
else:
    print("No valid comparisons made.")

Processing frt00007bc8_07_if166j_mtr3
frt00007bc8_07_if166j_mtr3: 32082/467483 disagree (6.86%)
Processing frt00008389_07_if166j_mtr3
frt00008389_07_if166j_mtr3: 34583/380642 disagree (9.09%)
Processing frt000093be_07_if166j_mtr3
frt000093be_07_if166j_mtr3: 42397/288048 disagree (14.72%)
Processing frt00009c31_07_if166j_mtr3
frt00009c31_07_if166j_mtr3: 18811/212648 disagree (8.85%)
Processing frt00009c6a_07_if166j_mtr3
frt00009c6a_07_if166j_mtr3: 10959/172436 disagree (6.36%)
Processing frt0000bec0_07_if165j_mtr3
frt0000bec0_07_if165j_mtr3: 4757/131165 disagree (3.63%)
Processing frt0000bfd1_07_if166j_mtr3
frt0000bfd1_07_if166j_mtr3: 29755/156969 disagree (18.96%)
Processing frt0000c202_07_if165j_mtr3
frt0000c202_07_if165j_mtr3: 14375/201990 disagree (7.12%)
Processing frt0000c62b_07_if166j_mtr3
frt0000c62b_07_if166j_mtr3: 111639/226878 disagree (49.21%)
Processing frt0000c968_07_if166j_mtr3
frt0000c968_07_if166j_mtr3: 28398/171121 disagree (16.60%)
Processing frt0000d6d6_07_if166j_mtr