In [1]:
import os
import numpy as np
from glob import glob

# conf
# Input/output locations used by the cells below (relative paths, so they
# resolve against the kernel's current working directory).
# Directory that is globbed for '*_spectra.npy' files.
spectra_dir = '../data/nili_fossae_preprocessed_spectra'
# Label array loaded with np.load and sliced sequentially, cube by cube.
labels_path = '../results/clustering/k8/labels_k8.npy'
# Destination for the per-cube '<cube>_X.npy', '<cube>_y.npy', '<cube>_ids.npy' files.
output_dir = '../results/stacked_k8_dataset'
# exist_ok=True keeps this cell re-runnable when the directory already exists.
os.makedirs(output_dir, exist_ok=True)

In [2]:
# load
# Sort the matches so cubes are processed in a deterministic (lexicographic)
# order; the stacking cell slices `labels` sequentially in this same order.
# NOTE(review): this presumes `labels` was produced from the same files in the
# same sorted order -- confirm against the clustering step.
spectra_files = sorted(glob(os.path.join(spectra_dir, '*_spectra.npy')))
if not spectra_files:
    # glob() returns an empty list instead of raising, so a wrong spectra_dir
    # would otherwise let the stacking loop run zero times and report success.
    raise FileNotFoundError(
        f"No '*_spectra.npy' files found in {spectra_dir!r}"
    )
labels = np.load(labels_path)
print(f"Total labels: {len(labels):,}")

Total labels: 23,615,764


In [3]:
# stacking
# For every spectra file, write three aligned arrays to `output_dir`:
#   <cube>_X.npy   - the spectra, reshaped to 2-D when the input has more axes
#   <cube>_y.npy   - the slice of `labels` belonging to this cube
#   <cube>_ids.npy - the cube name repeated once per row
# `labels` is consumed sequentially, so `offset` tracks how many labels have
# already been handed out to previous cubes.

offset = 0
n_files = len(spectra_files)
for i, path in enumerate(spectra_files, 1):
    cube_name = os.path.basename(path).replace('_spectra.npy', '')
    spectra = np.load(path)
    if spectra.ndim > 2:
        # Collapse all leading axes; the last axis is kept as-is.
        spectra = spectra.reshape(-1, spectra.shape[-1])

    count = len(spectra)
    labels_chunk = labels[offset:offset + count]
    if len(labels_chunk) != count:
        # Slicing past the end of `labels` does not raise; it just returns a
        # shorter (possibly empty) chunk. Saving that would produce a _y file
        # that no longer lines up with its _X file, so stop here instead.
        raise ValueError(
            f"Labels exhausted at {cube_name}: need {count:,} labels starting "
            f"at offset {offset:,}, but only {len(labels_chunk):,} remain "
            f"(total labels: {len(labels):,})"
        )
    # np.full creates the repeated-name array directly, without first
    # materialising a Python list of `count` strings.
    ids_chunk = np.full(count, cube_name)

    np.save(os.path.join(output_dir, f'{cube_name}_X.npy'), spectra)
    np.save(os.path.join(output_dir, f'{cube_name}_y.npy'), labels_chunk)
    np.save(os.path.join(output_dir, f'{cube_name}_ids.npy'), ids_chunk)

    offset += count
    print(f"[{i}/{n_files}] Saved {cube_name}: {count:,} pixels")

if offset != len(labels):
    # Every cube received a full chunk, yet labels are left over: the label
    # array and the spectra files do not describe the same set of rows.
    raise ValueError(
        f"Label/spectra count mismatch: {offset:,} rows stacked but "
        f"{len(labels):,} labels loaded; the saved _y files may be misaligned"
    )

print("\n Dataset saved to disk. Load with mmap_mode='r'")

[1/137] Saved frt00003584_07_if166j_mtr3: 427,754 pixels
[2/137] Saved frt000037ae_07_if166j_mtr3: 319,996 pixels
[3/137] Saved frt00003e12_07_if166j_mtr3: 444,615 pixels
[4/137] Saved frt00003fb9_07_if166j_mtr3: 429,990 pixels
[5/137] Saved frt000047a3_07_if166j_mtr3: 423,865 pixels
[6/137] Saved frt00004f75_07_if166j_mtr3: 1,824,202 pixels
[7/137] Saved frt00005443_07_if166j_mtr3: 394,890 pixels
[8/137] Saved frt00005850_07_if167j_mtr3: 601,925 pixels
[9/137] Saved frt00005a3e_07_if165j_mtr3: 394,241 pixels
[10/137] Saved frt00005c5e_07_if166j_mtr3: 460,247 pixels
[11/137] Saved frt000064d9_07_if166j_mtr3: 442,443 pixels
[12/137] Saved frt0000652e_07_if166j_mtr3: 396,546 pixels
[13/137] Saved frt000066a4_07_if166j_mtr3: 269,502 pixels
[14/137] Saved frt00007bc8_07_if166j_mtr3: 467,483 pixels
[15/137] Saved frt0000805f_07_if166j_mtr3: 435,657 pixels
[16/137] Saved frt00008389_07_if166j_mtr3: 380,642 pixels
[17/137] Saved frt00008530_07_if166j_mtr3: 229,492 pixels
[18/137] Saved frt000