In [16]:
from pathlib import Path

import numpy as np
from numpy.linalg import norm
from histobench.evaluation.compute_embeddings_tcga_ut import load_embeddings

In [17]:
def load_old_embeddings(input_path):
    """Load embeddings and image IDs from an ``.npz`` archive on disk.

    Parameters
    ----------
    input_path : str or path-like
        Location of an ``.npz`` archive holding the arrays ``"embeddings"``
        and ``"image_ids"``.

    Returns
    -------
    tuple
        ``(embeddings, image_ids)`` exactly as stored in the archive.

    Raises
    ------
    KeyError
        If one of the two expected arrays is missing from the archive.
    """
    # np.load on an .npz returns a lazy NpzFile that keeps the underlying file
    # open; the context manager closes it as soon as both arrays have been
    # read into memory.
    with np.load(input_path) as data:
        return data["embeddings"], data["image_ids"]

In [18]:
# The "old" embeddings: an .npz archive stored in the histolung workspace.
old_npz_path = "/home/valentin/workspaces/histolung/data/embeddings/LungHist700/LungHist700_10x/test_embeddings.npz"
embeddings_old, image_ids_old = load_old_embeddings(old_npz_path)

In [19]:
# The new embeddings to compare against the old ones (HDF5 file in the histobench workspace).
new_h5_path = "/home/valentin/workspaces/histobench/data/embeddings/lunghist700/test_darya.h5"
embeddings, image_paths = load_embeddings(new_h5_path)

In [20]:
# Norm over all entries of the new embeddings (no axis given) as a quick scale check.
norm(embeddings)

880.4357

In [21]:
# Use each path's stem (final component minus its last suffix) as the image ID,
# so the new set can be matched against image_ids_old.
image_ids = [Path(image_path).stem for image_path in image_paths]

In [22]:
# Peek at one derived ID to see the naming format used for matching.
image_ids[0]

'aca_bd_20x_304'

In [23]:
# Align both embedding sets on their shared image IDs and compare them row by row.

# Map each image ID to its row index in the corresponding embedding matrix.
# If an ID occurs more than once, the dict keeps its last occurrence.
old_id_to_idx = {img_id: idx for idx, img_id in enumerate(image_ids_old)}
new_id_to_idx = {img_id: idx for idx, img_id in enumerate(image_ids)}

# IDs present in both sets, sorted so both matrices end up in the same row order.
common_ids = sorted(set(image_ids_old) & set(image_ids))
if not common_ids:
    # np.stack would otherwise fail with an opaque "need at least one array" error.
    raise ValueError("No common image IDs between old and new embeddings; nothing to compare.")

# Gather the rows of each set in that shared order.
old_sorted = np.stack([embeddings_old[old_id_to_idx[img_id]] for img_id in common_ids])
new_sorted = np.stack([embeddings[new_id_to_idx[img_id]] for img_id in common_ids])

# np.allclose uses its default tolerances here (rtol=1e-05, atol=1e-08), so a
# False result can still mean the two sets agree to a few decimal places.
are_equal = np.allclose(old_sorted, new_sorted)
print(f"Are all common embeddings equal? {are_equal}")

# When they differ, report the largest absolute deviation and its (row, column).
if not are_equal:
    diff = np.abs(old_sorted - new_sorted)
    print("Max difference:", diff.max())
    print("Indices with max difference:", np.unravel_index(np.argmax(diff), diff.shape))

Are all common embeddings equal? False
Max difference: 0.002701044
Indices with max difference: (244, 2033)


In [24]:
# Number of image IDs found in both the old and the new set.
len(common_ids)

691

In [25]:
# Shapes of the two aligned matrices (one row per common image ID).
new_sorted.shape, old_sorted.shape

((691, 2048), (691, 2048))

In [26]:
# New embedding of the first common image ID.
new_sorted[0,:]

array([0.6374878 , 0.84489286, 2.3136556 , ..., 1.4872555 , 0.05804763,
       1.0085356 ], dtype=float32)

In [27]:
# Old embedding of the first common image ID.
old_sorted[0,:]

array([0.63771075, 0.84482867, 2.3136683 , ..., 1.4875383 , 0.05806846,
       1.0085622 ], dtype=float32)

In [28]:
# Euclidean distance between the new and old embedding of the first common image ID.
norm(new_sorted[0,:] - old_sorted[0,:])

0.005038293

In [29]:
# Mean of the per-row norms of the new embeddings (axis=1 gives one norm per row).
norm(new_sorted, axis=1).mean()

33.291767

In [30]:
# Mean of the per-row norms of the old embeddings (axis=1 gives one norm per row).
norm(old_sorted, axis=1).mean()

33.29172