In [None]:
# Importa le librerie necessarie
import os
import torch
import torchvision.transforms as transforms
import h5py
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
cuda_available = torch.cuda.is_available()
device = torch.device('cuda') if cuda_available else torch.device('cpu')
print(f"Dispositivo utilizzato: {device}")
print(f"CUDA disponibile: {cuda_available}")
if cuda_available:
    # Report the name and total memory (in GiB) of CUDA device 0.
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memoria GPU: {gpu_props.total_memory / 1024**3:.1f} GB")

In [None]:
# Load the patch encoder registered under the name 'conch_v15'
from src.trident.patch_encoder_models.load import encoder_factory

# Build the encoder, move it to the selected device and put it in evaluation mode
print("Caricamento modello CONCH 1.5...")
patch_encoder = encoder_factory('conch_v15')
patch_encoder.to(device)
patch_encoder.eval()

# Total number of scalar parameters across all parameter tensors of the encoder
n_model_params = sum(weight.numel() for weight in patch_encoder.parameters())

print(f"Modello caricato: {patch_encoder.enc_name}")
print(f"Dimensione features: {patch_encoder.n_features}")
print(f"Parametri del modello: {n_model_params:,}")
print("Modello impostato in modalità eval ✓")

In [None]:
# Path and parameter configuration.
# Edit these paths to match your own setup before running the notebook.
HEST_PATCHES_DIR = "/path/to/hest/patches"  # Directory containing the HEST patch H5 files
OUTPUT_DIR = "/path/to/output/features"     # Directory where the extracted features are saved
BATCH_SIZE = 32                             # Batch size for inference

# Create the output directory if it does not exist yet
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"Directory di output: {OUTPUT_DIR}")

# Trova tutti i file H5 delle patch
def find_hest_patch_files(patches_dir, pattern='patches.h5'):
    """Recursively collect the HEST patch files found below ``patches_dir``.

    Args:
        patches_dir: Root directory to walk.
        pattern: Filename suffix to match (compared with ``str.endswith``).

    Returns:
        List of matching file paths (directory joined with filename), in the
        order ``os.walk`` visits them.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(patches_dir)
        for filename in filenames
        if filename.endswith(pattern)
    ]


In [None]:
# Funzioni per caricare e processare le patch HEST
def load_patches_from_h5(h5_path):
    """Load patch images and their metadata from a HEST patch H5 file.

    The image stack is read from the ``'imgs'`` entry, falling back to
    ``'img'``. ``'coords'`` and ``'barcode'`` are copied when present (the
    latter is stored under the key ``'barcodes'``). Every other top-level
    entry is loaded on a best-effort basis under its own name; entries that
    cannot be read as a dataset (for example groups) are reported and skipped.

    Args:
        h5_path: Path of the H5 file to read.

    Returns:
        dict with ``'patches'`` and, when available, ``'coords'``,
        ``'barcodes'`` and any additional entries, all read fully into memory.

    Raises:
        KeyError: If the file has neither an ``'imgs'`` nor an ``'img'`` entry.
    """
    data = {}
    print(f"Caricamento patch da: {h5_path}")

    with h5py.File(h5_path, 'r') as f:
        # Show which entries the file offers
        print(f"Chiavi disponibili nel file H5: {list(f.keys())}")

        # Patch images: prefer 'imgs', fall back to 'img'
        for image_key in ('imgs', 'img'):
            if image_key in f:
                data['patches'] = f[image_key][:]
                print(f"Caricate {len(data['patches'])} patch dalla chiave '{image_key}'")
                break
        else:
            raise KeyError("Nessun dato immagine trovato nel file H5. Attese chiavi 'imgs' o 'img'.")

        # Coordinates, when available
        if 'coords' in f:
            data['coords'] = f['coords'][:]
            print(f"Caricate coordinate: {data['coords'].shape}")

        # Barcodes, when available
        if 'barcode' in f:
            data['barcodes'] = f['barcode'][:]
            print(f"Caricati {len(data['barcodes'])} barcodes")

        # Any remaining entry is treated as optional metadata
        already_handled = ('imgs', 'img', 'coords', 'barcode')
        for key in f.keys():
            if key in already_handled:
                continue
            try:
                # `[()]` reads the whole dataset and, unlike `[:]`, also works
                # for scalar datasets.
                value = f[key][()]
            except Exception as exc:
                # Best effort: groups or unreadable entries are skipped, but the
                # reason is reported. `Exception` (not a bare except) lets
                # KeyboardInterrupt / SystemExit propagate.
                print(f"Impossibile caricare metadati '{key}': {exc}")
                continue
            data[key] = value
            # Scalars read as plain Python/bytes values have no `.shape`.
            print(f"Caricati metadati '{key}': {getattr(value, 'shape', ())}")

    print(f"Forma delle patch: {data['patches'].shape}")
    print(f"Tipo di dati: {data['patches'].dtype}")
    return data



In [None]:
# Funzione per l'inferenza con CONCH 1.5
def extract_patch_features(patches, patch_encoder, device, batch_size=32):
    """Run ``patch_encoder`` over ``patches`` in mini-batches and collect the outputs.

    ``patches`` is permuted from channels-last to channels-first
    (axes ``0, 3, 1, 2``), so it must be a 4-D NumPy array. Each batch is moved
    to ``device`` and cast to float32 there; ``uint8`` input is additionally
    divided by 255. Converting per batch keeps host memory at the size of the
    original array instead of a full float32 copy.

    NOTE(review): no resizing or mean/std normalisation is applied here. If the
    encoder expects its own evaluation transforms, they must be applied
    separately — confirm against the encoder's documentation.

    Args:
        patches: NumPy array of patches, channels last.
        patch_encoder: Model called as ``patch_encoder(batch)``; switched to
            eval mode by this function.
        device: Torch device the batches are moved to.
        batch_size: Number of patches per forward pass (must be >= 1).

    Returns:
        NumPy array with the per-batch encoder outputs concatenated along
        axis 0. For empty input, a ``(0, n_features)`` float32 array, where
        ``n_features`` is read from ``patch_encoder.n_features`` (0 if absent).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    patch_encoder.eval()
    n_patches = len(patches)

    print(f"Estrazione features da {n_patches} patch...")
    print(f"Batch size: {batch_size}")

    if n_patches == 0:
        # Nothing to encode: return an empty, correctly shaped result instead of
        # failing in np.concatenate on an empty list.
        feature_dim = int(getattr(patch_encoder, 'n_features', 0) or 0)
        features = np.empty((0, feature_dim), dtype=np.float32)
    else:
        rescale = patches.dtype == np.uint8

        # Zero-copy view of the input, (N, H, W, C) -> (N, C, H, W)
        patches_tensor = torch.from_numpy(patches).permute(0, 3, 1, 2)
        print(f"Forma tensor patches: {patches_tensor.shape}")

        batch_outputs = []
        with torch.no_grad():
            for start in tqdm(range(0, n_patches, batch_size), desc="Estrazione features"):
                # Transfer in the source dtype, then cast to float32 on the device.
                batch = patches_tensor[start:start + batch_size].to(device).float()
                if rescale:
                    batch = batch / 255.0
                batch_outputs.append(patch_encoder(batch).cpu().numpy())

        features = np.concatenate(batch_outputs, axis=0)

    print(f"Features estratte: {features.shape}")
    print(f"Dimensionalità per patch: {features.shape[1]}")

    return features

In [None]:
# Salva le features estratte in una directory
def _barcodes_as_bytes(barcodes):
    """Return ``barcodes`` as a fixed-width byte-string array of the same shape."""
    arr = np.asarray(barcodes)
    encoded = [
        item if isinstance(item, bytes) else str(item).encode('utf-8')
        for item in arr.ravel().tolist()
    ]
    return np.array(encoded, dtype='S').reshape(arr.shape)


def save_features_to_directory(features, metadata, output_dir, sample_name="example_sample",
                               model_name="conch_v15"):
    """Write features and metadata for one sample into ``output_dir/sample_name``.

    Three files are produced inside the sample directory:

    * ``<sample_name>_features.h5`` — datasets ``features`` plus, when present
      in ``metadata``, ``coords`` and ``barcodes``; file attributes
      ``model_name``, ``feature_dim``, ``n_patches`` and ``processing_date``.
    * ``<sample_name>_features.npy`` — the feature array alone.
    * ``<sample_name>_metadata.json`` — the same summary as the H5 attributes
      plus ``coords_shape`` and ``n_barcodes`` (``null`` when unavailable).

    Args:
        features: 2-D array, one row per patch.
        metadata: Mapping that may contain ``'coords'`` and ``'barcodes'``.
        output_dir: Parent directory; the sample directory is created inside it.
        sample_name: Name of the sample directory and prefix of the file names.
        model_name: Model identifier recorded in the H5 attributes and the JSON.

    Returns:
        Path of the sample directory.

    Raises:
        IndexError: If ``features`` has fewer than two dimensions (raised
            before any file or directory is created).
    """
    import json  # kept local so the function does not rely on another cell's imports

    # Read the dimensions first so a malformed `features` fails before any output exists.
    n_patches = int(features.shape[0])
    feature_dim = int(features.shape[1])
    # Single timestamp shared by the H5 attributes and the JSON summary.
    processing_date = str(np.datetime64('now'))

    # Per-sample output directory
    sample_dir = os.path.join(output_dir, sample_name)
    os.makedirs(sample_dir, exist_ok=True)

    # Features and metadata in H5 format
    features_path = os.path.join(sample_dir, f"{sample_name}_features.h5")

    with h5py.File(features_path, 'w') as f:
        f.create_dataset('features', data=features)

        if 'coords' in metadata:
            f.create_dataset('coords', data=metadata['coords'])

        if 'barcodes' in metadata:
            # Barcodes may arrive as str, bytes, or (possibly 2-D) object arrays;
            # encode them into one fixed-width bytes array, which HDF5 can store.
            f.create_dataset('barcodes', data=_barcodes_as_bytes(metadata['barcodes']))

        # Provenance information
        f.attrs['model_name'] = model_name
        f.attrs['feature_dim'] = feature_dim
        f.attrs['n_patches'] = n_patches
        f.attrs['processing_date'] = processing_date

    print(f"Features salvate in: {features_path}")

    # Plain NumPy copy for easy access
    numpy_path = os.path.join(sample_dir, f"{sample_name}_features.npy")
    np.save(numpy_path, features)
    print(f"Features salvate anche in: {numpy_path}")

    # JSON summary
    metadata_path = os.path.join(sample_dir, f"{sample_name}_metadata.json")
    json_metadata = {
        'model_name': model_name,
        'feature_dim': feature_dim,
        'n_patches': n_patches,
        'processing_date': processing_date,
        # np.shape also accepts plain sequences; list() keeps the value JSON-friendly
        'coords_shape': list(np.shape(metadata['coords'])) if 'coords' in metadata else None,
        'n_barcodes': len(metadata['barcodes']) if 'barcodes' in metadata else None,
    }

    with open(metadata_path, 'w') as f:
        json.dump(json_metadata, f, indent=2)

    print(f"Metadati salvati in: {metadata_path}")
    return sample_dir

