In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyimzml.ImzMLParser import ImzMLParser

# Clustering
from sklearn.cluster import KMeans, DBSCAN
from sklearn.mixture import GaussianMixture
import skfuzzy as fuzz # pip install scikit-fuzzy

# Prétraitement & évaluation
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score


In [None]:
# Load an imzML dataset and turn every pixel spectrum into a fixed-length,
# binned intensity vector restricted to the selected mass range.

# imzML file
file_path ="your path"

# Selection parameters
mz_min, mz_max = 600, 1000   # Mass range to keep (customisable)
bin_size = 0.1               # Bin width (Da)

# The parser must be created here: without it the loop below raises NameError
# on a fresh kernel.
parser = ImzMLParser(file_path)

# Bin edges are identical for every pixel, so build them once, outside the loop.
bins = np.arange(mz_min, mz_max + bin_size, bin_size)

coords = []
spectra = []

for idx, (x, y, _z) in enumerate(parser.coordinates):
    mzs, intensities = parser.getspectrum(idx)

    # Keep only the peaks inside the mass range
    mask = (mzs >= mz_min) & (mzs <= mz_max)
    mzs, intensities = mzs[mask], intensities[mask]

    # Binning: sum the intensities falling into each m/z bin
    binned, _ = np.histogram(mzs, bins=bins, weights=intensities)

    coords.append([x, y])
    spectra.append(binned)

coords = np.array(coords)
spectra = np.array(spectra)
print(f"Spectres extraits : {spectra.shape[0]} pixels × {spectra.shape[1]} variables m/z")

In [None]:
# Total ion current (TIC): summed binned intensity of each pixel.
tic = spectra.sum(axis=1)

# Image size: one more than the largest x / y coordinate present.
x_max = coords[:, 0].max().astype(int) + 1
y_max = coords[:, 1].max().astype(int) + 1

# Place every pixel's TIC at its (row=y, column=x) position; positions without
# a spectrum stay at 0.
tic_map = np.zeros((y_max, x_max))
pixel_cols = coords[:, 0].astype(int)
pixel_rows = coords[:, 1].astype(int)
tic_map[pixel_rows, pixel_cols] = tic

fig, ax = plt.subplots(figsize=(8, 6))
tic_image = ax.imshow(tic_map, cmap='jet', origin='upper')
ax.set_title("TIC Map – Intensité totale par pixel")
ax.axis('off')
fig.colorbar(tic_image, ax=ax, label="Intensité totale")
plt.show()


In [None]:
# Reduce the binned spectra to a few principal components before clustering.
n_pca_components = 10

# random_state is fixed so the projection is reproducible: with the default
# svd_solver='auto', scikit-learn may select a randomized solver for large
# matrices, which would otherwise give slightly different components per run.
pca = PCA(n_components=n_pca_components, random_state=0)
spectra_pca = pca.fit_transform(spectra)
print(f"Variance expliquée par les {n_pca_components} composantes : {pca.explained_variance_ratio_.sum():.2%}")

In [None]:
# Spatial view of the K-means segmentation for the chosen number of clusters.
k_opt = 4
kmeans = KMeans(n_clusters=k_opt, random_state=1).fit(spectra_pca)
labels = kmeans.labels_

# Label image: -1 marks grid positions for which no spectrum was acquired.
img_kmeans = np.full((y_max, x_max), -1)
for (x, y), lab in zip(coords, labels):
    img_kmeans[int(y), int(x)] = lab

# Mask the -1 background so it is not drawn as if it were a cluster and does
# not shift the colour normalisation. `img_kmeans` itself is left untouched.
img_kmeans_masked = np.ma.masked_equal(img_kmeans, -1)

# One discrete colour per cluster: 'tab10' resampled to k_opt entries, with
# limits centred on the integer labels so label k always maps to colour k.
cmap_kmeans = plt.get_cmap('tab10', k_opt)

plt.figure(figsize=(8, 6))
plt.imshow(img_kmeans_masked, cmap=cmap_kmeans, origin='upper',
           vmin=-0.5, vmax=k_opt - 0.5)
plt.title(f"Segmentation K-means (k={k_opt})")
plt.axis('off')
plt.colorbar(label='Cluster', ticks=np.arange(k_opt))
plt.show()


In [None]:
# Kept so that any later cell relying on `cm` keeps working; this cell itself
# no longer needs it.
import matplotlib.cm as cm

# ---- Mean spectrum of each cluster (computed on the binned spectra) ----
cluster_centroids = np.array(
    [spectra[labels == k].mean(axis=0) for k in range(k_opt)]
)

# ---- One colour per cluster: 'tab10' resampled to k_opt entries ----
# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in
# Matplotlib 3.9.
colors = plt.get_cmap('tab10', k_opt)

# ---- m/z axis for display ----
# Each column of `spectra` is a histogram bin of width `bin_size` starting at
# `mz_min`, so the matching m/z value is the centre of that bin.
mz_axis = mz_min + bin_size * (np.arange(spectra.shape[1]) + 0.5)

plt.figure(figsize=(10, 6))
for k in range(k_opt):
    plt.plot(mz_axis, cluster_centroids[k],
             color=colors(k), lw=2,
             label=f'Cluster {k+1}')

plt.title("Spectres moyens par cluster (K-means)")
plt.xlabel("m/z")
plt.ylabel("Intensité moyenne")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()
