# Run hyperparameter tuning on simulated data



Note: Please cite the original publication for the simulated data:

[1]: Cheng, Andrew, Guanyu Hu, and Wei Vivian Li. "Benchmarking cell-type clustering methods for spatially resolved transcriptomics data." Briefings in Bioinformatics 24.1 (2023): bbac475.

and the publications by the creators of the original data:

[2]: Codeluppi, Simone, et al. "Spatial organization of the somatosensory cortex revealed by osmFISH." Nature methods 15.11 (2018): 932-935.

[3]: Qian, Xiaoyan, et al. "Probabilistic cell typing enables fine mapping of closely related cell types in situ." Nature methods 17.1 (2020): 101-106.

In [None]:
import pandas as pd
import optuna

from points2regions import Points2Regions, benchmark, AbstractTuner, BenchmarkDataset

# Benchmark on a single replica (DatasetId == 1) of the simulated ISS data.
data = pd.read_csv(
    r'https://tissuumaps.dckube.scilifelab.se/private/Points2Region/simulated_ISS.csv'
)
data = data.query('DatasetId == 1')
# Wrap each remaining DatasetId group in its own BenchmarkDataset; after the
# filter above there is at most one group, so the list has at most one entry.
data = [
    BenchmarkDataset(replica, x_col='X', y_col='Y', label_col='Gene', gt_col='CellType')
    for _, replica in data.groupby('DatasetId')
]

# Seed passed both to the clustering calls and to the benchmark run.
SEED = 12345
class Tuner(AbstractTuner):
    """Tuner that searches Points2Regions hyperparameters.

    Supplies the search space and the clustering routine; the search itself
    is driven through ``benchmark`` (not defined on this class, so presumably
    inherited from ``AbstractTuner``).
    """

    def parameter_space(self, trial: optuna.Trial):
        """Sample one candidate hyperparameter set from ``trial``.

        Returns a dict whose keys match the keyword parameters of
        :meth:`cluster`:

        * ``pixel_width``: float in [0.1, 4]
        * ``pixel_smoothing``: float in [1, 20]
        * ``num_clusters``: int in [4, 15]
        * ``min_num_pts_per_pixel``: float in [0, 40]
        """
        return {
            'pixel_width': trial.suggest_float('pixel_width', low=0.1, high=4),
            'pixel_smoothing': trial.suggest_float('pixel_smoothing', low=1, high=20),
            'num_clusters': trial.suggest_int('num_clusters', low=4, high=15),
            'min_num_pts_per_pixel': trial.suggest_float('min_num_pts_per_pixel', low=0, high=40),
        }

    def cluster(
        self,
        dataset: BenchmarkDataset,
        pixel_width: float,
        pixel_smoothing: float,
        num_clusters: int,
        min_num_pts_per_pixel: float,
    ):
        """Cluster ``dataset`` with Points2Regions using the given hyperparameters.

        Builds a ``Points2Regions`` model from ``dataset.xy`` and
        ``dataset.label`` and returns the result of ``fit_predict`` with
        ``output='marker'`` and the module-level ``SEED``.
        """
        # NOTE(review): output='marker' presumably yields one cluster label per
        # input point -- confirm against the Points2Regions documentation.
        model = Points2Regions(
            dataset.xy,
            dataset.label,
            pixel_width,
            pixel_smoothing,
            min_num_pts_per_pixel,
        )
        return model.fit_predict(num_clusters=num_clusters, output='marker', seed=SEED)
    
# Instantiate the tuner and run 500 trials on the prepared data.
mdl = Tuner()
# NOTE(review): raise_errors=False presumably lets the search continue when a
# single trial fails -- confirm against AbstractTuner.benchmark.
best_parameters, metrics = mdl.benchmark(
    data,
    n_trials=500,
    raise_errors=False,
    seed=SEED,
)


In [2]:
import pandas as pd
import optuna

from points2regions import Points2Regions, benchmark, AbstractTuner, BenchmarkDataset

# Benchmark on the simulated osmFISH data (the whole CSV, no DatasetId filter).
data = pd.read_csv(
    r'https://tissuumaps.dckube.scilifelab.se/private/Points2Region/simulated_osmFISH.csv'
)
# The full table is wrapped in a single BenchmarkDataset.
data = BenchmarkDataset(
    data,
    x_col='X',
    y_col='Y',
    label_col='Gene',
    gt_col='CellType',
)

# Seed passed both to the clustering calls and to the benchmark run.
SEED = 12345
class Tuner(AbstractTuner):
    """Tuner that searches Points2Regions hyperparameters.

    Supplies the search space and the clustering routine; the search itself
    is driven through ``benchmark`` (not defined on this class, so presumably
    inherited from ``AbstractTuner``).
    """

    def parameter_space(self, trial: optuna.Trial):
        """Sample one candidate hyperparameter set from ``trial``.

        Returns a dict whose keys match the keyword parameters of
        :meth:`cluster`:

        * ``pixel_width``: float in [0.1, 4]
        * ``pixel_smoothing``: float in [1, 20]
        * ``num_clusters``: int in [4, 15]
        * ``min_num_pts_per_pixel``: float in [0, 40]
        """
        return {
            'pixel_width': trial.suggest_float('pixel_width', low=0.1, high=4),
            'pixel_smoothing': trial.suggest_float('pixel_smoothing', low=1, high=20),
            'num_clusters': trial.suggest_int('num_clusters', low=4, high=15),
            'min_num_pts_per_pixel': trial.suggest_float('min_num_pts_per_pixel', low=0, high=40),
        }

    def cluster(
        self,
        dataset: BenchmarkDataset,
        pixel_width: float,
        pixel_smoothing: float,
        num_clusters: int,
        min_num_pts_per_pixel: float,
    ):
        """Cluster ``dataset`` with Points2Regions using the given hyperparameters.

        Builds a ``Points2Regions`` model from ``dataset.xy`` and
        ``dataset.label`` and returns the result of ``fit_predict`` with
        ``output='marker'`` and the module-level ``SEED``.
        """
        # NOTE(review): output='marker' presumably yields one cluster label per
        # input point -- confirm against the Points2Regions documentation.
        model = Points2Regions(
            dataset.xy,
            dataset.label,
            pixel_width,
            pixel_smoothing,
            min_num_pts_per_pixel,
        )
        return model.fit_predict(num_clusters=num_clusters, output='marker', seed=SEED)
    
# Instantiate the tuner and run 500 trials on the prepared data.
mdl = Tuner()
# NOTE(review): raise_errors=False presumably lets the search continue when a
# single trial fails -- confirm against AbstractTuner.benchmark.
best_parameters, metrics = mdl.benchmark(
    data,
    n_trials=500,
    raise_errors=False,
    seed=SEED,
)


[I 2024-01-12 14:07:46,773] A new study created in memory with name: no-name-57b1d8da-7484-42c8-b8ff-7a1956ec6c21
[I 2024-01-12 14:07:48,250] Trial 0 finished with value: 0.8624886573345779 and parameters: {'pixel_width': 3.725502761986877, 'pixel_smoothing': 7.011135537053932, 'num_clusters': 6, 'min_num_pts_per_pixel': 8.182411142121587}. Best is trial 0 with value: 0.8624886573345779.
[I 2024-01-12 14:07:50,432] Trial 1 finished with value: 0.7173408724186816 and parameters: {'pixel_width': 2.3141276134185778, 'pixel_smoothing': 12.315349356605779, 'num_clusters': 15, 'min_num_pts_per_pixel': 26.127083874862834}. Best is trial 0 with value: 0.8624886573345779.
[I 2024-01-12 14:07:52,150] Trial 2 finished with value: 0.6648898624217395 and parameters: {'pixel_width': 3.020735886382256, 'pixel_smoothing': 13.417827546182972, 'num_clusters': 12, 'min_num_pts_per_pixel': 38.45226944291285}. Best is trial 0 with value: 0.8624886573345779.
[I 2024-01-12 14:07:55,050] Trial 3 finished with

Crashed


[I 2024-01-12 14:07:56,183] Trial 4 finished with value: 0.6074240493922746 and parameters: {'pixel_width': 3.2582689549096457, 'pixel_smoothing': 17.571342360761378, 'num_clusters': 15, 'min_num_pts_per_pixel': 28.947413876798137}. Best is trial 0 with value: 0.8624886573345779.
[I 2024-01-12 14:07:58,619] Trial 5 finished with value: 0.7671749676499112 and parameters: {'pixel_width': 2.605653778793781, 'pixel_smoothing': 14.63161879543586, 'num_clusters': 9, 'min_num_pts_per_pixel': 13.023387101575112}. Best is trial 0 with value: 0.8624886573345779.
[I 2024-01-12 14:08:03,404] Trial 6 finished with value: 0.7151933097498073 and parameters: {'pixel_width': 1.814613962950757, 'pixel_smoothing': 14.864092572190177, 'num_clusters': 15, 'min_num_pts_per_pixel': 27.0749484709508}. Best is trial 0 with value: 0.8624886573345779.
[I 2024-01-12 14:08:06,175] Trial 7 finished with value: 0.5630567928968755 and parameters: {'pixel_width': 3.1842078196063754, 'pixel_smoothing': 4.24737089805625