-
Notifications
You must be signed in to change notification settings - Fork 9
/
_lisi.py
103 lines (85 loc) · 3.53 KB
/
_lisi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
import pandas as pd
from scib_metrics.nearest_neighbors import NeighborsResults
from scib_metrics.utils import compute_simpson_index
def lisi_knn(X: NeighborsResults, labels: np.ndarray, perplexity: float = None) -> np.ndarray:
    """Per-cell local inverse Simpson's index (LISI) :cite:p:`korsunsky2019harmony`.

    Parameters
    ----------
    X
        A :class:`~scib_metrics.nearest_neighbors.NeighborsResults` object
        supplying the neighbor ``distances`` and ``indices``.
    labels
        Array of shape (n_cells,) holding the label value of each cell.
    perplexity
        Parameter controlling effective neighborhood size. When None, it
        defaults to the number of neighbors // 3.

    Returns
    -------
    lisi
        Array of shape (n_cells,) with the LISI score for each cell.
    """
    # Encode arbitrary label values as integer category codes.
    label_codes = np.asarray(pd.Categorical(labels).codes)
    distances, neighbors = X.distances, X.indices
    # Column vector of row positions, one entry per sample.
    cell_rows = np.arange(X.n_samples).reshape(-1, 1)

    if perplexity is None:
        # Default: a third of the neighbor-matrix width, rounded down.
        perplexity = np.floor(neighbors.shape[1] / 3)

    simpson = compute_simpson_index(
        knn_dists=distances,
        knn_idx=neighbors,
        row_idx=cell_rows,
        labels=label_codes,
        n_labels=len(np.unique(label_codes)),
        perplexity=perplexity,
    )
    # LISI is the reciprocal of the Simpson index.
    return 1 / simpson
def ilisi_knn(X: NeighborsResults, batches: np.ndarray, perplexity: float = None, scale: bool = True) -> float:
    """Compute the median integration local inverse simpson index (iLISI) :cite:p:`korsunsky2019harmony`.

    The per-cell LISI of the batch labels is computed with :func:`lisi_knn` and
    summarized by its NaN-ignoring median, so a single score is returned rather
    than one value per cell. By default the score is rescaled
    :cite:p:`luecken2022benchmarking`.

    Parameters
    ----------
    X
        A :class:`~scib_metrics.nearest_neighbors.NeighborsResults` object.
    batches
        Array of shape (n_cells,) representing batch values
        for each cell.
    perplexity
        Parameter controlling effective neighborhood size. If None, the
        perplexity is set to the number of neighbors // 3.
    scale
        Scale lisi into the range [0, 1]. If True, higher values are better.
        The scaled score is ``(median - 1) / (n_batches - 1)``.

    Returns
    -------
    ilisi
        Scalar iLISI score: the median of the per-cell LISI values, rescaled
        when ``scale`` is True.

    Notes
    -----
    With ``scale=True`` and a single distinct batch the denominator
    ``n_batches - 1`` is zero, so the scaled score is undefined.
    """
    # Encode batch values as integer category codes.
    batches = np.asarray(pd.Categorical(batches).codes)
    lisi = lisi_knn(X, batches, perplexity=perplexity)
    # Collapse per-cell scores into one summary value, ignoring NaNs.
    ilisi = np.nanmedian(lisi)
    if scale:
        nbatches = len(np.unique(batches))
        # Map the raw score from [1, nbatches] onto [0, 1].
        ilisi = (ilisi - 1) / (nbatches - 1)
    return ilisi
def clisi_knn(X: NeighborsResults, labels: np.ndarray, perplexity: float = None, scale: bool = True) -> float:
    """Compute the median cell-type local inverse simpson index (cLISI) :cite:p:`korsunsky2019harmony`.

    The per-cell LISI of the cell-type labels is computed with :func:`lisi_knn`
    and summarized by its NaN-ignoring median, so a single score is returned
    rather than one value per cell. By default the score is rescaled
    :cite:p:`luecken2022benchmarking`.

    Parameters
    ----------
    X
        A :class:`~scib_metrics.nearest_neighbors.NeighborsResults` object.
    labels
        Array of shape (n_cells,) representing cell type label values
        for each cell.
    perplexity
        Parameter controlling effective neighborhood size. If None, the
        perplexity is set to the number of neighbors // 3.
    scale
        Scale lisi into the range [0, 1]. If True, higher values are better.
        The scaled score is ``(n_labels - median) / (n_labels - 1)``.

    Returns
    -------
    clisi
        Scalar cLISI score: the median of the per-cell LISI values, rescaled
        when ``scale`` is True.

    Notes
    -----
    With ``scale=True`` and a single distinct label the denominator
    ``n_labels - 1`` is zero, so the scaled score is undefined.
    """
    # Encode cell-type values as integer category codes.
    labels = np.asarray(pd.Categorical(labels).codes)
    lisi = lisi_knn(X, labels, perplexity=perplexity)
    # Collapse per-cell scores into one summary value, ignoring NaNs.
    clisi = np.nanmedian(lisi)
    if scale:
        nlabels = len(np.unique(labels))
        # Invert and rescale so that a raw score of 1 maps to 1 and nlabels maps to 0.
        clisi = (nlabels - clisi) / (nlabels - 1)
    return clisi