In [7]:
import scanpy as sc
import numpy as np
import scipy.cluster
from scipy import sparse
from skimage.draw import disk
from tifffile import TiffWriter
from vitessce.data_utils import (
    to_diamond,
    multiplex_img_to_ome_tiff,
    rgb_img_to_ome_tiff,
    optimize_adata,
    to_dense,
    VAR_CHUNK_SIZE,
    sort_var_axis,
)
from os.path import join

In [2]:
# Fetch the Visium spatial gene expression sample "V1_Human_Lymph_Node",
# requesting the high-resolution TIFF along with it.
adata = sc.datasets.visium_sge(
    include_hires_tiff=True,
    sample_id="V1_Human_Lymph_Node",
)

  0%|          | 0.00/7.86M [00:00<?, ?B/s]

  0%|          | 0.00/29.3M [00:00<?, ?B/s]

  0%|          | 0.00/380M [00:00<?, ?B/s]

  utils.warn_names_duplicates("var")


In [3]:
# Reference: https://scanpy-tutorials.readthedocs.io/en/latest/spatial/basic-analysis.html
# Calculate QC metrics
adata.var_names_make_unique()
# Flag mitochondrial genes (names starting with "MT-") so that
# calculate_qc_metrics reports the per-spot "pct_counts_mt" column used below.
adata.var["mt"] = adata.var_names.str.startswith("MT-")
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)

# Perform basic filtering
sc.pp.filter_cells(adata, min_counts=5000)
sc.pp.filter_cells(adata, max_counts=35000)
# Boolean subsetting returns a *view* of the parent AnnData. Take an explicit
# copy so that the in-place steps below (filter_genes writes
# adata.var['n_cells'], normalize_total/log1p rewrite X) operate on a real
# object instead of triggering an implicit copy-on-write with a warning.
adata = adata[adata.obs["pct_counts_mt"] < 20].copy()
sc.pp.filter_genes(adata, min_cells=10)

# Perform normalization
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)
# Determine the top 300 highly variable genes.
sc.pp.highly_variable_genes(adata, flavor="seurat", n_top_genes=300)

# Dimensionality reduction and clustering
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
# Cluster labels are stored in adata.obs["clusters"].
sc.tl.leiden(adata, key_added="clusters")


  adata.var['n_cells'] = number


In [4]:
# Hierarchical clustering of genes for optimal gene ordering
# Dense (observations x highly-variable-genes) matrix and the matching gene names.
X_hvg_arr = adata[:, adata.var['highly_variable']].X.toarray()
X_hvg_index = adata[:, adata.var['highly_variable']].var.copy().index

# Transpose so that each row passed to linkage() is one gene; optimal_ordering
# reorders the leaves of the resulting tree.
Z = scipy.cluster.hierarchy.linkage(X_hvg_arr.T, method="average", optimal_ordering=True)

# Get the hierarchy-based ordering of genes.
# NOTE(review): num_cells is not used within this cell; it is kept in case
# another cell reads it.
num_cells = adata.obs.shape[0]
highly_var_index_ordering = scipy.cluster.hierarchy.leaves_list(Z)
highly_var_genes = X_hvg_index.values[highly_var_index_ordering].tolist()

all_genes = adata.var.index.values.tolist()
not_var_genes = adata.var.loc[~adata.var['highly_variable']].index.values.tolist()

# Name -> position lookup, built once. The previous list.index() call scanned
# the whole gene list for every gene, which is quadratic in the number of genes.
# Var names were made unique earlier in the notebook, so each name maps to
# exactly one position.
gene_to_orig_index = {gene_id: position for position, gene_id in enumerate(all_genes)}

def get_orig_index(gene_id):
    """Return the position of `gene_id` on the current (not yet reordered) var axis."""
    return gene_to_orig_index[gene_id]

# Highly variable genes first (in dendrogram leaf order), then all remaining
# genes in their existing relative order.
var_index_ordering = [get_orig_index(gene_id) for gene_id in highly_var_genes + not_var_genes]

# Create a new *ordered* gene expression dataframe.
adata = adata[:, var_index_ordering].copy()
# Store the highly-variable-gene submatrix of the reordered object separately.
adata.obsm["X_hvg"] = adata[:, adata.var['highly_variable']].X.copy()

In [5]:
# Save the processed AnnData object to an H5AD file in the working directory.
h5ad_path = 'human_lymph_node.h5ad'
adata.write_h5ad(h5ad_path)

In [10]:
# Convert the high-resolution tissue image to 8-bit for export.
hires_img = adata.uns['spatial']['V1_Human_Lymph_Node']['images']['hires']
# Scale into a *new* array. Using `*=` here would multiply the array stored in
# adata.uns in place, silently altering the AnnData object and scaling the
# image again every time this cell is re-run.
# NOTE(review): assumes the stored image is floating point in [0, 1] — confirm.
img_arr = (hires_img * 255.0).astype(np.dtype('uint8'))

In [11]:
# Write the 8-bit image to a TIFF file. The context manager closes the file
# handle even if write() raises, which the manual open/close pair did not.
# NOTE(review): consider passing `photometric` explicitly if the installed
# tifffile version warns about the implicit RGB default — confirm.
with TiffWriter('human_lymph_node.tiff') as tiff_writer:
    tiff_writer.write(
        img_arr,
    )