Transcriptome-scale super-resolved imaging in tissues by RNA seqFISH

ShortName: eng2019transcriptome

Steps for processing the raw data into an AnnData object:

In [1]:
# Step 1: Download the raw data from https://download.brainimagelibrary.org/fc/4c/fc4c2570c3711952/

In [2]:
import scanpy as sc
import pandas as pd
import anndata as ad
import numpy as np
from numpy import array
from numpy import count_nonzero
from scipy.sparse import csr_matrix

In [3]:
# Input folders, relative to the notebook's working directory.
# data_path1 holds the count and centroid tables; data_path2 holds the
# cell-type annotation table.
data_path1 = 'sourcedata'
data_path2 = 'celltype_annotations'

In [4]:
# Input file names, listed in the order they are loaded below.
counts_file = 'ob_counts.csv'                        # cells x genes count table
cellcentroids_file = 'ob_cellcentroids.csv'          # per-cell field of view, ID and X/Y
annotations_file = 'OB_cell_type_annotations.csv'    # per-cell Louvain cluster label

In [5]:
# Resolve the three input paths first, then read each table with pandas.
# Counts and centroids come from data_path1; annotations from data_path2.
counts_csv = f'{data_path1}/{counts_file}'
centroids_csv = f'{data_path1}/{cellcentroids_file}'
annotations_csv = f'{data_path2}/{annotations_file}'

counts = pd.read_csv(counts_csv)
cellcentroids = pd.read_csv(centroids_csv)
annotations = pd.read_csv(annotations_csv)

In [6]:
# Preview the per-cell centroid table (field of view, cell ID, X/Y position).
cellcentroids

Unnamed: 0,Field of View,Cell ID,X,Y
0,0,1,1766.40,283.42
1,0,2,1891.40,348.38
2,0,3,1548.70,351.11
3,0,4,1657.60,357.37
4,0,5,1767.40,392.22
...,...,...,...,...
2045,6,187,339.97,1532.30
2046,6,188,608.31,1544.70
2047,6,189,779.11,1551.90
2048,6,190,691.00,1575.60


In [7]:
# Preview the count table: one row per cell, one column per gene.
counts

Unnamed: 0,1700022a21rik,1700025g04rik,4933401b06rik,5830417i10rik,9330102e08rik,9430018g01rik,9430083a17rik,Aanat,Aasdh,Aatf,...,Tyrp1,Ubqlnl,Ptgdr,P2ry4,Prdm6,Pde6b,Zp1,Dlx4,Opn1sw,Pramef12
0,0,0,0,2,0,0,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2045,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2046,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2047,0,0,0,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2048,0,0,2,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Preview the annotation table: an `index` column and a `louvain` cluster label per row.
annotations

Unnamed: 0,index,louvain
0,0,10
1,1,3
2,2,1
3,3,15
4,4,5
...,...,...
2045,2045,9
2046,2046,9
2047,2047,9
2048,2048,9


In [9]:
# Row labels of the count table; used below to select matching rows from
# the centroid and annotation tables.
id_array = counts.index

In [10]:
# Check the row labels: a default integer RangeIndex, one entry per cell.
id_array

RangeIndex(start=0, stop=2050, step=1)

In [11]:
# String version of the row labels, used later as the AnnData observation names.
id_array_values = np.array(id_array)
id_array_str = id_array_values.astype('str')

In [12]:
# Column labels of the count table, i.e. the gene names.
gene_id_array = counts.columns

In [13]:
# Check the gene names and how many there are.
gene_id_array

Index(['1700022a21rik', '1700025g04rik', '4933401b06rik', '5830417i10rik',
       '9330102e08rik', '9430018g01rik', '9430083a17rik', 'Aanat', 'Aasdh',
       'Aatf',
       ...
       'Tyrp1', 'Ubqlnl', 'Ptgdr', 'P2ry4', 'Prdm6', 'Pde6b', 'Zp1', 'Dlx4',
       'Opn1sw', 'Pramef12'],
      dtype='object', length=10000)

In [14]:
# Convert the count table to a float32 NumPy matrix (rows = cells, columns = genes).
raw_matrix = counts.to_numpy()
count_X = raw_matrix.astype('float32')

In [15]:
# Check the converted matrix and its dtype.
count_X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 2., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [16]:
# Wrap the count matrix in an AnnData container, then label both axes:
# observations (rows) by the stringified cell index, variables (columns) by gene name.
adata = ad.AnnData(X=count_X)
adata.obs_names = id_array_str
adata.var_names = gene_id_array

In [17]:
# Attach per-cell metadata to `adata.obs`.
#
# The values are assigned as plain NumPy arrays, not pandas Series, on purpose.
# `adata.obs` is indexed by the string cell names set via `adata.obs_names`,
# while the source tables carry an integer index. Assigning a Series makes
# pandas align on index labels; none match, so every column would silently
# become NaN. Arrays are assigned positionally instead.
#
# NOTE(review): rows are matched by row label across the three tables, which
# assumes all CSVs list the cells in the same order — confirm against the
# `index` column of the annotation table.
cell_meta = cellcentroids.loc[id_array]
adata.obs['Field of View'] = cell_meta['Field of View'].to_numpy()
adata.obs['Cell ID'] = cell_meta['Cell ID'].to_numpy()
adata.obs['louvain'] = annotations.loc[id_array, 'louvain'].to_numpy()

In [18]:
# Store each cell's X/Y centroid as a plain array under obsm['spatial'].
xy_columns = ['X', 'Y']
adata.obsm['spatial'] = cellcentroids.loc[id_array, xy_columns].to_numpy()

In [19]:
# Summarize the assembled object: dimensions plus the obs and obsm keys.
adata

AnnData object with n_obs × n_vars = 2050 × 10000
    obs: 'Field of View', 'Cell ID', 'louvain'
    obsm: 'spatial'

In [20]:
# Output file for the assembled AnnData object. An empty path is not a valid
# file target and makes the write step fail, so default to a file named after
# the dataset's short name; edit this to write elsewhere.
save_path = 'eng2019transcriptome.h5ad'

In [21]:
# Write the AnnData object to disk in .h5ad format.
# `save_path` must name a writable file location.
adata.write_h5ad(save_path)

In [22]:
#adata_load = sc.read_h5ad(save_path)

In [23]:
#adata_load

In [24]:
#A= adata_load.X

In [25]:
#sparsity = 1.0 - count_nonzero(A)/A.size
#print(sparsity)