Transcriptome-scale super-resolved imaging in tissues by RNA seqFISH

ShortName: eng2019transcriptome

Steps for processing the raw data into an AnnData object:

In [1]:
# Step 1: Download the raw data from https://download.brainimagelibrary.org/fc/4c/fc4c2570c3711952/

In [13]:
import scanpy as sc
import pandas as pd
import anndata as ad
import numpy as np

In [14]:
# Input directories (relative paths):
#   data_path1 -> folder with the count matrix and cell-centroid tables
#   data_path2 -> folder with the cell-type (louvain) annotation table
data_path1, data_path2 = 'sourcedata', 'celltype_annotations'

In [15]:
# File names of the three input tables: per-cell gene counts, per-cell
# centroid coordinates / region labels, and per-cell cluster annotations.
counts_file = 'cortex_svz_counts.csv'
cellcentroids_file = 'cortex_svz_cellcentroids.csv'
annotations_file = 'cortex_svz_cell_type_annotations.csv'

In [16]:
# Build the location of each CSV first, then read all three tables.
# No index column is requested, so each frame gets pandas' default
# 0..n-1 integer row index.
counts_csv = f'{data_path1}/{counts_file}'
cellcentroids_csv = f'{data_path1}/{cellcentroids_file}'
annotations_csv = f'{data_path2}/{annotations_file}'

counts = pd.read_csv(counts_csv)
cellcentroids = pd.read_csv(cellcentroids_csv)
annotations = pd.read_csv(annotations_csv)

In [17]:
cellcentroids

Unnamed: 0,Field of View,Cell ID,X,Y,Region
0,0,1,1305.70,414.98,Cortex
1,0,2,669.51,457.53,Cortex
2,0,3,1185.90,507.11,Cortex
3,0,4,710.24,533.06,Cortex
4,0,5,763.87,569.15,Cortex
...,...,...,...,...,...
908,6,105,1505.30,1409.30,Choroid Plexus
909,6,106,1344.30,1429.50,Choroid Plexus
910,6,107,1409.30,1454.20,Choroid Plexus
911,6,108,389.93,1667.50,Choroid Plexus


In [18]:
counts

Unnamed: 0,1700022a21rik,1700025g04rik,4933401b06rik,5830417i10rik,9330102e08rik,9430018g01rik,9430083a17rik,Aanat,Aasdh,Aatf,...,Tyrp1,Ubqlnl,Ptgdr,P2ry4,Prdm6,Pde6b,Zp1,Dlx4,Opn1sw,Pramef12
0,3,1,2,0,1,1,1,0,2,0,...,0,0,1,0,0,0,0,0,0,1
1,1,0,0,0,0,0,0,0,0,0,...,1,1,0,1,1,0,0,0,0,0
2,0,0,1,1,0,0,0,0,0,0,...,2,2,1,1,0,0,1,0,1,0
3,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,1,0,1,1,0,3,0,...,1,0,0,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
908,0,0,0,2,0,0,0,0,0,2,...,0,0,1,0,0,0,1,0,0,0
909,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
910,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
911,0,0,0,0,0,0,0,0,2,0,...,0,0,0,0,0,0,0,0,0,1


In [19]:
annotations

Unnamed: 0,index,louvain
0,0,13
1,1,3
2,2,3
3,3,7
4,4,6
...,...,...
908,908,1
909,909,1
910,910,2
911,911,1


In [20]:
# Row labels of the count table (one row per observation); reused below to
# select the matching rows of the centroid and annotation tables.
id_array = counts.index

In [21]:
id_array

RangeIndex(start=0, stop=913, step=1)

In [22]:
# Observation names must be strings, so convert the integer row labels
# to a NumPy array of their string representations.
id_array_str = id_array.to_numpy().astype(str)

In [23]:
# Column labels of the count table, i.e. the gene names that become the
# AnnData variable names.
gene_id_array = counts.columns

In [24]:
gene_id_array

Index(['1700022a21rik', '1700025g04rik', '4933401b06rik', '5830417i10rik',
       '9330102e08rik', '9430018g01rik', '9430083a17rik', 'Aanat', 'Aasdh',
       'Aatf',
       ...
       'Tyrp1', 'Ubqlnl', 'Ptgdr', 'P2ry4', 'Prdm6', 'Pde6b', 'Zp1', 'Dlx4',
       'Opn1sw', 'Pramef12'],
      dtype='object', length=10000)

In [25]:
# Dense observations-by-genes count matrix, cast to 32-bit floats.
count_X = counts.to_numpy().astype(np.float32)

In [26]:
count_X

array([[3., 1., 2., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [27]:
# Wrap the count matrix in an AnnData container, then label its two axes:
# rows (observations) by the stringified row ids, columns (variables) by gene name.
adata = ad.AnnData(X=count_X)
adata.obs_names = id_array_str
adata.var_names = gene_id_array

In [28]:
# Attach per-cell metadata to adata.obs.
#
# adata.obs is indexed by the *string* observation names, while the source
# tables carry an *integer* row index. Assigning a pandas Series to a DataFrame
# column aligns on index labels, so passing the Series directly would match no
# labels and silently fill the column with NaN. Assign the underlying arrays
# instead (positional assignment), as was already done for 'Region'.
centroids_ordered = cellcentroids.loc[id_array]    # rows in the same order as the count matrix
annotations_ordered = annotations.loc[id_array]    # raises KeyError if any row label is missing

adata.obs['Field of View'] = centroids_ordered['Field of View'].values
adata.obs['Cell ID'] = centroids_ordered['Cell ID'].values
adata.obs['Region'] = centroids_ordered['Region'].values
adata.obs['louvain'] = annotations_ordered['louvain'].values

In [29]:
# Store the per-cell centroid coordinates as an (n_obs, 2) array; rows are
# taken in count-matrix order and columns are X then Y.
adata.obsm['spatial'] = cellcentroids.loc[id_array, ['X', 'Y']].to_numpy()

In [30]:
adata

AnnData object with n_obs × n_vars = 913 × 10000
    obs: 'Field of View', 'Cell ID', 'Region', 'louvain'
    obsm: 'spatial'

In [31]:
# Output file for the assembled AnnData object. An empty string is not a
# writable file path, so default to a file named after the dataset's short
# name in the current working directory; adjust as needed.
save_path = 'eng2019transcriptome.h5ad'

In [32]:
# Write the AnnData object to disk in .h5ad format at save_path.
adata.write_h5ad(save_path)

In [33]:
#adata_load = sc.read_h5ad(save_path)

In [34]:
#adata_load