EASI-FISH for thick tissue defines lateral hypothalamus spatio-molecular organization

ShortName: wang2021easi

Steps for processing the raw data into an AnnData object:

In [1]:
# 1, Download the raw data from https://janelia.figshare.com/articles/dataset/EASI-FISH_enabled_spatial_analysis_of_molecular_cell_types_in_the_lateral_hypothalamus/13749154

In [2]:
import scanpy as sc
import pandas as pd
import anndata as ad

In [3]:
# Folder containing the EASI-FISH CSV files that are read below.
data_path: str = 'wang2021easi_EasiFish'

In [4]:
# File names (relative to data_path) of the gene count table and the
# per-cell metadata table.
count_file, meta_file = 'EASI_FISH_gene_count.csv', 'EASI_FISH_metadata.csv'

In [5]:
# Load the gene count table (one row per cell, one column per gene plus
# an id column) from the data folder.
count_path = f'{data_path}/{count_file}'
count = pd.read_csv(count_path)

In [6]:
# Preview the count table exactly as it was read from the CSV.
count


Unnamed: 0.1,Unnamed: 0,Meis2,Th,Gpr101,Gpr83,Slc17a6,Bdnf,Calb2,Gal,Otp,...,Trh,Nts,Hcrt,Pmch,Cartpt,Slc32a1,Sst,Nrgn,Meis2-R9,Th-R9
0,LHA1_1,8.0,5.0,49.0,1.0,9.0,3.0,23.000000,77.0,19.0,...,9.0,39.0,26.0,43.0,6.0,183.0,17.0,39.0,15.0,13.0
1,LHA1_2,13.0,6.0,2.0,6.0,98.0,20.0,167.000000,5.0,8.0,...,24.0,2.0,3.0,18.0,0.0,7.0,7.0,110.0,12.0,9.0
2,LHA1_4,148.0,4.0,3.0,44.0,228.0,1.0,29.000000,6.0,4.0,...,4.0,7.0,8.0,19.0,3.0,40.0,20.0,3.0,151.0,17.0
3,LHA1_5,94.0,10.0,2.0,13.0,20.0,9.0,152.000000,19.0,10.0,...,4.0,1.0,1.0,10.0,11.0,177.0,68.0,82.0,115.0,26.0
4,LHA1_6,17.0,13.0,1.0,12.0,201.0,0.0,27.000000,7.0,8.0,...,8.0,0.0,5.0,15.0,3.0,13.0,22.0,7.0,24.0,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36418,LHA3_29908,7.0,4.0,4.0,4.0,26.0,1.0,486.720155,3.0,44.0,...,48.0,5.0,4.0,5.0,0.0,2.0,10.0,4.0,4.0,6.0
36419,LHA3_29917,1.0,0.0,0.0,1.0,16.0,4.0,48.000000,2.0,13.0,...,43.0,0.0,2.0,3.0,3.0,4.0,3.0,1.0,4.0,1.0
36420,LHA3_29932,5.0,4.0,0.0,4.0,2.0,0.0,1.000000,0.0,3.0,...,7.0,0.0,2.0,4.0,0.0,32.0,4.0,1.0,3.0,1.0
36421,LHA3_29957,6.0,9.0,3.0,1.0,24.0,9.0,26.000000,4.0,13.0,...,95.0,0.0,1.0,3.0,4.0,14.0,11.0,6.0,2.0,4.0


In [7]:
# The 'Unnamed: 0' column of the count table carries the cell identifiers
# (e.g. 'LHA1_1'); keep it as a Series for use as observation names.
cell_id_array = count.loc[:, 'Unnamed: 0']

In [8]:
# Inspect the extracted cell identifiers.
cell_id_array

0            LHA1_1
1            LHA1_2
2            LHA1_4
3            LHA1_5
4            LHA1_6
            ...    
36418    LHA3_29908
36419    LHA3_29917
36420    LHA3_29932
36421    LHA3_29957
36422    LHA3_30018
Name: Unnamed: 0, Length: 36423, dtype: object

In [9]:
# All column labels of the count table. Note that the first label is the
# cell-id column 'Unnamed: 0', which is not a gene.
gene_id_array = count.columns

In [10]:
# Inspect the column labels; the first one ('Unnamed: 0') holds cell ids
# rather than gene counts.
gene_id_array

Index(['Unnamed: 0', 'Meis2', 'Th', 'Gpr101', 'Gpr83', 'Slc17a6', 'Bdnf',
       'Calb2', 'Gal', 'Otp', 'Gad1', 'Col25a1', 'Synpr', 'Tac2', 'Calb1',
       'Tac1', 'Map1b', 'Trh', 'Nts', 'Hcrt', 'Pmch', 'Cartpt', 'Slc32a1',
       'Sst', 'Nrgn', 'Meis2-R9', 'Th-R9'],
      dtype='object')

In [11]:
# Gene names are every column label except the first one ('Unnamed: 0',
# which holds the cell ids). Derive the list straight from count.columns
# rather than from gene_id_array itself, so that re-running this cell does
# not strip one more (real) gene off the front each time.
gene_id_array = count.columns[1:]

In [12]:
# Check the gene list now that the non-gene first column label is gone.
gene_id_array

Index(['Meis2', 'Th', 'Gpr101', 'Gpr83', 'Slc17a6', 'Bdnf', 'Calb2', 'Gal',
       'Otp', 'Gad1', 'Col25a1', 'Synpr', 'Tac2', 'Calb1', 'Tac1', 'Map1b',
       'Trh', 'Nts', 'Hcrt', 'Pmch', 'Cartpt', 'Slc32a1', 'Sst', 'Nrgn',
       'Meis2-R9', 'Th-R9'],
      dtype='object')

In [13]:
# Expression matrix: drop the first column (cell ids) and convert the
# remaining gene columns to floating point.
gene_columns = count.values[:, 1:]
count_X = gene_columns.astype('float')

In [14]:
# Wrap the count matrix in an AnnData object, then label the observations
# with the cell ids and the variables with the gene names.
adata = ad.AnnData(X=count_X)
adata.obs_names = cell_id_array
adata.var_names = gene_id_array

  adata = ad.AnnData(count_X)


In [16]:
# Load the per-cell metadata table (coordinates, morphology, region,
# cell type) from the data folder.
meta_path = f'{data_path}/{meta_file}'
meta = pd.read_csv(meta_path)

In [17]:
# Preview the metadata table exactly as it was read from the CSV.
meta


Unnamed: 0.1,Unnamed: 0,z,y,x,area,eccentricity,Solidity,Neuron,region,density(kde),cell_type
0,LHA1_1,135.130236,190.602062,266.543345,1996.864968,0.810345,0.789966,1,LHA,-20.838445,Inh-14
1,LHA1_2,316.599960,607.126559,394.430873,1838.495064,0.761807,0.901905,1,LHA,-21.272360,Ex-21
2,LHA1_4,347.088372,715.996855,242.855345,2666.160000,0.493653,0.734556,1,LHA,-21.842887,Ex-14
3,LHA1_5,251.254075,509.719876,732.489548,1861.068552,0.721073,0.906929,1,LHA,-21.057905,Inh-12
4,LHA1_6,289.769686,415.791167,570.987479,2638.343064,0.509223,0.650959,1,LHA,-21.337946,Ex-18
...,...,...,...,...,...,...,...,...,...,...,...
36418,LHA3_29908,182.377180,764.498088,337.681042,837.707472,0.335485,0.498335,1,LHA,-21.804141,Ex-25
36419,LHA3_29917,147.182830,681.771790,76.596247,608.595456,0.347409,0.700706,1,LHA,-21.340927,Ex-25
36420,LHA3_29932,164.128898,490.104207,607.835618,650.631912,0.546413,0.554159,1,LHA,-21.362798,Inh-23
36421,LHA3_29957,241.571465,760.089606,224.087652,1070.196624,0.524847,0.497398,1,LHA,-22.183890,Ex-25


In [18]:
# Same display as the preceding cell; `meta` has not been modified in between.
meta

Unnamed: 0.1,Unnamed: 0,z,y,x,area,eccentricity,Solidity,Neuron,region,density(kde),cell_type
0,LHA1_1,135.130236,190.602062,266.543345,1996.864968,0.810345,0.789966,1,LHA,-20.838445,Inh-14
1,LHA1_2,316.599960,607.126559,394.430873,1838.495064,0.761807,0.901905,1,LHA,-21.272360,Ex-21
2,LHA1_4,347.088372,715.996855,242.855345,2666.160000,0.493653,0.734556,1,LHA,-21.842887,Ex-14
3,LHA1_5,251.254075,509.719876,732.489548,1861.068552,0.721073,0.906929,1,LHA,-21.057905,Inh-12
4,LHA1_6,289.769686,415.791167,570.987479,2638.343064,0.509223,0.650959,1,LHA,-21.337946,Ex-18
...,...,...,...,...,...,...,...,...,...,...,...
36418,LHA3_29908,182.377180,764.498088,337.681042,837.707472,0.335485,0.498335,1,LHA,-21.804141,Ex-25
36419,LHA3_29917,147.182830,681.771790,76.596247,608.595456,0.347409,0.700706,1,LHA,-21.340927,Ex-25
36420,LHA3_29932,164.128898,490.104207,607.835618,650.631912,0.546413,0.554159,1,LHA,-21.362798,Inh-23
36421,LHA3_29957,241.571465,760.089606,224.087652,1070.196624,0.524847,0.497398,1,LHA,-22.183890,Ex-25


In [19]:
# Index the metadata by cell id ('Unnamed: 0') so rows can be selected
# with adata.obs_names.
# The guard keeps this cell safe to re-run: after the first run
# 'Unnamed: 0' is the index rather than a column, and calling set_index
# on it again would raise a KeyError.
if 'Unnamed: 0' in meta.columns:
    meta = meta.set_index('Unnamed: 0')


In [20]:
# Copy the per-cell annotation columns from the metadata table into
# adata.obs.
# The metadata rows are put into adata.obs_names order once, up front,
# instead of repeating the same label lookup for every column.
meta_aligned = meta.loc[adata.obs_names]
obs_columns = [
    'area',
    'eccentricity',
    'Solidity',
    'Neuron',
    'region',
    'density(kde)',
    'cell_type',
]
for obs_column in obs_columns:
    adata.obs[obs_column] = meta_aligned[obs_column]

In [21]:
# Per-cell spatial coordinates, with rows in adata.obs_names order and
# columns in x, y, z order.
# NOTE(review): this stores a pandas DataFrame (not a NumPy array) under
# obsm['spatial']; confirm that downstream consumers of the h5ad file
# accept that before relying on array-style indexing.
spatial_columns = ['x', 'y', 'z']
adata.obsm['spatial'] = meta.loc[adata.obs_names, spatial_columns]

In [22]:
# Show the summary of the assembled AnnData object (shape, obs columns,
# obsm keys).
adata

AnnData object with n_obs × n_vars = 36423 × 26
    obs: 'area', 'eccentricity', 'Solidity', 'Neuron', 'region', 'density(kde)', 'cell_type'
    obsm: 'spatial'

In [23]:
# Save the assembled AnnData object to an .h5ad file in the working directory.
output_file = 'wang2021easi_EasiFish.h5ad'
adata.write_h5ad(output_file)