Spatially Resolved Transcriptomics Enables Dissection of Genetic Heterogeneity in Stage III Cutaneous Malignant Melanoma

ShortName: thrane2018spatially

Steps for processing the raw data into an AnnData object:

In [1]:
# Step 1: download the raw data from https://www.spatialresearch.org/resources-published-datasets/

In [4]:
# NOTE(review): this cell looks like a leftover from a different notebook -- the
# path below points at 'mouse_AD/GSE152506_logCPM_counts.txt', not the melanoma
# counts loaded further down, and `adata_X` is not used by any later cell visible
# here. TODO confirm whether the cell can be dropped.
# numpy is not imported by any other visible cell, so import it here to keep this
# cell from failing with a NameError on a fresh kernel.
import numpy as np

logcount_file = 'mouse_AD/GSE152506_logCPM_counts.txt'


# adata = sc.read_csv(logcount_file,delimiter=',',dtype='str')
# read the comma-delimited table as an array of strings
adata_X = np.genfromtxt(logcount_file, delimiter=',',dtype='str')

In [116]:
import scanpy as sc
import pandas as pd
import anndata as ad

In [117]:
# directory holding the ST melanoma files (relative to the working directory);
# the count table is read from here and the resulting .h5ad is written here
data_path = './data/ST_melanoma'

In [118]:
# file name of the tab-separated count matrix inside `data_path`
count_file = 'ST_mel4_rep2_counts.tsv'

In [119]:
# load the tab-separated count table with pandas
count_path = f'{data_path}/{count_file}'
count = pd.read_csv(count_path, sep='\t')

In [120]:
# preview the loaded table: a 'gene' column followed by one column per spot ID
count

Unnamed: 0,gene,6x7,6x8,7x6,7x7,7x8,7x9,7x10,7x11,7x12,...,26x14,27x9,27x10,27x11,27x12,27x13,27x14,28x11,28x12,28x13
0,ELP2 ENSG00000134759,1,0,1,0,1,2,1,0,1,...,1,0,0,0,0,1,0,1,3,0
1,NME1 ENSG00000239672,5,3,2,5,3,5,1,3,7,...,6,1,4,4,1,2,3,6,5,0
2,RP11-89K11.1 ENSG00000259658,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,PRAF2 ENSG00000243279,1,2,6,1,1,1,5,2,2,...,0,0,1,0,1,0,4,1,4,1
4,VWA1 ENSG00000179403,0,0,1,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15986,PCDHB3 ENSG00000113205,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
15987,PKIB ENSG00000135549,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
15988,SCGB1D2 ENSG00000124935,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
15989,SPATA3-AS1 ENSG00000238062,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [121]:
# spot IDs ('<x>x<y>' labels such as '6x7'): every column after the leading 'gene' column
cell_id_array = count.columns[1:]
cell_id_array

Index(['6x7', '6x8', '7x6', '7x7', '7x8', '7x9', '7x10', '7x11', '7x12',
       '7x13',
       ...
       '26x14', '27x9', '27x10', '27x11', '27x12', '27x13', '27x14', '28x11',
       '28x12', '28x13'],
      dtype='object', length=248)

In [122]:
# gene identifiers, taken from the 'gene' column of the count table
gene_id_array = count['gene']
gene_id_array

0                ELP2 ENSG00000134759
1                NME1 ENSG00000239672
2        RP11-89K11.1 ENSG00000259658
3               PRAF2 ENSG00000243279
4                VWA1 ENSG00000179403
                     ...             
15986          PCDHB3 ENSG00000113205
15987            PKIB ENSG00000135549
15988         SCGB1D2 ENSG00000124935
15989      SPATA3-AS1 ENSG00000238062
15990           GFRA1 ENSG00000151892
Name: gene, Length: 15991, dtype: object

In [123]:
# count matrix: drop the leading 'gene' column, flip to spots x genes, cast to integers
count_X = count.to_numpy()[:, 1:].transpose().astype('int')

In [124]:
# assemble the AnnData object: spots are the observations, genes are the variables
adata = ad.AnnData(X=count_X)
adata.obs_names = cell_id_array
adata.var_names = gene_id_array

In [125]:
# Build a per-spot coordinate table indexed by spot ID.
# Each spot ID has the form '<x>x<y>' (e.g. '6x7'): split on the literal 'x' and
# convert both parts to numbers, so 'x' and 'y' are numeric columns rather than
# the strings that a plain string split returns.
spot_coordinate = pd.DataFrame({'coord_ID': cell_id_array})
xy_parts = spot_coordinate['coord_ID'].str.split('x', expand=True)
spot_coordinate['x'] = pd.to_numeric(xy_parts[0])
spot_coordinate['y'] = pd.to_numeric(xy_parts[1])
spot_coordinate = spot_coordinate.set_index('coord_ID')
spot_coordinate

Unnamed: 0_level_0,x,y
coord_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
6x7,6,7
6x8,6,8
7x6,7,6
7x7,7,7
7x8,7,8
...,...,...
27x13,27,13
27x14,27,14
28x11,28,11
28x12,28,12


In [126]:
# Store the spot coordinates as a numeric two-column array, with rows ordered like
# adata.obs_names. The cast to float covers the case where the 'x'/'y' columns
# still hold strings produced by splitting the spot IDs, and a plain array
# (rather than a DataFrame) supports array-style indexing such as
# adata.obsm['spatial'][:, 0].
adata.obsm['spatial'] = (
    spot_coordinate.loc[adata.obs_names, ['x', 'y']].astype(float).to_numpy()
)
adata

AnnData object with n_obs × n_vars = 248 × 15991
    obsm: 'spatial'

In [127]:
# persist the assembled AnnData object as an .h5ad file inside `data_path`
h5ad_path = f'{data_path}/ST_mel4_rep2.h5ad'
adata.write_h5ad(h5ad_path)