In [13]:
import scanpy as sc
import pandas as pd
import numpy as np
import seaborn as sns

from anndata import AnnData

In [19]:
# Use the first CSV column as the row labels, then transpose the table so that
# the file's columns become the rows of raw_df (and vice versa).
raw_df = pd.read_csv('./brainCscaleddata.csv', sep=',', index_col=0).transpose()


In [55]:
# Build the AnnData container from the transposed expression table:
# one observation per row of raw_df, one variable per column of raw_df.
# The float32 cast is done on the NumPy side rather than through the
# `dtype=` argument of AnnData(), which is deprecated in anndata >= 0.9.
adata = AnnData(X=raw_df.to_numpy(dtype=np.float32))
# Keep the original row / column labels as explicit annotation columns.
adata.obs['cell_id'] = raw_df.index
adata.var['gene_id'] = raw_df.columns
adata

AnnData object with n_obs × n_vars = 1803 × 32285
    obs: 'cell_id'
    var: 'gene_id'

### tissue_positions.csv
This text file contains a table with rows that correspond to spots. From Space Ranger v2.0 onwards this file, which was previously named tissue_positions_list.csv, is renamed and includes a header row. Excluding the header row, the file has 4,992 rows for Visium slides with a 6.5 mm Capture Area and 14,336 rows for Visium slides with 11 mm Capture Area which is the number of spots in the spatial array. Columns correspond to the following fields:
 - barcode: The sequence of the barcode associated to the spot.
 - in_tissue: Binary, indicating if the spot falls inside (1) or outside (0) of tissue.
 - array_row: For Visium slide (6.5 mm Capture Area), the row coordinate of the spot in the array from 0 to 77. The array has 78 rows. For Visium slides (11 mm Capture Area), the row coordinate of the spot range from 0 to 127 as the array has 128 rows.
 - array_col: The column coordinate of the spot in the array. In order to express the "orange crate" arrangement of the spots, for Visium slides (6.5 mm Capture Area) this column index uses even numbers from 0 to 126 for even rows, and odd numbers from 1 to 127 for odd rows with each row (even or odd) resulting in 64 spots. For Visium slides (11 mm Capture Area) this column index uses even numbers from 0 to 222 for even rows, and odd numbers from 1 to 223 for odd rows with each row (even or odd) resulting in 111 spots.
 - pxl_row_in_fullres: The row pixel coordinate of the center of the spot in the full resolution image.
 - pxl_col_in_fullres: The column pixel coordinate of the center of the spot in the full resolution image.

In [139]:
# Load the spot-position table. header=None means every line of the file is
# read as data (no header row is expected); the first column (the barcode)
# becomes the index.
position_df = pd.read_csv('./tissue_positions_list.csv',
                          sep=',',
                          header=None,
                          index_col=0)
# set the column names
position_df.columns = ['in_tissue','array_row','array_col','pxl_row_in_fullres','pxl_col_in_fullres']
# replace '-' with '.', example: AAACAAGTATCTCCCA-1 to AAACAAGTATCTCCCA.1
# regex=False makes this a literal substitution on every pandas version
# (the default value of `regex` differs between pandas 1.x and 2.x).
position_df.index = position_df.index.str.replace('-', '.', regex=False)
# get the cell positions, in the same order as the rows of adata.obs
cell_ids = adata.obs['cell_id'].tolist()
# Fail early, with the offending barcodes named, if the expression matrix
# contains cells that are absent from the position table.
missing_ids = pd.Index(cell_ids).difference(position_df.index)
if len(missing_ids) > 0:
    raise KeyError(
        f"{len(missing_ids)} cell_id value(s) not found in tissue_positions_list.csv, "
        f"e.g. {list(missing_ids[:5])}"
    )
cell_info_df = position_df.loc[cell_ids]
# set the matrix position
# NOTE(review): x holds the array row and y the array column — confirm this
# matches the axis convention expected by downstream code.
adata.obs['x'] = cell_info_df['array_row'].to_numpy()
adata.obs['y'] = cell_info_df['array_col'].to_numpy()
# set the figure position (spot-centre pixel coordinates in the full-resolution image)
adata.obs['fig_x'] = cell_info_df['pxl_row_in_fullres'].to_numpy()
adata.obs['fig_y'] = cell_info_df['pxl_col_in_fullres'].to_numpy()

In [140]:
# Display the per-observation annotation table (cell_id plus the x / y and
# fig_x / fig_y columns assigned to adata.obs) as a visual sanity check.
adata.obs

Unnamed: 0,cell_id,x,y,fig_x,fig_y
0,AAACAAGTATCTCCCA.1,50,102,3091,8043
1,AAACATTTCCCGGATT.1,61,97,3493,9501
2,AAACCCGAACGAAATC.1,45,115,2088,7392
3,AAACCTAAGCAGCCGG.1,65,83,4571,10019
4,AAACGAGACGGTTGAT.1,35,79,4822,6024
...,...,...,...,...,...
1798,TTGTGTATGCCACCAA.1,56,60,6313,8797
1799,TTGTGTTTCCCGAAAG.1,51,59,6380,8131
1800,TTGTTAGCAAATTCGA.1,22,42,7627,4256
1801,TTGTTCAGTGTGCTAC.1,24,64,5948,4545
