In [5]:
from glob import glob

import pandas as pd
import plotnine as p
from tqdm import tqdm
import anndata

Populating the interactive namespace from numpy and matplotlib


### Data source
Slide-Seq V2 data of the mouse brain, originating from the region containing the hippocampus, partial cortex, and thalamus, were downloaded from the data portal for Stickels, Murray et al.:

https://singlecell.broadinstitute.org/single_cell/study/SCP815/sensitive-spatial-genome-wide-expression-profiling-at-cellular-resolution#study-download

In [6]:
# Paths of the gzipped digital-expression text files, one per puck.
# Not referenced by the later cells visible in this notebook.
data = [
    '../Puck_190921_21.digital_expression.txt.gz',
    '../Puck_191204_01.digital_expression.txt.gz',
    '../Puck_200115_08.digital_expression.txt.gz',
]

In [7]:
# Load every puck's .h5ad file and attach the bead coordinates to its obs table.
adatas = []

# Sort the matches: glob() returns paths in arbitrary, filesystem-dependent
# order, and the load order decides which batch index each puck receives
# when the objects are concatenated later.
for fname in tqdm(sorted(glob('../Puck_*.h5ad'))):
    print(fname)
    adata = anndata.read_h5ad(fname)

    # The bead locations sit in a CSV next to the .h5ad, named after the same puck.
    locations = pd.read_csv(
        fname.replace('.digital_expression.h5ad', '_bead_locations.csv'),
        index_col=0,
    )

    # Assignment aligns on the index: the first CSV column (used as the index)
    # is matched against adata.obs' index.
    adata.obs['xcoord'] = locations['xcoord']
    adata.obs['ycoord'] = locations['ycoord']

    adatas.append(adata)

 67%|██████▋   | 2/3 [00:00<00:00,  8.64it/s]

../Puck_190921_21.digital_expression.h5ad
../Puck_191204_01.digital_expression.h5ad
../Puck_200115_08.digital_expression.h5ad


100%|██████████| 3/3 [00:00<00:00,  5.54it/s]


In [8]:
# Merge the per-puck objects into one AnnData using an outer join.
# In the displayed result each observation name carries a '-<batch>' suffix
# and obs gains a 'batch' column.
first_puck, remaining_pucks = adatas[0], adatas[1:]
all_data = first_puck.concatenate(remaining_pucks, join='outer')

In [9]:
# Display the summary repr of the merged object (dimensions and obs columns).
all_data

AnnData object with n_obs × n_vars = 107550 × 24582
    obs: 'filename', 'xcoord', 'ycoord', 'batch'

In [10]:
# Inspect the per-observation table: source filename, bead coordinates and batch.
all_data.obs

Unnamed: 0,filename,xcoord,ycoord,batch
RGBOGBGRRRORRB-0,../Puck_190921_21.digital_expression.txt.gz,5006.6,3019.8,0
ROOBOGBBRBBGRR-0,../Puck_190921_21.digital_expression.txt.gz,5182.7,2194.0,0
BRBROROGORRBGR-0,../Puck_190921_21.digital_expression.txt.gz,3258.8,5555.6,0
RGOGBRGROBGGBG-0,../Puck_190921_21.digital_expression.txt.gz,4633.1,4714.2,0
GGROBBGGRGGGBG-0,../Puck_190921_21.digital_expression.txt.gz,4655.2,3735.0,0
...,...,...,...,...
ACAATACAGATACT-2,../Puck_200115_08.digital_expression.txt.gz,3025.5,5516.0,2
GGTCAGGTTAACGT-2,../Puck_200115_08.digital_expression.txt.gz,3069.7,3211.9,2
CATTATGGAATTCT-2,../Puck_200115_08.digital_expression.txt.gz,3507.1,2811.0,2
GACTTTTCTTAAAG-2,../Puck_200115_08.digital_expression.txt.gz,2155.0,1264.3,2


In [11]:
# Lookup table from puck ID to a tissue label; used to fill obs['tissue'].
# Only Puck_190921_21, Puck_191204_01 and Puck_200115_08 correspond to the
# files loaded in this notebook.
# NOTE(review): 'unknown' presumably means the tissue was not identified for
# that puck — confirm against the study metadata.
tissue = {
    'Puck_190921_19': 'mouse E15 brain',
    'Puck_190921_21': 'mouse hippocampus',
    'Puck_190926_01': 'unknown',
    'Puck_190926_02': 'unknown',
    'Puck_190926_03': 'mouse embryo Slide-seqV2',
    'Puck_190926_06': 'unknown',
    'Puck_191007_07': 'unknown',
    'Puck_191204_01': 'mouse hippocampus',
    'Puck_200115_08': 'mouse hippocampus'
}

In [12]:
# Label every observation with the tissue of the puck it came from.
# The stored filenames look like '../Puck_190921_21.digital_expression.txt.gz',
# so splitting on '.' and taking element 0 always yields '' (the path starts
# with '..'), and every lookup in `tissue` misses, leaving the column all-NaN.
# Extract the 'Puck_<digits>_<digits>' ID itself so it matches the dict keys.
all_data.obs['tissue'] = (
    all_data.obs['filename']
    .astype(str)
    .str.extract(r'(Puck_\d+_\d+)', expand=False)
    .map(tissue)
)

In [13]:
# Re-display the summary to confirm that 'tissue' is now listed among the obs columns.
all_data

AnnData object with n_obs × n_vars = 107550 × 24582
    obs: 'filename', 'xcoord', 'ycoord', 'batch', 'tissue'

In [14]:
# Save the combined, tissue-annotated object as an .h5ad file in the working directory.
all_data.write_h5ad('stickels_et_al_2020_slide_seqV2.h5ad')

... storing 'filename' as categorical
... storing 'tissue' as categorical
