# cell2location mapping of newly annotated cell states from the donor P13 SN + multiome atlas

Mapped onto all 8 relevant Visium chips

11.2021

In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import seaborn as sns
import scanpy as sc
import anndata
import matplotlib.pyplot as plt

In [2]:
# Seed NumPy's global random number generator so NumPy-driven randomness is repeatable between runs.
np.random.seed(0)

In [3]:
# Scanpy session setup: logging level, a printed record of package versions, and figure defaults.
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()  # prints the package versions listed in the cell output
sc.settings.set_figure_params(dpi=80)  # low dpi (dots per inch) yields small inline figures



-----
anndata     0.7.5
scanpy      1.7.1
sinfo       0.3.1
-----
PIL                 8.1.2
anndata             0.7.5
anyio               NA
attr                20.3.0
babel               2.9.0
backcall            0.2.0
brotli              NA
cairo               1.20.0
certifi             2020.12.05
cffi                1.14.5
chardet             4.0.0
cloudpickle         1.6.0
colorama            0.4.4
cycler              0.10.0
cython_runtime      NA
cytoolz             0.11.0
dask                2021.03.1
dateutil            2.8.1
decorator           4.4.2
fsspec              0.8.7
get_version         2.1
google              NA
h5py                3.1.0
idna                2.10
igraph              0.8.3
ipykernel           5.5.0
ipython_genutils    0.2.0
ipywidgets          7.6.3
jedi                0.18.0
jinja2              2.11.3
joblib              1.0.1
json5               NA
jsonschema          3.2.0
jupyter_server      1.4.1
jupyterlab_server   2.3.0
kiwisolver          1.3.1


In [4]:
%%bash

# List every installed Python package with its version, as a record of the environment.
# NOTE(review): this runs whichever `pip` is first on the shell PATH - confirm it belongs
# to the same environment as the notebook kernel.
pip freeze

absl-py==0.12.0
aiohttp==3.7.4.post0
airr==1.3.1
alabaster==0.7.12
alembic @ file:///home/conda/feedstock_root/build_artifacts/alembic_1613901514078/work
anndata @ file:///home/conda/feedstock_root/build_artifacts/anndata_1605539061264/work
annoy @ file:///home/conda/feedstock_root/build_artifacts/python-annoy_1610271511811/work
anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1614388751160/work/dist
arboreto==0.1.6
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1610522574055/work
async-generator==1.10
async-timeout==3.0.1
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1605083924122/work
Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1605182336601/work
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
backports.functools-lru-cache==1.6.1
bbknn @ file:///opt/conda/conda-bld/bbknn_1616434096000/work
bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_16122134

# Reading in Visium data for donors P13, P14 and Hrv43

In [5]:
# Read every relevant Visium chip into its own AnnData object, keyed by chip ID.
# All chips are read in one go, so this cell may break if memory is short.

# Chip IDs are listed once per sequencing batch; the batch decides where the
# spaceranger output of a chip lives on disk.
batch_1_IDs = ['34914_WS_PLA_S9101764',  # P13 main 4 chips
               '34914_WS_PLA_S9101765',
               '34914_WS_PLA_S9101766',
               '34914_WS_PLA_S9101767',
               '34882_WS_PLA_S9101769',  # P14 2 chips
               '34882_WS_PLA_S9101770',
               ]

# from batch 2
batch_2_IDs = ['36173_Pla_HDBR9518710',  # Hrv43
               '36173_Pla_Camb9518737',  # P13 extra chip
               ]

IDs = batch_1_IDs + batch_2_IDs

adatas_visium = {}

for ID in IDs:
    print(ID)

    # Exactly one branch sets `path`; an unknown ID fails loudly instead of silently
    # re-reading the directory of the previous chip.
    # Note the different separator before 'premrna' in the two batches.
    if ID in batch_1_IDs:
        path = '/lustre/scratch117/cellgen/team292/aa22/data/202008_MFI_Visium/20200901_new_data_spaceranger110/spaceranger110_count_' + ID + '_GRCh38-3_0_0-premrna/'
    elif ID in batch_2_IDs:
        path = '/lustre/scratch117/cellgen/team292/aa22/data/202103_MFI_Visium_batch_2/spaceranger110_count_' + ID + '_GRCh38-3_0_0_premrna/'
    else:
        raise ValueError('No spaceranger output path known for chip ID ' + ID)

    adatas_visium[ID] = sc.read_visium(path)
    adatas_visium[ID].obs['sample'] = ID
    # make spot names unique across chips by appending the chip ID to each barcode
    adatas_visium[ID].obs['barcode_sample'] = [idx + '_' + ID for idx in adatas_visium[ID].obs_names]
    adatas_visium[ID].obs.set_index('barcode_sample', inplace=True)
    adatas_visium[ID].var_names_make_unique()
    

reading /lustre/scratch117/cellgen/team292/aa22/data/202008_MFI_Visium/20200901_new_data_spaceranger110/spaceranger110_count_34914_WS_PLA_S9101764_GRCh38-3_0_0-premrna/filtered_feature_bc_matrix.h5


34914_WS_PLA_S9101764


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 (0:00:01)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading /lustre/scratch117/cellgen/team292/aa22/data/202008_MFI_Visium/20200901_new_data_spaceranger110/spaceranger110_count_34914_WS_PLA_S9101765_GRCh38-3_0_0-premrna/filtered_feature_bc_matrix.h5


34914_WS_PLA_S9101765


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 (0:00:01)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading /lustre/scratch117/cellgen/team292/aa22/data/202008_MFI_Visium/20200901_new_data_spaceranger110/spaceranger110_count_34914_WS_PLA_S9101766_GRCh38-3_0_0-premrna/filtered_feature_bc_matrix.h5


34914_WS_PLA_S9101766


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 (0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading /lustre/scratch117/cellgen/team292/aa22/data/202008_MFI_Visium/20200901_new_data_spaceranger110/spaceranger110_count_34914_WS_PLA_S9101767_GRCh38-3_0_0-premrna/filtered_feature_bc_matrix.h5


34914_WS_PLA_S9101767


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 (0:00:01)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading /lustre/scratch117/cellgen/team292/aa22/data/202008_MFI_Visium/20200901_new_data_spaceranger110/spaceranger110_count_34882_WS_PLA_S9101769_GRCh38-3_0_0-premrna/filtered_feature_bc_matrix.h5


34882_WS_PLA_S9101769


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 (0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading /lustre/scratch117/cellgen/team292/aa22/data/202008_MFI_Visium/20200901_new_data_spaceranger110/spaceranger110_count_34882_WS_PLA_S9101770_GRCh38-3_0_0-premrna/filtered_feature_bc_matrix.h5


34882_WS_PLA_S9101770


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 (0:00:01)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading /lustre/scratch117/cellgen/team292/aa22/data/202103_MFI_Visium_batch_2/spaceranger110_count_36173_Pla_HDBR9518710_GRCh38-3_0_0_premrna/filtered_feature_bc_matrix.h5


36173_Pla_HDBR9518710


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 (0:00:00)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
reading /lustre/scratch117/cellgen/team292/aa22/data/202103_MFI_Visium_batch_2/spaceranger110_count_36173_Pla_Camb9518737_GRCh38-3_0_0_premrna/filtered_feature_bc_matrix.h5


36173_Pla_Camb9518737


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
 (0:00:01)
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [6]:
# adding QC metrics to the visium objects
for ID in IDs:
    adata = adatas_visium[ID]

    # Summing a sparse matrix along an axis gives a 2-D (n_spots, 1) matrix;
    # flatten it so that .obs receives an ordinary 1-D column.
    n_counts = np.asarray(adata.X.sum(axis=1)).ravel()
    adata.obs['n_counts'] = n_counts
    adata.obs['log_counts'] = np.log(adata.obs['n_counts'])
    adata.obs['n_genes'] = np.asarray((adata.X > 0).sum(axis=1)).ravel()

    # for each spot compute the fraction of counts in mito ('MT-') genes vs. all genes,
    # reusing the per-spot totals computed above
    mito_genes = [name for name in adata.var_names if name.startswith('MT-')]
    mito_counts = np.asarray(adata[:, mito_genes].X.sum(axis=1)).ravel()
    adata.obs['percent_mito'] = mito_counts / n_counts

    # saving raw counts in .raw for now
    adata.raw = adata.copy()

In [7]:
# log transform the data - OPTIONAL, doing here for visualisation
# The raw counts were stored in .raw before this step, so they stay available.
# scanpy records the transform under .uns['log1p']; chips that already carry that key
# are skipped so that re-running this cell does not log-transform the data twice.
for ID in IDs:
    if 'log1p' not in adatas_visium[ID].uns:
        sc.pp.log1p(adatas_visium[ID])

In [8]:
# load the selected L/R genes that will be plotted in space to look for interesting patterns

MEs = ['ME_1_early_inv_column', 'ME_2_inv_front', 'ME_3_artery_remodelling', 'ME_4_fusion']

# simple L/R: one gene list per ME, each read from column '0' of its own csv file
relevant_LR_simple_per_ME = {}
for ME in MEs:
    simple_LR_table = pd.read_csv('./20211212_LR_to_vis_in_space_simple_' + ME + '.csv', index_col=0)
    relevant_LR_simple_per_ME[ME] = list(simple_LR_table['0'])

# complex L/R: a single gene list covering all MEs
complex_LR_table = pd.read_csv('./20211212_LR_to_vis_in_space_complex_all_MEs.csv', index_col=0)
relevant_LR_complex = list(complex_LR_table['0'])

In [9]:
# display the gene list loaded for 'ME_1_early_inv_column'
relevant_LR_simple_per_ME['ME_1_early_inv_column']

['BST1',
 'LRPAP1',
 'NGF',
 'DPP4',
 'PLAU',
 'SLC3A2',
 'CD24',
 'TMED2',
 'NOTCH1',
 'PDGFA',
 'CD47',
 'WNT6',
 'CXCL2',
 'TNFRSF12A']

In [None]:
# Spatial expression of a few selected genes on one chip.
# use_raw=False: .raw is set on these objects, so scanpy would otherwise colour the spots
# by the raw counts in .raw instead of the log-transformed values in .X.
sc.pl.spatial(adatas_visium['34914_WS_PLA_S9101767'],
              color=['JAM2', 'JAM3',
                     'PGR', 'HSD3B1'], ncols=2,
              cmap='YlGnBu', use_raw=False)

In [None]:
# One saved figure per (ME, chip) pair, showing the simple L/R genes selected for that ME.
# use_raw=False: .raw is set on these objects, so scanpy would otherwise colour the spots
# by the raw counts in .raw instead of the log-transformed values in .X.
for ME in MEs:
    for ID in IDs:
        sc.pl.spatial(adatas_visium[ID],
                      color=relevant_LR_simple_per_ME[ME], cmap='YlGnBu', use_raw=False,
                      save='_relevant_LR_in_space_simple_' + ME + '_ID_' + ID + '.pdf')

In [None]:
# One saved figure per chip, showing the complex L/R genes of all MEs.
# should be in order of MEs
# NOTE(review): presumably this refers to the order of genes in relevant_LR_complex - confirm.
# use_raw=False: .raw is set on these objects, so scanpy would otherwise colour the spots
# by the raw counts in .raw instead of the log-transformed values in .X.
for ID in IDs:
    sc.pl.spatial(adatas_visium[ID],
                  color=relevant_LR_complex, cmap='YlGnBu', use_raw=False,
                  save='_relevant_LR_in_space_complex_ID_' + ID + '.pdf')

In [7]:
# display the summary of one chip (the extra P13 chip) to check which fields it carries
adatas_visium['36173_Pla_Camb9518737']

AnnData object with n_obs × n_vars = 4071 × 33538
    obs: 'in_tissue', 'array_row', 'array_col', 'sample', 'n_counts', 'log_counts', 'n_genes', 'percent_mito'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'

In [8]:
# 23.11.2021 mapping where inv troph annotations are corrected
# Reference used: full atlas of donor P13, with inv troph states from the P13-only
# SN + multiome analysis and all other states from P13 within the all-donors analysis,
# mapped onto all 8 relevant Visium chips.
# This run is referred to as 'hybrid' in this notebook.

path_hybrid = (
    '/lustre/scratch117/cellgen/team292/aa22/cell2loc_files/results/'
    '20211123_P13_new_full_atlas_SN_and_multiome_updated_onto_all_relevant_Visium_chips_with_LF_gene_filter/'
    'cell2location_map/'
)

# a single object holding all Visium chips used in the run, concatenated (here all 8 relevant chips)
sp_mapping_hybrid = sc.read(path_hybrid + 'sp.h5ad')



In [9]:
# Copy the values stored under obsm['q05_cell_abundance_w_sf'] of the 'hybrid' run
# into the .obs of each chip, one column per cell state.
abundance_hybrid = sp_mapping_hybrid.obsm['q05_cell_abundance_w_sf']

for ID in IDs:
    print(ID)
    chip_spots = adatas_visium[ID].obs_names
    for col in abundance_hybrid.columns:
        # drop the first 18 characters of the column name and tag the rest with the run name
        col_chopped = col[18:] + '_hybrid'
        # .loc picks the rows of this chip's spots out of the concatenated table
        adatas_visium[ID].obs[col_chopped] = abundance_hybrid.loc[chip_spots, col]
        
        

34914_WS_PLA_S9101764
34914_WS_PLA_S9101765
34914_WS_PLA_S9101766
34914_WS_PLA_S9101767
34882_WS_PLA_S9101769
34882_WS_PLA_S9101770
36173_Pla_HDBR9518710
36173_Pla_Camb9518737


In [10]:
# names of all .obs columns that belong to the 'hybrid' run, taken from one chip
obs_columns_first_chip = adatas_visium['34914_WS_PLA_S9101764'].obs.columns
c2l_tags_hybrid = [name for name in obs_columns_first_chip if '_hybrid' in name]
c2l_tags_hybrid

['w_sf_B_cells_hybrid',
 'w_sf_DC_hybrid',
 'w_sf_EVT_1_hybrid',
 'w_sf_EVT_2_hybrid',
 'w_sf_Endo_F_hybrid',
 'w_sf_Endo_L_hybrid',
 'w_sf_Endo_M_hybrid',
 'w_sf_GC_hybrid',
 'w_sf_Granulocytes_hybrid',
 'w_sf_HOFB_hybrid',
 'w_sf_ILC3_hybrid',
 'w_sf_M3_hybrid',
 'w_sf_MO_hybrid',
 'w_sf_NK_hybrid',
 'w_sf_PV_hybrid',
 'w_sf_Plasma_hybrid',
 'w_sf_SCT_hybrid',
 'w_sf_T_cells_hybrid',
 'w_sf_VCT_hybrid',
 'w_sf_VCT_CCC_hybrid',
 'w_sf_VCT_fusing_hybrid',
 'w_sf_VCT_p_hybrid',
 'w_sf_dDC_hybrid',
 'w_sf_dEpi_lumenal_hybrid',
 'w_sf_dEpi_secretory_hybrid',
 'w_sf_dM1_hybrid',
 'w_sf_dM2_hybrid',
 'w_sf_dNK1_hybrid',
 'w_sf_dNK2_hybrid',
 'w_sf_dNK3_hybrid',
 'w_sf_dS1_hybrid',
 'w_sf_dS2_hybrid',
 'w_sf_dS3_hybrid',
 'w_sf_dT_cells_hybrid',
 'w_sf_dT_regs_hybrid',
 'w_sf_eEVT_hybrid',
 'w_sf_fF1_hybrid',
 'w_sf_fF2_hybrid',
 'w_sf_iEVT_hybrid',
 'w_sf_uSMC_hybrid']

In [11]:
# .obs column names of the 'hybrid' run for the invading trophoblast cell states
# NOTE(review): the earlier comment here mentioned a 'hybrid_old' mapping; no such
# mapping is loaded in the visible part of this notebook - confirm it can be forgotten.
invading_troph_states = ['VCT_CCC', 'EVT_1', 'EVT_2', 'eEVT', 'iEVT', 'GC']
troph_tags = ['w_sf_{}_hybrid'.format(state) for state in invading_troph_states]
troph_tags

['w_sf_VCT_CCC_hybrid',
 'w_sf_EVT_1_hybrid',
 'w_sf_EVT_2_hybrid',
 'w_sf_eEVT_hybrid',
 'w_sf_iEVT_hybrid',
 'w_sf_GC_hybrid']

In [None]:
# invading trophoblast states only: one figure per chip, four panels per row

for ID in IDs:
    print(ID)
    sc.pl.spatial(
        adatas_visium[ID],
        color=troph_tags,
        ncols=4,
        cmap='YlGnBu',
    )

In [None]:
# every cell state of the 'hybrid' run: one figure per chip

for ID in IDs:
    print(ID)
    sc.pl.spatial(
        adatas_visium[ID],
        color=c2l_tags_hybrid,
        cmap='YlGnBu',
    )

# 23.11.2021
# this run looks great - carrying on to define regions based on this (with SpatialDE2) in notebook S1 and run spatial colocation model for troph trajectories