## Contents:
* [Loading packages](#Loading_packages)
* [Reading cell2location model output](#read)


* [Running factorisation to identify groups of co-located cell types / tissue compartments](#factorisation)
* [Plot locations of all cell types](#Fig3E)
* [Running factorisation including the composition of neighbouring spots](#factorisation_with_neighbours)

### Loading packages <a class="anchor" id="Loading_packages"></a>

In [1]:
# Import modules and packages:
# NOTE: %pylab populates the interactive namespace from numpy and matplotlib
# (see the cell output); the bare `rcParams` used a few lines below, before its
# explicit import, and the `matplotlib` name used in the plotting cell rely on it.
%pylab inline
import pandas as pd
import sys, ast, os
# hide the right and top axes spines in all figures
rcParams['axes.spines.right'] = False
rcParams['axes.spines.top'] = False
import pickle as pickle
import numpy as np
import time
import itertools
# only referenced by the commented-out THEANO_FLAGS line below
data_type = 'float32'

#os.environ["THEANO_FLAGS"] = 'device=cuda,floatX=' + data_type + ',force_device=True'
# Put two absolute, machine-specific directories near the front of the module
# search path, ahead of most other entries.
# NOTE(review): presumably local checkouts of cell2location so that the import
# below picks them up -- confirm, and adjust these paths on other machines.
sys.path.insert(1, '/nfs/team205/vk7/sanger_projects/cell2location/')
sys.path.insert(1, '/nfs/team205/vk7/sanger_projects/BayraktarLab/cell2location/')

%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# NOTE: `os` was already imported above; this repeat is harmless
import os
import cell2location.models as c2l
import anndata
import scanpy as sc

from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text

Populating the interactive namespace from numpy and matplotlib




### Reading cell2location model output <a class="anchor" id="read"></a>

In [2]:
# Read cell2location model results.
# `c2l_results_folder` holds the cell2location runs; `results_folder` holds the
# SPOTlight benchmark results and is also where plots are written later on.
c2l_results_folder = '/nfs/team205/vk7/sanger_projects/cell2location_paper/notebooks/results/visium_human_ln/'
results_folder = '/nfs/team205/vk7/sanger_projects/cell2location_paper/notebooks/selected_results/benchmarking/lymph_nodes_analysis/SPOTlight/'

run_name = 'CoLocationModelNB4V2_34clusters_4039locations_10241genes_input_inferred_V4_batch1024_l2_0001_n_comb50_5_cps5_fpc3_alpha001'

sp_data_file = f'{c2l_results_folder}std_model/{run_name}/sp_with_clusters.h5ad'
# `read_h5ad` is the explicit reader for .h5ad files; the generic
# `anndata.read` alias used previously is deprecated.
adata_vis = anndata.read_h5ad(sp_data_file)
# Label every spot with the first key found under `uns['spatial']`.
adata_vis.obs['sample'] = next(iter(adata_vis.uns['spatial']))

Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [3]:
from re import sub

# Load the SPOTlight results table, indexed by the 'rn' column.
SPOTlight = pd.read_csv(f'{results_folder}results_hvg5k.csv',
                        index_col='rn')
# 'res_ss' is removed so that it does not enter the row sums below.
# NOTE(review): presumably a per-spot residual, not a cell type -- confirm.
SPOTlight = SPOTlight.drop(columns=['res_ss'])

# Rewrite the column names, applying the (regex pattern, replacement) pairs in
# order: first '.' -> '_', then the trailing '_' of selected markers -> '+'.
# The first pattern is a raw string: '\.' in a normal string literal is an
# invalid escape sequence.
name_fixes = [
    (r'\.', '_'),
    ('CD4_', 'CD4+'),
    ('CD8_', 'CD8+'),
    ('CCR7_', 'CCR7+'),
    ('TIM3_', 'TIM3+'),
    ('CD161_', 'CD161+'),
]
renamed_columns = list(SPOTlight.columns)
for pattern, replacement in name_fixes:
    renamed_columns = [sub(pattern, replacement, name) for name in renamed_columns]
SPOTlight.columns = renamed_columns

# compute cell proportions: divide every row by its sum; rows summing to zero
# give NaN (0/0), which is set to 0
SPOTlight = SPOTlight.div(SPOTlight.sum(axis=1), axis=0).fillna(0)

SPOTlight = SPOTlight.add_prefix('q05_spot_factors')

# Keep only the spots present in the SPOTlight table. The explicit copy avoids
# writing to `.obs` of an AnnData view (which would otherwise be copied
# implicitly, with a warning).
adata_vis = adata_vis[adata_vis.obs_names.isin(SPOTlight.index), :].copy()
# `.loc` reorders the SPOTlight rows to the spot order of `adata_vis`
adata_vis.obs[SPOTlight.columns] = SPOTlight.loc[adata_vis.obs_names, :].values

# path for saving figures; the previous scanpy figure directory is kept in
# `sc_figpath`
fig_path = f'{results_folder}plots/figures/'
sc_figpath = sc.settings.figdir
sc.settings.figdir = fig_path

Trying to set attribute `.obs` of view, copying.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [4]:
# Display the SPOTlight column names (renamed and prefixed with 'q05_spot_factors').
SPOTlight.columns

Index(['q05_spot_factorsB_activated', 'q05_spot_factorsB_Cycling',
       'q05_spot_factorsB_GC_DZ', 'q05_spot_factorsB_GC_LZ',
       'q05_spot_factorsB_GC_prePB', 'q05_spot_factorsB_IFN',
       'q05_spot_factorsB_mem', 'q05_spot_factorsB_naive',
       'q05_spot_factorsB_plasma', 'q05_spot_factorsB_preGC',
       'q05_spot_factorsDC_CCR7+', 'q05_spot_factorsDC_cDC1',
       'q05_spot_factorsDC_cDC2', 'q05_spot_factorsDC_pDC',
       'q05_spot_factorsEndo', 'q05_spot_factorsFDC', 'q05_spot_factorsILC',
       'q05_spot_factorsMacrophages_M1', 'q05_spot_factorsMacrophages_M2',
       'q05_spot_factorsMast', 'q05_spot_factorsMonocytes',
       'q05_spot_factorsNK', 'q05_spot_factorsNKT', 'q05_spot_factorsT_CD4+',
       'q05_spot_factorsT_CD4+_naive', 'q05_spot_factorsT_CD4+_TfH',
       'q05_spot_factorsT_CD4+_TfH_GC', 'q05_spot_factorsT_CD8+_CD161+',
       'q05_spot_factorsT_CD8+_cytotoxic', 'q05_spot_factorsT_CD8+_naive',
       'q05_spot_factorsT_TfR', 'q05_spot_factorsT_TIM3+',
 

### Running factorisation to identify groups of co-located cell types / tissue compartments <a class="anchor" id="factorisation"></a>

In [5]:
from re import sub

# Reorder the SPOTlight rows to the spot order of `adata_vis` BEFORE taking
# `.values`: the matrix stored below carries no index, so its rows must line up
# with `adata_vis.obs_names`. `.loc` with a list of labels returns a new frame,
# so `SPOTlight` itself is left untouched.
SPOTlight_copy = SPOTlight.loc[adata_vis.obs_names, :]
# strip the prefix so the columns can be selected by the names in `fact_names`
SPOTlight_copy.columns = [sub('q05_spot_factors', '', i) for i in SPOTlight_copy.columns]
# Overwrite the stored 'post_sample_q05' spot factors with the SPOTlight
# proportions, with columns ordered as in `fact_names`.
adata_vis.uns['mod']['post_sample_q05']['spot_factors'] = SPOTlight_copy[adata_vis.uns['mod']['fact_names']].values
adata_vis.uns['mod']['obs_names'] = list(adata_vis.obs_names)

# Sanity check: both matrices should have one row per spot.
(
    adata_vis.uns['mod']['post_sample_q05']['spot_factors'].shape,
    adata_vis.uns['mod']['post_sample_means']['spot_factors'].shape,
    len(adata_vis.obs_names),
)

((4039, 34), (4039, 34), 4039)

from cell2location import run_colocation

# Run the 'CoLocatedGroupsSklearnNMF' co-location model on `adata_vis`.
# `n_fact` covers the values 11..15; results are exported under
# `results_folder` with the run-name suffix 'tol0.0001'.
res_dict, sp_data = run_colocation(
    adata_vis,
    model_name='CoLocatedGroupsSklearnNMF',
    verbose=False,
    return_all=True,
    train_args={
        'n_fact': np.arange(11, 16),
        'n_iter': 20000,
        'sample_name_col': 'sample',
        'mode': 'normal',
        'n_type': 'restart',
        'n_restarts': 5,
    },
    model_kwargs={
        'init': 'random',
        'random_state': 0,
        'nmf_kwd_args': {'tol': 0.0001},
    },
    posterior_args={},
    export_args={
        'path': results_folder,
        'run_name_suffix': 'tol0.0001',
    },
)

### Plot locations of all cell types <a class="anchor" id="Fig3E"></a> 

In [6]:
# making copy to transform to assign nice names
clust_names_orig = ['q05_spot_factors' + i for i in adata_vis.uns['mod']['fact_names']]
clust_names = adata_vis.uns['mod']['fact_names']
adata_vis.obs[clust_names] = (adata_vis.obs[clust_names_orig])

clust_names

array(['B_Cycling', 'B_GC_DZ', 'B_GC_LZ', 'B_GC_prePB', 'B_IFN',
       'B_activated', 'B_mem', 'B_naive', 'B_plasma', 'B_preGC',
       'DC_CCR7+', 'DC_cDC1', 'DC_cDC2', 'DC_pDC', 'Endo', 'FDC', 'ILC',
       'Macrophages_M1', 'Macrophages_M2', 'Mast', 'Monocytes', 'NK',
       'NKT', 'T_CD4+', 'T_CD4+_TfH', 'T_CD4+_TfH_GC', 'T_CD4+_naive',
       'T_CD8+_CD161+', 'T_CD8+_cytotoxic', 'T_CD8+_naive', 'T_TIM3+',
       'T_TfR', 'T_Treg', 'VSMC'], dtype=object)

In [7]:
# Plot the spatial map of every cell type in a 5-column grid and save it.
ct_list = clust_names.copy()

# Write figures under the results folder. `sc_figpath` is deliberately NOT
# reassigned here: it already holds scanpy's previous figure directory (saved
# in the cell that loads the SPOTlight results), and reading `figdir` again
# after it has been redirected would overwrite that saved value.
fig_path = f'{results_folder}plots/figures/'
sc.settings.figdir = fig_path

# Temporary matplotlib settings for this figure only: black axes background
# and a 10x10 figure size.
with plt.rc_context({'axes.facecolor': 'black',
                     'figure.figsize': [10, 10]}):

    sc.pl.spatial(adata_vis, cmap='magma',
                  color=ct_list, ncols=5, show=False,
                  size=1.3, img_key='hires', alpha_img=0.9, alpha=1,
                  frameon=True, legend_fontsize=50,
                  # colour scale runs from 0 to the 98th percentile
                  vmin=0, vmax='p98', save='all_maps_dot.png')

... storing 'sample' as categorical


