## Contents:
* [Loading packages](#Loading_packages)
* [Reading cell2location model output](#read)
* [Running factorisation to identify groups of co-located cell types / tissue compartments](#factorisation)
* [Plot locations of all cell types](#Fig3E)
* [Running factorisation including the composition of neighbouring spots](#factorisation_with_neighbours)

### Loading packages <a class="anchor" id="Loading_packages"></a>

In [1]:
# Import modules and packages:
%pylab inline
import pandas as pd
import sys, ast, os
rcParams['axes.spines.right'] = False
rcParams['axes.spines.top'] = False
import pickle as pickle
import numpy as np
import time
import itertools
data_type = 'float32'

os.environ["THEANO_FLAGS"] = 'device=cuda,floatX=' + data_type + ',force_device=True'
sys.path.insert(1, '/nfs/team205/vk7/sanger_projects/cell2location/')
sys.path.insert(1, '/nfs/team205/vk7/sanger_projects/BayraktarLab/cell2location/')

%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import os
import cell2location.models as c2l
import anndata
import scanpy as sc

from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text

Populating the interactive namespace from numpy and matplotlib


Using cuDNN version 7605 on context None
Mapped name None to device cuda: Tesla V100-SXM2-32GB (0000:62:00.0)


### Reading cell2location model output <a class="anchor" id="read"></a>

In [2]:
# Read the spatial AnnData object written by a cell2location run.
# NOTE(review): both folders are absolute, user-specific paths; they have to
# be adapted before the notebook can be re-run elsewhere.
c2l_results_folder = '/nfs/team205/vk7/sanger_projects/cell2location_paper/notebooks/results/visium_human_ln/'
results_folder = '/nfs/team205/vk7/sanger_projects/cell2location_paper/notebooks/selected_results/benchmarking/lymph_nodes_analysis/'

# name of the cell2location run whose exported object is loaded
run_name = 'CoLocationModelNB4V2_34clusters_4039locations_10241genes_input_inferred_V4_batch1024_l2_0001_n_comb50_5_cps5_fpc3_alpha001'

sp_data_file = c2l_results_folder + 'std_model/'+run_name+'/sp_with_clusters.h5ad'
# `anndata.read` is a deprecated alias; `read_h5ad` is the explicit reader
# for .h5ad files.
adata_vis = anndata.read_h5ad(sp_data_file)
# label every spot with the first key found in .uns['spatial']
adata_vis.obs['sample'] = list(adata_vis.uns['spatial'].keys())[0]

Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [3]:
# Load the RCTD weights (one row per spot, one column per cell type) and
# attach them to `adata_vis.obs` as per-spot proportions.
RCTD = pd.read_csv(f'{results_folder}rctd/results_weights_59clusters_2128genes_6156genes.csv',
                   index_col='spot')
# prefix the columns with 'q05_spot_factors'; a later cell looks the columns
# up in .obs under that prefix
RCTD.columns = [f'q05_spot_factors{i}' for i in RCTD.columns]
# compute cell proportions: divide every row by its row sum
RCTD = RCTD.div(RCTD.sum(axis=1), axis=0)

# Keep only spots that have RCTD results. Subsetting returns a view, so take
# an explicit copy before writing to .obs; otherwise anndata copies
# implicitly and warns ("Trying to set attribute `.obs` of view, copying.").
adata_vis = adata_vis[adata_vis.obs_names.isin(RCTD.index), :].copy()
# rows are re-ordered to the AnnData spot order before the values are stored
adata_vis.obs[RCTD.columns] = RCTD.loc[adata_vis.obs_names, :].values

# path for saving figures
fig_path = f'{results_folder}plots/figures/'
# remember scanpy's current figure directory before redirecting it
sc_figpath = sc.settings.figdir
sc.settings.figdir = f'{fig_path}'

Trying to set attribute `.obs` of view, copying.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


### Running factorisation to identify groups of co-located cell types / tissue compartments <a class="anchor" id="factorisation"></a>

In [4]:
from re import sub

# Replace the stored 5% quantile `spot_factors` in `.uns['mod']` with the
# RCTD proportions so that the downstream code runs on the RCTD estimates.
#
# Rows are selected and ordered by `adata_vis.obs_names` BEFORE the values
# are extracted: the matrix is stored as a bare array, so it must line up
# row-for-row with the spots recorded in `uns['mod']['obs_names']`.
RCTD_copy = RCTD.loc[adata_vis.obs_names, :].copy()
# strip the 'q05_spot_factors' prefix so columns match the `fact_names`
RCTD_copy.columns = [sub('q05_spot_factors', '', i) for i in RCTD_copy.columns]
adata_vis.uns['mod']['post_sample_q05']['spot_factors'] = RCTD_copy[adata_vis.uns['mod']['fact_names']].values
adata_vis.uns['mod']['obs_names'] = list(adata_vis.obs_names)

# Sanity check: display the shapes of the replaced matrix, the stored
# posterior means and the number of spots.
# NOTE(review): `post_sample_means` is not replaced or subset in this cell,
# so its row count can differ from the number of spots.
adata_vis.uns['mod']['post_sample_q05']['spot_factors'].shape, \
adata_vis.uns['mod']['post_sample_means']['spot_factors'].shape, \
len(adata_vis.obs_names)

((4037, 34), (4039, 34), 4037)

In [5]:
from cell2location import run_colocation

# Run the 'CoLocatedGroupsSklearnNMF' co-location analysis on `adata_vis`
# for every factor count in np.arange(11, 16), i.e. 11 to 15. Results are
# exported below `results_folder` and returned as `res_dict` together with
# the annotated spatial object `sp_data`.
res_dict, sp_data = run_colocation(
    adata_vis,
    model_name='CoLocatedGroupsSklearnNMF',
    verbose=False,
    return_all=True,
    train_args=dict(
        n_fact=np.arange(11, 16),
        n_iter=20000,
        sample_name_col='sample',
        mode='normal',
        n_type='restart',
        n_restarts=5,
    ),
    model_kwargs=dict(
        init='random',
        random_state=0,
        nmf_kwd_args=dict(tol=0.0001),
    ),
    posterior_args=dict(),
    export_args=dict(
        path=results_folder+'rctd/CoLocatedComb_2128genes_6156genes/',
        run_name_suffix='tol0.0001',
    ),
)

Variable names are not unique. To make them unique, call `.var_names_make_unique`.


### Analysis name: CoLocatedGroupsSklearnNMF_11combinations_4037locations_34factorstol0.0001


Trying to set attribute `.obs` of view, copying.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
... storing 'sample' as categorical


### Analysis name: CoLocatedGroupsSklearnNMF_12combinations_4037locations_34factorstol0.0001


Trying to set attribute `.obs` of view, copying.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
... storing 'sample' as categorical


### Analysis name: CoLocatedGroupsSklearnNMF_13combinations_4037locations_34factorstol0.0001


Trying to set attribute `.obs` of view, copying.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
... storing 'sample' as categorical


### Analysis name: CoLocatedGroupsSklearnNMF_14combinations_4037locations_34factorstol0.0001


Trying to set attribute `.obs` of view, copying.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
... storing 'sample' as categorical


### Analysis name: CoLocatedGroupsSklearnNMF_15combinations_4037locations_34factorstol0.0001


Trying to set attribute `.obs` of view, copying.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
... storing 'sample' as categorical




... storing 'sample' as categorical


### Plot locations of all cell types <a class="anchor" id="Fig3E"></a> 

In [7]:
# Duplicate the per-spot abundance columns in .obs under the plain cell type
# names (without the 'q05_spot_factors' prefix) so that plots get readable
# titles.
clust_names = adata_vis.uns['mod']['fact_names']
clust_names_orig = [f'q05_spot_factors{name}' for name in clust_names]
adata_vis.obs[clust_names] = adata_vis.obs[clust_names_orig]

# display the cell type names
clust_names

array(['B_Cycling', 'B_GC_DZ', 'B_GC_LZ', 'B_GC_prePB', 'B_IFN',
       'B_activated', 'B_mem', 'B_naive', 'B_plasma', 'B_preGC',
       'DC_CCR7+', 'DC_cDC1', 'DC_cDC2', 'DC_pDC', 'Endo', 'FDC', 'ILC',
       'Macrophages_M1', 'Macrophages_M2', 'Mast', 'Monocytes', 'NK',
       'NKT', 'T_CD4+', 'T_CD4+_TfH', 'T_CD4+_TfH_GC', 'T_CD4+_naive',
       'T_CD8+_CD161+', 'T_CD8+_cytotoxic', 'T_CD8+_naive', 'T_TIM3+',
       'T_TfR', 'T_Treg', 'VSMC'], dtype=object)

In [9]:
# Plot the spatial map of every cell type and save it as one PNG.
ct_list = clust_names.copy()

# Point scanpy at the project figure folder. `sc_figpath` is intentionally
# not reassigned here: it was saved from `sc.settings.figdir` when the
# figure folder was first redirected, and reading figdir again at this point
# would overwrite that saved value with the redirected path.
fig_path = f'{results_folder}plots/figures/'
sc.settings.figdir = fig_path

# `plt.rc_context` applies the temporary rc settings without needing the
# `matplotlib` name that `%pylab` injects.
with plt.rc_context({'axes.facecolor': 'black',
                     'figure.figsize': [10, 10]}):

    sc.pl.spatial(adata_vis, cmap='magma',
                  color=ct_list, ncols=5, show=False,
                  size=1.3, img_key='hires', alpha_img=0.9, alpha=1,
                  frameon=True, legend_fontsize=50,
                  vmin=0, vmax='p98', save='all_maps_dot_2128genes_6156genes.png'
                  )

... storing 'sample' as categorical


