In [None]:
import scanpy as sc
import anndata as ad
import squidpy as sq
import os

import numpy as np
import pandas as pd

sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

In [None]:
sc.settings.set_figure_params(dpi=150)
import skimage.io as io

In [None]:
# Root directory holding one sub-directory per Visium sample.
# The VISIUM_DIR environment variable, when set, overrides the default cluster
# location so the notebook can run elsewhere without editing code.
# os.path.join(..., '') guarantees a trailing separator, which the string
# concatenation used when loading samples (path_to_visium + sample + '/') needs.
path_to_visium = os.path.join(
    os.environ.get('VISIUM_DIR', '/lustre/scratch126/cellgen/team292/vl6/VISIUM/'), '')

In [None]:
# Per-sample metadata for the Visium libraries, stored column-wise.
# Every list holds 22 entries and is aligned by position with the 'sample' list,
# so the i-th element of each column describes the i-th sample.
# The sample IDs are not strictly ascending (HCA_F_RepTsp13902018 is listed
# before HCA_F_RepTsp13902017); never re-sort one column on its own.
meta_dict = {
    'sample': [
        'HCA_F_RepTsp13147665', 'HCA_F_RepTsp13147666',
        'HCA_F_RepTsp13173760', 'HCA_F_RepTsp13173761',
        'HCA_F_RepTsp13173762', 'HCA_F_RepTsp13173763',
        'HCA_F_RepTsp13219892', 'HCA_F_RepTsp13219893',
        'HCA_F_RepTsp13219894', 'HCA_F_RepTsp13219895',
        'HCA_F_RepTsp13447718', 'HCA_F_RepTsp13447719',
        'HCA_F_RepTsp13447720', 'HCA_F_RepTsp13447721',
        'HCA_F_RepTsp13902013', 'HCA_F_RepTsp13902014',
        'HCA_F_RepTsp13902015', 'HCA_F_RepTsp13902016',
        'HCA_F_RepTsp13902018', 'HCA_F_RepTsp13902017',
        'HCA_F_RepTsp13902019', 'HCA_F_RepTsp13902020',
    ],
    'sangerID': [
        'Hrv58-GON-0-FO-1-s84',
        'Hrv58-GON-0-FO-1-s80',
        'HRV183-RPT-0-FO-1-S14_and_HRV183-RPT-0-FO-1-S12',
        'HRV183-RPT-0-FO-1-S9',
        'HRV162-RPT-0-FO-1-S176_and_HRV183-RPT-0-FO-1-S26',
        'HRV162-RPT-0-FO-1-S161_and_HRV162-RPT-0-FO-1-S162',
        'Hrv117-GON-0-FO-1-S109',
        'HRV184-RPT-0-FO-2-S34_and_HRV184-RPT-0-FO-2-S31_and_Hrv117-GON-0-FO-1-S116',
        'HRV129-GON-0-FO-1-S42',
        'HRV184-RPT-0-FO-1-S26',
        'Hrv125-GON-0-FO-5-S65-A1',
        'Hrv125-GON-0-FO-5-S66-B1',
        'Hrv125-GON-0-FO-5-S18-C1',
        'Hrv125-GON-0-FO-5-S17-D1',
        'Hrv238-RPT-0-FO-1-S39',
        'Hrv238-RPT-0-FO-1-S24',
        'Hrv214-RPT-0-FO-1-S58',
        'Hrv214-RPT-0-FO-1-S57',
        'Hrv245-RPT-9-FO-1-S13',
        'A70-RPT-9-FO-1-S39',
        'A70-RPT-9-FO-1-S40',
        'A70-RPT-9-FO-1-S41',
    ],
    'slide': (
        ['V12F14-072'] * 2 + ['V12F14-073'] * 4 + ['V12F14-077'] * 4 + ['V12Y31-055'] * 4
        + ['V42L11-100'] * 2 + ['V42L18-060'] * 2 + ['V42L13-373'] * 2 + ['V43J19-097'] * 2
    ),
    'position': ['A1', 'D1'] + ['A1', 'B1', 'C1', 'D1'] * 3 + ['A1', 'D1'] * 4,
    # 'image_name' is left disabled: file names were only listed for the first
    # 14 samples, so the column would not align with the 22-entry lists.
#            'image_name' : ['V12F14-072_A1.tif', 'V12F14-072_D1.tif', 'V12F14-073_A1.tif', 'V12F14-073_B1.tif', 'V12F14-073_C1.tif', 'V12F14-073_D1.tif',
#                           'V12F14-077_A1.tif', 'V12F14-077_B1.tif', 'V12F14-077_C1.tif', 'V12F14-077_D1.tif', 'V12Y31-055_A1.tiff',
#                           'V12Y31-055_B1.tiff', 'V12Y31-055_C1.tiff', 'V12Y31-055_D1.tiff'],
    'donor': [
        'Hrv58', 'Hrv58', 'Hrv183', 'Hrv183', 'Hrv162_and_Hrv183', 'Hrv162',
        'Hrv117', 'Hrv184_and_Hrv117', 'Hrv129', 'Hrv184',
        'Hrv125', 'Hrv125', 'Hrv125', 'Hrv125',
        'Hrv238', 'Hrv238', 'Hrv214', 'Hrv214', 'Hrv245',
        'A70', 'A70', 'A70',
    ],
    'stage(pcw)': [
        '18', '18', '15', '15', '15_and_21', '21',
        '20', '15_and_20', '17', '15',
        '21', '21', '21', '21',
        '17', '17', '15', '15', '15',
        '37YO', '37YO', '37YO',
    ],
    'sex': ['female'] * 6 + ['male'] * 4 + ['female'] * 12,
    'sectioning': [
        'coronal', 'coronal', 'coronal', 'coronal', 'coronal_and_transverse', 'transverse',
        'transverse', 'transverse_and_coronal', 'transverse', 'transverse',
        'coronal', 'coronal', 'sagittal', 'sagittal',
        'coronal', 'coronal', 'sagittal', 'sagittal', 'sagittal',
        'sagittal', 'sagittal', 'sagittal',
    ],
    'permeabilisation(min)': [30] * 22,
    'month_processing': (
        ['august'] * 2 + ['september'] * 4 + ['october'] * 4 + ['january'] * 4 + ['july'] * 8
    ),
    'spaceranger': ['2.0.0'] * 14 + ['2.1.0'] * 8,
}

# One row per sample, indexed by the sample ID.
meta = pd.DataFrame(meta_dict).set_index('sample')
meta


In [None]:
# Rebuild the sample-indexed metadata table from meta_dict and preview its first rows.
meta = pd.DataFrame(meta_dict).set_index('sample')
meta.head()

In [None]:
import anndata

In [None]:
# Load the three annotated Visium sections analysed in this notebook into `holder`
# (one AnnData per sample, in the order given by `samples`).
samples = ['HCA_F_RepTsp13902017', 'HCA_F_RepTsp13902019', 'HCA_F_RepTsp13902020']
holder = []
for s in samples:
    # Expected layout: <path_to_visium>/<sample>/<sample>_annotated.h5ad
    adata = sc.read(os.path.join(path_to_visium, s, s + '_annotated.h5ad'))
    adata.var_names_make_unique()
    # Tag every spot with its sample ID and prefix the spot names with it, so
    # names stay unique once the sections are concatenated.
    adata.obs['sample'] = s
    adata.obs_names = [f'{s}_{spot}' for spot in adata.obs_names]
    # Select only spots corresponding to tissue. The explicit .copy() stores a
    # standalone object rather than a view: these objects are modified in place
    # later (.raw assignment, normalisation), which on a view triggers implicit
    # copies and keeps the unfiltered parent alive in memory.
    adata = adata[(adata.obs['in_tissue'] == 1).to_numpy()].copy()
    print(adata.shape)
    holder.append(adata)

In [None]:
# Plotting defaults used from here on (this call supersedes the dpi=150 set near
# the top of the notebook): 80 dpi on screen, 150 dpi for saved files, 6x6
# figures, font size 14 and PDF as the default format for saved figures.
sc.set_figure_params(scanpy=True, dpi=80, dpi_save=150, frameon=True, vector_friendly=True, fontsize=14, 
                     figsize=[6,6], color_map=None, format='pdf', facecolor=None, transparent=False)

In [None]:
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

In [None]:
# Spatial overview of the first section (holder[0]) without any colour key;
# saved with the '_fimbria_h&e' suffix.
sc.pl.spatial(holder[0],
             save = '_fimbria_h&e')

In [None]:
# Same section coloured by the 'histology_annots' annotation (two-colour palette).
sc.pl.spatial(holder[0], color="histology_annots", palette = ['gold', 'darkorange'], 
             save = '_fimbria_hist')


In [None]:
# Keep a copy of the matrix as loaded in .raw (presumably raw counts - confirm),
# then normalise per spot and log1p-transform holder[0].X in place.
holder[0].raw = holder[0].copy()
sc.pp.normalize_total(holder[0], inplace=True)
sc.pp.log1p(holder[0])

In [None]:
# Gene-expression maps for the first section. All of them read the normalised
# values (use_raw = False) and share the same colour scaling.
sc.pl.spatial(holder[0], color=["CRTAC1"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_fimbria_CRTAC1')

In [None]:
sc.pl.spatial(holder[0], color="PNOC", cmap = 'jet', use_raw = False, 
              vmin=0, vmax='p99.7', size = 1.2,
             save = '_fimbria_PNOC')

In [None]:
sc.pl.spatial(holder[0], color="MUC6", cmap = 'jet', use_raw = False, 
              vmin=0, vmax='p99.7', size = 1.2,
             save = '_fimbria_MUC6')

In [None]:
sc.pl.spatial(holder[0], color=["WDR72"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_fimbria_WDR72')

In [None]:
sc.pl.spatial(holder[0], color=["KCNN4"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_fimbria_KCNN4')

In [None]:
sc.pl.spatial(holder[0], color=["GRAMD2A"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_fimbria_GRAMD2A')

In [None]:
# Second section (holder[1]): keep a copy of the matrix as loaded in .raw
# (presumably raw counts - confirm), then normalise per spot and log1p-transform
# holder[1].X in place.
holder[1].raw = holder[1].copy()
sc.pp.normalize_total(holder[1], inplace=True)
sc.pp.log1p(holder[1])

In [None]:
# Gene-expression maps for the second section. All of them read the normalised
# values (use_raw = False); the colour scale runs from 0 to the 99.7th percentile.
sc.pl.spatial(holder[1], color="CRTAC1", cmap = 'jet', use_raw = False,
              vmin=0, vmax='p99.7', size = 1.2,
             save = '_ampulla_CRTAC1')

In [None]:
sc.pl.spatial(holder[1], color="PNOC", cmap = 'jet', use_raw = False, 
              vmin=0, vmax='p99.7', size = 1.2,
             save = '_ampulla_PNOC')

In [None]:
sc.pl.spatial(holder[1], color="MUC6", cmap = 'jet', use_raw = False, 
              vmin=0, vmax='p99.7', size = 1.2,
             save = '_ampulla_MUC6')

In [None]:
sc.pl.spatial(holder[1], color=["WDR72"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_ampulla_WDR72')

In [None]:
sc.pl.spatial(holder[1], color=["KCNN4"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_ampulla_KCNN4')

In [None]:
sc.pl.spatial(holder[1], color=["GRAMD2A"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_ampulla_GRAMD2A')

In [None]:
# Spatial overview of the second section without any colour key.
sc.pl.spatial(holder[1],
             save = '_ampulla_h&e')

In [None]:
# Same section coloured by the 'histology_annots' annotation (three-colour palette).
sc.pl.spatial(holder[1], color="histology_annots", palette = ['gold', 'darkorange', 'forestgreen'], 
             save = '_ampulla_hist')

In [None]:
# Third section (holder[2]): keep a copy of the matrix as loaded in .raw
# (presumably raw counts - confirm), then normalise per spot and log1p-transform
# holder[2].X in place.
holder[2].raw = holder[2].copy()
sc.pp.normalize_total(holder[2], inplace=True)
sc.pp.log1p(holder[2])

In [None]:
# Gene-expression maps for the third section. All of them read the normalised
# values (use_raw = False); the colour scale runs from 0 to the 99.7th percentile.
sc.pl.spatial(holder[2], color="CRTAC1", cmap = 'jet', use_raw = False, 
              vmin=0, vmax='p99.7', size = 1.2,
             save = '_isthmus_CRTAC1')

In [None]:
sc.pl.spatial(holder[2], color="PNOC", cmap = 'jet', use_raw = False, 
              vmin=0, vmax='p99.7', size = 1.2,
             save = '_isthmus_PNOC')

In [None]:
sc.pl.spatial(holder[2], color="MUC6", cmap = 'jet', use_raw = False, 
              vmin=0, vmax='p99.7', size = 1.2,
             save = '_isthmus_MUC6')

In [None]:
sc.pl.spatial(holder[2], color=["WDR72"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_isthmus_WDR72')

In [None]:
sc.pl.spatial(holder[2], color=["KCNN4"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_isthmus_KCNN4')

In [None]:
sc.pl.spatial(holder[2], color=["GRAMD2A"], cmap = 'jet', use_raw = False, # colour scale runs from 0 to the 99.7th percentile of the plotted expression values
                  vmin=0, vmax='p99.7', size = 1.2,
             save = '_isthmus_GRAMD2A')

In [None]:
# Spatial overview of the third section without any colour key.
sc.pl.spatial(holder[2],
             save = '_isthmus_h&e')

In [None]:
# Same section coloured by the 'histology_annots' annotation (three-colour palette).
sc.pl.spatial(holder[2], color="histology_annots", palette = ['gold', 'darkorange', 'forestgreen'], 
             save = '_isthmus_hist')

In [None]:
holder[2].raw.X[20:25, 20:25].toarray()

In [None]:
import anndata

# Swap every section in `holder` for a fresh AnnData built from its .raw slot
# (raw matrix and raw var table) plus the current obs table. Nothing else is
# passed on, so the rebuilt objects carry only X, var and obs.
for idx, section in enumerate(holder):
    holder[idx] = anndata.AnnData(X=section.raw.X, var=section.raw.var, obs=section.obs)

In [None]:
# Stack the three sections into a single object. join='outer' keeps the union of
# genes, and index_unique=None leaves the spot names untouched (they were already
# prefixed with the sample ID when the sections were loaded).
# NOTE(review): AnnData.concatenate is documented as deprecated in favour of
# anndata.concat - consider migrating once the outputs are confirmed to match.
adata_raw = holder[0].concatenate(holder[1:], join='outer', index_unique=None)
# Make sure the combined matrix is stored in CSR format.
adata_raw.X = adata_raw.X.tocsr()
adata_raw

In [None]:
# Copy every metadata column onto the spots by looking up each spot's sample ID.
for column_name, column_values in meta.items():
    sample_to_value = column_values.to_dict()
    adata_raw.obs[column_name] = adata_raw.obs['sample'].map(sample_to_value)

In [None]:
adata_raw.obs['histology_annots'].value_counts(dropna = False)

In [None]:
# Cast the annotation to plain strings: missing values become the literal 'nan',
# which is why 'nan' is listed alongside 'Background' when spots are filtered below.
adata_raw.obs['histology_annots'] = adata_raw.obs['histology_annots'].astype(str)

In [None]:
adata_raw.obs['histology_annots'].value_counts(dropna = False)

In [None]:
# Discard spots labelled 'Background' or carrying the string 'nan' as annotation.
unwanted_annotation = adata_raw.obs['histology_annots'].isin(['Background', 'nan'])
adata_raw = adata_raw[(~unwanted_annotation).to_numpy()]

In [None]:
adata_raw.obs['histology_annots'].value_counts(dropna = False)

In [None]:
adata = adata_raw.copy()

In [None]:
adata

In [None]:
adata.obs['sample'].value_counts()

In [None]:
adata.X[30:40,30:40].toarray()

In [None]:
# Drop genes detected in fewer than 10 spots.
sc.pp.filter_genes(adata, min_cells=10)

In [None]:
adata.shape

In [None]:
# Freeze the gene-filtered, not yet normalised matrix in .raw; it is read back
# further down (adata.raw.X) to restart the analysis on a subset of spots.
adata.raw = adata.copy()

In [None]:
# Per-spot total-count normalisation followed by log1p, both applied to adata.X.
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)

In [None]:
# Flag 3000 highly variable genes, evaluated per sample (batch_key = "sample").
# No subsetting is requested, so the number of genes in adata is unchanged.
sc.pp.highly_variable_genes(adata, flavor="seurat", batch_key = "sample", n_top_genes=3000)

In [None]:
adata.shape

In [None]:
# PCA and neighbourhood graph with scanpy's default settings.
sc.pp.pca(adata)
sc.pp.neighbors(adata)

In [None]:
sc.tl.umap(adata)

In [None]:
# NOTE: the cluster labels produced here are hard-coded in the subsetting step
# further down ('7', '0', '1'); re-check them whenever anything upstream changes.
sc.tl.leiden(adata, resolution = 0.6)

In [None]:
sc.pl.umap(adata, color = 'leiden')

In [None]:
sc.pl.umap(adata, color = [ 'histology_annots', 'sample','NRXN1', 'SLITRK2', 
                           'GATA6','GALNT17','PNOC',  'MMP28','NTRK3',  'DLGAP1', 'RSPO1', 
                           'ERP27','ALDH1A2',
    'WT1', 
    'TMEM45B', 'LYPD1', 'CRTAC1',], 
          color_map = 'OrRd', use_raw = False, ncols = 2)

In [None]:
adata.obs['histology_annots'].value_counts()

In [None]:
sc.pl.umap(adata, color = 'leiden', legend_loc = 'on data')

In [None]:
# Retain only the spots assigned to Leiden clusters '7', '0' or '1'.
in_selected_clusters = adata.obs['leiden'].isin(['7', '0', '1'])
adata = adata[in_selected_clusters.to_numpy()]

In [None]:
# Retain only the spots whose histology annotation is 'Epithelium'.
is_epithelium = adata.obs['histology_annots'] == 'Epithelium'
adata = adata[is_epithelium.to_numpy()]

In [None]:
adata.shape

In [None]:
sc.pl.umap(adata, color = 'sample')

### Epithelium only

In [None]:
# Start over on the retained spots from the matrix stored in .raw (saved before
# normalisation). Only X, obs and var are passed on, so embeddings and other
# derived results from the first pass are not carried over.
adata = anndata.AnnData(X = adata.raw.X, obs = adata.obs, var = adata.raw.var)
adata.shape

In [None]:
# Second processing pass, restricted to the retained spots: drop genes detected
# in fewer than 10 of them.
sc.pp.filter_genes(adata, min_cells=10)

In [None]:
adata.shape

In [None]:
# Freeze the gene-filtered, not yet normalised matrix in .raw.
# NOTE: from here on .raw holds un-normalised values. scanpy functions called
# with use_raw=None read .raw whenever it exists, so pass use_raw=False to work
# on the normalised adata.X.
adata.raw = adata.copy()

In [None]:
# Per-spot total-count normalisation followed by log1p, both applied to adata.X.
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)

In [None]:
# Flag 3000 highly variable genes, evaluated per sample (batch_key = "sample").
sc.pp.highly_variable_genes(adata, flavor="seurat", batch_key = "sample", n_top_genes=3000)

In [None]:
# PCA and neighbourhood graph with scanpy's default settings.
sc.pp.pca(adata)
sc.pp.neighbors(adata)

In [None]:
sc.tl.umap(adata)

In [None]:
sc.pl.umap(adata, color = 'sample')

In [None]:
anat = {'HCA_F_RepTsp13902017' : 'fimbria', 'HCA_F_RepTsp13902019' : 'ampulla', 
       'HCA_F_RepTsp13902020' : 'isthmus'}
adata.obs['anatomy'] = adata.obs['sample'].map(anat)

In [None]:
# Store 'anatomy' as a categorical whose levels are ordered
# fimbria, ampulla, isthmus.
anatomy_levels = ['fimbria', 'ampulla', 'isthmus']
anatomy_as_category = adata.obs['anatomy'].astype('category')
adata.obs['anatomy'] = anatomy_as_category.cat.reorder_categories(anatomy_levels)

In [None]:
sc.set_figure_params(scanpy=True, dpi=80, dpi_save=150, 
                         frameon=True, vector_friendly=True, fontsize=14, figsize=[5,5], color_map=None, 
                         format='pdf', facecolor=None, transparent=False,)
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

In [None]:
sc.pl.dotplot(
    adata,
    var_names =[
       'EME2', 'ETV4', 'HAPLN3', 'SP5','MUC6', 'KCNN4',  'CHRNA4',  'SHANK1', 'FGF17', 'FGFR3', 'WDR72'
             ], groupby = 'anatomy', standard_scale = 'var',
     use_raw = False, save = '_adult_increasing_epi_FT.pdf'
)

In [None]:
sc.pl.dotplot(
    adata,
    var_names =[
      'PNOC','GATA6', 'RSPO1',  'CRTAC1', 'WT1', 'DLGAP1', 'LYPD1', 'CD109', 'DOK5', 'TTYH1', 'PPP2R2B',  'ALDH1A2', 'MMP28', 
        'APOA1',
        'ERP27'
             ], groupby = 'anatomy', standard_scale = 'var',
     use_raw = False, save = '_adult_decreasing_epi_FT.pdf'
)

In [None]:
sc.pl.umap(adata, color = [
       'NRXN1', 'SLITRK2',   'GATA6','GALNT17','PNOC',  'MMP28','NTRK3',  'DLGAP1', 'RSPO1',  'ERP27','ALDH1A2',
    'WT1', 
    'TMEM45B', 'LYPD1', 'CRTAC1','PTPRT',    'S100A1', 
         
        
      'MUC6', 'KCNN4', 'PADI2', 'RNF43', 'EYA2', 'WDR72',  'GRAMD2A', 'CCDC170', 'RND3', 'TACC2', 'STK33', 'ADGB', 
    'CCN2', 'AFAP1L2', 'TTYH1', 'PTGS1', 'CD109'
             ], use_raw = False, color_map = 'OrRd')

## Summarise each gene set with a signature score (decreasing signature first, then increasing)

In [None]:
adata.X[20:25, 20:25].toarray()

In [None]:
decreasing_signature = [ 'PNOC','GATA6', 'RSPO1',  'CRTAC1', 'WT1', 'DLGAP1', 'LYPD1', 'CD109', 'DOK5', 'TTYH1', 'PPP2R2B',  'ALDH1A2', 'MMP28', 
        'APOA1',
        'ERP27'
         ]
len(decreasing_signature)

In [None]:
# Score the 'decreasing' gene set per spot and store it in
# adata.obs['ft_decreasing_score'].
# use_raw is set to False on purpose: adata.raw holds the un-normalised matrix
# saved before normalize_total/log1p, and with use_raw=None scanpy reads .raw
# whenever it exists. The score must be computed on the log-normalised adata.X,
# like every other expression plot in this notebook.
sc.tl.score_genes(adata, gene_list = decreasing_signature, ctrl_size=50, 
                  gene_pool=None, n_bins=25, score_name='ft_decreasing_score', 
                  random_state=0, copy=False, use_raw=False)

In [None]:
sc.pl.umap(adata, color = 'ft_decreasing_score', color_map = 'OrRd')

In [None]:
adata

In [None]:
sc.pl.violin(adata, keys = ['ft_decreasing_score'], groupby = 'anatomy', palette = ['#5e3c99', 
                                                                                   '#b2abd2', '#f1a1c5'],
            save = '_ft_decreasing_score.pdf')

In [None]:
increasing_signature = [ 'MUC6', 'HAPLN3', 'KCNN4', 'WDR72', 'CHRNA4', 'ETV4', 'SHANK1', 'FGF17', 'FGFR3', 
        'SP5','EME2'
         ]
len(increasing_signature)

In [None]:
# Score the 'increasing' gene set per spot and store it in
# adata.obs['ft_increasing_score'].
# use_raw is set to False on purpose: adata.raw holds the un-normalised matrix
# saved before normalize_total/log1p, and with use_raw=None scanpy reads .raw
# whenever it exists. The score must be computed on the log-normalised adata.X,
# like every other expression plot in this notebook.
sc.tl.score_genes(adata, gene_list = increasing_signature, ctrl_size=50, 
                  gene_pool=None, n_bins=25, score_name='ft_increasing_score', 
                  random_state=0, copy=False, use_raw=False)

In [None]:
sc.pl.umap(adata, color = 'ft_increasing_score', color_map = 'OrRd')

In [None]:
sc.pl.violin(adata, keys = ['ft_increasing_score'], groupby = 'anatomy', palette = ['#5e3c99', 
                                                                                   '#b2abd2', '#f1a1c5'],
            save = '_ft_increasing_score.pdf')

## Test whether the decreasing signature score declines from fimbria --> ampulla --> isthmus (Jonckheere trend test)

In [None]:
# Encode the regions as '1', '2', '3' in the fimbria -> ampulla -> isthmus
# direction. String labels are used because the R cell further down declares
# them as the ordered factor levels c("1", "2", "3").
order = {'fimbria' : '1', 'ampulla' : '2', 'isthmus' : '3'}

In [None]:
adata.obs['anatomy_order'] = adata.obs['anatomy'].map(order)

In [None]:
adata.obs['anatomy_order']

In [None]:
df = adata.obs[['ft_decreasing_score', 'ft_increasing_score', 'anatomy_order']]

In [None]:
df.head()

In [None]:
import rpy2.rinterface_lib.callbacks
import logging
# Ignore R warning messages
#Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
import anndata2ri
anndata2ri.activate()
%load_ext rpy2.ipython

In [None]:
# %%R 
# install.packages("clinfun")

In [None]:
%%R 
library(clinfun)

In [None]:
%%R -i df
# Turn the region codes into an ordered factor ("1" < "2" < "3") to use as the
# grouping variable of the trend test
factor_groups <- factor(df$anatomy_order, ordered = TRUE, 
                                levels = c("1", "2", "3"))
# Response variable: the per-spot decreasing-signature score
values <- df$ft_decreasing_score

In [None]:
%%R 
# Perform Jonckheere's trend test for a decreasing score across the ordered groups.
# With nperm set, the p-value comes from random permutations, so the RNG seed is
# fixed to make the reported p-value reproducible from run to run.
set.seed(0)
result <- jonckheere.test(values, factor_groups, alternative = c("decreasing"), nperm=2000)
print(result)

### Differential expression analysis with TF-IDF between samples 

In [None]:
import anndata
bdata = anndata.AnnData(X = adata.raw.X, var = adata.raw.var, obs = adata.obs)

In [None]:
bdata

In [None]:
import rpy2.rinterface_lib.callbacks
import logging
# Ignore R warning messages
#Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
import anndata2ri
anndata2ri.activate()
%load_ext rpy2.ipython

In [None]:
# Strip bdata.obs down to the 'sample' column before the object is passed to R.
non_sample_columns = [name for name in bdata.obs.columns if name != 'sample']
bdata.obs.drop(columns=non_sample_columns, inplace=True)

In [None]:
%%R -i bdata
bdata

In [None]:
%%R -o mrks

library(SoupX)
# Pull the matrix stored as assay "X" and label it with the column/row names of bdata
counts <- assay(bdata, "X")
colnames(counts) <- colnames(bdata)
rownames(counts) <- rownames(bdata)
# Marker genes per sample: the grouping vector is the 'sample' column, not a clustering
mrks = quickMarkers(counts, colData(bdata)$sample, N = 100) # N = 100 asks for up to 100 top-ranked genes per group (here: per sample)

In [None]:
mrks.head()

In [None]:
# First 40 marker genes listed for sample HCA_F_RepTsp13902017, shown on the UMAP
# using the normalised values. Note that `cluster` holds gene names, not a cluster ID.
cluster = mrks[mrks['cluster'] == 'HCA_F_RepTsp13902017']['gene'].to_list()[0:40]
sc.pl.umap(adata, color = cluster, color_map = 'OrRd', ncols = 3, use_raw = False)

In [None]:
# First 20 marker genes listed for sample HCA_F_RepTsp13902019.
cluster = mrks[mrks['cluster'] == 'HCA_F_RepTsp13902019']['gene'].to_list()[0:20]
sc.pl.umap(adata, color = cluster, color_map = 'OrRd', ncols = 3, use_raw = False)

In [None]:
# First 15 marker genes listed for sample HCA_F_RepTsp13902020.
cluster = mrks[mrks['cluster'] == 'HCA_F_RepTsp13902020']['gene'].to_list()[0:15]
sc.pl.umap(adata, color = cluster, color_map = 'OrRd', ncols = 3, use_raw = False)