## Müllerian duct differentiation - epithelium + mesenchyme

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scrublet as scr
import seaborn as sns
import scipy.stats
import anndata
import os

import scipy as scipy
import scipy as sp
import pickle as pkl
import matplotlib.pyplot as plt
import re
from collections import defaultdict
from statsmodels.nonparametric.smoothers_lowess import lowess
from numpy import asarray as ar
from collections import Counter
import networkx as nx
import igraph
import glob

In [None]:
# Load the mesenchymal and the epithelial Müllerian objects, printing the
# shape of each one right after it has been read.
mese_path = '/nfs/team292/vl6/FetalReproductiveTract/mullerian_mese_late_post10pcw.h5ad'
mese = sc.read(mese_path)
print(mese.shape)

epi_path = '/nfs/team292/vl6/FetalReproductiveTract/mullerian_epi_late_post10pcw.h5ad'
epi = sc.read(epi_path)
print(epi.shape)

In [None]:
# Reset to raw: rebuild each object from the matrix and gene table stored in
# its .raw slot, keeping the per-cell annotations (.obs) of the loaded object.
import anndata


def _rebuild_from_raw(source):
    """Return a new AnnData made of ``source.raw.X``, ``source.raw.var`` and ``source.obs``."""
    return anndata.AnnData(X=source.raw.X, var=source.raw.var, obs=source.obs)


mese_raw = _rebuild_from_raw(mese)
epi_raw = _rebuild_from_raw(epi)
print(mese_raw.shape, epi_raw.shape)

In [None]:
# Stack the mesenchymal and epithelial objects along the cell axis.
# join='outer' keeps the union of genes; index_unique=None leaves the cell
# names as they are.
# NOTE(review): AnnData.concatenate is deprecated in recent anndata releases in
# favour of anndata.concat, whose var-merging defaults differ — check the
# installed version before porting.
adata = mese_raw.concatenate(epi_raw, join='outer',index_unique=None)
adata.shape

In [None]:
## import reproductive tract utils functions
import sys
# NOTE: despite its name, `cwd` is a fixed utilities directory, not the
# current working directory. It is appended to sys.path so that the two
# project modules below can be imported.
cwd = '/nfs/team292/vl6/Experiments/Utils/'
sys.path.append(cwd)

import reptract_utils
import reptract_genes

In [None]:
# Keep a copy of the concatenated, still unfiltered object in .raw before any
# filtering or transformation touches adata.
adata.raw = adata.copy()
# Drop genes whose total count is below 10.
sc.pp.filter_genes(adata, min_counts=10)
# adata = reptract_utils.per_gene_analysis(adata)
# NOTE(review): both helpers live in the project module reptract_utils, which
# is not visible here. Later cells read adata.obsm['X_pca'] and call
# sc.pl.umap, so these calls are expected to provide a PCA and a UMAP —
# confirm the exact steps in reptract_utils.
adata = reptract_utils.normalize_log_transform(adata)
adata = reptract_utils.hvgs_pca_umap(adata)

In [None]:
# Overview of the embedding before Harmony is run: one panel per metadata /
# QC column, two panels per row.
_overview_keys = [
    'stage_pcw',
    'sex',
    'n_genes',
    'donor',
    'doublet_scores',
    'celltype',
]
sc.pl.umap(adata, color=_overview_keys, color_map='OrRd', ncols=2, wspace=0.3)

In [None]:
# Gene-expression panels on the same embedding. use_raw=False makes scanpy
# read the values from adata.X instead of adata.raw.
_marker_genes = [
    'FOXL2', 'ITGA4', 'LGR5', 'TMEM176B', 'HOXA10', 'PTGER3', 'ACTA2', 'DES', 'MYH11',
    'HOXA13', 'ISL1', 'TMEM163', 'MMP28', 'CTNNB1', 'WNT4', 'CDH7', 'ATF3', 'HMGA2',
    'KRT18', 'HOXA11', 'HOXC10', 'RXFP2', 'PAX8', 'PAX2', 'WT1', 'PNOC', 'LYPD1', 'COMP',
    'CALY', 'GDF10', 'SFRP2', 'ASPN', 'PTGFR', 'MOXD1', 'PRELP',
    'PCSK2', 'AR', 'BRINP1', 'CNTN1', 'LUZP2', 'GRIA4',
]
sc.pl.umap(
    adata,
    color=_marker_genes,
    use_raw=False,
    color_map='OrRd',
    ncols=2,
    wspace=0.15,
)

### Harmony integration

In [None]:
# Global scanpy plotting defaults for the figures below: 6x6 inch panels,
# saved as PDF at 150 dpi.
_figure_settings = dict(
    scanpy=True,
    dpi=80,
    dpi_save=150,
    frameon=True,
    vector_friendly=True,
    fontsize=14,
    figsize=(6, 6),
    color_map=None,
    format='pdf',
    facecolor=None,
    transparent=False,
)
sc.set_figure_params(**_figure_settings)

In [None]:
import matplotlib as mpl
# Embed fonts in saved PDFs as Type 42 (TrueType) instead of matplotlib's
# default Type 3.
mpl.rcParams['pdf.fonttype'] = 42

In [None]:
import rpy2.rinterface_lib.callbacks
import logging
# Ignore R warning messages
#Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
# Activate anndata2ri's conversion and load the rpy2 IPython extension, which
# provides the %%R cell magic used for the Harmony call further down.
import anndata2ri
anndata2ri.activate()
%load_ext rpy2.ipython

In [None]:
# Extract PCA matrix and batch array
n_pcs = 25 # Number of leading principal components kept (reused as n_pcs for the neighbour graph)
pca = adata.obsm['X_pca'][:, 0:(n_pcs)]
batch = adata.obs['donor'] # Batch ID: the donor of each cell (not the stage)

In [None]:
%%R -i pca -i batch -o hem

library(harmony)
library(magrittr)
# Fixed seed so that the Harmony run is repeatable.
set.seed(1000)
# `pca` is already a PCA embedding, hence do_pca=FALSE; `batch` holds the
# per-cell donor labels passed in from Python.
# NOTE(review): theta and lambda are Harmony's diversity-penalty and
# ridge-penalty parameters — confirm the chosen values (0 and 6) and the
# availability of HarmonyMatrix against the documentation of the installed
# harmony version.
hem <- HarmonyMatrix(pca, batch, theta=0, lambda = 6, do_pca=FALSE, verbose = FALSE)  
# Returned to Python through `-o hem`, where it is read as hem.values.
hem = data.frame(hem)

In [None]:
# Store the Harmony-corrected embedding under its own key. The original
# adata.obsm['X_pca'] is left untouched (nothing is overwritten).
adata.obsm['X_pca_harmony'] = hem.values 

In [None]:
# Build the neighbour graph on the Harmony-corrected embedding and keep it
# under the separate key 'harmony'.
sc.pp.neighbors(
    adata,
    use_rep='X_pca_harmony',
    n_pcs=n_pcs,
    key_added='harmony',
    random_state=123,
)


In [None]:
# Recompute the UMAP from the 'harmony' neighbour graph.
sc.tl.umap(
    adata,
    neighbors_key='harmony',
    min_dist=0.6,
    spread=0.8,
)


In [None]:
# Gene-expression panels on the Harmony-based UMAP; values are taken from
# adata.X (use_raw=False).
_integrated_marker_genes = [
    'FOXL2', 'ITGA4', 'LGR5', 'TMEM176B', 'HOXA10', 'ESR1', 'PGR',
    'HOXA13', 'MYH11', 'PTGER3', 'CDH7', 'HOXC10', 'UPK3B', 'SP7',
]
sc.pl.umap(
    adata,
    color=_integrated_marker_genes,
    use_raw=False,
    color_map='OrRd',
    ncols=2,
    wspace=0.15,
)

In [None]:
# Same metadata / QC overview as before integration, now on the Harmony-based
# UMAP.
_overview_keys_integrated = [
    'stage_pcw',
    'sex',
    'n_genes',
    'donor',
    'doublet_scores',
    'celltype',
]
sc.pl.umap(adata, color=_overview_keys_integrated, color_map='OrRd', ncols=2, wspace=0.3)

In [None]:
# Show the two compartment-specific low-resolution annotations side by side.
_lowres_columns = ['mese_mullerian_lowres', 'epi_mullerian_lowres']
sc.pl.umap(
    adata,
    color=_lowres_columns,
    use_raw=False,
    color_map='OrRd',
    ncols=2,
    wspace=0.15,
)

In [None]:
# Cast to plain strings so that missing labels become the literal text 'nan',
# which the merge into 'mullerian_lowres' tests for.
# NOTE(review): presumably the missing entries are the epithelial cells, which
# carry their label in 'epi_mullerian_lowres' — confirm.
adata.obs['mese_mullerian_lowres'] = adata.obs['mese_mullerian_lowres'].astype(str)

In [None]:
# Merge the two annotations into one column: where the mesenchymal label is
# the string 'nan', take the epithelial label, otherwise keep the mesenchymal one.
_mese_labels = adata.obs['mese_mullerian_lowres']
_epi_labels = adata.obs['epi_mullerian_lowres']
_no_mese_label = _mese_labels == 'nan'
adata.obs['mullerian_lowres'] = np.where(_no_mese_label, _epi_labels, _mese_labels)

In [None]:
# Check the merged annotation on the UMAP.
sc.pl.umap(adata, color = [ 'mullerian_lowres'], wspace = 0.15, use_raw = False,
           color_map = 'OrRd', ncols = 2)

In [None]:
# Make the merged annotation categorical and fix the category order:
# the three epithelial clusters first, then the four mesenchymal ones.
_lowres_order = [
    'FallopianTube_Epithelium',
    'Uterus/Cervix_Epithelium',
    'MüllerianVagina_Epithelium',
    'FallopianTube_Mesenchyme',
    'Uterus_Mesenchyme',
    'Cervix_Mesenchyme',
    'MüllerianVagina_Mesenchyme',
]
adata.obs['mullerian_lowres'] = adata.obs['mullerian_lowres'].astype('category')
_reordered = adata.obs['mullerian_lowres'].cat.reorder_categories(_lowres_order)
adata.obs['mullerian_lowres'] = _reordered

In [None]:
# Final annotation figure: seven colours, one per 'mullerian_lowres' category,
# saved with the suffix '_mullerian_lowres'.
_lowres_palette = [
    'plum',
    'peachpuff',
    'lightgreen',
    'mediumorchid',
    'gold',
    'darkorange',
    'forestgreen',
]
sc.pl.umap(
    adata,
    color='mullerian_lowres',
    palette=_lowres_palette,
    save='_mullerian_lowres',
)

In [None]:
# Save the combined mesenchyme + epithelium object, including the merged
# 'mullerian_lowres' annotation, to disk.
adata.write('/nfs/team292/vl6/FetalReproductiveTract/mullerian_mese_epi_late_post10pcw.h5ad')

In [None]:
# Reload the object written above (same path), so the notebook can be resumed
# from here without rerunning the integration cells.
adata = sc.read('/nfs/team292/vl6/FetalReproductiveTract/mullerian_mese_epi_late_post10pcw.h5ad')

In [None]:
# Re-draw the annotation UMAP with the seven-colour palette (not saved).
_lowres_palette = [
    'plum',
    'peachpuff',
    'lightgreen',
    'mediumorchid',
    'gold',
    'darkorange',
    'forestgreen',
]
sc.pl.umap(adata, color='mullerian_lowres', palette=_lowres_palette)

In [None]:
# Make dotplot for supplementary note
# One ordered marker list per compartment. Each list is plotted only on the
# clusters of its own compartment. The dotplot calls read the genes from this
# dict, so the lists exist in one place only.
markers = {
    'Epithelium' : ['EMX2', 'WNT7A', 'SOX17', 'ERP27', 'DAPL1', 'PART1', 'LYPD1', 'PNOC',
                    'LGR5', 'DLX5', 'UCA1', 'MSX1', 'MSX2', 'CDH2', 'ALDH1A1', 'AXIN2',
                    'TP63', 'KRT5'],
    'Mesenchyme' : ['EMX2', 'LEPR', 'FOXL2', 'ITGBL1', 'CD36', 'TSPAN8', 'HOXA5', 'HOXC5', 'HOXC6', 'HOXA7',
                    'HOXA10', 'HOXA11', 'ITGA4', 'RORB', 'HOXA13', 'SRD5A2', 'AR'],
}
# 'mullerian_lowres' categories that belong to each compartment (keys match `markers`).
compartment_clusters = {
    'Epithelium' : ['FallopianTube_Epithelium', 'Uterus/Cervix_Epithelium',
                    'MüllerianVagina_Epithelium'],
    'Mesenchyme' : ['FallopianTube_Mesenchyme', 'Uterus_Mesenchyme',
                    'Cervix_Mesenchyme', 'MüllerianVagina_Mesenchyme'],
}
# Suffix handed to scanpy's `save` for each compartment's figure.
dotplot_suffix = {
    'Epithelium' : '_mull_epi_post10pcw_suppnote',
    'Mesenchyme' : '_mull_mese_post10pcw_suppnote',
}

# Epithelium first, then mesenchyme (dict insertion order).
for compartment, genes in markers.items():
    # Boolean mask over cells; .isin replaces a per-cell Python membership loop.
    in_compartment = adata.obs['mullerian_lowres'].isin(compartment_clusters[compartment]).to_numpy()
    # standard_scale='var' rescales each gene to [0, 1] across the plotted groups.
    sc.pl.dotplot(adata[in_compartment], var_names = genes, groupby = 'mullerian_lowres',
                  standard_scale = 'var', color_map = 'OrRd', save = dotplot_suffix[compartment])

In [None]:
# Display the summary of the reloaded object.
adata

## ATAC visualisations - MESENCHYME

In [None]:
# Directory the mesenchyme ATAC embedding table is read from
# (a pycistopic output folder, going by its path).
outDir = '/lustre/scratch126/cellgen/team292/vl6/pycistopic/mullerian_mese_withvagina_post9pcw/'

In [None]:
# Per-cell ATAC table; the first CSV column becomes the index. Later cells
# read the columns 'tsne1', 'tsne2', 'stage' and 'mese_mullerian_lowres' from it.
atac_annots = pd.read_csv(outDir + 'mull_mese_embedding.csv', index_col = 0)
atac_annots.head()

In [None]:
# Create fake matrix 
fake_matrix = np.zeros([6513, 20000])
fake_matrix.shape

In [None]:
# Placeholder var table: 20000 rows, one constant dummy column 'n_genes'.
_n_fake_vars = 20000
fake_vars = pd.DataFrame([5] * _n_fake_vars, columns=['n_genes'])

In [None]:
import anndata 
# Wrap the ATAC annotations in an AnnData shell so that scanpy's plotting
# functions can be used on them; X and var are placeholders only.
# NOTE: this rebinds `adata`, replacing the RNA object used above.
adata = anndata.AnnData(X = fake_matrix, var = fake_vars, obs = atac_annots)

In [None]:
# The coordinates come from columns named 'tsne1'/'tsne2'; they are stored
# under 'X_umap' so that sc.pl.umap can draw them.
# NOTE(review): whether these are t-SNE or UMAP coordinates cannot be told
# from here — confirm before labelling the figure axes.
adata.obsm['X_umap'] = atac_annots[['tsne1', 'tsne2']].to_numpy()

In [None]:
# Plotting defaults for the ATAC figures: 7x7 inch panels, saved as PDF at 150 dpi.
_atac_figure_settings = dict(
    scanpy=True,
    dpi=80,
    dpi_save=150,
    frameon=True,
    vector_friendly=True,
    fontsize=14,
    figsize=[7, 7],
    color_map=None,
    format='pdf',
    facecolor=None,
    transparent=False,
)
sc.set_figure_params(**_atac_figure_settings)

In [None]:
import matplotlib as mpl
# Embed fonts in saved PDFs as Type 42 (TrueType) instead of matplotlib's
# default Type 3.
mpl.rcParams['pdf.fonttype'] = 42

In [None]:
# Mesenchyme ATAC embedding coloured by the 'stage' column with the viridis
# colour map; `save` gives the suffix of the figure file name.
sc.pl.umap(adata, color = 'stage', cmap = 'viridis', save = '_atac_mull_mese_stage')

In [None]:
# Fixed colour per mesenchymal ATAC label, passed as `palette` below.
# NOTE(review): the keys are presumably the values found in
# atac_annots['mese_mullerian_lowres'] — confirm against the CSV.
color_dict = {'FallopianTubeMese': 'orange', 'UterusMese': 'orangered', 'CervixMese': 'palevioletred', 'UpperVaginaMese': 'lightpink'}

In [None]:
# Mesenchyme ATAC embedding coloured by annotation, using the fixed colours
# from color_dict.
sc.pl.umap(adata, color = 'mese_mullerian_lowres', 
           palette = color_dict, save = '_atac_mull_mese_celltype')

## ATAC visualisations - EPITHELIUM

In [None]:
# Directory the epithelium ATAC embedding table is read from
# (a pycistopic output folder, going by its path).
outDir2 = '/lustre/scratch126/cellgen/team292/vl6/pycistopic/mullerian_epi/'

In [None]:
# Per-cell ATAC table for the epithelium; the first CSV column becomes the
# index. This rebinds `atac_annots`, replacing the mesenchyme table.
atac_annots = pd.read_csv(outDir2 + 'mull_epi_embedding.csv', index_col = 0)
atac_annots.head()

In [None]:
# Number of cells (rows) and annotation columns; the placeholder matrix built
# next must have the same number of rows.
atac_annots.shape

In [None]:
# Create fake matrix 
fake_matrix = np.zeros([2345, 20000])
fake_matrix.shape

In [None]:
# Placeholder var table: 20000 rows, one constant dummy column 'n_genes'.
_n_fake_vars = 20000
fake_vars = pd.DataFrame([5] * _n_fake_vars, columns=['n_genes'])

In [None]:
import anndata 
# Wrap the epithelium ATAC annotations in an AnnData shell so that scanpy's
# plotting functions can be used on them; X and var are placeholders only.
# NOTE: this rebinds `adata`, replacing the mesenchyme ATAC object.
adata = anndata.AnnData(X = fake_matrix, var = fake_vars, obs = atac_annots)

In [None]:
# The coordinates come from columns named 'tsne1'/'tsne2'; they are stored
# under 'X_umap' so that sc.pl.umap can draw them.
# NOTE(review): whether these are t-SNE or UMAP coordinates cannot be told
# from here — confirm before labelling the figure axes.
adata.obsm['X_umap'] = atac_annots[['tsne1', 'tsne2']].to_numpy()

In [None]:
# Epithelium ATAC embedding coloured by the 'stage' column with the viridis
# colour map; `save` gives the suffix of the figure file name.
sc.pl.umap(adata, color = 'stage', cmap = 'viridis', save = '_atac_mull_epi_stage')

In [None]:
# Fixed colour per epithelial ATAC label, passed as `palette` below. This
# rebinds `color_dict`, replacing the mesenchyme colours.
# NOTE(review): the keys are presumably the values found in
# atac_annots['epi_mullerian_lowres'] — confirm against the CSV.
color_dict = {'FallopianTubeEpi': 'goldenrod',
 'UterusCervixEpi': 'indianred',
 'UpperVaginaEpi': 'mediumorchid'}

In [None]:
# Epithelium ATAC embedding coloured by annotation, using the fixed colours
# from color_dict.
sc.pl.umap(adata, color = 'epi_mullerian_lowres', 
           palette = color_dict, save = '_atac_mull_epi_celltype')