In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import os
import sys
import scipy


def MovePlots(plotpattern, subplotdir):
    """Move saved figures matching a name pattern into a sub-folder of the figure directory.

    Every entry directly inside ``sc.settings.figdir`` whose name contains
    ``plotpattern`` is moved into ``<figdir>/<subplotdir>``. The destination is
    created (including missing parents) when it does not exist yet. An existing
    file with the same name in the destination is overwritten, as the ``mv``
    shell call this replaces would do.

    The work is done with ``os``/``glob``/``shutil`` instead of shell commands
    built by string concatenation, so figure directories containing spaces or
    shell metacharacters are handled and failures raise instead of being ignored.

    Parameters
    ----------
    plotpattern : str
        Substring to look for in the entry names; glob wildcards are honoured.
    subplotdir : str
        Destination folder, relative to the scanpy figure directory.

    Returns
    -------
    None
    """
    # Local imports: glob and shutil are not part of the notebook's import cell.
    import glob
    import shutil

    figdir = str(sc.settings.figdir)
    destination = os.path.normpath(figdir + '/' + subplotdir)
    os.makedirs(destination, exist_ok=True)
    destination_abs = os.path.abspath(destination)

    # Only the directory part is escaped, so wildcards inside plotpattern keep working.
    matches = glob.glob(os.path.join(glob.escape(figdir), '*' + plotpattern + '*'))
    for source in matches:
        source_abs = os.path.abspath(source)
        # The destination folder (or one of its parents) may itself match the
        # pattern; it cannot be moved into itself, so leave it where it is.
        if destination_abs == source_abs or destination_abs.startswith(source_abs + os.sep):
            continue
        # Explicit target path so that an existing file of that name is replaced.
        shutil.move(source, os.path.join(destination, os.path.basename(source)))


# --- scanpy session configuration ---
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
# Folder scanpy saves figures into; MovePlots() reorganises files inside this folder.
# NOTE: it is not changed again in the visible notebook, so it also applies to the mouse section.
sc.settings.figdir = './genital_tubercle_figures_humans/'
# Print the versions of the loaded packages so the environment is recorded in the output.
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=80)  # low dpi (dots per inch) yields small inline figures

# Bare expression: the cell output shows which Python interpreter the kernel runs on.
sys.executable

In [None]:
# Figure defaults for the whole notebook: 7x7 panels with frames, 80 dpi inline,
# 150 dpi when saved, and PDF as the file format for saved figures.
sc.set_figure_params(
    scanpy=True,
    dpi=80,
    dpi_save=150,
    frameon=True,
    vector_friendly=True,
    fontsize=14,
    figsize=[7, 7],
    color_map=None,
    format='pdf',
    facecolor=None,
    transparent=False,
)

In [None]:
import matplotlib as mpl
# Font type 42 makes matplotlib embed fonts as TrueType in saved PDFs (the other
# option is Type 3), which keeps figure text editable in vector-graphics editors.
mpl.rcParams['pdf.fonttype'] = 42

In [None]:
# Root directory for every file this notebook reads or writes (.h5ad inputs/outputs and
# the .rds exports). Absolute cluster path: change it here when running elsewhere.
path_to_data = '/nfs/team292/vl6/FetalReproductiveTract/'

## Human

In [None]:
# Load the human genital tubercle AnnData object from disk.
human = sc.read(path_to_data + "human_genital_tubercle_mpw_orthologs.h5ad")
# Bare expression: display the object's summary as the cell output.
human

In [None]:
import anndata
# Put the matrix stored in the 'raw_counts' layer back into X, so the object is
# re-processed from those counts in the cells below.
human.X = human.layers['raw_counts']

In [None]:
# Sanity check: show a 10x10 block of X as a dense array (.toarray() implies X is a
# sparse matrix) — presumably to eyeball that the values look like raw counts.
human.X[20:30, 20:30].toarray()

In [None]:
# Snapshot the current object into .raw; the next cell reads X and var back from it.
human.raw = human.copy()

In [None]:
# Rebuild a minimal AnnData from X, var and obs only. Nothing else from the loaded
# object (layers, obsm, uns, ...) is passed to the constructor, so it is left behind.
human = anndata.AnnData(X = human.raw.X, var = human.raw.var, obs = human.obs)

In [None]:
# The rebuilt object was created without .raw: store a copy of its current state there.
human.raw = human.copy()

In [None]:
# Keep the counts as a layer as well, before X is normalised further down.
human.layers["raw_counts"] = human.X.copy()

In [None]:
## import reproductive tract utils functions
# NOTE: despite its name, `cwd` is not the current working directory. It is a fixed
# folder appended to sys.path, presumably the one containing reptract_utils.py and
# reptract_genes.py so that the two imports below resolve.
cwd = '/nfs/team292/vl6/Experiments/Utils'
sys.path.append(cwd)

import reptract_utils
import reptract_genes

In [None]:
# Normalize and log-transform
# normalize_log_transform is a project helper that is not visible here. Its return value
# is ignored, so it presumably modifies human.X in place — TODO confirm in reptract_utils.
reptract_utils.normalize_log_transform(human)
# Keep a copy of the resulting X as a named layer.
human.layers["log_normalised_counts"] = human.X.copy()

In [None]:
# PCA on the current X with scanpy's defaults; the scores end up in
# human.obsm['X_pca'], which is read back further down as the Harmony input.
sc.tl.pca(human)

In [None]:
# Variance ratio of the first 50 PCs on a log scale, to judge how many PCs to keep.
sc.pl.pca_variance_ratio(human, n_pcs=50, log=True)

In [None]:
# Neighbour graph on the first 25 uncorrected PCs (the same number is used for Harmony).
sc.pp.neighbors(human, n_pcs = 25)

In [None]:
# UMAP from that neighbour graph, default parameters.
sc.tl.umap(human)

In [None]:
# UMAP before batch correction, one panel per listed obs column, two panels per row.
sc.pl.umap(human, color = ['phase', 'stage_pcw', 'sex', 'donor',
                          'genitaltubercle_celltype'], ncols = 2)

In [None]:
# Harmony integration
# Set up the Python <-> R bridge (rpy2) that the %%R cells below rely on.
import rpy2.rinterface_lib.callbacks
import logging
# Ignore R warning messages
#Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
import anndata2ri
# Switch on anndata2ri's conversion so AnnData objects can be passed to R via `%%R -i`.
anndata2ri.activate()
# Load the IPython extension that provides the %%R cell magic.
%load_ext rpy2.ipython

In [None]:
# Extract PCA matrix and batch array
n_pcs = 25 # Number of leading principal components handed to Harmony (reused for the neighbour graph afterwards)
pca = human.obsm['X_pca'][:, 0:(n_pcs)]
batch = human.obs['donor'] # Batch ID: the donor of each cell, i.e. Harmony corrects across donors

In [None]:
%%R -i pca -i batch -o hem

# Inputs pushed from Python: `pca` (the PC matrix) and `batch` (donor label per cell).
# Output pulled back into Python: `hem`, the Harmony-corrected embedding.
library(harmony)
library(magrittr)
# Fixed seed so the Harmony result is reproducible between runs.
set.seed(1000)
# do_pca=FALSE: `pca` already holds principal components, so no PCA is run inside Harmony.
# theta and lambda are Harmony's diversity and ridge penalties (see the harmony docs).
hem <- HarmonyMatrix(pca, batch, theta=0, lambda = 6, do_pca=FALSE, verbose = FALSE)  
# Convert to a data.frame; the Python side reads the numbers through `hem.values`.
hem = data.frame(hem)

In [None]:
# Add the Harmony-corrected PCs to the anndata object under their own key;
# the original human.obsm['X_pca'] is NOT overwritten.
human.obsm['X_pca_harmony'] = hem.values 

In [None]:
# Recompute the neighbour graph on the Harmony-corrected embedding (fixed random_state),
# then recompute the UMAP with custom min_dist / spread. No alternative keys are given,
# so these results take the place of the uncorrected graph and UMAP computed earlier.
sc.pp.neighbors(human, n_pcs = n_pcs, use_rep = 'X_pca_harmony', random_state= 123)
sc.tl.umap(human, min_dist=0.6, spread = 0.8)

In [None]:
# Same panels as before the correction, now on the Harmony-based UMAP.
sc.pl.umap(human, color = ['phase', 'stage_pcw', 'sex', 'donor',
                          'genitaltubercle_celltype'], ncols = 2)

In [None]:
# Species-specific copy of the cell-type labels plus a constant species tag.
human.obs['human_celltype'] = human.obs['genitaltubercle_celltype']
human.obs['species'] = 'human'

In [None]:
# Save the complete object (embeddings included) before it is stripped down below.
human.write(path_to_data + "human_genital_tubercle_mpw_orthologs_with_embedding.h5ad")

In [None]:
# Remove the pairwise graphs, varm and uns from the in-memory object. The complete
# object was written to disk just above; presumably this slims it for the R export below.
del human.obsp

In [None]:
del human.varm

In [None]:
del human.uns

In [None]:
# Keep only the listed metadata columns; every other obs column is deleted in place.
obs_columns_to_keep = ('human_celltype', 'species', 'sample', 'donor', 'stage_pcw', 'sex')
obs_columns_to_drop = [
    column for column in human.obs.columns if column not in obs_columns_to_keep
]
for column in obs_columns_to_drop:
    del human.obs[column]

In [None]:
# Generic 'celltype' column (same column name as in the mouse section), copied from
# the species-specific labels.
human.obs['celltype'] = human.obs['human_celltype']

In [None]:
# Number of cells per cell-type label.
human.obs['celltype'].value_counts()

In [None]:
%%R -i human 
# Push the AnnData object into the R session (conversion handled by anndata2ri)
# and print it to check the result.
human

In [None]:
%%R -i path_to_data
# `human` is still defined in the R session from the previous cell; write it as .rds.
saveRDS(human, paste0(path_to_data, "human_genital_tubercle_mpw_orthologs.rds"))

## Mouse

In [None]:
# Load the mouse genital tubercle AnnData object from disk.
mouse = sc.read(path_to_data + "mouse_genital_tubercle_mpw_orthologs.h5ad")
# Bare expression: display the object's summary as the cell output.
mouse

In [None]:
# Repeated import (harmless); lets the mouse section start without the human cells.
import anndata
# Put the matrix stored in the 'raw_counts' layer back into X, so the object is
# re-processed from those counts in the cells below.
mouse.X = mouse.layers['raw_counts']

In [None]:
# Sanity check: show a 10x10 block of X as a dense array (.toarray() implies X is a
# sparse matrix) — presumably to eyeball that the values look like raw counts.
mouse.X[20:30, 20:30].toarray()

In [None]:
# Snapshot the current object into .raw; the next cell reads X and var back from it.
mouse.raw = mouse.copy()

In [None]:
# Rebuild a minimal AnnData from X, var and obs only. Nothing else from the loaded
# object (layers, obsm, uns, ...) is passed to the constructor, so it is left behind.
mouse = anndata.AnnData(X = mouse.raw.X, var = mouse.raw.var, obs = mouse.obs)

In [None]:
# The rebuilt object was created without .raw: store a copy of its current state there.
mouse.raw = mouse.copy()

In [None]:
# Keep the counts as a layer as well, before X is normalised further down.
mouse.layers["raw_counts"] = mouse.X.copy()

In [None]:
# Normalize and log-transform
# normalize_log_transform is a project helper that is not visible here. Its return value
# is ignored, so it presumably modifies mouse.X in place — TODO confirm in reptract_utils.
reptract_utils.normalize_log_transform(mouse)
# Keep a copy of the resulting X as a named layer.
mouse.layers["log_normalised_counts"] = mouse.X.copy()

In [None]:
# PCA on the current X with scanpy's defaults; the scores end up in
# mouse.obsm['X_pca'], which is read back further down as the Harmony input.
sc.tl.pca(mouse)

In [None]:
# Variance ratio of the first 50 PCs on a log scale, to judge how many PCs to keep.
sc.pl.pca_variance_ratio(mouse, n_pcs=50, log=True)

In [None]:
# Neighbour graph on the first 25 uncorrected PCs (the same number is used for Harmony).
sc.pp.neighbors(mouse, n_pcs = 25)

In [None]:
# UMAP from that neighbour graph, default parameters.
sc.tl.umap(mouse)

In [None]:
# UMAP before batch correction, one panel per listed obs column, two panels per row.
sc.pl.umap(mouse, color = ['phase', 'age', 'donor',
                          'harmonised_celltype'], ncols = 2)

In [None]:
# Expression of a panel of genes on the (uncorrected) mouse UMAP, three panels per row.
# use_raw=False colours the cells by the values in mouse.X instead of mouse.raw.
sc.pl.umap(
    mouse,
    color=[
        'Foxa1', 'Hoxa13', 'Nr2f2', 'Krt10', 'Isl1', 'Krt14',
        'Foxf1', 'Foxf2', 'Sox9', 'Lgr5',
        'Wif1', 'Shh', 'Dlx5', 'Wnt2', 'Rdh10', 'Six1', 'Igf1', 'Irx1',
        'Irx5', 'Inhba', 'Gap43', 'Shox2', 'Cpa6', 'Sall1', 'Foxl2',
        'Tcf21', 'Cdh8', 'Krtdap', 'Wnt3',
    ],
    color_map='OrRd',
    ncols=3,
    use_raw=False,
)

In [None]:
# Extract PCA matrix and batch array
n_pcs = 25 # Number of leading principal components handed to Harmony (reused for the neighbour graph afterwards)
pca = mouse.obsm['X_pca'][:, 0:(n_pcs)]
batch = mouse.obs['donor'] # Batch ID: the donor of each cell, i.e. Harmony corrects across donors

In [None]:
%%R -i pca -i batch -o hem

# Inputs pushed from Python: `pca` (the PC matrix) and `batch` (donor label per cell).
# Output pulled back into Python: `hem`, the Harmony-corrected embedding.
library(harmony)
library(magrittr)
# Fixed seed so the Harmony result is reproducible between runs.
set.seed(1000)
# do_pca=FALSE: `pca` already holds principal components, so no PCA is run inside Harmony.
# theta and lambda are Harmony's diversity and ridge penalties (see the harmony docs).
hem <- HarmonyMatrix(pca, batch, theta=0, lambda = 6, do_pca=FALSE, verbose = FALSE)  
# Convert to a data.frame; the Python side reads the numbers through `hem.values`.
hem = data.frame(hem)

In [None]:
# Add the Harmony-corrected PCs to the anndata object under their own key;
# the original mouse.obsm['X_pca'] is NOT overwritten.
mouse.obsm['X_pca_harmony'] = hem.values 

In [None]:
# Recompute the neighbour graph on the Harmony-corrected embedding (fixed random_state),
# then recompute the UMAP with custom min_dist / spread. No alternative keys are given,
# so these results take the place of the uncorrected graph and UMAP computed earlier.
sc.pp.neighbors(mouse, n_pcs = n_pcs, use_rep = 'X_pca_harmony', random_state= 123)
sc.tl.umap(mouse, min_dist=0.6, spread = 0.8)

In [None]:
# Same panels as before the correction, now on the Harmony-based UMAP.
sc.pl.umap(mouse, color = ['phase', 'age', 'donor',
                          'harmonised_celltype'], ncols = 2)

In [None]:
# Species-specific copy of the cell-type labels plus a constant species tag.
mouse.obs['mouse_celltype'] = mouse.obs['harmonised_celltype']
mouse.obs['species'] = 'mouse'

In [None]:
# Save the complete object (embeddings included) before it is stripped down below.
mouse.write(path_to_data + "mouse_genital_tubercle_mpw_orthologs_with_embedding.h5ad")

In [None]:
# Remove the pairwise graphs, varm and uns from the in-memory object. The complete
# object was written to disk just above; presumably this slims it for the R export below.
del mouse.obsp

In [None]:
del mouse.varm
del mouse.uns

In [None]:
# Keep only the listed metadata columns; every other obs column is deleted in place.
obs_columns_to_keep = ('mouse_celltype', 'species', 'sample', 'donor', 'age', 'sex')
obs_columns_to_drop = [
    column for column in mouse.obs.columns if column not in obs_columns_to_keep
]
for column in obs_columns_to_drop:
    del mouse.obs[column]

In [None]:
# Display the stripped-down object to check which slots and obs columns remain.
mouse

In [None]:
# Generic 'celltype' column (same column name as in the human section), copied from
# the species-specific labels.
mouse.obs['celltype'] = mouse.obs['mouse_celltype']

In [None]:
# Number of cells per cell-type label.
mouse.obs['celltype'].value_counts()

In [None]:
%%R -i mouse
# Push the AnnData object into the R session (conversion handled by anndata2ri)
# and print it to check the result.
mouse

In [None]:
%%R -i path_to_data
# `mouse` is still defined in the R session from the previous cell; write it as .rds.
saveRDS(mouse, paste0(path_to_data, "mouse_genital_tubercle_mpw_orthologs.rds"))

In [None]:
# Reload the complete mouse object saved before the in-memory copy was stripped
# (obsp / varm / uns and most obs columns were deleted for the R export).
mouse = sc.read(path_to_data + "mouse_genital_tubercle_mpw_orthologs_with_embedding.h5ad")