# Trajectory inference for hematopoiesis in mouse

* [reference](https://scanpy-tutorials.readthedocs.io/en/latest/paga-paul15.html)

Install the `fa2` package (ForceAtlas2) first with `pip install fa2`.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
from matplotlib import rcParams
import scanpy as sc

In [None]:
# Session setup: logging verbosity level 3, a printout of package versions,
# and shared figure defaults for every plot in this notebook.
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(
    dpi=100,
    frameon=False,
    figsize=(6, 6),
    facecolor="white",
)

In [None]:
# Path for the processed AnnData file; not referenced again in the cells shown here.
results_file = "write/paul15.h5ad"

## Load/download dataset

In [None]:
# Load the paul15 dataset through scanpy's built-in dataset loader.
adata = sc.datasets.paul15()

In [None]:
# Bare expression: in a notebook cell this displays the object's summary.
adata

In [None]:
# Store the expression matrix in double precision.
adata.X = adata.X.astype(np.float64)

## Preprocessing

In [None]:
# Apply scanpy's `recipe_zheng17` preprocessing. The return value is not
# assigned, so later cells rely on `adata` being modified in place.
sc.pp.recipe_zheng17(adata)

In [None]:
# Run PCA on `adata` with the ARPACK SVD solver.
sc.tl.pca(adata, svd_solver="arpack")

In [None]:
# Build the neighbourhood graph (4 neighbours, first 20 PCs), then compute a
# graph-drawing layout from it.
sc.pp.neighbors(
    adata,
    n_neighbors=4,
    n_pcs=20,
)
sc.tl.draw_graph(adata)

## Visualization

In [None]:
# Show the layout coloured by the `paul15_clusters` annotation, with the
# cluster labels drawn on top of the data.
sc.pl.draw_graph(
    adata,
    color="paul15_clusters",
    legend_loc="on data",
)

## Clustering and embedding

In [None]:
# Leiden clustering at resolution 1.4; later cells read the labels from
# adata.obs["leiden"].
sc.tl.leiden(adata, resolution=1.4)

In [None]:
# Run PAGA with the Leiden clusters as the groups.
sc.tl.paga(adata, groups="leiden")

In [None]:
# PAGA graph coloured by the clustering and by three lineage marker genes:
#   Hba-a2 -> erythroids
#   Elane  -> neutrophils
#   Irf8   -> monocytes
sc.pl.paga(
    adata,
    color=["leiden", "Hba-a2", "Elane", "Irf8"],
)

In [None]:
# PAGA graph coloured by the clustering and by three further marker genes:
#   Itga2b -> megakaryocytes
#   Prss34 -> basophils
#   Cma1   -> mast cells
sc.pl.paga(
    adata,
    color=["leiden", "Itga2b", "Prss34", "Cma1"],
)

In [None]:
# Display the Leiden category labels; the annotated names assigned to
# `leiden_anno` further down must line up with these one-to-one.
adata.obs["leiden"].cat.categories

In [None]:
# Seed the annotated clustering column from the Leiden labels.
adata.obs["leiden_anno"] = adata.obs["leiden"]

In [None]:
# Rename the categories so that clusters identified through the marker plots
# carry a cell-type suffix (e.g. "5/Stem", "9/Ery").
#
# Assigning to `.cat.categories` directly is not supported by pandas >= 2.0,
# so use `rename_categories`, which returns a new Series, and write it back.
#
# NOTE: the list needs exactly one label per existing category (28 here), in
# category order. Clustering output can differ between library versions, so
# re-check the annotations against the marker plots before reusing them.
adata.obs["leiden_anno"] = adata.obs["leiden_anno"].cat.rename_categories([
    '0', '1', '2', '3', '4', '5/Stem', '6', '7', '8/Mo', '9/Ery', '10', '11', '12',
    '13', '14', '15', '16', '17', '18/Neu', '19', '20/Mega', '21', '22/Baso', '23',
    '24', "25", "26", "27/Mast",
])

In [None]:
# Re-run PAGA, this time grouping by the annotated cluster labels.
sc.tl.paga(adata, groups="leiden_anno")

In [None]:
# Draw the PAGA graph with threshold=0.4 without displaying it (show=False).
# NOTE(review): presumably run for its side effect of storing the node
# positions that `init_pos="paga"` uses afterwards; confirm in the scanpy docs.
sc.pl.paga(adata, threshold=0.4, show=False)

## Visualization with PAGA

In [None]:
# Recompute the graph layout, initialised from the PAGA positions.
sc.tl.draw_graph(adata, init_pos="paga")

### Recomputing the embedding using PAGA-initialization

## Reconstructing gene changes along PAGA paths for a given set of genes

In [None]:
# Root cell for pseudotime: positional index of the first cell labelled
# "5/Stem". Raises IndexError if no cell carries that label.
adata.uns["iroot"] = np.flatnonzero(adata.obs["leiden_anno"]  == "5/Stem")[0]

In [None]:
# Compute diffusion pseudotime; later cells read it from
# adata.obs["dpt_pseudotime"].
sc.tl.dpt(adata)

In [None]:
# Genes to trace along the PAGA paths, listed lineage by lineage.
gene_names = [
    # erythroid
    "Gata2", "Gata1", "Klf1", "Epor", "Hba-a2",
    # neutrophil
    "Elane", "Cebpe", "Gfi1",
    # monocyte
    "Irf8", "Csf1r", "Ctsg",
]

In [None]:
# Reload the dataset from scratch, log1p-transform and scale it, then attach
# it to the processed object as `adata.raw`.
# NOTE(review): presumably so the path plots can use the full gene set rather
# than only the genes kept by preprocessing; confirm.
adata_raw = sc.datasets.paul15()
sc.pp.log1p(adata_raw)
sc.pp.scale(adata_raw)
adata.raw = adata_raw

In [None]:
# Show the layout twice: coloured by annotated cluster and by pseudotime,
# with cluster labels drawn on the data.
sc.pl.draw_graph(
    adata,
    color=["leiden_anno", "dpt_pseudotime"],
    legend_loc="on data",
)

In [None]:
# TEMPLATE: fill in the cluster sequence for each lineage before running the
# path plots. The `...` entry and the empty lists are placeholders, not valid
# paths; pick the nodes from the annotated PAGA graph of your own run.
paths = [
    ("erythrocytes", [16, ...]),
    ("neutrophils", []),
    ("monocytes", []),
]

In [None]:
# Expose pseudotime and the annotated clusters under the generic names that
# the `paga_path` call in the next cell refers to (annotations=['distance'],
# groups_key='clusters').
adata.obs["distance"] = adata.obs["dpt_pseudotime"]
adata.obs["clusters"] = adata.obs["leiden_anno"]
# scanpy keeps the palette of a categorical column under "<obs key>_colors",
# i.e. "leiden_anno_colors" for "leiden_anno". The previous key
# "leiden_colors_anno" does not follow that pattern and raised a KeyError.
adata.uns["clusters_colors"] = adata.uns["leiden_anno_colors"]

In [None]:
import os

# `DataFrame.to_csv` does not create missing parent directories, so make sure
# the output folder exists before the loop writes into it.
os.makedirs("write", exist_ok=True)

# One panel per lineage path, side by side.
_, axs = pl.subplots(ncols=3, figsize=(6, 2.5), gridspec_kw={'wspace': 0.05, 'left': 0.12})
pl.subplots_adjust(left=0.05, right=0.98, top=0.82, bottom=0.2)
for ipath, (descr, path) in enumerate(paths):
    # Plot the selected genes along this path and also get the plotted values
    # back (return_data=True) so they can be saved.
    _, data = sc.pl.paga_path(
        adata, path, gene_names,
        show_node_names=False,
        ax=axs[ipath],
        ytick_fontsize=12,
        left_margin=0.15,
        n_avg=50,
        annotations=['distance'],
        show_yticks=ipath == 0,  # y tick labels only on the left-most panel
        show_colorbar=False,
        color_map='Greys',
        groups_key='clusters',
        color_maps_annotations={'distance': 'viridis'},
        title='{} path'.format(descr),
        return_data=True,
        show=False)
    # One CSV per lineage with the values shown in its panel.
    data.to_csv("write/paga_path_{}.csv".format(descr))
# pl.savefig('./figures/paga_path_paul15.pdf')
pl.show()