In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import pathlib as pl

from tqdm.notebook import tqdm

import palettable

# Download data

In [None]:
datadir = pl.Path("/add/path/here/")

# Collect the per-sample AnnData files in a stable, sorted order.
# Path.iterdir() yields entries in arbitrary (filesystem-dependent) order and also
# returns any stray non-.h5ad file, which read_h5ad would fail on. The order matters
# because concatenate() below numbers the samples by their position in the list.
sample_paths = sorted(datadir.glob("*.h5ad"))
if not sample_paths:
    raise FileNotFoundError(f"No .h5ad files found in {datadir}")

adatas = []
for f in tqdm(sample_paths):
    sample = sc.read_h5ad(f)
    # Tag every cell with the file stem so samples can be told apart after merging.
    sample.obs["sample_id"] = f.stem
    adatas.append(sample)

# Merge all samples into a single AnnData object.
adata = adatas[0].concatenate(*adatas[1:])

In [None]:
# Coarse cell-type groups and the fine-grained labels that are folded into each of them.
# Labels that do not appear here are left untouched by the mapping.
_coarse_to_fine = {
    "Unknown/technical": ["?", "MT-high"],
    "Nerve/adrenal": ["Nerve"],
    "Epithelial": ["Parietal cells"],
    "Stromal/Muscle": ["Fibroblast/CAF", "CAF/Fibroblast", "Smooth muscle", "Adipocyte"],
}

# Inverted view (fine label -> coarse group), which is the form used for relabelling.
highlevel_ct_mapping = {
    fine_label: coarse_label
    for coarse_label, fine_labels in _coarse_to_fine.items()
    for fine_label in fine_labels
}

In [None]:
# Collapse the fine-grained labels into coarse groups; labels missing from the mapping are kept.
# The column is converted to object dtype first because Series.replace on a categorical
# column is deprecated in pandas >= 2.2 (it will stop rewriting the categories).
# The result is stored as a categorical again.
adata.obs["highlevel_celltype"] = (
    adata.obs["celltype"]
    .astype(object)
    .replace(highlevel_ct_mapping)
    .astype("category")
)

In [None]:
# Per-sample clinical metadata, indexed by the first CSV column.
clinical = pd.read_csv("/add/path/here/EAC_clinical_info.csv",index_col=0)
# NOTE(review): treatment_mapping is not used anywhere in the visible notebook; kept in
# case a later cell relies on it.
treatment_mapping = {"Neoadjuvant CROSS": "Neoadj. chemo", "Neoadjuvent carboplatin": "Neoadj. chemo"}

# A sample is metastatic when it is tumor tissue taken from a metastasis site.
# The raw "Tumor?" values carry stray whitespace ("Yes "), so compare on the stripped value;
# na=False treats a missing site as "not a metastasis" instead of propagating NaN.
is_tumor = clinical["Tumor?"].str.strip() == "Yes"
metastatic = is_tumor & clinical["Site"].str.contains("metastasis", na=False)
metastatic.name = "Metastatic?"
clinical["Metastatic?"] = metastatic

# Harmonise the sampling site into a small set of anatomical locations.
clinical["Location"] = clinical["Site"].replace({"GEJ": "Esophagus/GEJ",
                                                 "Esophagus": "Esophagus/GEJ"})
# Use .loc for the conditional overwrites: the chained form clinical["Location"][mask] = ...
# may write to a temporary copy (SettingWithCopyWarning) and is a no-op under copy-on-write.
for keyword, location in [("Liver", "Liver"),
                          ("Adrenal", "Adrenal gland"),
                          ("Peritoneal", "Peritoneum")]:
    clinical.loc[clinical["Location"].str.contains(keyword, na=False), "Location"] = location

# Reduce the free-text grade/stage description to a stage label.
clinical["Stage"] = clinical["Grade/stage"].replace({"Stage IV ": "IV", "Stage IV": "IV",
                                                     "Moderately differentiated; ypT1aN0": "I",
                                                     "Moderately differentiated; pT1aN0": "I",
                                                     "Poorly differentiated; ypT2N0": "II",
                                                     "Presented with stage III became stage IV during esophagectomy when pleural metastases were identified": "III/IV"})

# Hand-curated treatment labels, assigned positionally in the CSV row order
# (i.e. BEFORE the sort below). The list must have exactly one entry per row.
clinical["Treatment"] = ["Neoadj. chemo",
                         "None",
                         "Neoadj. chemo + ICI + RT",
                         "None",
                         "None",
                         "Chemo + HER2 targeted + ICI",
                         "Neoadj. chemo + HER2 targeted",
                         "Neoadj. chemo + ICI",
                         "None",
                         "Neoadj. chemo + VEGFR2i"]

clinical["HER2 status"] = clinical["HER2"].replace({"HER 2 1+": "1+/equivocal"})

clinical = clinical.sort_values(by=["Tumor?","Metastatic?","Location"])

# Hand-curated PD-L1 CPS scores, assigned positionally in the SORTED row order
# (unlike "Treatment" above, which follows the original CSV order).
# NOTE(review): positional assignment is fragile; keying these by sample id would be safer.
clinical["PD-L1 CPS score"] = [0,2,7,2,3,24,0,8,3,15]

# Saving refined annotations

In [None]:
# One row per cell of the merged dataset, with a single, initially empty "Annotation" column.
# The sections below fill it in subset by subset; cells still empty at the end fall back to
# their high-level cell type (see the "Save annotations" section).
refined_annotations = pd.DataFrame(index=adata.obs_names, columns=["Annotation"])

# T/NK cells

In [None]:
subadata = adata[adata.obs["highlevel_celltype"]=="T/NK"].copy()

In [None]:
refined_annotations.loc[subadata[subadata.obs.pct_counts_mt>=15].obs_names] = "T-HighMT"

In [None]:
subadata = subadata[subadata.obs["pct_counts_mt"]<15].copy()

In [None]:
subadata.shape

In [None]:
# Sub-clustering pipeline for the T/NK subset. Every call below mutates `subadata` in place
# and relies on the results of the previous cells, so the cells must run in this order.
sc.tl.pca(subadata)

In [None]:
# Batch-correct the PCA embedding across samples with Harmony; the corrected embedding is
# the "X_pca_harmony" representation consumed by the following cells.
sc.external.pp.harmony_integrate(subadata, sigma=0.5,
                                 key="sample_id", 
                                 max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
sc.tl.umap(subadata)

In [None]:
# Visual QC: original labels, sample of origin, sequencing depth and mitochondrial fraction.
sc.pl.umap(subadata, color=["celltype", "sample_id", "log1p_total_counts", "pct_counts_mt"], ncols=1)

In [None]:
# Leiden clustering; the cluster ids are the keys of the manual annotation dict further down.
sc.tl.leiden(subadata, resolution=0.5)

In [None]:
sc.pl.umap(subadata, color=["sample_id", "leiden"], ncols=1)

In [None]:
# Expression of selected marker genes on the UMAP.
sc.pl.umap(subadata, color=["CD4","CD8B","FOXP3","NKG7","CD3E","CD3G", "GZMA"], ncols=2)

In [None]:
# Differential expression per Leiden cluster.
sc.tl.rank_genes_groups(subadata, groupby="leiden")

In [None]:
sc.tl.dendrogram(subadata, groupby="leiden", use_rep="X_pca_harmony")

In [None]:
# Heatmap of the top 5 ranked genes per cluster.
sc.pl.rank_genes_groups_heatmap(subadata, n_genes=5, use_raw=False, swap_axes=True, vmax=3, cmap='bwr')

In [None]:
sc.pl.umap(subadata, color=["leiden"], ncols=1)

In [None]:
# Top 20 genes by score for cluster "5" (the cluster labelled "B" in the manual annotation below).
sc.get.rank_genes_groups_df(subadata, group="5").sort_values(by="scores",ascending=False).head(20)

In [None]:
manual_annotations = {"0": "TCD4", 
                      "1": "TCD4", 
                      "2": "TCD8", 
                      "3": "NK", 
                      "4": "Treg", 
                      "5": "B", 
                      "6": "Mast",}

In [None]:
subadata.obs["lowlevel_celltype"] = subadata.obs.leiden.replace(manual_annotations)

In [None]:
fig = sc.pl.umap(subadata[~subadata.obs.lowlevel_celltype.isin(["B","Mast"])], color=["lowlevel_celltype"], return_fig=True)
fig.axes[0].spines[['right', 'top']].set_visible(False)
fig.axes[0].set_title("Cell type")

In [None]:
fig = sc.pl.umap(subadata[~subadata.obs.lowlevel_celltype.isin(["B","Mast"])], color=["sample_id"], return_fig=True)
fig.axes[0].spines[['right', 'top']].set_visible(False)
fig.axes[0].set_title("Sample ID")

In [None]:
refined_annotations.loc[subadata.obs_names,"Annotation"] = subadata.obs.lowlevel_celltype.ravel()

# Macrophages

In [None]:
# Work on an independent copy of the macrophage compartment. Every call below mutates
# `subadata` in place and relies on the previous cells, so the cells must run in this order.
subadata = adata[adata.obs["highlevel_celltype"]=="Macrophage"].copy()

In [None]:
# Flag cells with >= 15% mitochondrial counts before they are dropped from the subset.
refined_annotations.loc[subadata[subadata.obs.pct_counts_mt>=15].obs_names,"Annotation"] = "Myeloid-HighMT"

In [None]:
# Keep only cells below the 15% mitochondrial threshold for sub-clustering.
subadata = subadata[subadata.obs["pct_counts_mt"]<15].copy()

In [None]:
subadata.shape

In [None]:
sc.tl.pca(subadata)

In [None]:
# Batch-correct the PCA embedding across samples with Harmony; the corrected embedding is
# the "X_pca_harmony" representation consumed by the following cells.
sc.external.pp.harmony_integrate(subadata, 
                                 key="sample_id", 
                                 max_iter_harmony=20, sigma=0.1)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
sc.tl.umap(subadata)

In [None]:
# Visual QC: original labels, sample of origin, sequencing depth and mitochondrial fraction.
sc.pl.umap(subadata, color=["celltype", "sample_id", "log1p_total_counts", "pct_counts_mt"], ncols=1)

In [None]:
# Leiden clustering; the cluster ids are the keys of the manual annotation dict further down.
sc.tl.leiden(subadata, resolution=0.4)

In [None]:
sc.pl.umap(subadata, color=["sample_id", "leiden"], ncols=1)

In [None]:
# Differential expression per Leiden cluster.
sc.tl.rank_genes_groups(subadata, groupby="leiden")

In [None]:
sc.tl.dendrogram(subadata, groupby="leiden", use_rep="X_pca_harmony")

In [None]:
# Heatmap of the top 4 ranked genes per cluster.
sc.pl.rank_genes_groups_heatmap(subadata, n_genes=4, use_raw=False, swap_axes=True, vmax=3, cmap='bwr')

References: 
- GPNMB https://www.frontiersin.org/articles/10.3389/fimmu.2021.674739/full
- CTSD https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3108842/, https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7746538/
- FKBP51 https://www.nature.com/articles/s41416-020-0840-8
- PDE4D (degrades cAMP) https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6199465/#:~:text=(A)%20PDE4%20regulates%20the%20production,via%20the%20degradation%20of%20cAMP.
- MRC1 https://www.frontiersin.org/articles/10.3389/fimmu.2019.01084/full
- CD163 https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7432735/
- ZBTB16 (=PLZF) https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4321291/
- RORA https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6261595/
- LRRK2 https://www.biorxiv.org/content/10.1101/2022.12.17.520834v2
- ITGA4 (=CD49d) https://genomebiology.biomedcentral.com/articles/10.1186/s13059-017-1362-4
- VSIR (=VISTA) https://www.frontiersin.org/articles/10.3389/fimmu.2019.02641/full
- IL1R2 https://www.frontiersin.org/articles/10.3389/fimmu.2022.804641/full
- TCF7L2 https://www.frontiersin.org/articles/10.3389/fcvm.2021.701279/full#:~:text=TCF7L2%20Promotes%20M2%20Polarization,macrophages%20in%20the%20vascular%20wall. 
- AOAH https://elifesciences.org/articles/70938
- HDAC9 https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9356872/
- general macrophage signatures https://www.cell.com/trends/immunology/fulltext/S1471-4906(22)00094-1

In [None]:
sc.pl.umap(subadata, color=["leiden"])

In [None]:
sc.get.rank_genes_groups_df(subadata, group="8").sort_values(by="scores",ascending=False).head(20)

In [None]:
manual_annotations = {"0": "TAM1", 
                      "1": "TAM3", 
                      "2": "TAM2", 
                      "3": "TAM2", 
                      "4": "TAM1", 
                      "5": "DC",  
                      "6": "Kupffer cells",
                      "7": "Mast", "8": "DC"}

In [None]:
subadata.obs["lowlevel_celltype"] = subadata.obs.leiden.replace(manual_annotations)

In [None]:
sc.pl.umap(subadata, color=["lowlevel_celltype"])

In [None]:
sc.tl.rank_genes_groups(subadata, groupby="lowlevel_celltype")

In [None]:
group_markers = {}
for ct in sorted(subadata.obs.lowlevel_celltype.unique()):
    if ct in ["Myeloid-HighMT"]:
        continue
    else:
        group_markers[ct] = sc.get.rank_genes_groups_df(subadata, group=ct)
        group_markers[ct] = group_markers[ct].sort_values(by="scores",ascending=False).head(10).names.to_numpy()

In [None]:
sc.pl.dotplot(subadata[~subadata.obs.lowlevel_celltype.isin(["Myeloid-HighMT"])], var_names=group_markers, groupby="lowlevel_celltype")

In [None]:
refined_annotations.loc[subadata.obs_names,"Annotation"] = subadata.obs.lowlevel_celltype.ravel()

# Endothelial + Fibro + Muscle

In [None]:
subadata = adata[adata.obs["highlevel_celltype"].isin(["Endothelial","Stromal/Muscle"])].copy()

In [None]:
subadata = subadata[subadata.obs["pct_counts_mt"]<15].copy()

In [None]:
subadata.shape

In [None]:
sc.tl.pca(subadata)

In [None]:
sc.pp.neighbors(subadata)

In [None]:
sc.external.pp.harmony_integrate(subadata, 
                                 key="sample_id", 
                                 max_iter_harmony=20)

In [None]:
sc.pp.neighbors(subadata, use_rep="X_pca_harmony")

In [None]:
sc.tl.umap(subadata)

In [None]:
sc.pl.umap(subadata, color=["celltype", "sample_id", "log1p_total_counts", "pct_counts_mt"], ncols=1)

In [None]:
sc.tl.leiden(subadata, resolution=0.1)

In [None]:
sc.pl.umap(subadata, color=["sample_id", "leiden"], ncols=1)

In [None]:
sc.tl.rank_genes_groups(subadata, groupby="leiden")

In [None]:
sc.tl.dendrogram(subadata, groupby="leiden", use_rep="X_pca_harmony")

In [None]:
sc.pl.rank_genes_groups_heatmap(subadata, n_genes=4, use_raw=False, swap_axes=True, vmax=3, cmap='bwr')

In [None]:
manual_annotations = {"0": "Fibroblast", 
                      "1": "Endothelial", 
                      "2": "Muscle", 
                      "3": "Fibroblast", 
                      "4": "Endothelial", 
                      "5": "Muscle", }

In [None]:
subadata.obs["lowlevel_celltype"] = subadata.obs.leiden.replace(manual_annotations)

In [None]:
sc.pl.umap(subadata, color=["lowlevel_celltype"])

In [None]:
sc.tl.rank_genes_groups(subadata, groupby="lowlevel_celltype")

In [None]:
group_markers = {}
for ct in sorted(subadata.obs.lowlevel_celltype.unique().to_numpy()):
    group_markers[ct] = sc.get.rank_genes_groups_df(subadata, group=ct)
    group_markers[ct] = group_markers[ct].loc[~group_markers[ct].names.str.startswith("MT-")]
    group_markers[ct] = group_markers[ct].sort_values(by="scores",ascending=False).head(10).names.to_numpy()

In [None]:
sc.pl.dotplot(subadata, var_names=group_markers, groupby="lowlevel_celltype")

# Stromal/Muscle

In [None]:
# Independent copy of the cells labelled "Muscle" in the coarse stromal/endothelial annotation.
# Every call below mutates `subadata2` in place and relies on the previous cells.
subadata2 = subadata[subadata.obs.lowlevel_celltype=="Muscle"].copy()

In [None]:
# NOTE(review): `subadata` was already restricted to pct_counts_mt < 15 when it was created,
# so this selection is empty and the assignment has no effect here.
refined_annotations.loc[subadata2[subadata2.obs.pct_counts_mt>=15].obs_names,"Annotation"] = "StrMus-HighMT"

In [None]:
# For the same reason this filter does not remove any cell.
subadata2 = subadata2[subadata2.obs["pct_counts_mt"]<15].copy()

In [None]:
subadata2.shape

In [None]:
sc.tl.pca(subadata2)

In [None]:
# Batch-correct the PCA embedding across samples with Harmony; the corrected embedding is
# the "X_pca_harmony" representation consumed by the following cells.
sc.external.pp.harmony_integrate(subadata2, key="sample_id", sigma=0.2, max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata2, use_rep="X_pca_harmony")

In [None]:
sc.tl.umap(subadata2)

In [None]:
# Visual QC: sample of origin, sequencing depth and mitochondrial fraction.
sc.pl.umap(subadata2, color=["sample_id", "log1p_total_counts", "pct_counts_mt"], ncols=1)

In [None]:
# Leiden clustering; the cluster ids are the keys of the manual annotation dict further down.
sc.tl.leiden(subadata2, resolution=0.1)

In [None]:
sc.pl.umap(subadata2, color=["sample_id", "leiden"], ncols=1)

In [None]:
# Differential expression per Leiden cluster.
sc.tl.rank_genes_groups(subadata2, groupby="leiden")

In [None]:
sc.tl.dendrogram(subadata2, groupby="leiden", use_rep="X_pca_harmony")

In [None]:
# Heatmap of the top 10 ranked genes per cluster.
sc.pl.rank_genes_groups_heatmap(subadata2, n_genes=10, use_raw=False, swap_axes=True, vmax=3, cmap='bwr')

In [None]:
manual_annotations = {"0": "Smooth muscle", 
                      "1": "Skeletal muscle",
                      "2": "Smooth muscle", "3": "Smooth muscle"}

In [None]:
subadata2.obs["lowlevel_celltype"] = subadata2.obs.leiden.replace(manual_annotations)

In [None]:
sc.pl.umap(subadata2, color=["lowlevel_celltype"])

In [None]:
sc.tl.rank_genes_groups(subadata2, groupby="lowlevel_celltype")

In [None]:
group_markers = {}
for ct in sorted(subadata2.obs.lowlevel_celltype.unique().to_numpy()):
    if ct in ["StrMus-HighMT"]:
        continue
    else:
        group_markers[ct] = sc.get.rank_genes_groups_df(subadata2, group=ct)
        group_markers[ct] = group_markers[ct].sort_values(by="scores",ascending=False).head(10).names.to_numpy()

In [None]:
sc.pl.dotplot(subadata2[~subadata2.obs.lowlevel_celltype.isin(["StrMus-HighMT"])], var_names=group_markers, groupby="lowlevel_celltype")

In [None]:
refined_annotations.loc[subadata2.obs_names,"Annotation"] = subadata2.obs.lowlevel_celltype.ravel()

# Fibroblast

In [None]:
# Independent copy of the cells labelled "Fibroblast" in the coarse stromal/endothelial annotation.
# Every call below mutates `subadata2` in place and relies on the previous cells.
subadata2 = subadata[subadata.obs.lowlevel_celltype=="Fibroblast"].copy()

In [None]:
# NOTE(review): `subadata` was already restricted to pct_counts_mt < 15 when it was created,
# so this selection is empty and the assignment has no effect here.
refined_annotations.loc[subadata2[subadata2.obs.pct_counts_mt>=15].obs_names,"Annotation"] = "StrMus-HighMT"

In [None]:
# For the same reason this filter does not remove any cell.
subadata2 = subadata2[subadata2.obs["pct_counts_mt"]<15].copy()

In [None]:
sc.tl.pca(subadata2)

In [None]:
# Batch-correct the PCA embedding across samples with Harmony; the corrected embedding is
# the "X_pca_harmony" representation consumed by the following cells.
sc.external.pp.harmony_integrate(subadata2, key="sample_id", sigma=0.2, max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata2, use_rep="X_pca_harmony")

In [None]:
sc.tl.umap(subadata2)

In [None]:
# Visual QC: sample of origin, sequencing depth and mitochondrial fraction.
sc.pl.umap(subadata2, color=["sample_id", "log1p_total_counts", "pct_counts_mt"], ncols=1)

In [None]:
# Leiden clustering; the cluster ids are the keys of the manual annotation dict further down.
sc.tl.leiden(subadata2, resolution=0.2)

In [None]:
sc.pl.umap(subadata2, color=["sample_id", "leiden"], ncols=1)

In [None]:
# PDGFRA expression next to the sample of origin.
sc.pl.umap(subadata2, color=["sample_id", "PDGFRA"], ncols=1)

In [None]:
# Differential expression per Leiden cluster.
sc.tl.rank_genes_groups(subadata2, groupby="leiden")

In [None]:
sc.tl.dendrogram(subadata2, groupby="leiden", use_rep="X_pca_harmony")

In [None]:
# Heatmap of the top 5 ranked genes per cluster.
sc.pl.rank_genes_groups_heatmap(subadata2, n_genes=5, use_raw=False, swap_axes=True, vmax=3, cmap='bwr')

In [None]:
manual_annotations = {"0": "Fibroblast", 
                      "1": "Fibroblast",
                      "2": "Fibroblast",
                      "3": "CAF",
                      "4": "CAF",
                      "5": "CAF",
                      "6": "CAF",
                      "7": "Fibroblast",}

In [None]:
subadata2.obs["lowlevel_celltype"] = subadata2.obs.leiden.replace(manual_annotations)

In [None]:
sc.pl.umap(subadata2, color=["lowlevel_celltype"])

In [None]:
sc.tl.rank_genes_groups(subadata2, groupby="lowlevel_celltype")

In [None]:
group_markers = {}
for ct in sorted(subadata2.obs.lowlevel_celltype.unique().to_numpy()):
    if ct in ["StrMus-HighMT"]:
        continue
    else:
        group_markers[ct] = sc.get.rank_genes_groups_df(subadata2, group=ct)
        group_markers[ct] = group_markers[ct].sort_values(by="scores",ascending=False).head(10).names.to_numpy()

In [None]:
sc.pl.dotplot(subadata2[~subadata2.obs.lowlevel_celltype.isin(["StrMus-HighMT"])], var_names=group_markers, groupby="lowlevel_celltype")

In [None]:
refined_annotations.loc[subadata2.obs_names,"Annotation"] = subadata2.obs.lowlevel_celltype.ravel()

In [None]:
cafadata = subadata2[subadata2.obs.lowlevel_celltype.isin(["CAF"])].copy()

In [None]:
sc.tl.pca(cafadata)

In [None]:
sc.pp.neighbors(cafadata)

In [None]:
sc.external.pp.harmony_integrate(cafadata, key="sample_id", sigma=0.1, max_iter_harmony=20)

In [None]:
sc.pp.neighbors(cafadata, use_rep="X_pca_harmony")

In [None]:
sc.tl.umap(cafadata)

In [None]:
sc.pl.umap(cafadata, color=["celltype", "sample_id", "log1p_total_counts", "pct_counts_mt"], ncols=1)

In [None]:
sc.tl.leiden(cafadata, resolution=0.1)

In [None]:
sc.pl.umap(cafadata, color=["sample_id", "leiden"], ncols=1)

In [None]:
sc.tl.rank_genes_groups(cafadata, groupby="leiden")

In [None]:
sc.tl.dendrogram(cafadata, groupby="leiden", use_rep="X_pca_harmony")

In [None]:
sc.pl.rank_genes_groups_heatmap(cafadata, n_genes=5, use_raw=False, swap_axes=True, vmax=3, cmap='bwr')

In [None]:
# genes from here https://www.nature.com/articles/s41467-022-34395-2
sc.pl.dotplot(cafadata, var_names={"CAFinfla": ["FAP","COL1A1","TGFB1","MMP11",],
                                   "CAFmyo": ["RGS5","MYH11","ACTA2"], 
                                   "CAFadi": ["CFD","PTGDS","FBLN1"], 
                                   "CAFendMT": ["PLVAP","RAMP2","FLT1"], 
                                   "CAFpn": ["S100B","GPM6B","NRXN1"], 
                                   "CAFap": ["HLA-DRA","LYZ"], 
                                   "Fibroblast": ["PDGFRA","PDGFRB","THY1",]}, groupby="leiden")

In [None]:
manual_annotations = {"0": "Inflammatory CAF", 
                      "1": "Inflammatory CAF", 
                      "2": "Inflammatory CAF", 
                      "3": "HGF-CAF", 
                      "4": "Adipose CAF"}

In [None]:
cafadata.obs["lowlevel_celltype_lvl2"] = cafadata.obs.leiden.replace(manual_annotations)

In [None]:
sc.pl.umap(cafadata, color=["lowlevel_celltype_lvl2"])

References to interesting genes + fibroblast descriptions to explore
- https://rupress.org/jem/article/217/3/e20190103/133821/Transforming-growth-factor-in-tissue-fibrosisTGF
- https://www.nature.com/articles/s41467-022-30633-9
- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8323949/ 
- https://www.nature.com/articles/s41467-020-17740-1
- https://onlinelibrary.wiley.com/doi/full/10.1002/jcp.22604
- https://www.nature.com/articles/s41467-022-34395-2
- https://www.spandidos-publications.com/10.3892/or.2018.6500?text=abstract#b12-or-40-02-1185


In [None]:
# Overwrite the generic "CAF" label with the CAF subtype, stored as plain strings.
# to_numpy() replaces the deprecated Series.ravel().
refined_annotations.loc[cafadata.obs_names,"Annotation"] = cafadata.obs["lowlevel_celltype_lvl2"].astype(str).to_numpy()

# Endothelial

In [None]:
# Independent copy of the cells labelled "Endothelial" in the coarse stromal/endothelial annotation.
# Every call below mutates `subadata2` in place and relies on the previous cells.
subadata2 = subadata[subadata.obs.lowlevel_celltype=="Endothelial"].copy()

In [None]:
# NOTE(review): `subadata` was already restricted to pct_counts_mt < 15 when it was created,
# so this selection is empty and the assignment has no effect here.
refined_annotations.loc[subadata2[subadata2.obs.pct_counts_mt>=15].obs_names,"Annotation"] = "StrMus-HighMT"

In [None]:
# For the same reason this filter does not remove any cell.
subadata2 = subadata2[subadata2.obs["pct_counts_mt"]<15].copy()

In [None]:
sc.tl.pca(subadata2)

In [None]:
# Batch-correct the PCA embedding across samples with Harmony; the corrected embedding is
# the "X_pca_harmony" representation consumed by the following cells.
sc.external.pp.harmony_integrate(subadata2, key="sample_id", sigma=0.2, max_iter_harmony=20)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(subadata2, use_rep="X_pca_harmony")

In [None]:
sc.tl.umap(subadata2)

In [None]:
# Visual QC: sample of origin, sequencing depth and mitochondrial fraction.
sc.pl.umap(subadata2, color=["sample_id", "log1p_total_counts", "pct_counts_mt"], ncols=1)

In [None]:
# Leiden clustering; the cluster ids are the keys of the manual annotation dict further down.
sc.tl.leiden(subadata2, resolution=0.1)

In [None]:
sc.pl.umap(subadata2, color=["sample_id", "leiden"], ncols=1)

In [None]:
# Differential expression per Leiden cluster.
sc.tl.rank_genes_groups(subadata2, groupby="leiden")

In [None]:
sc.tl.dendrogram(subadata2, groupby="leiden", use_rep="X_pca_harmony")

In [None]:
# Heatmap of the top 5 ranked genes per cluster.
sc.pl.rank_genes_groups_heatmap(subadata2, n_genes=5, use_raw=False, swap_axes=True, vmax=3, cmap='bwr')

In [None]:
manual_annotations = {"0": "Quiescent EC", 
                      "1": "Activated EC",}

In [None]:
subadata2.obs["lowlevel_celltype"] = subadata2.obs.leiden.replace(manual_annotations)

In [None]:
sc.pl.umap(subadata2, color=["lowlevel_celltype"])

In [None]:
sc.tl.rank_genes_groups(subadata2, groupby="lowlevel_celltype")

In [None]:
group_markers = {}
for ct in sorted(subadata2.obs.lowlevel_celltype.unique().to_numpy()):
    if ct in ["StrMus-HighMT"]:
        continue
    else:
        group_markers[ct] = sc.get.rank_genes_groups_df(subadata2, group=ct)
        #group_markers[ct] = group_markers[ct].loc[~group_markers[ct].names.str.startswith("MT-")]
        group_markers[ct] = group_markers[ct].sort_values(by="scores",ascending=False).head(10).names.to_numpy()

In [None]:
sc.pl.dotplot(subadata2[~subadata2.obs.lowlevel_celltype.isin(["StrMus-HighMT"])], var_names=group_markers, groupby="lowlevel_celltype")

In [None]:
refined_annotations.loc[subadata2.obs_names,"Annotation"] = subadata2.obs.lowlevel_celltype.ravel()

In [None]:
# Independent copy of the cells labelled "Activated EC", for a second level of sub-clustering.
# Every call below mutates `endadata` in place and relies on the previous cells.
endadata = subadata2[subadata2.obs.lowlevel_celltype=="Activated EC"].copy()

In [None]:
endadata.shape

In [None]:
sc.tl.pca(endadata)

In [None]:
# Batch-correct the PCA embedding across samples with Harmony; the corrected embedding is
# the "X_pca_harmony" representation consumed by the following cells.
sc.external.pp.harmony_integrate(endadata, key="sample_id", sigma=1., max_iter_harmony=30)

In [None]:
# Neighbourhood graph built on the Harmony-corrected embedding.
sc.pp.neighbors(endadata, use_rep="X_pca_harmony")

In [None]:
sc.tl.umap(endadata)

In [None]:
# Visual QC: sample of origin, sequencing depth and mitochondrial fraction.
sc.pl.umap(endadata, color=["sample_id", "log1p_total_counts", "pct_counts_mt"], ncols=1)

In [None]:
# Leiden clustering; the cluster ids are the keys of the manual annotation dict further down.
sc.tl.leiden(endadata, resolution=0.2)

In [None]:
sc.pl.umap(endadata, color=["sample_id", "leiden"], ncols=1)

In [None]:
# Differential expression per Leiden cluster.
sc.tl.rank_genes_groups(endadata, groupby="leiden")

In [None]:
sc.tl.dendrogram(endadata, groupby="leiden", use_rep="X_pca_harmony")

In [None]:
# Heatmap of the top 5 ranked genes per cluster.
sc.pl.rank_genes_groups_heatmap(endadata, n_genes=5, use_raw=False, swap_axes=True, vmax=3, cmap='bwr')

In [None]:
manual_annotations = {"0": "Venous EC", 
                      "1": "Hepatic EC", 
                      "2": "Angiogenic EC", 
                      "3": "Venous EC", 
                      "4": "Angiogenic EC", "5": "Carcinoma"}

In [None]:
endadata.obs["lowlevel_celltype_lvl2"] = endadata.obs.leiden.replace(manual_annotations)

In [None]:
sc.pl.umap(endadata, color=["lowlevel_celltype_lvl2"])

In [None]:
sc.tl.rank_genes_groups(endadata, groupby="lowlevel_celltype_lvl2")

In [None]:
group_markers = {}
for ct in sorted(endadata.obs.lowlevel_celltype_lvl2.unique().to_numpy()):
    group_markers[ct] = sc.get.rank_genes_groups_df(endadata, group=ct)
    group_markers[ct] = group_markers[ct].loc[~group_markers[ct].names.str.startswith("RPS")]
    group_markers[ct] = group_markers[ct].loc[~group_markers[ct].names.str.startswith("RPL")]
    group_markers[ct] = group_markers[ct].sort_values(by="scores",ascending=False).head(10).names.to_numpy()

In [None]:
sc.pl.dotplot(endadata, var_names=group_markers, groupby="lowlevel_celltype_lvl2")

In [None]:
refined_annotations.loc[endadata.obs_names,"Annotation"] = endadata.obs.lowlevel_celltype_lvl2.ravel()

References for interesting genes in endothelial cells
- https://www.nature.com/articles/nrgastro.2016.180#:~:text=An%20analysis%20of%20confluent%20and,vessels%20in%20patient%20CRC%20samples.
- https://www.sciencedirect.com/science/article/pii/S0065242310520043
- https://ashpublications.org/blood/article/114/2/478/26237/The-prototype-endothelial-marker-PAL-E-is-a
- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10172233/#CR71
- https://www.pnas.org/doi/abs/10.1073/pnas.91.8.3448
- https://pubmed.ncbi.nlm.nih.gov/21681612/
- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7609066/
- https://www.ahajournals.org/doi/10.1161/CIRCULATIONAHA.120.052318
- https://www.nature.com/articles/s41598-022-05404-7
- https://www.ahajournals.org/doi/10.1161/CIRCRESAHA.108.178434
- https://www.nature.com/articles/s41419-018-0570-5
- https://www.jci.org/articles/view/90086

References for definition of the activated EC 
- https://academic.oup.com/endo/article/162/8/bqab104/6284300?login=true (INSR)
- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8223739/ (CAMKII)
- https://academic.oup.com/cardiovascres/article/81/1/187/275992?login=true (CD81)
- https://faseb.onlinelibrary.wiley.com/doi/abs/10.1096/fasebj.24.1_supplement.750.1 (IFITM)
- https://www.nature.com/articles/ncb1355 (VIM)
- https://www.cell.com/cancer-cell/pdf/S1535-6108(10)00250-3.pdf (EZH2)
- https://www.ahajournals.org/doi/10.1161/01.RES.0000134920.10128.b4#:~:text=Previous%20Version%201-,Abstract,preferentially%20in%20vascular%20endothelial%20cells. (EPAS1)
- https://www.jci.org/articles/view/64201 (MRTFB)
- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3286203/ (EGFL7)

# Save annotations

In [None]:
refined_annotations.Annotation = refined_annotations.Annotation.fillna(adata.obs.highlevel_celltype)

In [None]:
refined_annotations.to_csv("refined_annotations.csv")

In [None]:
adata.write("/add/path/here/full_cohort.h5ad")