## Calculate NMI (normalized mutual information)

In [None]:
library(Seurat)
library(NMI)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(destiny)
library(readxl)
library(tidyverse)
library(ggrepel)
library(ggtern)

In [None]:
# SCRIPT enrichment table; the first line of the file holds the column names.
enri <- read.table(
  file = "/fs/home/tangke/human_scATAC/hg38_data/small_HSC/HSC.10_10/enrichment/SCRIPT_enrichment.txt",
  header = TRUE
)

In [None]:
# Per-cell annotation table, read with the read.table() defaults.
meta <- read.table(file = "/fs/home/tangke/human_scATAC/hg38_data/small_HSC/metadata.txt")

In [None]:
# Relabel every cell annotated as "MCP" to "pDC".
# NOTE(review): the write targets column 7 by position -- presumably that is
# `assign.celltype`; confirm against the metadata file.
mcp_rows <- which(meta$assign.celltype == "MCP")
meta[mcp_rows, 7] <- "pDC"

In [None]:
# Keep only the enrichment rows whose row name also appears in the metadata.
has_meta <- rownames(enri) %in% rownames(meta)
enri <- enri[has_meta, ]

In [None]:
# Transpose the enrichment table so that its former columns become the rows
# (features) of the matrix handed to Seurat as `counts`.
enri_b <- t(enri)
head(enri_b)
seurat <- CreateSeuratObject(
  counts = enri_b,
  project = "hsc"
)
# The enrichment scores are copied unchanged into `scale.data`; no ScaleData()
# call is made before PCA.
seurat@assays$RNA@scale.data <- as.matrix(seurat@assays$RNA@counts)
seurat <- FindVariableFeatures(
  seurat,
  selection.method = "vst",
  nfeatures = 2000
)
seurat <- RunPCA(
  seurat,
  features = VariableFeatures(object = seurat)
)
ElbowPlot(seurat)

# Graph-based clustering on PCs 1-25, UMAP embedding on PCs 1-50.
seurat <- FindNeighbors(seurat, dims = 1:25)
seurat <- FindClusters(seurat, resolution = 0.7)
seurat <- RunUMAP(seurat, dims = 1:50)
DimPlot(seurat, reduction = "umap")

# Two-column table of the cluster assignment: column 1 = cell name,
# column 2 = Seurat cluster. This is the layout later passed to NMI().
seurat_ident <- as.data.frame(seurat@active.ident)
seurat_ident_2 <- seurat_ident
seurat_ident_2[, 1] <- rownames(seurat_ident)
seurat_ident_2[, 2] <- seurat_ident[, 1]

In [None]:
# Join the cluster table with the annotation table on their row names (cell
# names). The result is stored under a descriptive name instead of `c`, which
# would shadow base::c().
ident_meta <- merge(seurat_ident_2, meta, by = "row.names", sort = FALSE)

In [None]:
# Annotated cell type of every merged cell, named by cell name.
d <- as.factor(ident_meta$assign.celltype)
names(d) <- ident_meta$Row.names

In [None]:
# merge(sort = FALSE) returns rows in an unspecified order, and later cells
# read the identities positionally (e.g. when copying them into meta.data).
# Realign the factor to the object's own cell order before storing it.
stopifnot(all(colnames(seurat) %in% names(d)))
seurat@active.ident <- d[colnames(seurat)]
DimPlot(seurat, reduction = "umap")

In [None]:
# Same two-column layout as `seurat_ident_2`, built from the identities that
# are active now: column 1 = cell name, column 2 = active identity.
type <- as.data.frame(seurat@active.ident)
b <- type
b[, 1] <- rownames(type)
b[, 2] <- type[, 1]
head(b)

In [None]:
# To calculate the NMI score between the cluster table and the identity table.
mutualinformation <- NMI(seurat_ident_2, b)
mutualinformation

In [None]:
# Relative paths used from here on resolve inside this directory.
setwd(dir = "/fs/home/tangke/human_scATAC/hg38_data/small_HSC/HSC.10_10")

In [None]:
# Save rds
saveRDS(object = seurat, file = "HSC_seurat.rds")

## trajectory analysis

In [None]:
# Feature-by-cell matrix taken from the object's "data" slot.
logcounts <- GetAssayData(seurat, "data")

In [None]:
# Choose the top 600 most variable TFs to do the trajectory analysis.
# head() is used instead of `[1:600]` so that a matrix with fewer than 600
# features yields all of them rather than NA-padded entries, and the names are
# read directly instead of via an as.data.frame() round-trip.
feature_var <- sort(apply(logcounts, 1, var), decreasing = TRUE)
feature <- names(head(feature_var, 600))

In [None]:
# DiffusionMap() is given cells as rows, so transpose the selected features.
input_matrix <- t(as.matrix(logcounts[feature, ]))

In [None]:
# First pass with the default neighbourhood size, used only to inspect the
# eigenvalue spectrum.
dm <- DiffusionMap(as.matrix(input_matrix))
plot(
  eigenvalues(dm),
  ylim = 0:1,
  pch = 20,
  xlab = 'Diffusion component (DC)',
  ylab = 'Eigenvalue'
)

In [None]:
# Final diffusion map. Set the number of nearest neighbors.
dm <- DiffusionMap(as.matrix(input_matrix), k = 4)

In [None]:
# One-column data frame of the active identities, also copied into the
# object's meta.data as `cell_type`.
cell_type <- as.data.frame(seurat@active.ident)
head(cell_type)
colnames(cell_type) <- "cell_type"
seurat@meta.data$cell_type <- cell_type[["cell_type"]]

In [None]:
# Number of cells per annotated cell type.
table(seurat$cell_type)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10)
set.seed(2020)
# Diffusion pseudotime computed on the diffusion map with `tips = 1`.
dpt <- DPT(dm, tips = 1)
dpt_plot <- plot(dpt, 1:2)
dpt_plot + theme_classic()

In [None]:
# Plotting table: first two diffusion components, cell type and pseudotime.
# `cell_type` is a one-column data frame, so data.frame() keeps its own column
# name (`cell_type`) rather than `timepoint`; the ggplot call that follows maps
# colour to that `cell_type` column.
tmp <- data.frame(
  DC1 = dm$DC1,
  DC2 = dm$DC2,
  timepoint = cell_type,
  dpt = dpt$DPT1
)

In [None]:
set.seed(2020)
options(repr.plot.width = 10, repr.plot.height = 10)
# Scatter plot of the first two diffusion components, coloured by cell type.
p <- ggplot(tmp, aes(x = DC1, y = DC2, colour = cell_type)) +
  geom_point(size = 2) +
  xlab("Diffusion component 1") +
  ylab("Diffusion component 2") +
  theme_classic()
# Fixed colour for each cell-type label.
celltype_palette <- c(
  HSC = "#2D6A4F", MPP = "#85BC07", LMPP = "#74c69d",
  CLP = "#95A0FF", pDC = "#4cc9f0", CMP = "#F0ED4A",
  GMP = "#F9C1F8", MEP = "#FAA300", mono = "#EE85F6", UNK = "#e9ecef"
)
# NOTE(review): the font family "myfont" is not registered anywhere in the
# visible code -- confirm it is set up before rendering.
figure1 <- p +
  scale_color_manual(values = celltype_palette) +
  theme(
    axis.title = element_text(size = 25),
    axis.text = element_text(size = 25),
    legend.text = element_text(size = 20),
    legend.title = element_text(size = 25),
    text = element_text(family = "myfont")
  ) +
  # Enlarge the legend keys without changing the plotted point size.
  guides(colour = guide_legend(override.aes = list(size = 5)))
  #          theme(plot.title = element_text(size = 12, face = "bold") , legend.title=element_text(size=10) , legend.text=element_text(size=9))
figure1

## differential TF analysis

In [None]:
# Store the diffusion-map eigenvectors in the Seurat object as a reduction
# called "DC", so Seurat plotting functions can use it.
dr <- CreateDimReducObject(
  embeddings = dm@eigenvectors,
  key = 'DC'
)
seurat[['DC']] <- dr

In [None]:
# 20-step colour ramp from blue ('#3b4cc0') to red ('#b40426').
col <- c(
  '#3b4cc0', '#4a63d4', '#5b7ae5',
  '#6d8ff1', '#7fa2fa', '#92b4fe', '#a4c2fe', '#b6cefa', '#c7d7f1', '#d6dce4', '#e4d9d3',
  '#eecfbf', '#f5c2aa', '#f7b295', '#f59f80', '#ef896c', '#e67158', '#d85646', '#c73735', '#b40426'
)

In [None]:
a <- seurat@assays$RNA@counts

In [None]:
# Observed range of the GATA1 row, printed for reference.
is_gata1 <- rownames(a) == "GATA1"
gata1_min <- min(a[is_gata1, ])
gata1_max <- max(a[is_gata1, ])
gata1_min
gata1_max

In [None]:
# GATA1 values drawn on the "DC" reduction; combine = FALSE is requested so
# the individual plots come back uncombined and a scale can be added to each.
p1 <- FeaturePlot(
  seurat,
  features = "GATA1",
  reduction = "DC",
  combine = FALSE,
  pt.size = 2
)
# Colour scale fixed to [-0.8, 0.8]. The argument is spelled out as `limits`
# rather than relying on partial matching of `limit`.
fix.sc <- scale_color_gradientn(colours = col, limits = c(-0.8, 0.8))

In [None]:
# Apply the fixed colour scale to every plot, then combine them.
p2 <- lapply(p1, function(x) x + fix.sc)
p3 <- CombinePlots(p2)
# The x-axis breaks were previously a dangling statement (no `+` before
# scale_x_continuous), so they were never applied. Limits and breaks are set in
# one x scale here, because adding scale_x_continuous() after xlim() would
# replace the limits.
p4 <- p3 +
  scale_x_continuous(
    limits = c(-0.08, 0.08),
    breaks = seq(-0.08, 0.08, by = 0.04)
  ) +
  ylim(-0.12, 0.06) +
  theme(
    axis.title = element_text(size = 25, family = "myfont"),
    axis.text = element_text(size = 25),
    title = element_text(size = 25, family = "myfont"),
    legend.text = element_text(size = 20, family = "myfont")
  ) +
  NoLegend()
p4

In [None]:
# Index of the first tip reported for the DPT object.
tip_idx <- tips(dpt)[[1]]

In [None]:
# Row `tip_idx` of the DPT object as a one-column table named "dpt".
# NOTE(review): presumably the pseudotime of every cell measured from that tip
# cell -- confirm against the destiny documentation.
dpt_order <- as.data.frame(dpt[tip_idx, ])
names(dpt_order) <- "dpt"

In [None]:
# Cell names, taken from the row names of the branch matrix.
# NOTE(review): assumes `dpt@branch` carries cell names as row names -- confirm.
cell <- as.data.frame(rownames(dpt@branch))
names(cell) <- "cell"

In [None]:
dpt_order$cell <- cell[["cell"]]

In [None]:
# Active identity of every cell, as a one-column table.
cell_type <- as.data.frame(seurat@active.ident)
names(cell_type) <- "cell_type"

In [None]:
# Attached by position: assumes `dpt_order` rows follow the Seurat cell order.
dpt_order$cell_type <- cell_type[["cell_type"]]

In [None]:
# Keep the cells labelled HSC, MPP, CMP or MEP.
# `%in%` tests set membership. The previous `==` against a length-4 vector
# recycled the four labels element-wise down the column, so a cell was kept
# only if its label happened to match the label at its recycled position.
MEP <- dpt_order[dpt_order$cell_type %in% c('HSC', 'MPP', 'CMP', 'MEP'), ]

In [None]:
head(MEP)

In [None]:
# Transcription factor plotted along pseudotime.
TF <- 'GATA1'

In [None]:
# One row per cell, holding that cell's value for the chosen TF.
TF_test <- as.data.frame(seurat@assays$RNA@counts[TF, ])
names(TF_test) <- "count"

In [None]:
# Active identity per cell. The `cell` column written here is overwritten with
# the cell names two cells further down.
label <- as.data.frame(seurat@active.ident)
names(label) <- "cell"
TF_test$cell <- label[["cell"]]

In [None]:
TF_test$group <- TF

In [None]:
# Cell names become the key used by the merge in the next cell.
TF_test$cell <- rownames(TF_test)

In [None]:
# Add the TF value to each cell of the selected lineage, joined on cell name.
merge_TF <- merge(MEP, TF_test, by = "cell", sort = FALSE)

In [None]:
head(TF_test)

In [None]:
# Base plot: TF value (y) against pseudotime (x).
g <- ggplot(data = merge_TF, aes(x = dpt, y = count))

In [None]:
options(repr.plot.width = 10, repr.plot.height = 4)

In [None]:
# Colours for the four cell types present in `merge_TF`.
lineage_palette <- c(
  HSC = "#2D6A4F", MPP = "#85BC07", CMP = "#F0ED4A",
  MEP = "#FAA300"
)
# Black loess trend without a confidence band, over points coloured by cell
# type.
p <- g +
  geom_smooth(method = 'loess', formula = 'y ~ x', color = 'black', se = FALSE) +
  geom_point(aes(color = cell_type), size = 0.5) +
  scale_color_manual(values = lineage_palette) +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 20),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15),
    text = element_text(family = "myfont"),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  guides(colour = guide_legend(override.aes = list(size = 2))) +
  theme(
    plot.title = element_text(hjust = 0.5),
    title = element_text(size = 20)
  ) +
  ggtitle("GATA1")
p

## triangle plot

In [None]:
# Empty table with one row per feature of the count matrix and one column per
# cell type. The dimensions are derived from the data instead of the
# hard-coded 1243 x 10, so the table always matches the matrix it summarises.
data <- seurat@assays$RNA@counts
celltype_names <- c("CLP", "CMP", "GMP", "HSC", "LMPP", "MEP", "MPP", "UNK", "mono", "pDC")
dataf <- data.frame(matrix(NA, nrow(data), length(celltype_names)))

In [None]:
colnames(dataf) <- celltype_names

In [None]:
# Same matrix as read in the first cell of this section; repeated so that this
# cell still defines `data` when run on its own.
data <- seurat@assays$RNA@counts

In [None]:
rownames(dataf) <- rownames(data)

In [None]:
# Fill each column of `dataf` with the per-feature mean over the cells
# annotated with that column's cell type.
for (i in seq_len(ncol(dataf))) {
  type_name <- colnames(dataf)[i]
  type_cells <- rownames(meta[meta$assign.celltype == type_name, ])
  # drop = FALSE keeps a matrix even when only one cell matches; without it
  # the subset collapses to a vector and apply() fails. A separate name is
  # used so the global `cell_type` table is not overwritten.
  type_counts <- data[, colnames(data) %in% type_cells, drop = FALSE]
  dataf[, i] <- unname(apply(type_counts, 1, mean))
}

In [None]:
# Min-max scale every row of `dataf` to [0, 1] across the cell-type columns.
# Vectorised over all rows and columns, so no dimensions are hard-coded and
# base::min, base::max and base::c are no longer shadowed by loop variables.
# As before, a row whose values are all equal has a zero range and becomes NaN.
type_columns <- unname(as.list(dataf))
row_min <- do.call(pmin, type_columns)
row_max <- do.call(pmax, type_columns)
# A data frame combined with a vector of length nrow is recycled down each
# column, so row i is shifted and scaled by its own minimum and range.
dataf <- (dataf - row_min) / (row_max - row_min)

In [None]:
# USE: the 24 transcription factors shown in the triangle plot.
selected_tfs <- c(
  "GATA1", "GATA2", "TAL1", "GATA6", "GATA5", "FLI1", 'ZFPM1', 'TAL2',
  "FOXO1", 'PAX5', 'IKZF1', 'STAT3', "SMAD1", "FOXP1", 'PRDM1', 'RUNX1',
  "SPI1", "HSF1", "CEBPE", "STAT6", 'CEBPA', 'CEBPB', 'IRF8', 'IRF4'
)
dataf_serve <- dataf[rownames(dataf) %in% selected_tfs, ]

In [None]:
# Keep the three columns used as plot axes and carry the TF name as a label.
dataf_serve_use <- dataf_serve[, c("MEP", "mono", "CLP")]
dataf_serve_use$label <- rownames(dataf_serve_use)

In [None]:
head(dataf_serve_use)

In [None]:
# Placeholder value; every row is overwritten by the group assignment below.
dataf_serve_use$group <- "color"

In [None]:
# TF lists that decide which group a row is assigned to.
cMEP <- c("GATA1", "GATA2", "TAL1", "GATA6", "GATA5", "FLI1", 'ZFPM1', 'TAL2')
cCLP <- c("FOXO1", 'PAX5', 'IKZF1', 'STAT3', "SMAD1", "FOXP1", 'PRDM1', 'RUNX1')
cMONO <- c("SPI1", "HSF1", "CEBPE", "STAT6", 'CEBPA', 'CEBPB', 'IRF8', 'IRF4')

In [None]:
# Assign each TF to a group: "MEP" if listed in cMEP, otherwise "CLP" if listed
# in cCLP, otherwise "mono".
tf_names <- rownames(dataf_serve_use)
dataf_serve_use$group <- ifelse(
  tf_names %in% cMEP,
  "MEP",
  ifelse(tf_names %in% cCLP, "CLP", "mono")
)

In [None]:
# Display names for the three groups, in a fixed level order.
dataf_serve_use$cul <- factor(
  dataf_serve_use$group,
  levels = c("MEP", "mono", "CLP"),
  labels = c("Erythroid", "Myeloid", "Lymphoid")
)

In [None]:
options(repr.plot.width = 30, repr.plot.height = 30, repr.plot.res = 70)
# Ternary plot of the selected TFs: each point is placed by its scaled CLP,
# mono and MEP values and coloured by the group assigned above.
gra3 <- ggtern(data = dataf_serve_use, aes(x = CLP, y = mono, z = MEP)) +
  theme_bw() +
  # alpha must lie in [0, 1]; the previous value of 25 was out of range. Fully
  # opaque points are assumed to be the intent -- NOTE(review): confirm that
  # 0.25 was not meant.
  geom_point(aes(color = cul), alpha = 1, size = 6) +
  tern_limits(T = 1.1, L = 1.1, R = 1.1) +
  geom_text(
    aes(label = label, color = cul),
    hjust = 1, vjust = 1, size = 15,
    check_overlap = FALSE, show.legend = FALSE, family = "myfont"
  ) +  #
  theme_nomask() +
#  theme_hidegrid()+
  theme_hidelabels() +
  scale_color_manual(
    values = c("Erythroid" = "#FAA300", "Myeloid" = "#EE85F6", "Lymphoid" = "#95A0FF")
  ) +
  labs(x = "Lymphoid", y = "Myeloid", z = "Erythroid") +
  theme(text = element_text(size = 40, family = "myfont")) +
  labs(color = "group") +
  # Enlarge the legend keys without changing the plotted point size.
  guides(colour = guide_legend(override.aes = list(size = 10))) +
  theme(legend.position = c(0.8, 0.7)) +
  theme(plot.margin = unit(c(2, 2, 2, 2), "cm"))
gra3