# TF analysis

## Compute NMI (normalized mutual information)

In [None]:
library(Seurat)
library(NMI)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(ComplexHeatmap)
library(RColorBrewer)
library(patchwork)
library(data.tree) 
library(gridExtra) 
library(rlist) 
library(phangorn) 
library(scales)
library(dendextend)
library(tidytree)
library(ggtree)
library(ape)
library(phylogram)
library(clusterProfiler)
library(org.Hs.eg.db)
library(igraph)
library(ggraph)
library(tidygraph)

In [None]:
# Load the SCRIPT enrichment table only if it is not already in the session.
# Previously the read was commented out, so a fresh kernel failed on an
# undefined `enri`.  (Author's parameter note: 25, 0.5, 30)
if (!exists("enri")) {
  enri <- read.table(
    "/fs/home/tangke/human_scATAC/hg38_data/SCRIPT2/lung.10_10/enrichment/SCRIPT_enrichment.txt",
    header = TRUE
  )
}
# Transpose before handing the table to Seurat as the count matrix.
# NOTE(review): presumably `enri` is cells x TFs, so `enri_b` is TFs x cells - confirm.
enri_b <- t(enri)
head(enri_b)
seurat <- CreateSeuratObject(counts = enri_b, project = "lung")
# Copy the raw values into the scale.data slot instead of running ScaleData().
seurat@assays$RNA@scale.data <- as.matrix(seurat@assays$RNA@counts)
seurat <- FindVariableFeatures(seurat, selection.method = "vst", nfeatures = 2000)
seurat <- RunPCA(seurat, features = VariableFeatures(object = seurat))
ElbowPlot(seurat)

# NOTE(review): neighbours use PCs 1:25 while the UMAP uses 1:50 - confirm this is intended.
seurat <- FindNeighbors(seurat, dims = 1:25)
seurat <- FindClusters(seurat, resolution = 0.7)
seurat <- RunUMAP(seurat, dims = 1:50)
DimPlot(seurat, reduction = "umap")

# Two-column table: column 1 = cell name, column 2 = cluster identity.
seurat_ident <- as.data.frame(seurat@active.ident)
seurat_ident_2 <- seurat_ident
seurat_ident_2[, 1] <- rownames(seurat_ident)
seurat_ident_2[, 2] <- seurat_ident[, 1]

# Bring in the cell_type annotation stored in the reference Seurat object.
input_file <- readRDS("/fs/home/tangke/human_scATAC/hg38_data/GSM4508936_lung_filtered.seurat.rds")
b <- as.data.frame(input_file@active.ident)
b$cell_type <- input_file@meta.data$cell_type
# Join on cell names (row names); sort = FALSE avoids re-sorting by the key.
c <- merge(seurat_ident_2, b, by = "row.names", sort = FALSE)

d <- c$cell_type
names(d) <- c$Row.names

# Replace the cluster identities with the transferred cell types and re-plot.
# NOTE(review): assumes every cell is present in `input_file` and that
# `cell_type` is a factor - confirm.
seurat@active.ident <- d
DimPlot(seurat, reduction = "umap")

In [None]:
# Cell-name / current-identity table, same two-column layout as seurat_ident_2.
type <- as.data.frame(seurat@active.ident)
b <- data.frame(
  rownames(type),
  type[, 1],
  row.names = rownames(type),
  stringsAsFactors = FALSE
)
colnames(b) <- c(colnames(type), "V2")
head(b)

In [None]:
# NMI between the clustering table (seurat_ident_2) and the cell-type table (b).
mutualinformation <- NMI(seurat_ident_2, b)
mutualinformation

## Subsample clusters

In [None]:
# One row per cell: its active identity (`clusters`) and its name (`cell`).
clusters <- as.data.frame(seurat@active.ident)
colnames(clusters) <- "clusters"
clusters$cell <- rownames(clusters)

In [None]:
# Cell types grouped by the down-sampling rate applied to them in the next cell.
a <- c(
  "Ciliated epithelial cells",
  "Lymphatic endothelial cells",
  "Lymphoid cells",
  "Megakaryocytes",
  "Myeloid cells",
  "Neuroendocrine cells"
)
b <- c("Stromal cells")
c <- c("Bronchiolar and alveolar epithelial cells")
d <- c("Vascular endothelial cells")

In [None]:
# Down-sample each cell type: keep ceiling(n / divisor) randomly chosen cells.
# Divisors per group: a -> 1 (keep all), b -> 20, c -> 10, d -> 2.5.
# A fixed seed makes the subsample, and the RDS written from it, reproducible.
set.seed(2021)
downsample_divisor <- c(
  setNames(rep(1, length(a)), a),
  setNames(rep(20, length(b)), b),
  setNames(rep(10, length(c)), c),
  setNames(rep(2.5, length(d)), d)
)

cluster_list <- list()
for (i in names(downsample_divisor)) {
  clusters_cell <- clusters[which(clusters$clusters == i), "cell"]
  n_keep <- ceiling(length(clusters_cell) / downsample_divisor[[i]])
  cluster_list[[i]] <- sample(clusters_cell, n_keep)
}

In [None]:
# Flatten the per-cell-type samples into one vector of cell names.
selected_cells <- unlist(cluster_list)
length(selected_cells)

In [None]:
# Keep a handle on the full object under the name `lung`.
lung<-seurat

In [None]:
# Cell counts per identity before subsetting.
table(lung@active.ident)

In [None]:
# Column-subset the Seurat object to the sampled cells.
sub_lung<-seurat[,selected_cells]

In [None]:
lung_use<-sub_lung

In [None]:
# Cell counts per identity after subsetting.
table(lung_use@active.ident)

In [None]:
# Persist the subsampled object; later sections reload it from this path.
saveRDS(lung_use,'/fs/home/tangke/human_scATAC/hg38_data/SCRIPT3/seurat_tf/lung_sub_SCRIPT_seurat.rds')

In [None]:
# Clears every object in the workspace; the next section starts from the RDS.
rm(list=ls())

## ComplexHeatmap analysis

In [None]:
# Reload the subsampled Seurat object written in the previous section.
lung_use<-readRDS("/fs/home/tangke/human_scATAC/hg38_data/SCRIPT3/seurat_tf/lung_sub_SCRIPT_seurat.rds")

In [None]:
# Positive markers only, for every identity class.
lung.markers <- FindAllMarkers(lung_use, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 0.1)

In [None]:
# Top 30 markers per cluster ranked by avg_log2FC (result stays grouped by cluster).
lung.markers_use<-lung.markers %>%
    group_by(cluster) %>%
    slice_max(n = 30, order_by = avg_log2FC)

In [None]:
# Union of the selected markers across clusters.
gene<-unique(lung.markers_use$gene)

In [None]:
# Dense matrix of the "counts" slot restricted to the selected markers (rows).
mat <- GetAssayData(lung_use, slot = "counts")
mat_use<-as.matrix(mat[gene,])

In [None]:
dim(mat_use)

In [None]:
# Shared colour palette; entries are referenced by position further down.
mycol <- c(
  "#FF6D6F", "#00AEEC", "#8cb369", "#b8b8ff", "#FEFBDD",
  "#FFA6CD", "#cddafd", "#4ecdc4", "#FDB12C", "#ffa69e"
)
show_col(mycol)

In [None]:
# Column annotation for the heatmap: one colour per cell type.
names <- as.data.frame(lung_use@active.ident)
colnames(names) <- "cell_type"
type <- names$cell_type

# NOTE(review): "Vascular endothelial cells" and "Lymphatic endothelial cells"
# both map to mycol[8] - confirm the shared colour is intentional.
cell_type_colors <- c(
  "Bronchiolar and alveolar epithelial cells" = mycol[2],
  "Stromal cells" = mycol[4],
  "Vascular endothelial cells" = mycol[8],
  "Lymphoid cells" = mycol[6],
  "Ciliated epithelial cells" = mycol[1],
  "Lymphatic endothelial cells" = mycol[8],
  "Megakaryocytes" = mycol[9],
  "Myeloid cells" = mycol[10],
  "Neuroendocrine cells" = mycol[3]
)
ha <- HeatmapAnnotation(
  type = type,
  annotation_name_side = "left",
  col = list(type = cell_type_colors)
)

In [None]:
dim(lung_use)

In [None]:
# Min-max scale every row of mat_use to [0, 1].
# Vectorized, so it no longer depends on the matrix being exactly 150 x 9417.
# A length-nrow vector recycles down the columns, which aligns it with rows.
row_min <- apply(mat_use, 1, min)
row_range <- apply(mat_use, 1, max) - row_min
mat_use <- (mat_use - row_min) / row_range
# Constant rows would be 0 / 0 = NaN and break clustering; map them to 0.
mat_use[row_range == 0, ] <- 0

In [None]:
names$cell_type <- as.factor(names$cell_type)
annotation_col <- names

In [None]:
# Clustered heatmap of the scaled scores: columns split into 7 k-means
# groups, rows into 10.  The seed fixes the k-means partition.
options(repr.plot.width = 20, repr.plot.height = 20, repr.plot.res = 70)
set.seed(2021)
p <- Heatmap(
  mat_use,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_column_names = FALSE,
  show_row_names = TRUE,
  # Spelled out in full; `top_anno` only worked via partial argument matching.
  top_annotation = ha,
  column_km = 7,
  row_km = 10,
  show_row_dend = FALSE,
  show_column_dend = FALSE,
  heatmap_legend_param = list(
    title = "score",
    title_position = "leftcenter-rot"
  ),
  row_gap = unit(0, "mm"),
  column_gap = unit(0, "mm"),
  border = TRUE,
  width = unit(40, "cm"),
  height = unit(40, "cm"),
  row_names_side = "left"
)
p

## Circular tree plot

In [None]:
# Draw the heatmap with the same seed, then pull out the per-slice column
# dendrograms (a) and column orders (o).
set.seed(2021)
t <- draw(p)
a <- column_dend(t)
o <- column_order(t)

In [None]:
# For every column slice i of the heatmap, expose
#   dend<i> : the slice's column dendrogram
#   om<i>   : the scaled matrix restricted to the slice's cells, in heatmap order
# The previous nested loop overwrote every column of om<i> with the LAST cell
# of the slice, so the downstream rowMeans() was not a cluster mean.
for (i in seq_along(o)) {
  assign(paste0("dend", i), a[[i]])
  slice_cols <- o[[i]]
  # drop = FALSE keeps a matrix even for a single-cell slice
  om <- as.data.frame(mat_use[, slice_cols, drop = FALSE])
  assign(paste0("om", i), om)
}

In [None]:
# Publish the seven per-slice column dendrograms as dend1 ... dend7.
for (i in seq_len(7)) {
  dend <- a[[i]]
  assign(paste0("dend", i), dend)
}

In [None]:
# Pooled matrices for the slice groups that are merged below.
om12 <- cbind(om1, om2)
om345 <- cbind(om3, om4, om5)
om67 <- cbind(om6, om7)
om12345 <- cbind(om1, om2, om3, om4, om5)

In [None]:
# Parent dendrogram over a set of cell blocks: hierarchical clustering of
# the blocks' row-mean profiles (one leaf per block).
mean_profile_dend <- function(blocks) {
  centroids <- do.call(cbind, unname(lapply(blocks, rowMeans)))
  as.dendrogram(hclust(dist(t(centroids))))
}

# Build the tree bottom-up: (1,2), (3,4,5), (6,7), then (12,345), then the root.
dend_12 <- merge_dendrogram(
  mean_profile_dend(list(om1, om2)),
  list(dend1, dend2)
)
dend_345 <- merge_dendrogram(
  mean_profile_dend(list(om3, om4, om5)),
  list(dend3, dend4, dend5)
)
dend_67 <- merge_dendrogram(
  mean_profile_dend(list(om6, om7)),
  list(dend6, dend7)
)
# NOTE(review): the parent is built from (om12, om345) but the children are
# passed as (dend_345, dend_12) - confirm the order is intended.
dend_12345 <- merge_dendrogram(
  mean_profile_dend(list(om12, om345)),
  list(dend_345, dend_12)
)
dend_p <- merge_dendrogram(
  mean_profile_dend(list(om12345, om67)),
  list(dend_12345, dend_67)
)

In [None]:
dend_p

In [None]:
# Convert the merged dendrogram to a phylo tree and tag it with cell-type
# groups (cell names split by their active identity).
names <- as.data.frame(lung_use@active.ident)
colnames(names) <- "cell_type"
type <- names$cell_type
groupInfo <- split(colnames(mat_use), type)
use <- groupOTU(as.phylo(dend_p), groupInfo)

In [None]:
# Rescale the edge lengths of a phylo tree for a radial/circular layout.
#
# All edge lengths are discarded.  Edges leading to inner nodes get a length
# of 5 * k^(-radialparameter), where k grows with the node's depth below the
# root (each k is reused for `repeatparameter` consecutive levels).  Every
# other edge (the tip edges) is set to `tiplength`.
#
# Arguments:
#   tree            a "phylo" object (uses $edge, $tip.label)
#   radialparameter exponent of the decay; smaller (0.5) collapses less,
#                   larger (3) collapses more
#   repeatparameter number of consecutive levels sharing one factor;
#                   increase when there are many very small levels
#   tiplength       length assigned to every edge that was not rescaled
# Returns: the tree with its $edge.length replaced.
transformtree <- function(tree, radialparameter, repeatparameter, tiplength) {
  tip_ids <- seq_len(length(tree$tip.label))
  n_edges <- nrow(tree$edge)

  # number of hierarchical levels in the tree (data.tree package)
  dfr0 <- as.data.frame(tree$edge)
  tree2 <- FromDataFrameNetwork(dfr0)
  node_levels <- ToDataFrameTable(tree2, "level")
  edgelevels <- max(node_levels) - 1

  # walk down from the root, collecting the nodes found at each level
  centralnode <- getMRCA(tree, tip_ids)
  childrenlist <- list()
  childrenlist[1] <- list(phangorn::Children(tree, centralnode))
  # seq_len()[-1] is empty when there is a single level; `2:edgelevels`
  # would have iterated backwards (2, 1) in that case
  for (i in seq_len(edgelevels)[-1]) {
    parents <- unlist(childrenlist[i - 1])
    childrenlist[i] <- list(unlist(lapply(
      parents,
      function(x) phangorn::Children(tree, x)
    )))
  }

  # drop tips and empty levels: only inner nodes are rescaled
  childrentipsremoved <- lapply(
    childrenlist,
    function(x) x[!is.element(x, tip_ids)]
  )
  groupedinnernodes <- rlist::list.clean(
    childrentipsremoved,
    fun = function(x) length(x) == 0L
  )

  # one factor per level of inner nodes
  n_steps <- length(groupedinnernodes) / repeatparameter
  transfvector <- rep((1:n_steps)^(-radialparameter) * 5, each = repeatparameter)

  # make the factor vector exactly as long as the number of levels:
  # pad with the last factor, or drop factors from the front
  group_sizes <- vapply(groupedinnernodes, length, integer(1))
  deficit <- length(group_sizes) - length(transfvector)
  if (deficit > 0) {
    transfvector <- c(
      transfvector,
      rep(transfvector[length(transfvector)], deficit)
    )
  } else if (deficit < 0) {
    transfvector <- transfvector[-seq_len(-deficit)]
  }

  # expand to one factor per inner node
  vector1 <- rep(transfvector, times = group_sizes)

  # discard the length information: every edge starts at 1
  tree$edge.length <- rep(1, n_edges)

  # rescale the edges that lead to inner nodes
  innernodes <- unlist(groupedinnernodes)
  inner_edges <- unlist(lapply(
    innernodes,
    function(x) which(tree$edge[, 2] == x)
  ))
  tree$edge.length[inner_edges] <- tree$edge.length[inner_edges] * vector1

  # Every edge that was not rescaled gets `tiplength`.  Selecting by index
  # rather than by `edge.length == 1` keeps inner edges whose factor happens
  # to be exactly 1 (e.g. radialparameter = 1 at the fifth level).
  tip_edges <- setdiff(seq_len(n_edges), inner_edges)
  tree$edge.length[tip_edges] <- tiplength

  tree
}

In [None]:
# Rescale branch lengths for the circular layout.
tree <- transformtree(use, 2.5, 2, 0.2)

In [None]:
# Circular tree; tip guide lines are coloured by the cell-type group.
options(repr.plot.width = 20, repr.plot.height = 20, repr.plot.res = 40)
tip_group_colors <- c(
  "Bronchiolar and alveolar epithelial cells" = mycol[2],
  "Stromal cells" = mycol[4],
  "Vascular endothelial cells" = mycol[8],
  "Lymphoid cells" = mycol[6],
  "Ciliated epithelial cells" = mycol[1],
  "Lymphatic endothelial cells" = mycol[8],
  "Megakaryocytes" = mycol[9],
  "Myeloid cells" = mycol[10],
  "Neuroendocrine cells" = mycol[3]
)
g <- ggtree(
  tree,
  color = "black",
  layout = "circular",
  branch.length = "branch.length",
  size = 1,
  open.angle = 30
) +
  geom_tiplab(
    aes(label = NA, col = group),
    align = TRUE,
    linesize = 0.08,
    linetype = 1,
    offset = 3
  ) +
  theme(
    legend.title = element_text(face = "bold", size = 15),
    legend.position = "right",
    legend.text = element_text(size = 13)
  ) +
  scale_color_manual(values = tip_group_colors)
g

# Target analysis

## GO analysis

In [None]:
# Clears every object in the workspace before the target analysis.
rm(list=ls())

In [None]:
# Reload the subsampled Seurat object and show cell counts per identity.
lung_script<-readRDS("/fs/home/tangke/human_scATAC/hg38_data/SCRIPT3/seurat_tf/lung_sub_SCRIPT_seurat.rds")
table(lung_script@active.ident)

In [None]:
# Keep only cells whose identity is 'Lymphoid cells'.
Lymphoid_cells<-subset(lung_script,idents=c('Lymphoid cells'),invert=FALSE)  # GATA3 is expressed in lymphoid cells

In [None]:
# Read the GATA3 matrix from a 10x-format directory.
# NOTE(review): presumably a gene x cell target-score matrix for GATA3 - confirm.
GATA3<-Read10X("/fs/home/tangke/human_scATAC/hg38_data/SCRIPT2/lung_impute/imputation/GATA3_10x",gene.column =1)

In [None]:
# Restrict the GATA3 matrix to lymphoid cells.
lymphoid_barcodes <- rownames(Lymphoid_cells@meta.data)
use_GATA3 <- GATA3[, colnames(GATA3) %in% lymphoid_barcodes, drop = FALSE]

# Cell QC: keep the (at most) 500 cells with the largest column sums.
# head() returns fewer names when fewer cells exist; `[1:500]` padded with
# NA names, which made the `%in%` filter silently match nothing.
cell_totals <- sort(colSums(use_GATA3), decreasing = TRUE)
top_cells <- names(head(cell_totals, 500))
use_qc_GATA3 <- use_GATA3[, colnames(use_GATA3) %in% top_cells, drop = FALSE]

# Gene QC: the (at most) 1000 genes with the highest mean over the kept cells.
# Result is a one-column data frame with gene names as row names.
gene_means <- sort(rowMeans(use_qc_GATA3), decreasing = TRUE)
Lymphoid_use_GATA3 <- as.data.frame(head(gene_means, 1000))

In [None]:
# Map the selected gene symbols to Entrez IDs.
eg_gata3_ly <- bitr(
  rownames(Lymphoid_use_GATA3),
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = "org.Hs.eg.db"
)
head(eg_gata3_ly)

In [None]:
# GO biological-process enrichment with BH-adjusted p-values.
go_gata3_ly <- enrichGO(
  gene = eg_gata3_ly$ENTREZID,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",
  ont = "BP",
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05
)

In [None]:
# Hand-picked rows of the enrichment result.
# NOTE(review): these positions depend on the exact enrichment output, which
# changes with the sampled cells and annotation versions - verify before reuse.
go_gata3_ly_use<-go_gata3_ly[c(10,14,15,27,31,32,33,41,42,44),]

In [None]:
go_gata3_ly_use

In [None]:
# Sort the selected terms by column 9, largest first.
# NOTE(review): presumably column 9 is `Count` - confirm for the installed clusterProfiler.
go_gata3_ly_use<-go_gata3_ly_use[order(go_gata3_ly_use[,9],decreasing = TRUE),]

In [None]:
# Insert line breaks into long descriptions so the axis labels wrap.
# The row positions refer to the ordering produced just above.
go_gata3_ly_use$Description[2]='regulation of myeloid cell\ndifferentiation'
go_gata3_ly_use$Description[4]='regulation of megakaryocyte\ndifferentiation'
go_gata3_ly_use$Description[5]='negative regulation of\nmyeloid cell differentiation'
go_gata3_ly_use$Description[7]='cellular response to\ninterleukin-7'
go_gata3_ly_use$Description[9]='negative regulation of\nmegakaryocyte differentiation'
go_gata3_ly_use$Description[10]='interleukin-7-mediated\nsignaling pathway'

In [None]:
options(repr.plot.width = 7, repr.plot.height = 10, repr.plot.res = 70)
par(mar = c(3, 3, 0.5, 0.5), mgp = c(1.5, 0.5, 0), bg = "black")

In [None]:
# Dot plot of the selected GO terms: x = gene count, colour = p-value,
# point size = gene count.  Terms are listed in reverse table order.
# The theme layers are added in the same order as before, because the later
# `text` setting is merged on top of the earlier one.
gata3_ly <- ggplot(go_gata3_ly_use, aes(Description, Count)) +
  geom_point(aes(color = pvalue, size = Count)) +
  labs(
    x = "GO Terms",
    y = "Gene Numbers",
    title = "GATA3 targets in LC"
  ) +
  coord_flip() +
  scale_color_continuous(low = "#FF6D6F", high = "#4ecdc4") +
  scale_x_discrete(limits = rev(go_gata3_ly_use$Description)) +
  scale_y_continuous(breaks = seq(0, 25, 10)) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    plot.title = element_text(hjust = 0.8, size = 15),
    axis.text.y = element_text(size = 15, color = "black"),
    axis.text.x = element_text(size = 15, color = "black"),
    axis.title.y = element_text(size = 15),
    axis.title.x = element_text(size = 15),
    legend.title = element_text(size = 15),
    legend.text = element_text(size = 15),
    text = element_text(hjust = 0.5)
  ) +
  theme(
    text = element_text(size = 40, family = "myfont"),
    legend.position = c(0.8, 0.4)
  )
gata3_ly

## target analysis

In [None]:
# Genes to show as GATA3 targets in the network figure.
gata3_target_igra <- c(
  "KAT6B", "HSPD1", "CREB1", "TLE4", "IRF9", "SP3", "FBXW7", "NFKB2",
  "SEH1L", "H4C2", "TARBP2", "SETD3", "KAT7", "HMGB2", "KMT2E", "RMRP",
  "RPS9", "CIC", "USP5", "OARD1"
)

In [None]:
# Expose the gene names (row names) as a regular column.
Lymphoid_use_GATA3$gene <- rownames(Lymphoid_use_GATA3)

In [None]:
# Keep only the rows for the chosen targets.
is_target <- Lymphoid_use_GATA3$gene %in% gata3_target_igra
weight_GATA3 <- Lymphoid_use_GATA3[is_target, ]

In [None]:
names(weight_GATA3) <- c("weight", "gene")

In [None]:
# Edge table TF -> target with the target's weight.  It is sized from
# weight_GATA3 rather than a fixed 20 rows, so it still works when some
# targets are missing from the filtered gene list.
hb_graph_gata3 <- data.frame(
  TF = rep("GATA3", nrow(weight_GATA3)),
  target = weight_GATA3$gene,
  weight = weight_GATA3$weight,
  stringsAsFactors = FALSE
)
# All node labels (TF repeated per edge, then the targets) in one column.
name_gata3 <- data.frame(c(hb_graph_gata3$TF, hb_graph_gata3$target))

In [None]:
# Unique node labels; every node starts in group "4".
# A length-one value is recycled to the number of rows, so the node count
# no longer has to be exactly 21.
nodes_gata3 <- name_gata3 %>%
  distinct() %>%
  mutate(group = "4")
colnames(nodes_gata3) <- c("label", "group")

In [None]:
# Group "1": the TF itself.  (`$labe` only worked through partial matching.)
nodes_gata3[nodes_gata3$label %in% "GATA3", "group"] <- "1"
# Group "2": targets marked as reference.
reference_targets <- c(
  "KAT6B", "HSPD1", "CREB1", "TLE4", "IRF9", "SP3", "FBXW7", "NFKB2"
)
nodes_gata3[nodes_gata3$label %in% reference_targets, "group"] <- "2"

In [None]:
# Rename the edge columns to from / to (weight keeps its name).
edges_gata3 <- rename(hb_graph_gata3, from = TF, to = target, weight = weight)

In [None]:
# Directed igraph object built from the edge and node tables.
net_pc_gata3 <- graph_from_data_frame(
  d = edges_gata3,
  directed = TRUE,
  vertices = nodes_gata3
)

net_pc_gata3

In [None]:
# Convert to a tbl_graph for plotting with ggraph.
graph_pc_gata3 <- as_tbl_graph(net_pc_gata3)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 15, repr.plot.res = 70)
par(mar = c(3, 3, 0.5, 0.5), mgp = c(1.5, 0.5, 0), bg = "black")

In [None]:
# Network plot: edge width follows the target weight, node colour the group.
node_group_colors <- c(
  "1" = "#FDB12C",
  "2" = "#FFA6CD",
  "3" = "#b8b8ff",
  "4" = "#cddafd"
)
g_gata3 <- ggraph(graph_pc_gata3, layout = "stress") + # kk
  geom_edge_link(
    aes(edge_width = 100 * weight),
    color = "lightblue",
    arrow = arrow(length = unit(10, "mm")),
    end_cap = circle(20, "mm")
  ) +
  geom_node_point(aes(color = group), size = 45, alpha = 0.4) +
  geom_node_text(aes(label = name), size = 15) +
  scale_color_manual(values = node_group_colors) +
  scale_edge_width(range = c(0.5, 1.5)) +
  # theme_void() is added after theme(), as in the original layer order
  theme(text = element_text(size = 40, family = "myfont")) +
  theme_void() +
  NoLegend()

g_gata3