#  Intersect TFs

In [41]:
library(dplyr)
library(reshape2)
library(plyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)

library(UpSetR)

In [42]:
# Reference vector of TF symbols: first column (V1) of the header-less census file.
TFs <- read.csv('data/scenic/huTF_census.txt', header = FALSE, stringsAsFactors = FALSE)$V1

# ORGANOIDs RNAseq

## TF activities

In [43]:
# Regulon activity table for the organoid clusters.
TF_activities <- read.csv(
    'figures_organoid_confInhibitors/Donor44/cluster_markers_invivo/TFs_activities.csv',
    stringsAsFactors = FALSE
)
# Drop every row whose cluster label contains 'scanpy'.
TF_activities <- TF_activities[grep('scanpy', TF_activities$cl_name, invert = TRUE), ]
# Keep regulons with FDR < 0.05 and a positive NES.
TF_activities <- subset(TF_activities, FDR < 0.05 & NES > 0)
# TF symbol = regulon name with every '_AA' and then every '_A'..'_E' removed.
TF_activities$TF <- TF_activities$Regulon %>%
    gsub('_AA', '', .) %>%
    gsub('_[A-E]', '', .)

# One vector of TF symbols per cluster label, in order of first appearance.
TFs_organoid_activities <- setNames(
    lapply(unique(TF_activities$cl_name), function(cl) subset(TF_activities, cl_name == cl)$TF),
    unique(TF_activities$cl_name)
)
TFs_organoid_activities

## DEGs

In [44]:
# Differential-expression tables, one CSV per organoid cluster.
DE_new_secretory <- read.csv(
    'figures_organoid_confInhibitors/Donor44/cluster_markers_invivo/new_secretory_DEGs.csv',
    stringsAsFactors = FALSE
)
DE_Inter_PGRpos <- read.csv(
    'figures_organoid_confInhibitors/Donor44/cluster_markers_invivo/inter_PGRpos_DEGs.csv',
    stringsAsFactors = FALSE
)
DE_Ciliated <- read.csv(
    'figures_organoid_confInhibitors/Donor44/cluster_markers_invivo/ciliated_DEGs.csv',
    stringsAsFactors = FALSE
)

DE <- list(
    Inter_PGRpos  = DE_Inter_PGRpos,
    new_secretory = DE_new_secretory,
    Ciliated      = DE_Ciliated
)

# Per cluster: genes that are in the TF census, have logFC > 0 and adj.P.Val < 0.05.
TFs_organoid_DEGs <- lapply(DE, function(tbl) {
    subset(tbl, Gene %in% TFs & logFC > 0 & adj.P.Val < 0.05)$Gene
})

# ORGANOIDs ATAC

## TFmotifs

In [45]:
# Differential motif-activity tables (cluster vs. other), one CSV per cluster.
Dact_new_secretory <- read.csv(
    'data/scATAC/seurat.output-S2/clusters/newsecretory_vs_other_differential.activity_LR.csv',
    stringsAsFactors = FALSE
)
Dact_Inter_PGRpos <- read.csv(
    'data/scATAC/seurat.output-S2/clusters/PGR_vs_other_differential.activity_LR.csv',
    stringsAsFactors = FALSE
)
Dact_Ciliated <- read.csv(
    'data/scATAC/seurat.output-S2/clusters/ciliated_vs_other_differential.activity_LR.csv',
    stringsAsFactors = FALSE
)

Dact <- list(
    Inter_PGRpos  = Dact_Inter_PGRpos,
    new_secretory = Dact_new_secretory,
    Ciliated      = Dact_Ciliated
)

# Per cluster: census TFs with avg_logFC > 0 and p_val_adj < 0.05.
TFs_organoid_TFmotifs <- lapply(Dact, function(tbl) {
    subset(tbl, TF %in% TFs & avg_logFC > 0 & p_val_adj < 0.05)$TF
})

## DARs

In [46]:
# Closest-gene tables from the DARs_invivo folder, one CSV per cluster.
# NOTE: this reuses (and overwrites) the Dact_* / Dact names of the TF-motif cell.
Dact_new_secretory <- read.csv(
    'data/scATAC/seurat.output-S2/clusters/DARs_invivo/new_secretory_closest_genes.csv',
    stringsAsFactors = FALSE
)
Dact_Inter_PGRpos <- read.csv(
    'data/scATAC/seurat.output-S2/clusters/DARs_invivo/inter_PGRpos_closest_genes.csv',
    stringsAsFactors = FALSE
)
Dact_Ciliated <- read.csv(
    'data/scATAC/seurat.output-S2/clusters/DARs_invivo/ciliated_closest_genes.csv',
    stringsAsFactors = FALSE
)

# List names are lower-case here ('inter_PGRpos', 'ciliated'), unlike the other lists.
Dact <- list(
    inter_PGRpos  = Dact_Inter_PGRpos,
    new_secretory = Dact_new_secretory,
    ciliated      = Dact_Ciliated
)

# Per cluster: sorted, de-duplicated census TFs among the closest genes
# with avg_logFC > 0 and p_val_adj < 0.05.
TFs_organoid_DARs <- lapply(Dact, function(tbl) {
    hits <- subset(tbl, gene_name %in% TFs & avg_logFC > 0 & p_val_adj < 0.05)$gene_name
    sort(unique(hits))
})
TFs_organoid_DARs

In [47]:
# TF_activities = read.csv('data/scATAC/seurat.output-S2/clusters/DARs_invivo/TFs_activities_GSEA.csv', stringsAsFactors = F)
# # TF_activities = TF_activities[ grep('pos', TF_activities$TF) , ]
# TF_activities = subset(TF_activities, pvalue < 0.05 & odds.ratio > 2)
# TF_activities$TF = gsub('_AA_[a-z][a-z][a-z]', '', TF_activities$TF) %>%  gsub('_[A-E]_[a-z][a-z][a-z]', '', .) 
# TF_activities$TF = gsub('_AA', '', TF_activities$TF) %>%  gsub('_[A-E]', '', .) 

# TFs_organoid_DARs = lapply(unique(TF_activities$cl), function(cl_name)  subset(TF_activities, cl == cl_name)$TF )
# names(TFs_organoid_DARs) = unique(TF_activities$cl)
# TFs_organoid_DARs = lapply(TFs_organoid_DARs, unique)
# TFs_organoid_DARs = lapply(TFs_organoid_DARs, sort)
# TFs_organoid_DARs

# PLOTs

In [48]:
# Build the 0/1 TF-by-evidence-set membership table passed to upset() and save
# a ranked copy of it to disk.
#
# Args:
#   cl_list: named list of character vectors of gene symbols, one vector per
#            evidence set. Symbols that are not in the global `TFs` vector are
#            dropped.
#   cl_name: label pasted into the output file name
#            'figures_organoid_confInhibitors/Donor44/TFintersect_<cl_name>.csv'.
#            With the default NULL, paste0() contributes nothing for it, so the
#            file is 'TFintersect_.csv'.
# Returns:
#   data.frame with an 'Identifier' column (the TF symbol, also the row names)
#   followed by one 0/1 column per set, named after the elements of cl_list.
#   NOTE(review): a set with no TFs left contributes no rows to the melted
#   table and so presumably gets no column - confirm against reshape2::melt.
# Side effect:
#   writes the table, plus a column N (number of sets containing the TF),
#   sorted by decreasing N, to the CSV named above.
df2upset = function(cl_list, cl_name=NULL){
    cl_list = lapply(cl_list, intersect, TFs)
    cl_list = lapply(cl_list, unique)

    # Long format (one row per TF/set pair), then cast to a wide 0/1 matrix.
    df = unique(melt(cl_list))
    names(df)[1] = 'TF'
    df$value = 1
    df = acast(df, TF~L1, fill = 0)
    df = data.frame(Identifier=rownames(df), df)
    fdf = df
    # drop = FALSE keeps a data.frame even when a single set column remains;
    # without it the selection collapses to a vector and rowSums() fails.
    fdf$N = rowSums(df[, -1, drop = FALSE])
    write.csv(fdf[ order(fdf$N, decreasing = TRUE) , ] , file = paste0('figures_organoid_confInhibitors/Donor44/TFintersect_',cl_name,'.csv'))
    return(df)
    }

In [49]:
# Organoid Inter_PGRpos cluster: the four evidence sets to intersect.
cl_list <- list(
    RNA_DEG      = TFs_organoid_DEGs$Inter_PGRpos,
    RNA_TFact    = TFs_organoid_activities$cl_inter_PGRpos_DEGs,
    ATAC_TFmotif = TFs_organoid_TFmotifs$Inter_PGRpos,
    ATAC_DARs    = TFs_organoid_DARs$inter_PGRpos
)
df <- df2upset(cl_list, cl_name = 'PGRpos')

# UpSet plot of the membership table, written to PDF.
pdf('figures_organoid_confInhibitors/Donor44/TFintersect_PGRpos.pdf', onefile = FALSE)
upset(df, sets.bar.color = "#56B4E9", order.by = "freq", empty.intersections = "on", text.scale = 2)
dev.off()

# Membership rows for selected TFs (only symbols present in df are shown).
df[rownames(df) %in% c('GLI1', 'FOXJ1', 'RFX2', 'MYC', 'TCF7', 'TCF7L2'), ]
df[rownames(df) %in% c('CSRNP1', 'FOXO3', 'FOXO1'), ]
df[rownames(df) %in% c('HES1', 'HEY1', 'HIF1A', 'RBPJ'), ]
df[rownames(df) %in% c('HES6'), ]
# TFs found in all four sets.
df[rowSums(df[, -1]) == 4, ]

Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
MYC,MYC,0,1,1,1
TCF7,TCF7,1,1,0,1
TCF7L2,TCF7L2,0,1,0,0


Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
<chr>,<dbl>,<dbl>,<dbl>,<dbl>


Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
HES1,HES1,1,1,0,0
HEY1,HEY1,0,1,1,0
HIF1A,HIF1A,0,1,0,0
RBPJ,RBPJ,0,1,1,0


Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
<chr>,<dbl>,<dbl>,<dbl>,<dbl>


Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
<chr>,<dbl>,<dbl>,<dbl>,<dbl>


In [50]:
# Organoid ciliated cluster: the four evidence sets to intersect.
cl_list <- list(
    RNA_DEG      = TFs_organoid_DEGs$Ciliated,
    RNA_TFact    = TFs_organoid_activities$cl_ciliated_DEGs,
    ATAC_TFmotif = TFs_organoid_TFmotifs$Ciliated,
    ATAC_DARs    = TFs_organoid_DARs$ciliated
)
df <- df2upset(cl_list, cl_name = 'ciliated')

# UpSet plot of the membership table, written to PDF.
pdf('figures_organoid_confInhibitors/Donor44/TFintersect_ciliated.pdf', onefile = FALSE)
upset(df, sets.bar.color = "#56B4E9", order.by = "freq", empty.intersections = "on", text.scale = 2)
dev.off()

# Membership rows for selected TFs (only symbols present in df are shown).
df[rownames(df) %in% c('GLI1', 'FOXJ1', 'RFX2', 'MYC', 'TCF7', 'TCF7L2'), ]
df[rownames(df) %in% c('CSRNP1', 'FOXO3', 'FOXO1', 'CEBPA',  'CEBPB',  'CEBPD',  'CEBPG'), ]
df[rownames(df) %in% c('HES1', 'HEY1', 'HIF1A', 'RBPJ'), ]
df[rownames(df) %in% c('HES6'), ]
# TFs found in all four sets.
df[rowSums(df[, -1]) == 4, ]

Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
FOXJ1,FOXJ1,1,1,1,1
GLI1,GLI1,0,1,0,0
MYC,MYC,0,1,0,0
RFX2,RFX2,0,1,1,1
TCF7,TCF7,0,1,1,0


Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
CEBPB,CEBPB,0,0,1,0
CEBPG,CEBPG,0,0,1,0
FOXO1,FOXO1,0,1,0,0
FOXO3,FOXO3,1,1,0,0


Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
HES1,HES1,0,1,0,0
HEY1,HEY1,0,1,0,0
HIF1A,HIF1A,0,1,0,0
RBPJ,RBPJ,1,0,0,0


Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
HES6,HES6,0,0,1,0


Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
FOXJ1,FOXJ1,1,1,1,1


In [51]:
# Organoid new_secretory cluster: the four evidence sets to intersect.
cl_list <- list(
    RNA_DEG      = TFs_organoid_DEGs$new_secretory,
    RNA_TFact    = TFs_organoid_activities$cl_new_secretory_DEGs,
    ATAC_TFmotif = TFs_organoid_TFmotifs$new_secretory,
    ATAC_DARs    = TFs_organoid_DARs$new_secretory
)
df <- df2upset(cl_list, cl_name = 'new_secretory')

# UpSet plot of the membership table, written to PDF.
pdf('figures_organoid_confInhibitors/Donor44/TFintersect_new_secretory.pdf', onefile = FALSE)
upset(df, sets.bar.color = "#56B4E9", order.by = "freq", empty.intersections = "on", text.scale = 2)
dev.off()

# Membership rows for selected TFs (only symbols present in df are shown).
df[rownames(df) %in% c('GLI1', 'FOXJ1', 'RFX2', 'MYC', 'TCF7', 'TCF7L2'), ]
df[rownames(df) %in% c('CSRNP1', 'FOXO3', 'FOXO1', 'CEBPA',  'CEBPB',  'CEBPD',  'CEBPG'), ]
df[rownames(df) %in% c('HES1', 'HEY1', 'HIF1A', 'RBPJ'), ]
df[rownames(df) %in% c('HES6'), ]
# TFs found in all four sets.
df[rowSums(df[, -1]) == 4, ]

Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
MYC,MYC,0,0,1,1
TCF7L2,TCF7L2,1,0,1,1


Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
CEBPA,CEBPA,0,1,1,1
CEBPB,CEBPB,1,1,1,1
CEBPD,CEBPD,0,1,1,1
CEBPG,CEBPG,0,1,0,1
CSRNP1,CSRNP1,0,0,1,1
FOXO1,FOXO1,0,0,1,0


Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
HES1,HES1,0,0,1,0
HEY1,HEY1,0,0,0,1
HIF1A,HIF1A,0,0,1,1


Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
<chr>,<dbl>,<dbl>,<dbl>,<dbl>


Unnamed: 0_level_0,Identifier,ATAC_DARs,ATAC_TFmotif,RNA_DEG,RNA_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
CEBPB,CEBPB,1,1,1,1
HNF1B,HNF1B,1,1,1,1


# IN VIVO RNAseq

## TF activities

In [52]:
# Regulon activity table for the in vivo epithelial clusters.
# NOTE: overwrites the TF_activities object loaded for the organoids.
TF_activities <- read.csv(
    'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/TFs_activities.csv',
    stringsAsFactors = FALSE
)
# Drop every row whose cluster label contains 'scanpy'.
TF_activities <- TF_activities[grep('scanpy', TF_activities$cl_name, invert = TRUE), ]
# Keep regulons with FDR < 0.05 and a positive NES.
TF_activities <- subset(TF_activities, FDR < 0.05 & NES > 0)
# TF symbol = regulon name with every '_AA' and then every '_A'..'_E' removed.
TF_activities$TF <- TF_activities$Regulon %>%
    gsub('_AA', '', .) %>%
    gsub('_[A-E]', '', .)

# One vector of TF symbols per cluster label, in order of first appearance.
TFs_invivo_activities <- setNames(
    lapply(unique(TF_activities$cl_name), function(cl) subset(TF_activities, cl_name == cl)$TF),
    unique(TF_activities$cl_name)
)
TFs_invivo_activities

## DEGs

In [53]:
# In vivo differential-expression tables within the epithelial compartment.
# NOTE(review): the 'Inter_PGRpos' slot is filled from the
# epithelial_proliferative contrast - presumably its in vivo counterpart; confirm.
DE_glandular <- read.csv(
    'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_glandular_DEGs.csv',
    stringsAsFactors = FALSE
)
DE_Inter_PGRpos <- read.csv(
    'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_proliferative_DEGs.csv',
    stringsAsFactors = FALSE
)
DE_Ciliated <- read.csv(
    'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_ciliated_DEGs.csv',
    stringsAsFactors = FALSE
)

DE <- list(
    Inter_PGRpos = DE_Inter_PGRpos,
    glandular    = DE_glandular,
    Ciliated     = DE_Ciliated
)

# Per cluster: genes that are in the TF census, have logFC > 0 and adj.P.Val < 0.05.
TFs_invivo_DEGs <- lapply(DE, function(tbl) {
    subset(tbl, Gene %in% TFs & logFC > 0 & adj.P.Val < 0.05)$Gene
})
TFs_invivo_DEGs

# IN VIVO Spatial

## TF activities

In [54]:
# Regulon activity table for the spatial data set.
# NOTE: overwrites the TF_activities object loaded in earlier cells.
TF_activities <- read.csv('data/spatial/DEGs/TFs_activities.csv', stringsAsFactors = FALSE)
# Drop every row whose cluster label contains 'scanpy'.
TF_activities <- TF_activities[grep('scanpy', TF_activities$cl_name, invert = TRUE), ]
# Keep regulons with FDR < 0.05 and a positive NES.
TF_activities <- subset(TF_activities, FDR < 0.05 & NES > 0)
# TF symbol = regulon name with every '_AA' and then every '_A'..'_E' removed.
TF_activities$TF <- TF_activities$Regulon %>%
    gsub('_AA', '', .) %>%
    gsub('_[A-E]', '', .)

# One vector of TF symbols per cluster label, in order of first appearance.
TFs_invivoSpatial_activities <- setNames(
    lapply(unique(TF_activities$cl_name), function(cl) subset(TF_activities, cl_name == cl)$TF),
    unique(TF_activities$cl_name)
)
TFs_invivoSpatial_activities

## DEGs

In [55]:
# Spatial limma differential-expression tables (each contrast vs. epi_rest).
# NOTE(review): 'Inter_PGRpos' is filled from the epi_basal contrast and
# 'Ciliated' from the epi_luminal contrast - presumably the matching spatial
# compartments; confirm.
DE_glandular <- read.csv(
    'data/spatial/DEGs/epi_glandular_vs_epi_rest_limma_DEGs_20200423.csv',
    stringsAsFactors = FALSE
)
DE_Inter_PGRpos <- read.csv(
    'data/spatial/DEGs/epi_basal_vs_epi_rest_limma_DEGs_20200423.csv',
    stringsAsFactors = FALSE
)
DE_Ciliated <- read.csv(
    'data/spatial/DEGs/epi_luminal_vs_epi_rest_limma_DEGs_20200423.csv',
    stringsAsFactors = FALSE
)

DE <- list(
    Inter_PGRpos = DE_Inter_PGRpos,
    glandular    = DE_glandular,
    Ciliated     = DE_Ciliated
)

# Per contrast: genes that are in the TF census, have logFC > 0 and adj.P.Val < 0.05.
TFs_invivoSpatial_DEGs <- lapply(DE, function(tbl) {
    subset(tbl, Gene %in% TFs & logFC > 0 & adj.P.Val < 0.05)$Gene
})
TFs_invivoSpatial_DEGs

# Plots intersect

In [64]:
# Build the 0/1 TF-by-evidence-set membership table passed to upset() and save
# a ranked, annotated copy of it to disk.
#
# Args:
#   cl_list: list of character vectors of gene symbols, one vector per evidence
#            set. Symbols that are not in the global `TFs` vector are dropped.
#            Sets are grouped by their POSITION in this list: elements 1-4 are
#            counted as organoid evidence, 5-6 as in vivo evidence and 7-8
#            (when more than six sets are given) as in vivo spatial evidence.
#   cl_name: label pasted into the output file name
#            'figures_organoid_confInhibitors/Donor44/TFintersect_<cl_name>.csv'.
#            With the default NULL, paste0() contributes nothing for it.
# Returns:
#   data.frame with an 'Identifier' column (the TF symbol, also the row names)
#   followed by one 0/1 column per set, named after the elements of cl_list.
# Side effect:
#   writes the table plus the columns N, N_organoid, N_invivo,
#   N_invivoSpatial (only with more than six sets) and is_relevant, sorted by
#   decreasing N, to the CSV named above.
df2upset = function(cl_list, cl_name=NULL){
    # Column name each input set gets in the wide table. melt() labels unnamed
    # lists by position and data.frame() passes names through make.names().
    set_ids = names(cl_list)
    if( is.null(set_ids) ){
        set_ids = seq_along(cl_list)
        }
    set_cols = make.names(set_ids)

    cl_list = lapply(cl_list, intersect, TFs)
    cl_list = lapply(cl_list, unique)

    # Long format (one row per TF/set pair), then cast to a wide 0/1 matrix.
    df = unique(melt(cl_list))
    names(df)[1] = 'TF'
    df$value = 1
    df = acast(df, TF~L1, fill = 0)
    df = data.frame(Identifier=rownames(df), df)

    # Sum membership over the sets at positions `idx` of cl_list, looked up by
    # column name. The wide table comes back with its set columns in sorted
    # rather than input order, and a set may have no column at all, so fixed
    # column positions cannot be trusted; a missing column counts as 0.
    count_sets = function(idx){
        cols = intersect(set_cols[idx], colnames(df))
        rowSums(df[, cols, drop = FALSE])
        }

    fdf = df
    fdf$N = rowSums(df[, -1, drop = FALSE])
    fdf$N_organoid = count_sets(1:4)
    fdf$N_invivo = count_sets(5:6)
    if( length(cl_list) > 6 ){
        fdf$N_invivoSpatial = count_sets(7:8)
        }
    # Relevant = supported by at least two organoid sets and one in vivo set.
    fdf$is_relevant = fdf$N_organoid > 1 & fdf$N_invivo > 0
    write.csv(fdf[ order(fdf$N, decreasing = TRUE) , ] , file = paste0('figures_organoid_confInhibitors/Donor44/TFintersect_',cl_name,'.csv'))
    return(df)
    }

In [57]:
# Empty container; later cells store their cl_list in it under a cluster key.
DATA <- list()


In [58]:
# Inter_PGRpos: four organoid sets followed by the two in vivo RNA sets.
cl_list <- list(
    RNA_DEG         = TFs_organoid_DEGs$Inter_PGRpos,
    RNA_TFact       = TFs_organoid_activities$cl_inter_PGRpos_DEGs,
    ATAC_TFmotif    = TFs_organoid_TFmotifs$Inter_PGRpos,
    ATAC_DARs       = TFs_organoid_DARs$inter_PGRpos,
    RNAinvivo_DEG   = TFs_invivo_DEGs$Inter_PGRpos,
    RNAinvivo_TFact = TFs_invivo_activities$cl_epithelial_proliferative_DEGs
)
df <- df2upset(cl_list, cl_name = 'invivo_PGRpos')

# UpSet plot over all six sets, written to PDF.
pdf('figures_organoid_confInhibitors/Donor44/invivo_TFintersect_PGRpos.pdf', onefile = FALSE)
upset(df, sets.bar.color = "#56B4E9", order.by = "freq", empty.intersections = "on", text.scale = 1.5, nsets = 6)
dev.off()

# Keep the input sets for later use.
DATA$PGRpos <- cl_list

In [59]:
# Ciliated: four organoid sets followed by the two in vivo RNA sets.
cl_list <- list(
    RNA_DEG         = TFs_organoid_DEGs$Ciliated,
    RNA_TFact       = TFs_organoid_activities$cl_ciliated_DEGs,
    ATAC_TFmotif    = TFs_organoid_TFmotifs$Ciliated,
    ATAC_DARs       = TFs_organoid_DARs$ciliated,
    RNAinvivo_DEG   = TFs_invivo_DEGs$Ciliated,
    RNAinvivo_TFact = TFs_invivo_activities$cl_epithelial_ciliated_DEGs
)
df <- df2upset(cl_list, cl_name = 'invivo_ciliated')

# UpSet plot over all six sets, written to PDF.
pdf('figures_organoid_confInhibitors/Donor44/invivo_TFintersect_ciliated.pdf', onefile = FALSE)
upset(df, sets.bar.color = "#56B4E9", order.by = "freq", empty.intersections = "on", text.scale = 1.5, nsets = 6)
dev.off()

# Keep the input sets for later use.
DATA$ciliated <- cl_list

In [60]:
# new_secretory: four organoid sets followed by the two in vivo glandular sets.
cl_list <- list(
    RNA_DEG         = TFs_organoid_DEGs$new_secretory,
    RNA_TFact       = TFs_organoid_activities$cl_new_secretory_DEGs,
    ATAC_TFmotif    = TFs_organoid_TFmotifs$new_secretory,
    ATAC_DARs       = TFs_organoid_DARs$new_secretory,
    RNAinvivo_DEG   = TFs_invivo_DEGs$glandular,
    RNAinvivo_TFact = TFs_invivo_activities$cl_epithelial_glandular_DEGs
)
df <- df2upset(cl_list, cl_name = 'invivo_new_secretory')

# UpSet plot over all six sets, written to PDF.
pdf('figures_organoid_confInhibitors/Donor44/invivo_TFintersect_new_secretory.pdf', onefile = FALSE)
upset(df, sets.bar.color = "#56B4E9", order.by = "freq", empty.intersections = "on", text.scale = 1.5, nsets = 6)
dev.off()

# Keep the input sets for later use.
DATA$new_secretory <- cl_list

In [69]:
# Ciliated: organoid sets, in vivo RNA sets and the spatial epi_luminal sets.
cl_list <- list(
    RNA_DEG          = TFs_organoid_DEGs$Ciliated,
    RNA_TFact        = TFs_organoid_activities$cl_ciliated_DEGs,
    ATAC_TFmotif     = TFs_organoid_TFmotifs$Ciliated,
    ATAC_DARs        = TFs_organoid_DARs$ciliated,
    RNAinvivo_DEG    = TFs_invivo_DEGs$Ciliated,
    RNAinvivo_TFact  = TFs_invivo_activities$cl_epithelial_ciliated_DEGs,
    RNAinvivoS_DEG   = TFs_invivoSpatial_DEGs$Ciliated,
    RNAinvivoS_TFact = TFs_invivoSpatial_activities$cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423
)
df <- df2upset(cl_list, cl_name = 'invivoSpatial_ciliated')

# UpSet plot over every supplied set, written to PDF.
pdf('figures_organoid_confInhibitors/Donor44/invivo_TFintersectSpatial_ciliated.pdf', onefile = FALSE)
upset(df, sets.bar.color = "#56B4E9", order.by = "freq", empty.intersections = "on", text.scale = 1.2, nsets = length(cl_list))
dev.off()


In [70]:
# new_secretory: organoid sets, in vivo glandular sets and the spatial epi_glandular sets.
cl_list <- list(
    RNA_DEG          = TFs_organoid_DEGs$new_secretory,
    RNA_TFact        = TFs_organoid_activities$cl_new_secretory_DEGs,
    ATAC_TFmotif     = TFs_organoid_TFmotifs$new_secretory,
    ATAC_DARs        = TFs_organoid_DARs$new_secretory,
    RNAinvivo_DEG    = TFs_invivo_DEGs$glandular,
    RNAinvivo_TFact  = TFs_invivo_activities$cl_epithelial_glandular_DEGs,
    RNAinvivoS_DEG   = TFs_invivoSpatial_DEGs$glandular,
    RNAinvivoS_TFact = TFs_invivoSpatial_activities$cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423
)
df <- df2upset(cl_list, cl_name = 'invivoSpatial_new_secretory')

# UpSet plot over every supplied set, written to PDF.
pdf('figures_organoid_confInhibitors/Donor44/invivo_TFintersectSpatial_new_secretory.pdf', onefile = FALSE)
upset(df, sets.bar.color = "#56B4E9", order.by = "freq", empty.intersections = "on", text.scale = 1.2, nsets = length(cl_list))
dev.off()
