In [3]:
library(dplyr)
library(RColorBrewer)
library(harmony)
library(ggplot2)
library(pheatmap)
library(cowplot)
require(viper)
require(reshape2)


# Report which entries of a list of gene/TF sets contain a given TF.
#
# Arguments:
#   TF      a single gene/TF identifier to look up.
#   TFslist list of character vectors (one per cluster). The element names
#           are used as the labels in the 'cells_string' output.
#   output  'cells_string' (default): names of the matching entries joined
#           with ','. 'Ncells': number of matching entries.
#
# Returns a length-1 character string ('cells_string') or a length-1 integer
# ('Ncells'). Any other value of `output` is an error.
find_which = function(TF, TFslist, output='cells_string') {
    # Fail loudly on an unknown mode instead of silently returning NULL
    output = match.arg(output, c('cells_string', 'Ncells'))
    # vapply() always yields a logical vector, even for an empty list, where
    # sapply() would return list() and make which() fail
    hits = vapply(TFslist, function(x) TF %in% x, logical(1))
    if (output == 'Ncells') {
        # Count the matches themselves rather than their names, so that
        # unnamed lists are counted correctly
        return(sum(hits))
    }
    paste(names(hits)[hits], collapse = ',')
}

## Load differential expression (DE) info

In [4]:
## IN VITRO
# List of differential-expression tables, one element per cluster
path_DE = '/lustre/scratch117/cellgen/team292/lh20/Sharing/Luz/bulkorg_noinhib_DEtables.rds'
exp_invitro = readRDS(path_DE)

# Cluster -> table of up-regulated genes: positive log2 fold change and
# FDR-corrected log10 p-value below log10(0.05); fold changes rounded to 2 digits
all_DEGs_invitro = lapply(exp_invitro, function(tab) {
    up = subset(tab, Log2FC > 0 & WilcoxLog10Pvalue_fdr_corr < log10(0.05))
    up$Log2FC = round(up$Log2FC, digits = 2)
    up
} )
# Cluster -> names (row names) of the up-regulated genes
clusters = names(all_DEGs_invitro)
DEGs_invitro = setNames(lapply(all_DEGs_invitro, rownames), clusters)


## IN VIVO
path_DE = '/lustre/scratch117/cellgen/team292/lh20/Sharing/Luz/epithelial_DEtables.rds'
exp_invivo = readRDS(path_DE)
# Copy the row names into an explicit 'gene' column of every table
for (i in names(exp_invivo)){
    exp_invivo[[i]][['gene']] = rownames(exp_invivo[[i]])
}
# Cluster -> table of up-regulated genes (same filter as for the in vitro data)
all_DEGs_invivo = lapply(exp_invivo, function(tab) {
    up = subset(tab, Log2FC > 0 & WilcoxLog10Pvalue_fdr_corr < log10(0.05))
    up$Log2FC = round(up$Log2FC, digits = 2)
    up
} )
# Cluster -> names (row names) of the up-regulated genes
clusters = names(all_DEGs_invivo)
DEGs_invivo = setNames(lapply(all_DEGs_invivo, rownames), clusters)


# Show the cluster labels available in each system
names(DEGs_invitro)
names(DEGs_invivo)

## Load TF activities info

In [5]:
## IN VITRO
path_TFact = 'invitro/out/TFs_activities.csv'
ALL_TFact_invitro = read.csv(path_TFact, stringsAsFactors = FALSE)
# TF name = regulon label with every '_AA' and then every '_B'..'_E' removed
ALL_TFact_invitro$TF = gsub('_[B-E]', '', gsub('_AA', '', ALL_TFact_invitro$Regulon))
# Keep regulons with positive enrichment score and FDR below 0.05
all_TFact_invitro = subset(ALL_TFact_invitro, NES > 0 & FDR < 0.05)

# Cluster -> TFs retained for that cluster, clusters in order of first appearance
TFact_invitro = setNames(
    lapply(unique(all_TFact_invitro$cluster),
           function(cl) {
               subset(all_TFact_invitro, cluster == cl)$TF
           }),
    unique(all_TFact_invitro$cluster)
)

## IN VIVO
path_TFact = 'invivo/epithelial_TFs_activities.csv'
ALL_TFact_invivo = read.csv(path_TFact, stringsAsFactors = FALSE)
# TF name = regulon label with every '_AA' and then every '_B'..'_E' removed
ALL_TFact_invivo$TF = gsub('_[B-E]', '', gsub('_AA', '', ALL_TFact_invivo$Regulon))
# Keep regulons with positive enrichment score and FDR below 0.05
all_TFact_invivo = subset(ALL_TFact_invivo, NES > 0 & FDR < 0.05)

# Cluster -> TFs retained for that cluster, clusters in order of first appearance
TFact_invivo = setNames(
    lapply(unique(all_TFact_invivo$cluster),
           function(cl) {
               subset(all_TFact_invivo, cluster == cl)$TF
           }),
    unique(all_TFact_invivo$cluster)
)

# Lineages

In [6]:
# Cluster labels grouped by lineage; used to index the per-cluster lists.
# NOTE(review): 'Ciliated_LRG5' is spelled 'LRG5' while 'SOX9_LGR5' uses 'LGR5';
# confirm both spellings against the cluster names in the data.
ciliated = c(
    'Preciliated',
    'Ciliated',
    'Ciliated_LRG5'
)
# Same as `ciliated` without 'Ciliated_LRG5'
ciliated2 = c(
    'Preciliated',
    'Ciliated'
)
glandular = c(
    'Estrogen_Induced',
    'Secretory',
    'Secretory_Proliferative',
    'Glandular',
    'Glandular_secretory'
)
lumen = c(
    'Lumenal_1',
    'Lumenal_2'
)
basal = c(
    'SOX9_prolif',
    'SOX9_LGR5',
    'SOX9'
)

# Build summary table

In [7]:
# Pool the TF names of both unfiltered activity tables (duplicates are kept)
TFs = c(
    ALL_TFact_invivo[['TF']],
    ALL_TFact_invitro[['TF']]
)

In [8]:
# TFs that appear in at least one evidence set (significant TF activity or
# up-regulated DEG, in vivo or in vitro), unique and sorted
all_TFs = sort(
    intersect(
        TFs,
        c(unlist(TFact_invivo), unlist(TFact_invitro), unlist(DEGs_invitro), unlist(DEGs_invivo))
    )
)

# One row per TF; the lineage-specific tables start from this skeleton
results = data.frame(TF = all_TFs, stringsAsFactors = FALSE)

In [9]:
results_ciliated = results

# Evidence flags: 1 when the TF is found in at least one ciliated-lineage
# cluster of the given evidence set, otherwise 0
results_ciliated$is_upDEG_any_invivo = as.numeric(results_ciliated$TF %in% unlist(DEGs_invivo[ciliated]))
results_ciliated$is_upDEG_any_invitro = as.numeric(results_ciliated$TF %in% unlist(DEGs_invitro[ciliated]))
results_ciliated$is_upTFact_any_invivo = as.numeric(results_ciliated$TF %in% unlist(TFact_invivo[ciliated]))
results_ciliated$is_upTFact_any_invitro = as.numeric(results_ciliated$TF %in% unlist(TFact_invitro[ciliated]))

# Number of evidence types (DEG, TF activity) per system. Columns are addressed
# by name so the sums do not depend on column positions.
results_ciliated$N_is_up_any_invivo = rowSums(results_ciliated[, c('is_upDEG_any_invivo', 'is_upTFact_any_invivo')])
results_ciliated$N_is_up_any_invitro = rowSums(results_ciliated[, c('is_upDEG_any_invitro', 'is_upTFact_any_invitro')])
results_ciliated$N_is_up_any = results_ciliated$N_is_up_any_invivo + results_ciliated$N_is_up_any_invitro
# TRUE only when there is evidence in BOTH systems. Testing the summed counts
# for '> 1' also flagged TFs with two lines of evidence in a single system.
results_ciliated$is_up_in_both = results_ciliated$N_is_up_any_invivo > 0 & results_ciliated$N_is_up_any_invitro > 0

# Comma-separated ciliated clusters in which each TF is found, per evidence set
results_ciliated$DEG_invivo_which = vapply(results_ciliated$TF, find_which, character(1), DEGs_invivo[ciliated])
results_ciliated$DEG_invitro_which = vapply(results_ciliated$TF, find_which, character(1), DEGs_invitro[ciliated])
results_ciliated$TFact_invivo_which = vapply(results_ciliated$TF, find_which, character(1), TFact_invivo[ciliated])
results_ciliated$TFact_invitro_which = vapply(results_ciliated$TF, find_which, character(1), TFact_invitro[ciliated])
# Total number of (evidence set, cluster) hits across the four evidence sets
results_ciliated$N_all_celltypes = vapply(results_ciliated$TF, find_which, integer(1), DEGs_invivo[ciliated], output='Ncells') +
                                    vapply(results_ciliated$TF, find_which, integer(1), DEGs_invitro[ciliated], output='Ncells') +
                                    vapply(results_ciliated$TF, find_which, integer(1), TFact_invivo[ciliated], output='Ncells') +
                                    vapply(results_ciliated$TF, find_which, integer(1), TFact_invitro[ciliated], output='Ncells')


# Rank: TFs supported in both systems first, then by number of evidence flags
# (N_all_celltypes would be an alternative second key). Remaining ties follow
# the row names compared as character strings, i.e. the same tie order as
# sorting by row names first and then stably by the other two keys.
results_ciliated = results_ciliated[ order(-results_ciliated$is_up_in_both,
                                           -results_ciliated$N_is_up_any,
                                           rownames(results_ciliated)) , ]
head(results_ciliated, 30)
write.table(results_ciliated, file = '/home/jovyan/farm/endometrium_v2/merged/TFs_agreement_ciliated.tsv', row.names = FALSE, quote = FALSE, sep = '\t')

Unnamed: 0_level_0,TF,is_upDEG_any_invivo,is_upDEG_any_invitro,is_upTFact_any_invivo,is_upTFact_any_invitro,N_is_up_any_invivo,N_is_up_any_invitro,N_is_up_any,is_up_in_both,DEG_invivo_which,DEG_invitro_which,TFact_invivo_which,TFact_invitro_which,N_all_celltypes
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<int>
260,RFX2,1,1,1,1,2,2,4,True,"Preciliated,Ciliated,Ciliated_LRG5","Preciliated,Ciliated","Preciliated,Ciliated,Ciliated_LRG5","Preciliated,Ciliated",10
288,SOX30,1,1,1,1,2,2,4,True,Ciliated,Preciliated,"Preciliated,Ciliated,Ciliated_LRG5",Preciliated,6
385,ZNF19,1,1,1,1,2,2,4,True,"Ciliated,Ciliated_LRG5","Preciliated,Ciliated","Preciliated,Ciliated,Ciliated_LRG5","Preciliated,Ciliated",9
489,ZNF713,1,1,1,1,2,2,4,True,Ciliated,Preciliated,"Preciliated,Ciliated,Ciliated_LRG5","Preciliated,Ciliated",7
522,ZSCAN1,1,1,1,1,2,2,4,True,"Preciliated,Ciliated,Ciliated_LRG5","Preciliated,Ciliated","Preciliated,Ciliated,Ciliated_LRG5","Preciliated,Ciliated",10
84,FOXJ1,1,1,1,1,2,2,4,True,"Preciliated,Ciliated,Ciliated_LRG5","Preciliated,Ciliated","Preciliated,Ciliated,Ciliated_LRG5","Preciliated,Ciliated",10
89,FOXN4,1,1,1,1,2,2,4,True,Preciliated,Preciliated,"Ciliated,Ciliated_LRG5",Ciliated,5
164,LHX1,0,1,1,1,1,2,3,True,,Preciliated,Preciliated,Preciliated,3
311,TCF25,1,1,1,0,2,1,3,True,Preciliated,Preciliated,Ciliated,,3
422,ZNF429,0,1,1,1,1,2,3,True,,Preciliated,Ciliated,Preciliated,3


In [10]:
results_glandular = results

# Evidence flags: 1 when the TF is found in at least one glandular-lineage
# cluster of the given evidence set, otherwise 0
results_glandular$is_upDEG_any_invivo = as.numeric(results_glandular$TF %in% unlist(DEGs_invivo[glandular]))
results_glandular$is_upDEG_any_invitro = as.numeric(results_glandular$TF %in% unlist(DEGs_invitro[glandular]))
results_glandular$is_upTFact_any_invivo = as.numeric(results_glandular$TF %in% unlist(TFact_invivo[glandular]))
results_glandular$is_upTFact_any_invitro = as.numeric(results_glandular$TF %in% unlist(TFact_invitro[glandular]))

# Number of evidence types (DEG, TF activity) per system. Columns are addressed
# by name so the sums do not depend on column positions.
results_glandular$N_is_up_any_invivo = rowSums(results_glandular[, c('is_upDEG_any_invivo', 'is_upTFact_any_invivo')])
results_glandular$N_is_up_any_invitro = rowSums(results_glandular[, c('is_upDEG_any_invitro', 'is_upTFact_any_invitro')])
results_glandular$N_is_up_any = results_glandular$N_is_up_any_invivo + results_glandular$N_is_up_any_invitro
# TRUE only when there is evidence in BOTH systems. Testing the summed counts
# for '> 1' also flagged TFs with two lines of evidence in a single system.
results_glandular$is_up_in_both = results_glandular$N_is_up_any_invivo > 0 & results_glandular$N_is_up_any_invitro > 0

# Comma-separated glandular clusters in which each TF is found, per evidence set
results_glandular$DEG_invivo_which = vapply(results_glandular$TF, find_which, character(1), DEGs_invivo[glandular])
results_glandular$DEG_invitro_which = vapply(results_glandular$TF, find_which, character(1), DEGs_invitro[glandular])
results_glandular$TFact_invivo_which = vapply(results_glandular$TF, find_which, character(1), TFact_invivo[glandular])
results_glandular$TFact_invitro_which = vapply(results_glandular$TF, find_which, character(1), TFact_invitro[glandular])
# Total number of (evidence set, cluster) hits across the four evidence sets
results_glandular$N_all_celltypes = vapply(results_glandular$TF, find_which, integer(1), DEGs_invivo[glandular], output='Ncells') +
                                    vapply(results_glandular$TF, find_which, integer(1), DEGs_invitro[glandular], output='Ncells') +
                                    vapply(results_glandular$TF, find_which, integer(1), TFact_invivo[glandular], output='Ncells') +
                                    vapply(results_glandular$TF, find_which, integer(1), TFact_invitro[glandular], output='Ncells')


# Rank: TFs supported in both systems first, then by number of evidence flags.
# Remaining ties follow the row names compared as character strings, i.e. the
# same tie order as sorting by row names first and then stably by the other
# two keys.
results_glandular = results_glandular[ order(-results_glandular$is_up_in_both,
                                             -results_glandular$N_is_up_any,
                                             rownames(results_glandular)) , ]
head(results_glandular, 30)
write.table(results_glandular, file = '/home/jovyan/farm/endometrium_v2/merged/TFs_agreement_glandular.tsv', row.names = FALSE, quote = FALSE, sep = '\t')

Unnamed: 0_level_0,TF,is_upDEG_any_invivo,is_upDEG_any_invitro,is_upTFact_any_invivo,is_upTFact_any_invitro,N_is_up_any_invivo,N_is_up_any_invitro,N_is_up_any,is_up_in_both,DEG_invivo_which,DEG_invitro_which,TFact_invivo_which,TFact_invitro_which,N_all_celltypes
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<int>
110,HIF1A,1,1,1,1,2,2,4,True,Glandular_secretory,"Secretory,Secretory_Proliferative",Glandular_secretory,Secretory,5
134,IKZF2,1,1,1,1,2,2,4,True,Glandular_secretory,Secretory,Glandular,"Secretory,Secretory_Proliferative",5
35,CSRNP1,1,1,1,1,2,2,4,True,"Glandular,Glandular_secretory",Estrogen_Induced,"Glandular,Glandular_secretory","Secretory,Secretory_Proliferative",7
413,ZNF384,1,1,1,1,2,2,4,True,Glandular_secretory,Secretory,Glandular_secretory,"Secretory,Secretory_Proliferative",5
499,ZNF765,1,1,1,1,2,2,4,True,Glandular_secretory,Secretory,Glandular,Secretory,4
62,ELK1,1,1,1,1,2,2,4,True,Glandular_secretory,"Estrogen_Induced,Secretory",Glandular,Estrogen_Induced,5
113,HMBOX1,1,1,0,1,1,2,3,True,Glandular_secretory,Secretory,,Estrogen_Induced,3
12,ATF4,1,1,0,1,1,2,3,True,Glandular_secretory,"Secretory,Secretory_Proliferative",,Secretory,4
136,IRF1,1,1,0,1,1,2,3,True,"Glandular,Glandular_secretory",Secretory,,Secretory,4
144,IRX3,1,1,0,1,1,2,3,True,Glandular_secretory,"Secretory,Secretory_Proliferative",,Secretory,4


In [11]:
# Selected TFs per lineage, read from CSV; their 'invivo' / 'invitro' columns
# supply the TF lists for the heatmaps
TF_selected_ciliated = read.csv(file = 'merged/TF_Ciliated_selected.csv',
                                stringsAsFactors = FALSE)
TF_selected_glandular = read.csv(file = 'merged/TF_Glandular_selected.csv',
                                 stringsAsFactors = FALSE)

In [12]:
# Draw a heatmap of log2 fold changes for selected TFs across clusters and mark
# the TF/cluster pairs listed in `sigTFs` with '*'.
#
# Arguments:
#   myDF            long data frame with columns gene, L1 (cluster) and Log2FC.
#   TFs_of_interest TFs to plot. Those absent from myDF$gene are dropped; the
#                   remaining ones define the order along the TF axis.
#   sigTFs          data frame with columns TF and cluster; every row marks the
#                   corresponding heatmap cell with '*'.
#   color           colour vector handed to pheatmap().
#   pdf_file        handed to pheatmap() as `filename`; NA (default) is passed
#                   through unchanged.
#
# Returns the value of the pheatmap() call. Stops with an error when none of
# the requested TFs is present in myDF.
plotHeatmap_TFexp = function(myDF, TFs_of_interest, sigTFs, color, pdf_file = NA){
    TFs_of_interest = intersect(TFs_of_interest, myDF$gene)
    if (length(TFs_of_interest) == 0) {
        stop('None of the requested TFs is present in myDF$gene', call. = FALSE)
    }
    df = subset(myDF, gene %in% TFs_of_interest)
    # build the gene x cluster matrix of log2 fold changes; missing pairs are 0
    df$value = df$Log2FC
    FoldC = acast(df, gene~L1, fill = 0, value.var = 'value')
    # drop = FALSE keeps a matrix when a single TF is left; without it the
    # result collapses to a vector and the labels below are never filled in
    FoldC = FoldC[TFs_of_interest, , drop = FALSE]
    # label matrix of the same shape: '*' for pairs listed in sigTFs, else ''
    labels = FoldC
    labels[] = ''
    sig = sigTFs[sigTFs$TF %in% rownames(labels) & sigTFs$cluster %in% colnames(labels), , drop = FALSE]
    if (nrow(sig) > 0) {
        # two-column character index: (row name, column name) of each hit
        labels[cbind(as.character(sig$TF), as.character(sig$cluster))] = '*'
    }

    # clusters on rows, TFs on columns
    pheatmap(t(FoldC), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, display_numbers = t(labels),
             color = color,
            filename = pdf_file)
}

## IN VIVO
# Stack the per-cluster tables into one long data frame (every original column
# is kept as an id variable), then keep the ciliated2, glandular and lumen
# clusters and fix their plotting order through the factor levels of L1
df_DEGs_invivo = melt(exp_invivo, id.vars = names(exp_invivo[[1]]))
df_DEGs_invivo = df_DEGs_invivo[df_DEGs_invivo$L1 %in% c(ciliated2, glandular, lumen), , drop = FALSE]
df_DEGs_invivo$L1 = factor(
    df_DEGs_invivo$L1,
    levels = intersect(c(ciliated, glandular, lumen), df_DEGs_invivo$L1)
)
# Glandular TFs ('.' entries are placeholders and are removed)
plotHeatmap_TFexp(
    myDF = df_DEGs_invivo,
    TFs_of_interest = setdiff(TF_selected_glandular$invivo, '.'),
    sigTFs = all_TFact_invivo,
    color = colorRampPalette(c(brewer.pal(n = 3, name = 'Blues')[c(3,2)], "white", brewer.pal(n = 8, name = 'Reds')[c(5,8)]))(50),
    pdf_file = 'merged/heatmap_glandularTFs_invivo.pdf'
)
# Ciliated TFs
plotHeatmap_TFexp(
    myDF = df_DEGs_invivo,
    TFs_of_interest = setdiff(TF_selected_ciliated$invivo, '.'),
    sigTFs = all_TFact_invivo,
    color = colorRampPalette(c(brewer.pal(n = 3, name = 'Blues')[c(3,2)], "white", brewer.pal(n = 8, name = 'Reds')[c(5,6, 8)]))(50),
    pdf_file = 'merged/heatmap_ciliatedTFs_invivo.pdf'
)

## IN VITRO
# Stack the per-cluster in vitro tables into one long data frame. The id
# columns are taken from the in vitro tables themselves (they were previously
# taken from the in vivo tables), so every in vitro column is kept as an id
# variable whatever the in vivo tables contain.
# NOTE(review): plotHeatmap_TFexp() needs a 'gene' column; this assumes the
# in vitro tables already carry one - confirm.
df_DEGs_invitro = melt(exp_invitro, id.vars = names(exp_invitro[[1]]) )
# Keep the glandular and ciliated clusters; factor levels fix the plotting order
df_DEGs_invitro = subset(df_DEGs_invitro, L1 %in% c(glandular, ciliated))
df_DEGs_invitro$L1 = factor(df_DEGs_invitro$L1, levels = intersect(c(glandular, ciliated), df_DEGs_invitro$L1))
# Glandular TFs ('.' entries are placeholders and are removed)
plotHeatmap_TFexp(df_DEGs_invitro, 
                  setdiff(TF_selected_glandular$invitro, '.'), 
                  all_TFact_invitro, 
                  color = colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[c(5:1)], "white", brewer.pal(n = 8, name = 'Reds')[c(4,6,8)]))(50),
                  pdf_file = 'merged/heatmap_glandularTFs_invitro.pdf')
# Ciliated TFs
plotHeatmap_TFexp(df_DEGs_invitro, 
                  setdiff(TF_selected_ciliated$invitro, '.'), 
                  all_TFact_invitro, 
                  color = colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[c(5:2)], "white", brewer.pal(n = 8, name = 'Reds')[c(4,6,7, 8)]))(50),
                  pdf_file = 'merged/heatmap_ciliatedTFs_invitro.pdf')