#  Plot TFs

In [41]:
library(dplyr)
library(reshape2)
library(plyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)

## Load TF activities/enrichemnt file

In [42]:
TF_activities = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/TFs_activities.csv', stringsAsFactors = F)
TF_activities = TF_activities[ grep('epithelial_', TF_activities$cl_name) , ]
TF_activities$cl_name = gsub('epithelial_', '', TF_activities$cl_name)


TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$id = paste(TF_activities$cl_name, TF_activities$Regulon)
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>
2,cl_ciliated_DEGs,ZNF19_E,15.33777,4.276324e-53,9.856927e-51,175,cl_ciliated_DEGs ZNF19_E
3,cl_glandular_DEGs,CSRNP1_E,15.06643,2.692739e-51,8.275684999999999e-50,153,cl_glandular_DEGs CSRNP1_E
5,cl_ciliated_DEGs,FOXJ1_E,14.66518,1.0772819999999999e-48,2.128401e-46,150,cl_ciliated_DEGs FOXJ1_E
7,cl_proliferative_DEGs,ZNF749_E,12.58085,2.691199e-36,3.917819e-35,194,cl_proliferative_DEGs ZNF749_E
8,cl_WIF1_2_DEGs,NFKB1_AA,11.1735,5.496902e-29,1.652656e-27,92,cl_WIF1_2_DEGs NFKB1_AA
9,cl_WIF1_2_DEGs,CSRNP1_E,10.85243,1.9419260000000002e-27,5.1647749999999997e-26,153,cl_WIF1_2_DEGs CSRNP1_E


## Load TF agreement file to retrieve DE sign

In [43]:
TF_activities_agreement = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/TFactivities_and_expression_agreement.csv', stringsAsFactors = F, sep = '\t')
TF_activities_agreement$cl = gsub('epithelial_', '', TF_activities_agreement$cl)
TF_activities_agreement$id = paste(TF_activities_agreement$cl, TF_activities_agreement$TF_activity_score)
TF_activities_agreement$annot = '+'
TF_activities_agreement$annot[ TF_activities_agreement$TF_expression == 'downregulated' ] = '-'
head(TF_activities_agreement)

Unnamed: 0_level_0,cl,TF,TF_expression,TF_expression_pvals_adj,TF_activity,TF_activity_pvals_adj,TF_activity_score,L1,id,annot
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>
1,cl_ciliated_DEGs,FOXJ1,upregulated,0.0,active_regulon,2.13e-46,FOXJ1_E,cl_epithelial_ciliated_DEGs,cl_ciliated_DEGs FOXJ1_E,+
2,cl_ciliated_DEGs,RFX2,upregulated,0.0,active_regulon,0.00145,RFX2_C,cl_epithelial_ciliated_DEGs,cl_ciliated_DEGs RFX2_C,+
3,cl_ciliated_DEGs,HES6,upregulated,0.0,active_regulon,4.6e-09,HES6_E,cl_epithelial_ciliated_DEGs,cl_ciliated_DEGs HES6_E,+
4,cl_ciliated_DEGs,ZSCAN1,upregulated,8.19e-211,active_regulon,2.72e-15,ZSCAN1_E,cl_epithelial_ciliated_DEGs,cl_ciliated_DEGs ZSCAN1_E,+
5,cl_ciliated_DEGs,TCF7,upregulated,9.02e-140,inactive_regulon,0.000699,TCF7_B,cl_epithelial_ciliated_DEGs,cl_ciliated_DEGs TCF7_B,+
6,cl_ciliated_DEGs,JUND,downregulated,5.3e-125,inactive_regulon,0.0358,JUND_A,cl_epithelial_ciliated_DEGs,cl_ciliated_DEGs JUND_A,-


## Transfer labels

In [44]:
TF_activities$label = TF_activities_agreement$annot[ match(TF_activities$id, TF_activities_agreement$id) ]
TF_activities$label[ is.na(TF_activities$label) ] = ''
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
2,cl_ciliated_DEGs,ZNF19_E,15.33777,4.276324e-53,9.856927e-51,175,cl_ciliated_DEGs ZNF19_E,+
3,cl_glandular_DEGs,CSRNP1_E,15.06643,2.692739e-51,8.275684999999999e-50,153,cl_glandular_DEGs CSRNP1_E,+
5,cl_ciliated_DEGs,FOXJ1_E,14.66518,1.0772819999999999e-48,2.128401e-46,150,cl_ciliated_DEGs FOXJ1_E,+
7,cl_proliferative_DEGs,ZNF749_E,12.58085,2.691199e-36,3.917819e-35,194,cl_proliferative_DEGs ZNF749_E,
8,cl_WIF1_2_DEGs,NFKB1_AA,11.1735,5.496902e-29,1.652656e-27,92,cl_WIF1_2_DEGs NFKB1_AA,+
9,cl_WIF1_2_DEGs,CSRNP1_E,10.85243,1.9419260000000002e-27,5.1647749999999997e-26,153,cl_WIF1_2_DEGs CSRNP1_E,+


## Plot heatmap - all TFs

In [45]:
# exclude scanpy
TF_activities = TF_activities[ grep('scanpy', TF_activities$cl_name, invert = T) ,]
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(TF_activities, Regulon~cl_name, fill = 0)
# build matrix labels -  aka DE sign - to overlay with the TF activities
TF_activities$value = TF_activities$label
x_label = acast(TF_activities, Regulon~cl_name, fill = '')

In [46]:
# actually plot the heatmap
pheatmap(x, 
         display_numbers = x_label, fontsize_number=10,
         cellheight = 10, cellwidth = 10, 
         filename = 'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_TFs_heatmap.pdf')

## Filter TFs in Notch and Wnt signalling

In [47]:
Wnt = read.csv('figures_primaryEpithelial_clean/cluster_markers/TF_priors/Wnt_list', stringsAsFactors = F, sep = '\t')
Wnt$gene_symbol = sapply(strsplit(Wnt$Gene.names, ' '), head, 1)

Notch = read.csv('figures_primaryEpithelial_clean/cluster_markers/TF_priors/Notch_list', stringsAsFactors = F, sep = '\t')
Notch$gene_symbol = sapply(strsplit(Notch$Gene.names, ' '), head, 1)

In [48]:
TFs_in_x = sapply(strsplit(rownames(x), '_'), head, 1)
is_Wnt = TFs_in_x %in% Wnt$gene_symbol
is_Notch = TFs_in_x %in% Notch$gene_symbol

In [49]:
any(is_Notch)
any(is_Wnt)

In [50]:
# actually plot the heatmap
pheatmap(x[is_Wnt, ], 
         display_numbers = x_label[is_Wnt, ], fontsize_number=10,
         cellheight = 10, cellwidth = 10, 
         filename = 'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_TFs_heatmap_Wnt.pdf')

# actually plot the heatmap
pheatmap(x[is_Notch, ], 
         display_numbers = x_label[is_Notch, ], fontsize_number=10,
         cellheight = 10, cellwidth = 10, 
         filename = 'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_TFs_heatmap_Notch.pdf')

# Plot top 10 active TFs

In [51]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)
unique(TF_activities$cl_name)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
1,cl_ciliated_DEGs,ZNF19_E,15.33777,4.276324e-53,9.856927e-51,175,cl_ciliated_DEGs ZNF19_E,+,+,1
2,cl_ciliated_DEGs,FOXJ1_E,14.665182,1.0772819999999999e-48,2.128401e-46,150,cl_ciliated_DEGs FOXJ1_E,+,+,2
3,cl_ciliated_DEGs,ZSCAN1_E,8.306602,9.848191000000001e-17,2.72401e-15,119,cl_ciliated_DEGs ZSCAN1_E,+,+,3
4,cl_ciliated_DEGs,ZNF311_E,7.652582,1.969831e-14,4.256682e-13,195,cl_ciliated_DEGs ZNF311_E,+,+,4
5,cl_ciliated_DEGs,ZNF610_E,7.256654,3.967829e-13,7.035266e-12,136,cl_ciliated_DEGs ZNF610_E,+,+,5
6,cl_ciliated_DEGs,ZNF713_E,6.733032,1.66163e-11,2.298035e-10,84,cl_ciliated_DEGs ZNF713_E,+,+,6


In [52]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_proliferative_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_LGR5_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_glandular_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_ciliated_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_WIF1_1_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_WIF1_2_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_LYPD1_DEGs'))

# # order clusters
# TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_proliferative G2-S_DEGs'),
#                      subset(TF_activities, cl_name %in% 'cl_proliferative I_DEGs'),
#                      subset(TF_activities, cl_name %in% 'cl_proliferative II_DEGs'),
#                      subset(TF_activities, cl_name %in% 'cl_progenitor_LGR5-WNT7A_DEGs'),
#                      subset(TF_activities, cl_name %in% 'cl_progenitor_KRT5_DEGs'),
#                      subset(TF_activities, cl_name %in% 'cl_pre-FHL2_DEGs'),
#                      subset(TF_activities, cl_name %in% 'cl_FHL2_DEGs'),
#                      subset(TF_activities, cl_name %in% 'cl_pre-secretroy_PAEP_DEGs'),
#                      subset(TF_activities, cl_name %in% 'cl_basal_LHX1_DEGs'))

In [53]:
head(TF_activities)
TFs = read.csv('data/scenic/huTF_census.txt', stringsAsFactors = F, header = F)$V1

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
904,cl_proliferative_DEGs,ZNF749_E,12.58085,2.691199e-36,3.917819e-35,194,cl_proliferative_DEGs ZNF749_E,,,1
905,cl_proliferative_DEGs,HES6_E,9.422683,4.39704e-21,3.234631e-20,185,cl_proliferative_DEGs HES6_E,-,-,2
906,cl_proliferative_DEGs,MYC_AA,9.351921,8.60704e-21,6.199759e-20,144,cl_proliferative_DEGs MYC_AA,+,+,3
907,cl_proliferative_DEGs,KDM5A_D,9.011962,2.0240269999999997e-19,1.358849e-18,178,cl_proliferative_DEGs KDM5A_D,+,+,4
908,cl_proliferative_DEGs,ZFP69_E,8.646868,5.293227e-18,3.239174e-17,161,cl_proliferative_DEGs ZFP69_E,+,+,5
909,cl_proliferative_DEGs,ZKSCAN2_E,8.329324,8.130544e-17,4.570952e-16,190,cl_proliferative_DEGs ZKSCAN2_E,+,+,6


In [54]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top5 = subset(TF_activities, rank %in% 1:5 & NES > 0)$Gene

In [55]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top5), Gene~cl_name, fill = 0)
colnames(x)

In [56]:
x = x[, c(5, 3, 2, 1, 6, 7, 4) ]
colnames(x) = c('prol', 'LGR5', 'glandular', 'ciliated', 'WIF1_1', 'WIF1_2', 'LYPD1')
pheatmap(t(x[ top5, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_top5_TFs_activities.pdf')


# Plot top 10 active TFs - matching expression

In [57]:
default_TF_activities = TF_activities

In [58]:
TF_activities = subset(TF_activities, label != '')

In [59]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$rank = NULL
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_ciliated_DEGs,ZNF19_E,15.33777,4.276324e-53,9.856927e-51,175,cl_ciliated_DEGs ZNF19_E,+,15.33777,ZNF19,1
2,cl_ciliated_DEGs,FOXJ1_E,14.665182,1.0772819999999999e-48,2.128401e-46,150,cl_ciliated_DEGs FOXJ1_E,+,14.665182,FOXJ1,2
3,cl_ciliated_DEGs,ZSCAN1_E,8.306602,9.848191000000001e-17,2.72401e-15,119,cl_ciliated_DEGs ZSCAN1_E,+,8.306602,ZSCAN1,3
4,cl_ciliated_DEGs,ZNF311_E,7.652582,1.969831e-14,4.256682e-13,195,cl_ciliated_DEGs ZNF311_E,+,7.652582,ZNF311,4
5,cl_ciliated_DEGs,ZNF610_E,7.256654,3.967829e-13,7.035266e-12,136,cl_ciliated_DEGs ZNF610_E,+,7.256654,ZNF610,5
6,cl_ciliated_DEGs,ZNF713_E,6.733032,1.66163e-11,2.298035e-10,84,cl_ciliated_DEGs ZNF713_E,+,6.733032,ZNF713,6


In [60]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_proliferative_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_LGR5_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_glandular_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_ciliated_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_WIF1_1_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_WIF1_2_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_LYPD1_DEGs'))

In [61]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top5 = subset(TF_activities, rank %in% 1:5 & NES > 0)$Gene

In [62]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top5), Gene~cl_name, fill = 0)

In [63]:
x = x[, c(5, 3, 2, 1, 6, 7, 4) ]
colnames(x) = c('prol', 'LGR5', 'glandular', 'ciliated', 'WIF1_1', 'WIF1_2', 'LYPD1')
pheatmap(t(x[ top5, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_top5_TFs_activities_filtered_TFisDE.pdf')


# Plot top 10 active TFs - curated TFs - confidence AA, A, B

In [64]:
TF_activities = default_TF_activities[ grep('_E$', default_TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_D$', TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_C$', TF_activities$Regulon, invert = T) , ]

In [65]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$rank = NULL
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_ciliated_DEGs,ZNF382_B,4.571761,4.836431e-06,2.933677e-05,10,cl_ciliated_DEGs ZNF382_B,+,4.571761,ZNF382,1
2,cl_ciliated_DEGs,HIC1_B,4.270365,1.951537e-05,0.000105189,10,cl_ciliated_DEGs HIC1_B,+,4.270365,HIC1,2
3,cl_ciliated_DEGs,PITX1_B,3.90564,9.397618e-05,0.0004289408,11,cl_ciliated_DEGs PITX1_B,+,3.90564,PITX1,3
4,cl_ciliated_DEGs,PPARD_A,3.009163,0.002619684,0.008605754,10,cl_ciliated_DEGs PPARD_A,-,3.009163,PPARD,4
5,cl_ciliated_DEGs,TP63_B,2.931932,0.003368604,0.01073452,20,cl_ciliated_DEGs TP63_B,,2.931932,TP63,5
6,cl_ciliated_DEGs,NFE2L1_B,2.907068,0.003648339,0.01149351,25,cl_ciliated_DEGs NFE2L1_B,+,2.907068,NFE2L1,6


In [66]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_proliferative_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_LGR5_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_glandular_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_ciliated_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_WIF1_1_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_WIF1_2_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_LYPD1_DEGs'))

In [67]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top5 = subset(TF_activities, rank %in% 1:5 & NES > 0)$Gene

In [68]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top5), Gene~cl_name, fill = 0)

In [69]:
x = x[, c(5, 3, 2, 1, 6, 7, 4) ]
colnames(x) = c('prol', 'LGR5', 'glandular', 'ciliated', 'WIF1_1', 'WIF1_2', 'LYPD1')
pheatmap(t(x[ top5, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_top5_TFs_activities_filtered_curatedTFs.pdf')


# Plot top 10 active TFs - DE and curated

In [70]:
TF_activities = default_TF_activities[ grep('_E$', default_TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_D$', TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_C$', TF_activities$Regulon, invert = T) , ]
TF_activities = subset(TF_activities, label != '')

In [71]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$rank = NULL
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_ciliated_DEGs,ZNF382_B,4.571761,4.836431e-06,2.933677e-05,10,cl_ciliated_DEGs ZNF382_B,+,4.571761,ZNF382,1
2,cl_ciliated_DEGs,HIC1_B,4.270365,1.951537e-05,0.000105189,10,cl_ciliated_DEGs HIC1_B,+,4.270365,HIC1,2
3,cl_ciliated_DEGs,PITX1_B,3.90564,9.397618e-05,0.0004289408,11,cl_ciliated_DEGs PITX1_B,+,3.90564,PITX1,3
4,cl_ciliated_DEGs,PPARD_A,3.009163,0.002619684,0.008605754,10,cl_ciliated_DEGs PPARD_A,-,3.009163,PPARD,4
5,cl_ciliated_DEGs,NFE2L1_B,2.907068,0.003648339,0.01149351,25,cl_ciliated_DEGs NFE2L1_B,+,2.907068,NFE2L1,5
6,cl_ciliated_DEGs,POU2F1_A,-2.334509,0.01956909,0.04658352,30,cl_ciliated_DEGs POU2F1_A,+,-2.334509,POU2F1,6


In [72]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_proliferative_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_LGR5_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_glandular_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_ciliated_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_WIF1_1_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_WIF1_2_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_LYPD1_DEGs'))

In [73]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top5 = subset(TF_activities, rank %in% 1:5 & NES > 0)$Gene

In [74]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top5), Gene~cl_name, fill = 0)

In [75]:
x = x[, c(5, 3, 2, 1, 6, 7, 4) ]
colnames(x) = c('prol', 'LGR5', 'glandular', 'ciliated', 'WIF1_1', 'WIF1_2', 'LYPD1')

pheatmap(t(x[ top5, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_top5_TFs_activities_filtered_curatedTFs_TFisDE.pdf')


# Plot top 10 DE TFs

In [76]:
# Rank TFs according to their DE Fold change
DE_WIF1_2 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_WIF1_2_DEGs.csv', stringsAsFactors = F)
DE_WIF1_1 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_WIF1_1_DEGs.csv', stringsAsFactors = F)
DE_LYPD1 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_LYPD1_DEGs.csv', stringsAsFactors = F)
DE_LGR5 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_LGR5_DEGs.csv', stringsAsFactors = F)
DE_prol = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_proliferative_DEGs.csv', stringsAsFactors = F)
DE_cil = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_ciliated_DEGs.csv', stringsAsFactors = F)
DE_gland = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_glandular_DEGs.csv', stringsAsFactors = F)

DE = list(prol = DE_prol, 
          LGR5 = DE_LGR5, 
          gland = DE_gland,
          cil = DE_cil,
          WIF1_1 = DE_WIF1_1, 
          WIF1_2 = DE_WIF1_2, 
          LYPD1 = DE_LYPD1)




# Rank TFs according to their DE Fold change
# DE_LHX1 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/clusterDEGs_and_TFs/basal_LHX1_DEGs.csv', stringsAsFactors = F)
# DE_FHL2 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/clusterDEGs_and_TFs/FHL2_DEGs.csv', stringsAsFactors = F)
# DE_preFHL2 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/clusterDEGs_and_TFs/WIF1_1_DEGs.csv', stringsAsFactors = F)
# DE_LYPD1 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/clusterDEGs_and_TFs/pre-secretroy_PAEP_DEGs.csv', stringsAsFactors = F)
# DE_KRT5 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/clusterDEGs_and_TFs/progenitor_KRT5_DEGs.csv', stringsAsFactors = F)
# DE_LGR5 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/clusterDEGs_and_TFs/progenitor_LGR5-WNT7A_DEGs.csv', stringsAsFactors = F)
# DE_prol_G2S = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/clusterDEGs_and_TFs/proliferative G2-S_DEGs.csv', stringsAsFactors = F)
# DE_prol_1 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/clusterDEGs_and_TFs/proliferative I_DEGs.csv', stringsAsFactors = F)
# DE_prol_2 = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/clusterDEGs_and_TFs/proliferative II_DEGs.csv', stringsAsFactors = F)

# DE = list(prol_G2S = DE_prol_G2S, 
#           prol_I = DE_prol_1, 
#           prol_II = DE_prol_2,
#           LGR5 = DE_LGR5,
#           KRT5 = DE_KRT5,  
#           preFHL2 = DE_preFHL2, 
#           FHL2 = DE_FHL2, 
#           LYPD1 = DE_LYPD1, 
#           LHX1 = DE_LHX1)

DE = lapply(DE, function(x){
    x = subset(x, Gene %in% TFs)
    x = x[ order(x$logFC, decreasing = T) ,]
    x$rank = 1:nrow(x)
    return(x)
})




In [77]:
DE_df = melt(DE, id.vars = names(DE[[1]]) )
head(DE_df)

Unnamed: 0_level_0,cluster,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,rank,L1
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
1,epithelial_proliferative,PGR,1.3268391,0.0,0.0,1.678935,0.352096,0.990431,0.314191,1,prol
2,epithelial_proliferative,MSX1,1.1286395,0.0,0.0,2.038493,0.909853,0.992225,0.599519,2,prol
3,epithelial_proliferative,ESR1,0.8846107,0.0,0.0,1.421221,0.53661,0.964713,0.467228,3,prol
4,epithelial_proliferative,NPAS3,0.7478033,0.0,0.0,0.811245,0.063442,0.856459,0.106133,4,prol
5,epithelial_proliferative,MSX2,0.6681302,2.615417e-289,4.5978150000000004e-287,0.986997,0.318867,0.863636,0.319603,5,prol
6,epithelial_proliferative,MECOM,0.5904095,1.950091e-233,2.022635e-231,1.042338,0.451929,0.916866,0.423331,6,prol


In [78]:
# Select top 10 ranked genes
top5 = subset(DE_df, rank %in% 1:5)$Gene #%>% unique(.)
top5

In [79]:
# build matrix of TF activity scores to plot
DE_df$value = DE_df$logFC
x = acast(subset(DE_df, Gene %in% top5), Gene~L1, fill = 0)
pheatmap(t(x[top5, names(DE)]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/epithelial_top5_TFs_DE.pdf')


In [80]:
# mydf = subset(DE_df, Gene %in% top5)
# mydf$Gene = factor(mydf$Gene, levels = unique(top5))
# mydf$cluster = factor(mydf$L1, levels = rev(names(DE)))

# require(ggplot2)
#   ggplot(mydf, 
#          aes(x = Gene,
#              y = cluster)) +        ## global aes
# #   geom_tile(aes(fill = P.Value)) +         ## to get the rect filled
#   geom_point(aes(colour = logFC, 
#                    size = percentExpr_cluster))  +    ## geom_point for circle illusion
#   scale_color_gradient2(low = "blue",   mid = 'white',
#                        high = "red")+       ## color of the corresponding aes
#   scale_size(range = c(1, 4))+             ## to tune the size of circles
#   theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# ggsave(filename = 'data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/clusterDEGs_and_TFs/top5_TFs_DE_dotplot.pdf', dpi = 300, width = 10, height = 4)