#  Plot TFs

In [218]:
library(dplyr)
library(reshape2)
library(plyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)

## Load TF activities/enrichemnt file

In [219]:
TF_activities = read.csv('figures_primaryEpithelial_clean/cluster_markers/geneset_cl_enrichment_TFs.csv', stringsAsFactors = F)
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$id = paste(TF_activities$cl_name, TF_activities$Regulon)

head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>
2,cl_glandular_DEGs,CSRNP1_E,16.31402,7.845742000000001e-60,8.346663e-58,152,cl_glandular_DEGs CSRNP1_E
4,cl_ciliated_DEGs,ZNF19_E,15.88198,8.447424e-57,2.920697e-54,175,cl_ciliated_DEGs ZNF19_E
5,cl_ciliated_DEGs,FOXJ1_E,15.11627,1.265129e-51,3.499348e-49,149,cl_ciliated_DEGs FOXJ1_E
7,cl_pre-secretory PAEP_DEGs,ZNF594_E,13.45049,3.05853e-41,1.409982e-39,197,cl_pre-secretory PAEP_DEGs ZNF594_E
8,cl_pre-secretory FHL2_DEGs,CSRNP1_E,12.8773,6.041396999999999e-38,2.088813e-35,152,cl_pre-secretory FHL2_DEGs CSRNP1_E
9,cl_pre-secretory PAEP_DEGs,ZNF493_E,12.70168,5.787402e-37,1.8190859999999999e-35,167,cl_pre-secretory PAEP_DEGs ZNF493_E


## Load TF agreement file to retrieve DE sign

In [220]:
TF_activities_agreement = read.csv('figures_primaryEpithelial_clean/cluster_markers/TFs_agreement.csv', stringsAsFactors = F, sep = '\t')
TF_activities_agreement$id = paste(TF_activities_agreement$cl, TF_activities_agreement$TF_activity_score)
TF_activities_agreement$annot = '+'
TF_activities_agreement$annot[ TF_activities_agreement$TF_expression == 'downregulated' ] = '-'
head(TF_activities_agreement)

Unnamed: 0_level_0,cl,TF,TF_expression,TF_expression_pvals_adj,TF_activity,TF_activity_pvals_adj,TF_activity_score,L1,id,annot
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>
1,cl_ciliated_DEGs,FOXJ1,upregulated,0.0,active_regulon,3.5e-49,FOXJ1_E,cl_ciliated_DEGs,cl_ciliated_DEGs FOXJ1_E,+
2,cl_ciliated_DEGs,RFX2,upregulated,0.0,active_regulon,4.18e-05,RFX2_C,cl_ciliated_DEGs,cl_ciliated_DEGs RFX2_C,+
3,cl_ciliated_DEGs,HES6,upregulated,0.0,active_regulon,3.48e-11,HES6_E,cl_ciliated_DEGs,cl_ciliated_DEGs HES6_E,+
4,cl_ciliated_DEGs,ZSCAN1,upregulated,1.62e-206,active_regulon,6.09e-17,ZSCAN1_E,cl_ciliated_DEGs,cl_ciliated_DEGs ZSCAN1_E,+
5,cl_ciliated_DEGs,TCF7,upregulated,1.24e-141,inactive_regulon,0.0014,TCF7_B,cl_ciliated_DEGs,cl_ciliated_DEGs TCF7_B,+
6,cl_ciliated_DEGs,ID3,downregulated,7.079999999999999e-94,active_regulon,0.00125,ID3_C,cl_ciliated_DEGs,cl_ciliated_DEGs ID3_C,-


## Transfer labels

In [221]:
TF_activities$label = TF_activities_agreement$annot[ match(TF_activities$id, TF_activities_agreement$id) ]
TF_activities$label[ is.na(TF_activities$label) ] = ''
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
2,cl_glandular_DEGs,CSRNP1_E,16.31402,7.845742000000001e-60,8.346663e-58,152,cl_glandular_DEGs CSRNP1_E,+
4,cl_ciliated_DEGs,ZNF19_E,15.88198,8.447424e-57,2.920697e-54,175,cl_ciliated_DEGs ZNF19_E,+
5,cl_ciliated_DEGs,FOXJ1_E,15.11627,1.265129e-51,3.499348e-49,149,cl_ciliated_DEGs FOXJ1_E,+
7,cl_pre-secretory PAEP_DEGs,ZNF594_E,13.45049,3.05853e-41,1.409982e-39,197,cl_pre-secretory PAEP_DEGs ZNF594_E,+
8,cl_pre-secretory FHL2_DEGs,CSRNP1_E,12.8773,6.041396999999999e-38,2.088813e-35,152,cl_pre-secretory FHL2_DEGs CSRNP1_E,+
9,cl_pre-secretory PAEP_DEGs,ZNF493_E,12.70168,5.787402e-37,1.8190859999999999e-35,167,cl_pre-secretory PAEP_DEGs ZNF493_E,+


## Plot heatmap

In [222]:
# exclude scanpy
TF_activities = TF_activities[ grep('scanpy', TF_activities$cl_name, invert = T) ,]
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(TF_activities, Regulon~cl_name, fill = 0)
# build matrix labels -  aka DE sign - to overlay with the TF activities
TF_activities$value = TF_activities$label
x_label = acast(TF_activities, Regulon~cl_name, fill = '')

In [223]:
# actually plot the heatmap
pheatmap(x, 
         display_numbers = x_label, fontsize_number=10,
         cellheight = 10, cellwidth = 10, 
         filename = 'figures_primaryEpithelial_clean/cluster_markers/TFs_heatmap.pdf')

## Filter TFs in Notch and Wnt signalling

In [224]:
Wnt = read.csv('figures_primaryEpithelial_clean/cluster_markers/TF_priors/Wnt_list', stringsAsFactors = F, sep = '\t')
Wnt$gene_symbol = sapply(strsplit(Wnt$Gene.names, ' '), head, 1)

Notch = read.csv('figures_primaryEpithelial_clean/cluster_markers/TF_priors/Notch_list', stringsAsFactors = F, sep = '\t')
Notch$gene_symbol = sapply(strsplit(Notch$Gene.names, ' '), head, 1)

In [225]:
TFs_in_x = sapply(strsplit(rownames(x), '_'), head, 1)
is_Wnt = TFs_in_x %in% Wnt$gene_symbol
is_Notch = TFs_in_x %in% Notch$gene_symbol

In [226]:
# actually plot the heatmap
pheatmap(x[is_Wnt, ], 
         display_numbers = x_label[is_Wnt, ], fontsize_number=10,
         cellheight = 10, cellwidth = 10, 
         filename = 'figures_primaryEpithelial_clean/cluster_markers/TFs_heatmap_Wnt.pdf')

# actually plot the heatmap
pheatmap(x[is_Notch, ], 
         display_numbers = x_label[is_Notch, ], fontsize_number=10,
         cellheight = 10, cellwidth = 10, 
         filename = 'figures_primaryEpithelial_clean/cluster_markers/TFs_heatmap_Notch.pdf')

# Plot top 10 active TFs

In [228]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
1,cl_ciliated_DEGs,ZNF19_E,15.881976,8.447424e-57,2.920697e-54,175,cl_ciliated_DEGs ZNF19_E,+,+,1
2,cl_ciliated_DEGs,FOXJ1_E,15.116268,1.265129e-51,3.499348e-49,149,cl_ciliated_DEGs FOXJ1_E,+,+,2
3,cl_ciliated_DEGs,ZSCAN1_E,8.735679,2.421973e-18,6.090162e-17,118,cl_ciliated_DEGs ZSCAN1_E,+,+,3
4,cl_ciliated_DEGs,ZNF311_E,8.074541,6.773112e-16,1.301002e-14,194,cl_ciliated_DEGs ZNF311_E,+,+,4
5,cl_ciliated_DEGs,ZNF713_E,7.498589,6.450836e-14,9.803853e-13,84,cl_ciliated_DEGs ZNF713_E,+,+,5
6,cl_ciliated_DEGs,HES6_E,6.989124,2.766088e-12,3.477727e-11,185,cl_ciliated_DEGs HES6_E,+,+,6


In [229]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_proliferative_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_pre-secretory PAEP_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_pre-secretory FHL2_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_glandular_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_ciliated_DEGs'))

In [230]:
head(TF_activities)
TFs = read.csv('data/scenic/huTF_census.txt', stringsAsFactors = F, header = F)$V1

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
469,cl_proliferative_DEGs,ZNF749_E,10.475806,1.115827e-25,2.030511e-24,194,cl_proliferative_DEGs ZNF749_E,,,1
470,cl_proliferative_DEGs,MYC_AA,9.558607,1.193528e-21,1.774892e-20,144,cl_proliferative_DEGs MYC_AA,+,+,2
471,cl_proliferative_DEGs,HES6_E,8.848665,8.857229e-19,1.0651780000000001e-17,185,cl_proliferative_DEGs HES6_E,,,3
472,cl_proliferative_DEGs,ZFP69_E,8.840668,9.514773999999999e-19,1.1246950000000002e-17,161,cl_proliferative_DEGs ZFP69_E,+,+,4
473,cl_proliferative_DEGs,E2F4_AA,8.000039,1.243803e-15,1.178205e-14,100,cl_proliferative_DEGs E2F4_AA,+,+,5
474,cl_proliferative_DEGs,ZNF79_E,7.497223,6.518407e-14,5.564788e-13,155,cl_proliferative_DEGs ZNF79_E,,,6


In [231]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top10 = subset(TF_activities, rank %in% 1:10 & NES > 0)$Gene

In [232]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top10), Gene~cl_name, fill = 0)
colnames(x)

In [233]:
x = x[, c(5, 4, 3, 2, 1) ]
colnames(x) = c('proliferative', 'preFHL2','prePAEP', 'glandular', 'ciliated')
pheatmap(t(x[ top10, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'figures_primaryEpithelial_clean/cluster_markers/top10_TFs_activities.pdf')


# Plot top 10 active TFs - matching expression

In [234]:
default_TF_activities = TF_activities

In [235]:
TF_activities = subset(TF_activities, label != '')

In [236]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$rank = NULL
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_ciliated_DEGs,ZNF19_E,15.881976,8.447424e-57,2.920697e-54,175,cl_ciliated_DEGs ZNF19_E,+,15.881976,ZNF19,1
2,cl_ciliated_DEGs,FOXJ1_E,15.116268,1.265129e-51,3.499348e-49,149,cl_ciliated_DEGs FOXJ1_E,+,15.116268,FOXJ1,2
3,cl_ciliated_DEGs,ZSCAN1_E,8.735679,2.421973e-18,6.090162e-17,118,cl_ciliated_DEGs ZSCAN1_E,+,8.735679,ZSCAN1,3
4,cl_ciliated_DEGs,ZNF311_E,8.074541,6.773112e-16,1.301002e-14,194,cl_ciliated_DEGs ZNF311_E,+,8.074541,ZNF311,4
5,cl_ciliated_DEGs,ZNF713_E,7.498589,6.450836e-14,9.803853e-13,84,cl_ciliated_DEGs ZNF713_E,+,7.498589,ZNF713,5
6,cl_ciliated_DEGs,HES6_E,6.989124,2.766088e-12,3.477727e-11,185,cl_ciliated_DEGs HES6_E,+,6.989124,HES6,6


In [237]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_proliferative_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_pre-secretory PAEP_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_pre-secretory FHL2_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_glandular_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_ciliated_DEGs'))

In [238]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top10 = subset(TF_activities, TF_activities$rank %in% 1:10 & NES > 0)$Gene

In [239]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top10), Gene~cl_name, fill = 0)
colnames(x)

In [240]:
x = x[, c(5, 4, 3, 2, 1) ]
colnames(x) = c('proliferative', 'preFHL2','prePAEP', 'glandular', 'ciliated')
pheatmap(t(x[ top10, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'figures_primaryEpithelial_clean/cluster_markers/top10_TFs_activities_filtered_TFisDE.pdf')


# Plot top 10 active TFs - curated TFs - confidence A,B

In [241]:
TF_activities = default_TF_activities[ grep('_E$', default_TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_D$', TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_C$', TF_activities$Regulon, invert = T) , ]

In [242]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$rank = NULL
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_ciliated_DEGs,PITX1_B,4.441791,8.921317e-06,4.269267e-05,11,cl_ciliated_DEGs PITX1_B,+,4.441791,PITX1,1
2,cl_ciliated_DEGs,HIC1_B,4.227835,2.359508e-05,0.0001066405,10,cl_ciliated_DEGs HIC1_B,+,4.227835,HIC1,2
3,cl_ciliated_DEGs,ZNF382_B,4.184658,2.855952e-05,0.0001278246,10,cl_ciliated_DEGs ZNF382_B,+,4.184658,ZNF382,3
4,cl_ciliated_DEGs,NFKBIA_B,3.576535,0.0003481781,0.001206843,12,cl_ciliated_DEGs NFKBIA_B,-,3.576535,NFKBIA,4
5,cl_glandular_DEGs,FOXO3_AA,5.741249,9.398083e-09,4.347006e-08,19,cl_glandular_DEGs FOXO3_AA,-,5.741249,FOXO3,1
6,cl_glandular_DEGs,HIF1A_AA,5.25366,1.491063e-07,6.174074e-07,60,cl_glandular_DEGs HIF1A_AA,+,5.25366,HIF1A,2


In [243]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_proliferative_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_pre-secretory PAEP_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_pre-secretory FHL2_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_glandular_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_ciliated_DEGs'))

In [244]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top10 = subset(TF_activities, rank %in% 1:10 & NES > 0)$Gene

In [245]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top10), Gene~cl_name, fill = 0)
colnames(x)

In [246]:
x = x[, c(5, 4, 3, 2, 1) ]
colnames(x) = c('proliferative', 'preFHL2','prePAEP', 'glandular', 'ciliated')
pheatmap(t(x[ top10, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'figures_primaryEpithelial_clean/cluster_markers/top10_TFs_activities_filtered_curatedTFs.pdf')


# Plot top 10 active TFs - curated and DE TFs

In [247]:
TF_activities = default_TF_activities[ grep('_E$', default_TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_D$', TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_C$', TF_activities$Regulon, invert = T) , ]
TF_activities = subset(TF_activities, label != '')

In [248]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$rank = NULL
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_ciliated_DEGs,PITX1_B,4.441791,8.921317e-06,4.269267e-05,11,cl_ciliated_DEGs PITX1_B,+,4.441791,PITX1,1
2,cl_ciliated_DEGs,HIC1_B,4.227835,2.359508e-05,0.0001066405,10,cl_ciliated_DEGs HIC1_B,+,4.227835,HIC1,2
3,cl_ciliated_DEGs,ZNF382_B,4.184658,2.855952e-05,0.0001278246,10,cl_ciliated_DEGs ZNF382_B,+,4.184658,ZNF382,3
4,cl_ciliated_DEGs,NFKBIA_B,3.576535,0.0003481781,0.001206843,12,cl_ciliated_DEGs NFKBIA_B,-,3.576535,NFKBIA,4
5,cl_glandular_DEGs,FOXO3_AA,5.741249,9.398083e-09,4.347006e-08,19,cl_glandular_DEGs FOXO3_AA,-,5.741249,FOXO3,1
6,cl_glandular_DEGs,HIF1A_AA,5.25366,1.491063e-07,6.174074e-07,60,cl_glandular_DEGs HIF1A_AA,+,5.25366,HIF1A,2


In [249]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_proliferative_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_pre-secretory PAEP_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_pre-secretory FHL2_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_glandular_DEGs'),
                     subset(TF_activities, cl_name %in% 'cl_ciliated_DEGs'))

In [250]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top10 = subset(TF_activities, rank %in% 1:10 & NES > 0)$Gene

In [251]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top10), Gene~cl_name, fill = 0)
colnames(x)

In [252]:
x = x[, c(5, 4, 3, 2, 1) ]
colnames(x) = c('proliferative', 'preFHL2','prePAEP', 'glandular', 'ciliated')
pheatmap(t(x[ top10, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'figures_primaryEpithelial_clean/cluster_markers/top10_TFs_activities_filtered_curatedTFs_TFisDE.pdf')


# Plot top 10 DE TFs

In [253]:
TFs = read.csv('data/scenic/huTF_census.txt', stringsAsFactors = F, header = F)$V1



DE_gland = read.csv('figures_primaryEpithelial_clean/cluster_markers/glandular_DEGs.csv', stringsAsFactors = F)
DE_ciliated = read.csv('figures_primaryEpithelial_clean/cluster_markers/ciliated_DEGs.csv', stringsAsFactors = F)
DE_preFHL2 = read.csv('figures_primaryEpithelial_clean/cluster_markers/pre-secretory FHL2_DEGs.csv', stringsAsFactors = F)
DE_prePAEP = read.csv('figures_primaryEpithelial_clean/cluster_markers/pre-secretory PAEP_DEGs.csv', stringsAsFactors = F)
DE_proliferative = read.csv('figures_primaryEpithelial_clean/cluster_markers/proliferative_DEGs.csv', stringsAsFactors = F)

DE = list(proliferative=DE_proliferative,
         presecretory_FHL2=DE_preFHL2,
         presecretory_PAEP=DE_prePAEP,
         glandular=DE_gland,
         ciliated=DE_ciliated)

DE = lapply(DE, function(x){
    x = subset(x, Gene %in% TFs)
    x = x[ order(x$logFC, decreasing = T) ,]
    x$rank = 1:nrow(x)
    return(x)
})


In [254]:
DE_df = melt(DE, id.vars = names(DE[[1]]) )
head(DE_df)

Unnamed: 0_level_0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,rank,L1
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
1,PGR,1.1851792,0.0,0.0,1.614316,0.401176,0.972064,0.339943,1,proliferative
2,MSX1,0.8422654,7.335177e-227,1.6154889999999998e-224,2.019669,0.956652,0.988597,0.616431,2,proliferative
3,ESR1,0.6246472,1.859605e-179,2.224753e-177,1.391341,0.595358,0.958381,0.493201,3,proliferative
4,YBX1,0.6112792,6.706856e-127,4.6258220000000003e-125,2.116048,1.485907,0.995439,0.799433,4,proliferative
5,SOX17,0.6022863,3.7215240000000004e-99,1.689153e-97,1.74478,1.214902,0.976625,0.717564,5,proliferative
6,MSX2,0.5586018,6.697313e-182,8.428611e-180,0.969983,0.301522,0.851767,0.309348,6,proliferative


In [255]:
# Select top 10 ranked genes
top10 = subset(DE_df, rank %in% 1:10)$Gene #%>% unique(.)

In [256]:
# build matrix of TF activity scores to plot
DE_df$value = DE_df$logFC
x = acast(subset(DE_df, Gene %in% top10), Gene~L1, fill = 0)
pheatmap(t(x[top10, names(DE)]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F,
        filename = 'figures_primaryEpithelial_clean/cluster_markers/top10_TFs_DE.pdf')


In [257]:
# mydf = subset(DE_df, Gene %in% top10)
# mydf$Gene = factor(mydf$Gene, levels = unique(top10))
# mydf$cluster = factor(mydf$L1, levels = rev(names(DE)))

# require(ggplot2)
#   ggplot(mydf, 
#          aes(x = Gene,
#              y = cluster)) +        ## global aes
# #   geom_tile(aes(fill = P.Value)) +         ## to get the rect filled
#   geom_point(aes(colour = logFC, 
#                    size = percentExpr_cluster))  +    ## geom_point for circle illusion
#   scale_color_gradient2(low = "blue",   mid = 'white',
#                        high = "red")+       ## color of the corresponding aes
#   scale_size(range = c(1, 4))+             ## to tune the size of circles
#   theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# ggsave(filename = 'figures_primaryEpithelial_clean/cluster_markers/top10_TFs_DE_dotplot.pdf', dpi = 300, width = 10, height = 4)