#  Plot TFs

In [1]:
library(dplyr)
library(reshape2)
library(plyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


------------------------------------------------------------------------------

You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)

------------------------------------------------------------------------------


Attaching package: ‘plyr’


The following objects are masked from ‘package:dplyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize



********************************************************

Note: As of version 1.0.0, cowplot does not change the

  default ggplot2 theme anymore. To recover the previous

  behavior, execute:
  theme_set(theme_cowplot())

********************************************************




## Load TF activities/enrichment file

In [2]:
# Read the TF activity table (one row per cluster / regulon pair)
TF_activities <- read.csv(
    'figures_both_organoid/cluster_markers/TFs_activities.csv',
    stringsAsFactors = FALSE
)
# Highest NES first
TF_activities <- TF_activities[order(TF_activities$NES, decreasing = TRUE), ]
# "<cluster> <regulon>" key identifying each row
TF_activities$id <- paste(TF_activities$cl_name, TF_activities$Regulon)

head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>
1,cl_Inter_PGRpos_DEGs,CSRNP1_E,18.08183,4.431211e-73,4.383734e-71,154,cl_Inter_PGRpos_DEGs CSRNP1_E
2,cl_Ciliated_G2M_DEGs,ZNF19_E,15.42087,1.184992e-53,4.103034e-51,175,cl_Ciliated_G2M_DEGs ZNF19_E
3,cl_Ciliated_DEGs,FOXJ1_E,15.33469,4.484099e-53,1.940774e-51,154,cl_Ciliated_DEGs FOXJ1_E
4,cl_KRT5_KRT13_scanpy_DEGs,ZBED2_E,15.09429,1.7658829999999998e-51,2.650787e-49,183,cl_KRT5_KRT13_scanpy_DEGs ZBED2_E
7,cl_proliferative_DEGs,ZNF749_E,14.86131,5.876604e-50,1.45341e-48,195,cl_proliferative_DEGs ZNF749_E
9,cl_Inter_PGRpos_DEGs,ZNF749_E,14.61859,2.1379059999999998e-48,1.0575e-46,195,cl_Inter_PGRpos_DEGs ZNF749_E


## Load TF agreement file to retrieve DE sign

In [3]:
# Tab-separated table pairing each TF's expression change with its regulon activity
TF_activities_agreement <- read.delim(
    'figures_both_organoid/cluster_markers/TFactivities_and_expression_agreement.csv',
    stringsAsFactors = FALSE
)
# "<cluster> <regulon>" key, built from the cluster and the regulon name
TF_activities_agreement$id <- paste(
    TF_activities_agreement$cl,
    TF_activities_agreement$TF_activity_score
)
# Sign annotation: '-' when the TF itself is downregulated, '+' for every other row
TF_activities_agreement$annot <- '+'
TF_activities_agreement$annot[
    which(TF_activities_agreement$TF_expression == 'downregulated')
] <- '-'
head(TF_activities_agreement)

Unnamed: 0_level_0,cl,TF,TF_expression,TF_expression_pvals_adj,TF_activity,TF_activity_pvals_adj,TF_activity_score,L1,id,annot
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>
1,cl_Ciliated_DEGs,FOXJ1,upregulated,0.0,active_regulon,1.94e-51,FOXJ1_E,cl_Ciliated_DEGs,cl_Ciliated_DEGs FOXJ1_E,+
2,cl_Ciliated_DEGs,TP73,upregulated,0.0,active_regulon,0.00148,TP73_C,cl_Ciliated_DEGs,cl_Ciliated_DEGs TP73_C,+
3,cl_Ciliated_DEGs,MYC,downregulated,4.0699999999999995e-180,inactive_regulon,5.660000000000001e-27,MYC_AA,cl_Ciliated_DEGs,cl_Ciliated_DEGs MYC_AA,-
4,cl_Ciliated_DEGs,ETV4,downregulated,7.93e-149,inactive_regulon,0.0447,ETV4_A,cl_Ciliated_DEGs,cl_Ciliated_DEGs ETV4_A,-
5,cl_Ciliated_DEGs,FOXO3,upregulated,2.9099999999999997e-141,active_regulon,0.00352,FOXO3_AA,cl_Ciliated_DEGs,cl_Ciliated_DEGs FOXO3_AA,+
6,cl_Ciliated_DEGs,ID1,downregulated,1.48e-118,active_regulon,1.58e-06,ID1_E,cl_Ciliated_DEGs,cl_Ciliated_DEGs ID1_E,-


## Transfer labels

In [4]:
# Look up each activity row's sign annotation by its "<cluster> <regulon>" key
TF_activities$label <- with(
    TF_activities_agreement,
    annot[match(TF_activities$id, id)]
)
# Rows without an entry in the agreement table get an empty label
TF_activities$label <- replace(
    TF_activities$label,
    is.na(TF_activities$label),
    ''
)
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
1,cl_Inter_PGRpos_DEGs,CSRNP1_E,18.08183,4.431211e-73,4.383734e-71,154,cl_Inter_PGRpos_DEGs CSRNP1_E,+
2,cl_Ciliated_G2M_DEGs,ZNF19_E,15.42087,1.184992e-53,4.103034e-51,175,cl_Ciliated_G2M_DEGs ZNF19_E,+
3,cl_Ciliated_DEGs,FOXJ1_E,15.33469,4.484099e-53,1.940774e-51,154,cl_Ciliated_DEGs FOXJ1_E,+
4,cl_KRT5_KRT13_scanpy_DEGs,ZBED2_E,15.09429,1.7658829999999998e-51,2.650787e-49,183,cl_KRT5_KRT13_scanpy_DEGs ZBED2_E,+
7,cl_proliferative_DEGs,ZNF749_E,14.86131,5.876604e-50,1.45341e-48,195,cl_proliferative_DEGs ZNF749_E,-
9,cl_Inter_PGRpos_DEGs,ZNF749_E,14.61859,2.1379059999999998e-48,1.0575e-46,195,cl_Inter_PGRpos_DEGs ZNF749_E,


## Plot heatmap

In [5]:
# Drop every cluster whose name contains 'scanpy'
TF_activities <- TF_activities[!grepl('scanpy', TF_activities$cl_name), ]

# Regulon x cluster matrix of NES; combinations absent from the table become 0
TF_activities$value <- TF_activities$NES
x <- acast(TF_activities, Regulon ~ cl_name, value.var = 'value', fill = 0)

# Same-shaped matrix of the '+' / '-' labels (empty string where there is none),
# to be printed on top of the NES heatmap
TF_activities$value <- TF_activities$label
x_label <- acast(TF_activities, Regulon ~ cl_name, value.var = 'value', fill = '')

In [6]:
# Write the NES heatmap to PDF, printing the sign labels inside the cells
pheatmap(
    x,
    display_numbers = x_label,
    fontsize_number = 10,
    cellheight = 10,
    cellwidth = 10,
    filename = 'figures_both_organoid/cluster_markers/TFs_heatmap.pdf'
)

## Filter TFs in Notch and Wnt signalling

In [7]:
# Wnt gene list (tab-separated); keep the first space-delimited entry of
# `Gene.names` as the gene symbol
Wnt <- read.delim(
    'figures_primaryEpithelial_clean/cluster_markers/TF_priors/Wnt_list',
    stringsAsFactors = FALSE
)
Wnt$gene_symbol <- sapply(
    strsplit(Wnt$Gene.names, ' '),
    function(parts) head(parts, 1)
)

# Notch gene list, processed the same way
Notch <- read.delim(
    'figures_primaryEpithelial_clean/cluster_markers/TF_priors/Notch_list',
    stringsAsFactors = FALSE
)
Notch$gene_symbol <- sapply(
    strsplit(Notch$Gene.names, ' '),
    function(parts) head(parts, 1)
)

In [8]:
# TF symbol of each heatmap row = regulon name up to the first underscore
TFs_in_x <- sapply(strsplit(rownames(x), '_'), function(parts) head(parts, 1))
# Flag the rows whose TF appears in the Wnt / Notch gene lists
is_Wnt <- is.element(TFs_in_x, Wnt$gene_symbol)
is_Notch <- is.element(TFs_in_x, Notch$gene_symbol)
table(is_Wnt)
table(is_Notch)

is_Wnt
FALSE  TRUE 
  506     8 

is_Notch
FALSE  TRUE 
  513     1 

In [9]:
# Heatmap restricted to the regulons whose TF is in the Wnt gene list.
# drop = FALSE keeps both subsets as matrices even if only one regulon is
# selected; without it a single-row selection collapses to a plain vector.
pheatmap(x[is_Wnt, , drop = FALSE], 
         display_numbers = x_label[is_Wnt, , drop = FALSE], fontsize_number=10,
         cellheight = 10, cellwidth = 10, 
         filename = 'figures_both_organoid/cluster_markers/TFs_heatmap_Wnt.pdf')

# Notch version, left disabled.
# NOTE(review): is_Notch selects a single regulon in the recorded run; a one-row
# heatmap presumably also needs cluster_rows = FALSE - confirm before enabling.
# pheatmap(x[is_Notch, , drop = FALSE], 
#          display_numbers = x_label[is_Notch, , drop = FALSE], fontsize_number=10,
#          cellheight = 10, cellwidth = 10, 
#          filename = 'figures_both_organoid/cluster_markers/TFs_heatmap_Notch.pdf')

# Plot top 5 active TFs per cluster

In [10]:
# Sort by decreasing NES, then number the rows 1..n within each cluster so that
# rank 1 is the regulon with the highest NES in that cluster
TF_activities <- TF_activities[order(TF_activities$NES, decreasing = TRUE), ]
TF_activities <- ddply(
    TF_activities,
    'cl_name',
    function(cluster_df) data.frame(cluster_df, rank = seq_len(nrow(cluster_df)))
)
head(TF_activities)
unique(TF_activities$cl_name)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
1,cl_Ciliated_DEGs,FOXJ1_E,15.33469,4.484099e-53,1.940774e-51,154,cl_Ciliated_DEGs FOXJ1_E,+,+,1
2,cl_Ciliated_DEGs,ZNF19_E,13.06571,5.169969e-39,1.704859e-37,175,cl_Ciliated_DEGs ZNF19_E,+,+,2
3,cl_Ciliated_DEGs,ZSCAN1_E,7.106026,1.194321e-12,1.355848e-11,122,cl_Ciliated_DEGs ZSCAN1_E,,,3
4,cl_Ciliated_DEGs,ZNF160_E,6.297482,3.025193e-10,2.869789e-09,196,cl_Ciliated_DEGs ZNF160_E,+,+,4
5,cl_Ciliated_DEGs,ZNF93_E,6.076328,1.229661e-09,1.084764e-08,56,cl_Ciliated_DEGs ZNF93_E,-,-,5
6,cl_Ciliated_DEGs,ZNF713_E,5.651316,1.592244e-08,1.252988e-07,83,cl_Ciliated_DEGs ZNF713_E,+,+,6


In [11]:
# Stack the rows cluster by cluster in a fixed order; rows of clusters not listed
# here are dropped, and the within-cluster row order is kept
TF_activities <- do.call(
    rbind,
    lapply(
        c('cl_proliferative_DEGs',
          'cl_KRT5_LRG6_DEGs',
          'cl_Inter_PGRneg_DEGs',
          'cl_KRT5_KRT13_DEGs',
          'cl_Inter_PGRpos_DEGs',
          'cl_Secretory_DEGs',
          'cl_Ciliated_G2M_DEGs',
          'cl_Ciliated_DEGs'),
        function(cl) TF_activities[TF_activities$cl_name %in% cl, ]
    )
)

In [12]:
head(TF_activities)
# First column of the header-less census file, as a character vector
TFs <- read.csv(
    'data/scenic/huTF_census.txt',
    stringsAsFactors = FALSE,
    header = FALSE
)[['V1']]

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
1167,cl_proliferative_DEGs,ZNF749_E,14.86131,5.876604e-50,1.45341e-48,195,cl_proliferative_DEGs ZNF749_E,-,-,1
1168,cl_proliferative_DEGs,E2F4_AA,12.80696,1.499014e-37,2.8055859999999997e-36,100,cl_proliferative_DEGs E2F4_AA,+,+,2
1169,cl_proliferative_DEGs,HES6_E,12.40955,2.319423e-35,3.7792949999999997e-34,184,cl_proliferative_DEGs HES6_E,+,+,3
1170,cl_proliferative_DEGs,ZNF557_E,11.25382,2.217834e-29,2.982233e-28,182,cl_proliferative_DEGs ZNF557_E,+,+,4
1171,cl_proliferative_DEGs,ZNF79_E,11.13473,8.500548e-29,1.1003050000000001e-27,155,cl_proliferative_DEGs ZNF79_E,,,5
1172,cl_proliferative_DEGs,ZFP69_E,10.96599,5.5685860000000005e-28,7.011356000000001e-27,161,cl_proliferative_DEGs ZFP69_E,+,+,6


In [13]:
# Gene symbol = regulon name up to the first underscore
TF_activities$Gene <- sapply(
    strsplit(TF_activities$Regulon, '_'),
    function(parts) head(parts, 1)
)
# Genes of the 5 best-ranked regulons of each cluster, positive NES only.
# NOTE(review): a gene ranked in the top 5 of several clusters appears several
# times in top5 - confirm whether unique() is wanted.
top5 <- with(TF_activities, Gene[which(rank %in% 1:5 & NES > 0)])

In [14]:
# Gene x cluster matrix of NES for the selected genes; missing pairs become 0
TF_activities$value <- TF_activities$NES
x <- acast(
    TF_activities[TF_activities$Gene %in% top5, ],
    Gene ~ cl_name,
    value.var = 'value',
    fill = 0
)
colnames(x)

In [15]:
# Order the cluster columns by name rather than by hard-coded positions: the
# positions depend on how cluster names sort alphabetically (locale-dependent)
# and on every cluster being present in the matrix.
cluster_order = c('cl_proliferative_DEGs',
                  'cl_KRT5_LRG6_DEGs',
                  'cl_Inter_PGRneg_DEGs',
                  'cl_KRT5_KRT13_DEGs',
                  'cl_Inter_PGRpos_DEGs',
                  'cl_Secretory_DEGs',
                  'cl_Ciliated_G2M_DEGs',
                  'cl_Ciliated_DEGs')
# Clusters absent from the matrix are skipped instead of raising an error
x = x[, cluster_order[cluster_order %in% colnames(x)], drop = FALSE]
# Clusters as rows, genes (in top5 order) as columns; no clustering, keep the order
pheatmap(t(x[ top5, , drop = FALSE]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'figures_both_organoid/cluster_markers/top5_TFs_activities.pdf')


# Plot top 5 active TFs per cluster - matching expression

In [16]:
# Keep an unfiltered copy so later sections can restart from it
default_TF_activities <- TF_activities

In [17]:
# Keep only the rows that carry a '+' / '-' label
TF_activities <- TF_activities[which(TF_activities$label != ''), ]

In [18]:
# Recompute the per-cluster rank on the filtered table: sort by decreasing NES,
# discard the old rank, then number the rows 1..n within each cluster
TF_activities <- TF_activities[order(TF_activities$NES, decreasing = TRUE), ]
TF_activities[['rank']] <- NULL
TF_activities <- ddply(
    TF_activities,
    'cl_name',
    function(cluster_df) data.frame(cluster_df, rank = seq_len(nrow(cluster_df)))
)
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_Ciliated_DEGs,FOXJ1_E,15.33469,4.484099e-53,1.940774e-51,154,cl_Ciliated_DEGs FOXJ1_E,+,15.33469,FOXJ1,1
2,cl_Ciliated_DEGs,ZNF19_E,13.06571,5.169969e-39,1.704859e-37,175,cl_Ciliated_DEGs ZNF19_E,+,13.06571,ZNF19,2
3,cl_Ciliated_DEGs,ZNF160_E,6.297482,3.025193e-10,2.869789e-09,196,cl_Ciliated_DEGs ZNF160_E,+,6.297482,ZNF160,3
4,cl_Ciliated_DEGs,ZNF93_E,6.076328,1.229661e-09,1.084764e-08,56,cl_Ciliated_DEGs ZNF93_E,-,6.076328,ZNF93,4
5,cl_Ciliated_DEGs,ZNF713_E,5.651316,1.592244e-08,1.252988e-07,83,cl_Ciliated_DEGs ZNF713_E,+,5.651316,ZNF713,5
6,cl_Ciliated_DEGs,RFX1_C,5.450041,5.035832e-08,3.770069e-07,42,cl_Ciliated_DEGs RFX1_C,-,5.450041,RFX1,6


In [19]:
# Stack the rows cluster by cluster in a fixed order; rows of clusters not listed
# here are dropped, and the within-cluster row order is kept
TF_activities <- do.call(
    rbind,
    lapply(
        c('cl_proliferative_DEGs',
          'cl_KRT5_LRG6_DEGs',
          'cl_Inter_PGRneg_DEGs',
          'cl_KRT5_KRT13_DEGs',
          'cl_Inter_PGRpos_DEGs',
          'cl_Secretory_DEGs',
          'cl_Ciliated_G2M_DEGs',
          'cl_Ciliated_DEGs'),
        function(cl) TF_activities[TF_activities$cl_name %in% cl, ]
    )
)

In [20]:
# Gene symbol = regulon name up to the first underscore
TF_activities$Gene <- sapply(
    strsplit(TF_activities$Regulon, '_'),
    function(parts) head(parts, 1)
)
# Genes of the 5 best-ranked regulons of each cluster, positive NES only.
# NOTE(review): a gene ranked in the top 5 of several clusters appears several
# times in top5 - confirm whether unique() is wanted.
top5 <- with(TF_activities, Gene[which(rank %in% 1:5 & NES > 0)])

In [21]:
# Gene x cluster matrix of NES for the selected genes; missing pairs become 0
TF_activities$value <- TF_activities$NES
x <- acast(
    TF_activities[TF_activities$Gene %in% top5, ],
    Gene ~ cl_name,
    value.var = 'value',
    fill = 0
)
colnames(x)

In [22]:
# Order the cluster columns by name rather than by hard-coded positions: the
# positions depend on how cluster names sort alphabetically (locale-dependent)
# and on every cluster being present in the matrix, which the label filter
# applied in this section does not guarantee.
cluster_order = c('cl_proliferative_DEGs',
                  'cl_KRT5_LRG6_DEGs',
                  'cl_Inter_PGRneg_DEGs',
                  'cl_KRT5_KRT13_DEGs',
                  'cl_Inter_PGRpos_DEGs',
                  'cl_Secretory_DEGs',
                  'cl_Ciliated_G2M_DEGs',
                  'cl_Ciliated_DEGs')
# Clusters absent from the matrix are skipped instead of raising an error
x = x[, cluster_order[cluster_order %in% colnames(x)], drop = FALSE]
# Clusters as rows, genes (in top5 order) as columns; no clustering, keep the order
pheatmap(t(x[ top5, , drop = FALSE]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'figures_both_organoid/cluster_markers/top5_TFs_activities_filtered_TFisDE.pdf')


# Plot top 5 active TFs per cluster - curated TFs - confidence AA, A, B

In [23]:
# Restart from the unfiltered copy and discard every regulon whose name ends in
# _C, _D or _E
TF_activities <- default_TF_activities[
    !grepl('_[CDE]$', default_TF_activities$Regulon),
]

In [24]:
# Recompute the per-cluster rank on the filtered table: sort by decreasing NES,
# discard the old rank, then number the rows 1..n within each cluster
TF_activities <- TF_activities[order(TF_activities$NES, decreasing = TRUE), ]
TF_activities[['rank']] <- NULL
TF_activities <- ddply(
    TF_activities,
    'cl_name',
    function(cluster_df) data.frame(cluster_df, rank = seq_len(nrow(cluster_df)))
)
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_Ciliated_DEGs,STAT2_B,4.231719,2.319121e-05,0.0001225948,44,cl_Ciliated_DEGs STAT2_B,+,4.231719,STAT2,1
2,cl_Ciliated_DEGs,ATF4_AA,3.326269,0.0008801687,0.0035029704,9,cl_Ciliated_DEGs ATF4_AA,-,3.326269,ATF4,2
3,cl_Ciliated_DEGs,FOXO3_AA,3.32429,0.0008864381,0.0035178133,17,cl_Ciliated_DEGs FOXO3_AA,+,3.32429,FOXO3,3
4,cl_Ciliated_DEGs,NFE2L2_A,3.21799,0.001290924,0.004842146,12,cl_Ciliated_DEGs NFE2L2_A,+,3.21799,NFE2L2,4
5,cl_Ciliated_DEGs,ZNF382_B,3.197719,0.001385194,0.0051572397,10,cl_Ciliated_DEGs ZNF382_B,+,3.197719,ZNF382,5
6,cl_Ciliated_DEGs,FOXO4_A,3.062332,0.002196194,0.007779356,12,cl_Ciliated_DEGs FOXO4_A,+,3.062332,FOXO4,6


In [25]:
# Stack the rows cluster by cluster in a fixed order; rows of clusters not listed
# here are dropped, and the within-cluster row order is kept
TF_activities <- do.call(
    rbind,
    lapply(
        c('cl_proliferative_DEGs',
          'cl_KRT5_LRG6_DEGs',
          'cl_Inter_PGRneg_DEGs',
          'cl_KRT5_KRT13_DEGs',
          'cl_Inter_PGRpos_DEGs',
          'cl_Secretory_DEGs',
          'cl_Ciliated_G2M_DEGs',
          'cl_Ciliated_DEGs'),
        function(cl) TF_activities[TF_activities$cl_name %in% cl, ]
    )
)

In [26]:
# Gene symbol = regulon name up to the first underscore
TF_activities$Gene <- sapply(
    strsplit(TF_activities$Regulon, '_'),
    function(parts) head(parts, 1)
)
# Genes of the 5 best-ranked regulons of each cluster, positive NES only.
# NOTE(review): a gene ranked in the top 5 of several clusters appears several
# times in top5 - confirm whether unique() is wanted.
top5 <- with(TF_activities, Gene[which(rank %in% 1:5 & NES > 0)])

In [27]:
# Gene x cluster matrix of NES for the selected genes; missing pairs become 0
TF_activities$value <- TF_activities$NES
x <- acast(
    TF_activities[TF_activities$Gene %in% top5, ],
    Gene ~ cl_name,
    value.var = 'value',
    fill = 0
)
colnames(x)

In [28]:
# Order the cluster columns by name rather than by hard-coded positions: the
# positions depend on how cluster names sort alphabetically (locale-dependent)
# and on every cluster being present in the matrix, which the regulon filter
# applied in this section does not guarantee.
cluster_order = c('cl_proliferative_DEGs',
                  'cl_KRT5_LRG6_DEGs',
                  'cl_Inter_PGRneg_DEGs',
                  'cl_KRT5_KRT13_DEGs',
                  'cl_Inter_PGRpos_DEGs',
                  'cl_Secretory_DEGs',
                  'cl_Ciliated_G2M_DEGs',
                  'cl_Ciliated_DEGs')
# Clusters absent from the matrix are skipped instead of raising an error
x = x[, cluster_order[cluster_order %in% colnames(x)], drop = FALSE]
# Clusters as rows, genes (in top5 order) as columns; no clustering, keep the order
pheatmap(t(x[ top5, , drop = FALSE]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'figures_both_organoid/cluster_markers/top5_TFs_activities_filtered_curatedTFs.pdf')


# Plot top 5 active TFs per cluster - curated and DE TFs

In [29]:
# Restart from the unfiltered copy and discard every regulon whose name ends in
# _C, _D or _E
TF_activities <- default_TF_activities[
    !grepl('_[CDE]$', default_TF_activities$Regulon),
]
# ...and keep only the rows that carry a '+' / '-' label
TF_activities <- TF_activities[which(TF_activities$label != ''), ]

In [30]:
# Recompute the per-cluster rank on the filtered table: sort by decreasing NES,
# discard the old rank, then number the rows 1..n within each cluster
TF_activities <- TF_activities[order(TF_activities$NES, decreasing = TRUE), ]
TF_activities[['rank']] <- NULL
TF_activities <- ddply(
    TF_activities,
    'cl_name',
    function(cluster_df) data.frame(cluster_df, rank = seq_len(nrow(cluster_df)))
)
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_Ciliated_DEGs,STAT2_B,4.231719,2.319121e-05,0.0001225948,44,cl_Ciliated_DEGs STAT2_B,+,4.231719,STAT2,1
2,cl_Ciliated_DEGs,ATF4_AA,3.326269,0.0008801687,0.0035029704,9,cl_Ciliated_DEGs ATF4_AA,-,3.326269,ATF4,2
3,cl_Ciliated_DEGs,FOXO3_AA,3.32429,0.0008864381,0.0035178133,17,cl_Ciliated_DEGs FOXO3_AA,+,3.32429,FOXO3,3
4,cl_Ciliated_DEGs,NFE2L2_A,3.21799,0.001290924,0.004842146,12,cl_Ciliated_DEGs NFE2L2_A,+,3.21799,NFE2L2,4
5,cl_Ciliated_DEGs,ZNF382_B,3.197719,0.001385194,0.0051572397,10,cl_Ciliated_DEGs ZNF382_B,+,3.197719,ZNF382,5
6,cl_Ciliated_DEGs,FOXO4_A,3.062332,0.002196194,0.007779356,12,cl_Ciliated_DEGs FOXO4_A,+,3.062332,FOXO4,6


In [31]:
# Stack the rows cluster by cluster in a fixed order; rows of clusters not listed
# here are dropped, and the within-cluster row order is kept
TF_activities <- do.call(
    rbind,
    lapply(
        c('cl_proliferative_DEGs',
          'cl_KRT5_LRG6_DEGs',
          'cl_Inter_PGRneg_DEGs',
          'cl_KRT5_KRT13_DEGs',
          'cl_Inter_PGRpos_DEGs',
          'cl_Secretory_DEGs',
          'cl_Ciliated_G2M_DEGs',
          'cl_Ciliated_DEGs'),
        function(cl) TF_activities[TF_activities$cl_name %in% cl, ]
    )
)

In [32]:
# Gene symbol = regulon name up to the first underscore
TF_activities$Gene <- sapply(
    strsplit(TF_activities$Regulon, '_'),
    function(parts) head(parts, 1)
)
# Genes of the 5 best-ranked regulons of each cluster, positive NES only.
# NOTE(review): a gene ranked in the top 5 of several clusters appears several
# times in top5 - confirm whether unique() is wanted.
top5 <- with(TF_activities, Gene[which(rank %in% 1:5 & NES > 0)])

In [33]:
# Gene x cluster matrix of NES for the selected genes; missing pairs become 0
TF_activities$value <- TF_activities$NES
x <- acast(
    TF_activities[TF_activities$Gene %in% top5, ],
    Gene ~ cl_name,
    value.var = 'value',
    fill = 0
)
colnames(x)

In [34]:
# Order the cluster columns by name rather than by hard-coded positions: the
# positions depend on how cluster names sort alphabetically (locale-dependent)
# and on every cluster being present in the matrix, which the regulon and label
# filters applied in this section do not guarantee.
cluster_order = c('cl_proliferative_DEGs',
                  'cl_KRT5_LRG6_DEGs',
                  'cl_Inter_PGRneg_DEGs',
                  'cl_KRT5_KRT13_DEGs',
                  'cl_Inter_PGRpos_DEGs',
                  'cl_Secretory_DEGs',
                  'cl_Ciliated_G2M_DEGs',
                  'cl_Ciliated_DEGs')
# Clusters absent from the matrix are skipped instead of raising an error
x = x[, cluster_order[cluster_order %in% colnames(x)], drop = FALSE]
# Clusters as rows, genes (in top5 order) as columns; no clustering, keep the order
pheatmap(t(x[ top5, , drop = FALSE]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
#          color = colorRampPalette(c("navy", "white", "red"))(50),
        filename = 'figures_both_organoid/cluster_markers/top5_TFs_activities_filtered_curatedTFs_TFisDE.pdf')


# Plot top 5 DE TFs per cluster

In [35]:
# Gene symbols from the header-less TF census file (first column)
TFs = read.csv('data/scenic/huTF_census.txt', stringsAsFactors = F, header = F)$V1

# Per-cluster differential-expression tables
DE_proliferative = read.csv('figures_both_organoid/cluster_markers/proliferative_DEGs.csv', stringsAsFactors = F)
DE_KRT5_LRG6 = read.csv('figures_both_organoid/cluster_markers/KRT5_LRG6_DEGs.csv', stringsAsFactors = F)
DE_Inter_PGRneg = read.csv('figures_both_organoid/cluster_markers/Inter_PGRneg_DEGs.csv', stringsAsFactors = F)
DE_KRT5_KRT13 = read.csv('figures_both_organoid/cluster_markers/KRT5_KRT13_DEGs.csv', stringsAsFactors = F)
DE_Inter_PGRpos = read.csv('figures_both_organoid/cluster_markers/Inter_PGRpos_DEGs.csv', stringsAsFactors = F)
DE_Secretory = read.csv('figures_both_organoid/cluster_markers/Secretory_DEGs.csv', stringsAsFactors = F)
DE_Ciliated_G2M = read.csv('figures_both_organoid/cluster_markers/Ciliated_G2M_DEGs.csv', stringsAsFactors = F)
DE_Ciliated = read.csv('figures_both_organoid/cluster_markers/Ciliated_DEGs.csv', stringsAsFactors = F)

# Named list; the element order is the cluster order used for plotting
DE = list(proliferative=DE_proliferative,
         KRT5_LRG6=DE_KRT5_LRG6,
         Inter_PGRneg=DE_Inter_PGRneg,
         KRT5_KRT13=DE_KRT5_KRT13,
         Inter_PGRpos=DE_Inter_PGRpos,
         Secretory=DE_Secretory,
         Ciliated_G2M=DE_Ciliated_G2M,
         Ciliated=DE_Ciliated)

# For each cluster: keep only the genes listed in the TF census, sort by
# decreasing logFC and number the rows 1..n
DE = lapply(DE, function(de){
    de = subset(de, Gene %in% TFs)
    de = de[ order(de$logFC, decreasing = T) ,]
    # seq_len() yields an empty rank for a cluster with no TF among its DEGs,
    # where 1:nrow() would give c(1, 0) and make the assignment fail
    de$rank = seq_len(nrow(de))
    de
})


In [36]:
# Stack the per-cluster tables into one long data frame; every original column is
# kept as an id variable and the list element name ends up in column L1
DE_df <- melt(DE, id.vars = colnames(DE[[1]]))
head(DE_df)

Unnamed: 0_level_0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,rank,L1
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
1,MYBL2,0.5551049,0,0,0.692711,0.136919,0.6714,0.14705,1,proliferative
2,DNMT1,0.4785667,0,0,1.103327,0.623507,0.8858,0.574062,2,proliferative
3,SSRP1,0.3691107,0,0,1.14312,0.778406,0.9082,0.679517,3,proliferative
4,TFDP1,0.3541961,0,0,0.868315,0.511302,0.7928,0.504296,4,proliferative
5,FOXM1,0.3504815,0,0,0.438449,0.088996,0.479,0.106243,5,proliferative
6,LYAR,0.3464958,0,0,0.759308,0.40988,0.7214,0.412836,6,proliferative


In [37]:
# Genes with rank 1-5 in each cluster (ranked by decreasing logFC).
# NOTE(review): a gene ranked in the top 5 of several clusters appears several
# times in top5 - confirm whether unique() is wanted.
top5 <- DE_df$Gene[DE_df$rank %in% 1:5]

In [38]:
# Gene x cluster matrix of logFC for the selected TFs; missing pairs become 0
DE_df$value <- DE_df$logFC
x <- acast(
    DE_df[DE_df$Gene %in% top5, ],
    Gene ~ L1,
    value.var = 'value',
    fill = 0
)
# Clusters as rows (in the order of the DE list), genes as columns (top5 order);
# no clustering so both orders are kept
pheatmap(
    t(x[top5, names(DE)]),
    cellheight = 10,
    cellwidth = 10,
    cluster_rows = F,
    cluster_cols = F,
    filename = 'figures_both_organoid/cluster_markers/top5_TFs_DE.pdf'
)


In [39]:
# mydf = subset(DE_df, Gene %in% top5)
# mydf$Gene = factor(mydf$Gene, levels = unique(top5))
# mydf$cluster = factor(mydf$L1, levels = rev(names(DE)))

# require(ggplot2)
#   ggplot(mydf, 
#          aes(x = Gene,
#              y = cluster)) +        ## global aes
# #   geom_tile(aes(fill = P.Value)) +         ## to get the rect filled
#   geom_point(aes(colour = logFC, 
#                    size = percentExpr_cluster))  +    ## geom_point for circle illusion
#   scale_color_gradient2(low = "blue",   mid = 'white',
#                        high = "red")+       ## color of the corresponding aes
#   scale_size(range = c(1, 4))+             ## to tune the size of circles
#   theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# ggsave(filename = 'figures_both_organoid/cluster_markers/top5_TFs_DE_dotplot.pdf', dpi = 300, width = 10, height = 4)