#  Plot TFs

In [54]:
library(dplyr)
library(reshape2)
library(plyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)

## Load TF activities/enrichemnt file

In [55]:
TF_activities = read.csv('data/spatial/DEGs/TFs_activities.csv', stringsAsFactors = F)


TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$id = paste(TF_activities$cl_name, TF_activities$Regulon)
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>
3,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,SP6_E,6.977937,2.995461e-12,2.878971e-11,139,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 SP6_E
5,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,SMAD7_B,6.608119,3.892325e-11,5.28135e-10,14,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 SMAD7_B
6,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,KDM5A_D,6.224759,4.822969e-10,3.667577e-09,185,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 KDM5A_D
8,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,IKZF2_E,5.859704,4.63694e-09,3.161342e-08,190,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 IKZF2_E
10,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,THAP1_C,5.82867,5.587086e-09,3.753654e-08,53,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 THAP1_C
11,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,ELF1_C,5.782228,7.371767e-09,4.90506e-08,56,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 ELF1_C


Intersect with any TF relevant in epithelial from the scRNA

In [56]:
# TF_activities_scRNA = read.csv('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/TFs_activities.csv', stringsAsFactors = F)
# TF_activities = subset(TF_activities, Regulon %in% subset(TF_activities_scRNA, FDR < 0.05 & NES > 2)$Regulon )

## Load TF agreement file to retrieve DE sign

In [57]:
TF_activities_agreement = read.csv('data/spatial/DEGs/TFactivities_and_expression_agreement.csv', stringsAsFactors = F, sep = '\t')
TF_activities_agreement$id = paste(TF_activities_agreement$cl, TF_activities_agreement$TF_activity_score)
TF_activities_agreement$annot = '+'
TF_activities_agreement$annot[ TF_activities_agreement$TF_expression == 'downregulated' ] = '-'
head(TF_activities_agreement)

Unnamed: 0_level_0,cl,TF,TF_expression,TF_expression_pvals_adj,TF_activity,TF_activity_pvals_adj,TF_activity_score,L1,id,annot
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>
1,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,KLF5,downregulated,1.3e-07,inactive_regulon,0.00029,KLF5_B,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 KLF5_B,-
2,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,EPAS1,upregulated,0.0191,inactive_regulon,5.04e-07,EPAS1_B,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 EPAS1_B,+
3,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,SNAPC4,downregulated,0.0441,inactive_regulon,0.00599,SNAPC4_C,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 SNAPC4_C,-
4,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,KLF6,upregulated,6.07e-20,active_regulon,0.0263,KLF6_B,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 KLF6_B,+
5,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,ID1,upregulated,7.36e-10,inactive_regulon,0.00332,ID1_E,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 ID1_E,+
6,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,POU5F1,upregulated,7.94e-08,inactive_regulon,0.00631,POU5F1_A,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 POU5F1_A,+


## Transfer labels

In [58]:
TF_activities$label = TF_activities_agreement$annot[ match(TF_activities$id, TF_activities_agreement$id) ]
TF_activities$label[ is.na(TF_activities$label) ] = ''
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
3,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,SP6_E,6.977937,2.995461e-12,2.878971e-11,139,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 SP6_E,
5,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,SMAD7_B,6.608119,3.892325e-11,5.28135e-10,14,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 SMAD7_B,
6,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,KDM5A_D,6.224759,4.822969e-10,3.667577e-09,185,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 KDM5A_D,
8,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,IKZF2_E,5.859704,4.63694e-09,3.161342e-08,190,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 IKZF2_E,
10,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,THAP1_C,5.82867,5.587086e-09,3.753654e-08,53,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 THAP1_C,
11,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,ELF1_C,5.782228,7.371767e-09,4.90506e-08,56,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 ELF1_C,


## Plot heatmap - all TFs

In [59]:
# exclude scanpy
TF_activities = TF_activities[ grep('scanpy', TF_activities$cl_name, invert = T) ,]
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(TF_activities, Regulon~cl_name, fill = 0)
# build matrix labels -  aka DE sign - to overlay with the TF activities
TF_activities$value = TF_activities$label
x_label = acast(TF_activities, Regulon~cl_name, fill = '')

In [60]:
# # actually plot the heatmap
# pheatmap(x, 
#          display_numbers = x_label, fontsize_number=10,
#          cellheight = 10, cellwidth = 10, 
#          filename = 'data/spatial/DEGs/TFs_heatmap.pdf')

## Filter TFs in Notch and Wnt signalling

In [61]:
Wnt = read.csv('figures_primaryEpithelial_clean/cluster_markers/TF_priors/Wnt_list', stringsAsFactors = F, sep = '\t')
Wnt$gene_symbol = sapply(strsplit(Wnt$Gene.names, ' '), head, 1)

Notch = read.csv('figures_primaryEpithelial_clean/cluster_markers/TF_priors/Notch_list', stringsAsFactors = F, sep = '\t')
Notch$gene_symbol = sapply(strsplit(Notch$Gene.names, ' '), head, 1)

In [62]:
TFs_in_x = sapply(strsplit(rownames(x), '_'), head, 1)
is_Wnt = TFs_in_x %in% Wnt$gene_symbol
is_Notch = TFs_in_x %in% Notch$gene_symbol

In [63]:
table(is_Notch)
table(is_Wnt)

is_Notch
FALSE  TRUE 
  185     2 

is_Wnt
FALSE 
  187 

In [64]:
# # actually plot the heatmap
# pheatmap(x[is_Wnt, ], 
#          display_numbers = x_label[is_Wnt, ], fontsize_number=10,
#          cellheight = 10, cellwidth = 10, 
#          filename = 'data/spatial/DEGs/TFs_heatmap_Wnt.pdf')

# # actually plot the heatmap
# pheatmap(x[is_Notch, ], 
#          display_numbers = x_label[is_Notch, ], fontsize_number=10,
#          cellheight = 10, cellwidth = 10, 
#          filename = 'data/spatial/DEGs/TFs_heatmap_Notch.pdf')

# Plot selected active TFs

In [65]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)
unique(TF_activities$cl_name)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
1,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,SMAD7_B,6.608119,3.892325e-11,5.28135e-10,14,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 SMAD7_B,,,1
2,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,ZBTB7A_C,5.427291,5.721575e-08,5.209644e-07,90,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 ZBTB7A_C,,,2
3,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,MKX_E,4.783607,1.721773e-06,1.23468e-05,189,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 MKX_E,,,3
4,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,MNT_C,4.624174,3.760938e-06,2.602569e-05,56,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 MNT_C,,,4
5,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,THAP11_C,4.229917,2.337772e-05,0.0001412872,114,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 THAP11_C,,,5
6,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,RFX5_AA,4.203369,2.629717e-05,0.0001575553,11,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 RFX5_AA,,,6


In [66]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423'),
                      subset(TF_activities, cl_name %in% 'cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423'),
                     subset(TF_activities, cl_name %in% 'cl_epi_basal_vs_epi_rest_limma_DEGs_20200423'))

In [67]:
head(TF_activities)
TFs = read.csv('data/scenic/huTF_census.txt', stringsAsFactors = F, header = F)$V1

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
208,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,RFX8_E,5.402977,6.554399e-08,9.754073e-07,173,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 RFX8_E,,,1
209,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,ZSCAN1_E,4.955873,7.200627e-07,8.897918e-06,119,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 ZSCAN1_E,,,2
210,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,HOXC11_D,4.620891,3.820947e-06,4.036787e-05,8,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 HOXC11_D,,,3
211,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,HOXC4_E,4.146294,3.378998e-05,0.0002710421,68,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 HOXC4_E,,,4
212,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,GLI1_B,4.10561,4.032498e-05,0.000317101,55,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 GLI1_B,,,5
213,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,FOXF1_E,3.789237,0.000151111,0.001089258,192,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 FOXF1_E,,,6


In [68]:
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)

In [73]:
'CEBPD' %in% TF_activities$Gene

In [69]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(TF_activities, Gene~cl_name, fill = 0)
colnames(x)

In [70]:
x = x[, c(3, 2, 1) ]
colnames(x) = c('luminal',  'glandular', 'basal')


In [74]:
pheatmap(t(x[ c('GLI1', 'FOXJ1', 'HIF1A', 'CSRNP1', 'FOXO3', 'HEY1', 'ELF1') , ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F,
         color = colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[4], "white", brewer.pal(n = 5, name = 'Reds')[4]))(50),
        filename = 'data/spatial/DEGs/selected_TFs_activities.pdf')

# Plot top 10 active TFs

In [34]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)
unique(TF_activities$cl_name)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
1,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,SMAD7_B,6.608119,3.892325e-11,5.28135e-10,14,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 SMAD7_B,,,1
2,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,ZBTB7A_C,5.427291,5.721575e-08,5.209644e-07,90,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 ZBTB7A_C,,,2
3,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,MKX_E,4.783607,1.721773e-06,1.23468e-05,189,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 MKX_E,,,3
4,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,MNT_C,4.624174,3.760938e-06,2.602569e-05,56,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 MNT_C,,,4
5,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,THAP11_C,4.229917,2.337772e-05,0.0001412872,114,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 THAP11_C,,,5
6,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,RFX5_AA,4.203369,2.629717e-05,0.0001575553,11,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 RFX5_AA,,,6


In [35]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423'),
                      subset(TF_activities, cl_name %in% 'cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423'),
                     subset(TF_activities, cl_name %in% 'cl_epi_basal_vs_epi_rest_limma_DEGs_20200423'))

In [36]:
head(TF_activities)
TFs = read.csv('data/scenic/huTF_census.txt', stringsAsFactors = F, header = F)$V1

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<int>
208,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,RFX8_E,5.402977,6.554399e-08,9.754073e-07,173,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 RFX8_E,,,1
209,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,ZSCAN1_E,4.955873,7.200627e-07,8.897918e-06,119,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 ZSCAN1_E,,,2
210,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,HOXC11_D,4.620891,3.820947e-06,4.036787e-05,8,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 HOXC11_D,,,3
211,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,HOXC4_E,4.146294,3.378998e-05,0.0002710421,68,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 HOXC4_E,,,4
212,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,GLI1_B,4.10561,4.032498e-05,0.000317101,55,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 GLI1_B,,,5
213,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423,FOXF1_E,3.789237,0.000151111,0.001089258,192,cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423 FOXF1_E,,,6


In [37]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top30 = subset(TF_activities, rank %in% 1:30 & NES > 0)$Gene

In [38]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top30), Gene~cl_name, fill = 0)
colnames(x)

In [39]:
x = x[, c(3, 2, 1) ]
colnames(x) = c('luminal',  'glandular', 'basal')
pheatmap(t(x[ top30, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
         color = colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[4], "white", brewer.pal(n = 5, name = 'Reds')[4]))(50),
        filename = 'data/spatial/DEGs/top30_TFs_activities.pdf')


In [40]:
pheatmap(t(x[ c('GLI1', 'FOXJ1', 'FOSL2', 'HIF1A', 'CSRNP1', 'FOXO3', 'HEY1', 'ELF1', 'CEBPD') , ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F,
         color = colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[4], "white", brewer.pal(n = 5, name = 'Reds')[4]))(50),
        filename = 'data/spatial/DEGs/selected_TFs_activities.pdf')

ERROR: Error in x[c("GLI1", "FOXJ1", "FOSL2", "HIF1A", "CSRNP1", "FOXO3", "HEY1", : subscript out of bounds


# Plot top 10 active TFs - matching expression

In [19]:
default_TF_activities = TF_activities

In [20]:
TF_activities = subset(TF_activities, label != '')

In [21]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$rank = NULL
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,SNAPC4_C,-3.178725,0.001479241,0.005986169,30,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 SNAPC4_C,-,-3.178725,SNAPC4,1
2,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,HIF1A_AA,4.971304,6.650424e-07,3.638018e-06,59,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 HIF1A_AA,+,4.971304,HIF1A,1
3,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,NFE2L1_B,3.7103,0.0002070142,0.000732756,25,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 NFE2L1_B,+,3.7103,NFE2L1,2
4,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,FOXO3_AA,3.584532,0.0003376827,0.001151115,19,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 FOXO3_AA,+,3.584532,FOXO3,3
5,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,ZNF600_E,2.857067,0.00427576,0.01185902,78,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 ZNF600_E,+,2.857067,ZNF600,4
6,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423,TCF25_C,-2.565934,0.01028984,0.02632372,40,cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423 TCF25_C,+,-2.565934,TCF25,5


In [22]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423'),
                      subset(TF_activities, cl_name %in% 'cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423'),
                     subset(TF_activities, cl_name %in% 'cl_epi_basal_vs_epi_rest_limma_DEGs_20200423'))

In [23]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top30 = subset(TF_activities, rank %in% 1:30 & NES > 0)$Gene

In [24]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top30), Gene~cl_name, fill = 0)
colnames(x) 

In [25]:
x = x[, c(2, 1) ]
colnames(x) = c('luminal',  'glandular')
pheatmap(t(x[ top30, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
         color = colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[4], "white", brewer.pal(n = 5, name = 'Reds')[4]))(50),
        filename = 'data/spatial/DEGs/top30_TFs_activities_filtered_TFisDE.pdf')


# Plot top 10 active TFs - curated TFs - confidence AA, A, B

In [26]:
TF_activities = default_TF_activities[ grep('_E$', default_TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_D$', TF_activities$Regulon, invert = T) , ]
TF_activities = TF_activities[ grep('_C$', TF_activities$Regulon, invert = T) , ]

In [27]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$rank = NULL
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,SMAD7_B,6.608119,3.892325e-11,5.28135e-10,14,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 SMAD7_B,,6.608119,SMAD7,1
2,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,RFX5_AA,4.203369,2.629717e-05,0.0001575553,11,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 RFX5_AA,,4.203369,RFX5,2
3,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,SRF_A,3.026489,0.002474115,0.009215843,14,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 SRF_A,,3.026489,SRF,3
4,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,WT1_A,3.02151,0.002515176,0.009332448,26,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 WT1_A,,3.02151,WT1,4
5,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,ETS1_AA,2.544535,0.01094234,0.0334309,63,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 ETS1_AA,,2.544535,ETS1,5
6,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,STAT1_AA,-2.625432,0.0086539,0.02734474,82,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 STAT1_AA,,-2.625432,STAT1,6


In [28]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423'),
                      subset(TF_activities, cl_name %in% 'cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423'),
                     subset(TF_activities, cl_name %in% 'cl_epi_basal_vs_epi_rest_limma_DEGs_20200423'))

In [29]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top30 = subset(TF_activities, rank %in% 1:30 & NES > 0)$Gene

In [30]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top30), Gene~cl_name, fill = 0)
colnames(x)

In [31]:
x = x[, c(3, 2, 1) ]
colnames(x) = c('luminal',  'glandular', 'basal')
pheatmap(t(x[ top30, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
         color = colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[4], "white", brewer.pal(n = 5, name = 'Reds')[4]))(50),
        filename = 'data/spatial/DEGs/top30_TFs_activities_filtered_curatedTFs.pdf')


# Plot top 10 active TFs - DE or curated

In [32]:
idx = unique(c( which(default_TF_activities$label != ''),
               grep('_E$', default_TF_activities$Regulon, invert = T),
               grep('_D$', default_TF_activities$Regulon, invert = T),
               grep('_C$', default_TF_activities$Regulon, invert = T)
              )) %>% sort(.)
TF_activities = default_TF_activities[ idx, ]

In [33]:
# Rank TFs according to their activity
TF_activities = TF_activities[order(TF_activities$NES, decreasing = T), ]
TF_activities$rank = NULL
TF_activities = ddply(TF_activities, 'cl_name', function(df) data.frame(df, rank = 1:nrow(df)) )
head(TF_activities)

Unnamed: 0_level_0,cl_name,Regulon,NES,p.value,FDR,Size,id,label,value,Gene,rank
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<int>
1,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,SMAD7_B,6.608119,3.892325e-11,5.28135e-10,14,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 SMAD7_B,,6.608119,SMAD7,1
2,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,ZBTB7A_C,5.427291,5.721575e-08,5.209644e-07,90,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 ZBTB7A_C,,5.427291,ZBTB7A,2
3,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,MKX_E,4.783607,1.721773e-06,1.23468e-05,189,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 MKX_E,,4.783607,MKX,3
4,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,MNT_C,4.624174,3.760938e-06,2.602569e-05,56,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 MNT_C,,4.624174,MNT,4
5,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,THAP11_C,4.229917,2.337772e-05,0.0001412872,114,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 THAP11_C,,4.229917,THAP11,5
6,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423,RFX5_AA,4.203369,2.629717e-05,0.0001575553,11,cl_epi_basal_vs_epi_rest_limma_DEGs_20200423 RFX5_AA,,4.203369,RFX5,6


In [34]:
# order clusters
TF_activities = rbind(subset(TF_activities, cl_name %in% 'cl_epi_luminal_vs_epi_rest_limma_DEGs_20200423'),
                      subset(TF_activities, cl_name %in% 'cl_epi_glandular_vs_epi_rest_limma_DEGs_20200423'),
                     subset(TF_activities, cl_name %in% 'cl_epi_basal_vs_epi_rest_limma_DEGs_20200423'))

In [35]:
# Select top 10 ranked genes
TF_activities$Gene = sapply(strsplit(TF_activities$Regulon, '_'), head, 1)
top30 = subset(TF_activities, rank %in% 1:30 & NES > 0)$Gene

In [36]:
# build matrix of TF activity scores to plot
TF_activities$value = TF_activities$NES
x = acast(subset(TF_activities, Gene %in% top30), Gene~cl_name, fill = 0)
colnames(x) 

In [37]:
x = x[, c(3, 2, 1) ]
colnames(x) = c('luminal',  'glandular', 'basal')
pheatmap(t(x[ top30, ]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
         color = colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[4], "white", brewer.pal(n = 5, name = 'Reds')[4]))(50),
        filename = 'data/spatial/DEGs/top30_TFs_activities_filtered_curatedTFs_TFisDE.pdf')


# Plot top 10 DE TFs

In [38]:
TFs = read.csv('data/scenic/huTF_census.txt', stringsAsFactors = F, header = F)$V1

DE_gland = read.csv('data/spatial/DEGs/epi_glandular_vs_epi_rest_limma_DEGs_20200423.csv', stringsAsFactors = F)
DE_luminal = read.csv('data/spatial/DEGs/epi_luminal_vs_epi_rest_limma_DEGs_20200423.csv', stringsAsFactors = F)
DE_basal = read.csv('data/spatial/DEGs/epi_basal_vs_epi_rest_limma_DEGs_20200423.csv', stringsAsFactors = F)

DE = list(luminal=DE_luminal,
         gland=DE_gland,
         basal=DE_basal)

DE = lapply(DE, function(x){
    x = subset(x, Gene %in% TFs)
    x = x[ order(x$logFC, decreasing = T) ,]
    x$rank = 1:nrow(x)
    return(x)
})

In [39]:
DE_df = melt(DE, id.vars = names(DE[[1]]) )
head(DE_df)

Unnamed: 0_level_0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,rank,L1
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
1,HAND2,0.2604295,2.649793e-05,0.001369203,1.352028,1.091599,0.972603,0.924471,1,luminal
2,NR2F2,0.2403253,0.000164707,0.006357223,1.098132,0.857806,0.90411,0.867069,2,luminal
3,ESR1,0.2208052,0.0001307507,0.005293044,0.797663,0.576857,0.780822,0.749245,3,luminal
4,CUX1,0.2117044,0.0005227082,0.016023274,0.964216,0.752511,0.890411,0.8429,4,luminal
5,OSR2,0.2029021,0.001112307,0.028315565,1.640227,1.437325,0.958904,0.966767,5,luminal
6,HES4,0.1834099,0.001105987,0.02823006,0.793668,0.610258,0.849315,0.761329,6,luminal


In [40]:
# Select top 10 ranked genes
top30 = subset(DE_df, rank %in% 1:30)$Gene #%>% unique(.)
top30

In [41]:
# build matrix of TF activity scores to plot
DE_df$value = DE_df$logFC
x = acast(subset(DE_df, Gene %in% top30), Gene~L1, fill = 0)
pheatmap(t(x[top30, names(DE)]), cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
         color = colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[4], "white", brewer.pal(n = 5, name = 'Reds')[4]))(50),
        filename = 'data/spatial/DEGs/top30_TFs_DE.pdf')


In [42]:
# mydf = subset(DE_df, Gene %in% top30)
# mydf$Gene = factor(mydf$Gene, levels = unique(top30))
# mydf$cluster = factor(mydf$L1, levels = rev(names(DE)))

# require(ggplot2)
#   ggplot(mydf, 
#          aes(x = Gene,
#              y = cluster)) +        ## global aes
# #   geom_tile(aes(fill = P.Value)) +         ## to get the rect filled
#   geom_point(aes(colour = logFC, 
#                    size = percentExpr_cluster))  +    ## geom_point for circle illusion
#   scale_color_gradient2(low = "blue",   mid = 'white',
#                        high = "red")+       ## color of the corresponding aes
#   scale_size(range = c(1, 4))+             ## to tune the size of circles
#   theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# ggsave(filename = 'data/spatial/DEGs/clusterDEGs_and_TFs/top30_TFs_DE_dotplot.pdf', dpi = 300, width = 10, height = 4)