In [7]:
library(dplyr)
library(RColorBrewer)
library(harmony)
library(ggplot2)
library(pheatmap)
library(cowplot)
require(viper)
require(reshape2)

## Load DE expression info

In [8]:
# Input: an .rds file holding a named list of differential-expression (DE)
# tables, one table per cluster of interest; the list names are used as the
# cluster labels by the cells below.
# Columns this notebook actually reads from every table:
# - Log2FC: fold change of the DE test (presumably log2; rounded right below)
# - DESeq2Log10Pvalue: log10 of the DE p-value (later cells use it as 10^x)
# Gene symbols are taken from the row names of each table by the later cells.
path_DE <- '/lustre/scratch117/cellgen/team292/lh20/Sharing/Luz/bulkorg_noinhib_DEtables.rds'
DE_df <- readRDS(path_DE)

# Round the fold changes of every cluster table to two decimals.
# NOTE(review): the rounded values are also what the later cells take the sign
# of, so |Log2FC| < 0.005 becomes exactly 0 there - confirm this is intended.
DE_df <- lapply(DE_df, function(de_table) {
    de_table$Log2FC <- round(de_table$Log2FC, digits = 2)
    de_table
})

# Show the cluster labels available in the list
# length(DE_df)
names(DE_df)

In [9]:
# Stack the per-cluster DE tables into one long data frame. Every column of the
# first table is declared an id variable, so no measure variables are left and
# melt() is expected to simply row-bind the tables and append the list name of
# each table as a trailing `L1` column (see the reshape2 melt documentation).
DEGs_table <- melt(DE_df, id.vars = names(DE_df[[1]]))

# Move columns 12 and 13 to the front and rename the second of them.
# NOTE(review): the positions are hard-coded; this presumably relies on the DE
# tables having exactly 12 columns so that column 13 is `L1` - confirm.
leading_part <- DEGs_table[, c(12, 13)]
trailing_part <- DEGs_table[, -c(12, 13)]
DEGs_table <- cbind(leading_part, trailing_part)
names(DEGs_table)[2] <- 'organoidCtrl_cluster'

# Row count before and after keeping genes with p-value < 0.1
# (rows whose p-value is NA are dropped as well)
nrow(DEGs_table)
DEGs_table <- DEGs_table[which(10^DEGs_table$DESeq2Log10Pvalue < 0.1), , drop = FALSE]
nrow(DEGs_table)

# Export the filtered table
write.csv(DEGs_table, file = '/home/jovyan/farm/endometrium_v2/invitro/epithelial_DEGs.csv', quote = FALSE, row.names = FALSE)

### Estimate TF activities from DoRothEA regulons

In [10]:
# load() restores the object(s) stored in the .rdata file into the workspace and
# returns their name(s); get() then fetches the object by that name so it can be
# referred to as `viper_gset` (passed as `regulon` to msviper() below).
# NOTE(review): get() needs a single name, so the file is assumed to hold
# exactly one object - confirm.
viper_gset <- get(load('~/farm/gsea/genesets/dorotheav2-top10scoring_VentoLab20201111.rdata'))

In [11]:
# Estimate TF (regulon) activities for every cluster in `DE_df`.
#
# For each DE table:
#   1. build a signed z-score gene expression signature (GES) from the DE
#      p-values and the sign of the fold change;
#   2. run msviper() on that signature against the regulons in `viper_gset`;
#   3. keep regulons with fewer than 200 targets, sorted by p-value.
# The per-cluster tables are then stacked into `df` (columns: cluster, Regulon,
# NES, p.value, FDR, Size), sorted by p-value and written to CSV.
results <- list()
for (cl_name in names(DE_df)) {

  DEsignature <- DE_df[[cl_name]]
  DEsignature$Gene <- rownames(DEsignature)

  # Estimate z-score values for the GES. Check the VIPER manual for details.
  # A two-sided p-value p maps to |z| = qnorm(p / 2, lower.tail = FALSE). The
  # quantile is evaluated on the natural-log scale (log(p / 2) computed from
  # log10(p)) so that very small p-values do not underflow to 0 and turn into
  # infinite, tied z-scores.
  log_half_p <- DEsignature$DESeq2Log10Pvalue * log(10) - log(2)
  mySignature <- qnorm(log_half_p, lower.tail = FALSE, log.p = TRUE) *
    sign(DEsignature$Log2FC)
  names(mySignature) <- DEsignature$Gene

  # Genes without a usable p-value or fold change carry no evidence; drop them
  # so that they cannot influence the enrichment.
  mySignature <- mySignature[!is.na(mySignature)]
  if (length(mySignature) == 0) {
    warning('No usable genes in the signature of cluster ', cl_name,
            '; cluster skipped', call. = FALSE)
    next
  }
  mySignature <- mySignature[order(mySignature, decreasing = TRUE)]

  # Estimate TF activities
  mrs <- msviper(ges = mySignature, regulon = viper_gset, minsize = 4, ges.filter = FALSE)
#   mrs = msviper(ges = myStatistics[,1][order(myStatistics[,1], decreasing = T)], regulon = viper_gset, minsize = 3, ges.filter = F)
  cl_enrichment <- data.frame(Regulon = names(mrs$es$nes),
                              cluster = cl_name,
                              Size = mrs$es$size[ names(mrs$es$nes) ],
                              NES = mrs$es$nes,
                              p.value = mrs$es$p.value,
                              FDR = p.adjust(mrs$es$p.value, method = 'fdr'),
                              stringsAsFactors = FALSE)
  # The FDR above is computed over all tested regulons, before the size filter
  cl_enrichment <- subset(cl_enrichment, Size < 200)
#   cl_enrichment = subset(cl_enrichment, FDR < 0.1)
  cl_enrichment <- cl_enrichment[ order(cl_enrichment$p.value), ]
  if (nrow(cl_enrichment) > 0) {
    results[[cl_name]] <- cl_enrichment
  }
}

# Stack the per-cluster tables. When no cluster produced any regulon, fall back
# to an empty table with the same columns so that the code below (and the cells
# that read `df`) still work.
out_cols <- c('cluster', 'Regulon', 'NES', 'p.value', 'FDR', 'Size')
if (length(results) > 0) {
  df <- do.call(rbind, unname(results))
  rownames(df) <- NULL
  df <- df[, out_cols]
} else {
  df <- data.frame(cluster = character(0), Regulon = character(0),
                   NES = numeric(0), p.value = numeric(0),
                   FDR = numeric(0), Size = numeric(0),
                   stringsAsFactors = FALSE)
}
df <- df[ order(df$p.value), ]
write.csv(df, file = '/home/jovyan/farm/endometrium_v2/invitro/out/TFs_activities.csv', row.names = FALSE, quote = FALSE)

Computing regulon enrichment with aREA algorithm





Computing regulon enrichment with aREA algorithm





Computing regulon enrichment with aREA algorithm





Computing regulon enrichment with aREA algorithm





Computing regulon enrichment with aREA algorithm





Computing regulon enrichment with aREA algorithm





In [12]:
# Cross-reference TF expression with TF activity.
#
# For every cluster, list the TFs that are both differentially expressed
# (p-value < 0.1 in the cluster's DE table) and the head of a regulon with a
# significant activity estimate (FDR < 0.1 in `df`). Each such TF is annotated
# with the direction of its expression change and of its regulon activity.
# The per-cluster tables are merged and written to a TSV file; nothing is
# written when no TF qualifies in any cluster.
agreementTFs <- list()

# For each cluster, check TFs agreement
for (cl in names(DE_df)) {
  print(cl)

  # Load DEGs for cluster
  DEGs <- DE_df[[cl]]
  DEGs$Gene <- rownames(DEGs)

  # Filter TF activities for cluster
  cl_TFact <- subset(df, cluster == cl)

  if (nrow(cl_TFact) == 0) {
    next
  }

  # TF symbol = regulon name up to the first ' - ', then up to the first '_'.
  # sub() is vectorised and always returns a character vector, also for factor
  # input (via as.character) and for empty regulon names.
  cl_TFact$TF <- sub(' - .*$', '', as.character(cl_TFact$Regulon))
  cl_TFact$TF <- sub('_.*$', '', cl_TFact$TF)

  # Significant regulons, most significant first. A TF can head several
  # regulons; the explicit ordering makes match() below pick its most
  # significant one regardless of how `df` happens to be sorted.
  sig_TFact <- subset(cl_TFact, FDR < 0.1)
  sig_TFact <- sig_TFact[order(sig_TFact$p.value), ]

  # Find DEG and activities agreement
  sharedTFs <- intersect(subset(DEGs, 10^DESeq2Log10Pvalue < 0.1)$Gene,
                         sig_TFact$TF)

  if (length(sharedTFs) > 0) {

    # Add TFs agreement info
    TF_df <- data.frame(cl = cl, TF = sharedTFs, stringsAsFactors = FALSE)
    TF_df$TF_expression <- 'upregulated'
    TF_df$TF_expression[ TF_df$TF %in% subset(DEGs, Log2FC < 0)$Gene ] <- 'downregulated'
    TF_df$TF_expression_pvals <- 10^DEGs$DESeq2Log10Pvalue[ match(TF_df$TF, DEGs$Gene) ]

    regulon_idx <- match(TF_df$TF, sig_TFact$TF)
    TF_df$TF_activity <- 'active_regulon'
    TF_df$TF_activity_FDR <- sig_TFact$FDR[regulon_idx]
    TF_df$TF_activity_score <- sig_TFact$NES[regulon_idx]
    TF_df$TF_activity[ TF_df$TF_activity_score < 0 ] <- 'inactive_regulon'
    agreementTFs[[cl]] <- TF_df

  }

}


# merge TF agreement
if (length(agreementTFs) > 0) {
  # melt() with every column as id variable is used here to stack the tables;
  # it is kept (rather than rbind) so the columns of the output file stay as
  # they were.
  TF_df <- melt(agreementTFs, id.vars = names(agreementTFs[[1]]))
  TF_df$TF_expression_pvals <- signif(TF_df$TF_expression_pvals, 3)
  TF_df$TF_activity_FDR <- signif(TF_df$TF_activity_FDR, 3)
  # Sort by cluster, then by expression p-value within each cluster
  TF_df <- TF_df[order(TF_df$cl, TF_df$TF_expression_pvals), ]
  write.table(TF_df, file = '/home/jovyan/farm/endometrium_v2/invitro/out/TFactivities_and_expression_agreement.tsv', row.names = FALSE, quote = FALSE, sep = '\t')
}

[1] "Estrogen_Induced"
[1] "Preciliated"
[1] "Ciliated"
[1] "Inflamatory"
[1] "Secretory"
[1] "Secretory_Proliferative"
