# Running DoRothEA "along" the invading trophoblast states (according to trajectory progression)

## DE analysis was previously performed for the following pairwise comparisons:

- GC vs iEVT
- iEVT vs EVT_2
- eEVT vs EVT_2
- EVT_2 vs EVT_1
- EVT_1 vs VCT_CCC
- VCT_CCC vs VCT [VCT_p + VCT]

In [1]:
%load_ext rpy2.ipython

In [2]:
%%R

# Record the R version, platform and loaded packages of this session.
# NOTE(review): the recorded output lists only base packages as attached, so
# limma and edgeR versions are not actually reported by this call at this point.

sessionInfo()

R version 4.0.4 (2021-02-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.1 LTS

Matrix products: default
BLAS/LAPACK: /opt/conda/lib/libopenblasp-r0.3.12.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] tools     stats     graphics  grDevices utils     datasets  methods  
[8] base     

loaded via a namespace (and not attached):
[1] compiler_4.0.4


In [3]:
%%R
# Attach the packages used below. library() is used instead of require()
# so that a missing package stops the cell immediately with an error rather
# than returning FALSE and failing later at first use.
library(viper)
library(data.table)
library(ggplot2)
library(ggpubr)


R[write to console]: Loading required package: viper

R[write to console]: Loading required package: Biobase

R[write to console]: Loading required package: BiocGenerics

R[write to console]: 
Attaching package: ‘BiocGenerics’


R[write to console]: The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


R[write to console]: The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    union, unique, unsplit, which.max, which.min


R[write to console]: Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packag

In [4]:
%%R

# Load TF regulon genesets in VIPER format.
# load() restores the objects stored in the .rdata file into the workspace;
# the code below expects one of them to be `viper_regulon`, so fail early
# with a clear error if it is not available.
load("./dorotheav2-top10scoring_VentoLab20201111.rdata")
stopifnot(exists("viper_regulon"))
# Clean TF names & explore object
#names(viper_regulon) = sapply(strsplit(names(viper_regulon), split = ' - '), head, 1)
# Explore the regulons object.
# The first regulon names are printed explicitly: in the recorded run only the
# value of the last expression of the cell was displayed. head() is used so
# that fewer than 10 regulons do not yield NA padding.
print(head(names(viper_regulon), 10))
viper_regulon[[1]]

$tfmode
      ABCC1       ABCC6       ABHD5        ABT1  AC002066.1  AC002069.2 
          1           1           1           1           1           1 
 AC004231.3  AC004917.1  AC005514.1  AC005622.1  AC006076.1  AC006483.2 
          1           1           1           1           1           1 
 AC007220.1  AC007319.1  AC007792.1  AC010649.1  AC010768.3  AC011257.1 
          1           1           1           1           1           1 
 AC011287.1  AC011509.1  AC012085.2  AC012464.1  AC016586.1  AC016954.1 
          1           1           1           1           1           1 
 AC016957.1  AC023232.1  AC024084.1  AC026316.2  AC073284.1  AC073592.1 
          1           1           1           1           1           1 
 AC073593.1  AC073863.1  AC079316.2  AC079384.1  AC084083.1  AC091114.1 
          1           1           1           1           1           1 
 AC091806.1  AC092127.2  AC092171.2  AC098587.1  AC099489.1  AC105265.3 
          1           1           1        

In [25]:
%%R

##########################################################################################
## Computing TF activity changes from a differential gene expression signature
##########################################################################################
# Read the limma DE table for the VCT_CCC vs VCT comparison.
file <- fread(
  "../2-3_DE_analysis_inv_troph/limma_DEGs/20211123/20211123_VCT_CCC_vs_VCT_limma_DEGs.csv"
)

# Keep only rows with a non-empty gene symbol ...
DEsignature <- subset(file, Gene != "")
# ... and, for symbols that occur more than once, only their first row.
DEsignature <- subset(DEsignature, !duplicated(Gene))

In [26]:
%%R

# Estimate z-score values for the gene expression signature (GES).
# Check the VIPER manual for details.
# NOTE(review): the p-values taken here are the adj.P.Val column, although the
# matrix column is labelled 'P.Value' -- confirm that using adjusted p-values
# is intended. A p-value of exactly 0 produces a non-finite z-score.
myStatistics <- matrix(DEsignature$logFC,
                       dimnames = list(DEsignature$Gene, "logFC"))
myPvalue <- matrix(DEsignature$adj.P.Val,
                   dimnames = list(DEsignature$Gene, "P.Value"))
# Two-sided p-value -> |z|, signed by the direction of the log fold change;
# [, 1] drops the matrix to a vector named by gene.
mySignature <- (qnorm(myPvalue / 2, lower.tail = FALSE) * sign(myStatistics))[, 1]
# Order genes from the highest to the lowest z-score.
mySignature <- mySignature[order(mySignature, decreasing = TRUE)]

In [27]:
%%R
# Estimate TF activities: run msviper on the z-score signature against the
# loaded regulons (regulons smaller than minsize = 4 are not considered,
# see the msviper documentation; ges.filter is switched off).
mrs <- msviper(
  ges = mySignature,
  regulon = viper_regulon,
  minsize = 4,
  ges.filter = FALSE
)

# Collect the per-regulon results into one table:
#   Regulon - regulon (TF) name
#   Size    - regulon size reported by msviper
#   NES     - normalized enrichment score
#   p.value - enrichment p-value
#   FDR     - p-values adjusted with p.adjust(method = "fdr")
TF_activities <- data.frame(
  Regulon = names(mrs$es$nes),
  Size = mrs$es$size[names(mrs$es$nes)],
  NES = mrs$es$nes,
  p.value = mrs$es$p.value,
  FDR = p.adjust(mrs$es$p.value, method = "fdr")
)
# Most significant regulons first.
TF_activities <- TF_activities[order(TF_activities$p.value), ]

R[write to console]: Computing regulon enrichment with aREA algorithm





In [28]:
%%R

# Save the TF activity table of this comparison for plotting later.
# NOTE: the file is written tab-separated (sep = "\t") although its name ends
# in .csv, so it must be read back with a tab delimiter.
# Create the output directory first so write.table() does not fail when it is
# missing; this is a no-op if the directory already exists.
dir.create("./results", showWarnings = FALSE, recursive = TRUE)
write.table(
  TF_activities,
  file = "./results/20211129_TF_activities_VCT_CCC_vs_VCT.csv",
  append = FALSE,
  sep = "\t",
  row.names = TRUE,
  col.names = TRUE
)