# Germ lineage both sex - TFs intersection

In [5]:
library(Seurat)
library(dplyr)
library(RColorBrewer)
library(destiny, quietly = TRUE)
library(gam)
library(harmony)
library(ggplot2)
library(pheatmap)
library(cowplot)
library(reshape2)
library(SoupX)
# source('G_lib_TFacts.r')

luz_colors = c('#c466b0', 
               '#779eed', '#aad3f2', '#71a2c7', '#60bddb',   '#70ccbe', 
               '#7b9e99', '#91bd80', '#bdb380',  '#d4db81',  '#dbc381', '#ebd3c3', 
               '#e08b8b', '#edb7b7', '#d9abb7', '#d96884', '#e64e74', '#d9a5c3', '#cc8fdb')

colors = c(luz_colors, 
           brewer.pal(n=12, name = 'Paired'), 
           brewer.pal(n=8, name = 'Set2'),
          brewer.pal(n=8, name = 'Set3'),
          brewer.pal(n=8, name = 'Accent'),
          brewer.pal(n=8, name = 'Pastel1'))

def_colors = list( reds = luz_colors[13:length(luz_colors) ],
                 yellows = luz_colors[7:12],
                 blues = luz_colors[2:6],
                 greys = c('#e8e8e8', '#b5b5b5', '#a6a6a6') )

col_dot = c('grey90',  brewer.pal(n = 9, name = 'Reds')[7])

downsample_data = function(anndataO, var = 'annotated_clusters', N=200){
    samples = list()
    for (cl in unique(anndataO@meta.data[,var]) ){
        cells = colnames(anndataO)[ anndataO@meta.data[,var] == cl ]
        samples[[cl]] = cells
        if( length(cells) > N )
            samples[[cl]] = sample(cells, size = N, replace = F)
    }
    anndataO = subset(anndataO, cells = unlist(samples))
    
    gene_counts = tabulate(anndataO@assays$RNA@counts@i + 1)
    anndataO = subset(anndataO, features = names(gene_counts)[ gene_counts > 3 ] )
    
    return(anndataO)
}


In [6]:
sessionInfo()

R version 4.0.3 (2020-10-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.1 LTS

Matrix products: default
BLAS:   /home/jovyan/my-conda-envs/R_Seurat3/lib/libblas.so.3.8.0
LAPACK: /home/jovyan/my-conda-envs/R_Seurat3/lib/liblapack.so.3.8.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] splines   stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] SoupX_1.5.0        reshape2_1.4.4     cowplot_1.1.1      pheatmap_1.0.12   
 [5] ggplot2_3.3.3      harmony_1.0        Rcpp_1.0.6         gam_1.20          
 [9] foreach_1.5.1      destiny_3.4.0      RColorBrewer_1.1-2 dplyr_1.0

# Load RNA derived TFs info

In [7]:
TFact = read.csv('figures-germcells/0_FS1_table_TFacts.csv', stringsAsFactors = F)
# TFact$cluster[ TFact$cluster == 'Sertoli' ] = 'Sertoli_NR4A1'
TFact$TF = strsplit(TFact$gene, split = '-[A-E]') %>% sapply(., head, 1)
TFact$ID = paste0(TFact$cluster, '__', TFact$TF)
TFact$scRNA_TFact = ifelse(TFact$p_val_adj < 0.05 & TFact$avg_log2FC > 0.1 , 1, 0) 


DEGs = read.csv('figures-germcells/0_FS1_table_DEGs.csv', stringsAsFactors = F)
# DEGs = subset(DEGs, gene %in% TFact$TF)
DEGs$TF = DEGs$gene
DEGs$ID = paste0(DEGs$cluster, '__', DEGs$TF)
# DEGs$avg_log2FC = DEGs$avg_logFC
DEGs$scRNA_DEGs = ifelse(DEGs$p_val_adj < 0.05 & DEGs$avg_log2FC > 0.1, 1, 0) 

# Load ATAC derived TFs info

In [8]:
TFmotifs = read.csv('/nfs/team292/vl6/my_MULTIOME_dir/germcells_apr2021/TFs/DARs/all_DA_peaks_LR.csv', stringsAsFactors = F)

TFmotifs$cluster = gsub('-', '_', TFmotifs$cluster)
TFmotifs$cluster = gsub('oogonia_meiotic', 'oogonia_meiosis', TFmotifs$cluster)
TFmotifs$cluster = gsub('pre_spermatogonia', 'pre-spermatogonia', TFmotifs$cluster)
TFmotifs$TF = strsplit(TFmotifs$X, split = '\\.') %>% sapply(., head, 1)
TFmotifs$TF = gsub('-HUMAN', '', TFmotifs$TF)
TFmotifs = subset(TFmotifs, TF %in% c(TFact$TF, DEGs$TF) )
TFmotifs$ID = paste0(TFmotifs$cluster, '__', TFmotifs$TF)
# TFmotifs = subset(TFmotifs, TF %in% TFact$TF)
TFmotifs = TFmotifs[ order(TFmotifs$avg_log2FC, decreasing = T) , ]
TFmotifs = TFmotifs[ ! duplicated(TFmotifs$ID) , ]

TFmotifs$snATAC_TFact = ifelse(TFmotifs$p_val_adj < 0.05 & TFmotifs$avg_log2FC > 0.1 , 1, 0) 

# Intersect cluster names

In [9]:
unique(TFmotifs$cluster)
unique(TFact$cluster)
unique(DEGs$cluster)


intersect(TFact$cluster,TFmotifs$cluster)

## Merge for plotting

In [10]:
cnames = intersect(colnames(TFact), colnames(DEGs)) %>% intersect(., colnames(TFmotifs))
df2plot = data.frame(TFact[,cnames], type='scRNA_TFactivities', stringsAsFactors = F)
df2plot = rbind(df2plot,
               data.frame(DEGs[, cnames], type='scRNA_Expression', stringsAsFactors = F))
df2plot = rbind(df2plot,
               data.frame(TFmotifs[, cnames], type='snATA_TFmotif_access', stringsAsFactors = F))

# Intersect TFs

In [11]:
names(TFact)[c(1:2,5)] = paste0('scRNA_TFact_', names(TFact)[c(1:2,5)])
names(DEGs)[c(1:5)] = paste0('scRNA_DEGs_', names(DEGs)[c(1:5)])
names(TFmotifs)[c(2:3,6)] = paste0('snATAC_TFact_', names(TFmotifs)[c(2:3,6)])
names(TFmotifs)[1] = 'snATAC_PWM' 

In [12]:
df = merge(DEGs, TFact, by = 'ID', all = T)
df = df[, grep('y$', names(df), invert = T) ] # remove unwanted columns
df = merge(df, TFmotifs, by = 'ID', all = T)
df = df[, grep('y$', names(df), invert = T) ] # remove unwanted columns
df = df[, grep('x$', names(df), invert = T) ] # remove unwanted columns
head(df)

Unnamed: 0_level_0,ID,scRNA_DEGs_p_val,scRNA_DEGs_avg_logFC,scRNA_DEGs_pct.1,scRNA_DEGs_pct.2,scRNA_DEGs_p_val_adj,avg_log2FC,scRNA_DEGs,scRNA_TFact_p_val,scRNA_TFact_avg_log2FC,scRNA_TFact_p_val_adj,scRNA_TFact,snATAC_PWM,snATAC_TFact_p_val,snATAC_TFact_avg_log2FC,snATAC_TFact_p_val_adj,cluster,gene,TF,snATAC_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>
1,GC_mitotic__A1BG,2.464194e-08,0.12690515,0.42,0.178,0.0005785188,0.12690515,1,,,,,,,,,,,,
2,GC_mitotic__A2M,0.001094769,0.08178806,0.24,0.102,1.0,0.08178806,0,,,,,,,,,,,,
3,GC_mitotic__A4GALT,7.813121e-07,-0.19754753,0.13,0.248,0.0183428636,-0.19754753,0,,,,,,,,,,,,
4,GC_mitotic__AAAS,1.099515e-05,0.10908761,0.785,0.542,0.2581332145,0.10908761,0,,,,,,,,,,,,
5,GC_mitotic__AACS,0.04215926,-0.05749859,0.075,0.085,1.0,-0.05749859,0,,,,,,,,,,,,
6,GC_mitotic__AADACL2-AS1,0.1004445,0.03099426,0.26,0.169,1.0,0.03099426,0,,,,,,,,,,,,


In [13]:
df = df[ , ! names(df) %in% c('L1' ) ]
df$cluster = strsplit(df$ID, '__') %>% sapply(., head, 1)
df$TF = strsplit(df$ID, '__') %>% sapply(., tail, 1)
df = df[, sort(names(df)) ]
df$N_agreement = apply(df[, c('scRNA_DEGs', 'scRNA_TFact', 'snATAC_TFact') ], 1, function(x){
    x[ is.na(x) ] = 0
    sum(x)
})
df = df[, -1]
head(df)

Unnamed: 0_level_0,cluster,gene,ID,scRNA_DEGs,scRNA_DEGs_avg_logFC,scRNA_DEGs_p_val,scRNA_DEGs_p_val_adj,scRNA_DEGs_pct.1,scRNA_DEGs_pct.2,scRNA_TFact,scRNA_TFact_avg_log2FC,scRNA_TFact_p_val,scRNA_TFact_p_val_adj,snATAC_PWM,snATAC_TFact,snATAC_TFact_avg_log2FC,snATAC_TFact_p_val,snATAC_TFact_p_val_adj,TF,N_agreement
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>
1,GC_mitotic,,GC_mitotic__A1BG,1,0.12690515,2.464194e-08,0.0005785188,0.42,0.178,,,,,,,,,,A1BG,1
2,GC_mitotic,,GC_mitotic__A2M,0,0.08178806,0.001094769,1.0,0.24,0.102,,,,,,,,,,A2M,0
3,GC_mitotic,,GC_mitotic__A4GALT,0,-0.19754753,7.813121e-07,0.0183428636,0.13,0.248,,,,,,,,,,A4GALT,0
4,GC_mitotic,,GC_mitotic__AAAS,0,0.10908761,1.099515e-05,0.2581332145,0.785,0.542,,,,,,,,,,AAAS,0
5,GC_mitotic,,GC_mitotic__AACS,0,-0.05749859,0.04215926,1.0,0.075,0.085,,,,,,,,,,AACS,0
6,GC_mitotic,,GC_mitotic__AADACL2-AS1,0,0.03099426,0.1004445,1.0,0.26,0.169,,,,,,,,,,AADACL2-AS1,0


In [14]:
df = df[ order(df$N_agreement, decreasing = T) , ]
df = df[ , c(1,19, 20,  14,  3:(ncol(df)-2)) ]
write.csv(df, file = 'figures-germcells/0_FS1_table_TFs_intersection.csv', row.names=F)
head(df)

Unnamed: 0_level_0,cluster,TF,N_agreement,snATAC_PWM,ID,scRNA_DEGs,scRNA_DEGs_avg_logFC,scRNA_DEGs_p_val,scRNA_DEGs_p_val_adj,scRNA_DEGs_pct.1,scRNA_DEGs_pct.2,scRNA_TFact,scRNA_TFact_avg_log2FC,scRNA_TFact_p_val,scRNA_TFact_p_val_adj,snATAC_PWM.1,snATAC_TFact,snATAC_TFact_avg_log2FC,snATAC_TFact_p_val,snATAC_TFact_p_val_adj
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
5123,GC_mitotic,HOXA5,3,HOXA5.MA0158.1.1,GC_mitotic__HOXA5,1,0.1884437,1.042499e-12,2.447475e-08,0.39,0.105,1,0.3878363,3.212809e-13,4.44974e-10,HOXA5.MA0158.1.1,1,1.9730629,4.052564e-09,7.780923e-06
7329,GC_mitotic,NFIA,3,NFIA-HUMAN.H10MO.S.1,GC_mitotic__NFIA,1,0.1705531,2.282502e-07,0.00535863,0.53,0.28,1,0.5336686,4.086865e-19,5.660308e-16,NFIA-HUMAN.H10MO.S.1,1,6.5930849,3.4079970000000003e-32,6.543355e-29
32142,oogonia_meiosis,KLF10,3,KLF10.HOMER.3,oogonia_meiosis__KLF10,1,0.5677691,6.61741e-09,0.0001553569,0.4,0.479,1,0.2870639,1.464499e-12,2.028332e-09,KLF10.HOMER.3,1,1.2544136,6.144267e-12,1.179699e-08
36807,oogonia_meiosis,SOX30,3,SOX30.SwissRegulon.3,oogonia_meiosis__SOX30,1,0.3588502,5.536458e-09,0.0001299794,0.205,0.116,1,1.0267208,5.894308e-65,8.163617e-62,SOX30.SwissRegulon.3,1,0.745179,5.138769e-06,0.009866436
44639,oogonia_STRA8,HOXA2,3,HOXA2.HOMER.2,oogonia_STRA8__HOXA2,1,0.1490289,1.74245e-25,4.09075e-21,0.245,0.026,1,0.4544766,1.736572e-10,2.405152e-07,HOXA2.HOMER.2,1,0.3725193,1.482851e-35,2.847074e-32
44640,oogonia_STRA8,HOXA3,3,HOXA3.SwissRegulon.2,oogonia_STRA8__HOXA3,1,0.1300804,2.599976e-08,0.0006103963,0.36,0.112,1,0.9092205,1.8031540000000001e-28,2.497368e-25,HOXA3.SwissRegulon.2,1,0.4873332,5.638224e-14,1.082539e-10


# Plot - shared

In [15]:
# Define clusters to plot 
CLofinterest = c( 'PGC', 'PGC_mitotic',
                 'oogonia_STRA8', 
                 'oogonia_meiosis', 'oocyte',  'pre-spermatogonia')

# and order accordingly
idx_cl = sapply(CLofinterest, function(x) which(df$cluster == x)) %>% unlist(.)
df =  df[idx_cl, ]                

                
                
# Define clusters to plot                
idx_tf = which(df$N_agreement == 3 | 
               (df$N_agreement == 2 & df$scRNA_DEGs == 1 & df$scRNA_DEGs_avg_logFC > 0.5 ) |
               (df$N_agreement == 2 & df$scRNA_DEGs == 1 & df$snATAC_TFact_avg_log2FC > 1 ) |
               (df$N_agreement == 2 & df$scRNA_DEGs == 1 & df$scRNA_TFact_avg_log2FC > 1 )
              )
TFofinterest = unique(df[idx_tf, ]$TF  )
length(TFofinterest)
write.csv(df[idx_tf, ], file = 'figures-germcells/0_FS1_table_TFs_intersection_SELECTED.csv', row.names=F)
# TFofinterest = c('POU5F1',  'KLF4', 'TFAP2C', 'ETV5', 'NANOG',  'SOX15',  'SOX17', 
#                  'ESX1', 'ZGLP1', 'RARA',  'HOXA2', 'HOXA4', 'HOXA5', 'HOXB3','RFX6',
#                  'DMRTB1', 'DMRTC2', 'HMGA1', 
#                  'FIGLA', 'NOBOX', 'SOHLH1',   'SALL3', 'ZFPM2', 'TP63',
#                 'MSX2', 'HOXC10', 'HOXD11', 'HSF1', 'HSF2', 'SMAD1', 'SIX1')

## as heatmaps

In [16]:
# Load TF act Dorothea matrix
x_TFact = read.csv('figures-germcells/0_FS1_table_TFact_percluster.csv', stringsAsFactors = F, row.names = 1)
colnames(x_TFact) = strsplit(colnames(x_TFact), split = '\\.') %>% sapply(., head, 1)
x_TFact = x_TFact[ rownames(x_TFact) %in% CLofinterest , colnames(x_TFact) %in%  TFofinterest ]
rn = rownames(x_TFact)
x_TFact = apply(x_TFact, 2, scale)
rownames(x_TFact) = rn

# Add labels
x_TFact_Lab = x_TFact
x_TFact_Lab[] = ''
for (i in rownames(x_TFact_Lab) )
    for (j in colnames(x_TFact_Lab) ){
        x = subset(df, cluster == i & TF == j)
        if( nrow(x) == 1 )
             if( x[3] > 0 & ! is.na(x[12])){
                 if(x[12] == 1 )
                         x_TFact_Lab[i,j] = 'a'
                 if(x[3] == 3 )
                         x_TFact_Lab[i,j] = '*'
             }
#              if( x[3] > 1 & ! is.na(x[12])){
#                 if(x[3] == 3)
#                     x_TFact_Lab[i,j] = '*'
#                  if ( x[3] == 2 & x[12] == 1 ){
#                     if( ! is.na( x[17])) 
#                         if(x[17] == 1)
#                          x_TFact_Lab[i,j] = 'o'
#                     if( ! is.na( x[6])) 
#                         if(x[6] == 1 )
#                          x_TFact_Lab[i,j] = 'e'
#                     }

#             }
    }
            
            

# Load TFmotifs matrix
x_TFmotifs = read.csv('/nfs/team292/vl6/my_MULTIOME_dir/germcells_apr2021/germcells__chromvar_celltype_zscore.csv', stringsAsFactors = F, row.names = 1)
x_TFmotifs = x_TFmotifs[ rownames(x_TFmotifs) %in% CLofinterest , ]

idx = colnames(x_TFmotifs) %in% TFmotifs$X
x_TFmotifs = x_TFmotifs[ , idx ] # remove duplicated motifs to match TFmotifs
colnames(x_TFmotifs) = strsplit(colnames(x_TFmotifs), split = '\\.') %>% sapply(., head, 1)


# Load Expr matrix
x_Expre = read.csv('figures_manual_annotation/data_utils/average_log.csv', stringsAsFactors = F, row.names = 1) %>% t(.)
rownames(x_Expre) = gsub('pre.spermatogonia', 'pre-spermatogonia', rownames(x_Expre))
x_Expre = x_Expre[ rownames(x_Expre) %in% CLofinterest ,  colnames(x_Expre) %in%  TFofinterest  ]
rn = rownames(x_Expre)
x_Expre = apply(x_Expre, 2, scale)
rownames(x_Expre) = rn

# Add labels
x_Expre_Lab = x_Expre
x_Expre_Lab[] = ''
for (i in rownames(x_Expre_Lab) )
    for (j in colnames(x_Expre_Lab) ){
        x = subset(df, cluster == i & TF == j)
        if( nrow(x) == 1 )
             if( x[3] > 1 & ! is.na(x[6]) ){
                if(x[3] == 3)
                    x_Expre_Lab[i,j] = '*'
                 if ( x[3] == 2 & x[6] == 1 ){
                    if( ! is.na( x[17])) 
                        if(x[17] == 1)
                         x_Expre_Lab[i,j] = 'o'
                    if( ! is.na( x[12])) 
                        if(x[12] == 1 )
                         x_Expre_Lab[i,j] = 'a'
                    }

            }
    }


In [17]:
myHeatmap = function(X, TFs, cl, pdf_file, display_numbers = NULL){
    print(cl)
    tfs = intersect(TFs, colnames(X))
    cls =  intersect(cl, rownames(X))
    X = X[ cls,  tfs ]
    print(dim(X))
    
    paletteLength <- 50
    myColor <- colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[4], "white", brewer.pal(n = 5, name = 'Reds')[4]))(paletteLength)
    # length(breaks) == length(paletteLength) + 1
    # use floor and ceiling to deal with even/odd length pallettelengths
    myBreaks <- c(seq(min(X), 0, length.out=ceiling(paletteLength/2) + 1), 
                  seq(max(X)/paletteLength, max(X), length.out=floor(paletteLength/2)))    
    if( is.null(display_numbers) ){
        pheatmap(X, cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, 
                 color=myColor, breaks=myBreaks,
                filename = pdf_file)
    }else{     
        display_numbers = display_numbers[ cls,  tfs ]
        print(dim(display_numbers))
        pheatmap(X, cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F, display_numbers = display_numbers, 
                 color=myColor, breaks=myBreaks,
                filename = pdf_file)
        }
}
myHeatmap(x_TFact, TFofinterest, CLofinterest, 'figures-germcells/0_FS1_heatmap_TFintersection_scRNA_TFact.pdf')
myHeatmap(x_Expre, TFofinterest, CLofinterest, 'figures-germcells/0_FS1_heatmap_TFintersection_scRNA_DEGs.pdf', display_numbers = x_Expre_Lab)
# myHeatmap(x_TFmotifs, TFofinterest, CLofinterest, 'figures-germcells/0_FS1_heatmap_TFintersection_snATAC_TFact.pdf')

[1] "PGC"               "PGC_mitotic"       "oogonia_STRA8"    
[4] "oogonia_meiosis"   "oocyte"            "pre-spermatogonia"
[1]    6 1376
[1] "PGC"               "PGC_mitotic"       "oogonia_STRA8"    
[4] "oogonia_meiosis"   "oocyte"            "pre-spermatogonia"
[1]     6 14791


ERROR: Error in seq.default(min(X), 0, length.out = ceiling(paletteLength/2) + : 'from' must be a finite number


In [20]:
# plot cellphoneDB TFs
cellphoneTFs = c('ELK1', 'STAT1', 'STAT3', 'SMAD1', 'SMAD5', 'ZGLP1', 'RXRA', 'ESR2')
CLofinterest = c( 'PGC', 
                 'oogonia_STRA8', 
                 'oogonia_meiosis', 'oocyte')
#                  ,  'pre-spermatogonia')
myHeatmap(x_TFact, cellphoneTFs, 
          rev(CLofinterest), 'figures-germcells/0_FS1_heatmap_TFact_cellphoneDB.pdf', display_numbers = x_TFact_Lab)

myHeatmap(x_TFact, cellphoneTFs, rev(CLofinterest), 'figures-germcells/0_FS1_heatmap_TFact_cellphoneDBII.pdf')


# # myHeatmap(x_Expre, cellphoneTFs, 
# #           CLofinterest, 'figures-germcells/0_FS1_heatmap_TFexp_cellphoneDB.pdf', display_numbers = x_Expre_Lab)

# # myHeatmap(x_Expre, cellphoneTFs, CLofinterest, 'figures-germcells/0_FS1_heatmap_TFexp_cellphoneDBII.pdf')

[1] "oocyte"          "oogonia_meiosis" "oogonia_STRA8"   "PGC"            
[1] 4 8
[1] 4 8
[1] "oocyte"          "oogonia_meiosis" "oogonia_STRA8"   "PGC"            
[1] 4 8


## what are the Dorothea targets of the new TFs

In [None]:
# newTFs = c('SOX4', 'HOXA2', 'HOXA4', 'HOXA5', 'HOXB5', 'MXD4', 'DMRTB1', 'TP63')
# source('G_lib_TFacts.r')
# dorothea_regulon$TF_name = strsplit(dorothea_regulon$tf, '_') %>% sapply(., head, 1 )

In [None]:
# markers_crosspecie = c('DPPA4', 'ETV4', 'IFITM1', 'KIT', 'MYBL2', 'NANOG', 'NANOS3', 'POU5F1', 'PRDM1', 'RCC2', 'SALL4', 'SOX15', 'SOX17', 'TEAD4', 'TFAP2C', 'XIST', 'GATA3', 'PPARA', 
#     'DAZL', 'DDX4', 'MAEL', 'ESX1', 'MSX1', 'RFX6', 'STRA8', 'ZGLP1', 
#     'CHMP2A', 'DMC1', 'DMRTC2', 'INCA1', 'MEIKIN', 'MEIOB', 'MSH4', 'RAD51AP2', 'SPO11', 'SYCP1', 'SYCP3', 'TEX11', 'ZNF711', 'MLH3', 'ZNF131', 'ASTL', 'FIGLA', 'GDF9', 'JAG1', 'NOBOX', 'OOSP2', 'SOHLH1', 'SOHLH2', 'YBX2', 'ZAR1', 'ZP3','BMP15','FOXO3')


# subset(dorothea_regulon, TF_name %in% newTFs & 
#        target %in% DEGs$gene[ DEGs$scRNA_DEGs_p_val_adj < 0.1] &
#       target %in% markers_crosspecie )

# subset(dorothea_regulon, TF_name %in% 'MXD4'  & 
#        target %in% DEGs$gene[ DEGs$scRNA_DEGs_p_val_adj < 0.001 & DEGs$scRNA_DEGs_avg_log2FC > 0.5 & DEGs$cluster == 'oogonia_STRA8' ])


# subset(dorothea_regulon, TF_name %in% 'ZGLP1'  & 
#        target %in% DEGs$gene[ DEGs$scRNA_DEGs_p_val_adj < 0.001 & DEGs$scRNA_DEGs_avg_log2FC > 0.5 & DEGs$cluster == 'oogonia_STRA8' ])