# Supporting lineage - TFs intersection

In [20]:
library(Seurat)
library(dplyr)
library(RColorBrewer)
library(destiny, quietly = TRUE)
library(gam)
library(harmony)
library(ggplot2)
library(pheatmap)
library(cowplot)
library(reshape2)
library(SoupX)
# source('G_lib_TFacts.r')

# Hand-picked base palette of 19 hex colours.
luz_colors <- c(
    '#c466b0',
    '#779eed', '#aad3f2', '#71a2c7', '#60bddb', '#70ccbe',
    '#7b9e99', '#91bd80', '#bdb380', '#d4db81', '#dbc381', '#ebd3c3',
    '#e08b8b', '#edb7b7', '#d9abb7', '#d96884', '#e64e74', '#d9a5c3', '#cc8fdb'
)

# Extended palette: the base colours followed by five ColorBrewer sets
# (12 'Paired' colours, then 8 colours from each of the remaining sets).
colors <- c(
    luz_colors,
    brewer.pal(n = 12, name = 'Paired'),
    unlist(lapply(c('Set2', 'Set3', 'Accent', 'Pastel1'),
                  function(set_name) brewer.pal(n = 8, name = set_name)))
)

# Named slices of the base palette, plus a fixed set of greys.
def_colors <- list(
    reds    = tail(luz_colors, -12),   # elements 13..19
    yellows = luz_colors[7:12],
    blues   = luz_colors[2:6],
    greys   = c('#e8e8e8', '#b5b5b5', '#a6a6a6')
)

# Two-colour scale: light grey and the 7th shade of ColorBrewer 'Reds'.
col_dot <- c('grey90', brewer.pal(n = 9, name = 'Reds')[7])

# Randomly downsample a Seurat object to at most N cells per group, then drop
# rarely detected genes.
#
# Arguments:
#   anndataO  Seurat object. The code reads `@meta.data` and the `@i` slot of
#             `@assays$RNA@counts` (assumes a column-compressed sparse matrix --
#             TODO confirm for the Seurat version in use).
#   var       Name of the metadata column holding the group / cluster label.
#   N         Maximum number of cells kept per group.
#
# Returns the subsetted Seurat object. Uses `sample()`, so call `set.seed()`
# beforehand for a reproducible selection.
downsample_data <- function(anndataO, var = 'annotated_clusters', N = 200) {
    # Work on character labels so list indexing is always by name, never by
    # position (factor / numeric labels would otherwise index positionally).
    groups <- as.character(anndataO@meta.data[, var])
    labelled <- !is.na(groups)

    kept <- list()
    for (cl in unique(groups[labelled])) {
        cells <- colnames(anndataO)[labelled & groups == cl]
        if (length(cells) > N) {
            cells <- sample(cells, size = N, replace = FALSE)
        }
        kept[[cl]] <- cells
    }
    anndataO <- subset(anndataO, cells = unlist(kept, use.names = FALSE))

    # Number of stored (non-zero) entries per gene. `tabulate()` returns an
    # unnamed vector, so gene names must come from the matrix row names, and
    # `nbins` is required so the result has one entry per gene even when the
    # last genes have no stored entries.
    counts <- anndataO@assays$RNA@counts
    gene_counts <- tabulate(counts@i + 1, nbins = nrow(counts))
    anndataO <- subset(anndataO, features = rownames(counts)[gene_counts > 3])

    anndataO
}


In [21]:
# Print the R version, platform and attached package versions for this session.
sessionInfo()

R version 4.0.3 (2020-10-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.1 LTS

Matrix products: default
BLAS:   /home/jovyan/my-conda-envs/R_Seurat3/lib/libblas.so.3.8.0
LAPACK: /home/jovyan/my-conda-envs/R_Seurat3/lib/liblapack.so.3.8.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] splines   stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] SoupX_1.5.0        reshape2_1.4.4     cowplot_1.1.1      pheatmap_1.0.12   
 [5] ggplot2_3.3.3      harmony_1.0        Rcpp_1.0.6         gam_1.20          
 [9] foreach_1.5.1      destiny_3.4.0      RColorBrewer_1.1-2 dplyr_1.0

# Load RNA-derived TF info

In [22]:
# --- TF activities table (scRNA-seq) ---
TFact <- read.csv('figures-supporting/0_FS2_table_TFacts.csv', stringsAsFactors = FALSE)
# TFact$cluster[ TFact$cluster == 'Sertoli' ] = 'Sertoli_NR4A1'
# Keep the part of `gene` before the first '-A' ... '-E'
# (presumably a regulon confidence suffix -- TODO confirm).
TFact$TF <- sapply(strsplit(TFact$gene, split = '-[A-E]'), head, 1)
# Key shared by all three tables: '<cluster>__<TF>'
TFact$ID <- paste0(TFact$cluster, '__', TFact$TF)
# 1 = significantly up (adjusted p < 0.05 and log2FC > 0.1), 0 otherwise
TFact$scRNA_TFact <- as.numeric(TFact$p_val_adj < 0.05 & TFact$avg_log2FC > 0.1)


# --- Differential expression table (scRNA-seq) ---
DEGs <- read.csv('figures-supporting/0_FS2_table_DEGsAll.csv', stringsAsFactors = FALSE)
# DEGs = subset(DEGs, gene %in% TFact$TF)
DEGs$TF <- DEGs$gene
DEGs$ID <- paste0(DEGs$cluster, '__', DEGs$TF)
# DEGs$avg_log2FC = DEGs$avg_logFC
# Same significance rule as for the TF activities
DEGs$scRNA_DEGs <- as.numeric(DEGs$p_val_adj < 0.05 & DEGs$avg_log2FC > 0.1)

# Load ATAC-derived TF info

In [23]:
# --- Differentially accessible TF motifs (snATAC-seq) ---
TFmotifs <- read.csv('/nfs/team292/vl6/my_MULTIOME_dir/supporting_apr2021/TFs/DARs/all_DA_peaks_LR.csv', stringsAsFactors = FALSE)

# Harmonise cluster names with the scRNA tables: '-' becomes '_', except for
# 'FetalLeydig-like', which keeps its hyphen.
TFmotifs$cluster <- gsub('-', '_', TFmotifs$cluster)
TFmotifs$cluster <- gsub('FetalLeydig_like', 'FetalLeydig-like', TFmotifs$cluster)

# TF symbol = text before the first '.' of the motif ID, minus any '-HUMAN' tag
TFmotifs$TF <- sapply(strsplit(TFmotifs$X, split = '\\.'), head, 1)
TFmotifs$TF <- gsub('-HUMAN', '', TFmotifs$TF)

# Keep only motifs whose TF appears in one of the scRNA tables
TFmotifs <- TFmotifs[TFmotifs$TF %in% c(TFact$TF, DEGs$TF), ]
TFmotifs$ID <- paste0(TFmotifs$cluster, '__', TFmotifs$TF)
# TFmotifs = subset(TFmotifs, TF %in% TFact$TF)

# One row per cluster/TF pair: sort by fold change so that the motif with the
# highest avg_log2FC is the one kept by `duplicated()`.
TFmotifs <- TFmotifs[order(TFmotifs$avg_log2FC, decreasing = TRUE), ]
TFmotifs <- TFmotifs[!duplicated(TFmotifs$ID), ]

# 1 = significantly more accessible (adjusted p < 0.05 and log2FC > 0.1)
TFmotifs$snATAC_TFact <- as.numeric(TFmotifs$p_val_adj < 0.05 & TFmotifs$avg_log2FC > 0.1)

# Intersect cluster names

In [24]:
# Show the cluster labels present in each table, to check that naming is
# consistent across the three sources.
unique(TFmotifs$cluster)
unique(TFact$cluster)
unique(DEGs$cluster)


# Cluster labels shared by the TF-activity and motif-accessibility tables
intersect(TFact$cluster,TFmotifs$cluster)

## Merge for plotting

In [25]:
# Columns present in all three tables
cnames <- Reduce(intersect, list(colnames(TFact), colnames(DEGs), colnames(TFmotifs)))

# Stack the three tables in long format; `type` records where each row came from.
df2plot <- data.frame(TFact[, cnames], type = 'scRNA_TFactivities', stringsAsFactors = FALSE)
df2plot <- rbind(
    df2plot,
    data.frame(DEGs[, cnames], type = 'scRNA_Expression', stringsAsFactors = FALSE)
)
# NOTE(review): 'snATA_TFmotif_access' looks like a typo for 'snATAC_...'; left
# unchanged because code outside this view may match on the exact label.
df2plot <- rbind(
    df2plot,
    data.frame(TFmotifs[, cnames], type = 'snATA_TFmotif_access', stringsAsFactors = FALSE)
)

# Intersect TFs

In [26]:
# Prefix the statistics columns of each table with their source so they stay
# distinguishable after merging. Columns are addressed by position, and the
# rename is not idempotent: re-running this cell prefixes the names again.
names(TFact)[c(1:2, 5)] <- paste0('scRNA_TFact_', names(TFact)[c(1:2, 5)])
names(DEGs)[1:5] <- paste0('scRNA_DEGs_', names(DEGs)[1:5])
names(TFmotifs)[c(2:3, 6)] <- paste0('snATAC_TFact_', names(TFmotifs)[c(2:3, 6)])
# The first column of the motif table becomes 'snATAC_PWM'
names(TFmotifs)[1] <- 'snATAC_PWM'

In [27]:
# Full outer join of the three tables on the '<cluster>__<TF>' key.
# Columns that exist on both sides of a merge come back with the suffixes
# '.x' / '.y'; those duplicates are dropped. The patterns are anchored to the
# literal suffix so that a column whose name merely ends in 'x' or 'y' is not
# removed by accident.
df <- merge(DEGs, TFact, by = 'ID', all = TRUE)
df <- df[, !grepl('\\.y$', names(df)), drop = FALSE]   # drop the TFact copies
df <- merge(df, TFmotifs, by = 'ID', all = TRUE)
df <- df[, !grepl('\\.[xy]$', names(df)), drop = FALSE] # drop all remaining suffixed copies
head(df)

Unnamed: 0_level_0,ID,scRNA_DEGs_p_val,scRNA_DEGs_avg_log2FC,scRNA_DEGs_pct.1,scRNA_DEGs_pct.2,scRNA_DEGs_p_val_adj,scRNA_DEGs,scRNA_TFact_p_val,scRNA_TFact_avg_log2FC,scRNA_TFact_p_val_adj,scRNA_TFact,snATAC_PWM,snATAC_TFact_p_val,snATAC_TFact_avg_log2FC,snATAC_TFact_p_val_adj,cluster,gene,TF,snATAC_TFact
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>
1,coelEpi__A1BG,3.90886e-05,0.09081064,0.194,0.145,1.0,0,,,,,,,,,,,,
2,coelEpi__A2ML1,1.09257e-43,-0.18541273,0.009,0.127,3.084324e-39,0,,,,,,,,,,,,
3,coelEpi__A4GALT,1.805626e-33,-0.25571978,0.086,0.216,5.097282e-29,0,,,,,,,,,,,,
4,coelEpi__AAGAB,0.0001225512,-0.0757595,0.137,0.165,1.0,0,,,,,,,,,,,,
5,coelEpi__AAMP,3.491967e-17,-0.21063174,0.433,0.525,9.857824e-13,0,,,,,,,,,,,,
6,coelEpi__AASDH,0.0002507657,-0.0774273,0.131,0.156,1.0,0,,,,,,,,,,,,


In [28]:
# Drop the 'L1' column if present
df <- df[, !names(df) %in% c('L1')]

# Rebuild cluster and TF from the ID, since the merged copies of these columns
# are NA for rows missing from the motif table.
df$cluster <- sapply(strsplit(df$ID, '__'), head, 1)
df$TF <- sapply(strsplit(df$ID, '__'), tail, 1)

# Sort the columns by name
df <- df[, sort(names(df))]

# Number of evidence layers (expression, TF activity, motif accessibility) that
# flag the pair; a missing flag counts as 0.
df$N_agreement <- rowSums(df[, c('scRNA_DEGs', 'scRNA_TFact', 'snATAC_TFact')], na.rm = TRUE)
head(df)

Unnamed: 0_level_0,cluster,gene,ID,scRNA_DEGs,scRNA_DEGs_avg_log2FC,scRNA_DEGs_p_val,scRNA_DEGs_p_val_adj,scRNA_DEGs_pct.1,scRNA_DEGs_pct.2,scRNA_TFact,scRNA_TFact_avg_log2FC,scRNA_TFact_p_val,scRNA_TFact_p_val_adj,snATAC_PWM,snATAC_TFact,snATAC_TFact_avg_log2FC,snATAC_TFact_p_val,snATAC_TFact_p_val_adj,TF,N_agreement
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>
1,coelEpi,,coelEpi__A1BG,0,0.09081064,3.90886e-05,1.0,0.194,0.145,,,,,,,,,,A1BG,0
2,coelEpi,,coelEpi__A2ML1,0,-0.18541273,1.09257e-43,3.084324e-39,0.009,0.127,,,,,,,,,,A2ML1,0
3,coelEpi,,coelEpi__A4GALT,0,-0.25571978,1.805626e-33,5.097282e-29,0.086,0.216,,,,,,,,,,A4GALT,0
4,coelEpi,,coelEpi__AAGAB,0,-0.0757595,0.0001225512,1.0,0.137,0.165,,,,,,,,,,AAGAB,0
5,coelEpi,,coelEpi__AAMP,0,-0.21063174,3.491967e-17,9.857824e-13,0.433,0.525,,,,,,,,,,AAMP,0
6,coelEpi,,coelEpi__AASDH,0,-0.0774273,0.0002507657,1.0,0.131,0.156,,,,,,,,,,AASDH,0


In [29]:
# Rank cluster/TF pairs by the number of agreeing evidence layers (highest first)
df = df[ order(df$N_agreement, decreasing = T) , ]
# Reorder columns by position: cluster (1), TF (19), N_agreement (20),
# snATAC_PWM (14), then columns 3..18. Column 2 ('gene') is dropped, and column
# 14 is selected twice, so the result carries both 'snATAC_PWM' and
# 'snATAC_PWM.1' (visible in the preview below).
# NOTE(review): the labelling loops further down index this table by column
# position, so changing this selection would shift those positions.
df = df[ , c(1,19, 20,  14,  3:(ncol(df)-2)) ]
# Save the table (without row names) and preview it
write.csv(df, file = 'figures-supporting/0_FS2_table_TFs_intersection.csv', row.names=F)
head(df)

Unnamed: 0_level_0,cluster,TF,N_agreement,snATAC_PWM,ID,scRNA_DEGs,scRNA_DEGs_avg_log2FC,scRNA_DEGs_p_val,scRNA_DEGs_p_val_adj,scRNA_DEGs_pct.1,scRNA_DEGs_pct.2,scRNA_TFact,scRNA_TFact_avg_log2FC,scRNA_TFact_p_val,scRNA_TFact_p_val_adj,snATAC_PWM.1,snATAC_TFact,snATAC_TFact_avg_log2FC,snATAC_TFact_p_val,snATAC_TFact_p_val_adj
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
14575,FetalLeydig-like,YBX1,3,YBX1.SwissRegulon,FetalLeydig-like__YBX1,1,0.5508477,9.972751e-16,2.815308e-11,0.78,0.88,1,0.1610803,2.000404e-09,2.770559e-06,YBX1.SwissRegulon,1,0.3061308,3.36226e-14,6.45554e-11
15534,ovarianSurf,ARX,3,ARX-HUMAN.H10MO.D.4,ovarianSurf__ARX,1,0.2637713,9.065451e-74,2.559177e-69,0.256,0.043,1,0.4953523,1.461708e-28,2.0244650000000001e-25,ARX-HUMAN.H10MO.D.4,1,2.0150131,0.0,0.0
16626,ovarianSurf,EMX1,3,EMX1.MA0612.1.4,ovarianSurf__EMX1,1,0.1091104,6.854979e-54,1.935161e-49,0.088,0.004,1,0.4774452,3.028194e-30,4.194049e-27,EMX1.MA0612.1.4,1,14.264031,0.0,0.0
16627,ovarianSurf,EMX2,3,EMX2.MA0886.1.4,ovarianSurf__EMX2,1,0.417731,2.43623e-86,6.877476e-82,0.809,0.473,1,0.2357156,4.0944250000000004e-23,5.670778e-20,EMX2.MA0886.1.4,1,16.3513353,0.0,0.0
17318,ovarianSurf,HOXD10,3,HOXD10.SwissRegulon.4,ovarianSurf__HOXD10,1,0.1006133,1.566094e-12,4.421082e-08,0.087,0.032,1,0.2693982,1.83799e-13,2.545616e-10,HOXD10.SwissRegulon.4,1,0.6432385,5.581362000000001e-190,1.071621e-186
17324,ovarianSurf,HOXD9,3,HOXD9.SwissRegulon.4,ovarianSurf__HOXD9,1,0.354684,2.594671e-62,7.324757e-58,0.543,0.256,1,0.2419463,1.765061e-06,0.00244461,HOXD9.SwissRegulon.4,1,3.5176353,0.0,0.0


# Plot - shared

In [43]:
# Clusters to plot. The wider list is kept for reference:
# CLofinterest = c('coelEpi','sKITLG','sLGR5_male', 'sLGR5_female', 'sPAX8b', 'sPAX8m', 
#                  'Sertoli_WFDC2', 'Sertoli', 'Sertoli_low',  'FetalLeydig-like',
#                  'preGC_I_OSR1', 'ovarianSurf', 'preGC_II', 'preGC_II_hypoxia', 'preGC_III', 'preGC_III_Notch')
CLofinterest <- c(
    'sLGR5_female',
    'preGC_I_OSR1', 'ovarianSurf', 'preGC_II', 'preGC_II_hypoxia', 'preGC_III', 'preGC_III_Notch'
)


# Keep only rows of those clusters, arranged in the order listed above
idx_cl <- unlist(sapply(CLofinterest, function(x) which(df$cluster == x)))
df <- df[idx_cl, ]



# Define TFs to plot: pairs supported by all three layers, or by two layers
# including expression when at least one effect size is large
# (expression log2FC > 0.5, motif log2FC > 1 or TF-activity log2FC > 1).
idx_tf <- which(
    df$N_agreement == 3 |
        (df$N_agreement == 2 & df$scRNA_DEGs == 1 &
             (df$scRNA_DEGs_avg_log2FC > 0.5 |
                  df$snATAC_TFact_avg_log2FC > 1 |
                  df$scRNA_TFact_avg_log2FC > 1))
)
TFofinterest <- df$TF[idx_tf]
# TFofinterest = unique(df[df$N_agreement >= 1, ]$TF)                 
length(TFofinterest)
# TFofinterest = c('POU5F1',  'KLF4', 'TFAP2C', 'ETV5', 'NANOG',  'SOX15',  'SOX17', 
#                  'ESX1', 'ZGLP1', 'RARA',  'HOXA2', 'HOXA4', 'HOXA5', 'HOXB3','RFX6',
#                  'DMRTB1', 'DMRTC2', 'HMGA1', 
#                  'FIGLA', 'NOBOX', 'SOHLH1',   'SALL3', 'ZFPM2', 'TP63',
#                 'MSX2', 'HOXC10', 'HOXD11', 'HSF1', 'HSF2', 'SMAD1', 'SIX1')

## as heatmaps

In [44]:
# Load the per-cluster TF-activity (Dorothea) matrix: clusters in rows, TFs in
# columns. Column names are cut at the first '.' to leave the TF symbol.
x_TFact <- read.csv('figures-supporting/0_FS2_table_TFact_percluster.csv', stringsAsFactors = F, row.names = 1)
colnames(x_TFact) <- sapply(strsplit(colnames(x_TFact), split = '\\.'), head, 1)
# drop = FALSE keeps a data frame even when a single cluster or TF matches
x_TFact <- x_TFact[ rownames(x_TFact) %in% CLofinterest , colnames(x_TFact) %in% TFofinterest , drop = FALSE ]
# z-score each TF across the selected clusters; apply() discards the row names,
# so they are saved first and restored afterwards
rn <- rownames(x_TFact)
x_TFact <- apply(x_TFact, 2, scale)
rownames(x_TFact) <- rn

# Add labels: same shape as the matrix, '' by default.
#   'a' = TF activity is significant for this cluster/TF pair
#   '*' = all three evidence layers agree (overrides 'a')
# Columns of `df` are addressed by name rather than by position, so the labels
# do not depend on the column order of the intersection table.
x_TFact_Lab <- x_TFact
x_TFact_Lab[] <- ''
for (i in rownames(x_TFact_Lab) ) {
    for (j in colnames(x_TFact_Lab) ) {
        x <- df[ which(df$cluster == i & df$TF == j) , , drop = FALSE ]
        # Label only when the pair maps to exactly one row of the table
        if ( nrow(x) != 1 )
            next
        if ( x$N_agreement > 0 && ! is.na(x$scRNA_TFact) ) {
            if ( x$scRNA_TFact == 1 )
                x_TFact_Lab[i, j] <- 'a'
            if ( x$N_agreement == 3 )
                x_TFact_Lab[i, j] <- '*'
        }
        # Earlier labelling scheme, kept for reference (positions refer to the
        # intersection table: 3 = N_agreement, 6 = scRNA_DEGs,
        # 12 = scRNA_TFact, 17 = snATAC_TFact):
#              if( x[3] > 1 & ! is.na(x[12])){
#                 if(x[3] == 3)
#                     x_TFact_Lab[i,j] = '*'
#                  if ( x[3] == 2 & x[12] == 1 ){
#                     if( ! is.na( x[17])) 
#                         if(x[17] == 1)
#                          x_TFact_Lab[i,j] = 'o'
#                     if( ! is.na( x[6])) 
#                         if(x[6] == 1 )
#                          x_TFact_Lab[i,j] = 'e'
#                     }

#             }
    }
}
            
            

# Load TFmotifs matrix (clusters in rows, one column per motif)
x_TFmotifs <- read.csv('/nfs/team292/vl6/my_MULTIOME_dir/supporting_apr2021/supporting__chromvar_celltype_zscore.csv', stringsAsFactors = F, row.names = 1)
x_TFmotifs <- x_TFmotifs[ rownames(x_TFmotifs) %in% CLofinterest , , drop = FALSE ]

# Keep only the motifs retained in TFmotifs (one per cluster/TF pair).
# The motif-ID column is called 'X' when the table is read, but is renamed to
# 'snATAC_PWM' before the merge; looking up `TFmotifs$X` after that rename
# yields NULL and would silently remove every column. Use whichever exists.
motif_col <- if ('snATAC_PWM' %in% names(TFmotifs)) 'snATAC_PWM' else 'X'
motif_ids <- TFmotifs[[motif_col]]
# read.csv() sanitises header names with make.names() (e.g. '-' becomes '.'),
# so match against both the raw and the sanitised motif IDs.
idx <- colnames(x_TFmotifs) %in% c(motif_ids, make.names(motif_ids))
x_TFmotifs <- x_TFmotifs[ , idx , drop = FALSE ] # remove duplicated motifs to match TFmotifs
# Reduce each motif ID to the TF symbol (text before the first '.')
colnames(x_TFmotifs) <- sapply(strsplit(colnames(x_TFmotifs), split = '\\.'), head, 1)


# Load Expr matrix; it is transposed after reading so that clusters are in rows
# and genes in columns.
x_Expre <- t(read.csv('figures_manual_annotation/data_utils/average_log.csv', stringsAsFactors = F, row.names = 1))
# Restore the hyphen in the 'FetalLeydig-like' cluster name
rownames(x_Expre) <- gsub('FetalLeydig.like', 'FetalLeydig-like', rownames(x_Expre))
# drop = FALSE keeps a matrix even when a single cluster or TF matches
x_Expre <- x_Expre[ rownames(x_Expre) %in% CLofinterest ,  colnames(x_Expre) %in% TFofinterest , drop = FALSE ]
# z-score each gene across the selected clusters; apply() discards the row
# names, so they are saved first and restored afterwards
rn <- rownames(x_Expre)
x_Expre <- apply(x_Expre, 2, scale)
rownames(x_Expre) <- rn

# Add labels: same shape as the matrix, '' by default.
#   '*' = all three evidence layers agree
#   'o' = expression + motif accessibility agree
#   'a' = expression + TF activity agree
# Columns of `df` are addressed by name rather than by position, so the labels
# do not depend on the column order of the intersection table.
x_Expre_Lab <- x_Expre
x_Expre_Lab[] <- ''
for (i in rownames(x_Expre_Lab) ) {
    for (j in colnames(x_Expre_Lab) ) {
        x <- df[ which(df$cluster == i & df$TF == j) , , drop = FALSE ]
        # Label only when the pair maps to exactly one row of the table
        if ( nrow(x) != 1 )
            next
        if ( x$N_agreement > 1 && ! is.na(x$scRNA_DEGs) ) {
            if ( x$N_agreement == 3 )
                x_Expre_Lab[i, j] <- '*'
            if ( x$N_agreement == 2 && x$scRNA_DEGs == 1 ) {
                if ( ! is.na(x$snATAC_TFact) && x$snATAC_TFact == 1 )
                    x_Expre_Lab[i, j] <- 'o'
                if ( ! is.na(x$scRNA_TFact) && x$scRNA_TFact == 1 )
                    x_Expre_Lab[i, j] <- 'a'
            }
            # Disabled variant, kept for reference (positions refer to the
            # intersection table: 3 = N_agreement, 6 = scRNA_DEGs,
            # 12 = scRNA_TFact, 17 = snATAC_TFact):
#                 if ( x[3] == 2 & x[6] == 0 ){
#                     if( ! is.na( x[17])) 
#                         if(x[17] == 1)
#                             if( ! is.na( x[12])) 
#                                 if(x[12] == 1 )
#                                      x_Expre_Lab[i,j] = 'oa'
#                  }

        }
    }
}


In [47]:
# Draw a blue-white-red heatmap of selected clusters (rows) and TFs (columns)
# and save it to a file.
#
# Arguments:
#   X                Numeric matrix / data frame with cluster names as row
#                    names and TF names as column names.
#   TFs              TFs to show; those absent from colnames(X) are skipped.
#   cl               Clusters to show, in plotting order; those absent from
#                    rownames(X) are skipped.
#   pdf_file         Output path, passed to pheatmap() as `filename`.
#   display_numbers  Optional matrix of cell labels with the same dimnames as
#                    X; NULL (default) draws no labels.
#
# Prints the requested clusters, the dimensions of the plotted matrix and the
# colour breaks. Returns the value of the pheatmap() call.
myHeatmap <- function(X, TFs, cl, pdf_file, display_numbers = NULL){
    print(cl)
    tfs <- intersect(TFs, colnames(X))
    cls <- intersect(cl, rownames(X))
    if (length(tfs) == 0 || length(cls) == 0)
        stop('myHeatmap: none of the requested TFs / clusters are present in X', call. = FALSE)
    # drop = FALSE keeps two dimensions when a single row or column is selected
    X <- X[ cls,  tfs, drop = FALSE ]
    print(dim(X))

    paletteLength <- 50
    myColor <- colorRampPalette(c(brewer.pal(n = 5, name = 'Blues')[4], "white", brewer.pal(n = 5, name = 'Reds')[4]))(paletteLength)
    # Colour breaks centred on 0: half of the palette spans [min, 0] and the
    # other half (0, max], so length(breaks) == paletteLength + 1.
    # ceiling / floor handle odd palette lengths.
    # na.rm = TRUE: scale() yields NaN for constant columns, which would
    # otherwise make min / max (and therefore every break) NA.
    lo <- min(X, na.rm = TRUE)
    hi <- max(X, na.rm = TRUE)
    if (!(lo < 0 && hi > 0))
        warning('myHeatmap: values do not span 0; the colour scale will not be centred on white', call. = FALSE)
    myBreaks <- c(seq(lo, 0, length.out = ceiling(paletteLength/2) + 1),
                  seq(hi/paletteLength, hi, length.out = floor(paletteLength/2)))
    print(myBreaks)

    # FALSE is pheatmap's own default for `display_numbers` (no cell labels)
    cell_labels <- FALSE
    if ( ! is.null(display_numbers) ){
        cell_labels <- display_numbers[ cls,  tfs, drop = FALSE ]
        print(dim(cell_labels))
    }
    pheatmap(X, cellheight = 10, cellwidth = 10, cluster_rows = F, cluster_cols = F,
             display_numbers = cell_labels,
             color = myColor, breaks = myBreaks,
             filename = pdf_file)
}

# Clusters shown in the heatmaps, in plotting order
CLofinterest <- c(
    'coelEpi', 'sKITLG', 'sLGR5_female', 'sPAX8b',
    'preGC_I_OSR1', 'ovarianSurf', 'preGC_II', 'preGC_II_hypoxia', 'preGC_III', 'preGC_III_Notch'
)

# TF activities (no cell labels)
myHeatmap(x_TFact, TFofinterest, CLofinterest,
          pdf_file = 'figures-supporting/0_FS2_heatmap_TFintersection_scRNA_TFact.pdf')
# Expression, with the agreement labels drawn in the cells
myHeatmap(x_Expre, TFofinterest, CLofinterest,
          pdf_file = 'figures-supporting/0_FS2_heatmap_TFintersection_scRNA_DEGs.pdf',
          display_numbers = x_Expre_Lab)
# myHeatmap(x_TFmotifs, TFofinterest, CLofinterest, 'figures-supporting/0_FS2_heatmap_TFintersection_snATAC_TFact.pdf')

 [1] "coelEpi"          "sKITLG"           "sLGR5_female"     "sPAX8b"          
 [5] "preGC_I_OSR1"     "ovarianSurf"      "preGC_II"         "preGC_II_hypoxia"
 [9] "preGC_III"        "preGC_III_Notch" 
[1]  7 87
 [1] -2.13453640 -2.04915495 -1.96377349 -1.87839203 -1.79301058 -1.70762912
 [7] -1.62224767 -1.53686621 -1.45148475 -1.36610330 -1.28072184 -1.19534039
[13] -1.10995893 -1.02457747 -0.93919602 -0.85381456 -0.76843311 -0.68305165
[19] -0.59767019 -0.51228874 -0.42690728 -0.34152582 -0.25614437 -0.17076291
[25] -0.08538146  0.00000000  0.04394328  0.13366080  0.22337832  0.31309584
[31]  0.40281336  0.49253088  0.58224840  0.67196592  0.76168344  0.85140096
[37]  0.94111848  1.03083600  1.12055352  1.21027104  1.29998857  1.38970609
[43]  1.47942361  1.56914113  1.65885865  1.74857617  1.83829369  1.92801121
[49]  2.01772873  2.10744625  2.19716377
 [1] "coelEpi"          "sKITLG"           "sLGR5_female"     "sPAX8b"          
 [5] "preGC_I_OSR1"     "ovarianSurf"      "pre

In [46]:
# Plot only female subset now
# CLofinterest = c('coelEpi','sKITLG', 'sLGR5_female', 'sPAX8b',
#                  'preGC_I_OSR1', 'ovarianSurf', 'preGC_II', 'preGC_II_hypoxia', 'preGC_III', 'preGC_III_Notch')




# Keep only rows of the clusters currently in CLofinterest, in that order
idx_cl <- unlist(sapply(CLofinterest, function(x) which(df$cluster == x)))
df <- df[idx_cl, ]



# Define TFs to plot: pairs supported by all three layers, or by two layers
# including expression when at least one effect size is large
# (expression log2FC > 0.5, motif log2FC > 1 or TF-activity log2FC > 1).
idx_tf <- which(
    df$N_agreement == 3 |
        (df$N_agreement == 2 & df$scRNA_DEGs == 1 &
             (df$scRNA_DEGs_avg_log2FC > 0.5 |
                  df$snATAC_TFact_avg_log2FC > 1 |
                  df$scRNA_TFact_avg_log2FC > 1))
)
TFofinterest <- df$TF[idx_tf]
length(TFofinterest)

# Restrict the heatmaps to these clusters
CLofinterest <- c(
    'sLGR5_female', 'preGC_I_OSR1', 'ovarianSurf',
    'preGC_II', 'preGC_II_hypoxia', 'preGC_III', 'preGC_III_Notch'
)

# TF activities (no cell labels)
myHeatmap(x_TFact, TFofinterest, CLofinterest,
          pdf_file = 'figures-supporting/0_FS2_heatmap_TFintersection_scRNA_TFact_FEMALES.pdf')
# Expression, with the agreement labels drawn in the cells
myHeatmap(x_Expre, TFofinterest, CLofinterest,
          pdf_file = 'figures-supporting/0_FS2_heatmap_TFintersection_scRNA_DEGs_FEMALES.pdf',
          display_numbers = x_Expre_Lab)
# myHeatmap(x_TFmotifs, TFofinterest, CLofinterest, 'figures-supporting/0_FS2_heatmap_TFintersection_snATAC_TFact.pdf')

[1] "sLGR5_female"     "preGC_I_OSR1"     "ovarianSurf"      "preGC_II"        
[5] "preGC_II_hypoxia" "preGC_III"        "preGC_III_Notch" 
[1]  7 87
 [1] -2.13453640 -2.04915495 -1.96377349 -1.87839203 -1.79301058 -1.70762912
 [7] -1.62224767 -1.53686621 -1.45148475 -1.36610330 -1.28072184 -1.19534039
[13] -1.10995893 -1.02457747 -0.93919602 -0.85381456 -0.76843311 -0.68305165
[19] -0.59767019 -0.51228874 -0.42690728 -0.34152582 -0.25614437 -0.17076291
[25] -0.08538146  0.00000000  0.04394328  0.13366080  0.22337832  0.31309584
[31]  0.40281336  0.49253088  0.58224840  0.67196592  0.76168344  0.85140096
[37]  0.94111848  1.03083600  1.12055352  1.21027104  1.29998857  1.38970609
[43]  1.47942361  1.56914113  1.65885865  1.74857617  1.83829369  1.92801121
[49]  2.01772873  2.10744625  2.19716377
[1] "sLGR5_female"     "preGC_I_OSR1"     "ovarianSurf"      "preGC_II"        
[5] "preGC_II_hypoxia" "preGC_III"        "preGC_III_Notch" 
[1]  7 87
 [1] -1.74354715 -1.67380527 -1.60406338 

In [None]:
# Save the distinct values of the first two columns of `df` (cluster and TF in
# the intersection table) for the selected rows. Row names are written too,
# because `row.names` is left at its default.
write.csv(unique(df[idx_tf, 1:2]), file = 'supporting_female_relevant_TFs.csv')

In [None]:
# plot cellphoneDB TFs
cellphoneTFs <- c('HES4', 'RBPJ')
# Reverse the cluster order for these plots
CLofinterest <- rev(CLofinterest)

# TF activities with the agreement labels drawn in the cells
myHeatmap(x_TFact, cellphoneTFs, CLofinterest,
          pdf_file = 'figures-supporting/0_FS2_heatmap_TFact_cellphoneDB.pdf',
          display_numbers = x_TFact_Lab)

# Same heatmap without labels
myHeatmap(x_TFact, cellphoneTFs, CLofinterest,
          pdf_file = 'figures-supporting/0_FS2_heatmap_TFact_cellphoneDBII.pdf')


# myHeatmap(x_Expre, cellphoneTFs, 
#           CLofinterest, 'figures-supporting/0_FS2_heatmap_TFexp_cellphoneDB.pdf', display_numbers = x_Expre_Lab)

# myHeatmap(x_Expre, cellphoneTFs, CLofinterest, 'figures-supporting/0_FS2_heatmap_TFexp_cellphoneDBII.pdf')

## What are the Dorothea targets of the new TFs?

In [None]:
# newTFs = c('SOX4', 'HOXA2', 'HOXA4', 'HOXA5', 'HOXB5', 'MXD4', 'DMRTB1', 'TP63')
# source('G_lib_TFacts.r')
# dorothea_regulon$TF_name = strsplit(dorothea_regulon$tf, '_') %>% sapply(., head, 1 )

In [None]:
# markers_crosspecie = c('DPPA4', 'ETV4', 'IFITM1', 'KIT', 'MYBL2', 'NANOG', 'NANOS3', 'POU5F1', 'PRDM1', 'RCC2', 'SALL4', 'SOX15', 'SOX17', 'TEAD4', 'TFAP2C', 'XIST', 'GATA3', 'PPARA', 
#     'DAZL', 'DDX4', 'MAEL', 'ESX1', 'MSX1', 'RFX6', 'STRA8', 'ZGLP1', 
#     'CHMP2A', 'DMC1', 'DMRTC2', 'INCA1', 'MEIKIN', 'MEIOB', 'MSH4', 'RAD51AP2', 'SPO11', 'SYCP1', 'SYCP3', 'TEX11', 'ZNF711', 'MLH3', 'ZNF131', 'ASTL', 'FIGLA', 'GDF9', 'JAG1', 'NOBOX', 'OOSP2', 'SOHLH1', 'SOHLH2', 'YBX2', 'ZAR1', 'ZP3','BMP15','FOXO3')


# subset(dorothea_regulon, TF_name %in% newTFs & 
#        target %in% DEGs$gene[ DEGs$scRNA_DEGs_p_val_adj < 0.1] &
#       target %in% markers_crosspecie )

# subset(dorothea_regulon, TF_name %in% 'MXD4'  & 
#        target %in% DEGs$gene[ DEGs$scRNA_DEGs_p_val_adj < 0.001 & DEGs$scRNA_DEGs_avg_log2FC > 0.5 & DEGs$cluster == 'oogonia_STRA8' ])


# subset(dorothea_regulon, TF_name %in% 'ZGLP1'  & 
#        target %in% DEGs$gene[ DEGs$scRNA_DEGs_p_val_adj < 0.001 & DEGs$scRNA_DEGs_avg_log2FC > 0.5 & DEGs$cluster == 'oogonia_STRA8' ])