# CellSign - GRANULOSA-GERM cells

In [1]:
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)
library(reshape2)
# library(plyr)

R2TF_df = read.csv('~/gonads/human_v2/cellphoneDB/database/R2TF_interactions_curated.csv', stringsAsFactors = F, sep=',')
R2TF = lapply(unique(R2TF_df$receptor_symbol_a), function(re) subset(R2TF_df, receptor_symbol_a==re)$TF_symbol_a )
names(R2TF) = unique(R2TF_df$receptor_symbol_a)
CellSign_TFs = unlist(R2TF) %>% unique(.)
              
              
extract_relevant_cell2receptor = function(result){
    
    # simplify names to match TF database
    result$name_a = result$gene_a
    result$name_a[ grep('complex', result$partner_a) ] = gsub('complex:', '', result$partner_a[ grep('complex', result$partner_a) ])

    result$name_b = result$gene_b
    result$name_b[ grep('complex', result$partner_b) ] = gsub('complex:', '', result$partner_b[ grep('complex', result$partner_b) ])

    
    # filte rinteractions containing such receptors
    result = subset(result,  name_a %in% names(R2TF) | name_b %in% names(R2TF))
    
    
    # partner - A
    result_a = subset(result,  name_a %in% names(R2TF) )[ , grep('\\.\\.\\.', colnames(result) ) ]
    receptors_a = subset(result,  name_a %in% names(R2TF) )$name_a
    result_a = result_a[ , colSums(result_a) > 0 ]



    celltype_a = sapply(strsplit(colnames(result_a), split = '\\.\\.\\.'), head, 1)
    result_a = sapply(unique(celltype_a), function(cell){
        apply(result_a[ , celltype_a == cell ], 1, sum) %>% unname(.)
    })


    result_a = sapply(unique(receptors_a), function(receptor){
        x = result_a[ which(receptors_a == receptor) , ]
        if( is.matrix(x) ) {
            x = apply(x, 2, sum) %>% unname(.)
        }else{
            x
        }
        return(x)
    })

    rownames(result_a) = unique(celltype_a)
    df_a = melt(result_a)
    df_a = subset(df_a, value > 0)
    df_a
    
    # partner - B
    result_b = subset(result,  name_b %in% names(R2TF) )[ , grep('\\.\\.\\.', colnames(result) ) ]
    receptors_b = subset(result,  name_b %in% names(R2TF) )$name_b
    result_b = result_b[ , colSums(result_b) > 0 ]



    celltype_b = sapply(strsplit(colnames(result_b), split = '\\.\\.\\.'), tail, 1)
    result_b = sapply(unique(celltype_b), function(cell){
        apply(result_b[ , celltype_b == cell ], 1, sum) %>% unname(.)
    })


    result_b = sapply(unique(receptors_b), function(receptor){
        x = result_b[ which(receptors_b == receptor) , ]
        if( is.matrix(x) ) {
            x = apply(x, 2, sum) %>% unname(.)
        }else{
            x
        }
        return(x)
    })

    rownames(result_b) = unique(celltype_b)
    df_b = melt(result_b)
    df_b = subset(df_b, value > 0)
    df_b
    
    df = rbind(df_a[,1:2], df_b[,1:2]) %>% unique()
    names(df) = c('celltype', 'receptor')
    
    return(df)
    
}


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




## Identify relevant TFs

In [2]:
germ_TFs = read.csv('figures_germcells/human_TFs_measurements.csv', stringsAsFactors = F)
germ_TFs = subset(germ_TFs, RNA_TFact == 1  |  ATAC_TFact==1)
germ_TFs = subset(germ_TFs, RNA_DEGs.avg_logFC > 0.1)
germ_TFs = subset(germ_TFs, TF %in% CellSign_TFs)
unique(germ_TFs$TF)


soma_TFs = read.csv('figures_somatic/granulosa_human_TFs_measurements.csv', stringsAsFactors = F)
soma_TFs = subset(soma_TFs, RNA_TFact == 1 |  ATAC_TFact==1)
soma_TFs = subset(soma_TFs, RNA_DEGs.avg_logFC > 0.1 )
soma_TFs = subset(soma_TFs, TF %in% CellSign_TFs)
unique(soma_TFs$TF)

cell2TF_relevant = rbind(germ_TFs, soma_TFs)[, c('cluster', 'TF', 'ID') ]
head(cell2TF_relevant)

Unnamed: 0_level_0,cluster,TF,ID
Unnamed: 0_level_1,<chr>,<chr>,<chr>
1,GC,SOX4,GC__SOX4
4,oocyte,HEY2,oocyte__HEY2
5,oocyte,RORA,oocyte__RORA
6,oocyte,STAT1,oocyte__STAT1
25,PGC,STAT3,PGC__STAT3
27,pre_spermatogonia,AR,pre_spermatogonia__AR


# CellSign: identify relevant Receptors linked to TFs

In [3]:
result = read.csv('cellphoneDB/out_Rversion/relevant_interactions.txt', stringsAsFactors = F, sep = '\t')

In [4]:
cell2R_relevant = extract_relevant_cell2receptor(result)
cell2R_relevant$celltype = gsub('Somatic.', '', cell2R_relevant$celltype)
cell2R_relevant$celltype = gsub('Germ.', '', cell2R_relevant$celltype)
cell2R_relevant$ID = paste(cell2R_relevant$celltype, cell2R_relevant$receptor, sep = '__')

In [5]:
MERGED = list()
for ( relR in intersect(names(R2TF), cell2R_relevant$receptor)  ){
    if( relR %in%  cell2R_relevant$receptor){
        celltypes_with_R = subset(cell2R_relevant, receptor == relR)$celltype 
        for (downstreamTF in R2TF[[relR]] ){
            celltypes_with_TFR = subset(cell2TF_relevant, cluster %in% celltypes_with_R & TF == downstreamTF)$cluster %>% unique(.)
            if( length(celltypes_with_TFR) >= 1){
                for (cell in celltypes_with_TFR )
                    id = paste(relR, downstreamTF, cell)
                    MERGED[[id]] = cbind( subset(cell2R_relevant, receptor == relR & celltype == cell),
                                         subset(cell2TF_relevant, TF == downstreamTF & cluster == cell)
                )
            }
            
        }
            
    }    
}

In [6]:
CellSign_results = melt(MERGED, id.vars = names(MERGED[[1]]) )  
write.csv(df, file = 'cellphoneDB/out_Rversion/relevant_downstream_CellSign.txt', row.names=F)

## Explore results

In [7]:
head(CellSign_results)

Unnamed: 0_level_0,celltype,receptor,ID,cluster,TF,ID.1,L1
Unnamed: 0_level_1,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,<chr>
1,oocyte,ACVR_1B2B_receptor,oocyte__ACVR_1B2B_receptor,oocyte,SMAD2,oocyte__ACVR_1B2B_receptor,ACVR_1B2B_receptor SMAD2 oocyte
2,preGC_I,AR,preGC_I__AR,preGC_I,AR,preGC_I__AR,AR AR preGC_I
3,preGC_I,BMPR1A_BMPR2,preGC_I__BMPR1A_BMPR2,preGC_I,SMAD1,preGC_I__BMPR1A_BMPR2,BMPR1A_BMPR2 SMAD1 preGC_I
4,oogonia_STRA8,BMPR1A_BMPR2,oogonia_STRA8__BMPR1A_BMPR2,oogonia_STRA8,ZGLP1,oogonia_STRA8__BMPR1A_BMPR2,BMPR1A_BMPR2 ZGLP1 oogonia_STRA8
5,preGC_I,BMPR1B_BMPR2,preGC_I__BMPR1B_BMPR2,preGC_I,SMAD1,preGC_I__BMPR1B_BMPR2,BMPR1B_BMPR2 SMAD1 preGC_I
6,oogonia_STRA8,BMPR1B_BMPR2,oogonia_STRA8__BMPR1B_BMPR2,oogonia_STRA8,ZGLP1,oogonia_STRA8__BMPR1B_BMPR2,BMPR1B_BMPR2 ZGLP1 oogonia_STRA8


In [8]:
CellSign_percelltype = list()
for (cl in unique(CellSign_results$celltype) ){
    message(cl)
    x = subset(CellSign_results, celltype == cl)
    CellSign_percelltype[[cl]] = x
    print(x[, c('celltype', 'receptor', 'TF') ])
}
CellSign_results = melt(CellSign_percelltype, id.vars = names(CellSign_percelltype[[1]]) )  

oocyte



   celltype           receptor    TF
1    oocyte ACVR_1B2B_receptor SMAD2
18   oocyte               ESR2  ESR2
31   oocyte               RORA  RORA
38   oocyte                DCC  OTX2


preGC_I



   celltype     receptor    TF
2   preGC_I           AR    AR
3   preGC_I BMPR1A_BMPR2 SMAD1
5   preGC_I BMPR1B_BMPR2 SMAD1
8   preGC_I  BMR1A_AVR2B SMAD1
11  preGC_I  BMR1B_AVR2B SMAD1
22  preGC_I        MERTK STAT1


oogonia_STRA8



        celltype             receptor    TF
4  oogonia_STRA8         BMPR1A_BMPR2 ZGLP1
6  oogonia_STRA8         BMPR1B_BMPR2 ZGLP1
7  oogonia_STRA8          BMR1A_ACR2A ZGLP1
9  oogonia_STRA8          BMR1A_AVR2B ZGLP1
10 oogonia_STRA8          BMR1B_AVR2A ZGLP1
12 oogonia_STRA8          BMR1B_AVR2B ZGLP1
32 oogonia_STRA8 RAreceptor_RARA_RXRA  RARA
33 oogonia_STRA8 RAreceptor_RARB_RXRB  RARB
34 oogonia_STRA8      RAreceptor_RARA  RARA
35 oogonia_STRA8      RAreceptor_RARB  RARB


early_supporting



           celltype receptor    TF
13 early_supporting    CXCR4 STAT1
30 early_supporting   PDGFRA STAT1


PGC



   celltype receptor    TF
14      PGC    CXCR4 STAT3
20      PGC      KIT STAT3


OSE



   celltype receptor    TF
15      OSE     EGFR STAT3
19      OSE    FGFR2  ESR1
27      OSE   NOTCH2   ID4


granulosa



    celltype receptor     TF
16 granulosa     EGFR STAT5B
23 granulosa   NOTCH1   HES1
24 granulosa   NOTCH1   HEY1
25 granulosa   NOTCH1   HEY2
26 granulosa   NOTCH2   HES1
28 granulosa   NOTCH3   HES4
37 granulosa    EPHB4   ELK1
39 granulosa    EPHB2   ELK1


preGC_IIb



    celltype receptor    TF
17 preGC_IIb     ESR1  ESR1
29 preGC_IIb    NR1H4 NR1H4


oogonia_meiotic



          celltype receptor     TF
21 oogonia_meiotic      KIT STAT5B


CoelEpi_LHX9



       celltype        receptor    TF
36 CoelEpi_LHX9 RAreceptor_RARG  RARG
40 CoelEpi_LHX9           PTH1R FOXC1


# plot

In [9]:
priorTF_germs = subset(CellSign_results, celltype %in% germ_TFs$cluster )$TF  %>% unique(.)
priorTF_soma = subset(CellSign_results, celltype %in% soma_TFs$cluster )$TF  %>% unique(.)