# C. Plots

In [7]:
library(dplyr)
library(reshape2)
library(plyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)



# Build a long-format table linking cell types to the genes behind each
# cellphoneDB interaction, annotated with per-cluster expression summaries.
#
# Arguments:
#   CPresults          table with interactions in rows and cell-type pairs in
#                      columns (column labels 'cellA--cellB'); a value of 1
#                      marks a cellphoneDB hit.
#   genes2filter       optional vector of gene symbols. When given, only
#                      interactions with at least one partner gene in the
#                      vector are kept, and columns left without any hit are
#                      removed.
#   show_only_selected if TRUE, keep only (interaction, pair) entries equal to 1.
#
# Returns a data frame with columns cell, gene, interaction, partner,
# cellphoneDB_hit, id, AveExpr_cluster and percentExpr_cluster.
#
# Reads the globals Int2Gene (interaction -> partner_a / partner_b genes),
# AVG (columns id, avg) and PERC (columns id, percent).
get_cells2genes = function(CPresults, genes2filter = NULL, show_only_selected = FALSE){
    if( ! is.null(genes2filter) ){
        # vapply always yields a logical vector, even when CPresults has no rows
        idx = vapply(rownames(CPresults),
                     function(x) any(unlist(Int2Gene[[x]]) %in% genes2filter),
                     logical(1))
        message(sum(idx), ' interactions')
        # drop = FALSE keeps the 2-d shape (and the 'cellA--cellB' column labels)
        # when a single row or column survives the filter
        CPresults = CPresults[ idx, , drop = FALSE]
        CPresults = CPresults[ , colSums(CPresults) > 0, drop = FALSE]
    }
    # reformat as dataframe: one row per (interaction, cell pair)
    df = melt(as.matrix(CPresults))
    df$cellphoneDB_hit = df$value
    if (show_only_selected) # this is to show only expression values if int selected by cellphoneDB
        df = subset(df, cellphoneDB_hit == 1)
    # extract interaction partners
    # apply() hands each row over as a character vector:
    #   x[1] = interaction, x[2] = 'cellA--cellB', x[length(x)] = hit flag
    # partner_a is expressed by the first cell of the pair ...
    xA = apply(df, 1, function(x){
        data.frame(cell = strsplit(x[2], '--')[[1]][1],
            gene = Int2Gene[[x[1]]]$partner_a,
                   interaction = x[1],
                   partner = strsplit(x[2], '--')[[1]][2],
                   cellphoneDB_hit = x[length(x)],
                   row.names = NULL)
    })
    # ... and partner_b by the second one
    xB = apply(df, 1, function(x){
        data.frame(cell = strsplit(x[2], '--')[[1]][2],
            gene = Int2Gene[[x[1]]]$partner_b,
                   interaction = x[1],
                   partner = strsplit(x[2], '--')[[1]][1],
                   cellphoneDB_hit = x[length(x)],
                   row.names = NULL)
    })
    x = append(xA, xB)
    # Merge as data frame for plotting with ggplot
    cells2genesCP = melt(x, id.vars = names(x[[1]]))
    # column 6 is the list-index column added by melt(); it is not needed
    cells2genesCP = unique(cells2genesCP[,-6])
    # transfer expression info, matched on the 'cell gene' key
    cells2genesCP$id = paste(cells2genesCP$cell, cells2genesCP$gene)
    cells2genesCP$AveExpr_cluster =  AVG$avg[ match(cells2genesCP$id, AVG$id) ]
    cells2genesCP$percentExpr_cluster =  PERC$percent[ match(cells2genesCP$id, PERC$id) ]

    return(cells2genesCP)
}
    
                                      
                                      
# List the ordered cell-type pair labels to inspect between two sets of cell types.
#
# Arguments:
#   Atype, Btype  character vectors of cell-type names (they may be identical).
#
# Returns a character vector of labels 'X--Y'. A pair is included when at least
# one of its members is in Atype and at least one is in Btype; both orientations
# ('X--Y' and 'Y--X') are returned, and self pairs ('X--X') are included when X
# belongs to both sets. An empty Atype or Btype gives character(0).
get_combinations_of_interest = function(Atype, Btype){
    # A pair needs a member from each set, so an empty set means no pairs
    # (combn() would also fail on fewer than two cell types)
    if (length(Atype) == 0 || length(Btype) == 0)
        return(character(0))

    all_celltypes = c(Atype, Btype)

    # Self interactions (X, X) followed by all pairwise combinations; one pair per column
    self_pairs = matrix(rep(all_celltypes, each = 2), nrow = 2)
    cluster_combinations = cbind(self_pairs, combn(all_celltypes, 2))

    # We only want pairs including at least one cluster in Atype and one in Btype
    keepA = apply(cluster_combinations, 2, function(x) any(x %in% Atype))
    keepB = apply(cluster_combinations, 2, function(x) any(x %in% Btype))
    # drop = FALSE: keep a 2-row matrix even when a single pair survives
    cluster_combinations = cluster_combinations[, keepA & keepB, drop = FALSE]

    # Consider the reverse interaction too: A--B but B--A as well
    cluster_combinations = cbind(cluster_combinations,
                                 cluster_combinations[2:1, , drop = FALSE])
    cluster_combinations = t(unique(t(cluster_combinations)))

    # Make cluster pair labels: celltypeA--celltypeB
    cluster_combinations_labels = paste(cluster_combinations[1, ],
                                        cluster_combinations[2, ],
                                        sep = '--')
    return(cluster_combinations_labels)
}

In [8]:
# Analysis parameters
# Use only user_curated interactions?
filter_int_user_curated <- TRUE
# min % of cells in the cluster required with expression > 0 for the gene
per_cutoff <- 0.1
# max adjusted p-value required to consider a gene as DEG
pval_cutoff <- 0.001
# min logFC to consider a gene as DEG
logFC_cutoff <- 0.2

# Load Microenvironments

In [9]:
# Germ-cell clusters
germs <- c(
    'PGC_mitotic',
    'PGC',
    'oogonia_STRA8',
    'oogonia_meiosis',
    'oocyte',
    'pre-spermatogonia'
)
# Somatic clusters, grouped as female (somaF), male (somaM) and both (somaB)
somaF <- c(
    'sLGR5_female',
    'preGC_I_OSR1',
    'ovarianSurf',
    'preGC_II',
    'preGC_II_hypoxia',
    'preGC_III',
    'preGC_III_Notch'
)
somaM <- c('sPAX8m', 'Sertoli', 'Sertoli_WFDC2', 'sLGR5_male')
somaB <- c('coelEpi', 'sKITLG', 'sPAX8b')
soma <- c(somaM, somaB, somaF)


# One entry per microenvironment, holding the cell types listed for it in the csv
micro_df <- read.csv('cellphoneDB/input/microenviroments/microenviroments.csv', stringsAsFactors = FALSE)
microenviroments <- sapply(split(micro_df, micro_df$microenviroment), select, celltype)
# sapply() appends the selected column name to each entry name; strip it
names(microenviroments) <- gsub('.celltype', '', names(microenviroments))

# Load Cellphone Interactions

In [10]:
# cellphoneDB reference files: gene table and complex composition
# Gene table (the columns uniprot and gene_name are used below)
genes_cpDB <- read.delim('~/gonads/main/cellphoneDB/hsa_uniprot.txt', stringsAsFactors = FALSE, sep = '\t')

# Complex members; complex names get a 'complex:' prefix
com_cpDB <- unique(read.csv('~/gonads/main/cellphoneDB/database/complex_curated_032021.tsv', stringsAsFactors = FALSE, sep='\t'))
com_cpDB$complex_name <- paste0('complex:', com_cpDB$complex_name)
# Dictionary complex -> gene symbols: the identifiers in columns 2:5 of the
# complex table are looked up in the uniprot column of the gene table
Com2Gene <- lapply(com_cpDB$complex_name, function(cx) {
    member_ids <- unlist(com_cpDB[com_cpDB$complex_name == cx, 2:5])
    unique(subset(genes_cpDB, uniprot %in% member_ids)$gene_name)
})
names(Com2Gene) <- com_cpDB$complex_name

# Interactions: first 10 columns of the cellphoneDB/out/means.txt output file
int_cpDB <- read.delim('~/gonads/main/cellphoneDB/out/means.txt', stringsAsFactors = FALSE)[, 1:10]
# MANDATORY: remove "curated" because we have cleaned and renamed them (this is a long story, just do it)
int_cpDB <- subset(int_cpDB, annotation_strategy != 'curated')
# OPTIONAL: restrict to user_curated interactions
if (filter_int_user_curated) {
    int_cpDB <- subset(int_cpDB, annotation_strategy == 'user_curated')
}
# Dictionary interaction -> list(partner_a = genes, partner_b = genes).
# Fields are addressed by position: 3/4 hold the partner names and 5/6 the gene
# names; an empty gene name means the partner is a complex, whose members are
# taken from Com2Gene.
Int2Gene <- apply(int_cpDB, 1, function(int) {
    int <- unname(int)
    partnerA <- if (int[5] == '') Com2Gene[[int[3]]] else int[5]
    partnerB <- if (int[6] == '') Com2Gene[[int[4]]] else int[6]
    list(partner_a = partnerA, partner_b = partnerB)
})
names(Int2Gene) <- int_cpDB$interacting_pair

# Load data

## Load cellphone results file

In [12]:
# Load the cellphoneDB hit table (interactions in rows, cell-type pairs in
# columns) written for the chosen p-value / logFC cutoffs
CPresults = read.csv(paste0('cellphoneDB/out/interactions_' , pval_cutoff, '_logFC', logFC_cutoff, '.csv'), stringsAsFactors = F)
rownames(CPresults) = CPresults$X
CPresults = CPresults[, -1, drop = FALSE]
# read.csv() replaces characters that are not valid in R names with '.', so the
# pair separator is expected to arrive as '...' (TODO confirm against the csv
# header) and 'pre-spermatogonia' as 'pre.spermatogonia'.
# Both are matched literally (fixed = TRUE): a regex such as '\\...' matches a
# dot followed by ANY two characters and would also eat '.sp' in
# 'pre.spermatogonia'.
colnames(CPresults) = gsub('...', '--', colnames(CPresults), fixed = TRUE)
colnames(CPresults) = gsub('pre.spermatogonia', 'pre-spermatogonia', colnames(CPresults), fixed = TRUE)
head(CPresults)[, 1:5]

Unnamed: 0_level_0,PGC--PGC,PGC_mitotic--PGC_mitotic,coelEpi--coelEpi,ovarianSurf--ovarianSurf,preGC_II--preGC_II
Unnamed: 0_level_1,<int>,<int>,<int>,<int>,<int>
22Hydroxycholesterol_byCYP11A1_NR1H4,0,0,0,0,0
Androsterone_byHSD17B6_NR1H4,0,0,0,0,0
2arachidonoylglycerol_byDAGLB_CNR1,0,0,0,0,0
FN1_integrin_a4b1_complex,0,0,0,0,0
PLAUR_integrin_a4b1_complex,0,0,0,0,0
JAM2_integrin_a4b1_complex,0,0,0,0,0


# Load expression information

In [13]:
# Load the differential-expression (DE) results of the clusters of interest.
# The merged file is read with these columns:
# - gene: gene symbol
# - cluster: cell type
# - p_val_adj: adjusted p-value from the DE test
# - avg_logFC: log fold change from the DE test
path_DEGs <- 'cellphoneDB/input/DEGs/merged.csv'


# Keep cellphoneDB genes that pass both the adjusted p-value and the logFC cutoff
DE_df <- filter(
    read.csv(path_DEGs, stringsAsFactors = FALSE),
    gene %in% genes_cpDB$gene_name,
    p_val_adj < pval_cutoff,
    avg_logFC > logFC_cutoff
)


# Dictionary cluster -> genes retained for that cluster
clusters <- unique(DE_df$cluster)
is_DE <- setNames(
    lapply(clusters, function(cl) {
        filter(DE_df, cluster == cl)$gene
    }),
    gsub('pre.spermatogonia', 'pre-spermatogonia', clusters)
)

In [14]:
# Percentage of expressing cells per gene (rows) and cluster (columns)
path_Per <- 'cellphoneDB/input/percent.csv'


# Read the matrix and melt it into long format: gene, cluster, percent
Per_mat <- read.csv(path_Per, stringsAsFactors = FALSE, row.names = 1)
Per_df <- melt(as.matrix(Per_mat), factorsAsStrings = TRUE)
names(Per_df) <- c('gene', 'cluster', 'percent')
# Restore the hyphen in the matrix header (Per_df was melted before this fix
# and still carries the dotted name)
colnames(Per_mat) <- gsub('pre.spermatogonia', 'pre-spermatogonia', colnames(Per_mat))


# Filter expressed (disabled)
# Per_df = subset(Per_df, percent >= per_cutoff)
Per_df <- Per_df[order(Per_df$gene), ]

# Lookup table keyed by 'cluster gene'
PERC <- Per_df
PERC$cluster <- gsub('pre.spermatogonia', 'pre-spermatogonia', PERC$cluster)
PERC$id <- paste(PERC$cluster, PERC$gene)
head(PERC)
unique(PERC$cluster)

Unnamed: 0_level_0,gene,cluster,percent,id
Unnamed: 0_level_1,<fct>,<chr>,<dbl>,<chr>
1,MIR1302-2HG,PGC,0,PGC MIR1302-2HG
28231,MIR1302-2HG,PGC_mitotic,0,PGC_mitotic MIR1302-2HG
56461,MIR1302-2HG,Sertoli,0,Sertoli MIR1302-2HG
84691,MIR1302-2HG,Sertoli_WFDC2,0,Sertoli_WFDC2 MIR1302-2HG
112921,MIR1302-2HG,coelEpi,0,coelEpi MIR1302-2HG
141151,MIR1302-2HG,oocyte,0,oocyte MIR1302-2HG


In [15]:
# Average expression per gene (rows) and cluster (columns)
path_Exp <- 'cellphoneDB/input/average_log.csv'

# Read, restore the hyphen in the header and keep germ / somatic clusters only
Avg_mat <- read.csv(path_Exp, stringsAsFactors = FALSE, row.names = 1)
colnames(Avg_mat) <- gsub('pre.spermatogonia', 'pre-spermatogonia', colnames(Avg_mat))
Avg_mat <- Avg_mat[, colnames(Avg_mat) %in% c(germs, soma)]
# Scale each gene within lineages: germ clusters and somatic clusters are
# standardised separately, then put back side by side
cn <- colnames(Avg_mat)
Avg_matG <- t(apply(Avg_mat[, cn %in% germs], 1, scale))
colnames(Avg_matG) <- intersect(cn, germs)
Avg_matS <- t(apply(Avg_mat[, cn %in% soma], 1, scale))
colnames(Avg_matS) <- intersect(cn, soma)
# Avg_matSf =  apply(Avg_mat[ , somaF],1,scale) %>% t(.)
# Avg_matSm =  apply(Avg_mat[ , somaM],1,scale) %>% t(.)
# Avg_mat = cbind(Avg_matG, Avg_matSf, Avg_matSm)
Avg_mat <- cbind(Avg_matG, Avg_matS)
# colnames(Avg_mat) = cn

# Long format for plotting: gene, cluster, avg (sorted by gene)
Avg_df <- melt(as.matrix(Avg_mat), factorsAsStrings = TRUE)
names(Avg_df) <- c('gene', 'cluster', 'avg')
Avg_df <- Avg_df[order(Avg_df$gene), ]

# Lookup table keyed by 'cluster gene'
AVG <- Avg_df
AVG$id <- paste(AVG$cluster, AVG$gene)
head(AVG)

Unnamed: 0_level_0,gene,cluster,avg,id
Unnamed: 0_level_1,<fct>,<fct>,<dbl>,<chr>
1,MIR1302-2HG,PGC,-0.386049,PGC MIR1302-2HG
28231,MIR1302-2HG,PGC_mitotic,-0.5941947,PGC_mitotic MIR1302-2HG
56461,MIR1302-2HG,oocyte,-0.7596473,oocyte MIR1302-2HG
84691,MIR1302-2HG,oogonia_STRA8,0.3858307,oogonia_STRA8 MIR1302-2HG
112921,MIR1302-2HG,oogonia_meiosis,-0.5177983,oogonia_meiosis MIR1302-2HG
141151,MIR1302-2HG,pre-spermatogonia,1.8718586,pre-spermatogonia MIR1302-2HG


# Save results per tissue

In [18]:
# Export, per tissue, the germ-soma hits of CPresults: columns are restricted to
# the germ/soma pairs of the tissue, then rows and columns without any hit are
# removed. Output file names are built from pval_cutoff / logFC_cutoff so they
# always match the cutoffs of the loaded results; drop = FALSE keeps the table
# 2-d when a single row or column remains.

# Filter cells in ovary only (cortex and medulla microenvironments, without the sPAX8 clusters)
cells = unique(c(microenviroments$cortex, microenviroments$medulla))
cells = setdiff(cells, c('sPAX8m', 'sPAX8b'))
comb_of_interest = get_combinations_of_interest(intersect(germs, cells), intersect(soma, cells))
fCPresults = CPresults[ , colnames(CPresults) %in% comb_of_interest, drop = FALSE ]
fCPresults = fCPresults[ rowSums(fCPresults) > 0, , drop = FALSE ]
fCPresults = fCPresults[ , colSums(fCPresults) > 0, drop = FALSE ]
write.csv(fCPresults, file = paste0('cellphoneDB/out/ovarian_interactions_', pval_cutoff, '_logFC', logFC_cutoff, '.csv'))


# Filter cells in testis only (testis_cords microenvironment, without the sPAX8 clusters)
cells = unique(c(microenviroments$testis_cords))
cells = setdiff(cells, c('sPAX8m', 'sPAX8b'))
comb_of_interest = get_combinations_of_interest(intersect(germs, cells), intersect(soma, cells))
fCPresults = CPresults[ , colnames(CPresults) %in% comb_of_interest, drop = FALSE ]
fCPresults = fCPresults[ rowSums(fCPresults) > 0, , drop = FALSE ]
fCPresults = fCPresults[ , colSums(fCPresults) > 0, drop = FALSE ]
write.csv(fCPresults, file = paste0('cellphoneDB/out/testis_interactions_', pval_cutoff, '_logFC', logFC_cutoff, '.csv'))

# PLOTS
### Identify interactions per microenvironment

In [11]:
# For each microenvironment, collect the interactions with at least one hit in
# a cell-type pair whose two members both belong to that microenvironment
intXmicroenviroment = list()
for (mienv in names(microenviroments)){
    # TRUE for the 'cellA--cellB' columns fully contained in the microenvironment
    idx = vapply(colnames(CPresults),
                 function(x) all(unlist(strsplit(x, split = '--')) %in% microenviroments[[mienv]] ),
                 logical(1))
    # drop = FALSE keeps a table even when one (or no) column matches, so the
    # row sums are always defined
    intXmicroenviroment[[ mienv ]] = rownames(CPresults)[ rowSums(CPresults[ , idx, drop = FALSE]) > 0 ]
}

### Extract interactions and plot

In [12]:
# For every microenvironment, write dot plots (pdf) and hit tables (csv) of the
# genes involved in germ-soma and soma-soma interactions into cellphoneDB/out/<microenvironment>/
gens_of_interest = rownames(Avg_mat)

# Dot plot with genes on x and cell types on y; colour encodes AveExpr_cluster
# and point size encodes percentExpr_cluster. Returns the ggplot object.
plot_gene_dots = function(df2plot){
    ggplot(df2plot,  aes(x = gene,
                         y = cell,
                         color = AveExpr_cluster,
                         size = percentExpr_cluster)) +
        geom_point() +
        scale_color_gradient2(low = brewer.pal(n = 5, name = 'Blues')[4],
                              high = brewer.pal(n = 5, name = 'Reds')[4],
                              mid = 'grey90') +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
}

# Restrict a hit table to the given interactions (rows) and pair labels
# (columns), then remove rows and columns without any hit. drop = FALSE keeps
# the table 2-d when a single row or column remains.
filter_hits = function(results, interactions, comb_of_interest){
    hits = results[ interactions, colnames(results) %in% comb_of_interest, drop = FALSE ]
    hits = hits[ rowSums(hits) > 0, , drop = FALSE ]
    hits[ , colSums(hits) > 0, drop = FALSE ]
}

# Shared file-name suffix recording the cutoffs in use
cutoff_tag = paste0(pval_cutoff, '_logFC', logFC_cutoff)

for (mi in names(microenviroments)){
    print(mi)
    outdir = paste0('cellphoneDB/out/', mi)
    dir.create(outdir, showWarnings = F, recursive = TRUE)

    cells = microenviroments[[mi]]
    interactions = intXmicroenviroment[[ mi ]]

    # Germ-somatic interactions
    comb_of_interest = get_combinations_of_interest(intersect(germs, cells), intersect(soma, cells))
    fCPresults = filter_hits(CPresults, interactions, comb_of_interest)

    cells2genes = get_cells2genes(fCPresults, genes2filter = gens_of_interest)
    cells2genes$value = 1
    cells2genes$cell = as.character(cells2genes$cell)
    cells2genes$gene = as.character(cells2genes$gene)

    # germ side: plot every gene/cell entry, export only the cellphoneDB hits
    G_plot = subset(cells2genes, cell %in% germs)
    G_plot$cell = factor(G_plot$cell, levels = germs)
    df2plot = G_plot[ , names(G_plot) != 'interaction' ] %>% unique(.)
    # plot width grows with the number of genes on the x axis
    wi = round(length(unique(G_plot$gene))/5, 0)+ 1
    # the plot is passed explicitly instead of relying on last_plot()
    ggsave(filename = paste0(outdir,'/GSint_germs_' , cutoff_tag, '.pdf'), plot = plot_gene_dots(df2plot), dpi = 300, width = wi, height = 3)
    G_plot = subset(G_plot,  cellphoneDB_hit == 1)
    write.csv(G_plot, file = paste0(outdir,'/GSint_germs_' , cutoff_tag, '.csv'))

    # somatic side of the same germ-somatic interactions
    S_plot = subset(cells2genes, cell %in% soma)
    S_plot$cell = factor(S_plot$cell, levels = soma)
    df2plot = S_plot[ , names(S_plot) != 'interaction' ] %>% unique(.)
    wi = round(length(unique(S_plot$gene))/5, 0)+ 1
    ggsave(filename = paste0(outdir,'/GSint_soma_' , cutoff_tag, '.pdf'), plot = plot_gene_dots(df2plot), dpi = 300, width = wi, height = 3)
    S_plot = subset(S_plot,  cellphoneDB_hit == 1)
    write.csv(S_plot, file = paste0(outdir,'/GSint_soma_' , cutoff_tag, '.csv'))


    # somatic-somatic interactions
    comb_of_interest = get_combinations_of_interest(intersect(soma, cells), intersect(soma, cells))
    fCPresults = filter_hits(CPresults, interactions, comb_of_interest)

    cells2genes = get_cells2genes(fCPresults, genes2filter = gens_of_interest)
    cells2genes$value = 1
    cells2genes$cell = as.character(cells2genes$cell)
    cells2genes$gene = as.character(cells2genes$gene)

    S_plot = subset(cells2genes, cell %in% intersect(microenviroments[[mi]], soma))
    S_plot$cell = factor(S_plot$cell, levels = soma)
    df2plot = S_plot[ , names(S_plot) != 'interaction' ] %>% unique(.)
    wi = round(length(unique(S_plot$gene))/5, 0) + 1
    ggsave(filename = paste0(outdir,'/SSint_soma_' , cutoff_tag, '.pdf'), plot = plot_gene_dots(df2plot), dpi = 300, width = wi, height = 4)

    S_plot = subset(S_plot,  cellphoneDB_hit == 1)
    write.csv(S_plot, file = paste0(outdir,'/SSint_soma_' , cutoff_tag, '.csv'))
}

[1] "cortex"


71 interactions

150 interactions



[1] "medulla"


171 interactions

153 interactions



[1] "testis_cords"


155 interactions

179 interactions

