#  Plots and tables from CellphoneDB

In [1]:
library(dplyr)
library(reshape2)
library(plyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)

# Build a long (cell, gene) table from a CellphoneDB interaction x cell-pair matrix.
#
# Arguments:
#   CPresults    - data frame (or matrix) whose row names are interacting pairs (keys of
#                  the global `Int2Gene`) and whose column names are cell pairs written
#                  as 'cellA--cellB'. Only entries equal to 1 are used.
#   genes2filter - optional character vector. When given, only interactions with at
#                  least one participating gene in this set are kept, and cell pairs
#                  left without any interaction are dropped.
# Returns:
#   data frame with one row per unique (cell, gene) combination and the columns
#   `cell`, `gene`, `id` ('<cell> <gene>'), `AveExpr_cluster` and `percentExpr_cluster`;
#   the last two are looked up in the global `PERC` by `id` (NA when not found).
#   A zero-row data frame is returned when nothing passes the filter.
# Note: reads the globals `Int2Gene` and `PERC`, which must exist when this is called.
get_cells2genes = function(CPresults, genes2filter = NULL){
    if (!is.null(genes2filter)) {
        keep_int <- vapply(rownames(CPresults),
                           function(x) any(unlist(Int2Gene[[x]]) %in% genes2filter),
                           logical(1))
        # drop = FALSE: keep the 2-d shape even if a single row/column survives
        CPresults <- CPresults[keep_int, , drop = FALSE]
        CPresults <- CPresults[, colSums(CPresults) > 0, drop = FALSE]
    }

    # Positions of the (interaction, cell pair) entries equal to 1, in column-major order
    mat <- as.matrix(CPresults)
    hits <- which(mat == 1, arr.ind = TRUE)

    if (nrow(hits) == 0) {
        cells2genesCP <- data.frame(cell = character(0), gene = character(0),
                                    stringsAsFactors = FALSE)
    } else {
        int_names <- as.character(rownames(mat)[hits[, 1]])
        cell_pairs <- strsplit(as.character(colnames(mat)[hits[, 2]]), '--')

        # One row per gene of the requested partner, paired with the cell on that side
        # of the 'cellA--cellB' name (side 1 = partner_a, side 2 = partner_b)
        partner_table <- function(side, partner) {
            genes <- lapply(int_names, function(int) Int2Gene[[int]][[partner]])
            cell <- vapply(cell_pairs, function(p) p[side], character(1))
            data.frame(cell = rep(cell, lengths(genes)),
                       gene = as.character(unlist(genes, use.names = FALSE)),
                       stringsAsFactors = FALSE)
        }
        cells2genesCP <- unique(rbind(partner_table(1, 'partner_a'),
                                      partner_table(2, 'partner_b')))
    }

    # transfer expression info
    cells2genesCP$id <- paste(cells2genesCP$cell, cells2genesCP$gene)
    in_perc <- match(cells2genesCP$id, PERC$id)
    cells2genesCP$AveExpr_cluster <- PERC$AveExpr_cluster[in_perc]
    cells2genesCP$percentExpr_cluster <- PERC$percentExpr_cluster[in_perc]

    return(cells2genesCP)
}


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


------------------------------------------------------------------------------

You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)

------------------------------------------------------------------------------


Attaching package: ‘plyr’


The following objects are masked from ‘package:dplyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize



********************************************************

Note: As of version 1.0.0, cowplot does not change the

  default ggplot2 theme anymore. To recover the previous

  behavior, execute:
  theme_set(theme_cowplot())

********************************************************




# Load Cellphone Genes

In [2]:
# Analysis settings
# Use only user_curated interactions?
filter_int_user_curated <- TRUE
# min % of cells in the cluster required with expression > 0 for the gene
per_cutoff <- 0.1
# max adjusted p-value required to consider a gene as DEG
pval_cutoff <- 0.001
# min logFC to consider a gene as DEG
logFC_cutoff <- 0

In [None]:
# CellphoneDB reference tables: gene names and complex composition

# Gene names (table with at least the columns `uniprot` and `gene_name`)
genes_cpDB <- read.delim('~/cellphoneDB/hsa_uniprot.txt', stringsAsFactors = FALSE, sep = '\t')

# Members of each curated complex; names are prefixed with 'complex:'
com_cpDB <- unique(read.csv('~/farm/CellPhoneDB-data_smallmolecules/data/sources/complex_curated.csv',
                            stringsAsFactors = FALSE))
com_cpDB$complex_name <- paste0('complex:', com_cpDB$complex_name)

# Dictionary complex name -> unique gene symbols of its members
# (member identifiers are taken from columns 2 to 5 of the complex table)
Com2Gene <- lapply(com_cpDB$complex_name, function(cx) {
    members <- unlist(com_cpDB[com_cpDB$complex_name == cx, 2:5])
    unique(subset(genes_cpDB, uniprot %in% members)$gene_name)
})
names(Com2Gene) <- com_cpDB$complex_name

# Interactions listed in the cellphoneDB/out/means.txt output file (first 10 columns only)
int_cpDB <- read.delim('data/cellphoneDB/out/means.txt', stringsAsFactors = FALSE)[, 1:10]
# MANDATORY: remove "curated" because we have cleaned and renamed them (this is a long story, just do it)
int_cpDB <- subset(int_cpDB, annotation_strategy != 'curated')
# OPTIONAL: restrict to the user_curated interactions
if (filter_int_user_curated) {
    int_cpDB <- subset(int_cpDB, annotation_strategy == 'user_curated')
}

# Dictionary interacting pair -> list(partner_a = genes, partner_b = genes).
# Each row is handled by position: fields 5 and 6 hold the gene name of partner A / B and
# are empty when that partner is a complex, in which case the complex members are fetched
# from Com2Gene using the identifier in field 3 / 4.
Int2Gene <- apply(int_cpDB, 1, function(int) {
    int <- unname(int)
    partnerA <- if (int[5] == '') Com2Gene[[int[3]]] else int[5]
    partnerB <- if (int[6] == '') Com2Gene[[int[4]]] else int[6]
    list(partner_a = partnerA, partner_b = partnerB)
})
names(Int2Gene) <- int_cpDB$interacting_pair

## Load cellphone results file

In [None]:
# Load the interaction x cell-pair results; the first column (read in as `X`) holds the
# interaction names and becomes the row names.
CPresults <- read.csv('data/cellphoneDB/out/DEapproach_results_pval0.001_logFC0.csv', stringsAsFactors = FALSE)
rownames(CPresults) <- CPresults$X
CPresults <- CPresults[, -1]
# read.csv() replaces characters that are not valid in R names with '.', so the cell-pair
# separator arrives as a run of dots (presumably three -- TODO confirm against the file).
# Match that run literally: the former pattern '\\...' meant "one dot followed by ANY two
# characters" and therefore also swallowed '.G2' in 'stromal_eS_S.G2M'.
colnames(CPresults) <- gsub('...', '--', colnames(CPresults), fixed = TRUE)
# Restore the hyphen of this cluster name, which read.csv() turned into a dot
colnames(CPresults) <- gsub('stromal_eS_S.G2M', 'stromal_eS_S-G2M', colnames(CPresults), fixed = TRUE)
# Rename the former WIF1 clusters to CLU
colnames(CPresults) <- gsub('_WIF1', '_CLU', colnames(CPresults))
head(CPresults)

# Load expression information

In [None]:
# Expression information per cluster
# Put one file per cluster included in cellphone into the folder below.
# Each file must contain at least three columns (the DEG output files can be reused):
# - Gene: gene symbol
# - cluster: cell type
# - percentExpr_cluster: % of cells in the cluster with expression > 0 for the gene
path_Exp <- '/home/jovyan/farm/endometrium/data/cellphoneDB/clusterDEGs_and_TFs/DEGs/'

# Read every matching file, index it by gene symbol (for convenience) and keep only
# the genes known to cellphone
Per_files <- list.files(path_Exp, pattern = 'DEGs.csv', full.names = TRUE)
Per_df <- lapply(Per_files, function(f) {
    tab <- read.csv(f, stringsAsFactors = FALSE)
    rownames(tab) <- tab$Gene
    subset(tab, Gene %in% genes_cpDB$gene_name)
})

# Name each table after its cluster (first value of its `cluster` column), then stack
# them into the long PERC table keyed by 'cluster gene'
names(Per_df) <- sapply(Per_df, function(x) x$cluster[1])
PERC <- melt(Per_df, id.vars = names(Per_df[[1]]))
PERC$cluster <- gsub('_WIF1', '_CLU', PERC$cluster)
PERC$id <- paste(PERC$cluster, PERC$Gene)
head(PERC)

# Show the LGR4 record of the epithelial_LGR5 cluster table
subset(Per_df$epithelial_LGR5, Gene %in% 'LGR4')

# PLOTS: L/R per phase

## Proliferative:  Receptors

receptors; NOTCH4, NOTCH3, NOTCH2, NOTCH1, KREMEN1, LGR5, LGR4, LRP6, LRP5, FZD8, FZD7, FZD6, FZD5, FZD3, FZD2, FZD10, FZD1




### Load genes of interest

In [None]:
# Genes of interest: first column of each tab-separated, header-less list file
genes2plot <- do.call(c, lapply(
    c('data/cellphoneDB/genes2plot/dotplot1.txt',
      'data/cellphoneDB/genes2plot/dotplot3.txt'),
    function(f) read.csv(f, stringsAsFactors = FALSE, sep = '\t', header = FALSE)$V1))
genes2plot

### Generate cells2genes in cellphone for interactions containing genes of interest

In [None]:
# Cell/gene table for the interactions that involve the genes of interest
cells2genes <- get_cells2genes(CPresults, genes2filter = genes2plot)
head(cells2genes)
cells2genes$value <- 1
# Make sure both label columns are plain character vectors
cells2genes[c('cell', 'gene')] <- lapply(cells2genes[c('cell', 'gene')], as.character)
# A cell label that reads 'TRUE' is relabelled as 'Tcell'
cells2genes$cell[cells2genes$cell %in% 'TRUE'] <- 'Tcell'

In [None]:
# Keep only the genes requested for this panel
df <- subset(cells2genes, gene %in% genes2plot)
# Gene order on the y axis: grouped by family (NOTCH, KREMEN, LGR, LRP, FZD),
# alphabetical within each family
df$gene <- local({
    pool <- sort(unique(df$gene))
    families <- c('NOTCH', 'KREMEN', 'LGR', 'LRP', 'FZD')
    factor(df$gene, levels = do.call(c, lapply(families, grep, x = pool, value = TRUE)))
})
# Harmonise the former WIF1 cluster names, then keep the clusters of this panel.
# Cell order on the x axis: ciliated; LGR5; proliferative; CLU_1; CLU_2
df$cell <- gsub('_WIF1', '_CLU', df$cell)
cells <- c('epithelial_ciliated', 'epithelial_LGR5', 'epithelial_proliferative', 'epithelial_CLU_1', 'epithelial_CLU_2')
df <- subset(df, cell %in% cells)
df$cell <- factor(df$cell, levels = cells)
# Dot plot: colour = average expression, size = percentage of expressing cells
ggplot(df, aes(y = gene, x = cell, color = AveExpr_cluster, size = percentExpr_cluster)) +
    geom_point() +
    scale_color_gradient(low = 'orange', high = 'blue') +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Optional: theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggsave(filename = 'data/cellphoneDB/out/dotplotReceptors_proliferative.pdf', dpi = 300, width = 6, height = 5)

## Proliferative: Ligands

 ligands: JAG2, JAG1, DLL1, NDP, SOSTDC1, WIF1, DKK1, RSPO3, RSPO1, WNT7A, WNT6, WNT5B, WNT5A, WNT4, WNT2B, WNT2, WNT11




### Load genes of interest

In [None]:
# Genes of interest: first column of each tab-separated, header-less list file
genes2plot <- do.call(c, lapply(
    c('data/cellphoneDB/genes2plot/dotplot2.txt',
      'data/cellphoneDB/genes2plot/dotplot4.txt'),
    function(f) read.csv(f, stringsAsFactors = FALSE, sep = '\t', header = FALSE)$V1))
genes2plot

### Generate cells2genes in cellphone for interactions containing genes of interest

In [None]:
# Cell/gene table for the interactions that involve the genes of interest
cells2genes <- get_cells2genes(CPresults, genes2filter = genes2plot)
head(cells2genes)
cells2genes$value <- 1
# Make sure both label columns are plain character vectors
cells2genes[c('cell', 'gene')] <- lapply(cells2genes[c('cell', 'gene')], as.character)
# A cell label that reads 'TRUE' is relabelled as 'Tcell'
cells2genes$cell[cells2genes$cell %in% 'TRUE'] <- 'Tcell'

In [None]:
# Distinct cell labels present in the table, in order of first appearance
cells2genes$cell[!duplicated(cells2genes$cell)]

In [None]:
# Keep only the genes requested for this panel
df <- cells2genes[cells2genes$gene %in% genes2plot, ]
# Gene order on the y axis: JAG*, DLL*, then NDP, SOSTDC1, WIF1, DKK1, then RSP*, WNT*
# (alphabetical within each pattern-matched family)
df$gene <- local({
    pool <- sort(unique(df$gene))
    pick <- function(pattern) grep(pattern, pool, value = TRUE)
    factor(df$gene, levels = c(pick('JAG'), pick('DLL'),
                               'NDP', 'SOSTDC1', 'WIF1', 'DKK1',
                               pick('RSP'), pick('WNT')))
})
# Harmonise the former WIF1 cluster names, then keep the clusters of this panel.
# Cell order on the x axis: CLU_1; CLU_2; fibroblasts; eS_G2M; eS_S-G2M; eS;
# proliferative; LGR5; ciliated
df$cell <- gsub('_WIF1', '_CLU', df$cell)
cells <- c('epithelial_CLU_1', 'epithelial_CLU_2', 'fibroblasts', 'stromal_eS_G2M', 'stromal_eS_S-G2M', 'stromal_eS',
           'epithelial_proliferative', 'epithelial_LGR5', 'epithelial_ciliated')
df <- df[df$cell %in% cells, ]
df$cell <- factor(df$cell, levels = cells)
# Dot plot: colour = average expression, size = percentage of expressing cells
ggplot(df, aes(y = gene, x = cell, color = AveExpr_cluster, size = percentExpr_cluster)) +
    geom_point() +
    scale_color_gradient(low = 'orange', high = 'blue') +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Optional: theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggsave(filename = 'data/cellphoneDB/out/dotplotLigands_proliferative.pdf', dpi = 300, width = 6, height = 5)

## Secretory:  Receptors

receptors; NOTCH4, NOTCH3, NOTCH2, NOTCH1, KREMEN1, LGR5, LGR4, LRP6, LRP5, FZD8, FZD7, FZD6, FZD5, FZD3, FZD2, FZD10, FZD1




### Load genes of interest

In [None]:
# Genes of interest: first column of each tab-separated, header-less list file
genes2plot <- do.call(c, lapply(
    c('data/cellphoneDB/genes2plot/dotplot1.txt',
      'data/cellphoneDB/genes2plot/dotplot3.txt'),
    function(f) read.csv(f, stringsAsFactors = FALSE, sep = '\t', header = FALSE)$V1))
genes2plot

### Generate cells2genes in cellphone for interactions containing genes of interest

In [None]:
# Cell/gene table for the interactions that involve the genes of interest
cells2genes <- get_cells2genes(CPresults, genes2filter = genes2plot)
head(cells2genes)
cells2genes$value <- 1
# Make sure both label columns are plain character vectors
cells2genes[c('cell', 'gene')] <- lapply(cells2genes[c('cell', 'gene')], as.character)
# A cell label that reads 'TRUE' is relabelled as 'Tcell'
cells2genes$cell[cells2genes$cell %in% 'TRUE'] <- 'Tcell'

In [None]:
# Keep only the genes requested for this panel
df <- subset(cells2genes, gene %in% genes2plot)
# Gene order on the y axis: grouped by family (NOTCH, KREMEN, LGR, LRP, FZD),
# alphabetical within each family
df$gene <- local({
    pool <- sort(unique(df$gene))
    families <- c('NOTCH', 'KREMEN', 'LGR', 'LRP', 'FZD')
    factor(df$gene, levels = do.call(c, lapply(families, grep, x = pool, value = TRUE)))
})
# Harmonise the former WIF1 cluster names, then keep the clusters of this panel.
# Cell order on the x axis: ciliated; LGR5; glandular; CLU_1; CLU_2
df$cell <- gsub('_WIF1', '_CLU', df$cell)
cells <- c('epithelial_ciliated', 'epithelial_LGR5', 'epithelial_glandular', 'epithelial_CLU_1', 'epithelial_CLU_2')
df <- subset(df, cell %in% cells)
df$cell <- factor(df$cell, levels = cells)
# Dot plot: colour = average expression, size = percentage of expressing cells
ggplot(df, aes(y = gene, x = cell, color = AveExpr_cluster, size = percentExpr_cluster)) +
    geom_point() +
    scale_color_gradient(low = 'orange', high = 'blue') +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Optional: theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggsave(filename = 'data/cellphoneDB/out/dotplotReceptors_secretory.pdf', dpi = 300, width = 6, height = 5)

## Secretory: Ligands

 ligands: JAG2, JAG1, DLL1, NDP, SOSTDC1, WIF1, DKK1, RSPO3, RSPO1, WNT7A, WNT6, WNT5B, WNT5A, WNT4, WNT2B, WNT2, WNT11




### Load genes of interest

In [None]:
# Genes of interest: first column of each tab-separated, header-less list file
genes2plot <- do.call(c, lapply(
    c('data/cellphoneDB/genes2plot/dotplot2.txt',
      'data/cellphoneDB/genes2plot/dotplot4.txt'),
    function(f) read.csv(f, stringsAsFactors = FALSE, sep = '\t', header = FALSE)$V1))
genes2plot

### Generate cells2genes in cellphone for interactions containing genes of interest

In [None]:
# Cell/gene table for the interactions that involve the genes of interest
cells2genes <- get_cells2genes(CPresults, genes2filter = genes2plot)
head(cells2genes)
cells2genes$value <- 1
# Make sure both label columns are plain character vectors
cells2genes[c('cell', 'gene')] <- lapply(cells2genes[c('cell', 'gene')], as.character)
# A cell label that reads 'TRUE' is relabelled as 'Tcell'
cells2genes$cell[cells2genes$cell %in% 'TRUE'] <- 'Tcell'

In [None]:
# Distinct cell labels present in the table, in order of first appearance
cells2genes$cell[!duplicated(cells2genes$cell)]

In [None]:
# Keep only the genes requested for this panel
df <- cells2genes[cells2genes$gene %in% genes2plot, ]
# Gene order on the y axis: JAG*, DLL*, then NDP, SOSTDC1, WIF1, DKK1, then RSP*, WNT*
# (alphabetical within each pattern-matched family)
df$gene <- local({
    pool <- sort(unique(df$gene))
    pick <- function(pattern) grep(pattern, pool, value = TRUE)
    factor(df$gene, levels = c(pick('JAG'), pick('DLL'),
                               'NDP', 'SOSTDC1', 'WIF1', 'DKK1',
                               pick('RSP'), pick('WNT')))
})
# Harmonise the former WIF1 cluster names, then keep the clusters of this panel.
# Cell order on the x axis: CLU_2 (formerly WIF2); CLU_1 (formerly WIF1); fibroblasts;
# stromal_dS; glandular; LGR5; ciliated
df$cell <- gsub('_WIF1', '_CLU', df$cell)
cells <- c('epithelial_CLU_2', 'epithelial_CLU_1', 'fibroblasts', 'stromal_dS',
           'epithelial_glandular', 'epithelial_LGR5', 'epithelial_ciliated')
df <- df[df$cell %in% cells, ]
df$cell <- factor(df$cell, levels = cells)
# Dot plot: colour = average expression, size = percentage of expressing cells
ggplot(df, aes(y = gene, x = cell, color = AveExpr_cluster, size = percentExpr_cluster)) +
    geom_point() +
    scale_color_gradient(low = 'orange', high = 'blue') +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Optional: theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggsave(filename = 'data/cellphoneDB/out/dotplotLigands_secretory.pdf', dpi = 300, width = 6, height = 5)

# Generate tables for microenvironments

Lumen:
 - ciliated & LGR5
 - LGR5 & LGR5
 - ciliated & ciliated

Functional proliferative:
 - LGR5 & fibro_eS_G2M
 - LGR5 & fibro_eS_S_G2M
 - LGR5 & fibro_eS
 - LGR5 & proliferative
 - LGR5 & LGR5
 - proliferative & fibro_eS_G2M
 - proliferative & fibro_eS_S_G2M
 - proliferative & fibro_eS
 - proliferative & proliferative
 - proliferative & LGR5

Functional secretory:
 - epi secretory & stromal dS
 - epi secretory & epi secretory

Basal:
 - CLU2-CLU1
 - CLU2-CLU2
 - CLU1-CLU1
 - CLU1-fibro_basal
 - CLU2-fibro_basal


In [None]:
# Rename the former WIF1 clusters, then list every cell type found on either side of a
# 'cellA--cellB' column name (unique, sorted)
colnames(CPresults) <- gsub('_WIF1', '_CLU', colnames(CPresults))
cells <- sort(unique(unlist(strsplit(colnames(CPresults), split = '--'))))
cells

In [None]:
# Cell types that make up each microenvironment
lumen <- c('epithelial_ciliated', 'epithelial_LGR5')
functionalP <- c(
    'epithelial_LGR5',
    'stromal_eS',
    'stromal_eS_G2M',
    'stromal_eS_S-G2M',
    'epithelial_proliferative'
)
functionalS <- c('epithelial_glandular', 'stromal_dS')
basal <- c('fibroblasts', 'epithelial_CLU_1', 'epithelial_CLU_2')

In [None]:
# Restrict the global `CPresults` matrix to the cell pairs formed only by `cells`.
#
# Arguments:
#   cells - character vector of cell type names.
# Returns:
#   the columns of `CPresults` whose 'cellA--cellB' name has both members in `cells`,
#   minus the columns and then the rows whose sum is not > 0. The result always keeps
#   its two-dimensional shape, even when a single row or column remains.
# Note: reads the global `CPresults`, which must exist when this is called.
filterCellPairs = function(cells){
    pair_members <- strsplit(colnames(CPresults), split = '--')
    keep <- vapply(pair_members, function(x) all(x %in% cells), logical(1))
    # drop = FALSE: without it a single surviving column collapses to a plain vector
    # and the colSums()/rowSums() calls below fail
    mat <- CPresults[, keep, drop = FALSE]
    mat <- mat[, colSums(mat) > 0, drop = FALSE]
    mat <- mat[rowSums(mat) > 0, , drop = FALSE]
    return(mat)
}
# One interaction table per microenvironment
lumen_mat <- filterCellPairs(lumen)
fP_mat <- filterCellPairs(functionalP)
fS_mat <- filterCellPairs(functionalS)
basal_mat <- filterCellPairs(basal)

# Size (interactions x cell pairs) of each table
dim(lumen_mat)
dim(fP_mat)
dim(fS_mat)
dim(basal_mat)

# Export the tables, keeping the interaction names as row names
write.csv(lumen_mat, file = 'data/cellphoneDB/out/DEapproach_results_pval0.001_logFC0_lumen.csv', row.names = TRUE)
write.csv(fP_mat, file = 'data/cellphoneDB/out/DEapproach_results_pval0.001_logFC0_functionalP.csv', row.names = TRUE)
write.csv(fS_mat, file = 'data/cellphoneDB/out/DEapproach_results_pval0.001_logFC0_functionalS.csv', row.names = TRUE)
write.csv(basal_mat, file = 'data/cellphoneDB/out/DEapproach_results_pval0.001_logFC0_basal.csv', row.names = TRUE)

# Plot receptors for these ligands

In [None]:
# Ligands whose interaction partners are plotted below
genes <- c('RSPO3', 'WNT4', 'FN1', 'IGF1')

In [None]:
# Positions (within the rows of CPresults) of the interactions involving any of `genes`
idx <- which(sapply(Int2Gene[rownames(CPresults)], function(x) any(unlist(x) %in% genes)))

In [None]:
# Annotation rows of the selected interactions
df <- subset(int_cpDB, interacting_pair %in% names(Int2Gene[rownames(CPresults)])[idx])
# The argument is `row.names`; the former `rownames = F` is not an argument of
# write.table() and made the call fail with an "unused argument" error.
write.csv(df, file = 'data/cellphoneDB/out/complexes_WNT4_IGF1_FN1_RSPO3.csv', row.names = FALSE)

In [None]:
# Results restricted to the selected interactions
mat <- CPresults[idx, ]
# Drop cell pairs without any interaction, then interactions without any cell pair,
# then re-check the cell pairs
mat <- mat[, colSums(mat) > 0]
mat <- mat[rowSums(mat) > 0, ]
mat <- mat[, colSums(mat) > 0]
dim(mat)

In [None]:
# For each ligand, the retained interactions (entries of Int2Gene) it takes part in
intOfInterest <- lapply(genes, function(g) {
    candidates <- Int2Gene[rownames(mat)]
    involves_g <- sapply(candidates, function(x) any(unlist(x) %in% g))
    candidates[involves_g]
})
names(intOfInterest) <- genes

In [None]:
# Per ligand: all genes taking part in its interactions except the ligands themselves, sorted
genes2plot <- lapply(intOfInterest, function(x) {
    partners <- setdiff(unlist(x), genes)
    sort(partners)
})
genes2plot

In [None]:
# Flatten the per-ligand lists into a single (named) vector
genes2plot <- unlist(genes2plot, recursive = TRUE, use.names = TRUE)

### Generate cells2genes in cellphone for interactions containing genes of interest

In [None]:
# Cell/gene table for the interactions that involve the genes of interest
cells2genes <- get_cells2genes(CPresults, genes2filter = genes2plot)
head(cells2genes)
cells2genes$value <- 1
# Make sure both label columns are plain character vectors
cells2genes[c('cell', 'gene')] <- lapply(cells2genes[c('cell', 'gene')], as.character)
# A cell label that reads 'TRUE' is relabelled as 'Tcell'
cells2genes$cell[cells2genes$cell %in% 'TRUE'] <- 'Tcell'

In [None]:
# Distinct cell labels present in the table, in order of first appearance
cells2genes$cell[!duplicated(cells2genes$cell)]

In [None]:
# Keep only the partner genes of the selected ligands
df <- subset(cells2genes, gene %in% genes2plot)
# Gene order on the y axis: the order of genes2plot (partners grouped per ligand).
# unique() is required because a gene that partners with several ligands appears more
# than once in genes2plot, and factor() rejects duplicated levels.
df$gene <- factor(df$gene, levels = unique(genes2plot))
# Harmonise the former WIF1 cluster names, then keep the clusters of this panel.
# Cell order on the x axis: ciliated; LGR5; proliferative; glandular; CLU_2; CLU_1; LYPD1
df$cell <- gsub('_WIF1', '_CLU', df$cell)
cells <- c('epithelial_ciliated', 'epithelial_LGR5', 'epithelial_proliferative',
           'epithelial_glandular', 'epithelial_CLU_2', 'epithelial_CLU_1', 'epithelial_LYPD1')
df <- subset(df, cell %in% cells)
df$cell <- factor(df$cell, levels = cells)
# Dot plot: colour = average expression, size = percentage of expressing cells
ggplot(df, aes(y = gene,
               x = cell,
               color = AveExpr_cluster,
               size = percentExpr_cluster)) +
    geom_point() +
    scale_color_gradient(low = 'orange', high = 'blue') +
    theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
ggsave(filename = 'data/cellphoneDB/out/dotplot_ligandsOf_WNT4_IGF1_FN1_RSPO3.pdf', dpi = 300, width = 6, height = 7)