In [1]:
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(pheatmap)
library(cowplot)
library(reshape2)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



********************************************************

Note: As of version 1.0.0, cowplot does not change the

  default ggplot2 theme anymore. To recover the previous

  behavior, execute:
  theme_set(theme_cowplot())

********************************************************




# Load spatial DEGs

In [9]:
# Read every spatial DEG table found in data/spatial/DEGs/ (files whose name
# contains 'DEGs') into a list of data frames, one per file.
f <- list.files('data/spatial/DEGs/', full.names = TRUE, pattern = 'DEGs')
spatial <- lapply(f, function(path) {
    x <- read.csv(path, stringsAsFactors = FALSE)
    # Index rows by the Gene column so a table can be subset by gene name.
    rownames(x) <- x$Gene
    x
})
# Name each table by what sits between the '/epi_' prefix and the fixed file
# suffix. The suffix is matched literally (fixed = TRUE) so that its dots are
# not interpreted as regex wildcards, and vapply() guarantees a character
# vector even when no file was found.
names(spatial) <- gsub(
    '_vs_epi_rest_limma_DEGs_20200423.csv', '',
    vapply(strsplit(f, '/epi_'), tail, character(1), n = 1),
    fixed = TRUE
)
names(spatial)
head(spatial$basal)

Unnamed: 0_level_0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
CLU,CLU,0.860545,3.602928e-29,6.869703e-25,2.596186,1.735641,1.0,0.989474
COL3A1,COL3A1,-0.5792378,5.130887e-27,4.8915310000000005e-23,2.830775,3.410013,0.97479,1.0
SLC40A1,SLC40A1,0.6735332,1.014873e-23,6.450195e-20,1.885815,1.212282,0.983193,0.947368
COL1A2,COL1A2,-0.5243485,2.0454459999999998e-20,9.750129000000001e-17,1.772438,2.296787,0.957983,1.0
TMSB10,TMSB10,-0.3098104,2.4600149999999997e-19,8.490781e-16,3.131659,3.441469,1.0,1.0
SLC16A3,SLC16A3,-0.511239,2.6718769999999996e-19,8.490781e-16,0.650318,1.161557,0.663866,0.940351


## Load scRNA-seq epithelial cluster DEGs

In [11]:
# Read every within-epithelial cluster DEG table (files ending in 'DEGs.csv')
# into a list of data frames, one per file.
# The pattern escapes the dot and anchors the end of the name so that only
# real '...DEGs.csv' files are picked up.
f <- list.files('data/cellphoneDB/clusterDEGs_and_TFs/DEGs_withinEpithelial/',
                full.names = TRUE, pattern = 'DEGs\\.csv$')
# NOTE(review): `all` shadows base::all() as a variable; calls to all(...) still
# resolve to the function, but the name is kept because later cells use it.
all <- lapply(f, function(path) {
    x <- read.csv(path, stringsAsFactors = FALSE)
    # Index rows by the Gene column so a table can be subset by gene name.
    rownames(x) <- x$Gene
    x
})
# Name each table by what sits between the '/epithelial_' prefix and the
# '_DEGs.csv' suffix (matched literally, not as a regex).
names(all) <- gsub(
    '_DEGs.csv', '',
    vapply(strsplit(f, '/epithelial_'), tail, character(1), n = 1),
    fixed = TRUE
)
names(all)

### All cells

In [32]:
#' Build the 2x2 cluster-by-gene-set contingency table used for Fisher's test.
#'
#' @param cl Character vector of genes belonging to the cluster.
#' @param GenSet Character vector of genes belonging to the gene set.
#' @param universe Character vector of background genes from which the
#'   "outside the gene set" genes are drawn. Defaults to the global
#'   `background`, which is what the function previously read implicitly.
#' @return A 2x2 matrix of counts with rows `in_cluster` / `out_cluster` and
#'   columns `in_GenSet` / `out_GenSet`.
create_contingency_table <- function(cl, GenSet, universe = background) {
  outGenSet <- setdiff(universe, GenSet)
  # NOTE(review): GenSet is not intersected with `universe`, so gene-set
  # members absent from the background are still counted in the
  # out_cluster / in_GenSet cell. Confirm this is the intended universe.
  counts <- c(
    length(intersect(GenSet, cl)),     # in cluster,  in gene set
    length(intersect(outGenSet, cl)),  # in cluster,  outside gene set
    length(setdiff(GenSet, cl)),       # out cluster, in gene set
    length(setdiff(outGenSet, cl))     # out cluster, outside gene set
  )
  # Fill row by row so each count lands in the cell its dimnames describe.
  # A column-wise fill would put the two off-diagonal counts under the wrong
  # labels. fisher.test() gives the same result for a table and its
  # transpose, so the test outcome does not depend on this.
  matrix(
    counts,
    ncol = 2,
    byrow = TRUE,
    dimnames = list(c('in_cluster', 'out_cluster'), c('in_GenSet', 'out_GenSet'))
  )
}


#' Fisher's exact test for the overlap between a cluster and a gene set.
#'
#' @param cl Character vector of genes belonging to the cluster.
#' @param GenSet Character vector of genes belonging to the gene set.
#' @return A one-row data frame with columns `pvalue`, `odds.ratio`,
#'   `min_confint` and `max_confint`, or NULL (invisibly) when the cluster and
#'   the gene set share at most one gene.
enrichment <- function(cl, GenSet) {
  counts <- create_contingency_table(cl, GenSet)

  # Skip the test unless the top-left cell (shared genes) holds more than one.
  if (!(counts[1, 1] > 1)) {
    return(invisible(NULL))
  }

  test <- fisher.test(counts)
  data.frame(
    pvalue = test$p.value,
    odds.ratio = test$estimate,
    min_confint = test$conf.int[1],
    max_confint = test$conf.int[2],
    stringsAsFactors = F
  )
}


# Union of genes that are significantly up-regulated (adj.P.Val < 0.01 and
# logFC > 0) in at least one spatial DEG table.
allSPATIAL <- unique(unlist(
  lapply(spatial, function(tbl) {
    up <- subset(tbl, adj.P.Val < 0.01 & logFC > 0)
    up$Gene
  })
))
# Same union, taken over the scRNA cluster DEG tables.
allscRNA <- unique(unlist(
  lapply(all, function(tbl) {
    up <- subset(tbl, adj.P.Val < 0.01 & logFC > 0)
    up$Gene
  })
))
# Gene universe for the enrichment tests: the scRNA union only (the
# intersection with the spatial union is kept below as a commented alternative).
background <- allscRNA # intersect(allSPATIAL, allscRNA)

In [33]:
# Significantly up-regulated genes (adj.P.Val < 0.01, logFC > 0) per spatial
# DEG table; these are the gene sets each scRNA cluster is tested against.
SPATIAL <- lapply(spatial, function(x) subset(x, adj.P.Val < 0.01 & logFC > 0)$Gene)
names(SPATIAL) <- names(spatial)

# For every scRNA cluster, test its up-regulated genes against every spatial
# gene set and collect the per-cluster result tables.
results <- list()
for (cl in names(all)) {
    message(cl)
    cl_df <- subset(all[[cl]], adj.P.Val < 0.01 & logFC > 0)
    cl_genes <- cl_df$Gene
    ENRCH <- lapply(SPATIAL, enrichment, cl = cl_genes)
    # enrichment() returns NULL when the overlap is too small; drop those.
    fENRCH <- ENRCH[!vapply(ENRCH, is.null, logical(1))]
    # Nothing testable for this cluster: skip it before touching fENRCH[[1]],
    # which would otherwise fail with "subscript out of bounds".
    if (length(fENRCH) == 0) {
        next
    }
    # Stack the one-row data frames; melt() appends the list names as column 5.
    df_ENRCH <- melt(fENRCH, id.vars = colnames(fENRCH[[1]]))
    names(df_ENRCH)[5] <- 'spatial'
    df_ENRCH$cl <- cl
    if (nrow(df_ENRCH) > 0) {
        results[[cl]] <- df_ENRCH
    }
}
if (length(results) == 0) {
    stop('no cluster / spatial gene set pair had enough overlap to be tested',
         call. = FALSE)
}
# Stack all clusters, sort by p-value, and drop the trailing list-name column
# added by melt() (it duplicates `cl`).
df <- melt(results, id.vars = names(results[[1]]))
df <- df[order(df$pvalue), -ncol(df)]
# Direction of association, called at p < 0.01.
# NOTE(review): the threshold is applied to the raw Fisher p-values; no
# multiple-testing correction is performed here.
df$assos <- 'none'
df$assos[df$odds.ratio > 1 & df$pvalue < 0.01] <- '+'
df$assos[df$odds.ratio < 1 & df$pvalue < 0.01] <- '-'
df
write.csv(df, file = 'data/spatial/DEGs/spatial2scRNA_associations.csv', row.names = FALSE, quote = FALSE)

ciliated

glandular

LGR5

LYPD1

proliferative

WIF1_1

WIF1_2



Unnamed: 0_level_0,pvalue,odds.ratio,min_confint,max_confint,spatial,cl,assos
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
5,1.267154e-156,11.6989532,9.8745058,13.8628506,glandular,glandular,+
19,9.958905e-32,13.1848779,8.4504463,20.8902531,basal,WIF1_2,+
9,1.1290779999999999e-26,5.0162097,3.8071617,6.5682576,luminal,LGR5,+
16,6.204226e-18,6.5578134,4.2543172,10.1645754,basal,WIF1_1,+
2,9.006152e-13,0.4905507,0.3949566,0.6048777,glandular,ciliated,-
10,6.323497e-11,4.479976,2.8721256,6.9254839,basal,LYPD1,+
6,1.021605e-10,2.9925219,2.1810393,4.0503318,luminal,glandular,+
4,5.031085e-10,4.8628431,3.0108819,7.6854948,basal,glandular,+
3,4.082144e-07,1.9176187,1.486419,2.4700905,luminal,ciliated,+
12,4.345847e-06,2.0385234,1.5055608,2.7284387,luminal,LYPD1,+
