# Merge DEG files
Compare CTRL and ENDO cells using the genotype as a covariate

In [1]:
library(ggplot2)
library(dplyr)
library(reshape2)
library(tidyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘tidyr’


The following object is masked from ‘package:reshape2’:

    smiths




# Load DEG results

In [2]:
# Lineage whose DEG tables are loaded and merged by the cells below.
lineage <- "mesenchymal"

# Columns kept from every per-test DEG table before merging.
columns2keep <- c(
  "id", "Gene", "celltype", "test",
  "logFC", "adj.P.Val", "meaning"
)

### Limma

In [3]:
# Load the limma DEG tables (cells and nuclei, "uncorrected" files) for
# `lineage` and harmonise each one to the `columns2keep` layout.
path_to_DEGs <- "/nfs/team292/lg18/endometriosis/DEGs_endometriosis/limma/"
file <- "corrected.tsv"    # suffix of <cells|nuclei>_<lineage>_corrected.tsv (not loaded at present)
ufile <- "uncorrected.tsv" # suffix of <cells|nuclei>_<lineage>_uncorrected.tsv

files <- c(
  # paste0(path_to_DEGs, "cells_", lineage, "_", file),
  paste0(path_to_DEGs, "cells_", lineage, "_", ufile),
  # paste0(path_to_DEGs, "nuclei_", lineage, "_", file),
  paste0(path_to_DEGs, "nuclei_", lineage, "_", ufile)
)

ldf_limma <- lapply(files, read.delim, stringsAsFactors = FALSE)

# Name each table after its file: drop the directory and the literal ".tsv"
# extension (escaped and anchored, so only a real extension is removed), then
# strip the lineage, e.g.
#   cells_mesenchymal_uncorrected.tsv -> cells__uncorrected
# The double underscore is intentional: the merge cells look the tables up
# under exactly these names.
table_names <- sub("\\.tsv$", "", basename(files))
table_names <- gsub(lineage, "", table_names, fixed = TRUE)
names(ldf_limma) <- table_names

for (table_name in names(ldf_limma)) {
  deg <- ldf_limma[[table_name]]

  # Direction call from the sign of logFC; anything above the 5% adjusted
  # p-value threshold is overridden as not significant.
  deg$meaning <- ifelse(deg$logFC > 0, "up_CONTROL", "up_ENDOMETRIOSIS")
  deg$meaning[deg$adj.P.Val > 0.05] <- "not_significant"

  # Unique key per (cell type, gene); also used as row name for lookups.
  deg$id <- paste(deg$celltype, deg$Gene, sep = "_")
  deg$test <- paste0(table_name, "_limma")

  deg <- deg[, columns2keep]
  rownames(deg) <- deg$id
  ldf_limma[[table_name]] <- deg
}
                    

### edgeR

In [4]:
# Load every edgeR result file for `lineage` (comma-separated), skipping the
# "whole_uterus" results. Tables are named by their full path without the
# ".csv" extension so that later cells can select them by path fragments.
path_to_DEGs <- "/nfs/team292/lg18/endometriosis/DEGs_endometriosis/edgeR/"
files <- list.files(path_to_DEGs, recursive = TRUE, full.names = TRUE)
files <- grep(lineage, files, value = TRUE)
files <- grep("whole_uterus", files, value = TRUE, invert = TRUE)

# Fail here with a clear message instead of much later inside melt().
if (length(files) == 0) {
  stop("No edgeR result files found for lineage '", lineage, "' under ",
       path_to_DEGs, call. = FALSE)
}

ldf <- lapply(files, read.delim, stringsAsFactors = FALSE, sep = ",")

# Escaped and anchored: only a trailing literal ".csv" is removed.
names(ldf) <- sub("\\.csv$", "", files)

In [5]:
# ldf_edgeR[[test_type]]$id [ duplicated(ldf_edgeR[[test_type]]$id) ]

In [6]:
#' Stack and harmonise a family of edgeR result tables.
#'
#' Selects the tables in `tables` whose name contains both `design_pattern`
#' and `data_pattern`, stacks them with reshape2::melt(), and reshapes the
#' result to the `columns` layout shared with the limma tables.
#'
#' @param tables Named list of edgeR result data frames; they are expected to
#'   provide the columns X, cell_type, logFC and FDR.
#' @param data_pattern Pattern identifying the data type in the table names
#'   (e.g. "DEGs_cells").
#' @param test_type Label stored in the `test` column of the result.
#' @param design_pattern Pattern identifying the design in the table names.
#' @param columns Columns to keep, in order.
#' @return A data frame with `columns`, with `id` as row names.
format_edgeR_tables <- function(tables, data_pattern, test_type,
                                design_pattern = "/group/",
                                columns = columns2keep) {
  selected <- grep(data_pattern,
                   grep(design_pattern, names(tables), value = TRUE),
                   value = TRUE)
  if (length(selected) == 0) {
    stop("No edgeR table matches '", design_pattern, "' and '",
         data_pattern, "'", call. = FALSE)
  }

  # Every column is declared an id variable, so melt() only stacks the tables
  # (adding the source name as L1). The column set is taken from the selected
  # tables themselves rather than from an unrelated first list element.
  deg <- melt(tables[selected], id.vars = names(tables[[selected[1]]]))

  deg$Gene <- deg$X
  deg$celltype <- deg$cell_type
  deg$adj.P.Val <- deg$FDR
  # Flip the sign so that a positive logFC gets the up_CONTROL label, the same
  # labelling that is applied to the limma tables.
  deg$logFC <- deg$logFC * (-1)
  deg$meaning <- ifelse(deg$logFC > 0, "up_CONTROL", "up_ENDOMETRIOSIS")
  deg$meaning[deg$FDR > 0.05] <- "not_significant"
  deg$id <- paste(deg$cell_type, deg$Gene, sep = "_")
  deg$test <- test_type

  deg <- deg[, columns]
  rownames(deg) <- deg$id
  deg
}

ldf_edgeR <- list()

ldf_edgeR[["cells_uncorrected_edgeR"]] <-
  format_edgeR_tables(ldf, "DEGs_cells", "cells_uncorrected_edgeR")

# Dataset-corrected design, currently disabled:
# ldf_edgeR[["cells_corrected_edgeR"]] <-
#   format_edgeR_tables(ldf, "DEGs_cells", "cells_corrected_edgeR",
#                       design_pattern = "/group_dataset/")

ldf_edgeR[["nuclei_uncorrected_edgeR"]] <-
  format_edgeR_tables(ldf, "DEGs_nuclei", "nuclei_uncorrected_edgeR")

names(ldf_edgeR)

# Merge into one table

In [7]:
# Seed the merged table with one row per (cell type, gene) id taken from the
# cells edgeR results. The table is looked up by its full name with [[ ]]:
# `ldf_edgeR$cells_uncorrected` only worked through partial matching of
# "cells_uncorrected_edgeR" and would silently return NULL as soon as a second
# name shared that prefix.
reference <- ldf_edgeR[["cells_uncorrected_edgeR"]]
stopifnot(!is.null(reference))

merged_table <- data.frame(
  id = reference$id,
  Gene = reference$Gene,
  celltype = reference$celltype,
  stringsAsFactors = FALSE
)
merged_table <- merged_table[order(merged_table$Gene), ]
# We are not interested in the PV cell types.
merged_table <- merged_table[grep("PV", merged_table$celltype, invert = TRUE), ]
# merged_table = merged_table[ grep('cells', merged_table$celltype, invert = T) , ] # We are not interested in these cells
rownames(merged_table) <- merged_table$id

# Row ids, in table order; used to align every per-test column added later.
combinations <- merged_table$id

In [8]:
# cells
# Append logFC / adj.P.Val / meaning from each cells test, aligned to
# `combinations`. Rows are aligned with an exact match(): indexing a data
# frame as df[ids, ] partially matches row names, so an id that is absent from
# a table could silently pick up the values of another id it is a unique
# prefix of. Ids without an exact match get NA.
cells_tests <- list(
  cells_uncorrected_limma = ldf_limma[["cells__uncorrected"]],
  cells_uncorrected_edgeR = ldf_edgeR[["cells_uncorrected_edgeR"]]
  # Corrected designs, currently disabled:
  # , cells_corrected_limma = ldf_limma[["cells__corrected"]]
  # , cells_corrected_edgeR = ldf_edgeR[["cells_corrected_edgeR"]]
)

for (test_name in names(cells_tests)) {
  deg <- cells_tests[[test_name]]
  if (is.null(deg)) {
    stop("DEG table for '", test_name, "' was not loaded", call. = FALSE)
  }
  hits <- match(combinations, rownames(deg))
  for (stat in c("logFC", "adj.P.Val", "meaning")) {
    merged_table[[paste0(stat, "_", test_name)]] <- deg[[stat]][hits]
  }
}

In [9]:
# nuclei
# Append logFC / adj.P.Val / meaning from each nuclei test, aligned to
# `combinations`. Rows are aligned with an exact match(): indexing a data
# frame as df[ids, ] partially matches row names, so an id that is absent from
# a table could silently pick up the values of another id it is a unique
# prefix of. Ids without an exact match get NA.
nuclei_tests <- list(
  nuclei_uncorrected_limma = ldf_limma[["nuclei__uncorrected"]],
  nuclei_uncorrected_edgeR = ldf_edgeR[["nuclei_uncorrected_edgeR"]]
  # Corrected designs, currently disabled:
  # , nuclei_corrected_limma = ldf_limma[["nuclei__corrected"]]
  # , nuclei_corrected_edgeR = ldf_edgeR[["nuclei_corrected_edgeR"]]
)

for (test_name in names(nuclei_tests)) {
  deg <- nuclei_tests[[test_name]]
  if (is.null(deg)) {
    stop("DEG table for '", test_name, "' was not loaded", call. = FALSE)
  }
  hits <- match(combinations, rownames(deg))
  for (stat in c("logFC", "adj.P.Val", "meaning")) {
    merged_table[[paste0(stat, "_", test_name)]] <- deg[[stat]][hits]
  }
}

In [10]:
# count significants
# For every row of merged_table, count how many tests produced each call.
# Missing calls (NA) are not counted. The counts are computed column-wise
# with rowSums() instead of apply(..., 1, table): that form returns a list or
# a matrix depending on the data, and it produces no column at all for a label
# that never occurs. Here all three count columns always exist.
meaning_cols <- grep("meaning", colnames(merged_table), value = TRUE)
calls <- as.matrix(merged_table[, meaning_cols, drop = FALSE])

xx <- data.frame(
  L1 = rownames(merged_table),
  not_significant = rowSums(calls == "not_significant", na.rm = TRUE),
  up_ENDOMETRIOSIS = rowSums(calls == "up_ENDOMETRIOSIS", na.rm = TRUE),
  up_CONTROL = rowSums(calls == "up_CONTROL", na.rm = TRUE),
  stringsAsFactors = FALSE
)
# Sorted by id and keyed by id, so rows can be looked up by name.
xx <- xx[order(xx$L1), ]
rownames(xx) <- xx$L1
head(xx)

Unnamed: 0_level_0,L1,not_significant,up_ENDOMETRIOSIS,up_CONTROL
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>
dStromal_early_A1BG,dStromal_early_A1BG,1,0,1
dStromal_early_A2M,dStromal_early_A2M,1,1,2
dStromal_early_AAAS,dStromal_early_AAAS,2,0,0
dStromal_early_AACS,dStromal_early_AACS,3,0,0
dStromal_early_AAK1,dStromal_early_AAK1,4,0,0
dStromal_early_AAMDC,dStromal_early_AAMDC,2,0,1


In [11]:
# Copy the per-id call counts from `xx` into merged_table, aligning the rows
# of `xx` to `combinations` by row name.
aligned_counts <- xx[combinations, ]
for (call_label in c("up_CONTROL", "up_ENDOMETRIOSIS", "not_significant")) {
  merged_table[[call_label]] <- aligned_counts[[call_label]]
}

# Save

In [12]:
# Preview the merged table and look up a few individual ids by row name.
head(merged_table)
merged_table["dStromal_mid_IGF2", ]
merged_table["dStromal_mid_DKK1", ]
merged_table["Glandular_ESR1", ]

# Write the merged table as a tab-separated file named after the lineage.
merged_out <- paste0(
  "/nfs/team292/lg18/endometriosis/DEGs_endometriosis/",
  lineage,
  "_merged_table.tsv"
)
write.table(
  merged_table,
  file = merged_out,
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)

Unnamed: 0_level_0,id,Gene,celltype,logFC_cells_uncorrected_limma,adj.P.Val_cells_uncorrected_limma,meaning_cells_uncorrected_limma,logFC_cells_uncorrected_edgeR,adj.P.Val_cells_uncorrected_edgeR,meaning_cells_uncorrected_edgeR,logFC_nuclei_uncorrected_limma,adj.P.Val_nuclei_uncorrected_limma,meaning_nuclei_uncorrected_limma,logFC_nuclei_uncorrected_edgeR,adj.P.Val_nuclei_uncorrected_edgeR,meaning_nuclei_uncorrected_edgeR,up_CONTROL,up_ENDOMETRIOSIS,not_significant
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
eStromal_A1BG,eStromal_A1BG,A1BG,eStromal,-0.27929252,0.00211356,up_ENDOMETRIOSIS,-0.31063568,0.0794235168,not_significant,-0.14701589,0.22839121,not_significant,,,,0,1,2
dStromal_early_A1BG,dStromal_early_A1BG,A1BG,dStromal_early,0.26874047,0.013900692,up_CONTROL,0.2306317,0.1411019311,not_significant,,,,,,,1,0,1
dStromal_late_A1BG,dStromal_late_A1BG,A1BG,dStromal_late,0.12872314,0.546254068,not_significant,0.06130456,0.854043532,not_significant,-0.05585452,0.841292956,not_significant,,,,0,0,3
dStromal_mid_A1BG,dStromal_mid_A1BG,A1BG,dStromal_mid,0.06937395,0.48844935,not_significant,0.13657248,0.3751666448,not_significant,,,,,,,0,0,2
eStromal_A2M,eStromal_A2M,A2M,eStromal,-0.30608757,0.022590903,up_ENDOMETRIOSIS,-0.48217561,0.0648017351,not_significant,-0.29556752,0.046899419,up_ENDOMETRIOSIS,,,,0,2,1
dStromal_early_A2M,dStromal_early_A2M,A2M,dStromal_early,0.73119506,0.003045924,up_CONTROL,1.6274458,0.0002546488,up_CONTROL,-0.59731257,0.001480451,up_ENDOMETRIOSIS,-1.295136,0.2228998,not_significant,2,1,1


Unnamed: 0_level_0,id,Gene,celltype,logFC_cells_uncorrected_limma,adj.P.Val_cells_uncorrected_limma,meaning_cells_uncorrected_limma,logFC_cells_uncorrected_edgeR,adj.P.Val_cells_uncorrected_edgeR,meaning_cells_uncorrected_edgeR,logFC_nuclei_uncorrected_limma,adj.P.Val_nuclei_uncorrected_limma,meaning_nuclei_uncorrected_limma,logFC_nuclei_uncorrected_edgeR,adj.P.Val_nuclei_uncorrected_edgeR,meaning_nuclei_uncorrected_edgeR,up_CONTROL,up_ENDOMETRIOSIS,not_significant
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
dStromal_mid_IGF2,dStromal_mid_IGF2,IGF2,dStromal_mid,2.327181,1.464995e-10,up_CONTROL,2.656567,6.045183e-06,up_CONTROL,-0.01239687,0.9758602,not_significant,,,,2,0,1


Unnamed: 0_level_0,id,Gene,celltype,logFC_cells_uncorrected_limma,adj.P.Val_cells_uncorrected_limma,meaning_cells_uncorrected_limma,logFC_cells_uncorrected_edgeR,adj.P.Val_cells_uncorrected_edgeR,meaning_cells_uncorrected_edgeR,logFC_nuclei_uncorrected_limma,adj.P.Val_nuclei_uncorrected_limma,meaning_nuclei_uncorrected_limma,logFC_nuclei_uncorrected_edgeR,adj.P.Val_nuclei_uncorrected_edgeR,meaning_nuclei_uncorrected_edgeR,up_CONTROL,up_ENDOMETRIOSIS,not_significant
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
dStromal_mid_DKK1,dStromal_mid_DKK1,DKK1,dStromal_mid,0.8701829,0.0005948595,up_CONTROL,0.4342849,0.3990446,not_significant,,,,-0.3939856,0.6129408,not_significant,1,0,2


Unnamed: 0_level_0,id,Gene,celltype,logFC_cells_uncorrected_limma,adj.P.Val_cells_uncorrected_limma,meaning_cells_uncorrected_limma,logFC_cells_uncorrected_edgeR,adj.P.Val_cells_uncorrected_edgeR,meaning_cells_uncorrected_edgeR,logFC_nuclei_uncorrected_limma,adj.P.Val_nuclei_uncorrected_limma,meaning_nuclei_uncorrected_limma,logFC_nuclei_uncorrected_edgeR,adj.P.Val_nuclei_uncorrected_edgeR,meaning_nuclei_uncorrected_edgeR,up_CONTROL,up_ENDOMETRIOSIS,not_significant
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
,,,,,,,,,,,,,,,,,,


In [13]:
# Cross-tabulate the limma and edgeR calls for the cells (uncorrected) tests.
cells_calls <- merged_table[, c(
  "meaning_cells_uncorrected_limma",
  "meaning_cells_uncorrected_edgeR"
)]
table(cells_calls)

                               meaning_cells_uncorrected_edgeR
meaning_cells_uncorrected_limma not_significant up_CONTROL up_ENDOMETRIOSIS
               not_significant            17613        112              203
               up_CONTROL                  1948       1011                0
               up_ENDOMETRIOSIS            2573          0              876

# Add GWAS genes

In [16]:
# Read the three GWAS gene tables and pool their first columns into a single
# vector of gene names.
read_gene_table <- function(path) {
  read.delim(path, stringsAsFactors = FALSE)
}

gwas_genes1 <- read_gene_table("/nfs/team292/lg18/endometriosis/fGWAs/Rahmioglu2023_endometriosis/loci_genes.tsv")
gwas_genes2 <- read_gene_table("/nfs/team292/lg18/endometriosis/fGWAs/Rahmioglu2023_endometriosis/credible_sets_genes.tsv")
gwas_genes3 <- read_gene_table("/nfs/team292/lg18/endometriosis/fGWAs/Rahmioglu2023_endometriosis/metaregression_genes.tsv")

gwas_genes <- c(
  gwas_genes1[, 1],
  gwas_genes2[, 1],
  gwas_genes3[, 1]
)

In [20]:
# Show the first six rows of the merged table.
head(merged_table, n = 6L)

Unnamed: 0_level_0,id,Gene,celltype,logFC_cells_uncorrected_limma,adj.P.Val_cells_uncorrected_limma,meaning_cells_uncorrected_limma,logFC_cells_uncorrected_edgeR,adj.P.Val_cells_uncorrected_edgeR,meaning_cells_uncorrected_edgeR,logFC_nuclei_uncorrected_limma,adj.P.Val_nuclei_uncorrected_limma,meaning_nuclei_uncorrected_limma,logFC_nuclei_uncorrected_edgeR,adj.P.Val_nuclei_uncorrected_edgeR,meaning_nuclei_uncorrected_edgeR,up_CONTROL,up_ENDOMETRIOSIS,not_significant,is_GWAS_gene
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>
eStromal_A1BG,eStromal_A1BG,A1BG,eStromal,-0.27929252,0.00211356,up_ENDOMETRIOSIS,-0.31063568,0.0794235168,not_significant,-0.14701589,0.22839121,not_significant,,,,0,1,2,no
dStromal_early_A1BG,dStromal_early_A1BG,A1BG,dStromal_early,0.26874047,0.013900692,up_CONTROL,0.2306317,0.1411019311,not_significant,,,,,,,1,0,1,no
dStromal_late_A1BG,dStromal_late_A1BG,A1BG,dStromal_late,0.12872314,0.546254068,not_significant,0.06130456,0.854043532,not_significant,-0.05585452,0.841292956,not_significant,,,,0,0,3,no
dStromal_mid_A1BG,dStromal_mid_A1BG,A1BG,dStromal_mid,0.06937395,0.48844935,not_significant,0.13657248,0.3751666448,not_significant,,,,,,,0,0,2,no
eStromal_A2M,eStromal_A2M,A2M,eStromal,-0.30608757,0.022590903,up_ENDOMETRIOSIS,-0.48217561,0.0648017351,not_significant,-0.29556752,0.046899419,up_ENDOMETRIOSIS,,,,0,2,1,no
dStromal_early_A2M,dStromal_early_A2M,A2M,dStromal_early,0.73119506,0.003045924,up_CONTROL,1.6274458,0.0002546488,up_CONTROL,-0.59731257,0.001480451,up_ENDOMETRIOSIS,-1.295136,0.2228998,not_significant,2,1,1,no


In [18]:
# Flag the rows whose gene appears in the pooled GWAS gene list.
in_gwas <- merged_table$Gene %in% gwas_genes
merged_table$is_GWAS_gene <- ifelse(in_gwas, "GWAS_gene", "no")

In [21]:
# Write the merged table as a tab-separated file named after the lineage.
merged_out <- paste0(
  "/nfs/team292/lg18/endometriosis/DEGs_endometriosis/",
  lineage,
  "_merged_table.tsv"
)
write.table(
  merged_table,
  file = merged_out,
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)