In [1]:
suppressPackageStartupMessages({
    library(Matrix)
    library(ggplot2)
    library(cygnus)
    library(dplyr)
    library(data.table)
    library(purrr)
    library(ggthemes)
    library(viridis)
    library(patchwork)
    library(glue)
    library(sf)
    library(ComplexHeatmap)
    library(RSpectra)
    library(mclust)
    library(Seurat)
    library(DESeq2)
    })

fig.size <- function(w,h) {
    options(repr.plot.height = h, repr.plot.width = w)
}

In [21]:
aggs1 = readRDS("outputs/aggs1.rds")
counts_data = aggs1$counts
metadata = aggs1$meta_data
dmt1 = readRDS('outputs/dmt1.rds')
genes = read.table('data/VizgenLungHacohen/cells/genes.txt')
row.names(counts_data) = genes$V1

In [22]:
# Input unnormalized counts into DESeq as required by the package instructiondim(counts_data)
meta = dmt1$pts[,c('agg_id', 'spatial_cluster')]
meta = distinct(meta)
meta = meta[order(agg_id)]
head(meta)
# Help solve the error with DESeq2
# Every gene contains at least one zero, cannot compute log geometric means

counts_data = counts_data + 1

agg_id,spatial_cluster
<int>,<fct>
1,13
2,0
3,1
4,13
5,1
6,1


In [8]:
dds <- DESeqDataSetFromMatrix(countData=counts_data, colData=meta, design= ~ 0 + spatial_cluster)
dds <- DESeq(dds)

converting counts to integer mode

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

-- note: fitType='parametric', but the dispersion trend was not well captured by the
   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.

final dispersion estimates

fitting model and testing



In [35]:
create_contrast <- function(c, total=14){
    arr = rep(-1/(total-1), times = total)
    arr[c] = 1
    return(arr)
}
create_contrast(5)

In [43]:
res = c()
full_result = c()
clusters = as.character(seq(1:14))
for (i in 1:length(clusters)){
  print(i, clusters[i])
  deseq2results <- results(dds, contrast=create_contrast(i, length(clusters)))
  deseq2results$cluster <- i - 1
  top_ten = deseq2results[order(deseq2results$log2FoldChange, decreasing=TRUE),][1:10,]
  print(top_ten)
  res <- rbind(res, top_ten)
  full_result <- rbind(full_result, deseq2results[order(deseq2results$log2FoldChange, decreasing=TRUE),])
    
}


[1] 1
log2 fold change (MLE): +1,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769 
Wald test p-value: +1,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769 
DataFrame with 10 rows and 7 columns
          baseMean log2FoldChange     lfcSE      stat    pvalue      padj
         <numeric>      <numeric> <numeric> <numeric> <numeric> <numeric>
ANXA1      43.7465        1.84744 0.0154988  119.1988         0         0
MET        18.7413        1.64930 0.0135998  121.2739         0         0
EFNB2      20.8796        1.64332 0.0143290  114.6847         0         0
LY6E       22.19

In [None]:
#saveRDS(dds, 'outputs/p1DE.rds')
res <- read.csv('outputs/p1deseq_full.csv', row.names=1)
res

In [37]:
de <- results(dds, contrast=create_contrast(1))
de[order(de$log2FoldChange, decreasing=TRUE),][1:10,]

log2 fold change (MLE): +1,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769 
Wald test p-value: +1,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769 
DataFrame with 10 rows and 6 columns
          baseMean log2FoldChange     lfcSE      stat    pvalue      padj
         <numeric>      <numeric> <numeric> <numeric> <numeric> <numeric>
ANXA1      43.7465        1.84744 0.0154988  119.1988         0         0
MET        18.7413        1.64930 0.0135998  121.2739         0         0
EFNB2      20.8796        1.64332 0.0143290  114.6847         0         0
LY6E       22.1931    

In [42]:
de <- results(dds, contrast=create_contrast(7))
de[order(de$log2FoldChange, decreasing=TRUE),][1:15,]

log2 fold change (MLE): -0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,+1,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769 
Wald test p-value: -0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,+1,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769 
DataFrame with 15 rows and 6 columns
         baseMean log2FoldChange     lfcSE      stat    pvalue      padj
        <numeric>      <numeric> <numeric> <numeric> <numeric> <numeric>
IL7R      5.95612        2.02017 0.0166083  121.6362         0         0
CXCR4     7.11035        1.61693 0.0168015   96.2371         0         0
KLF2      5.04495        1.48375 0.0176964   83.8444         0         0
CCR7      2.93095        1.

In [38]:
de <- results(dds, contrast=create_contrast(3))
head(de[order(de$log2FoldChange, decreasing=TRUE),]

log2 fold change (MLE): -0.0769230769230769,-0.0769230769230769,+1,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769 
Wald test p-value: -0.0769230769230769,-0.0769230769230769,+1,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769,-0.0769230769230769 
DataFrame with 479 rows and 6 columns
          baseMean log2FoldChange     lfcSE      stat    pvalue      padj
         <numeric>      <numeric> <numeric> <numeric> <numeric> <numeric>
COL4A1    22.92547        3.10842 0.0211948  146.6597         0         0
VWF        5.43087        2.40629 0.0179365  134.1561         0         0
TGFB3      7.07404        1.35384 0.0178935   75.6611         0         0
RGS5       3.29071   

In [72]:
save_df = res %>% data.frame() %>% View()
save_df = save_df[,c(7,2,1,3,4,5,6)]
write.csv(save_df, 'outputs/p1DE_top10.csv')

Unnamed: 0_level_0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,cluster
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ANXA1,43.746518,1.847438,0.01549880,119.19884,0,0,0
MET,18.741284,1.649297,0.01359978,121.27386,0,0,0
EFNB2,20.879604,1.643316,0.01432900,114.68467,0,0,0
LY6E,22.193112,1.561680,0.01356899,115.09185,0,0,0
WARS,15.312091,1.474655,0.01684217,87.55726,0,0,0
CCND1,10.796370,1.435717,0.01380460,104.00278,0,0,0
SERPINA1,14.974446,1.358014,0.01459223,93.06418,0,0,0
DNAJB1,16.448795,1.351962,0.01364049,99.11390,0,0,0
EGR1,21.851525,1.260834,0.01718710,73.35936,0,0,0
SMARCA4,16.606612,1.259426,0.01167626,107.86211,0,0,0


In [71]:
save_full_df = full_result %>% data.frame() %>% View()
save_full_df = save_full_df[,c(7,2,1,3,4,5,6)]
write.csv(save_full_df, 'outputs/p1DE_fullres.csv')

Unnamed: 0_level_0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,cluster
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ANXA1,43.746518,1.8474385,0.01549880,119.19884,0,0,0
MET,18.741284,1.6492973,0.01359978,121.27386,0,0,0
EFNB2,20.879604,1.6433160,0.01432900,114.68467,0,0,0
LY6E,22.193112,1.5616798,0.01356899,115.09185,0,0,0
WARS,15.312091,1.4746545,0.01684217,87.55726,0,0,0
CCND1,10.796370,1.4357166,0.01380460,104.00278,0,0,0
SERPINA1,14.974446,1.3580142,0.01459223,93.06418,0,0,0
DNAJB1,16.448795,1.3519618,0.01364049,99.11390,0,0,0
EGR1,21.851525,1.2608345,0.01718710,73.35936,0,0,0
SMARCA4,16.606612,1.2594263,0.01167626,107.86211,0,0,0


In [74]:
save_df

Unnamed: 0_level_0,cluster,log2FoldChange,baseMean,lfcSE,stat,pvalue,padj
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ANXA1,0,1.847438,43.746518,0.01549880,119.19884,0,0
MET,0,1.649297,18.741284,0.01359978,121.27386,0,0
EFNB2,0,1.643316,20.879604,0.01432900,114.68467,0,0
LY6E,0,1.561680,22.193112,0.01356899,115.09185,0,0
WARS,0,1.474655,15.312091,0.01684217,87.55726,0,0
CCND1,0,1.435717,10.796370,0.01380460,104.00278,0,0
SERPINA1,0,1.358014,14.974446,0.01459223,93.06418,0,0
DNAJB1,0,1.351962,16.448795,0.01364049,99.11390,0,0
EGR1,0,1.260834,21.851525,0.01718710,73.35936,0,0
SMARCA4,0,1.259426,16.606612,0.01167626,107.86211,0,0
