In [None]:
library(ExOutBench)
library(tidyr)
library(radiant.data)
library(textshape)
library(dplyr)
library(magrittr)
library(FSA)
library(ggplot2)
library(readr)
library(stringr)

In [None]:
# Command-line arguments passed after the script name (trailing arguments only).
args <- commandArgs(trailingOnly = TRUE)

In [None]:
# Tissue names. The order is significant: a tissue is selected elsewhere in
# this script by its 1-based position in this vector.
tissueList <- c(
  "Whole Blood",
  "Brain - Frontal Cortex (BA9)",
  "Adipose - Subcutaneous",
  "Muscle - Skeletal",
  "Artery - Tibial",
  "Artery - Coronary",
  "Heart - Atrial Appendage",
  "Adipose - Visceral (Omentum)",
  "Ovary",
  "Uterus",
  "Vagina",
  "Breast - Mammary Tissue",
  "Skin - Not Sun Exposed (Suprapubic)",
  "Minor Salivary Gland",
  "Brain - Cortex",
  "Adrenal Gland",
  "Thyroid",
  "Lung",
  "Spleen",
  "Pancreas",
  "Esophagus - Muscularis",
  "Esophagus - Mucosa",
  "Esophagus - Gastroesophageal Junction",
  "Stomach",
  "Colon - Sigmoid",
  "Small Intestine - Terminal Ileum",
  "Colon - Transverse",
  "Prostate",
  "Testis",
  "Skin - Sun Exposed (Lower leg)",
  "Nerve - Tibial",
  "Heart - Left Ventricle",
  "Pituitary",
  "Brain - Cerebellum",
  "Cells - Cultured fibroblasts",
  "Artery - Aorta",
  "Cells - EBV-transformed lymphocytes",
  "Brain - Cerebellar Hemisphere",
  "Brain - Caudate (basal ganglia)",
  "Brain - Nucleus accumbens (basal ganglia)",
  "Brain - Putamen (basal ganglia)",
  "Brain - Hypothalamus",
  "Brain - Spinal cord (cervical c-1)",
  "Liver",
  "Brain - Hippocampus",
  "Brain - Anterior cingulate cortex (BA24)",
  "Brain - Substantia nigra",
  "Kidney - Cortex",
  "Brain - Amygdala",
  "Cervix - Ectocervix",
  "Fallopian Tube",
  "Cervix - Endocervix",
  "Bladder",
  "Kidney - Medulla",
  "Cells - Leukemia cell line (CML)"
)

In [None]:
# Resolve the tissue to analyse: the first command-line argument is a 1-based
# index into tissueList.
tissueIndex <- strtoi(args[1])

# Fail early with a clear message. Without this check a missing, non-numeric
# or out-of-range argument yields an NA tissue name, which only surfaces later
# as a file-not-found error for ".../NA.csv".
if (is.na(tissueIndex) || tissueIndex < 1L || tissueIndex > length(tissueList)) {
  stop(
    "Expected a tissue index between 1 and ", length(tissueList),
    " as the first argument, got: ", args[1],
    call. = FALSE
  )
}

currTissue <- tissueList[tissueIndex]

In [None]:
# Read the rare-variant table (tab-separated). It is passed to every
# enrichment call in this script. The progress bar is disabled.
rare.variants <- read_tsv(
  "/gpfs/home/ydong/enrichment/all_rare_variants_SNPs_10kb_genebody_w_consdetail_no_NA.tsv",
  progress = FALSE
)

In [None]:
# Read the P-value table for the selected tissue; the CSV file is named after
# the tissue.
pVal <- read_csv(paste0("/gpfs/home/ydong/residualP/", currTissue, ".csv"))

In [None]:
# Reshape to long format: every column except GeneID is stacked into
# SampleName / outlier.score pairs, then rows containing any missing value
# are dropped.
pVal <- pVal %>%
  gather(key = SampleName, value = outlier.score, -GeneID) %>%
  filter(complete.cases(.))

In [None]:
# Enrichment by annotation for the per-tissue P-value table against the
# rare-variant table.
# NOTE(review): enrichment_by_annotation() is presumably provided by ExOutBench.
enrichment.by.annotation.output.residual <-
  enrichment_by_annotation(pVal, rare.variants)

In [None]:
# Enrichment by significance for the per-tissue P-value table against the
# rare-variant table.
# NOTE(review): enrichment_by_significance() is presumably provided by ExOutBench.
enrichment.by.significance.output.residual <-
  enrichment_by_significance(pVal, rare.variants)

In [None]:
# Read the expression-based comparison table and reshape it to long format:
# every column except GeneID is stacked into SampleName / outlier.score pairs,
# and rows containing any missing value are dropped.
#
# NOTE(review): the path is fixed to the Muscle expression file although the
# rest of the script is driven by currTissue. The naming scheme for other
# tissues is not visible here, so the mismatch is reported instead of guessed.
# This assumes expression_Muscle.csv corresponds to "Muscle - Skeletal" (TODO confirm).
if (!identical(currTissue, "Muscle - Skeletal")) {
  warning(
    "Expression baseline is hard-coded to expression_Muscle.csv, ",
    "but the selected tissue is '", currTissue, "'.",
    call. = FALSE
  )
}

expP <- read_csv("/gpfs/home/ydong/enrichment/expression_Muscle.csv")

expP <- expP %>%
  gather(key = SampleName, value = outlier.score, -GeneID) %>%
  filter(complete.cases(.))

In [None]:
# Enrichment by annotation for the expression table against the rare-variant
# table.
enrichment.by.annotation.output.expression <-
  enrichment_by_annotation(expP, rare.variants)


In [None]:
# Enrichment by significance for the expression table against the rare-variant
# table.
enrichment.by.significance.output.expression <-
  enrichment_by_significance(expP, rare.variants)


In [None]:
# Stack the two significance-enrichment results into one table. The list names
# ("Residual", "Expression") are stored in the new color_code column.
full_enrichment_plt <- bind_rows(
  list(
    Residual = enrichment.by.significance.output.residual,
    Expression = enrichment.by.significance.output.expression
  ),
  .id = "color_code"
)

# Store the sig column as a factor.
full_enrichment_plt[["sig"]] <- factor(full_enrichment_plt[["sig"]])

In [None]:
# Display the combined significance-enrichment table (auto-printed because it
# is a bare top-level expression).
full_enrichment_plt

In [None]:
# Plot the enrichment ratio per significance level for both data types and
# save the figure under ./graphs, named after the selected tissue.

# Keep only rows with more than one outlier carrying a variant.
plt.tbl <- full_enrichment_plt %>%
  arrange(desc(sig)) %>%
  filter(n.outliers.w.var > 1)

significance.plot <-
  ggplot(plt.tbl, aes(x = sig, y = ratio)) +
  theme_linedraw() +
  # Colour is mapped from the color_code column by name; referring to
  # plt.tbl$color_code inside aes() bypasses the layer data and is discouraged.
  geom_pointrange(
    aes(ymin = lower.q, ymax = upper.q, color = color_code),
    alpha = 0.70
  ) +
  # Reference line at ratio = 1.
  geom_hline(yintercept = 1, color = "red") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    title = element_text(color = "black", size = 9.5, face = "bold"),
    legend.position = "bottom",
    legend.title = element_text(color = "black", size = 9, face = "bold")
  ) +
  # Show the factor levels of sig in reverse order along the x axis.
  scale_x_discrete(limits = rev(levels(plt.tbl$sig))) +
  labs(
    # The title names the selected tissue so it matches the output file name.
    title = paste0(
      "Modeling Residuals vs GTEx expression data enrichment by significance (",
      currTissue, ")"
    ),
    color = "Data type"
  )

print(significance.plot)

# Make sure the output directory exists, then save this plot explicitly rather
# than relying on the last plot displayed.
dir.create("./graphs", showWarnings = FALSE, recursive = TRUE)
ggsave(
  paste0("./graphs/significance_enrichment_", currTissue, ".png"),
  plot = significance.plot
)


In [None]:
# Stack the two annotation-enrichment results into one table. The list names
# ("Residual", "Expression") are stored in the new color_code column.
full_annotation_enrichment_plt <- bind_rows(
  list(
    Residual = enrichment.by.annotation.output.residual,
    Expression = enrichment.by.annotation.output.expression
  ),
  .id = "color_code"
)

# Store the anno column as a factor.
full_annotation_enrichment_plt[["anno"]] <-
  factor(full_annotation_enrichment_plt[["anno"]])

In [None]:
# Store the anno column as plain character strings.
full_annotation_enrichment_plt[["anno"]] <-
  as.character(full_annotation_enrichment_plt[["anno"]])

In [None]:
# Sort the combined annotation table by ascending enrichment ratio.
full_annotation_enrichment_plt <- full_annotation_enrichment_plt[
  order(full_annotation_enrichment_plt[["ratio"]]),
]

In [None]:
  # Plot the enrichment ratio per variant annotation for both data types (log2
  # y axis) and save the figure under ./graphs, named after the selected tissue.

  # Keep rows with more than one outlier carrying a variant, then keep only
  # annotations that still have more than one row. The grouping is dropped
  # afterwards so it does not leak into later steps.
  plt.tbl <- full_annotation_enrichment_plt %>%
    arrange(desc(ratio)) %>%
    filter(n.outliers.w.var > 1) %>%
    group_by(anno) %>%
    filter(n() > 1) %>%
    ungroup()

  annotation.plot <-
    # Annotations are ordered along the x axis by decreasing ratio.
    ggplot(plt.tbl, aes(reorder(anno, -ratio), ratio)) +
    theme_linedraw() +
    # Colour is mapped from the color_code column by name; referring to
    # plt.tbl$color_code inside aes() bypasses the layer data and is discouraged.
    geom_pointrange(
      aes(ymin = lower.q, ymax = upper.q, color = color_code),
      alpha = 0.70
    ) +
    # Reference line at ratio = 1.
    geom_hline(yintercept = 1, color = "red") +
    scale_y_continuous(trans = "log2") +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      title = element_text(color = "black", size = 9.5, face = "bold"),
      legend.position = "bottom",
      legend.title = element_text(color = "black", size = 9, face = "bold")
    ) +
    labs(
      # The title names the selected tissue so it matches the output file name.
      title = paste0(
        "Model Residual vs GTEx expression data enrichment by annotation (",
        currTissue, ")"
      ),
      color = "Data type"
    ) +
    xlab("variant annotation")

  print(annotation.plot)

  # Make sure the output directory exists, then save this plot explicitly
  # rather than relying on the last plot displayed.
  dir.create("./graphs", showWarnings = FALSE, recursive = TRUE)
  ggsave(
    paste0("./graphs/annotation_enrichment_", currTissue, ".png"),
    plot = annotation.plot
  )
