Merge pull request #10 from virtual-labs/testing

Testing
virtual-labs · Sep 11, 2021 · d1d06f5 · d1d06f5
2 parents c3234e0 + 672fce5
commit d1d06f5
Show file tree

Hide file tree

Showing 4 changed files with 367 additions and 11 deletions.
diff --git a/experiment/simulation/RNA_Seq_sample_code.R b/experiment/simulation/RNA_Seq_sample_code.R
@@ -0,0 +1,253 @@
+
+
+# Install and load packages
+
+if (!requireNamespace("BiocManager", quietly = TRUE))
+  install.packages("BiocManager")
+
+BiocManager::install("DESeq2")
+BiocManager::install("edgeR")
+BiocManager::install("biomaRt")
+BiocManager::install('PCAtools')
+
+library(edgeR)
+library(DESeq2)
+library("biomaRt")
+
+###################### load the raw count matrix #######################
+
+setwd("wrkng_directory/")
+
+rawcount<-read.table ("your_Rawgenedata.tsv",header=TRUE,  sep="\t",  row.names=1)
+
+######################  Filter for coding genes ########################
+
+mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
+all_coding_genes <- getBM(attributes = c( "hgnc_symbol"), filters = c("biotype"), values = list(biotype="protein_coding"), mart = mart)
+rawcount <- rawcount[row.names(rawcount) %in%  all_coding_genes$hgnc_symbol,]
+
+######################  Filter low count gene  #########################
+
+keep <- rowSums(cpm(rawcount)>1) >= 5   ## depends case to case and on the number of samples
+rawcount<- rawcount[keep,]
+
+## if you not sure about the threshold, there are automatics calculation for the threshold. Please refer to the following link
+## https://seqqc.wordpress.com/2020/02/17/removing-low-count-genes-for-rna-seq-downstream-analysis/
+## I am keeping this script simple as much as possible. But you can check the relevant function at the end of this script.
+
+###################### Data annotation  #################################
+
+anno <-read.table ("Your_Annotation_of_samples.csv",header=TRUE,  sep=",") ##In this case Two coulmns (a) sample (b) Condition
+rownames(anno) <- anno$sample
+
+# Define conditions (for contrast) that you want to compare if you have more than one #control #case
+# This is pair-wise comparison, so only consider one pair at one time
+
+firstC<-"case1"       #case1 #case2 #case3 etc          
+SecondC <-"Control"     
+p.threshold <- 0.05   ##define threshold for filtering
+
+
+### subset raw and conditional data for defined pairs
+
+anno <- anno[(anno$Condition ==firstC |anno$Condition ==SecondC),]
+rawcount <- rawcount[,names(rawcount) %in% anno$sample]
+
+############################### Create DESeq2 datasets #############################
+
+dds <- DESeqDataSetFromMatrix(countData = rawcount, colData = anno, design = ~Condition )
+
+## Run DESEQ2
+dds <- DESeq(dds)
+
+################# contrast based  comparison ##########################
+
+#In case of multiple comparisons ## we need to change the contrast for every comparision
+contrast<- c("Condition",firstC,SecondC)
+
+res <- results(dds, contrast=contrast)
+res$threshold <- as.logical(res$padj < p.threshold)  #Threshold defined earlier
+
+nam <- paste('down_in',firstC, sep = '_')
+#res$nam <- as.logical(res$log2FoldChange < 0)
+res[, nam] <- as.logical(res$log2FoldChange < 0)
+
+genes.deseq <- row.names(res)[which(res$threshold)]
+
+genes_deseq2_sig <- res[which(res$threshold),]
+
+file <- paste('Deseq2_',firstC,'_v_',SecondC,'_results_significant_padj0.05.csv',sep = '')
+all_results <- paste('Deseq2_',firstC,'_v_',SecondC,'_all_results.csv',sep = '')
+
+write.table(genes_deseq2_sig,file,sep = ",")
+write.table(res,all_results,sep = ",")
+
+################### PCA and Heat-MAp Plots ############################
+
+## Varinace transformation vst or rlog
+vsd <- vst(dds, blind=FALSE)   #Variance type (a) Vst or (b) rlog
+#rld <- rlog(dds, blind=FALSE) 
+
+###### PCA with design consideration ###
+pcaData <- plotPCA(vsd, intgroup=c("Condition", "sample"), returnData=TRUE)
+percentVar <- round(100 * attr(pcaData, "percentVar"))
+ggplot(pcaData, aes(PC1, PC2, color=sample, shape=Condition)) +
+  geom_point(size=3) +
+  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
+  ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
+  coord_fixed()
+
+##heatmap
+sampleDists <- dist(t(assay(vsd)))
+library("RColorBrewer")
+library('pheatmap')
+sampleDistMatrix <- as.matrix(sampleDists)
+
+rownames(sampleDistMatrix) <- paste(vsd$Condition, vsd$sample, sep="-")
+
+colnames(sampleDistMatrix) <- NULL
+colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
+pheatmap(sampleDistMatrix,
+         clustering_distance_rows=sampleDists,
+         clustering_distance_cols=sampleDists,
+         col=colors)
+
+
+############ Quick enrichment analysis ##################
+BiocManager::install("ReactomePA")
+library(ReactomePA)
+
+all <- genes.deseq   ## retreive EntrezGene id's
+
+genes=getBM(attributes = c("hgnc_symbol", "entrezgene_id"), filters = "hgnc_symbol", values = all, bmHeader = T, mart = mart)
+
+genes1 <- genes$`NCBI gene (formerly Entrezgene) ID` 
+
+
+
+#?enrichPathway #pvalueCutoff=0.02, #pAdjustMethod = "BH", qvalueCutoff = 0.01,
+x <- enrichPathway(gene=genes1,  pvalueCutoff=0.05,readable=T)
+
+#head(as.data.frame(x))
+barplot(x, showCategory=10)
+dotplot(x, showCategory=10)
+emapplot(x)
+cnetplot(x, categorySize="pvalue", foldChange=genes1)
+emapplot(x, color="pvalue")
+viewPathway("Extracellular matrix organization", readable=TRUE, foldChange=genes1)   ## it's an example
+
+
+############## edgeR  ##########################
+
+
+dge <- DGEList(counts=rawcount, group=anno$Condition)
+
+# Normalize by total count
+dge <- calcNormFactors(dge, method = "TMM")
+
+## PCA ## for more details, please visit following link
+##https://bioconductor.org/packages/release/bioc/vignettes/PCAtools/inst/doc/PCAtools.html
+library(PCAtools)
+
+cpmlog <- cpm(dge, log = TRUE, prior.count = 1)
+
+p <-pca(cpmlog, metadata = anno, removeVar = 0.2) ## -- removing the lower 20% of variables based on variance
+#biplot(p)
+plotloadings(p)
+
+biplot(p,
+       lab = paste0(p$metadata$sample),
+       colby = 'Condition',
+       hline = 0, vline = 0,
+       legendPosition = 'right')
+
+
+
+
+# Create the contrast matrix
+design.mat <- model.matrix(~ 0 + dge$samples$group)
+colnames(design.mat) <- levels(dge$samples$group)
+design.mat
+
+# Estimate dispersion parameter for GLM
+dge <- estimateGLMCommonDisp(dge, design.mat)
+dge <- estimateGLMTrendedDisp(dge, design.mat) 
+dge<- estimateGLMTagwiseDisp(dge,design.mat)
+# Plot mean-variance
+#plotBCV(dge)
+# Model fitting
+
+fit.edgeR <- glmFit(dge, design.mat)
+
+# Differential expression
+
+contrasts.edgeR <- makeContrasts(case1 - Control, levels=design.mat)    ##FirstC-SecondC ##Define 
+
+
+lrt.edgeR <- glmLRT(fit.edgeR, contrast=contrasts.edgeR)
+
+##### DGE at padjust 0.05
+
+# Access results tables
+edgeR_results <- lrt.edgeR$table
+sig.edgeR <- decideTestsDGE(lrt.edgeR, adjust.method="BH", p.value = p.threshold)
+#View(sig.edgeR) 
+significant_table <- edgeR_results[which(sig.edgeR != 0),]
+significant_table$gene <- row.names(significant_table)
+genes.edgeR <- row.names(edgeR_results)[which(sig.edgeR != 0)]
+
+edgeR_results$genes <- row.names(edgeR_results)
+
+
+file_sigTab <- paste('edgeR_',firstC,'_v_',SecondC,'_results_significant_padj0.05.csv',sep = '')
+file_allRes <- paste('edgeR_',firstC,'_v_',SecondC,'_all_results.csv',sep = '')
+
+write.table(significant_table,file_sigTab,sep = ",")
+write.table(edgeR_results,file_allRes,sep = ",")
+
+
+################# Overlapped genes between deseq2 and edgeR  ##########
+
+library(gplots)
+
+venn(list(edgeR = genes.edgeR, DESeq2 = genes.deseq))
+overlapped_genes <- intersect(genes.deseq,genes.edgeR)
+
+
+file_common <- paste('Common_DEG_deseq2_edgeR_',firstC,'_v_',SecondC,'.csv',sep = '')
+write.table(overlapped_genes,file_common,sep = ",", row.names = F)
+
+## Save session info
+sessionInfo()
+writeLines(capture.output(sessionInfo()), "sessionInfo.txt")
+
+
+########## Automate Function for filtering the low count read ###########
+## adopted from ## https://seqqc.wordpress.com/2020/02/17/removing-low-count-genes-for-rna-seq-downstream-analysis/
+
+selectGenes <- function(counts, min.count=10, N=0.90){
+
+  lib.size <- colSums(counts)
+  MedianLibSize <- median(lib.size)
+  CPM.Cutoff <- min.count / MedianLibSize*1e6
+  CPM <- edgeR::cpm(counts,lib.size=lib.size)
+
+  min.samples <- round(N * ncol(counts))
+
+  f1 <- genefilter::kOverA(min.samples, CPM.Cutoff)
+  flist <- genefilter::filterfun(f1)
+  keep <- genefilter::genefilter(CPM, flist)
+
+  ## the same as:
+  #keep <- apply(CPM, 1, function(x, n = min.samples){
+  #  t = sum(x >= CPM.Cutoff) >= n
+  #  t
+  #})
+
+  return(keep)
+}
+
+keep.exprs <- selectGenes(assay(x), min.count=10, N=0.90)
+myFilt <- x[keep.exprs,]
+dim(myFilt)
+###########################################################################
diff --git a/experiment/simulation/images/Amritalogo.png b/experiment/simulation/images/Amritalogo.png
diff --git a/experiment/simulation/images/vlablogo2.jpg b/experiment/simulation/images/vlablogo2.jpg
diff --git a/experiment/simulation/index.html b/experiment/simulation/index.html
@@ -1,13 +1,116 @@
-<!DOCTYPE html>
 <html>
-<head>
-	<!-- Add CSS at the head of HTML file -->
-    <link rel="stylesheet" href="./css/main.css">
-</head>
-<body>
-		<!-- Your code goes here-->
-
-	<!-- Add JS at the bottom of HTML file --> 
-        <script src="./js/main.js"></script>
-</body>
+    <head>
+        <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
+
+    </head>
+    <title>
+        Vlab experiment GC count
+    </title>
+    <body>
+
+        <div class="card-header" style="max-width: 70rem; background: rgb(241, 239, 239);">
+            <div class="row">
+
+                <div class="col-1" style="left: 10px; text-align: center;">
+                    <a href="https://www.amrita.edu/center/mind-brain" target="_blank">
+                        <img alt="amritalogo" src="./images/Amritalogo.png" style="width:300px;height:90px;">
+                    </a>
+                </div>
+
+                <div class="col-2" style="border-right: thick solid #d8d8d8ee; margin-left: 80px;"></div>
+
+                <div class="col-6" style="margin-left: 40px;">
+                    <!--h2><b>Simulator</b></h1-->
+                    </br>
+                    <h5 class="card-subtitle mb-2 text-muted"><b><i>Differential expression analyses of RNA-seq</i></b></h2>
+                </div>
+
+            </div>
+
+        </div>
+
+        <div class="card-header" style="max-width: 70rem; background: rgb(253, 253, 253);">
+            <form style="padding: 150px;">
+
+                <div class="form-group">
+
+                    <p>Download the sample R code and run in your local system</p>
+                    <br>
+                    <p>Need to install the given below <span><b>Bioconductor</b></span> R packages in your R </p>
+
+                    <br>
+
+                    <code>
+                        if (!requireNamespace("BiocManager", quietly = TRUE))<br>
+                        install.packages("BiocManager")<br><br>
+
+                        BiocManager::install("DESeq2")<br>
+                        BiocManager::install("edgeR")<br>
+                        BiocManager::install("biomaRt")<br>
+                        BiocManager::install('PCAtools')<br>
+
+                    </code>
+
+                    <br>
+
+                    <a href="./RNA_Seq_sample_code.R" download="RNA_Seq_sample_code.R" > <button class="btn btn-success" type="button"> Sample Code </button> </a> 
+
+                    <br>
+                    <br>
+
+                    <output id="GCValueout">
+
+                    </output>
+
+                </div>
+
+            </form>
+        </div>
+
+        <script>
+
+            //finding the GC and AT Content of a DNA
+
+            function FindGC() {
+
+                let dna_seq = document.getElementById('dnainput').value.toUpperCase();
+
+                if(dna_seq.match(/^\s+$/) || dna_seq == ""){
+
+                    alert("Please enter a DNA Sequence");
+                    return
+                }
+
+                let a = count(dna_seq, "A")
+                let c = count(dna_seq, "C")
+                let g = count(dna_seq, "G")
+                let t = count(dna_seq, "T")
+
+                let GC_content = ((g + c) / (g + c + a + t) * 100).toFixed(2) + "%";
+
+                if (dna_seq == "") {
+
+                    document.getElementById('dnainput').style.border = "1px solid red";
+                    document.getElementById('GCValueout').innerHTML = "";
+
+                } else {
+                    document.getElementById('GCValueout').innerHTML = "GC Content: " + GC_content;
+                }
+            }
+
+            function count(str, letter) {
+                var letter_Count = 0;
+                for (var position = 0; position < str.length; position++) {
+
+                if (str.charAt(position) == letter) {
+                    letter_Count++;
+                }
+
+                }
+                return letter_Count;
+            }
+
+        </script>
+
+    </body>
 </html>