## Data Visualization with R

### Install required packages

In [None]:
# Install the CRAN packages used in this notebook, skipping any that are
# already available so that re-running this cell does not reinstall them.
cran_packages <- c("ggplot2", "ggdendro", "BiocManager")
already_installed <- vapply(
  cran_packages, requireNamespace, logical(1), quietly = TRUE
)
missing_packages <- cran_packages[!already_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# ComplexHeatmap is installed through BiocManager rather than
# install.packages(); again, only install it when it is not yet available.
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
  BiocManager::install("ComplexHeatmap")
}

In [None]:
# Switch to the repository checkout; the later cells read their inputs via
# relative "viz_data/..." paths. The path is specific to the SageMaker
# instance this notebook was written for.
setwd(
  "/home/ec2-user/SageMaker/NIGMS-Sandbox-Repository-MeRIP-UH"
)

# Echo the working directory to confirm the change took effect
getwd()

### Make volcano plots

In [None]:
library(ggplot2)

# Read in the toptable
toptable <- read.table("viz_data/Toptable_VolcanoPlot.txt")

# Declare significance thresholds
sig <- 0.05  # p-value cutoff
FC <- 0.6    # cutoff on logFC (applied as > FC for Up and < -FC for Down)

# Add a significance column to the dataframe
# Do this as a factor where we define the levels as Up, Down, and Not_Sig
# This allows us to easily specify the color of the points for each category
# NOTE(review): the categories are called on the raw P.Value while the y-axis
# below shows adj.P.Val -- confirm that this mix is intended.
toptable$Significance <- factor(
  ifelse(
    toptable$logFC > FC & toptable$P.Value < sig, "Up",
    ifelse(toptable$logFC < -FC & toptable$P.Value < sig, "Down", "Not_Sig")
  ),
  levels = c("Up", "Down", "Not_Sig")
)

# Color the points by category with scale_color_manual.
# The colors are named by level: unnamed values are matched by position to the
# levels that actually occur in the data, so a table with no "Up" genes would
# otherwise draw "Down" in red and "Not_Sig" in blue.
ggplot(
  data = toptable,
  aes(x = logFC, y = -log10(adj.P.Val), color = Significance)
) +
  geom_point() +
  scale_color_manual(
    values = c(Up = "red", Down = "blue", Not_Sig = "grey")
  )

### Make a heatmap with the ComplexHeatmap package

In [None]:
suppressPackageStartupMessages(library(ComplexHeatmap)) #suppresses certain package messages when loading

# First, read in the expression data
counts <- read.csv("viz_data/heatmap_matrix.csv")

# Next, read in the sample metadata
traits <- read.csv("viz_data/heatmap_traits.csv")

# Randomly select up to 25 genes (rows) to plot. Capping the sample size at
# nrow(counts) keeps sample() from failing when the file has fewer than 25
# rows. The selection changes on every run; call set.seed() first if you need
# the same genes each time.
genes <- sample(nrow(counts), min(25, nrow(counts)))

# Convert the selected rows to a matrix since ComplexHeatmap only works on
# matrices. The first column is dropped here and reused below as the row labels.
mat <- as.matrix(counts[genes, -1])

# Column annotations: a color bar for Treatment and a barplot of ab_fat.
# NOTE(review): assumes the rows of `traits` are in the same order as the
# columns of `mat` -- confirm against the input files.
column_ha <- HeatmapAnnotation(
  Treatment = traits$Treatment,
  Fat_Content = anno_barplot(traits$ab_fat),
  col = list(Treatment = c("A" = "orange", "B" = "green"))
)

# Draw the heatmap and attach the first column of `counts` as text labels
# on the rows.
Heatmap(
  mat,
  name = "mat",
  show_column_names = FALSE,
  top_annotation = column_ha
) +
  rowAnnotation(
    gene = anno_text(
      counts[, 1][genes],
      just = "center",
      location = unit(0.5, "npc"),
      show_name = FALSE
    ),
    annotation_name_rot = 0
  )

### Make a dendrogram with the ggdendro package

In [None]:
library(ggplot2)
library(ggdendro)

# Load the USArrests example dataset that ships with R
data(USArrests)

# Peek at the first six rows to see how the dataset is laid out
head(USArrests, n = 6L)

In [None]:
# Pairwise distances between the rows of USArrests
# (dist() uses Euclidean distance unless told otherwise)
d <- dist(USArrests)

# Hierarchical clustering built from those distances
# (hclust() uses complete linkage unless told otherwise)
hc <- hclust(d)

# Uncomment the next line to print the full distance matrix
# d

# Draw the clustering tree with base R graphics
plot(hc)

In [None]:
# Draw the same clustering with ggdendro. rotate = FALSE is passed through
# unchanged, and the theme() call sets the text size to 13 to increase the font size.
ggdendrogram(hc, rotate = FALSE) +
  theme(
    text = element_text(size = 13)
  )