In [2]:
library(gplots)
library(ggplot2)
# Get the dendextend package
library(RColorBrewer)
source("https://raw.githubusercontent.com/talgalili/dendextend/master/R/attr_access.R")

require(gdata)
library(scales)

### GO analysis

In [3]:
# Load the filtered gene-vs-call table written by the RDP4 run.
df <- read.csv(
  file = "~/Projects/Gutierrez/EBV-recomb/recomb/rdp4_results/ids_171_ebv/10Miter/results/filtered_gene_vs_call.csv"
)
# The third column is referred to as `group` throughout the rest of the script.
names(df)[3] <- "group"



In [4]:
library(RColorBrewer)
# Fixed palette of 104 hex colours; bubble_plot() draws one colour per group
# from this vector.
contrasting_colors <- c(
  "#000000", "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941", "#006FA6", "#A30059",
  "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
  "#5A0007", "#809693", "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
  "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9", "#B903AA", "#D16100",
  "#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018", "#0AA6D8", "#013349", "#00846F",
  "#372101", "#FFB500", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
  "#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66",
  "#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C",
  "#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81",
  "#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00",
  "#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700",
  "#549E79", "#FFF69F", "#201625", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329",
  "#5B4534", "#FDE8DC", "#404E55", "#0089A3", "#CB7E98", "#A4E804", "#324E72", "#6A3A4C"
)

In [5]:
# Draw a bubble chart with one point per row of `df`.
#
# Arguments:
#   df          data frame containing the columns named by `group_color`,
#               `group_y` and `score`.
#   group_color name of the column placed on the x axis; each distinct value
#               also receives its own point colour.
#   group_y     name of the column placed on the y axis.
#   score       name of the column mapped to point size (coerced to numeric).
#   scale       unused; kept so that existing calls stay valid.
#   transf      transformation name handed to scale_size(trans = ).
#   range       smallest and largest point size, handed to scale_size(range = ).
#   xlab, ylab  axis titles. The defaults 'x' and 'y' are sentinels meaning
#               "use the column name".
#
# Returns the ggplot object; nothing is drawn until it is printed.
#
# Colours are drawn at random from the global `contrasting_colors`, so call
# set.seed() beforehand for a reproducible figure.
bubble_plot <- function(df,group_color='group',group_y='GENE',score = 'Mean.Rho.bp.',scale = 2,transf = 'identity',range = c(1,3),
                        xlab='x',ylab='y'){
    if (xlab == 'x') xlab <- group_color
    if (ylab == 'y') ylab <- group_y

    # as.numeric() on a factor returns the level codes, not the values, so go
    # through character first when the score column was read in as a factor.
    score_values <- df[[score]]
    if (is.factor(score_values)) score_values <- as.character(score_values)
    df[['Score']] <- as.numeric(score_values)

    # Key the palette by the values actually present. This works for both
    # character and factor columns and is unaffected by unused factor levels.
    group_values <- as.character(df[[group_color]])
    group_ids <- unique(group_values[!is.na(group_values)])
    if (length(group_ids) > length(contrasting_colors)) {
        stop("bubble_plot: ", length(group_ids), " groups but only ",
             length(contrasting_colors), " colours in contrasting_colors",
             call. = FALSE)
    }
    group_palette <- sample(contrasting_colors, length(group_ids))
    names(group_palette) <- group_ids
    df[['color']] <- unname(group_palette[group_values])

    # .data[[name]] looks the column up by string without eval(parse()), so
    # column names that are not syntactic R names also work.
    ggplot(df, aes(x = .data[[group_color]], y = .data[[group_y]])) +
        geom_point(aes(colour = color, size = Score), alpha = 0.5) +
        # `color` already holds hex codes: draw them as-is instead of letting
        # ggplot2 remap them to its default hue palette. The identity scale
        # shows no legend by default.
        scale_colour_identity() +
        scale_size(trans = transf, range = range) +
        theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
        labs(x = xlab, y = ylab)
}

In [9]:
# Values of the `group` column that identify sequence motifs.
motifs <- c('AGGAG', 'CCCAG', 'CCTCCCCT', 'GGGCT', 'TGGAG', 'TGGTGG')
# Values of the `group` column that identify repeat classes.
rep <- c(
  'inverted', 'tandem', 'nan', 'type A', 'type B', 'type C', 'type D',
  'EBNA triplet repeat GGA,GCA,GGG', 'PstI repeats', 'TERMINAL'
)

# Keep only the matching rows, then drop factor levels that no longer occur.
Motifs <- droplevels(df[df$group %in% motifs, , drop = FALSE])
Repeats <- droplevels(df[df$group %in% rep, , drop = FALSE])



In [8]:
# Write the motif bubble chart to a 5 x 8 inch PNG at 500 dpi.
filename <- "~/Projects/Gutierrez/EBV-recomb/recomb/rdp4_results/ids_171_ebv/10Miter/results/filtered_bubble_Motifs.png"
# Build the figure before opening the device, so an error while plotting
# cannot leave a PNG device open.
plot <- bubble_plot(df = Motifs, group_color = 'group', group_y = 'GENE', score = 'Mean.Rho.bp.',
                    range = c(0.01, 8), xlab = 'Motifs', ylab = 'gene')
png(filename, width = 5, height = 8, res = 500, units = 'in')
# Print explicitly: auto-printing only happens at the interactive top level,
# so a bare `plot` would produce an empty file when this code is source()d.
print(plot)
dev.off()


In [10]:
# Write the repeat-class bubble chart to a 5 x 8 inch PNG at 500 dpi.
filename <- "~/Projects/Gutierrez/EBV-recomb/recomb/rdp4_results/ids_171_ebv/10Miter/results/filtered_bubble_repeats.png"
# Build the figure before opening the device, so an error while plotting
# cannot leave a PNG device open.
plot <- bubble_plot(df = Repeats, group_color = 'group', group_y = 'GENE', score = 'Mean.Rho.bp.',
                    range = c(0.01, 8), xlab = 'repeat_region', ylab = 'gene')
png(filename, width = 5, height = 8, res = 500, units = 'in')
# Print explicitly: auto-printing only happens at the interactive top level,
# so a bare `plot` would produce an empty file when this code is source()d.
print(plot)
dev.off()


In [16]:
# Annotation classes that count as genome regions.
region_types <- c('CDS', 'exon', 'intron', 'mRNA', 'regulatory', 'repeat_region')
regions <- droplevels(subset(df, group %in% region_types))

library(reshape)
# Pivot to one row per gene and one column per region type. The value column
# is named explicitly rather than left for cast() to guess.
reg <- cast(regions, GENE ~ group, value = "Mean.Rho.bp.")
# Gene/region combinations that do not occur are treated as 0.
reg[is.na(reg)] <- 0

reg <- as.matrix(reg)

# Cluster genes by their per-region profile.
hc <- hclust(dist(reg))
# hc$order[i] is the index of the row drawn at dendrogram position i, so the
# permutation has to be inverted to obtain each row's position.
oo <- order(hc$order)

# NOTE(review): assumes as.matrix() on the cast result keeps the gene ids as
# row names so that hc$labels are gene names -- confirm.
names(oo) <- hc$labels
# Look up by name; indexing with a factor would use its integer codes instead.
regions['ord'] <- oo[as.character(regions$GENE)]


Using Mean.Rho.bp. as value column.  Use the value argument to cast to override this choice


In [17]:
# Write the genome-region bubble chart to a 5 x 8 inch PNG at 500 dpi.
filename <- "~/Projects/Gutierrez/EBV-recomb/recomb/rdp4_results/ids_171_ebv/10Miter/results/filtered_bubble_regions.png"
# Build the figure before opening the device, so an error while plotting
# cannot leave a PNG device open.
plot <- bubble_plot(df = regions, group_color = 'group', group_y = 'GENE', score = 'Mean.Rho.bp.',
                    range = c(1, 7), xlab = 'genome region', ylab = 'gene', transf = 'atanh')
png(filename, width = 5, height = 8, res = 500, units = 'in')
# Print explicitly: auto-printing only happens at the interactive top level,
# so a bare `plot` would produce an empty file when this code is source()d.
print(plot)
dev.off()