/
venn_diag_dartcounts.R
executable file
·339 lines (300 loc) · 18.4 KB
/
venn_diag_dartcounts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
require(plyr)
require(VennDiagram)
require(gridExtra)
require(gdata)
venn_3_samples <- function(sample1 = sample1, sample2 = sample2, sample3 = sample3, name1 = name1, name2 = name2, name3 = name3, clone_name = clone_name, save_ids = "FALSE"){
sample1 <- unique(sample1[complete.cases(sample1)])
sample2 <- unique(sample2[complete.cases(sample2)])
sample3 <- unique(sample3[complete.cases(sample3)])
inter_12_full <- unique(intersect(sample1, sample2))
inter_13_full <- unique(intersect(sample1, sample3))
inter_23_full <- unique(intersect(sample2, sample3))
inter_123_full <- unique(intersect(inter_12_full, sample3))
inter_12 <- unique(inter_12_full[!inter_12_full %in% inter_123_full])
inter_13 <- unique(inter_13_full[!inter_13_full %in% inter_123_full])
inter_23 <- unique(inter_23_full[!inter_23_full %in% inter_123_full])
unic_s1 <- unique(sample1[!sample1 %in% c(inter_12, inter_13, inter_123_full)])
unic_s2 <- unique(sample2[!sample2 %in% c(inter_12, inter_23, inter_123_full)])
unic_s3 <- unique(sample3[!sample3 %in% c(inter_13, inter_23, inter_123_full)])
if(save_ids == "TRUE"){
write.table(inter_123_full, paste(clone_name, name1, "vs", name2, "vs", name3, "intersection_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_12, paste(clone_name, "only", name1, "vs", name2, "intersection_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_13, paste(clone_name, "only", name1, "vs", name3, "intersection_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_23, paste(clone_name, "only", name2, "vs", name3, "intersection_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_12_full, paste(clone_name, "all", name1, "vs", name2, "intersection_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_13_full, paste(clone_name, "all", name1, "vs", name3, "intersection_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_23_full, paste(clone_name, "all", name2, "vs", name3, "intersection_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s1, paste(clone_name, name1, "exclusive_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s2, paste(clone_name, name2, "exclusive_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s3, paste(clone_name, name3, "exclusive_marks.txt", sep = "_"), row.names = F, col.names = F, quote = F)
}else if(save_ids == "FALSE"){
}else{
print("Invalid save_ids option!")
}
grid.newpage();
venn.plot <- draw.triple.venn(
area1 = length(sample1),
area2 = length(sample2),
area3 = length(sample3),
n12 = length(inter_12_full),
n13 = length(inter_13_full),
n23 = length(inter_23_full),
n123 = length(inter_123_full),
cross.area = length(inter),
alpha = 0.75,
category = c(deparse(name1), deparse(name2), deparse(name3)),
fill = c("darkgreen", "green", "#8B4513"),
lty = "blank",
cex = 4,
cat.cex = 2.5,
cat.dist = 0.055,
ext.pos = 0,
ext.dist = -0.05,
ext.length = 0.85,
ext.line.lwd = 2,
ext.line.lty = "dashed",
scaled = T,
print.mode = c("raw", "percent"),
rotation.degree = 0)
}
venn_3_samples_out_gt <- function(sample1 = sample1, sample2 = sample2, sample3 = sample3, name1 = name1, name2 = name2, name3 = name3, clone_name = clone_name, save_ids = "FALSE"){
sample1 <- unique(sample1[complete.cases(sample1)])
sample2 <- unique(sample2[complete.cases(sample2)])
sample3 <- unique(sample3[complete.cases(sample3)])
inter_12_full <- unique(intersect(sample1, sample2))
inter_13_full <- unique(intersect(sample1, sample3))
inter_23_full <- unique(intersect(sample2, sample3))
inter_123_full <- unique(intersect(inter_12_full, sample3))
inter_12 <- unique(inter_12_full[!inter_12_full %in% inter_123_full])
inter_13 <- unique(inter_13_full[!inter_13_full %in% inter_123_full])
inter_23 <- unique(inter_23_full[!inter_23_full %in% inter_123_full])
unic_s1 <- unique(sample1[!sample1 %in% c(inter_12, inter_13, inter_123_full)])
unic_s2 <- unique(sample2[!sample2 %in% c(inter_12, inter_23, inter_123_full)])
unic_s3 <- unique(sample3[!sample3 %in% c(inter_13, inter_23, inter_123_full)])
if(save_ids == "TRUE"){
write.table(inter_123_full, paste(clone_name, name1, "vs", name2, "vs", name3, "intersection_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_12, paste(clone_name, "only", name1, "vs", name2, "intersection_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_13, paste(clone_name, "only", name1, "vs", name3, "intersection_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_23, paste(clone_name, "only", name2, "vs", name3, "intersection_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_12_full, paste(clone_name, "all", name1, "vs", name2, "intersection_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_13_full, paste(clone_name, "all", name1, "vs", name3, "intersection_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_23_full, paste(clone_name, "all", name2, "vs", name3, "intersection_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s1, paste(clone_name, name1, "exclusive_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s2, paste(clone_name, name2, "exclusive_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s3, paste(clone_name, name3, "exclusive_marks_out_gt.txt", sep = "_"), row.names = F, col.names = F, quote = F)
}else if(save_ids == "FALSE"){
}else{
print("Invalid save_ids option!")
}
grid.newpage();
venn.plot <- draw.triple.venn(
area1 = length(sample1),
area2 = length(sample2),
area3 = length(sample3),
n12 = length(inter_12_full),
n13 = length(inter_13_full),
n23 = length(inter_23_full),
n123 = length(inter_123_full),
cross.area = length(inter),
alpha = 0.75,
category = c(deparse(name1), deparse(name2), deparse(name3)),
fill = c("darkgreen", "green", "#8B4513"),
lty = "blank",
cex = 4,
cat.cex = 2.5,
cat.dist = 0.055,
ext.pos = 0,
ext.dist = -0.05,
ext.length = 0.85,
ext.line.lwd = 2,
ext.line.lty = "dashed",
scaled = T,
print.mode = c("raw", "percent"),
rotation.degree = 0)
}
venn_3_samples_in_t <- function(sample1 = sample1, sample2 = sample2, sample3 = sample3, name1 = name1, name2 = name2, name3 = name3, clone_name = clone_name, save_ids = "FALSE"){
sample1 <- unique(sample1[complete.cases(sample1)])
sample2 <- unique(sample2[complete.cases(sample2)])
sample3 <- unique(sample3[complete.cases(sample3)])
inter_12_full <- unique(intersect(sample1, sample2))
inter_13_full <- unique(intersect(sample1, sample3))
inter_23_full <- unique(intersect(sample2, sample3))
inter_123_full <- unique(intersect(inter_12_full, sample3))
inter_12 <- unique(inter_12_full[!inter_12_full %in% inter_123_full])
inter_13 <- unique(inter_13_full[!inter_13_full %in% inter_123_full])
inter_23 <- unique(inter_23_full[!inter_23_full %in% inter_123_full])
unic_s1 <- unique(sample1[!sample1 %in% c(inter_12, inter_13, inter_123_full)])
unic_s2 <- unique(sample2[!sample2 %in% c(inter_12, inter_23, inter_123_full)])
unic_s3 <- unique(sample3[!sample3 %in% c(inter_13, inter_23, inter_123_full)])
if(save_ids == "TRUE"){
write.table(inter_123_full, paste(clone_name, name1, "vs", name2, "vs", name3, "intersection_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_12, paste(clone_name, "only", name1, "vs", name2, "intersection_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_13, paste(clone_name, "only", name1, "vs", name3, "intersection_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_23, paste(clone_name, "only", name2, "vs", name3, "intersection_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_12_full, paste(clone_name, "all", name1, "vs", name2, "intersection_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_13_full, paste(clone_name, "all", name1, "vs", name3, "intersection_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_23_full, paste(clone_name, "all", name2, "vs", name3, "intersection_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s1, paste(clone_name, name1, "exclusive_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s2, paste(clone_name, name2, "exclusive_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s3, paste(clone_name, name3, "exclusive_marks_in_t.txt", sep = "_"), row.names = F, col.names = F, quote = F)
}else if(save_ids == "FALSE"){
}else{
print("Invalid save_ids option!")
}
grid.newpage();
venn.plot <- draw.triple.venn(
area1 = length(sample1),
area2 = length(sample2),
area3 = length(sample3),
n12 = length(inter_12_full),
n13 = length(inter_13_full),
n23 = length(inter_23_full),
n123 = length(inter_123_full),
cross.area = length(inter),
alpha = 0.75,
category = c(deparse(name1), deparse(name2), deparse(name3)),
fill = c("darkgreen", "green", "#8B4513"),
lty = "blank",
cex = 4,
cat.cex = 2.5,
cat.dist = 0.055,
ext.pos = 0,
ext.dist = -0.05,
ext.length = 0.85,
ext.line.lwd = 2,
ext.line.lty = "dashed",
scaled = T,
print.mode = c("raw", "percent"),
rotation.degree = 0)
}
venn_3_samples_in_g <- function(sample1 = sample1, sample2 = sample2, sample3 = sample3, name1 = name1, name2 = name2, name3 = name3, clone_name = clone_name, save_ids = "FALSE"){
sample1 <- unique(sample1[complete.cases(sample1)])
sample2 <- unique(sample2[complete.cases(sample2)])
sample3 <- unique(sample3[complete.cases(sample3)])
inter_12_full <- unique(intersect(sample1, sample2))
inter_13_full <- unique(intersect(sample1, sample3))
inter_23_full <- unique(intersect(sample2, sample3))
inter_123_full <- unique(intersect(inter_12_full, sample3))
inter_12 <- unique(inter_12_full[!inter_12_full %in% inter_123_full])
inter_13 <- unique(inter_13_full[!inter_13_full %in% inter_123_full])
inter_23 <- unique(inter_23_full[!inter_23_full %in% inter_123_full])
unic_s1 <- unique(sample1[!sample1 %in% c(inter_12, inter_13, inter_123_full)])
unic_s2 <- unique(sample2[!sample2 %in% c(inter_12, inter_23, inter_123_full)])
unic_s3 <- unique(sample3[!sample3 %in% c(inter_13, inter_23, inter_123_full)])
if(save_ids == "TRUE"){
write.table(inter_123_full, paste(clone_name, name1, "vs", name2, "vs", name3, "intersection_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_12, paste(clone_name, "only", name1, "vs", name2, "intersection_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_13, paste(clone_name, "only", name1, "vs", name3, "intersection_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_23, paste(clone_name, "only", name2, "vs", name3, "intersection_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_12_full, paste(clone_name, "all", name1, "vs", name2, "intersection_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_13_full, paste(clone_name, "all", name1, "vs", name3, "intersection_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(inter_23_full, paste(clone_name, "all", name2, "vs", name3, "intersection_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s1, paste(clone_name, name1, "exclusive_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s2, paste(clone_name, name2, "exclusive_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
write.table(unic_s3, paste(clone_name, name3, "exclusive_marks_in_g.txt", sep = "_"), row.names = F, col.names = F, quote = F)
}else if(save_ids == "FALSE"){
}else{
print("Invalid save_ids option!")
}
grid.newpage();
venn.plot <- draw.triple.venn(
area1 = length(sample1),
area2 = length(sample2),
area3 = length(sample3),
n12 = length(inter_12_full),
n13 = length(inter_13_full),
n23 = length(inter_23_full),
n123 = length(inter_123_full),
cross.area = length(inter),
alpha = 0.75,
category = c(deparse(name1), deparse(name2), deparse(name3)),
fill = c("darkgreen", "green", "#8B4513"),
lty = "blank",
cex = 4,
cat.cex = 2.5,
cat.dist = 0.055,
ext.pos = 0,
ext.dist = -0.05,
ext.length = 0.85,
ext.line.lwd = 2,
ext.line.lty = "dashed",
scaled = T,
print.mode = c("raw", "percent"),
rotation.degree = 0)
}
# Reads the dataset
G4_int <- read.table(snakemake@input[[1]], header = T, na.strings = "NA", colClasses = "character")
## MSD methylated sites outside genes and TEs
out_gt <- read.table(snakemake@input[[2]], colClasses = "character")[4]
out_gt_sub <- data.frame()
out_gt_sub <- cbind(G4_int[G4_int$BRASUZ1_leaf.ad %in% out_gt$V4,])["BRASUZ1_leaf.ad"]
out_gt_sub <- cbindX(out_gt_sub, (G4_int[G4_int$BRASUZ1_leaf.juv %in% out_gt$V4,]["BRASUZ1_leaf.juv"]))
out_gt_sub <- cbindX(out_gt_sub, (G4_int[G4_int$BRASUZ1_wood %in% out_gt$V4,]["BRASUZ1_wood"]))
# Writes the file with marks out of genes or tranposons for each sample.
write.table(out_gt_sub, snakemake@output[[1]], col.names = T, row.names = F, quote = F, sep = "\t")
## MSD methylated sites in TEs
in_t <- read.table(snakemake@input[[3]], colClasses = "character")[4]
in_tg <- read.table(snakemake@input[[4]], colClasses = "character")[4]
in_t <- unique(c(in_t$V4, in_tg$V4))
in_t_sub <- data.frame()
in_t_sub <- cbind(G4_int[G4_int$BRASUZ1_leaf.ad %in% in_t,])["BRASUZ1_leaf.ad"]
in_t_sub <- cbindX(in_t_sub, (G4_int[G4_int$BRASUZ1_leaf.juv %in% in_t,]["BRASUZ1_leaf.juv"]))
in_t_sub <- cbindX(in_t_sub, (G4_int[G4_int$BRASUZ1_wood %in% in_t,]["BRASUZ1_wood"]))
write.table(in_t_sub, snakemake@output[[2]], col.names = T, row.names = F, quote = F, sep = "\t")
## MSD methylated sites in genes
in_g <- read.table(snakemake@input[[5]], colClasses = "character")[4]
in_g_noncod <- read.table(snakemake@input[[6]], colClasses = "character")[4]
in_g <- unique(c(in_g$V4, in_g_noncod$V4))
in_g_sub <- data.frame()
in_g_sub <- cbind(G4_int[G4_int$BRASUZ1_leaf.ad %in% in_g,])["BRASUZ1_leaf.ad"]
in_g_sub <- cbindX(in_g_sub, (G4_int[G4_int$BRASUZ1_leaf.juv %in% in_g,]["BRASUZ1_leaf.juv"]))
in_g_sub <- cbindX(in_g_sub, (G4_int[G4_int$BRASUZ1_wood %in% in_g,]["BRASUZ1_wood"]))
## Plots
## All MSD methylated sites
A <- venn_3_samples(sample1 = G4_int[,1],
sample2 = G4_int[,2],
sample3 = G4_int[,3],
name1 = snakemake@params[["tissues"]][1],
name2 = snakemake@params[["tissues"]][2],
name3 = snakemake@params[["tissues"]][3],
clone_name = snakemake@params[["clone_name"]],
save_ids = snakemake@params[["save_ids"]])
svg(filename = snakemake@output[[3]], width = 12, height = 12, pointsize = 12)
grid.arrange(grobTree(A), ncol=1, top=textGrob(snakemake@params[["clone_name"]], gp=gpar(fontsize=30,font=8)))
dev.off()
## MSD methylated sites outside of genes and TEs
B <- venn_3_samples_out_gt(sample1 = out_gt_sub[,1],
sample2 = out_gt_sub[,2],
sample3 = out_gt_sub[,3],
name1 = snakemake@params[["tissues"]][1],
name2 = snakemake@params[["tissues"]][2],
name3 = snakemake@params[["tissues"]][3],
clone_name = snakemake@params[["clone_name"]],
save_ids = snakemake@params[["save_ids"]])
svg(filename = snakemake@output[[4]], width = 12, height = 12, pointsize = 12)
grid.arrange(grobTree(B), ncol=1, top=textGrob(snakemake@params[["clone_name"]], gp=gpar(fontsize=30,font=8)))
dev.off()
## MSD methylated sites in TEs
C <- venn_3_samples_in_t(sample1 = in_t_sub[,1],
sample2 = in_t_sub[,2],
sample3 = in_t_sub[,3],
name1 = snakemake@params[["tissues"]][1],
name2 = snakemake@params[["tissues"]][2],
name3 = snakemake@params[["tissues"]][3],
clone_name = snakemake@params[["clone_name"]],
save_ids = snakemake@params[["save_ids"]])
svg(filename = snakemake@output[[5]], width = 12, height = 12, pointsize = 12)
grid.arrange(grobTree(C), ncol=1, top=textGrob(snakemake@params[["clone_name"]], gp=gpar(fontsize=30,font=8)))
dev.off()
## MSD methylated sites in genes
D <- venn_3_samples_in_g(sample1 = in_g_sub[,1],
sample2 = in_g_sub[,2],
sample3 = in_g_sub[,3],
name1 = snakemake@params[["tissues"]][1],
name2 = snakemake@params[["tissues"]][2],
name3 = snakemake@params[["tissues"]][3],
clone_name = snakemake@params[["clone_name"]],
save_ids = snakemake@params[["save_ids"]])
svg(filename = snakemake@output[[6]], width = 12, height = 12, pointsize = 12)
grid.arrange(grobTree(D), ncol=1, top=textGrob(snakemake@params[["clone_name"]], gp=gpar(fontsize=30, font=8)))
dev.off()