-
Notifications
You must be signed in to change notification settings - Fork 3
/
process_sims.r
47 lines (35 loc) · 1.33 KB
/
process_sims.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
library(tidyverse)
sims <- list.files('.', pattern = 'sim.*\\.tsv')
parse_name <- function(x) {
res <- gsub('sim_([^N]+)N_([^s]+)s_([^r]+)rbp_(\\d+)rep.tsv',
'\\1;;\\2;;\\3;;\\4', x)
mat <- do.call(rbind, strsplit(res, ';;'))
tibble(N = as.integer(mat[, 1]),
s = as.numeric(mat[, 2]),
rbp = as.numeric(mat[, 3]),
rep = as.integer(mat[, 4]))
}
midpoint <- function(x) {
out <- lapply(strsplit(as.character(x), ','), function(y) {
as.numeric(gsub('(\\)|\\]|\\[|\\()', '', y))
})
bins <- do.call(rbind, out)
rowMeans(bins)
}
bin_region <- function(x, nbins, total_length) {
cut(x, seq(0, total_length, length.out=nbins))
}
d <- tibble(sims = sims) %>%
mutate(params = map(sims, parse_name)) %>%
mutate(results = map(sims, read_tsv, col_types = "id")) %>%
unnest(c(params, results))
ds <- d %>%
mutate(bins = bin_region(pos, 140, 50e6),
midpoint = midpoint(bins)) %>%
mutate(s = as.factor(s)) %>%
group_by(N, s, rbp, midpoint) %>%
summarize(het = mean(het))
p <- ggplot(ds, aes(midpoint, het, color=s)) + geom_point() +
facet_wrap(~ rbp) + geom_smooth(se=FALSE, span=0.5) +
ylab("average heterozygosity") + xlab("position")
ggsave('recurrent_sweeps.pdf', p, width = 7, height = 4)