Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 78 lines (66 sloc) 2.351 kb
c7ee203 @wesm ENH: merge/join functions, compress group index with possible number of ...
authored
# Benchmark fixtures: two data frames that share two random string key
# columns (`key`, `key2`) and each carry one numeric payload column.

# Number of random key pairs generated.
N <- 10000

# Each key is 10 distinct lowercase letters pasted together. The two key
# vectors are filled in the same loop, one element of each per iteration.
indices <- character(N)
indices2 <- character(N)
for (i in seq_len(N)) {
  indices[i] <- paste(sample(letters, 10), collapse = "")
  indices2[i] <- paste(sample(letters, 10), collapse = "")
}

# Only the first 1000 key pairs are used, so keys repeat on both sides:
# every key pair occurs 100 times in `left` and 10 times in `right`.
# The repetition is spelled out with rep() instead of relying on
# data.frame() recycling shorter columns to the length of the longest one.
# To benchmark with all N key pairs instead, build `left` from
# rep(indices, 10) / rep(indices2, 10) and `right` from indices / indices2.
left <- data.frame(
  key = rep(indices[1:1000], 100),
  key2 = rep(indices2[1:1000], 100),
  value = rnorm(100000)
)
right <- data.frame(
  key = rep(indices[1:1000], 10),
  key2 = rep(indices2[1:1000], 10),
  value2 = rnorm(10000)
)
c13aec6 @wesm ENH: merge benchmarks, use khash to factorize
authored
21
# Time repeated calls of a zero-argument function.
#
# func:  function invoked with no arguments on every iteration.
# niter: number of timed calls (default 10).
#
# Returns the mean of the "elapsed" entry of system.time() over all
# iterations. With niter = 0 the function is never called and the result
# is NaN (the mean of an empty numeric vector).
timeit <- function(func, niter = 10) {
  timing <- numeric(niter)
  # seq_len() yields an empty sequence for niter = 0, whereas 1:niter would
  # iterate over c(1, 0) and call func once.
  for (i in seq_len(niter)) {
    gc()  # run a garbage collection before every timed call
    timing[i] <- system.time(func())[["elapsed"]]
  }
  mean(timing)
}
30
# Left join: merge() of `left` and `right` keeping every row of `left`
# (all.x = TRUE). `left` and `right` are looked up in the enclosing scope.
#
# sort: forwarded to merge()'s `sort` argument.
#
# Returns the merged data frame invisibly, so calling it at the prompt
# does not print the whole table.
left.join <- function(sort = TRUE) {
  invisible(merge(left, right, all.x = TRUE, sort = sort))
}
34
# Right join: merge() of `left` and `right` keeping every row of `right`
# (all.y = TRUE). `left` and `right` are looked up in the enclosing scope.
#
# sort: forwarded to merge()'s `sort` argument.
#
# Returns the merged data frame invisibly, so calling it at the prompt
# does not print the whole table.
right.join <- function(sort = TRUE) {
  invisible(merge(left, right, all.y = TRUE, sort = sort))
}
38
# Full outer join: merge() of `left` and `right` keeping the rows of both
# sides (all = TRUE). `left` and `right` are looked up in the enclosing
# scope.
#
# sort: forwarded to merge()'s `sort` argument.
#
# Returns the merged data frame invisibly, so calling it at the prompt
# does not print the whole table.
outer.join <- function(sort = TRUE) {
  invisible(merge(left, right, all = TRUE, sort = sort))
}
42
# Inner join: merge() of `left` and `right` keeping only rows whose keys
# occur on both sides (merge()'s default). `left` and `right` are looked up
# in the enclosing scope.
#
# sort: forwarded to merge()'s `sort` argument.
#
# Returns the merged data frame invisibly, so calling it at the prompt
# does not print the whole table.
inner.join <- function(sort = TRUE) {
  invisible(merge(left, right, sort = sort))
}
46
# Benchmark driver: time every join type with and without sorting and
# store the mean elapsed time in `results` (rows = join type,
# columns = sort option).
sort.options <- c(FALSE, TRUE)

results <- matrix(nrow = 4, ncol = 2)
colnames(results) <- c("dont_sort", "sort")
rownames(results) <- c("inner", "outer", "left", "right")

# c() on functions builds a list, so elements are extracted with [[ ]].
# The order here must match rownames(results), and the order of
# sort.options must match colnames(results).
join.functions <- c(inner.join, outer.join, left.join, right.join)

# Each (join, sort) combination is timed exactly once; timeit() already
# averages over several iterations internally.
for (i in seq_along(join.functions)) {
  for (j in seq_along(sort.options)) {
    results[i, j] <- timeit(function() {
      join.functions[[i]](sort = sort.options[j])
    })
  }
}
64
65 ## do.something <- function(df, f) {
66 ## f(df)
67 ## }
68 ## df <- matrix(nrow=4, ncol=2)
69 ## functions <- c(colSums, rowSums)
70 ## g <- functions[1]
71 ## do.something(df, function(df) g(df))
72
73 ## dont_sort sort
74 ## inner 0.2297 0.2286
75 ## outer 1.1811 1.2843
76 ## left 0.6706 0.7766
77 ## right 0.2995 0.3371
Something went wrong with that request. Please try again.