Skip to content

Commit

Permalink
apacheGH-14907: [R] right_join() function does not produce the expect…
Browse files Browse the repository at this point in the history
…ed outcome (apache#15077)

* Closes: apache#14907

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Will Jones <willjones127@gmail.com>
  • Loading branch information
thisisnic authored and vibhatha committed Jan 9, 2023
1 parent c305f93 commit 52b067e
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 38 deletions.
9 changes: 7 additions & 2 deletions r/R/dplyr-join.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,13 @@ do_join <- function(x,

# For outer joins, we need to output the join keys on both sides so we
# can coalesce them afterwards.
left_output <- names(x)
right_output <- if (keep || join_type == "FULL_OUTER") {
left_output <- if (!keep && join_type == "RIGHT_OUTER") {
setdiff(names(x), by)
} else {
names(x)
}

right_output <- if (keep || join_type %in% c("FULL_OUTER", "RIGHT_OUTER")) {
names(y)
} else {
setdiff(names(y), by)
Expand Down
100 changes: 64 additions & 36 deletions r/tests/testthat/test-dplyr-join.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ left <- example_data
left$some_grouping <- rep(c(1, 2), 5)

to_join <- tibble::tibble(
some_grouping = c(1, 2),
capital_letters = c("A", "B"),
some_grouping = c(1, 2, 3),
capital_letters = c("A", "B", "C"),
another_column = TRUE
)

Expand Down Expand Up @@ -140,42 +140,64 @@ test_that("Error handling", {
# TODO: casting: int and float columns?

test_that("right_join", {
for (keep in c(TRUE, FALSE)) {
compare_dplyr_binding(
.input %>%
right_join(to_join, by = "some_grouping", keep = !!keep) %>%
collect(),
left
)
}
compare_dplyr_binding(
.input %>%
right_join(to_join, by = "some_grouping", keep = TRUE) %>%
collect(),
left
)

compare_dplyr_binding(
.input %>%
right_join(to_join, by = "some_grouping", keep = FALSE) %>%
collect(),
left
)
})

test_that("inner_join", {
for (keep in c(TRUE, FALSE)) {
compare_dplyr_binding(
.input %>%
inner_join(to_join, by = "some_grouping", keep = !!keep) %>%
collect(),
left
)
}
compare_dplyr_binding(
.input %>%
inner_join(to_join, by = "some_grouping", keep = TRUE) %>%
collect(),
left
)

compare_dplyr_binding(
.input %>%
inner_join(to_join, by = "some_grouping", keep = FALSE) %>%
collect(),
left
)
})

test_that("full_join", {
for (keep in c(TRUE, FALSE)) {
compare_dplyr_binding(
.input %>%
full_join(to_join, by = "some_grouping", keep = !!keep) %>%
collect(),
left
)
}
compare_dplyr_binding(
.input %>%
full_join(to_join, by = "some_grouping", keep = TRUE) %>%
collect(),
left
)

compare_dplyr_binding(
.input %>%
full_join(to_join, by = "some_grouping", keep = FALSE) %>%
collect(),
left
)
})

test_that("semi_join", {
compare_dplyr_binding(
.input %>%
semi_join(to_join, by = "some_grouping") %>%
semi_join(to_join, by = "some_grouping", keep = TRUE) %>%
collect(),
left
)

compare_dplyr_binding(
.input %>%
semi_join(to_join, by = "some_grouping", keep = FALSE) %>%
collect(),
left
)
Expand Down Expand Up @@ -358,13 +380,19 @@ test_that("full joins handle keep", {
z = 6:10
)

for (keep in c(TRUE, FALSE)) {
compare_dplyr_binding(
.input %>%
full_join(full_data_df, by = c("y", "x"), keep = !!keep) %>%
arrange(index) %>%
collect(),
small_dataset_df
)
}
compare_dplyr_binding(
.input %>%
full_join(full_data_df, by = c("y", "x"), keep = TRUE) %>%
arrange(index) %>%
collect(),
small_dataset_df
)

compare_dplyr_binding(
.input %>%
full_join(full_data_df, by = c("y", "x"), keep = FALSE) %>%
arrange(index) %>%
collect(),
small_dataset_df
)
})

0 comments on commit 52b067e

Please sign in to comment.