From 52b067eb3fc59faf232aea1c52d435b16d145430 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Wed, 4 Jan 2023 19:08:37 +0000 Subject: [PATCH] GH-14907: [R] right_join() function does not produce the expected outcome (#15077) * Closes: #14907 Authored-by: Nic Crane Signed-off-by: Will Jones --- r/R/dplyr-join.R | 9 ++- r/tests/testthat/test-dplyr-join.R | 100 ++++++++++++++++++----------- 2 files changed, 71 insertions(+), 38 deletions(-) diff --git a/r/R/dplyr-join.R b/r/R/dplyr-join.R index 5e2b6084b21e0..fad44b5ef2751 100644 --- a/r/R/dplyr-join.R +++ b/r/R/dplyr-join.R @@ -35,8 +35,13 @@ do_join <- function(x, # For outer joins, we need to output the join keys on both sides so we # can coalesce them afterwards. - left_output <- names(x) - right_output <- if (keep || join_type == "FULL_OUTER") { + left_output <- if (!keep && join_type == "RIGHT_OUTER") { + setdiff(names(x), by) + } else { + names(x) + } + + right_output <- if (keep || join_type %in% c("FULL_OUTER", "RIGHT_OUTER")) { names(y) } else { setdiff(names(y), by) diff --git a/r/tests/testthat/test-dplyr-join.R b/r/tests/testthat/test-dplyr-join.R index cae741ae27652..5c6798aeebc18 100644 --- a/r/tests/testthat/test-dplyr-join.R +++ b/r/tests/testthat/test-dplyr-join.R @@ -21,8 +21,8 @@ left <- example_data left$some_grouping <- rep(c(1, 2), 5) to_join <- tibble::tibble( - some_grouping = c(1, 2), - capital_letters = c("A", "B"), + some_grouping = c(1, 2, 3), + capital_letters = c("A", "B", "C"), another_column = TRUE ) @@ -140,42 +140,64 @@ test_that("Error handling", { # TODO: casting: int and float columns? test_that("right_join", { - for (keep in c(TRUE, FALSE)) { - compare_dplyr_binding( - .input %>% - right_join(to_join, by = "some_grouping", keep = !!keep) %>% - collect(), - left - ) - } + compare_dplyr_binding( + .input %>% + right_join(to_join, by = "some_grouping", keep = TRUE) %>% + collect(), + left + ) + + compare_dplyr_binding( + .input %>% + right_join(to_join, by = "some_grouping", keep = FALSE) %>% + collect(), + left + ) }) test_that("inner_join", { - for (keep in c(TRUE, FALSE)) { - compare_dplyr_binding( - .input %>% - inner_join(to_join, by = "some_grouping", keep = !!keep) %>% - collect(), - left - ) - } + compare_dplyr_binding( + .input %>% + inner_join(to_join, by = "some_grouping", keep = TRUE) %>% + collect(), + left + ) + + compare_dplyr_binding( + .input %>% + inner_join(to_join, by = "some_grouping", keep = FALSE) %>% + collect(), + left + ) }) test_that("full_join", { - for (keep in c(TRUE, FALSE)) { - compare_dplyr_binding( - .input %>% - full_join(to_join, by = "some_grouping", keep = !!keep) %>% - collect(), - left - ) - } + compare_dplyr_binding( + .input %>% + full_join(to_join, by = "some_grouping", keep = TRUE) %>% + collect(), + left + ) + + compare_dplyr_binding( + .input %>% + full_join(to_join, by = "some_grouping", keep = FALSE) %>% + collect(), + left + ) }) test_that("semi_join", { compare_dplyr_binding( .input %>% - semi_join(to_join, by = "some_grouping") %>% + semi_join(to_join, by = "some_grouping", keep = TRUE) %>% + collect(), + left + ) + + compare_dplyr_binding( + .input %>% + semi_join(to_join, by = "some_grouping", keep = FALSE) %>% collect(), left ) @@ -358,13 +380,19 @@ test_that("full joins handle keep", { z = 6:10 ) - for (keep in c(TRUE, FALSE)) { - compare_dplyr_binding( - .input %>% - full_join(full_data_df, by = c("y", "x"), keep = !!keep) %>% - arrange(index) %>% - collect(), - small_dataset_df - ) - } + compare_dplyr_binding( + .input %>% + full_join(full_data_df, by = c("y", "x"), keep = TRUE) %>% + arrange(index) %>% + collect(), + small_dataset_df + ) + + compare_dplyr_binding( + .input %>% + full_join(full_data_df, by = c("y", "x"), keep = FALSE) %>% + arrange(index) %>% + collect(), + small_dataset_df + ) })