From 85c98ff2e9fb26078988c6854909a2e08c54c6fe Mon Sep 17 00:00:00 2001 From: Stefan Bundfuss Date: Fri, 13 Oct 2023 09:09:26 +0000 Subject: [PATCH] #2126 unify_joined: update derive_vars_joined() --- R/derive_joined.R | 99 +++++++++++++++++++---------- tests/testthat/test-derive_joined.R | 10 +++ 2 files changed, 74 insertions(+), 35 deletions(-) diff --git a/R/derive_joined.R b/R/derive_joined.R index 586f256360..7a8c365808 100644 --- a/R/derive_joined.R +++ b/R/derive_joined.R @@ -325,10 +325,17 @@ derive_vars_joined <- function(dataset, by_vars = NULL, order = NULL, new_vars = NULL, + tmp_obs_nr_var = NULL, join_vars = NULL, + join_type, filter_add = NULL, + first_cond_lower = NULL, + first_cond_upper = NULL, filter_join = NULL, mode = NULL, + exist_flag = NULL, + true_value = "Y", + false_value = NA_character_, missing_values = NULL, check_type = "warning") { assert_vars(by_vars, optional = TRUE) @@ -346,8 +353,12 @@ derive_vars_joined <- function(dataset, ) ) + tmp_obs_nr_var <- assert_symbol(enexpr(tmp_obs_nr_var), optional = TRUE) filter_add <- assert_filter_cond(enexpr(filter_add), optional = TRUE) + first_cond_lower <- assert_filter_cond(enexpr(first_cond_lower), optional = TRUE) + first_cond_upper <- assert_filter_cond(enexpr(first_cond_upper), optional = TRUE) filter_join <- assert_filter_cond(enexpr(filter_join), optional = TRUE) + exist_flag <- assert_symbol(enexpr(exist_flag), optional = TRUE) if (is.null(new_vars)) { new_vars <- chr2vars(colnames(dataset_add)) @@ -376,44 +387,62 @@ derive_vars_joined <- function(dataset, check_type = "none" ) - # prepare right side of the join, - # by_vars are renamed here, new_vars will be renamed at the end - data_right <- dataset_add %>% - mutate(!!!order, !!!join_vars) %>% - filter_if(filter_add) %>% - select( - !!!by_vars, - !!!replace_values_by_names(extract_vars(order)), - !!!replace_values_by_names(join_vars), - !!!intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add))) - ) - - # join dataset (if no by variable, a full join is performed) - data_joined <- left_join( + data_return <- filter_joined( data, - data_right, - by = vars2chr(by_vars_left), - suffix = c("", ".join") + dataset_add = dataset_add, + by_vars = by_vars, + join_vars = expr_c( + join_vars, + intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add))) + ), + join_type = join_type, + first_cond_lower = !!first_cond_lower, + first_cond_upper = !!first_cond_upper, + order = order, + tmp_obs_nr_var = !!tmp_obs_nr_var, + filter_add = !!filter_add, + filter_join = !!filter_join, + check_type = check_type ) - # select observations for the new variables - data_return <- filter_if(data_joined, filter_join) - - common_vars <- - chr2vars(setdiff(intersect(colnames(data), colnames(data_right)), vars2chr(by_vars))) - if (!is.null(order)) { - data_return <- filter_extreme( - data_return, - by_vars = expr_c(by_vars_left, tmp_obs_nr), - order = add_suffix_to_vars( - replace_values_by_names(order), - vars = common_vars, - suffix = ".join" - ), - mode = mode, - check_type = check_type - ) - } + # # prepare right side of the join, + # # by_vars are renamed here, new_vars will be renamed at the end + # data_right <- dataset_add %>% + # mutate(!!!order, !!!join_vars) %>% + # filter_if(filter_add) %>% + # select( + # !!!by_vars, + # !!!replace_values_by_names(extract_vars(order)), + # !!!replace_values_by_names(join_vars), + # !!!intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add))) + # ) + # + # # join dataset (if no by variable, a full join is performed) + # data_joined <- left_join( + # data, + # data_right, + # by = vars2chr(by_vars_left), + # suffix = c("", ".join") + # ) + # + # # select observations for the new variables + # data_return <- filter_if(data_joined, filter_join) + # + # common_vars <- + # chr2vars(setdiff(intersect(colnames(data), colnames(data_right)), vars2chr(by_vars))) + # if (!is.null(order)) { + # data_return <- filter_extreme( + # data_return, + # by_vars = expr_c(by_vars_left, tmp_obs_nr), + # order = add_suffix_to_vars( + # replace_values_by_names(order), + # vars = common_vars, + # suffix = ".join" + # ), + # mode = mode, + # check_type = check_type + # ) + # } # merge new variables to the input dataset and rename them data %>% diff --git a/tests/testthat/test-derive_joined.R b/tests/testthat/test-derive_joined.R index eeb1afcc23..6875dd183e 100644 --- a/tests/testthat/test-derive_joined.R +++ b/tests/testthat/test-derive_joined.R @@ -24,6 +24,7 @@ test_that("derive_vars_joined Test 1: no by_vars, no order, no new_vars", { select(expected, USUBJID, ADY), dataset_add = windows, join_vars = exprs(AWHI, AWLO), + join_type = "all", filter_join = AWLO <= ADY & ADY <= AWHI ), keys = c("USUBJID", "ADY") @@ -54,6 +55,7 @@ test_that("derive_vars_joined Test 2: new_vars with rename", { order = exprs(AVAL), new_vars = exprs(NADIR = AVAL), join_vars = exprs(ADY), + join_type = "all", filter_add = ADY > 0, filter_join = ADY.join < ADY, mode = "first", @@ -91,6 +93,7 @@ test_that("derive_vars_joined Test 3: by_vars with rename", { order = exprs(FADT), new_vars = exprs(ATOXGR_pre = FAORRES), join_vars = exprs(FADT), + join_type = "all", filter_join = FADT < TRTSDTM, mode = "last" ), @@ -125,6 +128,7 @@ test_that("derive_vars_joined Test 4: order with expression", { order = exprs(FADT = convert_dtc_to_dt(FADTC)), new_vars = exprs(ATOXGR_pre = FAORRES), join_vars = exprs(FADT), + join_type = "all", filter_join = FADT < TRTSDTM, mode = "last" ), @@ -166,6 +170,7 @@ test_that("derive_vars_joined Test 5: join_vars with expression", { order = exprs(TRSTRESN), new_vars = exprs(AVAL = TRSTRESN), join_vars = exprs(TRDT = convert_dtc_to_dt(TRDTC)), + join_type = "all", filter_join = TRDT <= ADT, mode = "first", check_type = "none" @@ -201,6 +206,7 @@ test_that("derive_vars_joined Test 6: no join_vars, no filter_join", { dataset_add = faae, by_vars = exprs(AEGRPID = FAGRPID), order = exprs(FAORRES), + join_type = "all", new_vars = exprs(ATOXGR_pre = FAORRES), mode = "first" ), @@ -232,6 +238,7 @@ test_that("derive_vars_joined Test 7: new_vars expressions using variables from dataset_add = ex, by_vars = exprs(USUBJID), order = exprs(EXSDT = convert_dtc_to_dt(EXSDTC)), + join_type = "all", new_vars = exprs(LSTDSDUR = compute_duration( start_date = EXSDT, end_date = ASTDT )), @@ -251,6 +258,7 @@ test_that("derive_vars_joined Test 8: error if new_vars are already in dataset", myd, dataset_add = myd, order = exprs(day), + join_type = "all", mode = "last", filter_join = day < day.join ), @@ -293,6 +301,7 @@ test_that("derive_vars_joined Test 9: fixing a bug from issue 1966", { # nolint dataset_add = adlb_tbili_pbl, by_vars = exprs(STUDYID, USUBJID), order = exprs(ADTM, ASEQ), + join_type = "all", new_vars = exprs(TBILI_ADT = ADT), filter_join = ADT <= ADT.join, mode = "first" @@ -316,6 +325,7 @@ test_that("derive_vars_joined Test 10: order vars are selected properly in funct dataset_add = myd, new_vars = exprs(first_val = val), join_vars = exprs(day), + join_type = "all", order = exprs(-day), mode = "last", filter_join = day < day.join