Skip to content

Commit

Permalink
#2126 unify_joined: update derive_vars_joined()
Browse files Browse the repository at this point in the history
  • Loading branch information
bundfussr committed Oct 13, 2023
1 parent b30339e commit 85c98ff
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 35 deletions.
99 changes: 64 additions & 35 deletions R/derive_joined.R
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,17 @@ derive_vars_joined <- function(dataset,
by_vars = NULL,
order = NULL,
new_vars = NULL,
tmp_obs_nr_var = NULL,
join_vars = NULL,
join_type,
filter_add = NULL,
first_cond_lower = NULL,
first_cond_upper = NULL,
filter_join = NULL,
mode = NULL,
exist_flag = NULL,
true_value = "Y",
false_value = NA_character_,
missing_values = NULL,
check_type = "warning") {
assert_vars(by_vars, optional = TRUE)
Expand All @@ -346,8 +353,12 @@ derive_vars_joined <- function(dataset,
)
)

tmp_obs_nr_var <- assert_symbol(enexpr(tmp_obs_nr_var), optional = TRUE)
filter_add <- assert_filter_cond(enexpr(filter_add), optional = TRUE)
first_cond_lower <- assert_filter_cond(enexpr(first_cond_lower), optional = TRUE)
first_cond_upper <- assert_filter_cond(enexpr(first_cond_upper), optional = TRUE)
filter_join <- assert_filter_cond(enexpr(filter_join), optional = TRUE)
exist_flag <- assert_symbol(enexpr(exist_flag), optional = TRUE)

if (is.null(new_vars)) {
new_vars <- chr2vars(colnames(dataset_add))
Expand Down Expand Up @@ -376,44 +387,62 @@ derive_vars_joined <- function(dataset,
check_type = "none"
)

# prepare right side of the join,
# by_vars are renamed here, new_vars will be renamed at the end
data_right <- dataset_add %>%
mutate(!!!order, !!!join_vars) %>%
filter_if(filter_add) %>%
select(
!!!by_vars,
!!!replace_values_by_names(extract_vars(order)),
!!!replace_values_by_names(join_vars),
!!!intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add)))
)

# join dataset (if no by variable, a full join is performed)
data_joined <- left_join(
data_return <- filter_joined(
data,
data_right,
by = vars2chr(by_vars_left),
suffix = c("", ".join")
dataset_add = dataset_add,
by_vars = by_vars,
join_vars = expr_c(
join_vars,
intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add)))
),
join_type = join_type,
first_cond_lower = !!first_cond_lower,
first_cond_upper = !!first_cond_upper,
order = order,
tmp_obs_nr_var = !!tmp_obs_nr_var,
filter_add = !!filter_add,
filter_join = !!filter_join,
check_type = check_type
)

# select observations for the new variables
data_return <- filter_if(data_joined, filter_join)

common_vars <-
chr2vars(setdiff(intersect(colnames(data), colnames(data_right)), vars2chr(by_vars)))
if (!is.null(order)) {
data_return <- filter_extreme(
data_return,
by_vars = expr_c(by_vars_left, tmp_obs_nr),
order = add_suffix_to_vars(
replace_values_by_names(order),
vars = common_vars,
suffix = ".join"
),
mode = mode,
check_type = check_type
)
}
# # prepare right side of the join,
# # by_vars are renamed here, new_vars will be renamed at the end
# data_right <- dataset_add %>%
# mutate(!!!order, !!!join_vars) %>%
# filter_if(filter_add) %>%
# select(
# !!!by_vars,
# !!!replace_values_by_names(extract_vars(order)),
# !!!replace_values_by_names(join_vars),
# !!!intersect(unname(extract_vars(new_vars)), chr2vars(colnames(dataset_add)))
# )
#
# # join dataset (if no by variable, a full join is performed)
# data_joined <- left_join(
# data,
# data_right,
# by = vars2chr(by_vars_left),
# suffix = c("", ".join")
# )
#
# # select observations for the new variables
# data_return <- filter_if(data_joined, filter_join)
#
# common_vars <-
# chr2vars(setdiff(intersect(colnames(data), colnames(data_right)), vars2chr(by_vars)))
# if (!is.null(order)) {
# data_return <- filter_extreme(
# data_return,
# by_vars = expr_c(by_vars_left, tmp_obs_nr),
# order = add_suffix_to_vars(
# replace_values_by_names(order),
# vars = common_vars,
# suffix = ".join"
# ),
# mode = mode,
# check_type = check_type
# )
# }

# merge new variables to the input dataset and rename them
data %>%
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/test-derive_joined.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ test_that("derive_vars_joined Test 1: no by_vars, no order, no new_vars", {
select(expected, USUBJID, ADY),
dataset_add = windows,
join_vars = exprs(AWHI, AWLO),
join_type = "all",
filter_join = AWLO <= ADY & ADY <= AWHI
),
keys = c("USUBJID", "ADY")
Expand Down Expand Up @@ -54,6 +55,7 @@ test_that("derive_vars_joined Test 2: new_vars with rename", {
order = exprs(AVAL),
new_vars = exprs(NADIR = AVAL),
join_vars = exprs(ADY),
join_type = "all",
filter_add = ADY > 0,
filter_join = ADY.join < ADY,
mode = "first",
Expand Down Expand Up @@ -91,6 +93,7 @@ test_that("derive_vars_joined Test 3: by_vars with rename", {
order = exprs(FADT),
new_vars = exprs(ATOXGR_pre = FAORRES),
join_vars = exprs(FADT),
join_type = "all",
filter_join = FADT < TRTSDTM,
mode = "last"
),
Expand Down Expand Up @@ -125,6 +128,7 @@ test_that("derive_vars_joined Test 4: order with expression", {
order = exprs(FADT = convert_dtc_to_dt(FADTC)),
new_vars = exprs(ATOXGR_pre = FAORRES),
join_vars = exprs(FADT),
join_type = "all",
filter_join = FADT < TRTSDTM,
mode = "last"
),
Expand Down Expand Up @@ -166,6 +170,7 @@ test_that("derive_vars_joined Test 5: join_vars with expression", {
order = exprs(TRSTRESN),
new_vars = exprs(AVAL = TRSTRESN),
join_vars = exprs(TRDT = convert_dtc_to_dt(TRDTC)),
join_type = "all",
filter_join = TRDT <= ADT,
mode = "first",
check_type = "none"
Expand Down Expand Up @@ -201,6 +206,7 @@ test_that("derive_vars_joined Test 6: no join_vars, no filter_join", {
dataset_add = faae,
by_vars = exprs(AEGRPID = FAGRPID),
order = exprs(FAORRES),
join_type = "all",
new_vars = exprs(ATOXGR_pre = FAORRES),
mode = "first"
),
Expand Down Expand Up @@ -232,6 +238,7 @@ test_that("derive_vars_joined Test 7: new_vars expressions using variables from
dataset_add = ex,
by_vars = exprs(USUBJID),
order = exprs(EXSDT = convert_dtc_to_dt(EXSDTC)),
join_type = "all",
new_vars = exprs(LSTDSDUR = compute_duration(
start_date = EXSDT, end_date = ASTDT
)),
Expand All @@ -251,6 +258,7 @@ test_that("derive_vars_joined Test 8: error if new_vars are already in dataset",
myd,
dataset_add = myd,
order = exprs(day),
join_type = "all",
mode = "last",
filter_join = day < day.join
),
Expand Down Expand Up @@ -293,6 +301,7 @@ test_that("derive_vars_joined Test 9: fixing a bug from issue 1966", { # nolint
dataset_add = adlb_tbili_pbl,
by_vars = exprs(STUDYID, USUBJID),
order = exprs(ADTM, ASEQ),
join_type = "all",
new_vars = exprs(TBILI_ADT = ADT),
filter_join = ADT <= ADT.join,
mode = "first"
Expand All @@ -316,6 +325,7 @@ test_that("derive_vars_joined Test 10: order vars are selected properly in funct
dataset_add = myd,
new_vars = exprs(first_val = val),
join_vars = exprs(day),
join_type = "all",
order = exprs(-day),
mode = "last",
filter_join = day < day.join
Expand Down

0 comments on commit 85c98ff

Please sign in to comment.