Skip to content

Commit

Permalink
Merge pull request #190 from tidymodels/num_comp-keep_original_cols
Browse files Browse the repository at this point in the history
  • Loading branch information
EmilHvitfeldt authored Jun 20, 2023
2 parents 2d810e3 + df76b33 commit c6ec1e1
Show file tree
Hide file tree
Showing 9 changed files with 58 additions and 18 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# embed (development version)

* `step_pca_sparse()`, `step_pca_truncated()` and `step_pca_sparse_bayes()` now returns data unaltered if `num_comp = 0`. This is done to be consistent with recipes steps of the same nature.

# embed 1.1.1

## Bug Fixes
Expand Down
6 changes: 2 additions & 4 deletions R/pca_sparse.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' zero coefficients.
#'
#' @inheritParams step_lencode_bayes
#' @inheritParams recipes::step_pca
#' @inherit step_lencode_bayes return
#' @param ... One or more selector functions to choose which variables will be
#' used to compute the components. See [selections()] for more details. For
Expand All @@ -13,9 +14,6 @@
#' they be assigned? By default, the function assumes that the new principal
#' component columns created by the original variables will be used as
#' predictors in a model.
#' @param num_comp The number of PCA components to retain as new predictors. If
#' `num_comp` is greater than the number of columns or the number of possible
#' components, a smaller value will be used.
#' @param predictor_prop The maximum number of original predictors that can have
#' non-zero coefficients for each PCA component (via regularization).
#' @param keep_original_cols A logical to keep the original variables in the
Expand Down Expand Up @@ -140,7 +138,7 @@ step_pca_sparse_new <-
prep.step_pca_sparse <- function(x, training, info = NULL, ...) {
col_names <- recipes_eval_select(x$terms, training, info)

if (length(col_names) > 0) {
if (length(col_names) > 0 && x$num_comp > 0) {
check_type(training[, col_names], types = c("double", "integer"))

p <- length(col_names)
Expand Down
9 changes: 3 additions & 6 deletions R/pca_sparse_bayes.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' some zero coefficients.
#'
#' @inheritParams step_lencode_bayes
#' @inheritParams recipes::step_pca
#' @inherit step_lencode_bayes return
#' @param ... One or more selector functions to choose which variables will be
#' used to compute the components. See [selections()] for more details. For
Expand All @@ -13,10 +14,6 @@
#' they be assigned? By default, the function assumes that the new principal
#' component columns created by the original variables will be used as
#' predictors in a model.
#' @param num_comp The number of PCA components to retain as new predictors. If
#' `num_comp` is greater than the number of columns or the number of possible
#' components, a smaller value will be used. A value of zero indicates that
#' PCA will _not_ be used on the data.
#' @param prior_slab_dispersion This value is proportional to the dispersion (or
#' scale) parameter for the slab portion of the prior. Smaller values result
#' in an increase in zero coefficients.
Expand Down Expand Up @@ -193,12 +190,12 @@ prep.step_pca_sparse_bayes <- function(x, training, info = NULL, ...) {
)
res <- rlang::eval_tidy(cl)
rotation <- svd(res$loadings)$u
rownames(rotation) <- col_names
colnames(rotation) <- names0(x$num_comp, prefix = x$prefix)
} else {
# fake a rotation matrix so that the resolved names can be used for tidy()
rotation <- matrix(NA, nrow = length(col_names), ncol = p)
}
rownames(rotation) <- col_names
colnames(rotation) <- names0(x$num_comp, prefix = x$prefix)
} else {
rotation <- NA
}
Expand Down
3 changes: 2 additions & 1 deletion R/pca_truncated.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,13 @@ prep.step_pca_truncated <- function(x, training, info = NULL, ...) {
} else {
prc_obj <- recipes::pca_wts(training[, col_names, drop = FALSE], wts = wts)
}
rownames(prc_obj$rotation) <- col_names
} else {
prc_obj <- NULL
prc_obj$rotation <- matrix(nrow = 0, ncol = 0)
}

rownames(prc_obj$rotation) <- col_names


if (is.null(prc_obj$center)) {
prc_obj$center <- FALSE
Expand Down
8 changes: 5 additions & 3 deletions man/step_pca_sparse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions man/step_pca_sparse_bayes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions tests/testthat/test-pca_sparse.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,19 @@ test_that("tunable", {
)
})

test_that("Do nothing for num_comps = 0 and keep_original_cols = FALSE", {
# https://github.com/tidymodels/recipes/issues/1152

rec <- recipe(carb ~ ., data = mtcars) %>%
step_pca_sparse(all_predictors(),
num_comp = 0, keep_original_cols = FALSE) %>%
prep()

res <- bake(rec, new_data = NULL)

expect_identical(res, tibble::as_tibble(mtcars))
})

# Infrastructure ---------------------------------------------------------------

test_that("bake method errors when needed non-standard role columns are missing", {
Expand Down
13 changes: 13 additions & 0 deletions tests/testthat/test-pca_sparse_bayes.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,19 @@ test_that("tunable", {
)
})

test_that("Do nothing for num_comps = 0 and keep_original_cols = FALSE", {
# https://github.com/tidymodels/recipes/issues/1152

rec <- recipe(carb ~ ., data = mtcars) %>%
step_pca_sparse_bayes(all_predictors(),
num_comp = 0, keep_original_cols = FALSE) %>%
prep()

res <- bake(rec, new_data = NULL)

expect_identical(res, tibble::as_tibble(mtcars))
})

# Infrastructure ---------------------------------------------------------------

test_that("bake method errors when needed non-standard role columns are missing", {
Expand Down
13 changes: 13 additions & 0 deletions tests/testthat/test-pca_truncated.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,19 @@ test_that("check_name() is used", {
)
})

test_that("Do nothing for num_comps = 0 and keep_original_cols = FALSE", {
# https://github.com/tidymodels/recipes/issues/1152

rec <- recipe(carb ~ ., data = mtcars) %>%
step_pca_truncated(all_predictors(),
num_comp = 0, keep_original_cols = FALSE) %>%
prep()

res <- bake(rec, new_data = NULL)

expect_identical(res, tibble::as_tibble(mtcars))
})

# Infrastructure ---------------------------------------------------------------

test_that("bake method errors when needed non-standard role columns are missing", {
Expand Down

0 comments on commit c6ec1e1

Please sign in to comment.