improve documentation of tidy methods

tidymodels · Feb 24, 2024 · d64f7f6 · d64f7f6
1 parent 238a924
commit d64f7f6
Show file tree

Hide file tree

Showing 29 changed files with 294 additions and 76 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -2,14 +2,16 @@
 
 * `step_umap()` has gained `initial` and `target_weight` arguments. (#213)
 
+* Calling `?tidy.step_*()` now sends you to the documentation for `step_*()` where the outcome is documented. (#216)
+
+* Documentation for tidy methods for all steps has been improved to describe the return value more accurately. (#217)
+
 # embed 1.1.3
 
 * `step_collapse_stringdist()` will now return predictors as factors. (#204)
 
 * Fixed regression from 1.1.2 in `step_lencode_glm()` where it couldn't be used on multiple columns.
 
-* Calling `?tidy.step_*()` now sends you to the documentation for `step_*()` where the outcome is documented. (#216)
-
 # embed 1.1.2
 
 ## Improvements

diff --git a/R/collapse_cart.R b/R/collapse_cart.R
@@ -41,11 +41,16 @@
 #' find any signal in the data.
 #'
 #' # Tidying
-#'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `"terms"`
-#' (the column being modified), `"old"` (the old levels), `"new"` (the new
-#' levels), and `"id"`. If the CART model failed or could not find a good split,
-#' the requested predictor will not be in the results.
+#' 
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms`, `old`, `new`, and `id`:
+#' 
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{old}{character, the old levels}
+#'   \item{new}{character, the new levels}
+#'   \item{id}{character, id of this step}
+#' }
 #'
 #' @template case-weights-not-supported
 #'

diff --git a/R/collapse_stringdist.R b/R/collapse_stringdist.R
@@ -27,10 +27,16 @@
 #' @details
 #'
 #' # Tidying
-#'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `"terms"`
-#' (the column being modified), `"from"` (the old levels), `"to"` (the new
-#' levels), and `"id"`.
+#' 
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms`, `from`, `to`, and `id`:
+#' 
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{from}{character, the old levels}
+#'   \item{to}{character, the new levels}
+#'   \item{id}{character, id of this step}
+#' }
 #'
 #' @template case-weights-not-supported
 #'

diff --git a/R/discretize_cart.R b/R/discretize_cart.R
@@ -45,9 +45,15 @@
 #' Note that the original data will be replaced with the new bins.
 #'
 #' # Tidying
-#'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the columns that is selected), `values` is returned.
+#' 
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms`, `value`, and `id`:
+#' 
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{value}{numeric, location of the splits}
+#'   \item{id}{character, id of this step}
+#' }
 #'
 #' ```{r, echo = FALSE, results="asis"}
 #' step <- "step_discretize_cart"

diff --git a/R/discretize_xgb.R b/R/discretize_xgb.R
@@ -62,9 +62,15 @@
 #' Note that the original data will be replaced with the new bins.
 #'
 #' # Tidying
-#'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the columns that is selected), `values` is returned.
+#' 
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms`, `value`, and `id`:
+#' 
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{value}{numeric, location of the splits}
+#'   \item{id}{character, id of this step}
+#' }
 #'
 #' ```{r, echo = FALSE, results="asis"}
 #' step <- "step_discretize_xgb"

diff --git a/R/embed.R b/R/embed.R
@@ -102,10 +102,16 @@
 #' this step with `caret`, avoid parallel processing.
 #'
 #' # Tidying
-#'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the selectors or variables selected), `levels` (levels in variable), and a
-#' number of columns with embedding information are returned.
+#' 
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' a number of columns with embedding information, and columns `terms`, 
+#' `levels`, and `id`:
+#' 
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{levels}{character, levels in variable}
+#'   \item{id}{character, id of this step}
+#' }
 #'
 #' ```{r, echo = FALSE, results="asis"}
 #' step <- "step_embed"

diff --git a/R/feature_hash.R b/R/feature_hash.R
@@ -34,9 +34,14 @@
 #' [recipes::step_zv()]) is recommended for any recipe that uses hashed columns.
 #'
 #' # Tidying
-#'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the columns that is selected)  is returned.
+#' 
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms` and `id`:
+#' 
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{id}{character, id of this step}
+#' }
 #'
 #' @template case-weights-not-supported
 #'

diff --git a/R/lencode_bayes.R b/R/lencode_bayes.R
@@ -61,9 +61,16 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the selectors or variables selected), `value` and `component` is returned.
-#'
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `level`, `value`, `terms`, and `id`:
+#' 
+#' \describe{
+#'   \item{level}{character, the factor levels}
+#'   \item{value}{numeric, the encoding}
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{id}{character, id of this step}
+#' }
+#' 
 #' @template case-weights-supervised
 #'
 #' @references
@@ -78,7 +85,7 @@
 #' "Hierarchical Partial Pooling for Repeated Binary Trials"
 #' \url{https://tinyurl.com/stan-pooling}
 #'
-#' "Prior Distributions for `rstanarm`` Models"
+#' "Prior Distributions for `rstanarm` Models"
 #' \url{https://tinyurl.com/stan-priors}
 #'
 #' "Estimating Generalized (Non-)Linear Models with Group-Specific Terms with

diff --git a/R/lencode_glm.R b/R/lencode_glm.R
@@ -44,9 +44,16 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the selectors or variables selected), `value` and `component` is returned.
-#'
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `level`, `value`, `terms`, and `id`:
+#' 
+#' \describe{
+#'   \item{level}{character, the factor levels}
+#'   \item{value}{numeric, the encoding}
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{id}{character, id of this step}
+#' }
+#' 
 #' @template case-weights-supervised
 #'
 #' @references

diff --git a/R/lencode_mixed.R b/R/lencode_mixed.R
@@ -57,8 +57,15 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the selectors or variables selected), `value` and `component` is returned.
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `level`, `value`, `terms`, and `id`:
+#' 
+#' \describe{
+#'   \item{level}{character, the factor levels}
+#'   \item{value}{numeric, the encoding}
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{id}{character, id of this step}
+#' }
 #'
 #' @template case-weights-supervised
 #'

diff --git a/R/pca_sparse.R b/R/pca_sparse.R
@@ -46,8 +46,15 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the selectors or variables selected), `value` and `component` is returned.
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms`, `value`, `component`, and `id`:
+#'
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{value}{numeric, variable loading}
+#'   \item{component}{character, principal component}
+#'   \item{id}{character, id of this step}
+#' }
 #'
 #' ```{r, echo = FALSE, results="asis"}
 #' step <- "step_pca_sparse"

diff --git a/R/pca_sparse_bayes.R b/R/pca_sparse_bayes.R
@@ -65,9 +65,16 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the selectors or variables selected), `value` and `component` is returned.
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms`, `value`, `component`, and `id`:
 #'
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{value}{numeric, variable loading}
+#'   \item{component}{character, principal component}
+#'   \item{id}{character, id of this step}
+#' }
+#' 
 #' ```{r, echo = FALSE, results="asis"}
 #' step <- "step_pca_sparse_bayes"
 #' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")

diff --git a/R/pca_truncated.R b/R/pca_truncated.R
@@ -35,9 +35,26 @@
 #' 
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, use either `type = "coef"` for
-#' the variable loadings per component or `type = "variance"` for how much
-#' variance each component accounts for.
+#' When you [`tidy()`][tidy.recipe()] this step, two things can happen
+#' depending on the `type` argument. If `type = "coef"`, a tibble is returned
+#' with 4 columns `terms`, `value`, `component`, and `id`:
+#'
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{value}{numeric, variable loading}
+#'   \item{component}{character, principal component}
+#'   \item{id}{character, id of this step}
+#' }
+#'
+#' If `type = "variance"`, a tibble is returned with 4 columns `terms`,
+#' `value`, `component`, and `id`:
+#'
+#' \describe{
+#'   \item{terms}{character, type of variance}
+#'   \item{value}{numeric, value of the variance}
+#'   \item{component}{integer, principal component}
+#'   \item{id}{character, id of this step}
+#' }
 #' 
 #' ```{r, echo = FALSE, results="asis"}
 #' step <- "step_pca_truncated"

diff --git a/R/umap.R b/R/umap.R
@@ -57,8 +57,13 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble with columns `terms`
-#' (the selectors or variables selected) is returned.
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms` and `id`:
+#' 
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{id}{character, id of this step}
+#' }
 #' 
 #' ```{r, echo = FALSE, results="asis"}
 #' step <- "step_umap"

diff --git a/R/woe.R b/R/woe.R
@@ -16,7 +16,7 @@
 #'   predictors in a model.
 #' @param outcome The bare name of the binary outcome encased in `vars()`.
 #' @param dictionary A tbl. A map of levels and woe values. It must have the
-#'   same layout than the output returned from [dictionary()]. If `NULL`` the
+#'   same layout as the output returned from [dictionary()]. If `NULL`, the
 #'   function will build a dictionary with those variables passed to \code{...}.
 #'   See [dictionary()] for details.
 #' @param Laplace The Laplace smoothing parameter. A value usually applied to
@@ -67,6 +67,23 @@
 #' `p_bad`, `p_good`, `woe` and `outcome` is returned. See [dictionary()] for
 #' more information.
 #' 
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
+#' columns `terms`, `value`, `n_tot`, `n_bad`, `n_good`, `p_bad`, `p_good`,
+#' `woe`, `outcome`, and `id`:
+#' 
+#' \describe{
+#'   \item{terms}{character, the selectors or variables selected}
+#'   \item{value}{character, level of the outcome}
+#'   \item{n_tot}{integer, total number}
+#'   \item{n_bad}{integer, number of bad examples}
+#'   \item{n_good}{integer, number of good examples}
+#'   \item{p_bad}{numeric, p of bad examples}
+#'   \item{p_good}{numeric, p of good examples}
+#'   \item{woe}{numeric, weight of evidence}
+#'   \item{outcome}{character, name of outcome variable}
+#'   \item{id}{character, id of this step}
+#' }
+#' 
 #' ```{r, echo = FALSE, results="asis"}
 #' step <- "step_woe"
 #' result <- knitr::knit_child("man/rmd/tunable-args.Rmd")

diff --git a/man/step_collapse_cart.Rd b/man/step_collapse_cart.Rd
diff --git a/man/step_collapse_stringdist.Rd b/man/step_collapse_stringdist.Rd
diff --git a/man/step_discretize_cart.Rd b/man/step_discretize_cart.Rd
diff --git a/man/step_discretize_xgb.Rd b/man/step_discretize_xgb.Rd