Skip to content

Commit

Permalink
[R-package] require lgb.Dataset, remove support for passing 'colnames…
Browse files Browse the repository at this point in the history
…' and 'categorical_feature' for lgb.train() and lgb.cv() (#6714)
  • Loading branch information
jameslamb authored Feb 13, 2025
1 parent c6d90bc commit d24260f
Show file tree
Hide file tree
Showing 13 changed files with 20 additions and 163 deletions.
2 changes: 1 addition & 1 deletion R-package/R/callback.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ CB_ENV <- R6::R6Class(

}

return(paste0(msg, collapse = " "))
return(paste(msg, collapse = " "))

}

Expand Down
2 changes: 1 addition & 1 deletion R-package/R/lgb.Dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ Dataset <- R6::R6Class(
if (!.is_null_handle(x = private$handle)) {

# Merge names with tab separation
merged_name <- paste0(as.list(private$colnames), collapse = "\t")
merged_name <- paste(as.list(private$colnames), collapse = "\t")
.Call(
LGBM_DatasetSetFeatureNames_R
, private$handle
Expand Down
2 changes: 1 addition & 1 deletion R-package/R/lgb.convert_with_rules.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
vapply(
X = df
, FUN = function(x) {
paste0(class(x), collapse = ",")
paste(class(x), collapse = ",")
}
, FUN.VALUE = character(1L)
)
Expand Down
63 changes: 2 additions & 61 deletions R-package/R/lgb.cv.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ CVBooster <- R6::R6Class(
#' @description Cross validation logic used by LightGBM
#' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label Deprecated. See "Deprecated Arguments" section below.
#' @param weight Deprecated. See "Deprecated Arguments" section below.
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
#' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
Expand All @@ -36,8 +34,6 @@ CVBooster <- R6::R6Class(
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets
Expand Down Expand Up @@ -69,20 +65,12 @@ CVBooster <- R6::R6Class(
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
#' to argument \code{'data'}. It will also remove support for passing arguments
#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
#'
#' @importFrom data.table data.table setorderv
#' @export
lgb.cv <- function(params = list()
, data
, nrounds = 100L
, nfold = 3L
, label = NULL
, weight = NULL
, obj = NULL
, eval = NULL
, verbose = 1L
Expand All @@ -92,8 +80,6 @@ lgb.cv <- function(params = list()
, stratified = TRUE
, folds = NULL
, init_model = NULL
, colnames = NULL
, categorical_feature = NULL
, early_stopping_rounds = NULL
, callbacks = list()
, reset_data = FALSE
Expand All @@ -104,33 +90,8 @@ lgb.cv <- function(params = list()
if (nrounds <= 0L) {
stop("nrounds should be greater than zero")
}

# If 'data' is not an lgb.Dataset, try to construct one using 'label'
if (!.is_Dataset(x = data)) {
warning(paste0(
"Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. "
, "Either pass an lgb.Dataset object, or use lightgbm()."
))
if (is.null(label)) {
stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'")
}
data <- lgb.Dataset(data = data, label = label)
}

# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.cv")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.cv")
}
if ("label" %in% args) {
.emit_dataset_kwarg_warning("label", "lgb.cv")
}
if ("weight" %in% args) {
.emit_dataset_kwarg_warning("weight", "lgb.cv")
stop("lgb.cv: data must be an lgb.Dataset instance")
}

# set some parameters, resolving the way they were passed in with other parameters
Expand Down Expand Up @@ -214,37 +175,17 @@ lgb.cv <- function(params = list()
data$construct()

# Check interaction constraints
cnames <- NULL
if (!is.null(colnames)) {
cnames <- colnames
} else if (!is.null(data$get_colnames())) {
cnames <- data$get_colnames()
}
params[["interaction_constraints"]] <- .check_interaction_constraints(
interaction_constraints = interaction_constraints
, column_names = cnames
, column_names = data$get_colnames()
)

if (!is.null(weight)) {
data$set_field(field_name = "weight", data = weight)
}

# Update parameters with parsed parameters
data$update_params(params = params)

# Create the predictor set
data$.__enclos_env__$private$set_predictor(predictor = predictor)

# Write column names
if (!is.null(colnames)) {
data$set_colnames(colnames = colnames)
}

# Write categorical features
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature = categorical_feature)
}

if (!is.null(folds)) {

# Check for list of folds or for single value
Expand Down
36 changes: 1 addition & 35 deletions R-package/R/lgb.train.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
#' @inheritParams lgb_shared_params
#' @param valids a list of \code{lgb.Dataset} objects, used for validation
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param colnames Deprecated. See "Deprecated Arguments" section below.
#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
#' @param callbacks List of callback functions that are applied at each iteration.
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
#' booster model into a predictor model which frees up memory and the
Expand Down Expand Up @@ -42,12 +40,6 @@
#' )
#' }
#'
#' @section Deprecated Arguments:
#'
#' A future release of \code{lightgbm} will remove support for passing arguments
#' \code{'categorical_feature'} and \code{'colnames'}. Pass those things to
#' \code{lgb.Dataset} instead.
#'
#' @export
lgb.train <- function(params = list(),
data,
Expand All @@ -59,8 +51,6 @@ lgb.train <- function(params = list(),
record = TRUE,
eval_freq = 1L,
init_model = NULL,
colnames = NULL,
categorical_feature = NULL,
early_stopping_rounds = NULL,
callbacks = list(),
reset_data = FALSE,
Expand All @@ -83,16 +73,6 @@ lgb.train <- function(params = list(),
}
}

# raise deprecation warnings if necessary
# ref: https://github.com/microsoft/LightGBM/issues/6435
args <- names(match.call())
if ("categorical_feature" %in% args) {
.emit_dataset_kwarg_warning("categorical_feature", "lgb.train")
}
if ("colnames" %in% args) {
.emit_dataset_kwarg_warning("colnames", "lgb.train")
}

# set some parameters, resolving the way they were passed in with other parameters
# in `params`.
# this ensures that the model stored with Booster$save() correctly represents
Expand Down Expand Up @@ -171,21 +151,12 @@ lgb.train <- function(params = list(),

# Construct datasets, if needed
data$update_params(params = params)
if (!is.null(categorical_feature)) {
data$set_categorical_feature(categorical_feature)
}
data$construct()

# Check interaction constraints
cnames <- NULL
if (!is.null(colnames)) {
cnames <- colnames
} else if (!is.null(data$get_colnames())) {
cnames <- data$get_colnames()
}
params[["interaction_constraints"]] <- .check_interaction_constraints(
interaction_constraints = interaction_constraints
, column_names = cnames
, column_names = data$get_colnames()
)

# Update parameters with parsed parameters
Expand All @@ -194,11 +165,6 @@ lgb.train <- function(params = list(),
# Create the predictor set
data$.__enclos_env__$private$set_predictor(predictor)

# Write column names
if (!is.null(colnames)) {
data$set_colnames(colnames)
}

valid_contain_train <- FALSE
train_data_name <- "train"
reduced_valid_sets <- list()
Expand Down
24 changes: 4 additions & 20 deletions R-package/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

# If a parameter has multiple values, join those values together with commas.
# trimws() is necessary because format() will pad to make strings the same width
val <- paste0(
val <- paste(
trimws(
format(
x = unname(params[[i]])
Expand All @@ -46,7 +46,7 @@
if (nchar(val) <= 0L) next # Skip join

# Join key value
pair <- paste0(c(param_names[[i]], val), collapse = "=")
pair <- paste(c(param_names[[i]], val), collapse = "=")
ret <- c(ret, pair)

}
Expand All @@ -55,7 +55,7 @@
return("")
}

return(paste0(ret, collapse = " "))
return(paste(ret, collapse = " "))

}

Expand Down Expand Up @@ -115,7 +115,7 @@
# Turn indices 0-based and convert to string
for (j in seq_along(interaction_constraints)) {
interaction_constraints[[j]] <- paste0(
"[", paste0(interaction_constraints[[j]] - 1L, collapse = ","), "]"
"[", paste(interaction_constraints[[j]] - 1L, collapse = ","), "]"
)
}
return(interaction_constraints)
Expand Down Expand Up @@ -258,19 +258,3 @@
return(a == b)
}
}

# ref: https://github.com/microsoft/LightGBM/issues/6435
# Emit a deprecation warning for a Dataset-related keyword argument.
#
# Call sites pass the argument name first and the calling function's name second,
# e.g. .emit_dataset_kwarg_warning("colnames", "lgb.cv"), so the formals are
# declared in that order. With the formals the other way around, those positional
# calls produced "Argument 'lgb.cv' to colnames() is deprecated ...".
# Calls that supply both arguments by name are unaffected by the ordering.
#
# @param argname name of the deprecated argument, e.g. "colnames"
# @param calling_function name of the function that received the argument,
#        without parentheses, e.g. "lgb.cv"
# @return NULL, invisibly. Called for its side effect of raising a warning.
.emit_dataset_kwarg_warning <- function(argname, calling_function) {
    msg <- sprintf(
        paste0(
            "Argument '%s' to %s() is deprecated and will be removed in a future release. "
            , "Set '%s' with lgb.Dataset() instead. "
            , "See https://github.com/microsoft/LightGBM/issues/6435."
        )
        , argname
        , calling_function
        , argname
    )
    warning(msg)
    return(invisible(NULL))
}
20 changes: 0 additions & 20 deletions R-package/man/lgb.cv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 0 additions & 14 deletions R-package/man/lgb.train.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion R-package/src/install.libs.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ inst_dir <- file.path(R_PACKAGE_SOURCE, "inst", fsep = "/")
, "make this faster."
))
}
cmd <- paste0(cmd, " ", paste0(args, collapse = " "))
cmd <- paste0(cmd, " ", paste(args, collapse = " "))
exit_code <- system(cmd)
}

Expand Down
Loading

0 comments on commit d24260f

Please sign in to comment.