diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 75f7b938cb29..c9344ceebab7 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -63,4 +63,4 @@ Imports: utils SystemRequirements: ~~CXXSTD~~ -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index ed1216c7986a..45dd01bd5045 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -758,8 +758,13 @@ Dataset <- R6::R6Class( ) #' @title Construct \code{lgb.Dataset} object -#' @description Construct \code{lgb.Dataset} object from dense matrix, sparse matrix -#' or local file (that was created previously by saving an \code{lgb.Dataset}). +#' @description LightGBM does not train on raw data. +#' It discretizes continuous features into histogram bins, tries to +#' combine categorical features, and automatically handles missing and +#' infinite values. +#' +#' The \code{Dataset} class handles that preprocessing, and holds that +#' alternative representation of the input data. #' @inheritParams lgb_shared_dataset_params #' @param data a \code{matrix} object, a \code{dgCMatrix} object, #' a character representing a path to a text file (CSV, TSV, or LibSVM), diff --git a/R-package/man/lgb.Dataset.Rd b/R-package/man/lgb.Dataset.Rd index 2605657b060a..16b241d4a599 100644 --- a/R-package/man/lgb.Dataset.Rd +++ b/R-package/man/lgb.Dataset.Rd @@ -60,8 +60,13 @@ second group, etc.} constructed dataset } \description{ -Construct \code{lgb.Dataset} object from dense matrix, sparse matrix - or local file (that was created previously by saving an \code{lgb.Dataset}). +LightGBM does not train on raw data. + It discretizes continuous features into histogram bins, tries to + combine categorical features, and automatically handles missing and + infinite values. + + The \code{Dataset} class handles that preprocessing, and holds that + alternative representation of the input data. 
} \examples{ \donttest{ diff --git a/docs/env.yml b/docs/env.yml index a532316ef85d..5c9c3dd73853 100644 --- a/docs/env.yml +++ b/docs/env.yml @@ -12,7 +12,7 @@ dependencies: - r-markdown=1.12 - r-matrix=1.6_4 - r-pkgdown=2.0.7 - - r-roxygen2=7.3.1 + - r-roxygen2=7.3.2 - scikit-learn>=1.4.0 - sphinx>=6.0 - sphinx_rtd_theme>=2.0 diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 194d9ca6c5b0..af4d757f480b 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -1745,7 +1745,15 @@ def current_iteration(self) -> int: class Dataset: - """Dataset in LightGBM.""" + """ + Dataset in LightGBM. + + LightGBM does not train on raw data. + It discretizes continuous features into histogram bins, tries to combine categorical features, + and automatically handles missing and infinite values. + + This class handles that preprocessing, and holds that alternative representation of the input data. + """ def __init__( self,