Merge branch 'master' into dev/cpu/memory_realloc_optimisation
razdoburdin authored Dec 17, 2024
2 parents ea340d9 + f06dcf8 commit 335ccdf
Showing 103 changed files with 1,573 additions and 845 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/r_tests.yml
@@ -34,7 +34,7 @@ jobs:
- name: Install system dependencies
run: |
sudo apt update
- sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev
+ sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev librsvg2-dev librsvg2-2
if: matrix.os == 'ubuntu-latest'
- uses: actions/checkout@v4
with:
@@ -76,7 +76,7 @@ jobs:
run: |
# Must run before checkout to have the latest git installed.
# No need to add pandoc, the container has it figured out.
- apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y
+ apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git librsvg2-dev librsvg2-2 -y
- name: Trust git cloning project sources
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
3 changes: 3 additions & 0 deletions R-package/DESCRIPTION
@@ -51,6 +51,9 @@ Suggests:
rmarkdown,
ggplot2 (>= 1.0.1),
DiagrammeR (>= 0.9.0),
+ DiagrammeRsvg,
+ rsvg,
+ htmlwidgets,
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3),
testthat,
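The three new Suggests pair with the librsvg2-dev / librsvg2-2 system packages added to the CI workflow above: DiagrammeRsvg and rsvg come into play when tree plots are exported to SVG/PNG, and htmlwidgets when they are saved as standalone HTML. A minimal sketch of that usage, assuming a trained booster and that xgb.plot.tree() returns a DiagrammeR htmlwidget (the `tree_idx` argument name is illustrative, not fixed by this diff):

library(xgboost)

data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 2)
bst <- xgb.train(
  data = dtrain,
  nrounds = 2,
  params = xgb.params(max_depth = 2, nthread = 2, objective = "binary:logistic")
)

gr <- xgb.plot.tree(bst, tree_idx = 1)      # DiagrammeR graph widget
htmlwidgets::saveWidget(gr, "tree.html")    # standalone HTML via htmlwidgets
svg <- DiagrammeRsvg::export_svg(gr)        # SVG markup via DiagrammeRsvg
rsvg::rsvg_png(charToRaw(svg), "tree.png")  # rasterize via rsvg/librsvg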
32 changes: 20 additions & 12 deletions R-package/R/callbacks.R
@@ -960,17 +960,17 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#' booster = "gblinear",
#' objective = "reg:logistic",
#' eval_metric = "auc",
- #' lambda = 0.0003,
- #' alpha = 0.0003,
+ #' reg_lambda = 0.0003,
+ #' reg_alpha = 0.0003,
#' nthread = nthread
#' )
#'
- #' # For 'shotgun', which is a default linear updater, using high eta values may result in
+ #' # For 'shotgun', which is a default linear updater, using high learning_rate values may result in
#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
#' # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
#' bst <- xgb.train(
- #' c(param, list(eta = 1.)),
+ #' c(param, list(learning_rate = 1.)),
#' dtrain,
#' evals = list(tr = dtrain),
#' nrounds = 200,
@@ -987,7 +987,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#' c(
#' param,
#' xgb.params(
- #' eta = 0.8,
+ #' learning_rate = 0.8,
#' updater = "coord_descent",
#' feature_selector = "thrifty",
#' top_k = 1
@@ -1000,12 +1000,20 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#' )
#' matplot(xgb.gblinear.history(bst), type = "l")
#' # Componentwise boosting is known to have similar effect to Lasso regularization.
- #' # Try experimenting with various values of top_k, eta, nrounds,
+ #' # Try experimenting with various values of top_k, learning_rate, nrounds,
#' # as well as different feature_selectors.
#'
#' # For xgb.cv:
#' bst <- xgb.cv(
- #' c(param, list(eta = 0.8)),
+ #' c(
+ #' param,
+ #' xgb.params(
+ #' learning_rate = 0.8,
+ #' updater = "coord_descent",
+ #' feature_selector = "thrifty",
+ #' top_k = 1
+ #' )
+ #' ),
#' dtrain,
#' nfold = 5,
#' nrounds = 100,
@@ -1022,15 +1030,15 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#' booster = "gblinear",
#' objective = "multi:softprob",
#' num_class = 3,
- #' lambda = 0.0003,
- #' alpha = 0.0003,
+ #' reg_lambda = 0.0003,
+ #' reg_alpha = 0.0003,
#' nthread = nthread
#' )
#'
#' # For the default linear updater 'shotgun' it sometimes is helpful
- #' # to use smaller eta to reduce instability
+ #' # to use smaller learning_rate to reduce instability
#' bst <- xgb.train(
- #' c(param, list(eta = 0.5)),
+ #' c(param, list(learning_rate = 0.5)),
#' dtrain,
#' evals = list(tr = dtrain),
#' nrounds = 50,
@@ -1044,7 +1052,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#'
#' # CV:
#' bst <- xgb.cv(
- #' c(param, list(eta = 0.5)),
+ #' c(param, list(learning_rate = 0.5)),
#' dtrain,
#' nfold = 5,
#' nrounds = 70,
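The example edits in this file are one mechanical rename from legacy aliases to the canonical parameter names: eta -> learning_rate, lambda -> reg_lambda, alpha -> reg_alpha (the core library accepts both spellings; the docs now standardize on the canonical ones). A condensed sketch of the resulting pattern, reusing `dtrain` and `nthread` from the surrounding examples:

param <- list(
  booster = "gblinear",
  objective = "reg:logistic",
  eval_metric = "auc",
  reg_lambda = 0.0003,  # formerly `lambda`
  reg_alpha = 0.0003,   # formerly `alpha`
  nthread = nthread
)
bst <- xgb.train(
  c(param, list(learning_rate = 0.5)),  # formerly `eta`
  dtrain,
  evals = list(tr = dtrain),
  nrounds = 50
)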
1 change: 0 additions & 1 deletion R-package/R/utils.R
@@ -493,7 +493,6 @@ NULL
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
14 changes: 6 additions & 8 deletions R-package/R/xgb.Booster.R
@@ -267,7 +267,6 @@ xgb.get.handle <- function(object) {
#' nrounds = 5,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 0.5,
#' nthread = nthread,
#' objective = "binary:logistic"
#' )
@@ -312,7 +311,6 @@ xgb.get.handle <- function(object) {
#' nrounds = 10,
#' params = xgb.params(
#' max_depth = 4,
- #' eta = 0.5,
#' nthread = 2,
#' subsample = 0.5,
#' objective = "multi:softprob",
@@ -336,7 +334,6 @@ xgb.get.handle <- function(object) {
#' nrounds = 10,
#' params = xgb.params(
#' max_depth = 4,
- #' eta = 0.5,
#' nthread = 2,
#' subsample = 0.5,
#' objective = "multi:softmax",
@@ -671,7 +668,6 @@ validate.features <- function(bst, newdata) {
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
@@ -779,7 +775,6 @@ xgb.attributes <- function(object) {
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = nthread,
#' objective = "binary:logistic"
#' )
@@ -834,13 +829,13 @@ xgb.config <- function(object) {
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
+ #' learning_rate = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
#' )
#'
- #' xgb.model.parameters(bst) <- list(eta = 0.1)
+ #' xgb.model.parameters(bst) <- list(learning_rate = 0.1)
#'
#' @rdname xgb.model.parameters
#' @export
@@ -1055,6 +1050,10 @@ xgb.best_iteration <- function(bst) {
return(out)
}

+ xgb.has_categ_features <- function(bst) {
+   return("c" %in% xgb.feature_types(bst))
+ }

#' Extract coefficients from linear booster
#'
#' @description
@@ -1281,7 +1280,6 @@ xgb.is.same.Booster <- function(obj1, obj2) {
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
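The new xgb.has_categ_features() helper reduces to a membership test: the model stores one type code per column, with "c" marking categorical features, so a booster has categorical features exactly when a "c" occurs in that vector. A sketch of the convention it relies on (xgb.feature_types() is an internal accessor; the `feature_types` argument below follows the current R interface and is shown for illustration only):

# "q" = quantitative (numeric) column, "c" = categorical column
types <- c("q", "q", "c")
"c" %in% types  # TRUE -> xgb.has_categ_features() would return TRUE

# The type codes originate at DMatrix construction time, e.g.:
dm <- xgb.DMatrix(
  data = matrix(rnorm(30), nrow = 10),
  label = rbinom(10, 1, 0.5),
  feature_types = c("q", "q", "c"),
  nthread = 1
)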
1 change: 0 additions & 1 deletion R-package/R/xgb.DMatrix.R
@@ -569,7 +569,6 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
tmp <- .process.df.for.dmatrix(lst$data, lst$feature_types)
lst$feature_types <- tmp$feature_types
.Call(XGProxyDMatrixSetDataColumnar_R, proxy_handle, tmp$lst)
- rm(tmp)
} else if (is.matrix(lst$data)) {
.Call(XGProxyDMatrixSetDataDense_R, proxy_handle, lst$data)
} else if (inherits(lst$data, "dgRMatrix")) {
2 changes: 1 addition & 1 deletion R-package/R/xgb.create.features.R
@@ -52,7 +52,7 @@
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
#'
- #' param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic', nthread = 1)
+ #' param <- list(max_depth = 2, learning_rate = 1, objective = 'binary:logistic', nthread = 1)
#' nrounds = 4
#'
#' bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds)
2 changes: 0 additions & 2 deletions R-package/R/xgb.cv.R
@@ -96,7 +96,6 @@
#' params = xgb.params(
#' nthread = 2,
#' max_depth = 3,
- #' eta = 1,
#' objective = "binary:logistic"
#' ),
#' nfold = 5,
@@ -316,7 +315,6 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold,
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
1 change: 0 additions & 1 deletion R-package/R/xgb.dump.R
@@ -34,7 +34,6 @@
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
52 changes: 31 additions & 21 deletions R-package/R/xgb.importance.R
@@ -12,15 +12,11 @@
#' @param feature_names Character vector used to overwrite the feature names
#' of the model. The default is `NULL` (use original feature names).
#' @param model Object of class `xgb.Booster`.
- #' @param trees An integer vector of tree indices that should be included
+ #' @param trees An integer vector of (base-1) tree indices that should be included
#' into the importance calculation (only for the "gbtree" booster).
#' The default (`NULL`) parses all trees.
#' It could be useful, e.g., in multiclass classification to get feature importances
- #' for each class separately. *Important*: the tree index in XGBoost models
- #' is zero-based (e.g., use `trees = 0:4` for the first five trees).
- #' @param data Deprecated.
- #' @param label Deprecated.
- #' @param target Deprecated.
+ #' for each class separately.
#' @return A `data.table` with the following columns:
#'
#' For a tree model:
@@ -33,7 +29,9 @@
#' For a linear model:
#' - `Features`: Names of the features used in the model.
#' - `Weight`: Linear coefficient of this feature.
- #' - `Class`: Class label (only for multiclass models).
+ #' - `Class`: Class label (only for multiclass models). For objects of class `xgboost` (as
+ #' produced by [xgboost()]), it will be a `factor`, while for objects of class `xgb.Booster`
+ #' (as produced by [xgb.train()]), it will be a zero-based integer vector.
#'
#' If `feature_names` is not provided and `model` doesn't have `feature_names`,
#' the index of the features will be used instead. Because the index is extracted from the model dump
@@ -49,7 +47,6 @@
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
@@ -63,7 +60,7 @@
#' nrounds = 20,
#' params = xgb.params(
#' booster = "gblinear",
- #' eta = 0.3,
+ #' learning_rate = 0.3,
#' nthread = 1,
#' objective = "binary:logistic"
#' )
@@ -82,7 +79,6 @@
#' nrounds = nrounds,
#' params = xgb.params(
#' max_depth = 3,
- #' eta = 0.2,
#' nthread = 2,
#' objective = "multi:softprob",
#' num_class = nclass
@@ -94,14 +90,14 @@
#'
#' # inspect importances separately for each class:
- #' xgb.importance(
- #' model = mbst, trees = seq(from = 0, by = nclass, length.out = nrounds)
- #' )
#' xgb.importance(
#' model = mbst, trees = seq(from = 1, by = nclass, length.out = nrounds)
#' )
#' xgb.importance(
#' model = mbst, trees = seq(from = 2, by = nclass, length.out = nrounds)
#' )
+ #' xgb.importance(
+ #' model = mbst, trees = seq(from = 3, by = nclass, length.out = nrounds)
+ #' )
#'
#' # multiclass classification using "gblinear":
#' mbst <- xgb.train(
@@ -112,7 +108,7 @@
#' nrounds = 15,
#' params = xgb.params(
#' booster = "gblinear",
- #' eta = 0.2,
+ #' learning_rate = 0.2,
#' nthread = 1,
#' objective = "multi:softprob",
#' num_class = nclass
@@ -122,15 +118,21 @@
#' xgb.importance(model = mbst)
#'
#' @export
- xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL,
- data = NULL, label = NULL, target = NULL) {

- if (!(is.null(data) && is.null(label) && is.null(target)))
- warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
+ xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL) {

if (!(is.null(feature_names) || is.character(feature_names)))
stop("feature_names: Has to be a character vector")

+ if (!is.null(trees)) {
+   if (!is.vector(trees)) {
+     stop("'trees' must be a vector of tree indices.")
+   }
+   trees <- trees - 1L
+   if (anyNA(trees)) {
+     stop("Passed invalid tree indices.")
+   }
+ }

handle <- xgb.get.handle(model)
if (xgb.booster_type(model) == "gblinear") {
args <- list(importance_type = "weight", feature_names = feature_names)
@@ -144,11 +146,19 @@ xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature
n_classes <- 0
}
importance <- if (n_classes == 0) {
- data.table(Feature = results$features, Weight = results$weight)[order(-abs(Weight))]
+ return(data.table(Feature = results$features, Weight = results$weight)[order(-abs(Weight))])
} else {
- data.table(
+ out <- data.table(
Feature = rep(results$features, each = n_classes), Weight = results$weight, Class = seq_len(n_classes) - 1
)[order(Class, -abs(Weight))]
+ if (inherits(model, "xgboost") && NROW(attributes(model)$metadata$y_levels)) {
+   class_vec <- out$Class
+   class_vec <- as.integer(class_vec) + 1L
+   attributes(class_vec)$levels <- attributes(model)$metadata$y_levels
+   attributes(class_vec)$class <- "factor"
+   out[, Class := class_vec]
+ }
+ return(out[])
}
} else {
concatenated <- list()
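Taken together, the xgb.importance() changes are: the deprecated data/label/target arguments are gone, `trees` is now interpreted as base-1 (validated, then shifted to the internal base-0 indexing via `trees - 1L`), and for xgboost()-produced classifiers with factor labels the Class column comes back as a factor rather than a zero-based integer. A usage sketch under those assumptions, with `mbst`, `nclass`, and `nrounds` as in the examples above:

# Per-class importances with base-1 indices: in round-robin multiclass
# boosting, class k's trees sit at positions k, k + nclass, k + 2*nclass, ...
imp_k1 <- xgb.importance(
  model = mbst,
  trees = seq(from = 1, by = nclass, length.out = nrounds)  # was `from = 0`
)

# NA or non-vector `trees` now fail fast instead of being passed through:
# xgb.importance(model = mbst, trees = c(1, NA))  # error: invalid tree indices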
1 change: 0 additions & 1 deletion R-package/R/xgb.load.R
@@ -35,7 +35,6 @@
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = nthread,
#' objective = "binary:logistic"
#' )
(diff truncated: remaining changed files not shown)
