Merge branch 'master' into dev/cpu/memory_realloc_optimisation
razdoburdin authored Dec 17, 2024
2 parents ea340d9 + f06dcf8 commit 335ccdf
Showing 103 changed files with 1,573 additions and 845 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/r_tests.yml
@@ -34,7 +34,7 @@ jobs:
- name: Install system dependencies
run: |
sudo apt update
- sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev
+ sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev librsvg2-dev librsvg2-2
if: matrix.os == 'ubuntu-latest'
- uses: actions/checkout@v4
with:
@@ -76,7 +76,7 @@ jobs:
run: |
# Must run before checkout to have the latest git installed.
# No need to add pandoc, the container has it figured out.
- apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y
+ apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git librsvg2-dev librsvg2-2 -y
- name: Trust git cloning project sources
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
3 changes: 3 additions & 0 deletions R-package/DESCRIPTION
@@ -51,6 +51,9 @@ Suggests:
rmarkdown,
ggplot2 (>= 1.0.1),
DiagrammeR (>= 0.9.0),
+ DiagrammeRsvg,
+ rsvg,
+ htmlwidgets,
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3),
testthat,
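The three new Suggests pair with the librsvg2-dev / librsvg2-2 system packages added to the CI workflow above: DiagrammeRsvg and rsvg come into play when tree plots are exported to SVG/PNG, and htmlwidgets when they are saved as standalone HTML. A minimal sketch of that usage, assuming a trained booster and that xgb.plot.tree() returns a DiagrammeR htmlwidget (the `tree_idx` argument name is illustrative, not fixed by this diff):

library(xgboost)

data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 2)
bst <- xgb.train(
  data = dtrain,
  nrounds = 2,
  params = xgb.params(max_depth = 2, nthread = 2, objective = "binary:logistic")
)

gr <- xgb.plot.tree(bst, tree_idx = 1)      # DiagrammeR graph widget
htmlwidgets::saveWidget(gr, "tree.html")    # standalone HTML via htmlwidgets
svg <- DiagrammeRsvg::export_svg(gr)        # SVG markup via DiagrammeRsvg
rsvg::rsvg_png(charToRaw(svg), "tree.png")  # rasterize via rsvg/librsvg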
32 changes: 20 additions & 12 deletions R-package/R/callbacks.R
@@ -960,17 +960,17 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#' booster = "gblinear",
#' objective = "reg:logistic",
#' eval_metric = "auc",
- #' lambda = 0.0003,
- #' alpha = 0.0003,
+ #' reg_lambda = 0.0003,
+ #' reg_alpha = 0.0003,
#' nthread = nthread
#' )
#'
- #' # For 'shotgun', which is a default linear updater, using high eta values may result in
+ #' # For 'shotgun', which is a default linear updater, using high learning_rate values may result in
#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
#' # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
#' bst <- xgb.train(
- #' c(param, list(eta = 1.)),
+ #' c(param, list(learning_rate = 1.)),
#' dtrain,
#' evals = list(tr = dtrain),
#' nrounds = 200,
@@ -987,7 +987,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#' c(
#' param,
#' xgb.params(
- #' eta = 0.8,
+ #' learning_rate = 0.8,
#' updater = "coord_descent",
#' feature_selector = "thrifty",
#' top_k = 1
@@ -1000,12 +1000,20 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#' )
#' matplot(xgb.gblinear.history(bst), type = "l")
#' # Componentwise boosting is known to have similar effect to Lasso regularization.
- #' # Try experimenting with various values of top_k, eta, nrounds,
+ #' # Try experimenting with various values of top_k, learning_rate, nrounds,
#' # as well as different feature_selectors.
#'
#' # For xgb.cv:
#' bst <- xgb.cv(
- #' c(param, list(eta = 0.8)),
+ #' c(
+ #' param,
+ #' xgb.params(
+ #' learning_rate = 0.8,
+ #' updater = "coord_descent",
+ #' feature_selector = "thrifty",
+ #' top_k = 1
+ #' )
+ #' ),
#' dtrain,
#' nfold = 5,
#' nrounds = 100,
@@ -1022,15 +1030,15 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#' booster = "gblinear",
#' objective = "multi:softprob",
#' num_class = 3,
- #' lambda = 0.0003,
- #' alpha = 0.0003,
+ #' reg_lambda = 0.0003,
+ #' reg_alpha = 0.0003,
#' nthread = nthread
#' )
#'
#' # For the default linear updater 'shotgun' it sometimes is helpful
- #' # to use smaller eta to reduce instability
+ #' # to use smaller learning_rate to reduce instability
#' bst <- xgb.train(
- #' c(param, list(eta = 0.5)),
+ #' c(param, list(learning_rate = 0.5)),
#' dtrain,
#' evals = list(tr = dtrain),
#' nrounds = 50,
@@ -1044,7 +1052,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#'
#' # CV:
#' bst <- xgb.cv(
- #' c(param, list(eta = 0.5)),
+ #' c(param, list(learning_rate = 0.5)),
#' dtrain,
#' nfold = 5,
#' nrounds = 70,
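The example edits in this file are one mechanical rename from legacy aliases to the canonical parameter names: eta -> learning_rate, lambda -> reg_lambda, alpha -> reg_alpha (the core library accepts both spellings; the docs now standardize on the canonical ones). A condensed sketch of the resulting pattern, reusing `dtrain` and `nthread` from the surrounding examples:

param <- list(
  booster = "gblinear",
  objective = "reg:logistic",
  eval_metric = "auc",
  reg_lambda = 0.0003,  # formerly `lambda`
  reg_alpha = 0.0003,   # formerly `alpha`
  nthread = nthread
)
bst <- xgb.train(
  c(param, list(learning_rate = 0.5)),  # formerly `eta`
  dtrain,
  evals = list(tr = dtrain),
  nrounds = 50
)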
1 change: 0 additions & 1 deletion R-package/R/utils.R
@@ -493,7 +493,6 @@ NULL
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
14 changes: 6 additions & 8 deletions R-package/R/xgb.Booster.R
@@ -267,7 +267,6 @@ xgb.get.handle <- function(object) {
#' nrounds = 5,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 0.5,
#' nthread = nthread,
#' objective = "binary:logistic"
#' )
@@ -312,7 +311,6 @@ xgb.get.handle <- function(object) {
#' nrounds = 10,
#' params = xgb.params(
#' max_depth = 4,
- #' eta = 0.5,
#' nthread = 2,
#' subsample = 0.5,
#' objective = "multi:softprob",
@@ -336,7 +334,6 @@ xgb.get.handle <- function(object) {
#' nrounds = 10,
#' params = xgb.params(
#' max_depth = 4,
- #' eta = 0.5,
#' nthread = 2,
#' subsample = 0.5,
#' objective = "multi:softmax",
@@ -671,7 +668,6 @@ validate.features <- function(bst, newdata) {
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
@@ -779,7 +775,6 @@ xgb.attributes <- function(object) {
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = nthread,
#' objective = "binary:logistic"
#' )
@@ -834,13 +829,13 @@ xgb.config <- function(object) {
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
+ #' learning_rate = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
#' )
#'
- #' xgb.model.parameters(bst) <- list(eta = 0.1)
+ #' xgb.model.parameters(bst) <- list(learning_rate = 0.1)
#'
#' @rdname xgb.model.parameters
#' @export
@@ -1055,6 +1050,10 @@ xgb.best_iteration <- function(bst) {
return(out)
}

+ xgb.has_categ_features <- function(bst) {
+   return("c" %in% xgb.feature_types(bst))
+ }

#' Extract coefficients from linear booster
#'
#' @description
@@ -1281,7 +1280,6 @@ xgb.is.same.Booster <- function(obj1, obj2) {
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
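The new xgb.has_categ_features() helper reduces to a membership test: the model stores one type code per column, with "c" marking categorical features, so a booster has categorical features exactly when a "c" occurs in that vector. A sketch of the convention it relies on (xgb.feature_types() is an internal accessor; the `feature_types` argument below follows the current R interface and is shown for illustration only):

# "q" = quantitative (numeric) column, "c" = categorical column
types <- c("q", "q", "c")
"c" %in% types  # TRUE -> xgb.has_categ_features() would return TRUE

# The type codes originate at DMatrix construction time, e.g.:
dm <- xgb.DMatrix(
  data = matrix(rnorm(30), nrow = 10),
  label = rbinom(10, 1, 0.5),
  feature_types = c("q", "q", "c"),
  nthread = 1
)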
1 change: 0 additions & 1 deletion R-package/R/xgb.DMatrix.R
@@ -569,7 +569,6 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
tmp <- .process.df.for.dmatrix(lst$data, lst$feature_types)
lst$feature_types <- tmp$feature_types
.Call(XGProxyDMatrixSetDataColumnar_R, proxy_handle, tmp$lst)
- rm(tmp)
} else if (is.matrix(lst$data)) {
.Call(XGProxyDMatrixSetDataDense_R, proxy_handle, lst$data)
} else if (inherits(lst$data, "dgRMatrix")) {
2 changes: 1 addition & 1 deletion R-package/R/xgb.create.features.R
@@ -52,7 +52,7 @@
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
#'
- #' param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic', nthread = 1)
+ #' param <- list(max_depth = 2, learning_rate = 1, objective = 'binary:logistic', nthread = 1)
#' nrounds = 4
#'
#' bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds)
2 changes: 0 additions & 2 deletions R-package/R/xgb.cv.R
@@ -96,7 +96,6 @@
#' params = xgb.params(
#' nthread = 2,
#' max_depth = 3,
- #' eta = 1,
#' objective = "binary:logistic"
#' ),
#' nfold = 5,
@@ -316,7 +315,6 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold,
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
1 change: 0 additions & 1 deletion R-package/R/xgb.dump.R
@@ -34,7 +34,6 @@
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
52 changes: 31 additions & 21 deletions R-package/R/xgb.importance.R
@@ -12,15 +12,11 @@
#' @param feature_names Character vector used to overwrite the feature names
#' of the model. The default is `NULL` (use original feature names).
#' @param model Object of class `xgb.Booster`.
- #' @param trees An integer vector of tree indices that should be included
+ #' @param trees An integer vector of (base-1) tree indices that should be included
#' into the importance calculation (only for the "gbtree" booster).
#' The default (`NULL`) parses all trees.
#' It could be useful, e.g., in multiclass classification to get feature importances
- #' for each class separately. *Important*: the tree index in XGBoost models
- #' is zero-based (e.g., use `trees = 0:4` for the first five trees).
- #' @param data Deprecated.
- #' @param label Deprecated.
- #' @param target Deprecated.
+ #' for each class separately.
#' @return A `data.table` with the following columns:
#'
#' For a tree model:
@@ -33,7 +29,9 @@
#' For a linear model:
#' - `Features`: Names of the features used in the model.
#' - `Weight`: Linear coefficient of this feature.
- #' - `Class`: Class label (only for multiclass models).
+ #' - `Class`: Class label (only for multiclass models). For objects of class `xgboost` (as
+ #' produced by [xgboost()]), it will be a `factor`, while for objects of class `xgb.Booster`
+ #' (as produced by [xgb.train()]), it will be a zero-based integer vector.
#'
#' If `feature_names` is not provided and `model` doesn't have `feature_names`,
#' the index of the features will be used instead. Because the index is extracted from the model dump
@@ -49,7 +47,6 @@
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = 2,
#' objective = "binary:logistic"
#' )
@@ -63,7 +60,7 @@
#' nrounds = 20,
#' params = xgb.params(
#' booster = "gblinear",
- #' eta = 0.3,
+ #' learning_rate = 0.3,
#' nthread = 1,
#' objective = "binary:logistic"
#' )
@@ -82,7 +79,6 @@
#' nrounds = nrounds,
#' params = xgb.params(
#' max_depth = 3,
- #' eta = 0.2,
#' nthread = 2,
#' objective = "multi:softprob",
#' num_class = nclass
@@ -94,14 +90,14 @@
#'
#' # inspect importances separately for each class:
- #' xgb.importance(
- #' model = mbst, trees = seq(from = 0, by = nclass, length.out = nrounds)
- #' )
#' xgb.importance(
#' model = mbst, trees = seq(from = 1, by = nclass, length.out = nrounds)
#' )
#' xgb.importance(
#' model = mbst, trees = seq(from = 2, by = nclass, length.out = nrounds)
#' )
+ #' xgb.importance(
+ #' model = mbst, trees = seq(from = 3, by = nclass, length.out = nrounds)
+ #' )
#'
#' # multiclass classification using "gblinear":
#' mbst <- xgb.train(
@@ -112,7 +108,7 @@
#' nrounds = 15,
#' params = xgb.params(
#' booster = "gblinear",
- #' eta = 0.2,
+ #' learning_rate = 0.2,
#' nthread = 1,
#' objective = "multi:softprob",
#' num_class = nclass
@@ -122,15 +118,21 @@
#' xgb.importance(model = mbst)
#'
#' @export
- xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL,
- data = NULL, label = NULL, target = NULL) {

- if (!(is.null(data) && is.null(label) && is.null(target)))
- warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
+ xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL) {

if (!(is.null(feature_names) || is.character(feature_names)))
stop("feature_names: Has to be a character vector")

+ if (!is.null(trees)) {
+   if (!is.vector(trees)) {
+     stop("'trees' must be a vector of tree indices.")
+   }
+   trees <- trees - 1L
+   if (anyNA(trees)) {
+     stop("Passed invalid tree indices.")
+   }
+ }

handle <- xgb.get.handle(model)
if (xgb.booster_type(model) == "gblinear") {
args <- list(importance_type = "weight", feature_names = feature_names)
@@ -144,11 +146,19 @@ xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature
n_classes <- 0
}
importance <- if (n_classes == 0) {
- data.table(Feature = results$features, Weight = results$weight)[order(-abs(Weight))]
+ return(data.table(Feature = results$features, Weight = results$weight)[order(-abs(Weight))])
} else {
- data.table(
+ out <- data.table(
Feature = rep(results$features, each = n_classes), Weight = results$weight, Class = seq_len(n_classes) - 1
)[order(Class, -abs(Weight))]
+ if (inherits(model, "xgboost") && NROW(attributes(model)$metadata$y_levels)) {
+   class_vec <- out$Class
+   class_vec <- as.integer(class_vec) + 1L
+   attributes(class_vec)$levels <- attributes(model)$metadata$y_levels
+   attributes(class_vec)$class <- "factor"
+   out[, Class := class_vec]
+ }
+ return(out[])
}
} else {
concatenated <- list()
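Taken together, the xgb.importance() changes are: the deprecated data/label/target arguments are gone, `trees` is now interpreted as base-1 (validated, then shifted to the internal base-0 indexing via `trees - 1L`), and for xgboost()-produced classifiers with factor labels the Class column comes back as a factor rather than a zero-based integer. A usage sketch under those assumptions, with `mbst`, `nclass`, and `nrounds` as in the examples above:

# Per-class importances with base-1 indices: in round-robin multiclass
# boosting, class k's trees sit at positions k, k + nclass, k + 2*nclass, ...
imp_k1 <- xgb.importance(
  model = mbst,
  trees = seq(from = 1, by = nclass, length.out = nrounds)  # was `from = 0`
)

# NA or non-vector `trees` now fail fast instead of being passed through:
# xgb.importance(model = mbst, trees = c(1, NA))  # error: invalid tree indices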
1 change: 0 additions & 1 deletion R-package/R/xgb.load.R
@@ -35,7 +35,6 @@
#' nrounds = 2,
#' params = xgb.params(
#' max_depth = 2,
- #' eta = 1,
#' nthread = nthread,
#' objective = "binary:logistic"
#' )
(diff truncated: remaining changed files not shown)
