From 9ef694f8dd55975124edcd313be9641a8a78eb11 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 16:04:24 +0100 Subject: [PATCH 01/21] feat: ties methods for archive$best() --- R/ArchiveFSelect.R | 43 ++++++++++ inst/testthat/helper_expectations.R | 4 + inst/testthat/helper_misc.R | 4 +- tests/testthat/test_ArchiveFSelect.R | 118 +++++++++++++++++++++++++++ 4 files changed, 167 insertions(+), 2 deletions(-) diff --git a/R/ArchiveFSelect.R b/R/ArchiveFSelect.R index 05880920..0773615b 100644 --- a/R/ArchiveFSelect.R +++ b/R/ArchiveFSelect.R @@ -141,6 +141,48 @@ ArchiveFSelect = R6Class("ArchiveFSelect", print = function() { catf(format(self)) print(self$data[, setdiff(names(self$data), "uhash"), with = FALSE], digits=2) + }, + + #' @description + #' Returns the best scoring feature sets. + #' + #' @param batch (`integer()`)\cr + #' The batch number(s) to limit the best results to. + #' Default is all batches. + #' @param ties_method (`character(1)`)\cr + #' How to handle ties. + #' Default is "first" which returns the first added best feature set. + #' "random" returns a random feature set from the best feature sets. + #' "n_features" returns the feature set with the least features. + #' + #' @return [data.table::data.table()] + best = function(batch = NULL, ties_method = "first") { + assert_choice(ties_method, c("first", "random", "n_features")) + assert_subset(batch, seq_len(self$n_batch)) + if (self$n_batch == 0L) return(data.table()) + + if (is.null(batch)) { + tab = self$data + } else { + tab = self$data[list(batch), , on = "batch_nr"] + } + + if (self$codomain$target_length == 1L) { + y = tab[[self$cols_y]] * -self$codomain$maximization_to_minimization + + if (ties_method == "n_features") { + ii = which(y == max(y)) + ii = which.min(rowSums(tab[ii, self$cols_x, with = FALSE])) + tab[ii] + } else { + ii = which_max(y, ties_method = ties_method) + tab[ii] + } + } else { + ymat = t(as.matrix(tab[, self$cols_y, with = FALSE])) + ymat = self$codomain$maximization_to_minimization * ymat + tab[!is_dominated(ymat)] + } } ) ) @@ -157,6 +199,7 @@ as.data.table.ArchiveFSelect = function(x, ..., exclude_columns = "uhash", measu # add feature vector tab[, "features" := lapply(transpose(.SD), function(col) x$cols_x[col]), .SDcols = x$cols_x] + tab[, "n_features" := map(get("features"), length)] if (x$benchmark_result$n_resample_results) { # add extra measures diff --git a/inst/testthat/helper_expectations.R b/inst/testthat/helper_expectations.R index 8942da90..cf1e8110 100644 --- a/inst/testthat/helper_expectations.R +++ b/inst/testthat/helper_expectations.R @@ -17,3 +17,7 @@ expect_max_features = function(features, n) { res = max(rowSums(features)) expect_set_equal(res, n) } + +expect_features = function(res, identical_to = NULL, must_include = NULL) { + expect_names(names(res)[as.logical(res)], must.include = must_include, identical.to = identical_to) +} diff --git a/inst/testthat/helper_misc.R b/inst/testthat/helper_misc.R index 53d3d752..bd522a8f 100644 --- a/inst/testthat/helper_misc.R +++ b/inst/testthat/helper_misc.R @@ -29,7 +29,7 @@ TEST_MAKE_INST_2D = function(n = 4L, folds = 2L, store_models = FALSE, store_ben MeasureDummy = R6Class("MeasureDummy", inherit = MeasureRegr, public = list( - initialize = function(score_design = NULL) { + initialize = function(score_design = NULL, minimize = FALSE) { if (is.null(score_design)) { score_design = data.table( score = c(1, 2, 4, 3), @@ -37,7 +37,7 @@ MeasureDummy = R6Class("MeasureDummy", inherit = MeasureRegr, ) } private$.score_design = score_design - super$initialize(id = "dummy", range = c(0, 4), minimize = FALSE) + super$initialize(id = "dummy", range = c(0, 4), minimize = minimize) } ), private = list( diff --git a/tests/testthat/test_ArchiveFSelect.R b/tests/testthat/test_ArchiveFSelect.R index 4615cc39..3be916e4 100644 --- a/tests/testthat/test_ArchiveFSelect.R +++ b/tests/testthat/test_ArchiveFSelect.R @@ -139,3 +139,121 @@ test_that("ArchiveFSelect as.data.table function works", { tab = as.data.table(instance$archive) expect_equal(tab$batch_nr, 1:10) }) + +test_that("best method works with ties", { + + design = mlr3misc::rowwise_table( + ~x1, ~x2, ~x3, ~x4, + TRUE, FALSE, FALSE, TRUE, + TRUE, FALSE, FALSE, FALSE, + TRUE, TRUE, FALSE, FALSE + ) + + score_design = data.table( + score = c(0.2, 0.2, 0.1), + features = list(c("x1", "x4"), "x1", c("x1", "x2")) + ) + measure = msr("dummy", score_design = score_design, minimize = FALSE) + + instance = fselect( + fselector = fs("design_points", design = design), + task = TEST_MAKE_TSK(), + learner = lrn("regr.rpart"), + resampling = rsmp("cv", folds = 3), + measures = measure + ) + + instance$archive$best() + instance$archive$best(ties_method = "first") + instance$archive$best(ties_method = "random") + instance$archive$best(ties_method = "n_features") +}) + +test_that("best method works with ties and maximization", { + + design = mlr3misc::rowwise_table( + ~x1, ~x2, ~x3, ~x4, + TRUE, FALSE, FALSE, TRUE, + TRUE, FALSE, FALSE, FALSE, + FALSE, TRUE, FALSE, FALSE + ) + + score_design = data.table( + score = c(0.2, 0.2, 0.1), + features = list(c("x1", "x4"), "x1", c("x1", "x2")) + ) + measure = msr("dummy", score_design = score_design, minimize = FALSE) + + instance = fselect( + fselector = fs("design_points", design = design), + task = TEST_MAKE_TSK(), + learner = lrn("regr.rpart"), + resampling = rsmp("cv", folds = 3), + measures = measure + ) + + expect_features(instance$archive$best()[, list(x1, x2, x3, x4)], identical_to = c("x1", "x4")) + expect_features(instance$archive$best(ties_method = "first")[, list(x1, x2, x3, x4)], identical_to = c("x1", "x4")) + expect_features(instance$archive$best(ties_method = "random")[, list(x1, x2, x3, x4)], must_include = "x1") + expect_features(instance$archive$best(ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x1") +}) + +test_that("best method works with ties and minimization", { + + design = mlr3misc::rowwise_table( + ~x1, ~x2, ~x3, ~x4, + TRUE, FALSE, FALSE, TRUE, + TRUE, FALSE, FALSE, FALSE, + FALSE, TRUE, FALSE, FALSE, + FALSE, TRUE, FALSE, TRUE + ) + + score_design = data.table( + score = c(0.2, 0.2, 0.1, 0.1), + features = list(c("x1", "x4"), "x1", c("x1", "x2"), c("x2", "x4")) + ) + measure = msr("dummy", score_design = score_design, minimize = TRUE) + + instance = fselect( + fselector = fs("design_points", design = design), + task = TEST_MAKE_TSK(), + learner = lrn("regr.rpart"), + resampling = rsmp("cv", folds = 3), + measures = measure + ) + + expect_features(instance$archive$best()[, list(x1, x2, x3, x4)], identical_to = "x2") + expect_features(instance$archive$best(ties_method = "first")[, list(x1, x2, x3, x4)], identical_to = "x2") + expect_features(instance$archive$best(ties_method = "random")[, list(x1, x2, x3, x4)], must_include = "x2") + expect_features(instance$archive$best(ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x2") +}) + +test_that("best method works with batches and ties", { + + design = mlr3misc::rowwise_table( + ~x1, ~x2, ~x3, ~x4, + TRUE, FALSE, FALSE, TRUE, + TRUE, FALSE, FALSE, FALSE, + FALSE, TRUE, TRUE, FALSE, + FALSE, TRUE, FALSE, FALSE + ) + + score_design = data.table( + score = c(0.2, 0.2, 0.2, 0.1), + features = list(c("x1", "x4"), "x1", c("x2", "x3"), c("x1", "x2")) + ) + measure = msr("dummy", score_design = score_design, minimize = FALSE) + + instance = fselect( + fselector = fs("design_points", design = design, batch_size = 1), + task = TEST_MAKE_TSK(), + learner = lrn("regr.rpart"), + resampling = rsmp("cv", folds = 3), + measures = measure + ) + + expect_features(instance$archive$best(batch = c(2, 3))[, list(x1, x2, x3, x4)], identical_to = "x1") + expect_features(instance$archive$best(batch = c(1, 3), ties_method = "first")[, list(x1, x2, x3, x4)], identical_to = c("x1", "x4")) + expect_features(instance$archive$best(batch = c(1, 2), ties_method = "random")[, list(x1, x2, x3, x4)], must_include = "x1") + expect_features(instance$archive$best(batch = c(2, 3), ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x1") +}) From c8c588bcd4610359ea46649dc2246e808e192b01 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 16:40:26 +0100 Subject: [PATCH 02/21] fix: n_features --- R/ArchiveFSelect.R | 3 ++- R/FSelectorRFE.R | 5 +++-- tests/testthat/test_ArchiveFSelect.R | 7 ++++--- tests/testthat/test_FSelectInstanceSingleCrit.R | 4 ++-- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/R/ArchiveFSelect.R b/R/ArchiveFSelect.R index 0773615b..e846478e 100644 --- a/R/ArchiveFSelect.R +++ b/R/ArchiveFSelect.R @@ -172,7 +172,8 @@ ArchiveFSelect = R6Class("ArchiveFSelect", if (ties_method == "n_features") { ii = which(y == max(y)) - ii = which.min(rowSums(tab[ii, self$cols_x, with = FALSE])) + tab = tab[ii] + ii = which.min(rowSums(tab[, self$cols_x, with = FALSE])) tab[ii] } else { ii = which_max(y, ties_method = ties_method) diff --git a/R/FSelectorRFE.R b/R/FSelectorRFE.R index c61d8869..aea6065a 100644 --- a/R/FSelectorRFE.R +++ b/R/FSelectorRFE.R @@ -105,7 +105,8 @@ FSelectorRFE = R6Class("FSelectorRFE", feature_number = p_int(lower = 1), subset_sizes = p_uty(), recursive = p_lgl(default = TRUE), - aggregation = p_fct(c("mean", "rank"), default = "rank") + aggregation = p_fct(c("mean", "rank"), default = "rank"), + ties_method = p_fct(c("first", "random", "n_features"), default = "first") ) ps$values = list(recursive = TRUE, aggregation = "rank") @@ -204,7 +205,7 @@ rfe_subsets = function(n, n_features, feature_number, subset_sizes, feature_frac # Run recursive feature elimination # instance is changed by reference -rfe_workhorse = function(inst, subsets, recursive, aggregation = raw_importance, folds = 1) { +rfe_workhorse = function(inst, subsets, recursive, aggregation = raw_importance, ties_method, folds = 1) { archive = inst$archive feature_names = inst$archive$cols_x n = length(feature_names) diff --git a/tests/testthat/test_ArchiveFSelect.R b/tests/testthat/test_ArchiveFSelect.R index fdb51b37..306327ac 100644 --- a/tests/testthat/test_ArchiveFSelect.R +++ b/tests/testthat/test_ArchiveFSelect.R @@ -173,14 +173,15 @@ test_that("best method works with ties and maximization", { design = mlr3misc::rowwise_table( ~x1, ~x2, ~x3, ~x4, + FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE ) score_design = data.table( - score = c(0.2, 0.2, 0.1), - features = list(c("x1", "x4"), "x1", c("x1", "x2")) + score = c(0.1, 0.2, 0.2, 0.1), + features = list(c("x2", "x4"), c("x1", "x4"), "x1", c("x1", "x2")) ) measure = msr("dummy", score_design = score_design, minimize = FALSE) @@ -210,7 +211,7 @@ test_that("best method works with ties and minimization", { score_design = data.table( score = c(0.2, 0.2, 0.1, 0.1), - features = list(c("x1", "x4"), "x1", c("x1", "x2"), c("x2", "x4")) + features = list(c("x1", "x4"), "x1", "x2", c("x2", "x4")) ) measure = msr("dummy", score_design = score_design, minimize = TRUE) diff --git a/tests/testthat/test_FSelectInstanceSingleCrit.R b/tests/testthat/test_FSelectInstanceSingleCrit.R index 548738e5..c3efc622 100644 --- a/tests/testthat/test_FSelectInstanceSingleCrit.R +++ b/tests/testthat/test_FSelectInstanceSingleCrit.R @@ -89,7 +89,7 @@ test_that("always include variable works", { expect_names(instance$archive$cols_x, disjunct.from = "gloucose") expect_names(names(instance$archive$data), disjunct.from = "gloucose") walk(data$resample_result, function(rr) { - expect_names(names(rr$learners[[1]]$state$data_prototype), must.include = "glucose") + expect_names(names(rr$learners[[1]]$state$data_prototype) %??% rr$learners[[1]]$state$feature_names, must.include = "glucose") }) }) @@ -115,6 +115,6 @@ test_that("always include variables works", { expect_names(instance$archive$cols_x, disjunct.from = c("glucose", "age")) expect_names(names(instance$archive$data), disjunct.from = c("glucose", "age")) walk(data$resample_result, function(rr) { - expect_names(names(rr$learners[[1]]$state$data_prototype), must.include = c("glucose", "age")) + expect_names(names(rr$learners[[1]]$state$data_prototype) %??% rr$learners[[1]]$state$feature_names, must.include = c("glucose", "age")) }) }) From 0a3e79d06417ef52044c51e5e8efe6fe78a22549 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 18:00:38 +0100 Subject: [PATCH 03/21] feat: add global ties option --- R/ArchiveFSelect.R | 50 ++++++++++++++++++++++------ R/AutoFSelector.R | 16 ++++++++- R/FSelectInstanceSingleCrit.R | 17 ++++++++-- R/fselect.R | 52 ++++++++++++++++++++++------- man-roxygen/param_ties_method.R | 8 +++++ man/ArchiveFSelect.Rd | 57 ++++++++++++++++++++++++++++++-- man/AutoFSelector.Rd | 12 ++++++- man/FSelectInstanceSingleCrit.Rd | 12 ++++++- man/fselect.Rd | 12 ++++++- 9 files changed, 206 insertions(+), 30 deletions(-) create mode 100644 man-roxygen/param_ties_method.R diff --git a/R/ArchiveFSelect.R b/R/ArchiveFSelect.R index e846478e..01599d56 100644 --- a/R/ArchiveFSelect.R +++ b/R/ArchiveFSelect.R @@ -49,6 +49,8 @@ #' * `measures` (list of [mlr3::Measure])\cr #' Score feature sets on additional measures. #' +#' @template param_ties_method +#' #' @export ArchiveFSelect = R6Class("ArchiveFSelect", inherit = Archive, @@ -72,8 +74,14 @@ ArchiveFSelect = R6Class("ArchiveFSelect", #' #' @param check_values (`logical(1)`)\cr #' If `TRUE` (default), hyperparameter configurations are check for validity. - initialize = function(search_space, codomain, check_values = TRUE) { + initialize = function( + search_space, + codomain, + check_values = TRUE, + ties_method = "n_features" + ) { super$initialize(search_space, codomain, check_values) + self$ties_method = ties_method # initialize empty benchmark result self$benchmark_result = BenchmarkResult$new() @@ -147,17 +155,21 @@ ArchiveFSelect = R6Class("ArchiveFSelect", #' Returns the best scoring feature sets. #' #' @param batch (`integer()`)\cr - #' The batch number(s) to limit the best results to. - #' Default is all batches. + #' The batch number(s) to limit the best results to. + #' Default is all batches. #' @param ties_method (`character(1)`)\cr - #' How to handle ties. - #' Default is "first" which returns the first added best feature set. - #' "random" returns a random feature set from the best feature sets. - #' "n_features" returns the feature set with the least features. - #' + #' Method to handle ties. + #' If `NULL` (default), the global ties method set during initialization is used. + #' Can be one of `n_features`, `first`, `random`. + #' The option `n_features` (default) selects the feature set with the least features. + #' If there are multiple best feature sets with the same number of features, the first one is selected. + #' The `first` method returns the first added best feature set. + #' The `random` method returns a random feature set from the best feature sets. + # #' @return [data.table::data.table()] - best = function(batch = NULL, ties_method = "first") { - assert_choice(ties_method, c("first", "random", "n_features")) + best = function(batch = NULL, ties_method = NULL) { + assert_choice(ties_method, c("first", "random", "n_features"), null.ok = TRUE) + if (is.null(ties_method)) ties_method = self$ties_method assert_subset(batch, seq_len(self$n_batch)) if (self$n_batch == 0L) return(data.table()) @@ -185,6 +197,24 @@ ArchiveFSelect = R6Class("ArchiveFSelect", tab[!is_dominated(ymat)] } } + ), + + active = list( + + #' @field ties_method (`character(1)`)\cr + #' Method to handle ties. + ties_method = function(rhs) { + if (!missing(rhs)) { + assert_choice(rhs, c("first", "random", "n_features")) + private$.ties_method = rhs + } else { + private$.ties_method + } + } + ), + + private = list( + .ties_method = NULL ) ) diff --git a/R/AutoFSelector.R b/R/AutoFSelector.R index df2d2998..40f9eef3 100644 --- a/R/AutoFSelector.R +++ b/R/AutoFSelector.R @@ -36,6 +36,7 @@ #' @template param_store_models #' @template param_check_values #' @template param_callbacks +#' @template param_ties_method #' #' @export #' @examples @@ -111,7 +112,19 @@ AutoFSelector = R6Class("AutoFSelector", #' #' @param fselector ([FSelector])\cr #' Optimization algorithm. - initialize = function(fselector, learner, resampling, measure = NULL, terminator, store_fselect_instance = TRUE, store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, callbacks = list()) { + initialize = function( + fselector, + learner, + resampling, + measure = NULL, + terminator, + store_fselect_instance = TRUE, + store_benchmark_result = TRUE, + store_models = FALSE, + check_values = FALSE, + callbacks = list(), + ties_method = "n_features" + ) { ia = list() self$fselector = assert_r6(fselector, "FSelector")$clone() ia$learner = assert_learner(as_learner(learner, clone = TRUE)) @@ -125,6 +138,7 @@ AutoFSelector = R6Class("AutoFSelector", ia$check_values = assert_flag(check_values) ia$callbacks = assert_callbacks(as_callbacks(callbacks)) + ia$ties_method = assert_choice(ties_method, c("n_features", "first", "random")) self$instance_args = ia super$initialize( diff --git a/R/FSelectInstanceSingleCrit.R b/R/FSelectInstanceSingleCrit.R index 90261e53..a4f19a89 100644 --- a/R/FSelectInstanceSingleCrit.R +++ b/R/FSelectInstanceSingleCrit.R @@ -54,6 +54,7 @@ #' @template param_store_benchmark_result #' @template param_callbacks #' @template param_xdt +#' @template param_ties_method #' #' @export #' @examples @@ -93,12 +94,24 @@ FSelectInstanceSingleCrit = R6Class("FSelectInstanceSingleCrit", #' @description #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function(task, learner, resampling, measure, terminator, store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, callbacks = list()) { + initialize = function( + task, + learner, + resampling, + measure, + terminator, + store_benchmark_result = TRUE, + store_models = FALSE, + check_values = FALSE, + callbacks = list(), + ties_method = "n_features" + ) { # initialized specialized fselect archive and objective archive = ArchiveFSelect$new( search_space = task_to_domain(assert_task(task)), codomain = measures_to_codomain(assert_measure(measure)), - check_values = check_values) + check_values = check_values, + ties_method = ties_method) objective = ObjectiveFSelect$new( task = task, diff --git a/R/fselect.R b/R/fselect.R index 4425dfee..c2474841 100644 --- a/R/fselect.R +++ b/R/fselect.R @@ -36,6 +36,7 @@ #' @template param_store_models #' @template param_check_values #' @template param_callbacks +#' @template param_ties_method #' #' @export #' @examples @@ -60,21 +61,48 @@ #' #' # Inspect all evaluated configurations #' as.data.table(instance$archive) -fselect = function(fselector, task, learner, resampling, measures = NULL, term_evals = NULL, term_time = NULL, terminator = NULL, store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, callbacks = list()) { +fselect = function( + fselector, + task, + learner, + resampling, + measures = NULL, + term_evals = NULL, + term_time = NULL, + terminator = NULL, + store_benchmark_result = TRUE, + store_models = FALSE, + check_values = FALSE, + callbacks = list(), + ties_method = "n_features" + ) { assert_fselector(fselector) terminator = terminator %??% terminator_selection(term_evals, term_time) - FSelectInstance = if (!is.list(measures)) FSelectInstanceSingleCrit else FSelectInstanceMultiCrit - instance = FSelectInstance$new( - task = task, - learner = learner, - resampling = resampling, - measures, - terminator = terminator, - store_benchmark_result = store_benchmark_result, - store_models = store_models, - check_values = check_values, - callbacks = callbacks) + instance = if (!is.list(measures)) { + FSelectInstanceSingleCrit$new( + task = task, + learner = learner, + resampling = resampling, + measure = measures, + terminator = terminator, + store_benchmark_result = store_benchmark_result, + store_models = store_models, + check_values = check_values, + callbacks = callbacks, + ties_method = ties_method) + } else { + FSelectInstanceMultiCrit$new( + task = task, + learner = learner, + resampling = resampling, + measures = measures, + terminator = terminator, + store_benchmark_result = store_benchmark_result, + store_models = store_models, + check_values = check_values, + callbacks = callbacks) + } fselector$optimize(instance) instance diff --git a/man-roxygen/param_ties_method.R b/man-roxygen/param_ties_method.R new file mode 100644 index 00000000..e98c9bf0 --- /dev/null +++ b/man-roxygen/param_ties_method.R @@ -0,0 +1,8 @@ +#' @param ties_method (`character(1)`)\cr +#' The method to break ties when selecting sets while optimizing and when selecting the best set. +#' Can be one of `n_features`, `first`, `random`. +#' The option `n_features` (default) selects the feature set with the least features. +#' If there are multiple best feature sets with the same number of features, the first one is selected. +#' The `first` method returns the first added best feature set. +#' The `random` method returns a random feature set from the best feature sets. +#' Ignored if multiple measures are used. diff --git a/man/ArchiveFSelect.Rd b/man/ArchiveFSelect.Rd index 8a7f9ea8..859a55b6 100644 --- a/man/ArchiveFSelect.Rd +++ b/man/ArchiveFSelect.Rd @@ -74,6 +74,14 @@ Benchmark result.} } \if{html}{\out{}} } +\section{Active bindings}{ +\if{html}{\out{
}} +\describe{ +\item{\code{ties_method}}{(\code{character(1)})\cr +Method to handle ties.} +} +\if{html}{\out{
}} +} \section{Methods}{ \subsection{Public methods}{ \itemize{ @@ -83,6 +91,7 @@ Benchmark result.} \item \href{#method-ArchiveFSelect-predictions}{\code{ArchiveFSelect$predictions()}} \item \href{#method-ArchiveFSelect-resample_result}{\code{ArchiveFSelect$resample_result()}} \item \href{#method-ArchiveFSelect-print}{\code{ArchiveFSelect$print()}} +\item \href{#method-ArchiveFSelect-best}{\code{ArchiveFSelect$best()}} \item \href{#method-ArchiveFSelect-clone}{\code{ArchiveFSelect$clone()}} } } @@ -90,7 +99,6 @@ Benchmark result.}
Inherited methods
  • bbotk::Archive$add_evals()
  • -
  • bbotk::Archive$best()
  • bbotk::Archive$clear()
  • bbotk::Archive$format()
  • bbotk::Archive$nds_selection()
  • @@ -103,7 +111,12 @@ Benchmark result.} \subsection{Method \code{new()}}{ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ArchiveFSelect$new(search_space, codomain, check_values = TRUE)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ArchiveFSelect$new( + search_space, + codomain, + check_values = TRUE, + ties_method = "n_features" +)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -119,6 +132,15 @@ Internally created from provided \link[mlr3:Measure]{mlr3::Measure}s by instance \item{\code{check_values}}{(\code{logical(1)})\cr If \code{TRUE} (default), hyperparameter configurations are check for validity.} + +\item{\code{ties_method}}{(\code{character(1)})\cr +The method to break ties when selecting sets while optimizing and when selecting the best set. +Can be one of \code{n_features}, \code{first}, \code{random}. +The option \code{n_features} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, the first one is selected. +The \code{first} method returns the first added best feature set. +The \code{random} method returns a random feature set from the best feature sets. +Ignored if multiple measures are used.} } \if{html}{\out{}} } @@ -232,6 +254,37 @@ Printer. } } \if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ArchiveFSelect-best}{}}} +\subsection{Method \code{best()}}{ +Returns the best scoring feature sets. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{ArchiveFSelect$best(batch = NULL, ties_method = NULL)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{batch}}{(\code{integer()})\cr +The batch number(s) to limit the best results to. +Default is all batches.} + +\item{\code{ties_method}}{(\code{character(1)})\cr +Method to handle ties. +If \code{NULL} (default), the global ties method set during initialization is used. +Can be one of \code{n_features}, \code{first}, \code{random}. +The option \code{n_features} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, the first one is selected. +The \code{first} method returns the first added best feature set. +The \code{random} method returns a random feature set from the best feature sets.} +} +\if{html}{\out{
    }} +} +\subsection{Returns}{ +\code{\link[data.table:data.table]{data.table::data.table()}} +} +} +\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-ArchiveFSelect-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/man/AutoFSelector.Rd b/man/AutoFSelector.Rd index 7eba701d..a8ec1ac9 100644 --- a/man/AutoFSelector.Rd +++ b/man/AutoFSelector.Rd @@ -174,7 +174,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, - callbacks = list() + callbacks = list(), + ties_method = "n_features" )}\if{html}{\out{}} } @@ -214,6 +215,15 @@ validity?} \item{\code{callbacks}}{(list of \link{CallbackFSelect})\cr List of callbacks.} + +\item{\code{ties_method}}{(\code{character(1)})\cr +The method to break ties when selecting sets while optimizing and when selecting the best set. +Can be one of \code{n_features}, \code{first}, \code{random}. +The option \code{n_features} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, the first one is selected. +The \code{first} method returns the first added best feature set. +The \code{random} method returns a random feature set from the best feature sets. +Ignored if multiple measures are used.} } \if{html}{\out{}} } diff --git a/man/FSelectInstanceSingleCrit.Rd b/man/FSelectInstanceSingleCrit.Rd index ae9b804a..32987e5c 100644 --- a/man/FSelectInstanceSingleCrit.Rd +++ b/man/FSelectInstanceSingleCrit.Rd @@ -137,7 +137,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, - callbacks = list() + callbacks = list(), + ties_method = "n_features" )}\if{html}{\out{}} } @@ -173,6 +174,15 @@ validity?} \item{\code{callbacks}}{(list of \link{CallbackFSelect})\cr List of callbacks.} + +\item{\code{ties_method}}{(\code{character(1)})\cr +The method to break ties when selecting sets while optimizing and when selecting the best set. +Can be one of \code{n_features}, \code{first}, \code{random}. +The option \code{n_features} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, the first one is selected. +The \code{first} method returns the first added best feature set. +The \code{random} method returns a random feature set from the best feature sets. +Ignored if multiple measures are used.} } \if{html}{\out{}} } diff --git a/man/fselect.Rd b/man/fselect.Rd index 7cbaa5ad..65d7a758 100644 --- a/man/fselect.Rd +++ b/man/fselect.Rd @@ -16,7 +16,8 @@ fselect( store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, - callbacks = list() + callbacks = list(), + ties_method = "n_features" ) } \arguments{ @@ -61,6 +62,15 @@ validity?} \item{callbacks}{(list of \link{CallbackFSelect})\cr List of callbacks.} + +\item{ties_method}{(\code{character(1)})\cr +The method to break ties when selecting sets while optimizing and when selecting the best set. +Can be one of \code{n_features}, \code{first}, \code{random}. +The option \code{n_features} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, the first one is selected. +The \code{first} method returns the first added best feature set. +The \code{random} method returns a random feature set from the best feature sets. +Ignored if multiple measures are used.} } \value{ \link{FSelectInstanceSingleCrit} | \link{FSelectInstanceMultiCrit} From 72a58ec0a9eaf5cf578faf6ec86c6f8805275738 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 18:02:34 +0100 Subject: [PATCH 04/21] fix: rfe --- R/FSelectorRFE.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/FSelectorRFE.R b/R/FSelectorRFE.R index aea6065a..cdc95755 100644 --- a/R/FSelectorRFE.R +++ b/R/FSelectorRFE.R @@ -205,7 +205,7 @@ rfe_subsets = function(n, n_features, feature_number, subset_sizes, feature_frac # Run recursive feature elimination # instance is changed by reference -rfe_workhorse = function(inst, subsets, recursive, aggregation = raw_importance, ties_method, folds = 1) { +rfe_workhorse = function(inst, subsets, recursive, aggregation = raw_importance, folds = 1) { archive = inst$archive feature_names = inst$archive$cols_x n = length(feature_names) From 349f1722d4df534df74ca1a247c2147ad5bd06a9 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 18:03:40 +0100 Subject: [PATCH 05/21] test: remove default --- tests/testthat/test_ArchiveFSelect.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/testthat/test_ArchiveFSelect.R b/tests/testthat/test_ArchiveFSelect.R index 306327ac..6b7987b9 100644 --- a/tests/testthat/test_ArchiveFSelect.R +++ b/tests/testthat/test_ArchiveFSelect.R @@ -193,7 +193,6 @@ test_that("best method works with ties and maximization", { measures = measure ) - expect_features(instance$archive$best()[, list(x1, x2, x3, x4)], identical_to = c("x1", "x4")) expect_features(instance$archive$best(ties_method = "first")[, list(x1, x2, x3, x4)], identical_to = c("x1", "x4")) expect_features(instance$archive$best(ties_method = "random")[, list(x1, x2, x3, x4)], must_include = "x1") expect_features(instance$archive$best(ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x1") @@ -223,7 +222,6 @@ test_that("best method works with ties and minimization", { measures = measure ) - expect_features(instance$archive$best()[, list(x1, x2, x3, x4)], identical_to = "x2") expect_features(instance$archive$best(ties_method = "first")[, list(x1, x2, x3, x4)], identical_to = "x2") expect_features(instance$archive$best(ties_method = "random")[, list(x1, x2, x3, x4)], must_include = "x2") expect_features(instance$archive$best(ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x2") @@ -253,7 +251,6 @@ test_that("best method works with batches and ties", { measures = measure ) - expect_features(instance$archive$best(batch = c(2, 3))[, list(x1, x2, x3, x4)], identical_to = "x1") expect_features(instance$archive$best(batch = c(1, 3), ties_method = "first")[, list(x1, x2, x3, x4)], identical_to = c("x1", "x4")) expect_features(instance$archive$best(batch = c(1, 2), ties_method = "random")[, list(x1, x2, x3, x4)], must_include = "x1") expect_features(instance$archive$best(batch = c(2, 3), ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x1") From b07e7329f1b72cc07555b8c5ad4acb425179ac2e Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 18:04:22 +0100 Subject: [PATCH 06/21] test: remove duplicated test --- tests/testthat/test_ArchiveFSelect.R | 29 ---------------------------- 1 file changed, 29 deletions(-) diff --git a/tests/testthat/test_ArchiveFSelect.R b/tests/testthat/test_ArchiveFSelect.R index 6b7987b9..0822f5bf 100644 --- a/tests/testthat/test_ArchiveFSelect.R +++ b/tests/testthat/test_ArchiveFSelect.R @@ -140,35 +140,6 @@ test_that("ArchiveFSelect as.data.table function works", { expect_equal(tab$batch_nr, 1:10) }) -test_that("best method works with ties", { - - design = mlr3misc::rowwise_table( - ~x1, ~x2, ~x3, ~x4, - TRUE, FALSE, FALSE, TRUE, - TRUE, FALSE, FALSE, FALSE, - TRUE, TRUE, FALSE, FALSE - ) - - score_design = data.table( - score = c(0.2, 0.2, 0.1), - features = list(c("x1", "x4"), "x1", c("x1", "x2")) - ) - measure = msr("dummy", score_design = score_design, minimize = FALSE) - - instance = fselect( - fselector = fs("design_points", design = design), - task = TEST_MAKE_TSK(), - learner = lrn("regr.rpart"), - resampling = rsmp("cv", folds = 3), - measures = measure - ) - - instance$archive$best() - instance$archive$best(ties_method = "first") - instance$archive$best(ties_method = "random") - instance$archive$best(ties_method = "n_features") -}) - test_that("best method works with ties and maximization", { design = mlr3misc::rowwise_table( From e75c5199d49f48938b4872ee6384e612468c2f49 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 18:18:02 +0100 Subject: [PATCH 07/21] feat: add to fsi --- R/fselect.R | 2 +- R/mlr_callbacks.R | 1 + R/sugar.R | 40 +++++++++++++++++-- man/fsi.Rd | 12 +++++- man/mlr3fselect.one_se_rule.Rd | 1 + tests/testthat/test_ArchiveFSelect.R | 60 ++++++++++++++++++++++++++-- tests/testthat/test_fselect.R | 2 +- 7 files changed, 109 insertions(+), 9 deletions(-) diff --git a/R/fselect.R b/R/fselect.R index c2474841..31164a84 100644 --- a/R/fselect.R +++ b/R/fselect.R @@ -91,7 +91,7 @@ fselect = function( check_values = check_values, callbacks = callbacks, ties_method = ties_method) - } else { + } else { FSelectInstanceMultiCrit$new( task = task, learner = learner, diff --git a/R/mlr_callbacks.R b/R/mlr_callbacks.R index 07217bcb..320a1e24 100644 --- a/R/mlr_callbacks.R +++ b/R/mlr_callbacks.R @@ -116,6 +116,7 @@ load_callback_svm_rfe = function() { #' #' @description #' Selects the smallest feature set within one standard error of the best as the result. +#' If there are multiple feature sets with the same performance and number of features, the first one is selected. #' #' @examples #' clbk("mlr3fselect.one_se_rule") diff --git a/R/sugar.R b/R/sugar.R index 916cdabb..a4193b54 100644 --- a/R/sugar.R +++ b/R/sugar.R @@ -46,13 +46,47 @@ fss = function(.keys, ...) { #' @template param_store_models #' @template param_check_values #' @template param_callbacks +#' @template param_ties_method #' #' @inheritSection FSelectInstanceSingleCrit Resources #' @inheritSection FSelectInstanceSingleCrit Default Measures #' #' @export #' @inherit FSelectInstanceSingleCrit examples -fsi = function(task, learner, resampling, measures = NULL, terminator, store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, callbacks = list()) { - FSelectInstance = if (!is.list(measures)) FSelectInstanceSingleCrit else FSelectInstanceMultiCrit - FSelectInstance$new(task, learner, resampling, measures, terminator, store_benchmark_result, store_models, check_values, callbacks) +fsi = function( + task, + learner, + resampling, + measures = NULL, + terminator, + store_benchmark_result = TRUE, + store_models = FALSE, + check_values = FALSE, + callbacks = list(), + ties_method = "n_features" + ) { + if (!is.list(measures)) { + FSelectInstanceSingleCrit$new( + task = task, + learner = learner, + resampling = resampling, + measure = measures, + terminator = terminator, + store_benchmark_result = store_benchmark_result, + store_models = store_models, + check_values = check_values, + callbacks = callbacks, + ties_method = ties_method) + } else { + FSelectInstanceMultiCrit$new( + task = task, + learner = learner, + resampling = resampling, + measures = measures, + terminator = terminator, + store_benchmark_result = store_benchmark_result, + store_models = store_models, + check_values = check_values, + callbacks = callbacks) + } } diff --git a/man/fsi.Rd b/man/fsi.Rd index 6ef6990c..b38ff294 100644 --- a/man/fsi.Rd +++ b/man/fsi.Rd @@ -13,7 +13,8 @@ fsi( store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, - callbacks = list() + callbacks = list(), + ties_method = "n_features" ) } \arguments{ @@ -47,6 +48,15 @@ validity?} \item{callbacks}{(list of \link{CallbackFSelect})\cr List of callbacks.} + +\item{ties_method}{(\code{character(1)})\cr +The method to break ties when selecting sets while optimizing and when selecting the best set. +Can be one of \code{n_features}, \code{first}, \code{random}. +The option \code{n_features} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, the first one is selected. +The \code{first} method returns the first added best feature set. +The \code{random} method returns a random feature set from the best feature sets. +Ignored if multiple measures are used.} } \description{ Function to construct a \link{FSelectInstanceSingleCrit} or \link{FSelectInstanceMultiCrit}. diff --git a/man/mlr3fselect.one_se_rule.Rd b/man/mlr3fselect.one_se_rule.Rd index cde3ab3f..2be08be4 100644 --- a/man/mlr3fselect.one_se_rule.Rd +++ b/man/mlr3fselect.one_se_rule.Rd @@ -5,6 +5,7 @@ \title{One Standard Error Rule Callback} \description{ Selects the smallest feature set within one standard error of the best as the result. +If there are multiple feature sets with the same performance and number of features, the first one is selected. } \examples{ clbk("mlr3fselect.one_se_rule") diff --git a/tests/testthat/test_ArchiveFSelect.R b/tests/testthat/test_ArchiveFSelect.R index 0822f5bf..ff041cad 100644 --- a/tests/testthat/test_ArchiveFSelect.R +++ b/tests/testthat/test_ArchiveFSelect.R @@ -140,7 +140,61 @@ test_that("ArchiveFSelect as.data.table function works", { expect_equal(tab$batch_nr, 1:10) }) -test_that("best method works with ties and maximization", { +test_that("global ties method works", { + design = mlr3misc::rowwise_table( + ~x1, ~x2, ~x3, ~x4, + FALSE, TRUE, FALSE, TRUE, + TRUE, FALSE, FALSE, TRUE, + TRUE, FALSE, FALSE, FALSE, + FALSE, TRUE, FALSE, FALSE + ) + + score_design = data.table( + score = c(0.1, 0.2, 0.2, 0.1), + features = list(c("x2", "x4"), c("x1", "x4"), "x1", c("x1", "x2")) + ) + measure = msr("dummy", score_design = score_design, minimize = FALSE) + + # n_features + instance = fselect( + fselector = fs("design_points", design = design), + task = TEST_MAKE_TSK(), + learner = lrn("regr.rpart"), + resampling = rsmp("cv", folds = 3), + measures = measure, + ties_method = "n_features" + ) + + expect_equal(instance$result_feature_set, "x1") + + # first + instance$clear() + instance = fselect( + fselector = fs("design_points", design = design), + task = TEST_MAKE_TSK(), + learner = lrn("regr.rpart"), + resampling = rsmp("cv", folds = 3), + measures = measure, + ties_method = "first" + ) + + expect_equal(instance$result_feature_set, c("x1", "x4")) + + # random + instance$clear() + instance = fselect( + fselector = fs("design_points", design = design), + task = TEST_MAKE_TSK(), + learner = lrn("regr.rpart"), + resampling = rsmp("cv", folds = 3), + measures = measure, + ties_method = "random" + ) + + expect_names(instance$result_feature_set, must.include = "x1") +}) + +test_that("local ties method works when maximize measure", { design = mlr3misc::rowwise_table( ~x1, ~x2, ~x3, ~x4, @@ -169,7 +223,7 @@ test_that("best method works with ties and maximization", { expect_features(instance$archive$best(ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x1") }) -test_that("best method works with ties and minimization", { +test_that("local ties method works when minimize measure", { design = mlr3misc::rowwise_table( ~x1, ~x2, ~x3, ~x4, @@ -198,7 +252,7 @@ test_that("best method works with ties and minimization", { expect_features(instance$archive$best(ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x2") }) -test_that("best method works with batches and ties", { +test_that("local ties method works with batches", { design = mlr3misc::rowwise_table( ~x1, ~x2, ~x3, ~x4, diff --git a/tests/testthat/test_fselect.R b/tests/testthat/test_fselect.R index 82ed5ac1..6e83c196 100644 --- a/tests/testthat/test_fselect.R +++ b/tests/testthat/test_fselect.R @@ -38,7 +38,7 @@ test_that("fselect interface is equal to FSelectInstanceSingleCrit", { test_that("fselect interface is equal to FSelectInstanceMultiCrit", { fselect_args = formalArgs(fselect) - fselect_args = fselect_args[fselect_args != "fselector"] + fselect_args = fselect_args[fselect_args %nin% c("fselector", "ties_method")] instance_args = formalArgs(FSelectInstanceMultiCrit$public_methods$initialize) instance_args = c(instance_args, "term_evals", "term_time") From b35e5af878ec3c99bedcc203711c4763897a8bfb Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 18:21:45 +0100 Subject: [PATCH 08/21] test: formal args --- tests/testthat/test_FSelectInstanceSingleCrit.R | 4 ++-- tests/testthat/test_fsi.R | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_FSelectInstanceSingleCrit.R b/tests/testthat/test_FSelectInstanceSingleCrit.R index c3efc622..a9401f8c 100644 --- a/tests/testthat/test_FSelectInstanceSingleCrit.R +++ b/tests/testthat/test_FSelectInstanceSingleCrit.R @@ -79,7 +79,7 @@ test_that("always include variable works", { task = task, learner = learner, resampling = resampling, - measure = msr("classif.ce"), + measures = msr("classif.ce"), terminator = trm("evals", n_evals = 100), store_models = TRUE ) @@ -105,7 +105,7 @@ test_that("always include variables works", { task = task, learner = learner, resampling = resampling, - measure = msr("classif.ce"), + measures = msr("classif.ce"), terminator = trm("evals", n_evals = 100), store_models = TRUE ) diff --git a/tests/testthat/test_fsi.R b/tests/testthat/test_fsi.R index 5b5d2ab0..a0d9a85d 100644 --- a/tests/testthat/test_fsi.R +++ b/tests/testthat/test_fsi.R @@ -42,7 +42,11 @@ test_that("fsi and FSelectInstanceSingleCrit are equal", { }) test_that("fsi and FSelectInstanceMultiCrit are equal", { - expect_equal(formalArgs(fsi), formalArgs(FSelectInstanceMultiCrit$public_methods$initialize)) + fsi_args = formalArgs(fsi) + fsi_args[fsi_args == "measures"] = "measure" + fsi_args = fsi_args[fsi_args != "ties_method"] + + expect_equal(fsi_args, formalArgs(FSelectInstanceMultiCrit$public_methods$initialize)) task = tsk("pima") learner = lrn("classif.rpart") From 915afd439d2e28fa6a19d3d4fdc0258283fb53cf Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 18:24:19 +0100 Subject: [PATCH 09/21] docs: global default --- R/ArchiveFSelect.R | 4 ++-- man/ArchiveFSelect.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/ArchiveFSelect.R b/R/ArchiveFSelect.R index 01599d56..bc51a803 100644 --- a/R/ArchiveFSelect.R +++ b/R/ArchiveFSelect.R @@ -159,9 +159,9 @@ ArchiveFSelect = R6Class("ArchiveFSelect", #' Default is all batches. #' @param ties_method (`character(1)`)\cr #' Method to handle ties. - #' If `NULL` (default), the global ties method set during initialization is used. #' Can be one of `n_features`, `first`, `random`. - #' The option `n_features` (default) selects the feature set with the least features. + #' If `NULL` (default), the global ties method set during initialization is used. + #' The default global ties method is `n_features` which selects the feature set with the least features. #' If there are multiple best feature sets with the same number of features, the first one is selected. #' The `first` method returns the first added best feature set. #' The `random` method returns a random feature set from the best feature sets. diff --git a/man/ArchiveFSelect.Rd b/man/ArchiveFSelect.Rd index 859a55b6..485fd1c3 100644 --- a/man/ArchiveFSelect.Rd +++ b/man/ArchiveFSelect.Rd @@ -271,9 +271,9 @@ Default is all batches.} \item{\code{ties_method}}{(\code{character(1)})\cr Method to handle ties. -If \code{NULL} (default), the global ties method set during initialization is used. Can be one of \code{n_features}, \code{first}, \code{random}. -The option \code{n_features} (default) selects the feature set with the least features. +If \code{NULL} (default), the global ties method set during initialization is used. +The default global ties method is \code{n_features} which selects the feature set with the least features. If there are multiple best feature sets with the same number of features, the first one is selected. The \code{first} method returns the first added best feature set. The \code{random} method returns a random feature set from the best feature sets.} From dbfe950f21c3659335899b338162a1deea024b24 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 18:29:49 +0100 Subject: [PATCH 10/21] test: fsi --- tests/testthat/test_fsi.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test_fsi.R b/tests/testthat/test_fsi.R index a0d9a85d..1c53b18a 100644 --- a/tests/testthat/test_fsi.R +++ b/tests/testthat/test_fsi.R @@ -43,7 +43,6 @@ test_that("fsi and FSelectInstanceSingleCrit are equal", { test_that("fsi and FSelectInstanceMultiCrit are equal", { fsi_args = formalArgs(fsi) - fsi_args[fsi_args == "measures"] = "measure" fsi_args = fsi_args[fsi_args != "ties_method"] expect_equal(fsi_args, formalArgs(FSelectInstanceMultiCrit$public_methods$initialize)) From 4d85646fb97f3cbe11b9bf9d2b2465011459a632 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 15 Dec 2023 21:58:46 +0100 Subject: [PATCH 11/21] chore: update news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index f8882873..50ba7714 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # mlr3fselect (development version) +* feat: Add `ties_method` options `"n_features"`, `"first"` and `"random"` to `ArchiveFSelect$best()`. +* refactor: Optimize runtime of `ArchiveFSelect$best()` method. * feat: Add importance scores to result of `FSelectorRFE`. * feat: Add number of features to `as.data.table.ArchiveFSelect()`. * feat: Features can be always included with the `always_include` column role. From ca61f35102a7f35f4b1987d4b08bfef30fb7aa7f Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 17:47:44 +0100 Subject: [PATCH 12/21] fix: ties method to auto_fselector --- R/auto_fselector.R | 20 ++++++++++++++++++-- man/auto_fselector.Rd | 12 +++++++++++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/R/auto_fselector.R b/R/auto_fselector.R index d1713f9b..27bba10d 100644 --- a/R/auto_fselector.R +++ b/R/auto_fselector.R @@ -19,10 +19,25 @@ #' @template param_store_models #' @template param_check_values #' @template param_callbacks +#' @template param_ties_method #' #' @export #' @inherit AutoFSelector examples -auto_fselector = function(fselector, learner, resampling, measure = NULL, term_evals = NULL, term_time = NULL, terminator = NULL, store_fselect_instance = TRUE, store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, callbacks = list()) { +auto_fselector = function( + fselector, + learner, + resampling, + measure = NULL, + term_evals = NULL, + term_time = NULL, + terminator = NULL, + store_fselect_instance = TRUE, + store_benchmark_result = TRUE, + store_models = FALSE, + check_values = FALSE, + callbacks = list(), + ties_method = "n_features" + ) { terminator = terminator %??% terminator_selection(term_evals, term_time) AutoFSelector$new( @@ -35,5 +50,6 @@ auto_fselector = function(fselector, learner, resampling, measure = NULL, term_e store_benchmark_result = store_benchmark_result, store_models = store_models, check_values = check_values, - callbacks = callbacks) + callbacks = callbacks, + ties_method = ties_method) } diff --git a/man/auto_fselector.Rd b/man/auto_fselector.Rd index 6737995e..039529d7 100644 --- a/man/auto_fselector.Rd +++ b/man/auto_fselector.Rd @@ -16,7 +16,8 @@ auto_fselector( store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, - callbacks = list() + callbacks = list(), + ties_method = "n_features" ) } \arguments{ @@ -61,6 +62,15 @@ validity?} \item{callbacks}{(list of \link{CallbackFSelect})\cr List of callbacks.} + +\item{ties_method}{(\code{character(1)})\cr +The method to break ties when selecting sets while optimizing and when selecting the best set. +Can be one of \code{n_features}, \code{first}, \code{random}. +The option \code{n_features} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, the first one is selected. +The \code{first} method returns the first added best feature set. +The \code{random} method returns a random feature set from the best feature sets. +Ignored if multiple measures are used.} } \value{ \link{AutoFSelector}. From e7dad77277ab7dd4bc760c589940167d0e8ae254 Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 18:00:36 +0100 Subject: [PATCH 13/21] refactor: rename to least features and remove first option --- DESCRIPTION | 2 +- R/ArchiveFSelect.R | 23 ++++++++--------------- R/AutoFSelector.R | 2 +- man-roxygen/param_ties_method.R | 7 +++---- man/ArchiveFSelect.Rd | 13 +++++-------- man/AutoFSelector.Rd | 7 +++---- man/FSelectInstanceSingleCrit.Rd | 7 +++---- man/auto_fselector.Rd | 7 +++---- man/fselect.Rd | 7 +++---- man/fsi.Rd | 7 +++---- 10 files changed, 33 insertions(+), 49 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 39d44575..ffd32745 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,7 +45,7 @@ Encoding: UTF-8 Language: en-US NeedsCompilation: no Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.2.3.9000 Collate: 'ArchiveFSelect.R' 'AutoFSelector.R' diff --git a/R/ArchiveFSelect.R b/R/ArchiveFSelect.R index bc51a803..578af5f3 100644 --- a/R/ArchiveFSelect.R +++ b/R/ArchiveFSelect.R @@ -159,36 +159,29 @@ ArchiveFSelect = R6Class("ArchiveFSelect", #' Default is all batches. #' @param ties_method (`character(1)`)\cr #' Method to handle ties. - #' Can be one of `n_features`, `first`, `random`. #' If `NULL` (default), the global ties method set during initialization is used. - #' The default global ties method is `n_features` which selects the feature set with the least features. - #' If there are multiple best feature sets with the same number of features, the first one is selected. - #' The `first` method returns the first added best feature set. + #' The default global ties method is `least_features` which selects the feature set with the least features. + #' If there are multiple best feature sets with the same number of features, one is selected randomly. #' The `random` method returns a random feature set from the best feature sets. # #' @return [data.table::data.table()] best = function(batch = NULL, ties_method = NULL) { - assert_choice(ties_method, c("first", "random", "n_features"), null.ok = TRUE) - if (is.null(ties_method)) ties_method = self$ties_method + ties_method = assert_choice(ties_method, c("least_features", "random"), null.ok = TRUE) %??% self$ties_method assert_subset(batch, seq_len(self$n_batch)) if (self$n_batch == 0L) return(data.table()) - if (is.null(batch)) { - tab = self$data - } else { - tab = self$data[list(batch), , on = "batch_nr"] - } + tab = if (is.null(batch)) self$data else self$data[list(batch), , on = "batch_nr"] if (self$codomain$target_length == 1L) { y = tab[[self$cols_y]] * -self$codomain$maximization_to_minimization - if (ties_method == "n_features") { + if (ties_method == "least_features") { ii = which(y == max(y)) tab = tab[ii] - ii = which.min(rowSums(tab[, self$cols_x, with = FALSE])) + ii = which_min(rowSums(tab[, self$cols_x, with = FALSE]), ties_method = "random") tab[ii] } else { - ii = which_max(y, ties_method = ties_method) + ii = which_max(y, ties_method = "random") tab[ii] } } else { @@ -205,7 +198,7 @@ ArchiveFSelect = R6Class("ArchiveFSelect", #' Method to handle ties. ties_method = function(rhs) { if (!missing(rhs)) { - assert_choice(rhs, c("first", "random", "n_features")) + assert_choice(rhs, c("least_features", "random")) private$.ties_method = rhs } else { private$.ties_method diff --git a/R/AutoFSelector.R b/R/AutoFSelector.R index 40f9eef3..f184e632 100644 --- a/R/AutoFSelector.R +++ b/R/AutoFSelector.R @@ -138,7 +138,7 @@ AutoFSelector = R6Class("AutoFSelector", ia$check_values = assert_flag(check_values) ia$callbacks = assert_callbacks(as_callbacks(callbacks)) - ia$ties_method = assert_choice(ties_method, c("n_features", "first", "random")) + ia$ties_method = assert_choice(ties_method, c("least_features", "random")) self$instance_args = ia super$initialize( diff --git a/man-roxygen/param_ties_method.R b/man-roxygen/param_ties_method.R index e98c9bf0..6cf5b978 100644 --- a/man-roxygen/param_ties_method.R +++ b/man-roxygen/param_ties_method.R @@ -1,8 +1,7 @@ #' @param ties_method (`character(1)`)\cr #' The method to break ties when selecting sets while optimizing and when selecting the best set. -#' Can be one of `n_features`, `first`, `random`. -#' The option `n_features` (default) selects the feature set with the least features. -#' If there are multiple best feature sets with the same number of features, the first one is selected. -#' The `first` method returns the first added best feature set. +#' Can be `"least_features"` or `"random"`. +#' The option `"least_features"` (default) selects the feature set with the least features. +#' If there are multiple best feature sets with the same number of features, one is selected randomly. #' The `random` method returns a random feature set from the best feature sets. #' Ignored if multiple measures are used. diff --git a/man/ArchiveFSelect.Rd b/man/ArchiveFSelect.Rd index 485fd1c3..4313d760 100644 --- a/man/ArchiveFSelect.Rd +++ b/man/ArchiveFSelect.Rd @@ -135,10 +135,9 @@ If \code{TRUE} (default), hyperparameter configurations are check for validity.} \item{\code{ties_method}}{(\code{character(1)})\cr The method to break ties when selecting sets while optimizing and when selecting the best set. -Can be one of \code{n_features}, \code{first}, \code{random}. -The option \code{n_features} (default) selects the feature set with the least features. -If there are multiple best feature sets with the same number of features, the first one is selected. -The \code{first} method returns the first added best feature set. +Can be \code{"least_features"} or \code{"random"}. +The option \code{"least_features"} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets. Ignored if multiple measures are used.} } @@ -271,11 +270,9 @@ Default is all batches.} \item{\code{ties_method}}{(\code{character(1)})\cr Method to handle ties. -Can be one of \code{n_features}, \code{first}, \code{random}. If \code{NULL} (default), the global ties method set during initialization is used. -The default global ties method is \code{n_features} which selects the feature set with the least features. -If there are multiple best feature sets with the same number of features, the first one is selected. -The \code{first} method returns the first added best feature set. +The default global ties method is \code{least_features} which selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets.} } \if{html}{\out{}} diff --git a/man/AutoFSelector.Rd b/man/AutoFSelector.Rd index a8ec1ac9..5da4b3b2 100644 --- a/man/AutoFSelector.Rd +++ b/man/AutoFSelector.Rd @@ -218,10 +218,9 @@ List of callbacks.} \item{\code{ties_method}}{(\code{character(1)})\cr The method to break ties when selecting sets while optimizing and when selecting the best set. -Can be one of \code{n_features}, \code{first}, \code{random}. -The option \code{n_features} (default) selects the feature set with the least features. -If there are multiple best feature sets with the same number of features, the first one is selected. -The \code{first} method returns the first added best feature set. +Can be \code{"least_features"} or \code{"random"}. +The option \code{"least_features"} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets. Ignored if multiple measures are used.} } diff --git a/man/FSelectInstanceSingleCrit.Rd b/man/FSelectInstanceSingleCrit.Rd index 32987e5c..fa44e995 100644 --- a/man/FSelectInstanceSingleCrit.Rd +++ b/man/FSelectInstanceSingleCrit.Rd @@ -177,10 +177,9 @@ List of callbacks.} \item{\code{ties_method}}{(\code{character(1)})\cr The method to break ties when selecting sets while optimizing and when selecting the best set. -Can be one of \code{n_features}, \code{first}, \code{random}. -The option \code{n_features} (default) selects the feature set with the least features. -If there are multiple best feature sets with the same number of features, the first one is selected. -The \code{first} method returns the first added best feature set. +Can be \code{"least_features"} or \code{"random"}. +The option \code{"least_features"} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets. Ignored if multiple measures are used.} } diff --git a/man/auto_fselector.Rd b/man/auto_fselector.Rd index 039529d7..fc8e088e 100644 --- a/man/auto_fselector.Rd +++ b/man/auto_fselector.Rd @@ -65,10 +65,9 @@ List of callbacks.} \item{ties_method}{(\code{character(1)})\cr The method to break ties when selecting sets while optimizing and when selecting the best set. -Can be one of \code{n_features}, \code{first}, \code{random}. -The option \code{n_features} (default) selects the feature set with the least features. -If there are multiple best feature sets with the same number of features, the first one is selected. -The \code{first} method returns the first added best feature set. +Can be \code{"least_features"} or \code{"random"}. +The option \code{"least_features"} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets. Ignored if multiple measures are used.} } diff --git a/man/fselect.Rd b/man/fselect.Rd index 65d7a758..bc7c4078 100644 --- a/man/fselect.Rd +++ b/man/fselect.Rd @@ -65,10 +65,9 @@ List of callbacks.} \item{ties_method}{(\code{character(1)})\cr The method to break ties when selecting sets while optimizing and when selecting the best set. -Can be one of \code{n_features}, \code{first}, \code{random}. -The option \code{n_features} (default) selects the feature set with the least features. -If there are multiple best feature sets with the same number of features, the first one is selected. -The \code{first} method returns the first added best feature set. +Can be \code{"least_features"} or \code{"random"}. +The option \code{"least_features"} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets. Ignored if multiple measures are used.} } diff --git a/man/fsi.Rd b/man/fsi.Rd index b38ff294..3a076389 100644 --- a/man/fsi.Rd +++ b/man/fsi.Rd @@ -51,10 +51,9 @@ List of callbacks.} \item{ties_method}{(\code{character(1)})\cr The method to break ties when selecting sets while optimizing and when selecting the best set. -Can be one of \code{n_features}, \code{first}, \code{random}. -The option \code{n_features} (default) selects the feature set with the least features. -If there are multiple best feature sets with the same number of features, the first one is selected. -The \code{first} method returns the first added best feature set. +Can be \code{"least_features"} or \code{"random"}. +The option \code{"least_features"} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, one is selected randomly. The \code{random} method returns a random feature set from the best feature sets. Ignored if multiple measures are used.} } From 15b5dc89b1cbf677b38e99ae443e84945aaf91fb Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 18:03:19 +0100 Subject: [PATCH 14/21] chore: whitespace --- R/ArchiveFSelect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArchiveFSelect.R b/R/ArchiveFSelect.R index 578af5f3..55729e7a 100644 --- a/R/ArchiveFSelect.R +++ b/R/ArchiveFSelect.R @@ -170,7 +170,7 @@ ArchiveFSelect = R6Class("ArchiveFSelect", assert_subset(batch, seq_len(self$n_batch)) if (self$n_batch == 0L) return(data.table()) - tab = if (is.null(batch)) self$data else self$data[list(batch), , on = "batch_nr"] + tab = if (is.null(batch)) self$data else self$data[list(batch), , on = "batch_nr"] if (self$codomain$target_length == 1L) { y = tab[[self$cols_y]] * -self$codomain$maximization_to_minimization From 7c3dd63465e88601d94bd2ed122c7b9a2b072660 Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 18:06:05 +0100 Subject: [PATCH 15/21] fix: default --- R/AutoFSelector.R | 2 +- R/FSelectInstanceSingleCrit.R | 2 +- R/fselect.R | 2 +- R/fselect_nested.R | 22 ++++++++++++++++++++-- R/sugar.R | 2 +- 5 files changed, 24 insertions(+), 6 deletions(-) diff --git a/R/AutoFSelector.R b/R/AutoFSelector.R index f184e632..906704ab 100644 --- a/R/AutoFSelector.R +++ b/R/AutoFSelector.R @@ -123,7 +123,7 @@ AutoFSelector = R6Class("AutoFSelector", store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" ) { ia = list() self$fselector = assert_r6(fselector, "FSelector")$clone() diff --git a/R/FSelectInstanceSingleCrit.R b/R/FSelectInstanceSingleCrit.R index 476c1fd3..13cfa008 100644 --- a/R/FSelectInstanceSingleCrit.R +++ b/R/FSelectInstanceSingleCrit.R @@ -104,7 +104,7 @@ FSelectInstanceSingleCrit = R6Class("FSelectInstanceSingleCrit", store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" ) { # initialized specialized fselect archive and objective archive = ArchiveFSelect$new( diff --git a/R/fselect.R b/R/fselect.R index 31164a84..ce2e8a0f 100644 --- a/R/fselect.R +++ b/R/fselect.R @@ -74,7 +74,7 @@ fselect = function( store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" ) { assert_fselector(fselector) terminator = terminator %??% terminator_selection(term_evals, term_time) diff --git a/R/fselect_nested.R b/R/fselect_nested.R index dcf9895d..3f29f3b3 100644 --- a/R/fselect_nested.R +++ b/R/fselect_nested.R @@ -22,6 +22,7 @@ #' @template param_store_models #' @template param_check_values #' @template param_callbacks +#' @template param_ties_method #' #' @export #' @examples @@ -40,7 +41,23 @@ #' #' # Unbiased performance of the final model trained on the full data set #' rr$aggregate() -fselect_nested = function(fselector, task, learner, inner_resampling, outer_resampling, measure = NULL, term_evals = NULL, term_time = NULL, terminator = NULL, store_fselect_instance = TRUE, store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, callbacks = list()) { +fselect_nested = function( + fselector, + task, + learner, + inner_resampling, + outer_resampling, + measure = NULL, + term_evals = NULL, + term_time = NULL, + terminator = NULL, + store_fselect_instance = TRUE, + store_benchmark_result = TRUE, + store_models = FALSE, + check_values = FALSE, + callbacks = list() + ties_method = "least_features" + ) { assert_task(task) assert_resampling(inner_resampling) assert_resampling(outer_resampling) @@ -56,7 +73,8 @@ fselect_nested = function(fselector, task, learner, inner_resampling, outer_resa store_benchmark_result = store_benchmark_result, store_models = store_models, check_values = check_values, - callbacks = callbacks) + callbacks = callbacks, + ties_method = ties_method) resample(task, afs, outer_resampling, store_models = TRUE) } diff --git a/R/sugar.R b/R/sugar.R index a4193b54..72e36353 100644 --- a/R/sugar.R +++ b/R/sugar.R @@ -63,7 +63,7 @@ fsi = function( store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" ) { if (!is.list(measures)) { FSelectInstanceSingleCrit$new( From f4fcb46c23b08b54b674183db77a0b0e63e3b347 Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 18:07:42 +0100 Subject: [PATCH 16/21] fix: nested --- R/auto_fselector.R | 2 +- R/fselect_nested.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/auto_fselector.R b/R/auto_fselector.R index 27bba10d..918795ea 100644 --- a/R/auto_fselector.R +++ b/R/auto_fselector.R @@ -36,7 +36,7 @@ auto_fselector = function( store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" ) { terminator = terminator %??% terminator_selection(term_evals, term_time) diff --git a/R/fselect_nested.R b/R/fselect_nested.R index 3f29f3b3..c5588077 100644 --- a/R/fselect_nested.R +++ b/R/fselect_nested.R @@ -55,7 +55,7 @@ fselect_nested = function( store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, - callbacks = list() + callbacks = list(), ties_method = "least_features" ) { assert_task(task) From 48bb9f1318f8757b9bfbb4bc8a9694315a6b07e6 Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 18:09:14 +0100 Subject: [PATCH 17/21] tests: remove first --- tests/testthat/test_ArchiveFSelect.R | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/tests/testthat/test_ArchiveFSelect.R b/tests/testthat/test_ArchiveFSelect.R index ff041cad..cb36f4a2 100644 --- a/tests/testthat/test_ArchiveFSelect.R +++ b/tests/testthat/test_ArchiveFSelect.R @@ -162,24 +162,11 @@ test_that("global ties method works", { learner = lrn("regr.rpart"), resampling = rsmp("cv", folds = 3), measures = measure, - ties_method = "n_features" + ties_method = "least_features" ) expect_equal(instance$result_feature_set, "x1") - # first - instance$clear() - instance = fselect( - fselector = fs("design_points", design = design), - task = TEST_MAKE_TSK(), - learner = lrn("regr.rpart"), - resampling = rsmp("cv", folds = 3), - measures = measure, - ties_method = "first" - ) - - expect_equal(instance$result_feature_set, c("x1", "x4")) - # random instance$clear() instance = fselect( @@ -218,9 +205,8 @@ test_that("local ties method works when maximize measure", { measures = measure ) - expect_features(instance$archive$best(ties_method = "first")[, list(x1, x2, x3, x4)], identical_to = c("x1", "x4")) expect_features(instance$archive$best(ties_method = "random")[, list(x1, x2, x3, x4)], must_include = "x1") - expect_features(instance$archive$best(ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x1") + expect_features(instance$archive$best(ties_method = "least_features")[, list(x1, x2, x3, x4)], identical_to = "x1") }) test_that("local ties method works when minimize measure", { @@ -247,9 +233,8 @@ test_that("local ties method works when minimize measure", { measures = measure ) - expect_features(instance$archive$best(ties_method = "first")[, list(x1, x2, x3, x4)], identical_to = "x2") expect_features(instance$archive$best(ties_method = "random")[, list(x1, x2, x3, x4)], must_include = "x2") - expect_features(instance$archive$best(ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x2") + expect_features(instance$archive$best(ties_method = "least_features")[, list(x1, x2, x3, x4)], identical_to = "x2") }) test_that("local ties method works with batches", { @@ -276,7 +261,6 @@ test_that("local ties method works with batches", { measures = measure ) - expect_features(instance$archive$best(batch = c(1, 3), ties_method = "first")[, list(x1, x2, x3, x4)], identical_to = c("x1", "x4")) expect_features(instance$archive$best(batch = c(1, 2), ties_method = "random")[, list(x1, x2, x3, x4)], must_include = "x1") - expect_features(instance$archive$best(batch = c(2, 3), ties_method = "n_features")[, list(x1, x2, x3, x4)], identical_to = "x1") + expect_features(instance$archive$best(batch = c(2, 3), ties_method = "least_features")[, list(x1, x2, x3, x4)], identical_to = "x1") }) From ca44cc977dcc1246ebadadb1be5927bdedc1db72 Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 18:10:19 +0100 Subject: [PATCH 18/21] fix: archive --- R/ArchiveFSelect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ArchiveFSelect.R b/R/ArchiveFSelect.R index 55729e7a..bc9f7d36 100644 --- a/R/ArchiveFSelect.R +++ b/R/ArchiveFSelect.R @@ -78,7 +78,7 @@ ArchiveFSelect = R6Class("ArchiveFSelect", search_space, codomain, check_values = TRUE, - ties_method = "n_features" + ties_method = "least_features" ) { super$initialize(search_space, codomain, check_values) self$ties_method = ties_method From 7d815d45d5ab7af48609a537be8f6736a45f95d1 Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 18:29:00 +0100 Subject: [PATCH 19/21] docs: update --- DESCRIPTION | 2 +- man/ArchiveFSelect.Rd | 2 +- man/AutoFSelector.Rd | 2 +- man/FSelectInstanceSingleCrit.Rd | 2 +- man/auto_fselector.Rd | 2 +- man/fselect.Rd | 2 +- man/fselect_nested.Rd | 11 ++++++++++- man/fsi.Rd | 2 +- 8 files changed, 17 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ffd32745..39d44575 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,7 +45,7 @@ Encoding: UTF-8 Language: en-US NeedsCompilation: no Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3.9000 +RoxygenNote: 7.2.3 Collate: 'ArchiveFSelect.R' 'AutoFSelector.R' diff --git a/man/ArchiveFSelect.Rd b/man/ArchiveFSelect.Rd index 4313d760..b15739e0 100644 --- a/man/ArchiveFSelect.Rd +++ b/man/ArchiveFSelect.Rd @@ -115,7 +115,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. search_space, codomain, check_values = TRUE, - ties_method = "n_features" + ties_method = "least_features" )}\if{html}{\out{}} } diff --git a/man/AutoFSelector.Rd b/man/AutoFSelector.Rd index 5da4b3b2..26485557 100644 --- a/man/AutoFSelector.Rd +++ b/man/AutoFSelector.Rd @@ -175,7 +175,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" )}\if{html}{\out{}} } diff --git a/man/FSelectInstanceSingleCrit.Rd b/man/FSelectInstanceSingleCrit.Rd index fa44e995..280102fb 100644 --- a/man/FSelectInstanceSingleCrit.Rd +++ b/man/FSelectInstanceSingleCrit.Rd @@ -138,7 +138,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" )}\if{html}{\out{}} } diff --git a/man/auto_fselector.Rd b/man/auto_fselector.Rd index fc8e088e..61ab526d 100644 --- a/man/auto_fselector.Rd +++ b/man/auto_fselector.Rd @@ -17,7 +17,7 @@ auto_fselector( store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" ) } \arguments{ diff --git a/man/fselect.Rd b/man/fselect.Rd index bc7c4078..2431c713 100644 --- a/man/fselect.Rd +++ b/man/fselect.Rd @@ -17,7 +17,7 @@ fselect( store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" ) } \arguments{ diff --git a/man/fselect_nested.Rd b/man/fselect_nested.Rd index 4d914a43..c939347d 100644 --- a/man/fselect_nested.Rd +++ b/man/fselect_nested.Rd @@ -18,7 +18,8 @@ fselect_nested( store_benchmark_result = TRUE, store_models = FALSE, check_values = FALSE, - callbacks = list() + callbacks = list(), + ties_method = "least_features" ) } \arguments{ @@ -67,6 +68,14 @@ validity?} \item{callbacks}{(list of \link{CallbackFSelect})\cr List of callbacks.} + +\item{ties_method}{(\code{character(1)})\cr +The method to break ties when selecting sets while optimizing and when selecting the best set. +Can be \code{"least_features"} or \code{"random"}. +The option \code{"least_features"} (default) selects the feature set with the least features. +If there are multiple best feature sets with the same number of features, one is selected randomly. +The \code{random} method returns a random feature set from the best feature sets. +Ignored if multiple measures are used.} } \value{ \link[mlr3:ResampleResult]{mlr3::ResampleResult} diff --git a/man/fsi.Rd b/man/fsi.Rd index 3a076389..99a53968 100644 --- a/man/fsi.Rd +++ b/man/fsi.Rd @@ -14,7 +14,7 @@ fsi( store_models = FALSE, check_values = FALSE, callbacks = list(), - ties_method = "n_features" + ties_method = "least_features" ) } \arguments{ From c9d51d922b71a70529309e9e74ffd207f6c620cd Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 18:36:01 +0100 Subject: [PATCH 20/21] chore: news --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 50ba7714..e3589ebb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # mlr3fselect (development version) -* feat: Add `ties_method` options `"n_features"`, `"first"` and `"random"` to `ArchiveFSelect$best()`. +* feat: Add `ties_method` options `"least_features"` and `"random"` to `ArchiveFSelect$best()`. * refactor: Optimize runtime of `ArchiveFSelect$best()` method. * feat: Add importance scores to result of `FSelectorRFE`. * feat: Add number of features to `as.data.table.ArchiveFSelect()`. From ae094562e6d4574d185eb1af5e09aaa82741c4e7 Mon Sep 17 00:00:00 2001 From: be-marc Date: Sat, 16 Dec 2023 18:37:01 +0100 Subject: [PATCH 21/21] fix: rfe --- R/FSelectorRFE.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/FSelectorRFE.R b/R/FSelectorRFE.R index 550c7d8e..138e1b33 100644 --- a/R/FSelectorRFE.R +++ b/R/FSelectorRFE.R @@ -104,8 +104,7 @@ FSelectorRFE = R6Class("FSelectorRFE", feature_number = p_int(lower = 1), subset_sizes = p_uty(), recursive = p_lgl(default = TRUE), - aggregation = p_fct(c("mean", "rank"), default = "rank"), - ties_method = p_fct(c("first", "random", "n_features"), default = "first") + aggregation = p_fct(c("mean", "rank"), default = "rank") ) ps$values = list(recursive = TRUE, aggregation = "rank")