From da4d63207084ce01bc9a56ed23c713af414432d6 Mon Sep 17 00:00:00 2001 From: Bernd Bischl Date: Sat, 24 Aug 2024 12:32:25 +0200 Subject: [PATCH] ... (#1118) --- NEWS.md | 1 + R/LearnerClassifDebug.R | 32 +++++++++++++++++-- R/LearnerRegrDebug.R | 21 ++++++++++++ man/Measure.Rd | 12 +------ man/MeasureClassif.Rd | 12 +------ man/MeasureRegr.Rd | 12 +------ man/MeasureSimilarity.Rd | 12 +------ man/mlr_learners_classif.debug.Rd | 28 ++++++++++++++++ man/mlr_learners_regr.debug.Rd | 28 ++++++++++++++++ man/mlr_measures_regr.pinball.Rd | 6 ++-- .../test_mlr_learners_classif_debug.R | 10 ++++++ tests/testthat/test_mlr_learners_regr_debug.R | 25 +++++++++++++++ 12 files changed, 151 insertions(+), 48 deletions(-) create mode 100644 tests/testthat/test_mlr_learners_regr_debug.R diff --git a/NEWS.md b/NEWS.md index ef75c8717..7ecd4feac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -23,6 +23,7 @@ * feat: Added new measure `mu_auc`. * feat: Add option to calculate the mean of the true values on the train set in `msr("regr.rsq")`. * feat: Default fallback learner is set when encapsulation is activated. +* feat: Learners classif.debug and regr.debug have new methods `$importance()` and `$selected_features()` for testing, also in downstream packages # mlr3 0.20.2 diff --git a/R/LearnerClassifDebug.R b/R/LearnerClassifDebug.R index f22a0dd24..908bb7965 100644 --- a/R/LearnerClassifDebug.R +++ b/R/LearnerClassifDebug.R @@ -105,6 +105,27 @@ LearnerClassifDebug = R6Class("LearnerClassifDebug", inherit = LearnerClassif, #' Additional arguments passed to [`unmarshal_model()`]. unmarshal = function(...) { learner_unmarshal(.learner = self, ...) + }, + + #' @description + #' Returns 0 for each feature seen in training. + #' @return Named `numeric()`. + importance = function() { + if (is.null(self$model)) { + stopf("No model stored") + } + fns = self$state$feature_names + set_names(rep(0, length(fns)), fns) + }, + + #' @description + #' Always returns character(0). + #' @return `character()`. + selected_features = function() { + if (is.null(self$model)) { + stopf("No model stored") + } + character(0) } ), active = list( @@ -169,8 +190,15 @@ LearnerClassifDebug = R6Class("LearnerClassifDebug", inherit = LearnerClassif, stopf("Early stopping is only possible when a validation task is present.") } - model = list(response = as.character(sample(task$truth(), 1L)), pid = Sys.getpid(), id = UUIDgenerate(), - random_number = sample(100000, 1), iter = if (isTRUE(pv$early_stopping)) sample(pv$iter %??% 1L, 1L) else pv$iter %??% 1L + model = list( + response = as.character(sample(task$truth(), 1L)), + pid = Sys.getpid(), + id = UUIDgenerate(), + random_number = sample(100000, 1), + iter = if (isTRUE(pv$early_stopping)) + sample(pv$iter %??% 1L, 1L) + else + pv$iter %??% 1L ) if (!is.null(valid_truth)) { diff --git a/R/LearnerRegrDebug.R b/R/LearnerRegrDebug.R index 4f7ea2672..f9257282c 100644 --- a/R/LearnerRegrDebug.R +++ b/R/LearnerRegrDebug.R @@ -48,6 +48,27 @@ LearnerRegrDebug = R6Class("LearnerRegrDebug", inherit = LearnerRegr, man = "mlr3::mlr_learners_regr.debug", label = "Debug Learner for Regression" ) + }, + + #' @description + #' Returns 0 for each feature seen in training. + #' @return Named `numeric()`. + importance = function() { + if (is.null(self$model)) { + stopf("No model stored") + } + fns = self$state$feature_names + set_names(rep(0, length(fns)), fns) + }, + + #' @description + #' Always returns character(0). + #' @return `character()`. + selected_features = function() { + if (is.null(self$model)) { + stopf("No model stored") + } + character(0) } ), private = list( diff --git a/man/Measure.Rd b/man/Measure.Rd index d80dd4bb1..564c16555 100644 --- a/man/Measure.Rd +++ b/man/Measure.Rd @@ -264,22 +264,12 @@ Supported by \code{mlr3}: \itemize{ \item \code{"requires_task"} (requires the complete \link{Task}), \item \code{"requires_learner"} (requires the trained \link{Learner}), -<<<<<<< HEAD \item \code{"requires_model"} (requires the trained \link{Learner}, including the fitted model), \item \code{"requires_train_set"} (requires the training indices from the \link{Resampling}), \item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}), \item \code{"weights"} (support weighted scoring using sample weights from task, column role \code{weights_measure}), and \item \code{"primary_iters"} (the measure explictly handles resamplings that only use a subset of their iterations for the point estimate) -======= -\item \code{"requires_model"} (requires the trained \link{Learner}, including the fitted -model), -\item \code{"requires_train_set"} (requires the training indices from the \link{Resampling}), and -\item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}). -\item \code{"primary_iters"} (the measure explictly handles resamplings that only use a subset -of their iterations for the point estimate). -\item \code{"requires_no_prediction"} (No prediction is required; This usually means that the -measure extracts some information from the learner state.). ->>>>>>> main +\item \code{"requires_no_prediction"} (No prediction is required; This usually means that the measure extracts some information from the learner state.). }} \item{\code{predict_type}}{(\code{character(1)})\cr diff --git a/man/MeasureClassif.Rd b/man/MeasureClassif.Rd index ef59fa8e0..74b03382e 100644 --- a/man/MeasureClassif.Rd +++ b/man/MeasureClassif.Rd @@ -133,22 +133,12 @@ Supported by \code{mlr3}: \itemize{ \item \code{"requires_task"} (requires the complete \link{Task}), \item \code{"requires_learner"} (requires the trained \link{Learner}), -<<<<<<< HEAD \item \code{"requires_model"} (requires the trained \link{Learner}, including the fitted model), \item \code{"requires_train_set"} (requires the training indices from the \link{Resampling}), \item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}), \item \code{"weights"} (support weighted scoring using sample weights from task, column role \code{weights_measure}), and \item \code{"primary_iters"} (the measure explictly handles resamplings that only use a subset of their iterations for the point estimate) -======= -\item \code{"requires_model"} (requires the trained \link{Learner}, including the fitted -model), -\item \code{"requires_train_set"} (requires the training indices from the \link{Resampling}), and -\item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}). -\item \code{"primary_iters"} (the measure explictly handles resamplings that only use a subset -of their iterations for the point estimate). -\item \code{"requires_no_prediction"} (No prediction is required; This usually means that the -measure extracts some information from the learner state.). ->>>>>>> main +\item \code{"requires_no_prediction"} (No prediction is required; This usually means that the measure extracts some information from the learner state.). }} \item{\code{predict_type}}{(\code{character(1)})\cr diff --git a/man/MeasureRegr.Rd b/man/MeasureRegr.Rd index a38ba9755..b533e2b10 100644 --- a/man/MeasureRegr.Rd +++ b/man/MeasureRegr.Rd @@ -133,22 +133,12 @@ Supported by \code{mlr3}: \itemize{ \item \code{"requires_task"} (requires the complete \link{Task}), \item \code{"requires_learner"} (requires the trained \link{Learner}), -<<<<<<< HEAD \item \code{"requires_model"} (requires the trained \link{Learner}, including the fitted model), \item \code{"requires_train_set"} (requires the training indices from the \link{Resampling}), \item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}), \item \code{"weights"} (support weighted scoring using sample weights from task, column role \code{weights_measure}), and \item \code{"primary_iters"} (the measure explictly handles resamplings that only use a subset of their iterations for the point estimate) -======= -\item \code{"requires_model"} (requires the trained \link{Learner}, including the fitted -model), -\item \code{"requires_train_set"} (requires the training indices from the \link{Resampling}), and -\item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}). -\item \code{"primary_iters"} (the measure explictly handles resamplings that only use a subset -of their iterations for the point estimate). -\item \code{"requires_no_prediction"} (No prediction is required; This usually means that the -measure extracts some information from the learner state.). ->>>>>>> main +\item \code{"requires_no_prediction"} (No prediction is required; This usually means that the measure extracts some information from the learner state.). }} \item{\code{predict_type}}{(\code{character(1)})\cr diff --git a/man/MeasureSimilarity.Rd b/man/MeasureSimilarity.Rd index 4aafb7f2a..f55bed160 100644 --- a/man/MeasureSimilarity.Rd +++ b/man/MeasureSimilarity.Rd @@ -147,22 +147,12 @@ Supported by \code{mlr3}: \itemize{ \item \code{"requires_task"} (requires the complete \link{Task}), \item \code{"requires_learner"} (requires the trained \link{Learner}), -<<<<<<< HEAD \item \code{"requires_model"} (requires the trained \link{Learner}, including the fitted model), \item \code{"requires_train_set"} (requires the training indices from the \link{Resampling}), \item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}), \item \code{"weights"} (support weighted scoring using sample weights from task, column role \code{weights_measure}), and \item \code{"primary_iters"} (the measure explictly handles resamplings that only use a subset of their iterations for the point estimate) -======= -\item \code{"requires_model"} (requires the trained \link{Learner}, including the fitted -model), -\item \code{"requires_train_set"} (requires the training indices from the \link{Resampling}), and -\item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}). -\item \code{"primary_iters"} (the measure explictly handles resamplings that only use a subset -of their iterations for the point estimate). -\item \code{"requires_no_prediction"} (No prediction is required; This usually means that the -measure extracts some information from the learner state.). ->>>>>>> main +\item \code{"requires_no_prediction"} (No prediction is required; This usually means that the measure extracts some information from the learner state.). }} \item{\code{predict_type}}{(\code{character(1)})\cr diff --git a/man/mlr_learners_classif.debug.Rd b/man/mlr_learners_classif.debug.Rd index 2f7aa5cdb..cc2ad3509 100644 --- a/man/mlr_learners_classif.debug.Rd +++ b/man/mlr_learners_classif.debug.Rd @@ -144,6 +144,8 @@ a ratio in $(0, 1)$, \code{"test"}, or \code{"predefined"}.} \item \href{#method-LearnerClassifDebug-new}{\code{LearnerClassifDebug$new()}} \item \href{#method-LearnerClassifDebug-marshal}{\code{LearnerClassifDebug$marshal()}} \item \href{#method-LearnerClassifDebug-unmarshal}{\code{LearnerClassifDebug$unmarshal()}} +\item \href{#method-LearnerClassifDebug-importance}{\code{LearnerClassifDebug$importance()}} +\item \href{#method-LearnerClassifDebug-selected_features}{\code{LearnerClassifDebug$selected_features()}} \item \href{#method-LearnerClassifDebug-clone}{\code{LearnerClassifDebug$clone()}} } } @@ -208,6 +210,32 @@ Additional arguments passed to \code{\link[=unmarshal_model]{unmarshal_model()}} } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerClassifDebug-importance}{}}} +\subsection{Method \code{importance()}}{ +Returns 0 for each feature seen in training. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{LearnerClassifDebug$importance()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Named \code{numeric()}. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerClassifDebug-selected_features}{}}} +\subsection{Method \code{selected_features()}}{ +Always returns character(0). +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{LearnerClassifDebug$selected_features()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +\code{character()}. +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-LearnerClassifDebug-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/man/mlr_learners_regr.debug.Rd b/man/mlr_learners_regr.debug.Rd index e214aa4c5..2d589304b 100644 --- a/man/mlr_learners_regr.debug.Rd +++ b/man/mlr_learners_regr.debug.Rd @@ -93,6 +93,8 @@ Other Learner: \subsection{Public methods}{ \itemize{ \item \href{#method-LearnerRegrDebug-new}{\code{LearnerRegrDebug$new()}} +\item \href{#method-LearnerRegrDebug-importance}{\code{LearnerRegrDebug$importance()}} +\item \href{#method-LearnerRegrDebug-selected_features}{\code{LearnerRegrDebug$selected_features()}} \item \href{#method-LearnerRegrDebug-clone}{\code{LearnerRegrDebug$clone()}} } } @@ -119,6 +121,32 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \if{html}{\out{
}}\preformatted{LearnerRegrDebug$new()}\if{html}{\out{
}} } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerRegrDebug-importance}{}}} +\subsection{Method \code{importance()}}{ +Returns 0 for each feature seen in training. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{LearnerRegrDebug$importance()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Named \code{numeric()}. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerRegrDebug-selected_features}{}}} +\subsection{Method \code{selected_features()}}{ +Always returns character(0). +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{LearnerRegrDebug$selected_features()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +\code{character()}. +} } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/man/mlr_measures_regr.pinball.Rd b/man/mlr_measures_regr.pinball.Rd index 744ae6970..b836c7fe4 100644 --- a/man/mlr_measures_regr.pinball.Rd +++ b/man/mlr_measures_regr.pinball.Rd @@ -32,8 +32,10 @@ msr("regr.pinball") } \section{Parameters}{ - -Empty ParamSet +\tabular{llll}{ + Id \tab Type \tab Default \tab Levels \cr + use_weights \tab logical \tab FALSE \tab TRUE, FALSE \cr +} } \section{Meta Information}{ diff --git a/tests/testthat/test_mlr_learners_classif_debug.R b/tests/testthat/test_mlr_learners_classif_debug.R index a5d78d676..8af219e4f 100644 --- a/tests/testthat/test_mlr_learners_classif_debug.R +++ b/tests/testthat/test_mlr_learners_classif_debug.R @@ -81,3 +81,13 @@ test_that("marshaling", { p2 = l$marshal()$unmarshal()$predict(task) expect_equal(p1, p2) }) + +test_that("importance and selected features", { + l = lrn("classif.debug") + task = tsk("iris") + l$train(task) + expect_equal(l$selected_features(), character(0)) + expect_equal(l$importance(), set_names(rep(0, task$n_features), task$feature_names)) +}) + + diff --git a/tests/testthat/test_mlr_learners_regr_debug.R b/tests/testthat/test_mlr_learners_regr_debug.R new file mode 100644 index 000000000..55f6c4e85 --- /dev/null +++ b/tests/testthat/test_mlr_learners_regr_debug.R @@ -0,0 +1,25 @@ +# this test / files was missing, only classif.debug was unit-tested +# I added at least a few basic tests when i added methods "importance" and "selected_features" + +test_that("Simple training/predict", { + task = tsk("mtcars") + learner = lrn("regr.debug") + expect_learner(learner, task) + + prediction = learner$train(task)$predict(task) + expect_class(learner$model, "regr.debug_model") + expect_numeric(learner$model$response, len = 1L, any.missing = FALSE) + expect_numeric(prediction$response, any.missing = FALSE) +}) + + +test_that("importance and selected features", { + l = lrn("regr.debug") + task = tsk("mtcars") + l$train(task) + expect_equal(l$selected_features(), character(0)) + expect_equal(l$importance(), set_names(rep(0, task$n_features), task$feature_names)) +}) + + +