mlr-org · damirpolat · Feb 6, 2024 · Feb 3, 2024 · Feb 3, 2024 · Feb 5, 2024
diff --git a/R/LearnerClustAffinityPropagation.R b/R/LearnerClustAffinityPropagation.R
@@ -26,13 +26,7 @@ LearnerClustAP = R6Class("LearnerClustAP",
     initialize = function() {
       ps = ps(
         s = p_uty(tags = c("required", "train")),
-        p = p_uty(custom_check = function(x) {
-          if (test_numeric(x)) {
-            return(TRUE)
-          } else {
-            stop("`p` needs to be a numeric vector")
-          }
-        }, default = NA, tags = "train"),
+        p = p_uty(default = NA, tags = "train", custom_check = crate(check_numeric)),
         q = p_dbl(lower = 0L, upper = 1L, tags = "train"),
         maxits = p_int(lower = 1L, default = 1000L, tags = "train"),
         convits = p_int(lower = 1L, default = 100L, tags = "train"),

diff --git a/R/LearnerClustAgnes.R b/R/LearnerClustAgnes.R
@@ -27,22 +27,22 @@ LearnerClustAgnes = R6Class("LearnerClustAgnes",
         method = p_fct(default = "average", levels = c("average", "single", "complete", "ward", "weighted", "flexible", "gaverage"), tags = "train"),
         trace.lev = p_int(lower = 0L, default = 0L, tags = "train"),
         k = p_int(lower = 1L, default = 2L, tags = "predict"),
-        par.method = p_uty(tags = "train", custom_check = function(x) {
-            if (test_numeric(x) || test_list(x)) {
-              if (length(x) %in% c(1L, 3L, 4L)) {
-                return(TRUE)
-              }
-              stop("`par.method` needs be of length 1, 3, or 4")
-            } else {
-              stop("`par.method` needs to be a numeric vector")
-            }
-          })
+        par.method = p_uty(tags = "train", custom_check = crate(function(x) {
+          if (!(test_numeric(x) || test_list(x))) {
+            return("`par.method` needs to be a numeric vector")
+          }
+          if (length(x) %in% c(1L, 3L, 4L)) {
+            TRUE
+          } else {
+            "`par.method` needs be of length 1, 3, or 4"
+          }
+        }))
       )
 
       # param deps
       ps$add_dep("par.method", "method", CondAnyOf$new(c("flexible", "gaverage")))
 
-      ps$values = list(k = 2L)
+      ps$set_values(k = 2L)
 
       super$initialize(
         id = "clust.agnes",

diff --git a/R/LearnerClustCMeans.R b/R/LearnerClustCMeans.R
@@ -23,43 +23,26 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans",
     #' Creates a new instance of this [R6][R6::R6Class] class.
     initialize = function() {
       ps = ps(
-        centers = p_uty(tags = c("required", "train"), default = 2L,
-          custom_check = function(x) {
-            if (test_data_frame(x)) {
-              return(TRUE)
-            } else if (test_int(x)) {
-              assert_true(x >= 1L)
-            } else {
-              return("`centers` must be integer or data.frame with initial cluster centers")
-            }
-          }
-        ),
+        centers = p_uty(tags = c("required", "train"), default = 2L, custom_check = crate(check_centers)),
         iter.max = p_int(lower = 1L, default = 100L, tags = "train"),
         verbose = p_lgl(default = FALSE, tags = "train"),
         dist = p_fct(levels = c("euclidean", "manhattan"), default = "euclidean", tags = "train"),
         method = p_fct(levels = c("cmeans", "ufcl"), default = "cmeans", tags = "train"),
         m = p_dbl(lower = 1L, default = 2L, tags = "train"),
         rate.par = p_dbl(lower = 0L, upper = 1L, tags = "train"),
-        weights = p_uty(default = 1L, custom_check = function(x) {
-            if (test_numeric(x)) {
-              if (sum(sign(x)) == length(x)) {
-                return(TRUE)
-              } else {
-                return("`weights` must contain only positive numbers")
-              }
-            } else if (test_count(x)) {
-              return(TRUE)
-            } else {
-              return("`weights` must be positive numeric vector or a single positive number")
-            }
-          },
-          tags = "train"),
+        weights = p_uty(default = 1L, tags = "train", custom_check = crate(function(x) {
+          if (test_numeric(x) && !all(x > 0) || check_count(x)) {
+            TRUE
+          } else {
+            "`weights` must be positive numeric vector or a single positive number"
+          }
+        })),
         control = p_uty(tags = "train")
       )
       # add deps
       ps$add_dep("rate.par", "method", CondEqual$new("ufcl"))
 
-      ps$values = list(centers = 2L)
+      ps$set_values(centers = 2L)
 
       super$initialize(
         id = "clust.cmeans",

diff --git a/R/LearnerClustDBSCAN.R b/R/LearnerClustDBSCAN.R
@@ -24,13 +24,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
         eps = p_dbl(lower = 0L, tags = c("required", "train")),
         minPts = p_int(lower = 0L, default = 5L, tags = "train"),
         borderPoints = p_lgl(default = TRUE, tags = "train"),
-        weights = p_uty(custom_check = function(x) {
-          if (test_numeric(x)) {
-            return(TRUE)
-          } else {
-            stop("`weights` need to be a numeric vector")
-          }
-        }, tags = "train"),
+        weights = p_uty(tags = "train"), custom_check = crate(check_numeric),
         search = p_fct(levels = c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"),
         bucketSize = p_int(lower = 1L, default = 10L, tags = "train"),
         splitRule = p_fct(levels = c("STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR", "SUGGEST"), default = "SUGGEST", tags = "train"),

diff --git a/R/LearnerClustDBSCANfpc.R b/R/LearnerClustDBSCANfpc.R
@@ -24,28 +24,25 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
         scale = p_lgl(default = FALSE, tags = "train"),
         method = p_fct(levels = c("hybrid", "raw", "dist"), tags = "train"),
         seeds = p_lgl(default = TRUE, tags = "train"),
-        showplot = p_uty(custom_check = function(x) {
-          if (test_flag(x)) {
-            return(TRUE)
-          } else if (test_int(x, lower = 0, upper = 2)) {
-            return(TRUE)
+        showplot = p_uty(default = FALSE, tags = "train", custom_check = crate(function(x) {
+          if (test_flag(x) && test_int(x, lower = 0, upper = 2)) {
+            TRUE
           } else {
-            stop("`showplot` need to be either logical or integer between 0 and 2")
+            "`showplot` need to be either logical or integer between 0 and 2"
           }
-        }, default = FALSE, tags = "train"),
-        countmode = p_uty(custom_check = function(x) {
-          if (test_integer(x)) {
-            return(TRUE)
-          } else if (test_null(x)) {
-            return(TRUE)
+        })),
+        countmode = p_uty(default = NULL, tags = "train", custom_check = crate(function(x) {
+          if (test_integer(x, null.ok = TRUE)) {
+            TRUE
           } else {
-            stop("`countmode` need to be NULL or vector of integers")
+            "`countmode` need to be NULL or vector of integers"
           }
-        }, default = NULL, tags = "train")
+        }))
       )
 
-      param_set$values = list(MinPts = 5L, scale = FALSE, seeds = TRUE,
-        showplot = FALSE, countmode = NULL)
+      param_set$set_values(
+        MinPts = 5L, scale = FALSE, seeds = TRUE, showplot = FALSE, countmode = NULL
+      )
 
       super$initialize(
         id = "clust.dbscan_fpc",

diff --git a/R/LearnerClustDiana.R b/R/LearnerClustDiana.R
@@ -27,7 +27,7 @@ LearnerClustDiana = R6Class("LearnerClustDiana",
         trace.lev = p_int(lower = 0L, default = 0L, tags = "train"),
         k = p_int(lower = 1L, default = 2L, tags = "predict")
       )
-      ps$values = list(k = 2L)
+      ps$set_values(k = 2L)
 
       super$initialize(
         id = "clust.diana",

diff --git a/R/LearnerClustFanny.R b/R/LearnerClustFanny.R
@@ -33,7 +33,7 @@ LearnerClustFanny = R6Class("LearnerClustFanny",
         tol = p_dbl(lower = 0L, default = 1e-15, tags = "train"),
         trace.lev = p_int(lower = 0L, default = 0L, tags = "train")
       )
-      ps$values = list(k = 2L)
+      ps$set_values(k = 2L)
 
       super$initialize(
         id = "clust.fanny",

diff --git a/R/LearnerClustFeatureless.R b/R/LearnerClustFeatureless.R
@@ -21,7 +21,7 @@ LearnerClustFeatureless = R6Class("LearnerClustFeatureless",
       ps = ps(
         num_clusters = p_int(lower = 1L, default = 1L, tags = c("required", "train", "predict"))
       )
-      ps$values = list(num_clusters = 1L)
+      ps$set_values(num_clusters = 1L)
 
       super$initialize(
         id = "clust.featureless",

diff --git a/R/LearnerClustHclust.R b/R/LearnerClustHclust.R
@@ -32,7 +32,7 @@ LearnerClustHclust = R6Class("LearnerClustHclust",
 
       # param deps
       ps$add_dep("p", "distmethod", CondAnyOf$new("minkowski"))
-      ps$values = list(k = 2L, distmethod = "euclidean")
+      ps$set_values(k = 2L, distmethod = "euclidean")
 
       super$initialize(
         id = "clust.hclust",

diff --git a/R/LearnerClustKKMeans.R b/R/LearnerClustKKMeans.R
@@ -24,19 +24,9 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans",
     #' Creates a new instance of this [R6][R6::R6Class] class.
     initialize = function() {
       ps = ps(
-        centers = p_uty(tags = c("required", "train"), default = 2L,
-          custom_check = function(x) {
-            if (test_data_frame(x)) {
-              return(TRUE)
-            } else if (test_int(x)) {
-              assert_true(x >= 1L)
-            } else {
-              return("`centers` must be integer or data.frame with initial cluster centers")
-            }
-          }
-        ),
+        centers = p_uty(tags = c("required", "train"), default = 2L, custom_check = crate(check_centers)),
         kernel = p_fct(default = "rbfdot",
-          levels = c( "vanilladot", "polydot", "rbfdot", "tanhdot", "laplacedot", "besseldot", "anovadot", "splinedot"),
+          levels = c("vanilladot", "polydot", "rbfdot", "tanhdot", "laplacedot", "besseldot", "anovadot", "splinedot"),
           tags = "train"),
         sigma = p_dbl(lower = 0, tags = "train"),
         degree = p_int(default = 3L, lower = 1L, tags = "train"),
@@ -46,7 +36,7 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans",
         alg = p_fct(levels = c("kkmeans", "kerninghan"), default = "kkmeans", tags = "train"),
         p = p_dbl(default = 1, tags = "train")
       )
-      ps$values = list(centers = 2L)
+      ps$set_values(centers = 2L)
 
       # add deps
       ps$add_dep(
@@ -90,10 +80,10 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans",
       d_xc = matrix(kernlab::kernelMatrix(K, as.matrix(task$data()), c), ncol = nrow(c))
       # kernel product between each new datapoint and itself: rows are identical
       d_xx = matrix(rep(diag(kernlab::kernelMatrix(K, as.matrix(task$data()))),
-                        each = ncol(d_xc)), ncol = ncol(d_xc), byrow = TRUE)
+        each = ncol(d_xc)), ncol = ncol(d_xc), byrow = TRUE)
       # kernel product between each center and itself: columns are identical
       d_cc = matrix(rep(diag(kernlab::kernelMatrix(K, as.matrix(c))),
-                        each = nrow(d_xc)), nrow = nrow(d_xc))
+        each = nrow(d_xc)), nrow = nrow(d_xc))
       # this is the squared kernel distance to the centers
       d2 = d_xx + d_cc - 2 * d_xc
       # the nearest center determines cluster assignment

diff --git a/R/LearnerClustKMeans.R b/R/LearnerClustKMeans.R
@@ -23,23 +23,13 @@ LearnerClustKMeans = R6Class("LearnerClustKMeans",
     #' Creates a new instance of this [R6][R6::R6Class] class.
     initialize = function() {
       ps = ps(
-        centers = p_uty(tags = c("required", "train"), default = 2L,
-          custom_check = function(x) {
-            if (test_data_frame(x)) {
-              return(TRUE)
-            } else if (test_int(x)) {
-              assert_true(x >= 1L)
-            } else {
-              return("`centers` must be integer or data.frame with initial cluster centers")
-            }
-          }
-        ),
+        centers = p_uty(tags = c("required", "train"), default = 2L, custom_check = crate(check_centers), ),
         iter.max = p_int(lower = 1L, default = 10L, tags = c("train")),
         algorithm = p_fct(levels = c("Hartigan-Wong", "Lloyd", "Forgy", "MacQueen"), default = "Hartigan-Wong", tags = c("train")),
         nstart = p_int(lower = 1L, default = 1L, tags = c("train")),
         trace = p_int(lower = 0L, default = 0L, tags = c("train"))
       )
-      ps$values = list(centers = 2L)
+      ps$set_values(centers = 2L)
 
       super$initialize(
         id = "clust.kmeans",

diff --git a/R/LearnerClustMclust.R b/R/LearnerClustMclust.R
@@ -21,48 +21,12 @@ LearnerClustMclust = R6Class("LearnerClustMclust",
     #' Creates a new instance of this [R6][R6::R6Class] class.
     initialize = function() {
       ps = ps(
-        G = p_uty(default = c(1:9), custom_check = function(x) {
-          if (test_numeric(x)) {
-            return(TRUE)
-          } else {
-            stop("`G` need to be a numeric vector")
-          }
-        }, tags = "train"),
-        modelNames = p_uty(custom_check = function(x) {
-          if (test_character(x)) {
-            return(TRUE)
-          } else {
-            stop("`modelNames` need to be a character vector")
-          }
-        }, tags = "train"),
-        prior = p_uty(custom_check = function(x) {
-          if (test_list(x)) {
-            return(TRUE)
-          } else {
-            stop("`prior` need to be a list")
-          }
-        }, tags = "train"),
-        control = p_uty(default = mclust::emControl(), custom_check = function(x) {
-          if (test_list(x)) {
-            return(TRUE)
-          } else {
-            stop("`control` need to be a list of control parameters for EM")
-          }
-        }, tags = "train"),
-        initialization = p_uty(custom_check = function(x) {
-          if (test_list(x)) {
-            return(TRUE)
-          } else {
-            stop("`initialization` need to be a list of initialization components")
-          }
-        }, tags = "train"),
-        x = p_uty(custom_check = function(x) {
-          if (test_class(x, "mclustBIC")) {
-            return(TRUE)
-          } else {
-            stop("`x` need to be an object of class 'mclustBIC'")
-          }
-        }, tags = "train")
+        G = p_uty(default = 1:9, tags = "train", custom_check = crate(check_numeric)),
+        modelNames = p_uty(tags = "train", custom_check = crate(check_character)),
+        prior = p_uty(tags = "train", custom_check = crate(check_list)),
+        control = p_uty(default = mclust::emControl(), tags = "train", custom_check = crate(check_list)),
+        initialization = p_uty(tags = "train", custom_check = crate(check_list)),
+        x = p_uty(tags = "train", custom_check = crate(function(x) check_class(x, "mclustBIC"))),
       )
 
       super$initialize(

diff --git a/R/LearnerClustMeanShift.R b/R/LearnerClustMeanShift.R
@@ -21,20 +21,14 @@ LearnerClustMeanShift = R6Class("LearnerClustMeanShift",
     #' Creates a new instance of this [R6][R6::R6Class] class.
     initialize = function() {
       ps = ps(
-        h = p_uty(custom_check = function(x) {
-            if (test_numeric(x) || test_int(x)) {
-              return(TRUE)
-            } else {
-              return("`h` must be either integer or numeric vector")
-            }
-          }, tags = "train"),
-        subset = p_uty(custom_check = function(x) {
-          if (test_numeric(x)) {
-            return(TRUE)
+        h = p_uty(tags = "train", custom_check = crate(function(x) {
+          if (test_numeric(x) || test_int(x)) {
+            TRUE
           } else {
-            return("`subset` must be a numeric vector")
+            "`h` must be either integer or numeric vector"
           }
-        }, tags = "train"),
+        })),
+        subset = p_uty(tags = "train", custom_check = crate(check_numeric)),
         scaled = p_int(lower = 0L, default = 1, tags = "train"),
         iter = p_int(lower = 1L, default = 200L, tags = "train"),
         thr = p_dbl(default = 0.01, tags = "train")

diff --git a/R/LearnerClustMiniBatchKMeans.R b/R/LearnerClustMiniBatchKMeans.R
@@ -37,7 +37,7 @@ LearnerClustMiniBatchKMeans = R6Class("LearnerClustMiniBatchKMeans",
         tol_optimal_init = p_dbl(default = 0.3, lower = 0, tags = "train"),
         seed = p_int(default = 1L, tags = "train")
       )
-      ps$values = list(clusters = 2L)
+      ps$set_values(clusters = 2L)
 
       # add deps
       ps$add_dep("init_fraction", "initializer", CondAnyOf$new(c("kmeans++", "optimal_init")))

diff --git a/R/LearnerClustPAM.R b/R/LearnerClustPAM.R
@@ -26,23 +26,13 @@ LearnerClustPAM = R6Class("LearnerClustPAM",
       ps = ps(
         k = p_int(lower = 1L, default = 2L, tags = c("required", "train")),
         metric = p_fct(levels = c("euclidian", "manhattan"), tags = "train"),
-        medoids = p_uty(default = NULL, tags = "train",
-          custom_check = function(x) {
-            if (test_integerish(x)) {
-              return(TRUE)
-            } else if (test_null(x)) {
-              return(TRUE)
-            } else {
-              stop("`medoids` needs to be either `NULL` or vector with row indices!")
-            }
-          }
-        ),
+        medoids = p_uty(default = NULL, tags = "train", custom_check = crate(function(x) check_integerish(x, null.ok = TRUE))),
         stand = p_lgl(default = FALSE, tags = "train"),
         do.swap = p_lgl(default = TRUE, tags = "train"),
         pamonce = p_int(lower = 0L, upper = 5L, default = 0, tags = "train"),
         trace.lev = p_int(lower = 0L, default = 0L, tags = "train")
       )
-      ps$values = list(k = 2L)
+      ps$set_values(k = 2L)
 
       super$initialize(
         id = "clust.pam",

diff --git a/R/helper.R b/R/helper.R
@@ -16,3 +16,11 @@ check_centers_param = function(centers, task, test_class, name) {
     }
   }
 }
+
+check_centers = function(x) {
+  if (test_data_frame(x) && test_int(x, lower = 1L)) {
+    TRUE
+  } else {
+    "`centers` must be integer or data.frame with initial cluster centers"
+  }
+}