Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: clean and fix custom checks and use set_values for param_set #49

Merged
merged 12 commits into from
Feb 6, 2024
Merged
8 changes: 1 addition & 7 deletions R/LearnerClustAffinityPropagation.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,7 @@ LearnerClustAP = R6Class("LearnerClustAP",
initialize = function() {
ps = ps(
s = p_uty(tags = c("required", "train")),
p = p_uty(custom_check = function(x) {
if (test_numeric(x)) {
return(TRUE)
} else {
stop("`p` needs to be a numeric vector")
}
}, default = NA, tags = "train"),
p = p_uty(default = NA, tags = "train", custom_check = crate(check_numeric)),
q = p_dbl(lower = 0L, upper = 1L, tags = "train"),
maxits = p_int(lower = 1L, default = 1000L, tags = "train"),
convits = p_int(lower = 1L, default = 100L, tags = "train"),
Expand Down
22 changes: 11 additions & 11 deletions R/LearnerClustAgnes.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,22 @@ LearnerClustAgnes = R6Class("LearnerClustAgnes",
method = p_fct(default = "average", levels = c("average", "single", "complete", "ward", "weighted", "flexible", "gaverage"), tags = "train"),
trace.lev = p_int(lower = 0L, default = 0L, tags = "train"),
k = p_int(lower = 1L, default = 2L, tags = "predict"),
par.method = p_uty(tags = "train", custom_check = function(x) {
if (test_numeric(x) || test_list(x)) {
if (length(x) %in% c(1L, 3L, 4L)) {
return(TRUE)
}
stop("`par.method` needs be of length 1, 3, or 4")
} else {
stop("`par.method` needs to be a numeric vector")
}
})
par.method = p_uty(tags = "train", custom_check = crate(function(x) {
if (!(test_numeric(x) || test_list(x))) {
return("`par.method` needs to be a numeric vector")
}
if (length(x) %in% c(1L, 3L, 4L)) {
TRUE
} else {
"`par.method` needs be of length 1, 3, or 4"
}
}))
)

# param deps
ps$add_dep("par.method", "method", CondAnyOf$new(c("flexible", "gaverage")))

ps$values = list(k = 2L)
ps$set_values(k = 2L)

super$initialize(
id = "clust.agnes",
Expand Down
35 changes: 9 additions & 26 deletions R/LearnerClustCMeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,43 +23,26 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans",
#' Creates a new instance of this [R6][R6::R6Class] class.
initialize = function() {
ps = ps(
centers = p_uty(tags = c("required", "train"), default = 2L,
custom_check = function(x) {
if (test_data_frame(x)) {
return(TRUE)
} else if (test_int(x)) {
assert_true(x >= 1L)
} else {
return("`centers` must be integer or data.frame with initial cluster centers")
}
}
),
centers = p_uty(tags = c("required", "train"), default = 2L, custom_check = crate(check_centers)),
iter.max = p_int(lower = 1L, default = 100L, tags = "train"),
verbose = p_lgl(default = FALSE, tags = "train"),
dist = p_fct(levels = c("euclidean", "manhattan"), default = "euclidean", tags = "train"),
method = p_fct(levels = c("cmeans", "ufcl"), default = "cmeans", tags = "train"),
m = p_dbl(lower = 1L, default = 2L, tags = "train"),
rate.par = p_dbl(lower = 0L, upper = 1L, tags = "train"),
weights = p_uty(default = 1L, custom_check = function(x) {
if (test_numeric(x)) {
if (sum(sign(x)) == length(x)) {
return(TRUE)
} else {
return("`weights` must contain only positive numbers")
}
} else if (test_count(x)) {
return(TRUE)
} else {
return("`weights` must be positive numeric vector or a single positive number")
}
},
tags = "train"),
weights = p_uty(default = 1L, tags = "train", custom_check = crate(function(x) {
if (test_numeric(x) && !all(x > 0) || check_count(x)) {
TRUE
} else {
"`weights` must be positive numeric vector or a single positive number"
}
})),
control = p_uty(tags = "train")
)
# add deps
ps$add_dep("rate.par", "method", CondEqual$new("ufcl"))

ps$values = list(centers = 2L)
ps$set_values(centers = 2L)

super$initialize(
id = "clust.cmeans",
Expand Down
8 changes: 1 addition & 7 deletions R/LearnerClustDBSCAN.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
eps = p_dbl(lower = 0L, tags = c("required", "train")),
minPts = p_int(lower = 0L, default = 5L, tags = "train"),
borderPoints = p_lgl(default = TRUE, tags = "train"),
weights = p_uty(custom_check = function(x) {
if (test_numeric(x)) {
return(TRUE)
} else {
stop("`weights` need to be a numeric vector")
}
}, tags = "train"),
weights = p_uty(tags = "train"), custom_check = crate(check_numeric),
search = p_fct(levels = c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"),
bucketSize = p_int(lower = 1L, default = 10L, tags = "train"),
splitRule = p_fct(levels = c("STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR", "SUGGEST"), default = "SUGGEST", tags = "train"),
Expand Down
29 changes: 13 additions & 16 deletions R/LearnerClustDBSCANfpc.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,25 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
scale = p_lgl(default = FALSE, tags = "train"),
method = p_fct(levels = c("hybrid", "raw", "dist"), tags = "train"),
seeds = p_lgl(default = TRUE, tags = "train"),
showplot = p_uty(custom_check = function(x) {
if (test_flag(x)) {
return(TRUE)
} else if (test_int(x, lower = 0, upper = 2)) {
return(TRUE)
showplot = p_uty(default = FALSE, tags = "train", custom_check = crate(function(x) {
if (test_flag(x) && test_int(x, lower = 0, upper = 2)) {
TRUE
} else {
stop("`showplot` need to be either logical or integer between 0 and 2")
"`showplot` need to be either logical or integer between 0 and 2"
}
}, default = FALSE, tags = "train"),
countmode = p_uty(custom_check = function(x) {
if (test_integer(x)) {
return(TRUE)
} else if (test_null(x)) {
return(TRUE)
})),
countmode = p_uty(default = NULL, tags = "train", custom_check = crate(function(x) {
if (test_integer(x, null.ok = TRUE)) {
TRUE
} else {
stop("`countmode` need to be NULL or vector of integers")
"`countmode` need to be NULL or vector of integers"
}
}, default = NULL, tags = "train")
}))
)

param_set$values = list(MinPts = 5L, scale = FALSE, seeds = TRUE,
showplot = FALSE, countmode = NULL)
param_set$set_values(
MinPts = 5L, scale = FALSE, seeds = TRUE, showplot = FALSE, countmode = NULL
)

super$initialize(
id = "clust.dbscan_fpc",
Expand Down
2 changes: 1 addition & 1 deletion R/LearnerClustDiana.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ LearnerClustDiana = R6Class("LearnerClustDiana",
trace.lev = p_int(lower = 0L, default = 0L, tags = "train"),
k = p_int(lower = 1L, default = 2L, tags = "predict")
)
ps$values = list(k = 2L)
ps$set_values(k = 2L)

super$initialize(
id = "clust.diana",
Expand Down
2 changes: 1 addition & 1 deletion R/LearnerClustFanny.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ LearnerClustFanny = R6Class("LearnerClustFanny",
tol = p_dbl(lower = 0L, default = 1e-15, tags = "train"),
trace.lev = p_int(lower = 0L, default = 0L, tags = "train")
)
ps$values = list(k = 2L)
ps$set_values(k = 2L)

super$initialize(
id = "clust.fanny",
Expand Down
2 changes: 1 addition & 1 deletion R/LearnerClustFeatureless.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ LearnerClustFeatureless = R6Class("LearnerClustFeatureless",
ps = ps(
num_clusters = p_int(lower = 1L, default = 1L, tags = c("required", "train", "predict"))
)
ps$values = list(num_clusters = 1L)
ps$set_values(num_clusters = 1L)

super$initialize(
id = "clust.featureless",
Expand Down
2 changes: 1 addition & 1 deletion R/LearnerClustHclust.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ LearnerClustHclust = R6Class("LearnerClustHclust",

# param deps
ps$add_dep("p", "distmethod", CondAnyOf$new("minkowski"))
ps$values = list(k = 2L, distmethod = "euclidean")
ps$set_values(k = 2L, distmethod = "euclidean")

super$initialize(
id = "clust.hclust",
Expand Down
20 changes: 5 additions & 15 deletions R/LearnerClustKKMeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,9 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans",
#' Creates a new instance of this [R6][R6::R6Class] class.
initialize = function() {
ps = ps(
centers = p_uty(tags = c("required", "train"), default = 2L,
custom_check = function(x) {
if (test_data_frame(x)) {
return(TRUE)
} else if (test_int(x)) {
assert_true(x >= 1L)
} else {
return("`centers` must be integer or data.frame with initial cluster centers")
}
}
),
centers = p_uty(tags = c("required", "train"), default = 2L, custom_check = crate(check_centers)),
kernel = p_fct(default = "rbfdot",
levels = c( "vanilladot", "polydot", "rbfdot", "tanhdot", "laplacedot", "besseldot", "anovadot", "splinedot"),
levels = c("vanilladot", "polydot", "rbfdot", "tanhdot", "laplacedot", "besseldot", "anovadot", "splinedot"),
tags = "train"),
sigma = p_dbl(lower = 0, tags = "train"),
degree = p_int(default = 3L, lower = 1L, tags = "train"),
Expand All @@ -46,7 +36,7 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans",
alg = p_fct(levels = c("kkmeans", "kerninghan"), default = "kkmeans", tags = "train"),
p = p_dbl(default = 1, tags = "train")
)
ps$values = list(centers = 2L)
ps$set_values(centers = 2L)

# add deps
ps$add_dep(
Expand Down Expand Up @@ -90,10 +80,10 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans",
d_xc = matrix(kernlab::kernelMatrix(K, as.matrix(task$data()), c), ncol = nrow(c))
# kernel product between each new datapoint and itself: rows are identical
d_xx = matrix(rep(diag(kernlab::kernelMatrix(K, as.matrix(task$data()))),
each = ncol(d_xc)), ncol = ncol(d_xc), byrow = TRUE)
each = ncol(d_xc)), ncol = ncol(d_xc), byrow = TRUE)
# kernel product between each center and itself: columns are identical
d_cc = matrix(rep(diag(kernlab::kernelMatrix(K, as.matrix(c))),
each = nrow(d_xc)), nrow = nrow(d_xc))
each = nrow(d_xc)), nrow = nrow(d_xc))
# this is the squared kernel distance to the centers
d2 = d_xx + d_cc - 2 * d_xc
# the nearest center determines cluster assignment
Expand Down
14 changes: 2 additions & 12 deletions R/LearnerClustKMeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,13 @@ LearnerClustKMeans = R6Class("LearnerClustKMeans",
#' Creates a new instance of this [R6][R6::R6Class] class.
initialize = function() {
ps = ps(
centers = p_uty(tags = c("required", "train"), default = 2L,
custom_check = function(x) {
if (test_data_frame(x)) {
return(TRUE)
} else if (test_int(x)) {
assert_true(x >= 1L)
} else {
return("`centers` must be integer or data.frame with initial cluster centers")
}
}
),
centers = p_uty(tags = c("required", "train"), default = 2L, custom_check = crate(check_centers), ),
iter.max = p_int(lower = 1L, default = 10L, tags = c("train")),
algorithm = p_fct(levels = c("Hartigan-Wong", "Lloyd", "Forgy", "MacQueen"), default = "Hartigan-Wong", tags = c("train")),
nstart = p_int(lower = 1L, default = 1L, tags = c("train")),
trace = p_int(lower = 0L, default = 0L, tags = c("train"))
)
ps$values = list(centers = 2L)
ps$set_values(centers = 2L)

super$initialize(
id = "clust.kmeans",
Expand Down
48 changes: 6 additions & 42 deletions R/LearnerClustMclust.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,48 +21,12 @@ LearnerClustMclust = R6Class("LearnerClustMclust",
#' Creates a new instance of this [R6][R6::R6Class] class.
initialize = function() {
ps = ps(
G = p_uty(default = c(1:9), custom_check = function(x) {
if (test_numeric(x)) {
return(TRUE)
} else {
stop("`G` need to be a numeric vector")
}
}, tags = "train"),
modelNames = p_uty(custom_check = function(x) {
if (test_character(x)) {
return(TRUE)
} else {
stop("`modelNames` need to be a character vector")
}
}, tags = "train"),
prior = p_uty(custom_check = function(x) {
if (test_list(x)) {
return(TRUE)
} else {
stop("`prior` need to be a list")
}
}, tags = "train"),
control = p_uty(default = mclust::emControl(), custom_check = function(x) {
if (test_list(x)) {
return(TRUE)
} else {
stop("`control` need to be a list of control parameters for EM")
}
}, tags = "train"),
initialization = p_uty(custom_check = function(x) {
if (test_list(x)) {
return(TRUE)
} else {
stop("`initialization` need to be a list of initialization components")
}
}, tags = "train"),
x = p_uty(custom_check = function(x) {
if (test_class(x, "mclustBIC")) {
return(TRUE)
} else {
stop("`x` need to be an object of class 'mclustBIC'")
}
}, tags = "train")
G = p_uty(default = 1:9, tags = "train", custom_check = crate(check_numeric)),
modelNames = p_uty(tags = "train", custom_check = crate(check_character)),
prior = p_uty(tags = "train", custom_check = crate(check_list)),
control = p_uty(default = mclust::emControl(), tags = "train", custom_check = crate(check_list)),
initialization = p_uty(tags = "train", custom_check = crate(check_list)),
x = p_uty(tags = "train", custom_check = crate(function(x) check_class(x, "mclustBIC"))),
)

super$initialize(
Expand Down
18 changes: 6 additions & 12 deletions R/LearnerClustMeanShift.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,14 @@ LearnerClustMeanShift = R6Class("LearnerClustMeanShift",
#' Creates a new instance of this [R6][R6::R6Class] class.
initialize = function() {
ps = ps(
h = p_uty(custom_check = function(x) {
if (test_numeric(x) || test_int(x)) {
return(TRUE)
} else {
return("`h` must be either integer or numeric vector")
}
}, tags = "train"),
subset = p_uty(custom_check = function(x) {
if (test_numeric(x)) {
return(TRUE)
h = p_uty(tags = "train", custom_check = crate(function(x) {
if (test_numeric(x) || test_int(x)) {
TRUE
} else {
return("`subset` must be a numeric vector")
"`h` must be either integer or numeric vector"
}
}, tags = "train"),
})),
subset = p_uty(tags = "train", custom_check = crate(check_numeric)),
scaled = p_int(lower = 0L, default = 1, tags = "train"),
iter = p_int(lower = 1L, default = 200L, tags = "train"),
thr = p_dbl(default = 0.01, tags = "train")
Expand Down
2 changes: 1 addition & 1 deletion R/LearnerClustMiniBatchKMeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ LearnerClustMiniBatchKMeans = R6Class("LearnerClustMiniBatchKMeans",
tol_optimal_init = p_dbl(default = 0.3, lower = 0, tags = "train"),
seed = p_int(default = 1L, tags = "train")
)
ps$values = list(clusters = 2L)
ps$set_values(clusters = 2L)

# add deps
ps$add_dep("init_fraction", "initializer", CondAnyOf$new(c("kmeans++", "optimal_init")))
Expand Down
14 changes: 2 additions & 12 deletions R/LearnerClustPAM.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,13 @@ LearnerClustPAM = R6Class("LearnerClustPAM",
ps = ps(
k = p_int(lower = 1L, default = 2L, tags = c("required", "train")),
metric = p_fct(levels = c("euclidian", "manhattan"), tags = "train"),
medoids = p_uty(default = NULL, tags = "train",
custom_check = function(x) {
if (test_integerish(x)) {
return(TRUE)
} else if (test_null(x)) {
return(TRUE)
} else {
stop("`medoids` needs to be either `NULL` or vector with row indices!")
}
}
),
medoids = p_uty(default = NULL, tags = "train", custom_check = crate(function(x) check_integerish(x, null.ok = TRUE))),
stand = p_lgl(default = FALSE, tags = "train"),
do.swap = p_lgl(default = TRUE, tags = "train"),
pamonce = p_int(lower = 0L, upper = 5L, default = 0, tags = "train"),
trace.lev = p_int(lower = 0L, default = 0L, tags = "train")
)
ps$values = list(k = 2L)
ps$set_values(k = 2L)

super$initialize(
id = "clust.pam",
Expand Down
8 changes: 8 additions & 0 deletions R/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,11 @@ check_centers_param = function(centers, task, test_class, name) {
}
}
}

check_centers = function(x) {
if (test_data_frame(x) && test_int(x, lower = 1L)) {
TRUE
} else {
"`centers` must be integer or data.frame with initial cluster centers"
}
}