-
-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
41 changed files
with
1,651 additions
and
110 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,3 +18,8 @@ | |
^\.lintr$ | ||
^\.vscode$ | ||
^gfortran.* | ||
^joss$ | ||
^man-roxygen$ | ||
^BinaryFiles$ | ||
^cluto$ | ||
^zzz$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
## tic GitHub Actions template: linux-macos-windows-deploy | ||
## tic GitHub Actions template: custom-deploy | ||
## revision date: 2020-07-29 | ||
on: | ||
push: | ||
|
@@ -22,9 +22,11 @@ jobs: | |
config: | ||
# use a different tic template type if you do not want to build on all listed platforms | ||
- { os: windows-latest, r: "release" } | ||
- { os: macOS-latest, r: "release", pkgdown: "true", latex: "true" } | ||
# [Custom matrix env var] | ||
- { os: macOS-latest, r: "release", pkgdown: "false", latex: "true" } | ||
- { os: ubuntu-latest, r: "devel" } | ||
- { os: ubuntu-latest, r: "release" } | ||
# [Custom matrix env var] | ||
- { os: ubuntu-latest, r: "release", pkgdown: "true" } | ||
|
||
env: | ||
# otherwise remotes::fun() errors cause the build to fail. Example: Unavailability of binaries | ||
|
@@ -72,8 +74,8 @@ jobs: | |
uses: pat-s/[email protected] | ||
with: | ||
path: ${{ env.R_LIBS_USER }} | ||
key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}} | ||
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}} | ||
key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}}1 | ||
restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-${{steps.date.outputs.date}}1 | ||
|
||
- name: "[Custom block] [Linux] Install spatial libraries" | ||
if: runner.os == 'Linux' | ||
|
@@ -83,6 +85,8 @@ jobs: | |
- name: "[Custom block] [macOS] Install spatial libraries" | ||
if: runner.os == 'macOS' | ||
run: | | ||
# conflicts with gfortran from r-lib/actions when linking gcc | ||
rm '/usr/local/bin/gfortran' | ||
brew install ccache gdal geos proj udunits Caskroom/cask/xquartz | ||
# for some strange Windows reason this step and the next one need to be decoupled | ||
|
@@ -106,11 +110,15 @@ jobs: | |
echo -e 'options(Ncpus = 4, pkgType = "source", repos = structure(c(CRAN = "https://cloud.r-project.org/")))' > $HOME/.Rprofile | ||
Rscript -e "remotes::install_github('ropensci/tic')" -e "print(tic::dsl_load())" -e "tic::prepare_all_stages()" -e "tic::before_install()" -e "tic::install()" | ||
# [Custom block] | ||
- name: "[Stage] Before Script" | ||
run: Rscript -e 'tic::before_script()' | ||
|
||
- name: "[Stage] Script" | ||
run: Rscript -e 'tic::script()' | ||
|
||
- name: "[Stage] After Success" | ||
if: matrix.config.os == 'macOS-latest' && matrix.config.r == 'release' | ||
if: matrix.config.os == 'ubuntu-latest' && matrix.config.r == 'release' | ||
run: Rscript -e "tic::after_success()" | ||
|
||
- name: "[Stage] Upload R CMD check artifacts" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,4 +18,4 @@ repos: | |
rev: v2.4.0 | ||
hooks: | ||
- id: check-added-large-files | ||
args: ['--maxkb=800'] | ||
args: ['--maxkb=2300'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
#' @title Repeated Spatio-Temporal Cluster Resampling | ||
#' | ||
#' @import mlr3 | ||
#' | ||
#' @description Spatio-temporal cluster partitioning via the `vcluster` | ||
#' executable of the | ||
#' [CLUTO](http://glaros.dtc.umn.edu/gkhome/cluto/cluto/overview) clustering | ||
#' application. | ||
#' | ||
#' This partitioning method relies on the external CLUTO library. | ||
#' To use it, CLUTO's executables need to be downloaded and installed into | ||
#' this package. | ||
#' | ||
#' See \url{https://gist.github.com/pat-s/6430470cf817050e27d26c43c0e9be72} for an | ||
#' installation approach that should work on Windows and Linux. | ||
#' macOS is not supported by CLUTO. | ||
#' | ||
#' Before using this method, please check the restrictive | ||
#' [copyright](http://glaros.dtc.umn.edu/gkhome/cluto/cluto/download) shown | ||
#' below. | ||
#' | ||
#' @details | ||
#' By default, `-clmethod='direct'` is passed to the `vcluster` executable in | ||
#' contrast to the upstream default `-clmethod='rb'`. | ||
#' There is no evidence or research that this method is the best among the | ||
#' available ones ("rb", "rbr", "direct", "agglo", "graph", "bagglo"). Also, | ||
#' various other parameters can be set via argument `cluto_parameters` to | ||
#' achieve different clustering results. | ||
#' | ||
#' Parameter `-clusterfile` is handled by \CRANpkg{skmeans} and cannot be | ||
#' changed. | ||
#' | ||
#' @section Copyright: | ||
#' | ||
#' CLUTO's copyright is as follows: | ||
#' | ||
#' The CLUTO package is copyrighted by the Regents of the University of | ||
#' Minnesota. | ||
#' It can be freely used for educational and research purposes by non-profit | ||
#' institutions and US government agencies only. | ||
#' Other organizations are allowed to use CLUTO only for evaluation purposes, | ||
#' and any further uses will require prior approval. | ||
#' The software may not be sold or redistributed without prior approval. | ||
#' One may make copies of the software for their use provided that the copies, | ||
#' are not sold or distributed, are used under the same terms and conditions. | ||
#' As unestablished research software, this code is provided on an “as is” basis | ||
#' without warranty of any kind, either expressed or implied. | ||
#' The downloading, or executing any part of this software constitutes an | ||
#' implicit agreement to these terms. These terms and conditions are subject to | ||
#' change at any time without prior notice. | ||
#' | ||
#' In addition, a different seed is used in every repetition to enforce | ||
#' different clusters for each repetition. By default, all repetitions would use | ||
#' the same seed and hence be identical. Note that setting an R seed has no | ||
#' effect here. | ||
#' | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
#' library(mlr3) | ||
#' library(mlr3spatiotempcv) | ||
#' task = tsk("cookfarm") | ||
#' | ||
#' # Instantiate Resampling | ||
#' rrcv = rsmp("repeated-spcv-cluto", folds = 3, repeats = 5) | ||
#' rrcv$instantiate(task, time_var = "Date") | ||
#' | ||
#' # Iteration, fold and repetition numbers: | ||
#' rrcv$iters | ||
#' rrcv$folds(1:6) | ||
#' rrcv$repeats(1:6) | ||
#' | ||
#' # Individual sets: | ||
#' rrcv$train_set(1) | ||
#' rrcv$test_set(1) | ||
#' intersect(rrcv$train_set(1), rrcv$test_set(1)) | ||
#' | ||
#' # Internal storage: | ||
#' rrcv$instance # table | ||
#' } | ||
ResamplingRepeatedSptCVCluto = R6Class("ResamplingRepeatedSptCVCluto",
  inherit = mlr3::Resampling,

  public = list(
    #' @description
    #' Create a repeated spatio-temporal resampling instance whose folds are
    #' derived from CLUTO clusters.
    #' @param id `character(1)`\cr
    #'   Identifier for the resampling strategy.
    initialize = function(id = "repeated-spcv-cluto") {
      ps = ParamSet$new(params = list(
        ParamInt$new("folds", lower = 1L, default = 10L, tags = "required"),
        ParamInt$new("repeats", lower = 1L, default = 1L, tags = "required")
      ))
      ps$values = list(folds = 10L, repeats = 1L)
      super$initialize(
        id = id,
        param_set = ps,
        man = "mlr3spatiotempcv::mlr_resamplings_repeated_SptCVCluto"
      )
    },

    #' @description Translates iteration numbers to fold number.
    #' @param iters `integer()`\cr
    #'   Iteration number.
    folds = function(iters) {
      iters = assert_integerish(iters, any.missing = FALSE, coerce = TRUE)
      # The fold index cycles within one repetition, so the modulus has to be
      # the number of folds (same mapping as in `.get_train()`/`.get_test()`).
      ((iters - 1L) %% as.integer(self$param_set$values$folds)) + 1L
    },

    #' @description Translates iteration numbers to repetition number.
    #' @param iters `integer()`\cr
    #'   Iteration number.
    repeats = function(iters) {
      iters = assert_integerish(iters, any.missing = FALSE, coerce = TRUE)
      ((iters - 1L) %/% as.integer(self$param_set$values$folds)) + 1L
    },

    #' @description
    #'  Materializes fixed training and test splits for a given task.
    #' @param task [Task]\cr
    #'  A task to instantiate.
    #' @param time_var [character]\cr
    #'  The name of the variable which represents the time dimension.
    #'  The column is converted with `as.POSIXct()` and then to numeric
    #'  seconds, so it must hold values that `as.POSIXct()` can convert.
    #' @param clmethod [character]\cr
    #'   Name of the clustering method to use within `vcluster`.
    #'   See Details for more information.
    #' @param cluto_parameters [character]\cr
    #'   Additional parameters to pass to `vcluster`.
    #'   Must be given as a single character string, e.g.
    #'   `"param1='value1'param2='value2'"`.
    #'   See the CLUTO documentation for a full list of supported parameters.
    #' @param verbose [logical]\cr
    #'   Whether to show `vcluster` progress and summary output.
    #' @return The resampling object itself, invisibly.
    instantiate = function(task, time_var, clmethod = "direct",
      cluto_parameters = NULL, verbose = TRUE) {

      # Fail early with a clear message instead of erroring deep inside
      # `.sample()` when the optional dependency is missing.
      if (!requireNamespace("skmeans", quietly = TRUE)) {
        stopf("Package 'skmeans' is required for CLUTO-based resampling")
      }

      assert_task(task)
      groups = task$groups

      if (!is.null(groups)) {
        stopf("Grouping is not supported for spatial resampling methods")
      }

      time_col = task$data()[[time_var]]
      if (is.null(time_col)) {
        stopf("Variable '%s' not found in the task data", time_var)
      }

      time = as.POSIXct(time_col)
      # time in seconds since 1/1/1970
      time_num = as.numeric(time)

      # Clustering input: the task coordinates plus the numeric time as a
      # third column.
      data_matrix = data.matrix(data.frame(task$coordinates(), time_num))
      colnames(data_matrix) = c("x", "y", "z")

      instance = private$.sample(
        task$row_ids, data_matrix, clmethod, cluto_parameters, verbose
      )

      self$instance = instance
      self$task_hash = task$hash
      invisible(self)
    }
  ),

  active = list(

    #' @field iters `integer(1)`\cr
    #'   Returns the number of resampling iterations, depending on the
    #'   values stored in the `param_set`.
    iters = function() {
      pv = self$param_set$values
      as.integer(pv$repeats) * as.integer(pv$folds)
    }
  ),

  private = list(
    # Runs the clustering once per repetition and returns one table with the
    # columns `row_id`, `rep` and `fold`.
    .sample = function(ids, data_matrix, clmethod, cluto_parameters, verbose) {

      # Handed to skmeans as the `vcluster` control entry below.
      vcluster_loc = check_cluto_path()

      pv = self$param_set$values
      folds = as.integer(pv$folds)

      # NOTE(review): the extra parameters end up wrapped in double quotes
      # directly after the clmethod value; confirm this is the form expected
      # by the skmeans/vcluster call.
      if (is.null(cluto_parameters)) {
        control_cluto = sprintf('-clmethod="%s"', clmethod)
      } else {
        control_cluto = sprintf('-clmethod="%s""%s"', clmethod, cluto_parameters)
      }

      mlr3misc::map_dtr(seq_len(pv$repeats), function(i) {
        data.table(
          row_id = ids, rep = i,
          fold = skmeans::skmeans(data_matrix,
            k = folds,
            method = "CLUTO",
            control = list(
              vcluster = vcluster_loc,
              verbose = verbose,
              # The repetition index doubles as the seed so that every
              # repetition gets its own `-seed` value.
              control = paste(control_cluto, sprintf("-seed='%s'", i))
            )
          )$cluster
        )
      })
    },

    # Row ids of all folds except the i-th iteration's fold, within the
    # repetition that iteration i belongs to.
    .get_train = function(i) {
      i = as.integer(i) - 1L
      folds = as.integer(self$param_set$values$folds)
      rep = i %/% folds + 1L
      fold = i %% folds + 1L
      ii = data.table(rep = rep, fold = seq_len(folds)[-fold])
      self$instance[ii, "row_id", on = names(ii), nomatch = 0L][[1L]]
    },

    # Row ids of the fold that belongs to the i-th iteration.
    .get_test = function(i) {
      i = as.integer(i) - 1L
      folds = as.integer(self$param_set$values$folds)
      rep = i %/% folds + 1L
      fold = i %% folds + 1L
      ii = data.table(rep = rep, fold = fold)
      self$instance[ii, "row_id", on = names(ii), nomatch = 0L][[1L]]
    }
  )
)
Oops, something went wrong.