Addressing issues #10 #11 and #12

USCbiostats · Jun 11, 2019 · 0bd9f03 · 0bd9f03
1 parent 87a0f97
commit 0bd9f03
Show file tree

Hide file tree

Showing 25 changed files with 718 additions and 364 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -23,5 +23,5 @@ Suggests:
     testthat,
     covr
 Imports: 
-    utils
+    utils, parallel
 VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
@@ -40,5 +40,9 @@ export(squeue)
 export(state)
 export(the_plan)
 export(write_slurm_job)
+importFrom(parallel,mcMap)
+importFrom(parallel,mclapply)
+importFrom(parallel,splitIndices)
+importFrom(utils,head)
 importFrom(utils,sessionInfo)
 importFrom(utils,str)
diff --git a/R/Map.R b/R/Map.R
@@ -66,8 +66,12 @@ Slurm_Map <- function(
   dots    <- list(...)
 
   # Checking names
-  if (length(FUNargs) != length(names(dots)))
-    stop("All arguments passed via `...` should be named arguments.", call.=FALSE)
+  # All arguments in ... must be named
+  dots_names <- names(dots)
+  dots_names <- dots_names[dots_names!=""]
+  if (length(dots_names) != length(dots))
+    stop("One or more arguments in `...` are unnamed. All arguments passed ",
+         "via `...` must be named.", call. = FALSE)
 
   if (length(dots) && length(setdiff(names(dots), FUNargs)))
     stop("Some arguments passed via `...` are not part of `f`:\n -",
@@ -76,6 +80,16 @@ Slurm_Map <- function(
   # Verify lengths and recycle
   verify_lengths(environment(), "dots")
 
+  # Checking the lengths
+  if (length(dots[[1]]) < njobs) {
+    warning("The number of jobs is greater than the length of `",
+            names(dots)[1], "`. The variable `njobs` will be set equal to the ",
+            "length of it.", call. = FALSE,
+            immediate. = TRUE)
+
+    njobs <- length(dots[[1]])
+  }
+
   # Setting the job name
   opts_sluRm$set_chdir(job_path)
   opts_sluRm$set_job_name(job_name)
@@ -91,7 +105,7 @@ Slurm_Map <- function(
   # Adding readRDS
   rscript$add_rds(list(INDICES = INDICES), split = FALSE, compress = FALSE)
   rscript$add_rds(list(f = f, mc.cores=mc.cores), split = FALSE, compress = compress)
-  rscript$add_rds(list(...), split = TRUE, compress = compress)
+  rscript$add_rds(dots, split = TRUE, compress = compress)
 
   if (length(export))
     rscript$add_rds(mget(export, envir=parent.frame()), split = FALSE, compress = compress)

diff --git a/R/lapply.R b/R/lapply.R
@@ -4,6 +4,13 @@
 #' @template slurm
 #' @references Job Array Support https://slurm.schedmd.com/job_array.html
 #' @export
+#' @details The function `Slurm_lapply` will submit `njobs` jobs to the queue and
+#' distribute `X` according to [parallel::splitIndices]. For example, if `X` is a
+#' list with 1,000 elements, and `njobs = 2`, then `Slurm_lapply` will submit 2
+#' jobs with 500 elements of `X` each (2 chunks of data). The same principle
+#' applies to `Slurm_sapply` and `Slurm_Map`; that is, the data is split into
+#' chunks so all the information is sent at once when the job is submitted.
+#'
 #' @seealso For resubmitting a job, see the example in [sbatch].
 #' @examples
 #' \dontrun{
@@ -61,6 +68,16 @@ Slurm_lapply <- function(
     X <- as.list(X)
   }
 
+  # Checking the lengths
+  if (length(X) < njobs) {
+    warning("The number of jobs is greater than the length of `X`. The ",
+            "`njobs`will be set equal to the length of `X`.", call. = FALSE,
+            immediate. = TRUE)
+
+    njobs <- length(X)
+  }
+
+
   if (length(export) && !is.character(export))
     stop("`export` must be a character vector of object names.",
          call. = FALSE)
@@ -69,6 +86,13 @@ Slurm_lapply <- function(
   FUNargs <- names(formals(FUN))
   dots    <- list(...)
 
+  # All arguments in ... must be named
+  dots_names <- names(dots)
+  dots_names <- dots_names[dots_names!=""]
+  if (length(dots_names) != length(dots))
+    stop("One or more arguments in `...` are unnamed. All arguments passed ",
+         "via `...` must be named.", call. = FALSE)
+
   if (length(dots) && length(setdiff(names(dots), FUNargs)))
     stop("Some arguments passed via `...` are not part of `FUN`:\n -",
          paste(setdiff(names(dots), FUNargs), collapse="\n -"), call. = FALSE)

diff --git a/R/options.R b/R/options.R
@@ -5,6 +5,10 @@
 #' options to write and submit jobs to **Slurm**. These options have global
 #' defaults that are set and retrieved using `opts_sluRm`.
 #'
+#' Whatever path is specified in `chdir`, all nodes should have access to it.
+#' Moreover, it is recommended to use a path located on a high-performance drive.
+#' See for example \url{https://hpcc.usc.edu/support/infrastructure/temporary-disk-space/}.
+#'
 #' @details Current supported options are:
 #'
 #' Debugging mode

diff --git a/R/sapply.R b/R/sapply.R
@@ -1,5 +1,8 @@
 #' @export
 #' @param simplify Logical scalar. See [sapply].
+#' @details Just like [sapply] is to [lapply], `Slurm_sapply` is just a wrapper of
+#' `Slurm_lapply` with an extra argument, `simplify`. When `TRUE`, once the job
+#' is collected, the function [simplify2array] is called.
 #' @rdname Slurm_lapply
 Slurm_sapply <- function(
   X,

diff --git a/R/sluRm.R b/R/sluRm.R
@@ -3,7 +3,8 @@
 #' @name sluRm
 NULL
 
-#' @importFrom utils sessionInfo str
+#' @importFrom utils sessionInfo str head
+#' @importFrom parallel splitIndices mclapply mcMap
 NULL
 
 .onAttach <- function(libname, pkgname) {

diff --git a/R/slurm_job-class.R b/R/slurm_job-class.R
@@ -93,9 +93,9 @@ print.slurm_job <- function(x, ...) {
   )
 
   if (!is.na(x$jobid)) {
-    print(head(sa <- sacct(x), 5L))
+    print(utils::head(sa <- sacct(x), 5L))
     if (nrow(sa) > 5L)
-      cat(nrow(sa), " rows skipped.\n")
+      cat(nrow(sa), " rows skipped. Use `sacct` to retrieve all the rows.\n")
   }
 
   invisible(x)

diff --git a/README.Rmd b/README.Rmd
@@ -22,7 +22,7 @@ knitr::opts_chunk$set(
 
 # sluRm: A lightweight wrapper for Slurm <img src="man/figures/logo.png" height="180px" align="right"/>
 
-Slurm Workload Manager is a popular HPC cluster job scheduler found in many of the top 500 super computers. The `sluRm` R package provides an R wrapper to it that matches the parallel package's syntax, this is, just like `parallel` provides the `parLapply`, `parMap`, `parSapply`, etc., `sluRm` provides `Slurm_lapply`, `Slurm_Map`, `Slurm_sapply`, etc.
+Slurm Workload Manager is a popular HPC cluster job scheduler found in many of the top 500 supercomputers. The `sluRm` R package provides an R wrapper to it that matches the parallel package's syntax; that is, just like `parallel` provides the `parLapply`, `clusterMap`, `parSapply`, etc., `sluRm` provides `Slurm_lapply`, `Slurm_Map`, `Slurm_sapply`, etc.
 
 While there are other alternatives such as `future.batchtools`, `batchtools`, `clustermq`, and `rslurm`, this R package has the following goals: