From 9c2eeff076c89fa6589fc14f79e3a61bb5c43d5d Mon Sep 17 00:00:00 2001
From: Fran Barton <francis.barton@nuh.nhs.uk>
Date: Thu, 8 Feb 2024 05:05:30 +0000
Subject: [PATCH] Fixes issue #20 by removing `batch_it_simple()`

---
 .Rbuildignore                    |   1 +
 .lintr                           |   3 +
 DESCRIPTION                      |  31 ++--
 NAMESPACE                        |   4 -
 NEWS.md                          |  21 ++-
 R/NHSRpostcodetools-package.R    |   5 +-
 R/batch_it.R                     | 253 +++----------------------------
 R/postcode_data_join.R           |   2 +-
 man/NHSRpostcodetools-package.Rd |   2 +-
 man/batch_it.Rd                  |  67 +-------
 vignettes/NHSRpostcodetools.Rmd  |   6 +-
 11 files changed, 72 insertions(+), 323 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index 7530c83..aff0459 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -7,3 +7,4 @@
 ^docs$
 ^pkgdown$
 ^\.github$
+^\.lintr
\ No newline at end of file
diff --git a/.lintr b/.lintr
index bb3cbb1..b0cec63 100644
--- a/.lintr
+++ b/.lintr
@@ -1,2 +1,5 @@
 linters: all_linters()
+exclusions: list(
+  "vignettes"
+  )
 encoding: "UTF-8"
diff --git a/DESCRIPTION b/DESCRIPTION
index a3556e1..5789e76 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,17 +1,28 @@
 Package: NHSRpostcodetools
 Title: Package to work with England Postcodes in R
-Version: 0.0.0.9000
-Authors@R: c(
-          person("Zoë", "Turner", , "zoe.turner3@nhs.net", c("cre", "aut"), comment = c(ORCID = "0000-0003-1033-9158")),
-          person("Fran", "Barton", ,"fbarton@alwaysdata.net", "aut"),
-          person("NHS-R community", email = "nhs.rcommunity@nhs.net", role = "cph")
-          )
+Version: 0.0.0.9001
+Authors@R:
+    c(
+        person(
+            "Zoë", "Turner", , "zoe.turner3@nhs.net", c("cre", "aut"),
+            comment = c(ORCID = "0000-0003-1033-9158")
+        ),
+        person(
+            "Fran", "Barton", , "fbarton@alwaysdata.net", "aut",
+            comment = c(ORCID = "0000-0002-5650-1176")
+        ),
+        person(
+            "NHS-R community", email = "nhs.rcommunity@nhs.net", role = "cph"
+        )
+    )
 Maintainer: Zoë Turner <zoe.turner3@nhs.net>
 Description: Functions related to England Postcodes and geographical areas.
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
+Depends: 
+    R (>= 4.1.0)
 Suggests: 
     knitr,
     lubridate,
@@ -20,9 +31,9 @@ Suggests:
 Config/testthat/edition: 3
 Imports: 
     assertthat,
-    dplyr,
-    httr2,
-    purrr,
+    dplyr (>= 1.1.0),
+    httr2 (>= 1.0.0),
+    purrr (>= 1.0.0),
     rlang,
     stringr,
     tibble,
diff --git a/NAMESPACE b/NAMESPACE
index 4b49c3f..15225e3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,14 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
 export(batch_it)
-export(batch_it_simple)
 export(postcode_data_join)
 importFrom(dplyr,across)
 importFrom(rlang,.data)
 importFrom(rlang,`:=`)
 importFrom(tidyselect,all_of)
 importFrom(usethis,ui_info)
-importFrom(usethis,ui_nope)
-importFrom(usethis,ui_oops)
-importFrom(usethis,ui_stop)
 importFrom(utils,URLencode)
diff --git a/NEWS.md b/NEWS.md
index 92ea733..2d0b0a4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,22 @@
-# NHSRpostcodetools (development version)
+# NHSRpostcodetools development log
 
-* Package created with the functions from package [{myrmidon}](https://github.com/francisbarton/myrmidon) created by Fran Barton. Added `postcode_data_join.R` which uses the [postcodes.io](https://postcodes.io/) API to get additional postcode data and which removes the requirement to save the large file from the [Open Geography Portal](https://geoportal.statistics.gov.uk/datasets/postcode-to-output-area-to-lower-layer-super-output-area-to-middle-layer-super-output-area-to-local-authority-district-november-2018-lookup-in-the-uk-3/about) maintained by the ONS (Office of National Statistics).
+## Development version 0.0.0.9001 (8 Feb 2024)
 
+* Fixed issue #20 by removing `batch_it_simple()` and reducing `batch_it()` to
+    the one job this package needs from it: splitting a vector or list into
+    batches of at most `batch_size` elements.
+* Updated `lintr` rules to exclude vignette `.Rmd`s.
+* Ran `styler` over the package.
+* Updated DESCRIPTION file to add an ORCID iD, a minimum R version and
+    some minimum package versions.
+
+## Development version 0.0.0.9000 (23 Nov 2023)
+
+* Package created using functions from the [{myrmidon}][myr_gh] package by Fran Barton.
+* Added `postcode_data_join.R` which uses the [postcodes.io][pio_api] API to get additional postcode data and which removes the requirement to save the large file from the [Open Geography portal][ogp] maintained by the ONS (Office for National Statistics).
 * Added dependency function `batch_it()`
+
+
+[myr_gh]: https://github.com/francisbarton/myrmidon
+[pio_api]: https://postcodes.io
+[ogp]: https://geoportal.statistics.gov.uk/datasets/postcode-to-output-area-to-lower-layer-super-output-area-to-middle-layer-super-output-area-to-local-authority-district-november-2018-lookup-in-the-uk-3/about
\ No newline at end of file
diff --git a/R/NHSRpostcodetools-package.R b/R/NHSRpostcodetools-package.R
index 69c85a1..35fe153 100644
--- a/R/NHSRpostcodetools-package.R
+++ b/R/NHSRpostcodetools-package.R
@@ -1,12 +1,9 @@
 #' @keywords internal
 "_PACKAGE"
 
-# The following block is used by usethis to automatically manage
-# roxygen namespace tags. Modify with care!
 #' @importFrom dplyr across
 #' @importFrom rlang .data `:=`
 #' @importFrom tidyselect all_of
-#' @importFrom usethis ui_info ui_nope ui_oops ui_stop
+#' @importFrom usethis ui_info
 #' @importFrom utils URLencode
-
 NULL
diff --git a/R/batch_it.R b/R/batch_it.R
index 9af7ef0..75c7bfe 100644
--- a/R/batch_it.R
+++ b/R/batch_it.R
@@ -1,252 +1,33 @@
-# batch_it() --------------------------------------------------------------
-
-
-#' Convert a list or vector to a batched list of its elements
-#'
-#' @description
-#' Batch up a long vector, or list of vectors. For example so they can be
-#' passed via a `map` function to services with length-limited APIs.
-#'
-#' @param x a vector, or a list flattenable to a vector
-#' @param batches numeric. The size (length) of batches to create. Can be a
-#'   single value or multiple values (see Examples). Should be a whole,
-#'   positive number, if provided, else `NULL`.
-#' @param proportion numeric. Proportional sizes of batches to be created.
-#'   For example `c(4, 6)` will create two batches of approximately 40% and
-#'   60% of the length of the target vector (`x`). When multiple
-#'   `proportion` values are provided, these are not repeated.
-#'   A single proportion less than 1 is repeated as many times as possible to
-#'   get near to the length of the target vector. For example, a `proportion`
-#'   of 0.1 will be treated as a tenth, and batch sizes will be rounded to
-#'   an integer size nearest to a tenth of the length of `x`.
-#' @param maximise Boolean, `FALSE` by default. If `TRUE`, a vector of batch
-#'   sizes will be partially repeated to fit maximally to the length
-#'   of the target vector. See examples below.
-#' @param quiet Boolean, `TRUE` by default. Whether to show informative
-#'   `ui_*` messages from `{usethis}`.
+#' Batch a vector or list into a list of elements with a maximum size
 #'
-#' @seealso [batch_it_simple()] which does the same thing but has fewer options
-#'   and works just fine for simpler cases.
-#'
-#' @returns All the elements of `x` batched into a list.
+#' @param x A vector or list
+#' @param batch_size numeric. The size (length) of batches to create. Should be
+#'   a single positive integer value (see Examples).
 #'
 #' @examples
-#' batch_it(seq(2L, 60L, 2L), 6L)
-#' batch_it(seq(2L, 60L, 2L), proportion = 0.2)
-#'
-#' batch_it(1:100, batches = c(20L, 30L, 50L))
-#' batch_it(letters, batches = c(4L, 6L))
-#' batch_it(letters, batches = c(4L, 6L), maximise = TRUE)
-#' batch_it(letters, proportion = c(4L, 6L))
-#'
 #' # ----
-#' as_year <- function(x) {
-#'   lubridate::as_date(
-#'     lubridate::ymd(paste0(x, "-01-01")):
-#'     lubridate::ymd(paste0(x, "-12-31"))
-#'   )
-#' }
-#' month_lengths <- function(year) {
-#'   lubridate::as_date(paste0(year, "-", 1:12, "-01")) |>
-#'     lubridate::days_in_month()
-#' }
-#' batch_it(x = as_year(2022L), batches = month_lengths(2022L))
+#' batch_it(letters, 6L)
+#' batch_it(letters, 27L)
 #'
 #' @export
-batch_it <- function(
-    x,
-    batches = NULL,
-    proportion = NULL,
-    maximise = FALSE,
-    quiet = TRUE) {
-
-  # Ensure x is a reasonable vector
-  if (is.list(x)) {
-    if (!quiet) ui_info("Flattening list to vector")
-    while (purrr::pluck_depth(x) > 2L) {
-      x <- purrr::list_flatten(x)
-    }
-    x <- purrr::list_c(x)
-  }
-
+batch_it <- function(x, batch_size) {
   assertthat::assert_that(
-    is.atomic(x),
-    msg = ui_stop("This function only works with lists or vectors")
+    is.list(as.list(x)),
+    msg = "x must be a vector or a list"
   )
 
-  if (length(batches) == 1L && (length(x) <= batches)) x
-
-  if (purrr::every(list(batches, proportion), rlang::is_null)) {
-    ui_stop("batch_it: Either `batches` or `proportion` must be supplied.")
-  }
-
-  # prefer batches if both are supplied
-  if (purrr::none(list(batches, proportion), rlang::is_null)) {
-    proportion <- NULL
-    if (!quiet) {
-      ui_info(
-        "batch_it: Values for both `batches` and `proportion` have been
-        supplied. The `batches` value is prioritised.")
-    }
-  }
-
-  if (length(x) > 10e6L) {
-    ui_nope(
-      "batch_it: Easy, tiger! That vector has more than a million
-      items. Are you sure you want to continue?"
-    )
-  }
-
-
-  # A sub-routine to handle proportion parameter
-  if (!is.null(proportion)) {
-    assertthat::assert_that(
-      is.numeric(proportion),
-      msg = ui_oops("batch_it: The proportion parameter is not numeric")
-    )
-    batches <- convert_proportion_to_batches(x, proportion)
-  }
-
-  # Just checking
-  assertthat::assert_that(is.numeric(batches),
-    msg = ui_oops("batch_it: Batch sizes provided are not numeric")
-  )
-
-  assertthat::assert_that(all(batches > 0L),
-    msg = ui_oops("batch_it: Batch sizes must be greater than zero")
+  assertthat::assert_that(
+    length(batch_size) == 1L,
+    round(batch_size) == batch_size,
+    batch_size >= 1L,
+    msg = "The batch_size parameter must be a single positive integer value"
   )
 
+  batch_size <- min(length(x), batch_size)
 
-  batches <- round(batches)
-  batches <- batches[which(batches > 0L)]
-  batches <- maximise_batches(x, batches, maximise)
-
-
-  # This shouldn't be able to happen...
-  if (sum(batches) > length(x)) {
-    ui_stop("Batch sizes ended up longer than the length of the vector")
-  }
-
-  if (length(x) - sum(batches) > 0L) {
-    if (!quiet) {
-      ui_info(
-        "The length of the target vector `x` is not an exact multiple of the
-        batch length(s) supplied. The remaining elements of `x` will be added
-        as a final batch."
-      )
-    }
-    batches <- c(batches, length(x) - sum(batches))
-  }
-
-  list_a <- c(0L, utils::head(batches, -1L)) |>
-    rlang::set_names(names(batches)) |>
-    purrr::accumulate(sum, .simplify = TRUE)
-  list_b <- batches |>
-    purrr::accumulate(sum, .simplify = TRUE)
-
-  purrr::map2(list_a, list_b, \(a, b) x[(a + 1L):b])
-}
-# end of main function
-
-
-
-# helper functions (internal) ---------------------------------------------
-
-
-#' @noRd
-convert_proportion_to_batches <- function(x, proportion) {
-  if (!all(proportion > 0L)) {
-    ui_stop("Proportions must be positive numbers")
-  }
-
-  if (length(proportion) == 1L && proportion < 1L) {
-    proportion <- rep(proportion, times = floor(1L / proportion))
-    if (sum(proportion) < 1L) {
-      proportion <- c(proportion, 1L - sum(proportion))
-    }
-  }
-
-  (proportion / sum(proportion)) * length(x)
-}
-
-
-#' @noRd
-maximise_batches <- function(x, batches, maximise) {
-  # If maximise = TRUE and `batches` has length > 0, partially repeat the
-  # batch lengths as far as possible within the length of x.
-  # If maximise = FALSE, only repeat the batch lengths in full as far as they
-  # will fit. then return the remainder as a final batch.
-  if (maximise) {
-    batches <- rep(batches, times = ceiling(length(x) / sum(batches)))
-    while (sum(batches) > length(x)) {
-      batches <- utils::head(batches, -1L)
-    }
-    batches
-  } else {
-    rep(batches, times = floor(length(x) / sum(batches)))
-  }
-}
-
-
-
-# batch_it_simple() -------------------------------------------------------
-
-
-
-#' Convert a list or vector to a batched list of its elements
-#'
-#' @rdname batch_it
-#'
-#' @param batch_size numeric. The size (length) of batches to create. Should be
-#'   a single value (see Examples). If supplied as a decimal (<1), it will be
-#'   interpreted as a proportion of `length(x)`.
-#'
-#' @examples
-#' # ----
-#' batch_it_simple(letters, 6L)
-#' batch_it_simple(letters, 0.45)
-#'
-#' @export
-batch_it_simple <- function(x, batch_size) {
-
-  # ensure x is a reasonable vector
-  if (is.list(x)) {
-    ui_info("Converting list to single vector")
-    x <- purrr::list_c(x)
-  }
-
-  if (!is.vector(x)) {
-    ui_stop("This function only works with lists or vectors")
-  }
-
-  if (length(x) > 10e6L) {
-    ui_nope(
-      "Easy, tiger! That vector has more than a million items.
-      Are you sure you want to continue?"
-    )
-  }
-
-  # ensure batch_size is an appropriate single positive number
-  if (length(batch_size) != 1L || batch_size <= 0L) {
-    ui_stop("The batch_size parameter must be a single positive value")
-  }
-
-  # if batch_size is supplied as a decimal between 0 and 1, interpret this as
-  # a proportion of the length of `x`, and convert to an integer
-  if (batch_size < 1L) {
-    batch_size <- ceiling(length(x) * batch_size)
-  }
-
-  if (batch_size > length(x)) {
-    batch_size <- length(x)
-  }
-
-  batch_size <- round(batch_size)
-  assertthat::assert_that(batch_size > 0L)
-
-  # do the batching by creating a vector of factors of length(x)
+  # Do the batching by creating a vector of factors of length(x),
   # then use this as the factor argument to split(x)
-  f <- rep(1L:ceiling(length(x) / batch_size), each = batch_size) |>
+  f <- rep(seq_len(ceiling(length(x) / batch_size)), each = batch_size) |>
     utils::head(length(x))
   unname(split(x, f))
 }
diff --git a/R/postcode_data_join.R b/R/postcode_data_join.R
index c830a34..7c4df98 100644
--- a/R/postcode_data_join.R
+++ b/R/postcode_data_join.R
@@ -105,7 +105,7 @@ postcode_data_join <- function(x, var = "postcode", fix_invalid = TRUE) {
 
           fixed_ac_data <- ac_results |>
             purrr::list_c() |>
-            batch_it_simple(100L) |>
+            batch_it(100L) |>
             purrr::map_df(bulk_lookup) |>
             unnest_codes() |>
             dplyr::rename(new_postcode = "postcode")
diff --git a/man/NHSRpostcodetools-package.Rd b/man/NHSRpostcodetools-package.Rd
index 9ae7389..3967fce 100644
--- a/man/NHSRpostcodetools-package.Rd
+++ b/man/NHSRpostcodetools-package.Rd
@@ -21,7 +21,7 @@ Useful links:
 
 Authors:
 \itemize{
-  \item Fran Barton \email{fbarton@alwaysdata.net}
+  \item Fran Barton \email{fbarton@alwaysdata.net} (\href{https://orcid.org/0000-0002-5650-1176}{ORCID})
 }
 
 Other contributors:
diff --git a/man/batch_it.Rd b/man/batch_it.Rd
index 6c909cd..0a4bb3d 100644
--- a/man/batch_it.Rd
+++ b/man/batch_it.Rd
@@ -2,75 +2,22 @@
 % Please edit documentation in R/batch_it.R
 \name{batch_it}
 \alias{batch_it}
-\alias{batch_it_simple}
-\title{Convert a list or vector to a batched list of its elements}
+\title{Batch a vector or list into a list of elements with a maximum size}
 \usage{
-batch_it(x, batches = NULL, proportion = NULL, maximise = FALSE, quiet = TRUE)
-
-batch_it_simple(x, batch_size)
+batch_it(x, batch_size)
 }
 \arguments{
-\item{x}{a vector, or a list flattenable to a vector}
-
-\item{batches}{numeric. The size (length) of batches to create. Can be a
-single value or multiple values (see Examples). Should be a whole,
-positive number, if provided, else \code{NULL}.}
-
-\item{proportion}{numeric. Proportional sizes of batches to be created.
-For example \code{c(4, 6)} will create two batches of approximately 40\% and
-60\% of the length of the target vector (\code{x}). When multiple
-\code{proportion} values are provided, these are not repeated.
-A single proportion less than 1 is repeated as many times as possible to
-get near to the length of the target vector. For example, a \code{proportion}
-of 0.1 will be treated as a tenth, and batch sizes will be rounded to
-an integer size nearest to a tenth of the length of \code{x}.}
-
-\item{maximise}{Boolean, \code{FALSE} by default. If \code{TRUE}, a vector of batch
-sizes will be partially repeated to fit maximally to the length
-of the target vector. See examples below.}
-
-\item{quiet}{Boolean, \code{TRUE} by default. Whether to show informative
-\verb{ui_*} messages from \code{{usethis}}.}
+\item{x}{A vector or list}
 
 \item{batch_size}{numeric. The size (length) of batches to create. Should be
-a single value (see Examples). If supplied as a decimal (<1), it will be
-interpreted as a proportion of \code{length(x)}.}
-}
-\value{
-All the elements of \code{x} batched into a list.
+a single positive integer value (see Examples).}
 }
 \description{
-Batch up a long vector, or list of vectors. For example so they can be
-passed via a \code{map} function to services with length-limited APIs.
+Batch a vector or list into a list of elements with a maximum size
 }
 \examples{
-batch_it(seq(2L, 60L, 2L), 6L)
-batch_it(seq(2L, 60L, 2L), proportion = 0.2)
-
-batch_it(1:100, batches = c(20L, 30L, 50L))
-batch_it(letters, batches = c(4L, 6L))
-batch_it(letters, batches = c(4L, 6L), maximise = TRUE)
-batch_it(letters, proportion = c(4L, 6L))
-
-# ----
-as_year <- function(x) {
-  lubridate::as_date(
-    lubridate::ymd(paste0(x, "-01-01")):
-    lubridate::ymd(paste0(x, "-12-31"))
-  )
-}
-month_lengths <- function(year) {
-  lubridate::as_date(paste0(year, "-", 1:12, "-01")) |>
-    lubridate::days_in_month()
-}
-batch_it(x = as_year(2022L), batches = month_lengths(2022L))
-
 # ----
-batch_it_simple(letters, 6L)
-batch_it_simple(letters, 0.45)
+batch_it(letters, 6L)
+batch_it(letters, 27L)
 
 }
-\seealso{
-\code{\link[=batch_it_simple]{batch_it_simple()}} which does the same thing but has fewer options
-and works just fine for simpler cases.
-}
diff --git a/vignettes/NHSRpostcodetools.Rmd b/vignettes/NHSRpostcodetools.Rmd
index 3715e79..5fe39db 100644
--- a/vignettes/NHSRpostcodetools.Rmd
+++ b/vignettes/NHSRpostcodetools.Rmd
@@ -39,14 +39,13 @@ Join this vector to the postcode data
 
 ```{r}
 postcode_data_join(postcodes, fix_invalid = TRUE)
-
 ```
 
 ### A tibble of postcodes
 
 ```{r}
 test_df1 <- dplyr::tibble(
-  place = paste0("place_", 1:3),
+  place = paste0("place_", seq(3L)),
   postcode = postcodes
 )
 ```
@@ -57,14 +56,12 @@ to be recognised.
 
 ```{r}
 postcode_data_join(test_df1, fix_invalid = TRUE)
-
 ```
 
 Note that the parameter `fix_invalid = TRUE` defaults to TRUE:
 
 ```{r}
 postcode_data_join(test_df1)
-
 ```
 
 And if it is set to FALSE the same message appears but the `new_postcode` is not
@@ -72,5 +69,4 @@ populated and has `NA`.
 
 ```{r}
 postcode_data_join(test_df1, fix_invalid = FALSE)
-
 ```