Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into issues-46-47/check-su…
Browse files Browse the repository at this point in the history
…ggests

* origin/main:
  Remove link tables from print output (#55)
  Add progress bar (#58)
  Add get_records() to API and df() to Registry (#54)
  • Loading branch information
lazappi committed Oct 25, 2024
2 parents 0983e4d + 668574f commit 2c497ce
Show file tree
Hide file tree
Showing 11 changed files with 222 additions and 39 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Features:
* Auto-generate modules and classes from the instance schema
* Fetch a record
* Fetch a record's related data
* Fetch record summary table
* Cache S3 artifact
* Load AnnData artifact

Expand All @@ -23,6 +24,8 @@ For more information, please visit the [package website](https://laminr.lamin.ai

* Add `to_string()` and `print()` methods to remaining classes (PR #31)

* Add `InstanceAPI$get_records()` and `Registry$df()` methods (PR #54)

## MAJOR CHANGES

* Refactored the internal class data structures for better modularity and extensibility (PR #8).
Expand All @@ -40,6 +43,10 @@ For more information, please visit the [package website](https://laminr.lamin.ai

* Define a current user and current instance with lamin-cli prior to testing and generating documentation in the CI (PR #23).

* Add progress bars to `Artifact$cache()` (PR #58)

* Remove link tables from object print output (PR #55)

* Improve checking for suggested packages and provide installation instructions if missing (PR #56)

## TESTING
Expand Down
1 change: 1 addition & 0 deletions R/Artifact.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ ArtifactRecord <- R6::R6Class( # nolint object_name_linter
s3::s3_get(
paste0(artifact_storage$root, "/", artifact_key),
region = artifact_storage$region,
progress = TRUE,
data_dir = root_dir
)
} else {
Expand Down
16 changes: 1 addition & 15 deletions R/Instance.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,19 +175,10 @@ Instance <- R6::R6Class( # nolint object_name_linter
}
)

link_lines <- purrr::map_chr(
names(registries)[is_link_table],
function(.registry) {
cli::col_blue(paste0(" ", .registry))
}
)

lines <- c(
cli::style_bold(cli::col_green(private$.settings$name)),
cli::style_italic(cli::col_magenta(" Core registries")),
standard_lines,
cli::style_italic(cli::col_magenta(" Core link tables")),
link_lines
standard_lines
)

module_names <- self$get_module_names()
Expand Down Expand Up @@ -230,11 +221,6 @@ Instance <- R6::R6Class( # nolint object_name_linter
collapse = ", "
),
"]"
),
"CoreLinkTables" = paste0(
"[",
paste(names(registries[is_link_table]), collapse = ", "),
"]"
)
)

Expand Down
93 changes: 93 additions & 0 deletions R/InstanceAPI.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,99 @@ InstanceAPI <- R6::R6Class( # nolint object_name_linter
tolower(include_foreign_keys)
)

if (verbose) {
cli_inform("URL: {url}")
cli_inform("Body: {jsonlite::minify(body)}")
}

response <- httr::POST(
url,
httr::add_headers(
accept = "application/json",
`Content-Type` = "application/json"
),
body = body
)

private$process_response(response, "get record")
},
#' @description
#' Get a summary of available records from the instance.
#'
#' @param module_name Name of the module to query, e.g. "core"
#' @param registry_name Name of the registry to query
#' @param limit Maximum number of records to return
#' @param offset Offset for the first returned record
#' @param limit_to_many Maximum number of related foreign fields to return
#' @param include_foreign_keys Boolean, whether to return foreign keys
#' @param search Search string included in the query body
#' @param verbose Boolean, whether to print progress messages
#'
#' @return Content of the API response, if successful a list of record
#' summaries
#'
#' @importFrom jsonlite toJSON
get_records = function(module_name,
registry_name,
limit = 50,
offset = 0,
limit_to_many = 10,
include_foreign_keys = FALSE,
search = NULL,
verbose = FALSE) {
if (!is.null(search) && !is.character(search)) {
cli_abort("search must be a character vector")
}

if (limit > 200) {
cli::cli_abort("This API call is limited to 200 results per call")
}

if (verbose) {
cli_inform(c(
paste0(
"Getting records from module '", module_name, "', ",
"registry '", registry_name, "' with the following arguments:"
),
" " = "limit: {limit}",
" " = "offset: {offset}",
" " = "limit_to_many: {limit_to_many}",
" " = "include_foreign_keys: {include_foreign_keys}",
" " = "search: '{search}'"
))
}

body_data <- list(search = jsonlite::unbox(""))
if (!is.null(search)) {
body_data$search <- jsonlite::unbox(search)
}
body <- jsonlite::toJSON(body_data)

url <- paste0(
private$.instance_settings$api_url,
"/instances/",
private$.instance_settings$id,
"/modules/",
module_name,
"/",
registry_name,
"?schema_id=",
private$.instance_settings$schema_id,
"&limit=",
limit,
"&offset=",
offset,
"&limit_to_many=",
limit_to_many,
"&include_foreign_keys=",
tolower(include_foreign_keys)
)

if (verbose) {
cli_inform("URL: {url}")
cli_inform("Body: {jsonlite::minify(body)}")
}

response <- httr::POST(
url,
httr::add_headers(
Expand Down
16 changes: 1 addition & 15 deletions R/Module.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,19 +113,10 @@ Module <- R6::R6Class( # nolint object_name_linter
}
)

link_lines <- purrr::map_chr(
names(registries)[is_link_table],
function(.registry) {
cli::col_blue(paste0(" ", .registry))
}
)

lines <- c(
cli::style_bold(cli::col_green(private$.module_name)),
cli::style_italic(cli::col_magenta(" Registries")),
standard_lines,
cli::style_italic(cli::col_magenta(" Link tables")),
link_lines
standard_lines
)

if (isFALSE(style)) {
Expand Down Expand Up @@ -158,11 +149,6 @@ Module <- R6::R6Class( # nolint object_name_linter
collapse = ", "
),
"]"
),
"LinkTables" = paste0(
"[",
paste(names(registries[is_link_table]), collapse = ", "),
"]"
)
),
quote_strings = FALSE
Expand Down
68 changes: 68 additions & 0 deletions R/Registry.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,74 @@ Registry <- R6::R6Class( # nolint object_name_linter
private$.record_class$new(data = data)
},
#' @description
#' Get a data frame summarising records in the registry
#'
#' @param limit Maximum number of records to return
#' @param verbose Boolean, whether to print progress messages
#'
#' @return A data.frame containing the available records
df = function(limit = 100, verbose = FALSE) {
# The API is limited to 200 records at a time so we need multiple requests
n_requests <- ceiling(limit / 200)
if ((verbose && n_requests > 1) || n_requests >= 10) {
caution <- ifelse(n_requests >= 10, "CAUTION: ", "")
cli::cli_alert_warning(
"{caution}Retrieving {limit} records will require up to {n_requests} API requests"
)
}

data_list <- list()
attr(data_list, "finished") <- FALSE
data_list <- purrr::reduce(
cli::cli_progress_along(seq_len(n_requests), name = "Sending requests"),
\(.data_list, .n) {
# Hacky way of avoiding unneeded requests until there is an easy way
# to get the total number of records
if (isTRUE(attr(.data_list, "finished"))) {
return(.data_list)
}

offset <- (.n - 1) * 200
# Reduce limit for final request to get the correct total
current_limit <- ifelse(.n == n_requests, limit %% 200, 200)
if (verbose) {
cli::cli_alert_info(
"Requesting records {offset} to {offset + current_limit}..."
)
}
current_data <- private$.api$get_records(
module_name = private$.module$name,
registry_name = private$.registry_name,
limit = current_limit,
offset = offset,
verbose = verbose
)

.data_list <- c(.data_list, current_data)
# If not the final request and less than 200 records returned then
# there are no more records and we can skip remaining requests
if ((.n != n_requests) && length(.data_list) < 200) {
cli::cli_alert_info(paste(
"Found all records. Stopping early with {length(.data_list)}",
"record{?s} after {(.n)} request{?s}."
))
attr(.data_list, "finished") <- TRUE
}

return(.data_list)
},
.init = data_list
)

data_list |>
# Replace NULL with NA so columns aren't lost
purrr::modify_depth(2, \(x) ifelse(is.null(x), NA, x)) |>
# Convert each entry to a data.frame
purrr::map(as.data.frame) |>
# Bind entries as rows
purrr::list_rbind()
},
#' @description
#' Get the fields in the registry.
#'
#' @return A list of [Field] objects.
Expand Down
9 changes: 0 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,6 @@ db
$FeatureSet
$ParamValue
$FeatureValue
Core link tables
runparamvalue
artifactulabel
collectionulabel
featuresetfeature
artifactfeatureset
artifactparamvalue
collectionartifact
artifactfeaturevalue
Additional modules
bionty

Expand Down
23 changes: 23 additions & 0 deletions man/Registry.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/laminr-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions tests/testthat/test-Registry.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
skip_if_offline()

test_that("df works", {
local_setup_lamindata_instance()

db <- connect("laminlabs/lamindata")

records <- db$Storage$df()

expect_s3_class(records, "data.frame")
expect_true(all(c("description", "created_at", "id", "uid") %in% colnames(records)))
})
15 changes: 15 additions & 0 deletions tests/testthat/test-instance_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,18 @@ test_that("get_record fails gracefully", {
)
# nolint end: commented_code
})

test_that("get_records works", {
local_setup_lamindata_instance()

instance_file <- .settings_store__instance_settings_file("laminlabs", "lamindata")
instance_settings <- .settings_load__load_instance_settings()

api <- InstanceAPI$new(instance_settings)

records <- api$get_records("core", "storage")

expect_type(records, "list")
expect_type(records[[1]], "list")
expect_true(all(c("description", "created_at", "id", "uid") %in% names(records[[1]])))
})

0 comments on commit 2c497ce

Please sign in to comment.