From cfc279b3649da948ed1b450dcc20fc8beae3d602 Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Tue, 26 Nov 2024 10:37:44 +0100
Subject: [PATCH 1/9] Add Artifact$open() method

---
 R/Artifact.R | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/R/Artifact.R b/R/Artifact.R
index f5ea10b..944f2ea 100644
--- a/R/Artifact.R
+++ b/R/Artifact.R
@@ -46,6 +46,32 @@ ArtifactRecord <- R6::R6Class( # nolint object_name_linter
       }
     },
     #' @description
+    #' Return a backed data object. Currently only supports TileDB-SOMA
+    #' artifacts.
+    #'
+    #' @return A [tiledbsoma::SOMACollection] or [tiledbsoma::SOMAExperiment]
+    #'   object
+    open = function() {
+      is_tiledbsoma <- private$get_value("suffix") == ".tiledbsoma" ||
+        private$get_value("_accessor") == "tiledbsoma"
+
+      if (!is_tiledbsoma) {
+        cli::cli_abort(
+          "The {.code open} method is only supported for TileDB-SOMA artifacts"
+        )
+      }
+
+      check_requires("Opening TileDB-SOMA artifacts", "tiledbsoma")
+
+      artifact_uri <- paste0(
+        private$get_value("storage")$root,
+        "/",
+        private$get_value("key")
+      )
+
+      tiledbsoma::SOMAOpen(artifact_uri)
+    },
+    #' @description
     #' Print a more detailed description of an `ArtifactRecord`
     #'
     #' @param style Logical, whether the output is styled using ANSI codes

From ad1e016f29d80a06dbcc5d602ac2607c2fdb84f0 Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Tue, 26 Nov 2024 11:24:38 +0100
Subject: [PATCH 2/9] Modify check_requires() to handle extra repos

Also adjust alert types and add tests
---
 R/Artifact.R                |  5 +++-
 R/Instance.R                |  2 +-
 R/utils.R                   | 49 +++++++++++++++++++++++++------------
 tests/testthat/test-utils.R | 15 ++++++++++++
 4 files changed, 54 insertions(+), 17 deletions(-)

diff --git a/R/Artifact.R b/R/Artifact.R
index 944f2ea..af4e913 100644
--- a/R/Artifact.R
+++ b/R/Artifact.R
@@ -61,7 +61,10 @@ ArtifactRecord <- R6::R6Class( # nolint object_name_linter
         )
       }

-      check_requires("Opening TileDB-SOMA artifacts", "tiledbsoma")
+      check_requires(
+        "Opening TileDB-SOMA artifacts", "tiledbsoma",
+        extra_repos = "https://chanzuckerberg.r-universe.dev"
+      )

       artifact_uri <- paste0(
         private$get_value("storage")$root,
diff --git a/R/Instance.R b/R/Instance.R
index 2444258..b32b275 100644
--- a/R/Instance.R
+++ b/R/Instance.R
@@ -61,7 +61,7 @@ create_instance <- function(instance_settings, is_default = FALSE) {
   py_lamin <- NULL

   if (isTRUE(is_default)) {
-    check_requires("Connecting to Python", "reticulate", type = "warning")
+    check_requires("Connecting to Python", "reticulate", alert = "warning")

     py_lamin <- tryCatch(
       reticulate::import("lamindb"),
diff --git a/R/utils.R b/R/utils.R
index f6de669..80437d8 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -6,37 +6,56 @@
 #' @param what A message stating what the packages are required for. Used at the
 #'   start of the error message e.g. "{what} requires...".
 #' @param requires Character vector of required package names
-#' @param type Type of message to give if packages are missing
+#' @param alert Type of message to give if packages are missing
+#' @param extra_repos Additional repositories that are required to install the
+#'   checked packages
 #'
 #' @return Invisibly, Boolean whether or not all packages are available or
 #'   raises an error if any are missing and `type = "error"`
 #' @noRd
-check_requires <- function(what, requires, type = c("error", "warning")) {
-  type <- match.arg(type)
+check_requires <- function(what, requires,
+                           alert = c("error", "warning", "message", "none"),
+                           extra_repos = NULL) {
+  alert <- match.arg(alert)

   is_available <- map_lgl(requires, requireNamespace, quietly = TRUE)

-  msg_fun <- switch(type,
+  msg_fun <- switch(alert,
     error = cli::cli_abort,
-    warning = cli::cli_warn
+    warning = cli::cli_warn,
+    message = cli::cli_inform,
+    none = NULL
   )

-  if (any(!is_available)) {
+  if (any(!is_available) && !is.null(msg_fun)) {
     missing <- requires[!is_available]
     missing_str <- paste0("'", paste(missing, collapse = "', '"), "'") # nolint object_usage_linter
-    msg_fun(
-      c(
-        "{what} requires the {.pkg {missing}} package{?s}",
-        "i" = paste(
-          "Install {cli::qty(missing)}{?it/them} using",
-          "{.run install.packages(c({missing_str}))}"
+
+    msg <- "{what} requires the {.pkg {missing}} package{?s}"
+
+    if (!is.null(extra_repos)) {
+      msg <- c(
+        msg,
+        "i" = paste0(
+          "Add repositories using {.run options(repos = c(",
+          paste0("'", paste(extra_repos, collapse = "', '"), "'"),
+          ", getOption('repos'))}, then:"
        )
-      ),
-      call = rlang::caller_env()
+      )
+    }
+
+    msg <- c(
+      msg,
+      "i" = paste(
+        "Install {cli::qty(missing)}{?it/them} using",
+        "{.run install.packages(c({missing_str}))}"
+      )
     )
+
+    msg_fun(msg, call = rlang::caller_env())
   }

-  invisible(all(is_available))
+  invisible(any(is_available))
 }

 #' Check if we are in a knitr notebook
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index ea58623..c9c4ff6 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -1,7 +1,22 @@
 test_that("check_requires works", {
   expect_true(check_requires("Imported packages", "cli"))
+
   expect_error(
     check_requires("Missing packages", "a_missing_package"),
     regexp = "Missing packages requires"
   )
+
+  expect_warning(
+    check_requires("Missing packages", "a_missing_package", alert = "warning"),
+    regexp = "Missing packages requires"
+  )
+
+  expect_message(
+    check_requires("Missing packages", "a_missing_package", alert = "message"),
+    regexp = "Missing packages requires"
+  )
+
+  expect_false(
+    check_requires("Missing packages", "a_missing_package", alert = "none")
+  )
 })

From 95a7f9ae70697361e2f8b4a05a3634ec0bd93085 Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Tue, 26 Nov 2024 11:28:33 +0100
Subject: [PATCH 3/9] Install {tiledbsoma} on CI

If it works...
---
 .github/workflows/R-CMD-check.yaml | 6 ++++++
 .github/workflows/pkgdown.yaml     | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index e53ab3b..c742426 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -62,6 +62,12 @@ jobs:
           echo "LDFLAGS=-L$OPENBLAS/lib" >> $GITHUB_ENV
           echo "CPPFLAGS=-I$OPENBLAS/include" >> $GITHUB_ENV

+      - name: Install {tiledbsoma}
+        run: |
+          options(repos = c("https://chanzuckerberg.r-universe.dev", getOption(repos)))
+          install.packages("tiledbsoma")
+        shell: Rscript {0}
+
       - name: Install lamindb
         run: |
           pip install lamindb[aws]
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index 96daebc..8dbb8f7 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -41,6 +41,12 @@ jobs:
           needs: website
           quarto-version: pre-release

+      - name: Install {tiledbsoma}
+        run: |
+          options(repos = c("https://chanzuckerberg.r-universe.dev", getOption(repos)))
+          install.packages("tiledbsoma")
+        shell: Rscript {0}
+
       - name: Install lamindb
         run: |
           pip install lamindb[aws]

From 7940721d09ef33a2bb0b67c695910136e8ce8cf8 Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Tue, 26 Nov 2024 11:32:11 +0100
Subject: [PATCH 4/9] Only try to install {tiledbsoma} on Ubuntu GHA

---
 .github/workflows/R-CMD-check.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index c742426..fe07141 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -63,6 +63,7 @@ jobs:
           echo "CPPFLAGS=-I$OPENBLAS/include" >> $GITHUB_ENV

       - name: Install {tiledbsoma}
+        if: runner.os == 'Linux'
         run: |
           options(repos = c("https://chanzuckerberg.r-universe.dev", getOption(repos)))
           install.packages("tiledbsoma")

From 8a284fa4c94cad2eb0c9fdc2d11a0807e6ee41e8 Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Tue, 26 Nov 2024 12:15:21 +0100
Subject: [PATCH 5/9] Replace {cellxgene.census} example in Get Started

Use Artifact$open() to access a store instead
---
 vignettes/laminr.Rmd | 45 +++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/vignettes/laminr.Rmd b/vignettes/laminr.Rmd
index 151ae0d..ce5555d 100644
--- a/vignettes/laminr.Rmd
+++ b/vignettes/laminr.Rmd
@@ -16,7 +16,6 @@ knitr::opts_chunk$set(
 # actually upload results to the LaminDB instance
 # -> testuser1 is a test account that cannot upload results
 submit_eval <- laminr:::.get_user_settings()$handle != "testuser1"
-submit_eval <- FALSE
 ```

 This vignette introduces the basic **{laminr}** workflow.
@@ -149,29 +148,37 @@ DotPlot(seurat_obj, features = unique(markers$gene)) +
   ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))
 ```

-# Slice the tiledbsoma array store
+# Slice a TileDB-SOMA array store

-Alternatively to accessing individual CELLxGENE datasets from LaminDB, the **{cellxgene.census}** package can be used to slice the TileDB-SOMA array store for CELLxGENE Census, a concatenated version of most datasets in CELLxGENE.
+When artifacts contain TileDB-SOMA array stores, they can be opened and sliced using the [**{tiledbsoma}** package](https://single-cell-data.github.io/TileDB-SOMA/index.html).

+```{r slice-tiledbsoma, eval = requireNamespace("tiledbsoma", quietly = TRUE)}
+# Set some environment variables to avoid an issue with {tiledbsoma}
+# https://github.com/chanzuckerberg/cellxgene-census/issues/1261
+Sys.setenv(TILEDB_VFS_S3_REGION = "us-west-2")
+Sys.setenv(AWS_DEFAULT_REGION = "us-west-2")
+Sys.setenv(TILEDB_VFS_S3_NO_SIGN_REQUEST = "true")
-```{r slice-tiledbsoma, eval=FALSE}
-library(cellxgene.census)

-census <- open_soma()
-
-organism <- "Homo sapiens"
-gene_filter <- "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')"
-cell_filter <- "cell_type == 'sympathetic neuron'"
-cell_columns <- c(
-  "assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease"
+# Define a filter to select specific cells
+value_filter <- paste(
+  "tissue == 'brain' &&",
+  "cell_type %in% c('microglial cell', 'neuron') &&",
+  "suspension_type == 'cell' &&",
+  "assay == '10x 3\\' v3'"
 )

-seurat_obj2 <- get_seurat(
-  census = census,
-  organism = organism,
-  var_value_filter = gene_filter,
-  obs_value_filter = cell_filter,
-  obs_column_names = cell_columns
-)
+# Get the artifact containing the CELLxGENE Census TileDB-SOMA store
+census_artifact <- cellxgene$Artifact$get("FYMewVq5twKMDXVy0001")
+# Open the SOMACollection
+soma_collection <- census_artifact$open()
+# Slice the store to get a SOMADataFrame containing metadata for the cells of interest
+cell_metadata <- soma_collection$get("census_data")$get("homo_sapiens")$obs$read(value_filter = value_filter)
+# Concatenate the results to an arrow::Table
+cell_metadata <- cell_metadata$concat()
+# Convert to a data.frame
+cell_metadata <- cell_metadata$to_data_frame()
+
+cell_metadata
 ```

 # Save the results

From 7b8acc345357738296515151cb244330575177ad Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Tue, 26 Nov 2024 12:57:27 +0100
Subject: [PATCH 6/9] Fix installing {tiledbsoma} in GHA

---
 .github/workflows/R-CMD-check.yaml | 2 +-
 .github/workflows/pkgdown.yaml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index fe07141..c65b12f 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -65,7 +65,7 @@ jobs:
       - name: Install {tiledbsoma}
         if: runner.os == 'Linux'
         run: |
-          options(repos = c("https://chanzuckerberg.r-universe.dev", getOption(repos)))
+          options(repos = c("https://chanzuckerberg.r-universe.dev", getOption("repos")))
           install.packages("tiledbsoma")
         shell: Rscript {0}

diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index 8dbb8f7..778c7c1 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -43,7 +43,7 @@ jobs:

       - name: Install {tiledbsoma}
         run: |
-          options(repos = c("https://chanzuckerberg.r-universe.dev", getOption(repos)))
+          options(repos = c("https://chanzuckerberg.r-universe.dev", getOption("repos")))
           install.packages("tiledbsoma")
         shell: Rscript {0}

From 07ff95d4a7a3f5ef4e37125ec73545d37af3192f Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Tue, 26 Nov 2024 15:34:09 +0100
Subject: [PATCH 7/9] Update architecture vignette

---
 vignettes/architecture.qmd | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vignettes/architecture.qmd b/vignettes/architecture.qmd
index 205f3a7..613deac 100644
--- a/vignettes/architecture.qmd
+++ b/vignettes/architecture.qmd
@@ -417,6 +417,7 @@ classDiagram
     +...field value accessors...
     +cache(): String
     +load(): AnnData | DataFrame | ...
+    +open(): SOMACollection | SOMAExperiment
     +describe(): NULL
   }
   style Artifact fill:#ffe1c9

From bbaf11a0b9fe17e972797a8c65ec5f882cc54f9e Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Tue, 26 Nov 2024 15:38:15 +0100
Subject: [PATCH 8/9] Update development vignette

---
 vignettes/development.qmd | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vignettes/development.qmd b/vignettes/development.qmd
index a208475..5801deb 100644
--- a/vignettes/development.qmd
+++ b/vignettes/development.qmd
@@ -71,6 +71,8 @@ This document outlines the features of the **{laminr}** package and the roadmap
 * [x] **Delete artifacts**: Delete an existing artifact.
 * [ ] **Manage artifact metadata**: Add, update, and delete artifact metadata.
 * [ ] **Work with collections**: Create, manage, and query collections of artifacts.
+* [ ] **Stream backed artifacts**: Connect to file-backed artifacts (`$open`).
+  - [x] `tiledbsoma`: Stream TileDB-SOMA objects

 ### Track notebooks & scripts

@@ -141,6 +143,7 @@ A first version of the package that allows users to:
 * Implement data lineage visualization.
 * Introduce data curation features (validation, standardization, annotation).
 * Enhance support for bionty registries and ontology interactions.
+* Connect to TileDB-SOMA artifacts.

 ### Future versions


From 73e19683c7864454c096547c609529b8f86a7518 Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Tue, 26 Nov 2024 15:40:40 +0100
Subject: [PATCH 9/9] Update CHANGELOG

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 53ce999..2bd37ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+# laminr devel
+
+## NEW FUNCTIONALITY
+
+- Add an `open()` method to the `Artifact` class to connect to TileDB-SOMA stores (PR #117).
+
 # laminr v0.2.0

 This release adds support for creating new artifacts in a LaminDB instance.
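
For reviewers, a minimal usage sketch of the workflow this series enables, pieced together from the Get Started vignette changes in PATCH 5/9; it is not itself part of the patches. It assumes the optional {tiledbsoma} package is installed (from https://chanzuckerberg.r-universe.dev), that the `cellxgene` instance object is created with `connect("laminlabs/cellxgene")` as in the vignette, and that network access to that instance is available. The artifact ID is the one used in the vignette.

```r
library(laminr)

# Connect to the public CELLxGENE instance (as in the Get Started vignette)
cellxgene <- connect("laminlabs/cellxgene")

# Get the artifact wrapping the CELLxGENE Census TileDB-SOMA store
census_artifact <- cellxgene$Artifact$get("FYMewVq5twKMDXVy0001")

# open() aborts for non-TileDB-SOMA artifacts and, via the updated
# check_requires(extra_repos = ...), points to the extra repository if
# {tiledbsoma} is missing; otherwise it returns a backed SOMA object
soma_collection <- census_artifact$open()

# Slice cell metadata from the backed store without downloading it,
# using the same accessors as the vignette example
cell_metadata <- soma_collection$get("census_data")$get("homo_sapiens")$obs$read(
  value_filter = "tissue == 'brain' && cell_type == 'neuron'"
)$concat()$to_data_frame()

head(cell_metadata)
```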