From 41e7eb0cbbb98d264a47334ea42ae06fb9430b97 Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Tue, 19 Sep 2023 15:17:03 +0200 Subject: [PATCH] Anndata obsm varm3 (#125) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Start obsm/ varm * InMemory + tests * Add valdation * Start roundtrip * Start validation * Style * Get tests running * Remove some TODOs * obsp/ varp * Don't add row/ col names in validation (still needs tests) * tests obsp, varp validation * Consolidate layer validation with aligned mapping validation * Shorten lines * Fix repeated trackstatus item * add a helpful error message when duplicate trackstatus are found * add docs, change trackstatus from wip to done * try line length fix * Manually 😭 fix linting issues * Bump build * import testthat? --------- Co-authored-by: Robrecht Cannoodt --- .lintr | 2 +- R/AbstractAnnData.R | 104 ++++++++++++++++---- R/AnnData.R | 24 ++++- R/HDF5AnnData.R | 132 +++++++++++++++++++++++++- R/InMemoryAnnData.R | 117 ++++++++++++++++++++++- man/AbstractAnnData.Rd | 12 +++ man/AnnData.Rd | 22 ++++- man/HDF5AnnData.Rd | 34 ++++++- man/InMemoryAnnData.Rd | 34 ++++++- tests/testthat/test-HDF5AnnData.R | 57 +++++++++++ tests/testthat/test-InMemoryAnnData.R | 22 ++++- tests/testthat/test-roundtrip.R | 70 ++++++++++++++ 12 files changed, 596 insertions(+), 34 deletions(-) create mode 100644 tests/testthat/test-roundtrip.R diff --git a/.lintr b/.lintr index 93f199ec..7497bbf0 100644 --- a/.lintr +++ b/.lintr @@ -1,4 +1,4 @@ linters: linters_with_defaults( line_length_linter = line_length_linter(120L), - object_name_linter = object_name_linter(styles = c("snake_case", "symbols", "CamelCase")) + object_name_linter = object_name_linter(styles = c("snake_case", "symbols", "CamelCase", "SNAKE_CASE")) ) diff --git a/R/AbstractAnnData.R b/R/AbstractAnnData.R index 4ce69690..754b5786 100644 --- a/R/AbstractAnnData.R +++ b/R/AbstractAnnData.R @@ -49,6 +49,26 @@ AbstractAnnData <- 
R6::R6Class("AbstractAnnData", # nolint #' vector. var_names = function(value) { .abstract_function("ad$var_names") + }, + #' @field obsm The obsm slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows as `obs`. + obsm = function(value) { + .abstract_function("ad$obsm") + }, + #' @field varm The varm slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows as `var`. + varm = function(value) { + .abstract_function("ad$varm") + }, + #' @field obsp The obsp slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows and columns as `obs`. + obsp = function(value) { + .abstract_function("ad$obsp") + }, + #' @field varp The varp slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows and columns as `var`. + varp = function(value) { + .abstract_function("ad$varp") } ), public = list( @@ -158,31 +178,79 @@ AbstractAnnData <- R6::R6Class("AbstractAnnData", # nolint mat }, - # @description `.validate_layers()` checks for named lists and - # correct dimensions on elements. - # @param layers A named list of 0 or more matrix elements with - # dimensions consistent with `obs` and `var`. - .validate_layers = function(layers) { - if (is.null(layers)) { - return(layers) + + # @description `.validate_aligned_array()` checks that dimensions are + # consistent with the anndata object. + # @param mat A matrix to validate + # @param label Must be `"X"` or `"layer[[...]]"` where `...` is + # the name of a layer. 
+ # @param shape Expected dimensions of matrix + # @param expected_rownames + # @param expected_colnames + .validate_aligned_array = function(mat, label, shape, expected_rownames = NULL, expected_colnames = NULL) { + mat_dims <- dim(mat) + for (i in seq_along(shape)) { + expected_dim <- shape[i] + found_dim <- mat_dims[i] + if (found_dim != expected_dim) { + stop("dim(", label, ")[", i, "] should have shape: ", expected_dim, ", found: ", found_dim, ".") + } + } + if (!is.null(expected_rownames) & !is.null(rownames(mat))) { + if (!identical(rownames(mat), expected_rownames)) { + stop("rownames(", label, ") should be the same as expected_rownames") + } + } + if (!is.null(rownames(mat))) { + warning(wrap_message( + "rownames(", label, ") should be NULL, removing them from the matrix" + )) + rownames(mat) <- NULL + } + if (!is.null(expected_colnames) & !is.null(colnames(mat))) { + if (!identical(colnames(mat), expected_colnames)) { + stop("colnames(", label, ") should be the same as expected_colnames") + } + } + if (!is.null(colnames(mat))) { + warning(wrap_message( + "colnames(", label, ") should be NULL, removing them from the matrix" + )) + colnames(mat) <- NULL } - ## layers and names - layer_names <- names(layers) - if (!is.list(layers) || is.null(layer_names)) { - stop("'layers' must must be a named list") + mat + }, + # @description `.validate_aligned_mapping()` checks for named lists and + # correct dimensions on elements. + # @param collection A named list of 0 or more matrix elements + # whose entries will be validated + # @param label The label of the collection, used for error messages + # @param shape Expected dimensions of arrays. 
Arrays may have more dimensions than specified here + # @param expected_rownames + # @param expected_colnames + .validate_aligned_mapping = function(collection, label, shape, expected_rownames = NULL, expected_colnames = NULL) { + if (is.null(collection)) { + return(collection) } - if (any(!nzchar(layer_names))) { - stop("all 'layers' elements must have non-trivial names") + + collection_names <- names(collection) + if (!is.list(collection) || ((length(collection) != 0) && is.null(collection_names))) { + stop(paste0(label, " must be a named list, was ", class(collection))) } - ## layer elements - for (layer in layer_names) { - layer_name <- paste0("layers[[", layer, "]]") - private$.validate_matrix(layers[[layer]], layer_name) + for (mtx_name in collection_names) { + collection_name <- paste0(label, "[['", mtx_name, "']]") + private$.validate_aligned_array( + collection[[mtx_name]], + collection_name, + shape = shape, + expected_rownames = expected_rownames, + expected_colnames = expected_colnames + ) } - layers + collection }, # @description `.validate_obsvar_dataframe()` checks that the diff --git a/R/AnnData.R b/R/AnnData.R index 65d08a37..a09fa155 100644 --- a/R/AnnData.R +++ b/R/AnnData.R @@ -27,6 +27,18 @@ #' @param var Either `NULL` or a `data.frame` with columns containing #' information about variables. If `NULL`, an `n_vars`×0 data frame will #' automatically be generated. +#' @param obsm The obsm slot is used to store multi-dimensional annotation +#' arrays. It must be either `NULL` or a named list, where each element is a +#' matrix with `n_obs` rows and an arbitrary number of columns. +#' @param varm The varm slot is used to store multi-dimensional annotation +#' arrays. It must be either `NULL` or a named list, where each element is a +#' matrix with `n_vars` rows and an arbitrary number of columns. +#' @param obsp The obsp slot is used to store sparse multi-dimensional +#' annotation arrays. 
It must be either `NULL` or a named list, where each +#' element is a sparse matrix where each dimension has length `n_obs`. +#' @param varp The varp slot is used to store sparse multi-dimensional +#' annotation arrays. It must be either `NULL` or a named list, where each +#' element is a sparse matrix where each dimension has length `n_vars`. #' #' @export #' @@ -51,13 +63,21 @@ AnnData <- function( X = NULL, obs = NULL, var = NULL, - layers = NULL) { + layers = NULL, + obsm = NULL, + varm = NULL, + obsp = NULL, + varp = NULL) { InMemoryAnnData$new( obs_names = obs_names, var_names = var_names, X = X, obs = obs, var = var, - layers = layers + layers = layers, + obsm = obsm, + varm = varm, + obsp = obsp, + varp = varp ) } diff --git a/R/HDF5AnnData.R b/R/HDF5AnnData.R index b34bde7b..9703716d 100644 --- a/R/HDF5AnnData.R +++ b/R/HDF5AnnData.R @@ -9,7 +9,11 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint .n_obs = NULL, .n_vars = NULL, .obs_names = NULL, - .var_names = NULL + .var_names = NULL, + .obsm = NULL, + .varm = NULL, + .obsp = NULL, + .varp = NULL ), active = list( #' @field X The X slot @@ -32,10 +36,87 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint read_h5ad_element(private$.h5obj, "layers") } else { # trackstatus: class=HDF5AnnData, feature=set_layers, status=done - value <- private$.validate_layers(value) + value <- private$.validate_aligned_mapping( + value, + "layers", + c(self$n_obs(), self$n_vars()), + expected_rownames = rownames(self), + expected_colnames = colnames(self) + ) write_h5ad_element(value, private$.h5obj, "/layers") } }, + #' @field obsm The obsm slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows as `obs`. 
+ obsm = function(value) { + if (missing(value)) { + # trackstatus: class=HDF5AnnData, feature=get_obsm, status=done + read_h5ad_element(private$.h5obj, "obsm") + } else { + # trackstatus: class=HDF5AnnData, feature=set_obsm, status=done + value <- private$.validate_aligned_mapping( + value, + "obsm", + c(self$n_obs()), + expected_rownames = rownames(self) + ) + write_h5ad_element(value, private$.h5obj, "/obsm") + } + }, + #' @field varm The varm slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows as `var`. + varm = function(value) { + if (missing(value)) { + # trackstatus: class=HDF5AnnData, feature=get_varm, status=done + read_h5ad_element(private$.h5obj, "varm") + } else { + # trackstatus: class=HDF5AnnData, feature=set_varm, status=done + value <- private$.validate_aligned_mapping( + value, + "varm", + c(self$n_vars()), + expected_rownames = colnames(self) + ) + write_h5ad_element(value, private$.h5obj, "/varm") + } + }, + #' @field obsp The obsp slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows and columns as `obs`. + obsp = function(value) { + if (missing(value)) { + # trackstatus: class=HDF5AnnData, feature=get_obsp, status=done + read_h5ad_element(private$.h5obj, "obsp") + } else { + # trackstatus: class=HDF5AnnData, feature=set_obsp, status=done + value <- private$.validate_aligned_mapping( + value, + "obsp", + c(self$n_obs(), self$n_obs()), + expected_rownames = rownames(self), + expected_colnames = rownames(self) + ) + write_h5ad_element(value, private$.h5obj, "/obsp") + } + }, + #' @field varp The varp slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows and columns as `var`. 
+ varp = function(value) { + if (missing(value)) { + # trackstatus: class=HDF5AnnData, feature=get_varp, status=done + read_h5ad_element(private$.h5obj, "varp") + } else { + # trackstatus: class=HDF5AnnData, feature=set_varp, status=done + value <- private$.validate_aligned_mapping( + value, + "varp", + c(self$n_vars(), self$n_vars()), + expected_rownames = colnames(self), + expected_colnames = colnames(self) + ) + write_h5ad_element(value, private$.h5obj, "/varp") + } + }, + #' @field obs The obs slot obs = function(value) { if (missing(value)) { @@ -128,6 +209,18 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint #' @param var Either `NULL` or a `data.frame` with columns containing #' information about variables. If `NULL`, an `n_vars`×0 data frame will #' automatically be generated. + #' @param obsm The obsm slot is used to store multi-dimensional annotation + #' arrays. It must be either `NULL` or a named list, where each element is a + #' matrix with `n_obs` rows and an arbitrary number of columns. + #' @param varm The varm slot is used to store multi-dimensional annotation + #' arrays. It must be either `NULL` or a named list, where each element is a + #' matrix with `n_vars` rows and an arbitrary number of columns. + #' @param obsp The obsp slot is used to store sparse multi-dimensional + #' annotation arrays. It must be either `NULL` or a named list, where each + #' element is a sparse matrix where each dimension has length `n_obs`. + #' @param varp The varp slot is used to store sparse multi-dimensional + #' annotation arrays. It must be either `NULL` or a named list, where each + #' element is a sparse matrix where each dimension has length `n_vars`. #' #' @details #' The constructor creates a new HDF5 AnnData interface object. This can @@ -136,8 +229,17 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint #' must be specified. In both cases, any additional slots provided will be #' set on the created object. 
This will cause data to be overwritten if the #' file already exists. - initialize = function(file, obs_names = NULL, var_names = NULL, X = NULL, - obs = NULL, var = NULL, layers = NULL) { + initialize = function(file, + obs_names = NULL, + var_names = NULL, + X = NULL, + obs = NULL, + var = NULL, + layers = NULL, + obsm = NULL, + varm = NULL, + obsp = NULL, + varp = NULL) { if (!requireNamespace("rhdf5", quietly = TRUE)) { stop("The HDF5 interface requires the 'rhdf5' package to be installed") } @@ -200,6 +302,22 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint if (!is.null(layers)) { self$layers <- layers } + + if (!is.null(obsm)) { + self$obsm <- obsm + } + + if (!is.null(varm)) { + self$varm <- varm + } + + if (!is.null(obsp)) { + self$obsp <- obsp + } + + if (!is.null(varp)) { + self$varp <- varp + } }, #' @description Number of observations in the AnnData object @@ -257,8 +375,12 @@ to_HDF5AnnData <- function(adata, file) { # nolint X = adata$X, obs = adata$obs, var = adata$var, + obsm = adata$obsm, + varm = adata$varm, obs_names = adata$obs_names, var_names = adata$var_names, - layers = adata$layers + layers = adata$layers, + obsp = adata$obsp, + varp = adata$varp ) } diff --git a/R/InMemoryAnnData.R b/R/InMemoryAnnData.R index a45ed29a..47b098e1 100644 --- a/R/InMemoryAnnData.R +++ b/R/InMemoryAnnData.R @@ -35,7 +35,11 @@ InMemoryAnnData <- R6::R6Class("InMemoryAnnData", # nolint .obs = NULL, .var = NULL, .obs_names = NULL, - .var_names = NULL + .var_names = NULL, + .obsm = NULL, + .varm = NULL, + .obsp = NULL, + .varp = NULL ), active = list( #' @field X NULL or an observation x variable matrix (without @@ -59,7 +63,13 @@ InMemoryAnnData <- R6::R6Class("InMemoryAnnData", # nolint private$.layers } else { # trackstatus: class=InMemoryAnnData, feature=set_layers, status=done - private$.layers <- private$.validate_layers(value) + private$.layers <- private$.validate_aligned_mapping( + value, + "layers", + c(self$n_obs(), self$n_vars()), + 
expected_rownames = rownames(self), + expected_colnames = colnames(self) + ) self } }, @@ -118,6 +128,76 @@ InMemoryAnnData <- R6::R6Class("InMemoryAnnData", # nolint private$.var_names <- private$.validate_obsvar_names(value, "var") self } + }, + #' @field obsm The obsm slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows as `obs`. + obsm = function(value) { + if (missing(value)) { + # trackstatus: class=InMemoryAnnData, feature=get_obsm, status=done + private$.obsm + } else { + # trackstatus: class=InMemoryAnnData, feature=set_obsm, status=done + private$.obsm <- private$.validate_aligned_mapping( + value, + "obsm", + c(self$n_obs()), + expected_rownames = rownames(self) + ) + self + } + }, + #' @field varm The varm slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows as `var`. + varm = function(value) { + if (missing(value)) { + # trackstatus: class=InMemoryAnnData, feature=get_varm, status=done + private$.varm + } else { + # trackstatus: class=InMemoryAnnData, feature=set_varm, status=done + private$.varm <- private$.validate_aligned_mapping( + value, + "varm", + c(self$n_vars()), + expected_rownames = colnames(self) + ) + self + } + }, + #' @field obsp The obsp slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows and columns as `obs`. + obsp = function(value) { + if (missing(value)) { + # trackstatus: class=InMemoryAnnData, feature=get_obsp, status=done + private$.obsp + } else { + # trackstatus: class=InMemoryAnnData, feature=set_obsp, status=done + private$.obsp <- private$.validate_aligned_mapping( + value, + "obsp", + c(self$n_obs(), self$n_obs()), + expected_rownames = rownames(self), + expected_colnames = rownames(self) + ) + self + } + }, + #' @field varp The varp slot. Must be `NULL` or a named list with + #' with all elements having the same number of rows and columns as `var`. 
+ varp = function(value) { + if (missing(value)) { + # trackstatus: class=InMemoryAnnData, feature=get_varp, status=done + private$.varp + } else { + # trackstatus: class=InMemoryAnnData, feature=set_varp, status=done + private$.varp <- private$.validate_aligned_mapping( + value, + "varp", + c(self$n_vars(), self$n_vars()), + expected_rownames = colnames(self), + expected_colnames = colnames(self) + ) + self + } } ), public = list( @@ -144,7 +224,28 @@ InMemoryAnnData <- R6::R6Class("InMemoryAnnData", # nolint #' @param var Either `NULL` or a `data.frame` with columns containing information #' about variables. If `NULL`, an `n_vars`×0 data frame will automatically #' be generated. - initialize = function(obs_names, var_names, X = NULL, obs = NULL, var = NULL, layers = NULL) { + #' @param obsm The obsm slot is used to store multi-dimensional annotation + #' arrays. It must be either `NULL` or a named list, where each element is a + #' matrix with `n_obs` rows and an arbitrary number of columns. + #' @param varm The varm slot is used to store multi-dimensional annotation + #' arrays. It must be either `NULL` or a named list, where each element is a + #' matrix with `n_vars` rows and an arbitrary number of columns. + #' @param obsp The obsp slot is used to store sparse multi-dimensional + #' annotation arrays. It must be either `NULL` or a named list, where each + #' element is a sparse matrix where each dimension has length `n_obs`. + #' @param varp The varp slot is used to store sparse multi-dimensional + #' annotation arrays. It must be either `NULL` or a named list, where each + #' element is a sparse matrix where each dimension has length `n_vars`. 
+ initialize = function(obs_names, + var_names, + X = NULL, + obs = NULL, + var = NULL, + layers = NULL, + obsm = NULL, + varm = NULL, + obsp = NULL, + varp = NULL) { # write obs and var first, because these are used by other validators self$obs_names <- obs_names self$var_names <- var_names @@ -154,6 +255,10 @@ InMemoryAnnData <- R6::R6Class("InMemoryAnnData", # nolint self$var <- var self$X <- X self$layers <- layers + self$obsm <- obsm + self$varm <- varm + self$obsp <- obsp + self$varp <- varp } ) ) @@ -193,6 +298,10 @@ to_InMemoryAnnData <- function(adata) { # nolint var = adata$var, obs_names = adata$obs_names, var_names = adata$var_names, - layers = adata$layers + layers = adata$layers, + obsm = adata$obsm, + varm = adata$varm, + obsp = adata$obsp, + varp = adata$varp ) } diff --git a/man/AbstractAnnData.Rd b/man/AbstractAnnData.Rd index dde775c1..8ac448b8 100644 --- a/man/AbstractAnnData.Rd +++ b/man/AbstractAnnData.Rd @@ -36,6 +36,18 @@ used to identify each row of \code{var} and to act as an index into the variable dimension of the AnnData object. For compatibility with \emph{R} representations, \code{var_names} should be a character vector.} + +\item{\code{obsm}}{The obsm slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows as \code{obs}.} + +\item{\code{varm}}{The varm slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows as \code{var}.} + +\item{\code{obsp}}{The obsp slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows and columns as \code{obs}.} + +\item{\code{varp}}{The varp slot. 
Must be \code{NULL} or a named list with +with all elements having the same number of rows and columns as \code{var}.} } \if{html}{\out{}} } diff --git a/man/AnnData.Rd b/man/AnnData.Rd index 662627ac..3d9ef5da 100644 --- a/man/AnnData.Rd +++ b/man/AnnData.Rd @@ -10,7 +10,11 @@ AnnData( X = NULL, obs = NULL, var = NULL, - layers = NULL + layers = NULL, + obsm = NULL, + varm = NULL, + obsp = NULL, + varp = NULL ) } \arguments{ @@ -38,6 +42,22 @@ automatically be generated.} \item{layers}{Either \code{NULL} or a named list, where each element is an observation × variable matrix with dimensions consistent with \code{obs} and \code{var}.} + +\item{obsm}{The obsm slot is used to store multi-dimensional annotation +arrays. It must be either \code{NULL} or a named list, where each element is a +matrix with \code{n_obs} rows and an arbitrary number of columns.} + +\item{varm}{The varm slot is used to store multi-dimensional annotation +arrays. It must be either \code{NULL} or a named list, where each element is a +matrix with \code{n_vars} rows and an arbitrary number of columns.} + +\item{obsp}{The obsp slot is used to store sparse multi-dimensional +annotation arrays. It must be either \code{NULL} or a named list, where each +element is a sparse matrix where each dimension has length \code{n_obs}.} + +\item{varp}{The varp slot is used to store sparse multi-dimensional +annotation arrays. It must be either \code{NULL} or a named list, where each +element is a sparse matrix where each dimension has length \code{n_vars}.} } \description{ This class is used to represent an AnnData object in memory. diff --git a/man/HDF5AnnData.Rd b/man/HDF5AnnData.Rd index 2870e6a8..27c57934 100644 --- a/man/HDF5AnnData.Rd +++ b/man/HDF5AnnData.Rd @@ -18,6 +18,18 @@ Implementation of an in memory AnnData object. with with all elements having the dimensions consistent with \code{obs} and \code{var}.} +\item{\code{obsm}}{The obsm slot. 
Must be \code{NULL} or a named list with +with all elements having the same number of rows as \code{obs}.} + +\item{\code{varm}}{The varm slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows as \code{var}.} + +\item{\code{obsp}}{The obsp slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows and columns as \code{obs}.} + +\item{\code{varp}}{The varp slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows and columns as \code{var}.} + \item{\code{obs}}{The obs slot} \item{\code{var}}{The var slot} @@ -65,7 +77,11 @@ HDF5AnnData constructor X = NULL, obs = NULL, var = NULL, - layers = NULL + layers = NULL, + obsm = NULL, + varm = NULL, + obsp = NULL, + varp = NULL )}\if{html}{\out{}} } @@ -99,6 +115,22 @@ automatically be generated.} \item{\code{layers}}{Either \code{NULL} or a named list, where each element is an observation × variable matrix with dimensions consistent with \code{obs} and \code{var}.} + +\item{\code{obsm}}{The obsm slot is used to store multi-dimensional annotation +arrays. It must be either \code{NULL} or a named list, where each element is a +matrix with \code{n_obs} rows and an arbitrary number of columns.} + +\item{\code{varm}}{The varm slot is used to store multi-dimensional annotation +arrays. It must be either \code{NULL} or a named list, where each element is a +matrix with \code{n_vars} rows and an arbitrary number of columns.} + +\item{\code{obsp}}{The obsp slot is used to store sparse multi-dimensional +annotation arrays. It must be either \code{NULL} or a named list, where each +element is a sparse matrix where each dimension has length \code{n_obs}.} + +\item{\code{varp}}{The varp slot is used to store sparse multi-dimensional +annotation arrays. 
It must be either \code{NULL} or a named list, where each +element is a sparse matrix where each dimension has length \code{n_vars}.} } \if{html}{\out{}} } diff --git a/man/InMemoryAnnData.Rd b/man/InMemoryAnnData.Rd index c4392003..1a82e68e 100644 --- a/man/InMemoryAnnData.Rd +++ b/man/InMemoryAnnData.Rd @@ -60,6 +60,18 @@ used to identify each row of \code{var} and to act as an index into the variable dimension of the AnnData object. For compatibility with \emph{R} representations, \code{var_names} should be a character vector.} + +\item{\code{obsm}}{The obsm slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows as \code{obs}.} + +\item{\code{varm}}{The varm slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows as \code{var}.} + +\item{\code{obsp}}{The obsp slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows and columns as \code{obs}.} + +\item{\code{varp}}{The varp slot. Must be \code{NULL} or a named list with +with all elements having the same number of rows and columns as \code{var}.} } \if{html}{\out{}} } @@ -100,7 +112,11 @@ Inherits from AbstractAnnData. X = NULL, obs = NULL, var = NULL, - layers = NULL + layers = NULL, + obsm = NULL, + varm = NULL, + obsp = NULL, + varp = NULL )}\if{html}{\out{}} } @@ -133,6 +149,22 @@ be generated.} \item{\code{layers}}{Either \code{NULL} or a named list, where each element is an observation × variable matrix with dimensions consistent with \code{obs} and \code{var}.} + +\item{\code{obsm}}{The obsm slot is used to store multi-dimensional annotation +arrays. It must be either \code{NULL} or a named list, where each element is a +matrix with \code{n_obs} rows and an arbitrary number of columns.} + +\item{\code{varm}}{The varm slot is used to store multi-dimensional annotation +arrays. 
It must be either \code{NULL} or a named list, where each element is a +matrix with \code{n_vars} rows and an arbitrary number of columns.} + +\item{\code{obsp}}{The obsp slot is used to store sparse multi-dimensional +annotation arrays. It must be either \code{NULL} or a named list, where each +element is a sparse matrix where each dimension has length \code{n_obs}.} + +\item{\code{varp}}{The varp slot is used to store sparse multi-dimensional +annotation arrays. It must be either \code{NULL} or a named list, where each +element is a sparse matrix where each dimension has length \code{n_vars}.} } \if{html}{\out{}} } diff --git a/tests/testthat/test-HDF5AnnData.R b/tests/testthat/test-HDF5AnnData.R index 240dd2f6..8819616f 100644 --- a/tests/testthat/test-HDF5AnnData.R +++ b/tests/testthat/test-HDF5AnnData.R @@ -27,6 +27,63 @@ test_that("reading layers works", { ) }) +test_that("reading obsm works", { + obsm <- adata$obsm + expect_true(is.list(obsm), "list") + expect_equal( + names(obsm), + c("X_pca", "X_umap") + ) +}) + +test_that("reading varm works", { + varm <- adata$varm + expect_true(is.list(varm), "list") + expect_equal( + names(varm), + c("PCs") + ) +}) + +test_that("obsm/ varm validation", { + N_OBS <- 5 + N_VAR <- 3 + + mtx <- matrix( + 0, + N_OBS, + N_VAR + ) + + adata <- AnnData( + X = mtx, + obs_names = as.character(1:N_OBS), + var_names = as.character(1:N_VAR) + ) + + adata$obsm <- list(PCA = matrix(0, N_OBS, 4)) + adata$varm <- list(PCs = matrix(0, N_VAR, 4)) + + expect_error(adata$obsm <- list(PCA = matrix(0, 4, 4))) + expect_error(adata$varm <- list(PCs = matrix(0, 4, 4))) +}) + +test_that("obsp/ varp validation", { + N_OBS <- 5 + N_VAR <- 3 + + adata <- AnnData( + obs_names = as.character(1:N_OBS), + var_names = as.character(1:N_VAR) + ) + + adata$obsp <- list(graph1 = matrix(0, N_OBS, N_OBS)) + adata$varp <- list(graph1 = matrix(0, N_VAR, N_VAR)) + + expect_error(adata$obsp <- list(graph1 = matrix(0, 4, 4))) + expect_error(adata$varp <- 
list(graph1 = matrix(0, 4, 4))) +}) + # trackstatus: class=HDF5AnnData, feature=test_get_obs, status=done test_that("reading obs works", { obs <- adata$obs diff --git a/tests/testthat/test-InMemoryAnnData.R b/tests/testthat/test-InMemoryAnnData.R index 75dc48ff..37524552 100644 --- a/tests/testthat/test-InMemoryAnnData.R +++ b/tests/testthat/test-InMemoryAnnData.R @@ -1,5 +1,8 @@ dummy <- dummy_data(10L, 20L) +file <- system.file("extdata", "example.h5ad", package = "anndataR") +adata <- read_h5ad(file, to = "InMemoryAnnData") + # GETTERS ---------------------------------------------------------------- test_that("create inmemory anndata", { ad <- AnnData( @@ -132,7 +135,6 @@ test_that("'layers' works", { layers_test(obs_names, var_names, list(A = matrix(0, 3, 5), B = matrix(1, 3, 5))) ## must be a named list - expect_error(AnnData(obs_names = obs_names, var_names = var_names, layers = list())) layers <- list(matrix(0, 3, 5)) expect_error(AnnData(obs_names = obs_names, var_names = var_names, layers = layers)) ## non-trivial names @@ -145,6 +147,24 @@ test_that("'layers' works", { expect_error(AnnData(obs_names = obs_names, var_names = var_names, layers = layers)) }) +test_that("reading obsm works", { + obsm <- adata$obsm + expect_true(is.list(obsm), "list") + expect_equal( + names(obsm), + c("X_pca", "X_umap") + ) +}) + +test_that("reading varm works", { + varm <- adata$varm + expect_true(is.list(varm), "list") + expect_equal( + names(varm), + c("PCs") + ) +}) + test_that("*_keys() works", { obs_names <- var_names <- character(0) ad <- AnnData(obs_names = obs_names, var_names = var_names) diff --git a/tests/testthat/test-roundtrip.R b/tests/testthat/test-roundtrip.R new file mode 100644 index 00000000..48fba1d6 --- /dev/null +++ b/tests/testthat/test-roundtrip.R @@ -0,0 +1,70 @@ +library(testthat) + +h5ad_file <- tempfile(pattern = "hdf5_write_", fileext = ".h5ad") + +base_file <- system.file("extdata", "example.h5ad", package = "anndataR") + + +gen_adata <- 
function(type) { + library(Matrix) + N_OBS <- 10 + N_VAR <- 15 + obs_names <- paste0("obs_", 1:N_OBS) + var_names <- paste0("var_", 1:N_VAR) + adata <- AnnData( + X = rsparsematrix(N_OBS, N_VAR, 0.1), + obs_names = obs_names, + var_names = var_names, + layers = list( + dense = matrix(1:15, N_OBS, N_VAR), + sparse = rsparsematrix(N_OBS, N_VAR, 0.1) + ), + obsm = list( + dense = matrix(1:15, N_OBS, 5), + sparse = rsparsematrix(N_OBS, 5, 0.1) + ), + varm = list( + dense = matrix(1:15, N_VAR, 5), + sparse = rsparsematrix(N_VAR, 5, 0.1) + ), + obsp = list( + dense = matrix(1:15, N_OBS, N_OBS), + sparse = rsparsematrix(N_OBS, N_OBS, 0.1) + ), + varp = list( + dense = matrix(1:15, N_VAR, N_VAR), + sparse = rsparsematrix(N_VAR, N_VAR, 0.1) + ) + ) + if (type == "HDF5AnnData") { + tempfile(pattern = "hdf5_write_", fileext = ".h5ad") + write_h5ad(adata, h5ad_file) + read_h5ad(h5ad_file, to = type) + } else if (type == "InMemoryAnnData") { + adata + } else { + stop(paste0("Unknown type: ", type)) + } +} + +check_round_trip <- function(expected, type) { + h5ad_file <- tempfile(pattern = "hdf5_write_", fileext = ".h5ad") + write_h5ad(expected, h5ad_file) + actual <- read_h5ad(h5ad_file, to = type) + + expect_equal(actual, expected) +} + +check_round_trip_example <- function(type) { + check_round_trip(read_h5ad(base_file, to = type), type) +} + +for (typ in c("HDF5AnnData", "InMemoryAnnData")) { + test_that(paste("round trip w/ example data for", typ), { + check_round_trip_example(typ) + }) + test_that(paste("round trip w/ generated data for", typ), { + adata <- gen_adata(typ) + check_round_trip(adata, typ) + }) +}