Skip to content

Commit

Permalink
Anndata obsm varm3 (#125)
Browse files Browse the repository at this point in the history
* Start obsm/ varm

* InMemory + tests

* Add validation

* Start roundtrip

* Start validation

* Style

* Get tests running

* Remove some TODOs

* obsp/ varp

* Don't add row/ col names in validation (still needs tests)

* tests obsp, varp validation

* Consolidate layer validation with aligned mapping validation

* Shorten lines

* Fix repeated trackstatus item

* add a helpful error message when duplicate trackstatus are found

* add docs, change trackstatus from wip to done

* try line length fix

* Manually 😭 fix linting issues

* Bump build

* import testthat?

---------

Co-authored-by: Robrecht Cannoodt <[email protected]>
  • Loading branch information
ivirshup and rcannood authored Sep 19, 2023
1 parent 9eb2ebd commit 41e7eb0
Show file tree
Hide file tree
Showing 12 changed files with 596 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .lintr
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
linters: linters_with_defaults(
line_length_linter = line_length_linter(120L),
object_name_linter = object_name_linter(styles = c("snake_case", "symbols", "CamelCase"))
object_name_linter = object_name_linter(styles = c("snake_case", "symbols", "CamelCase", "SNAKE_CASE"))
)
104 changes: 86 additions & 18 deletions R/AbstractAnnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,26 @@ AbstractAnnData <- R6::R6Class("AbstractAnnData", # nolint
#' vector.
var_names = function(value) {
# Abstract placeholder for the `var_names` field: the only work done here is
# handing the field label to `.abstract_function()`.
# NOTE(review): presumably this signals that a subclass must implement the
# field -- confirm against the definition of `.abstract_function()`.
.abstract_function("ad$var_names")
},
#' @field obsm The obsm slot. Must be `NULL` or a named list
#' with all elements having the same number of rows as `obs`.
obsm = function(value) {
# Abstract placeholder for the `obsm` field: the only work done here is
# handing the field label to `.abstract_function()`.
# NOTE(review): presumably this signals that a subclass must implement the
# field -- confirm against the definition of `.abstract_function()`.
.abstract_function("ad$obsm")
},
#' @field varm The varm slot. Must be `NULL` or a named list
#' with all elements having the same number of rows as `var`.
varm = function(value) {
# Abstract placeholder for the `varm` field: the only work done here is
# handing the field label to `.abstract_function()`.
# NOTE(review): presumably this signals that a subclass must implement the
# field -- confirm against the definition of `.abstract_function()`.
.abstract_function("ad$varm")
},
#' @field obsp The obsp slot. Must be `NULL` or a named list
#' with all elements having the same number of rows and columns as `obs`.
obsp = function(value) {
# Abstract placeholder for the `obsp` field: the only work done here is
# handing the field label to `.abstract_function()`.
# NOTE(review): presumably this signals that a subclass must implement the
# field -- confirm against the definition of `.abstract_function()`.
.abstract_function("ad$obsp")
},
#' @field varp The varp slot. Must be `NULL` or a named list
#' with all elements having the same number of rows and columns as `var`.
varp = function(value) {
# Abstract placeholder for the `varp` field: the only work done here is
# handing the field label to `.abstract_function()`.
# NOTE(review): presumably this signals that a subclass must implement the
# field -- confirm against the definition of `.abstract_function()`.
.abstract_function("ad$varp")
}
),
public = list(
Expand Down Expand Up @@ -158,31 +178,79 @@ AbstractAnnData <- R6::R6Class("AbstractAnnData", # nolint
mat
},

# @description `.validate_layers()` checks for named lists and
# correct dimensions on elements.
# @param layers A named list of 0 or more matrix elements with
# dimensions consistent with `obs` and `var`.
.validate_layers = function(layers) {
if (is.null(layers)) {
return(layers)

# @description `.validate_aligned_array()` checks that dimensions are
# consistent with the anndata object.
# @param mat A matrix to validate
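# @param label Label for `mat` used in error and warning messages,
# e.g. `"X"` or `"layers[['counts']]"`.
# @param shape Expected dimensions of matrix
# @param expected_rownames If not `NULL`, the row names `mat` must match when it has row names
# @param expected_colnames If not `NULL`, the column names `mat` must match when it has column names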
.validate_aligned_array = function(mat, label, shape, expected_rownames = NULL, expected_colnames = NULL) {
mat_dims <- dim(mat)
for (i in seq_along(shape)) {
expected_dim <- shape[i]
found_dim <- mat_dims[i]
if (found_dim != expected_dim) {
stop("dim(", label, ")[", i, "] should have shape: ", expected_dim, ", found: ", found_dim, ".")
}
}
if (!is.null(expected_rownames) & !is.null(rownames(mat))) {
if (!identical(rownames(mat), expected_rownames)) {
stop("rownames(", label, ") should be the same as expected_rownames")
}
}
if (!is.null(rownames(mat))) {
warning(wrap_message(
"rownames(", label, ") should be NULL, removing them from the matrix"
))
rownames(mat) <- NULL
}
if (!is.null(expected_colnames) & !is.null(colnames(mat))) {
if (!identical(colnames(mat), expected_colnames)) {
stop("colnames(", label, ") should be the same as expected_colnames")
}
}
if (!is.null(colnames(mat))) {
warning(wrap_message(
"colnames(", label, ") should be NULL, removing them from the matrix"
))
colnames(mat) <- NULL
}

## layers and names
layer_names <- names(layers)
if (!is.list(layers) || is.null(layer_names)) {
stop("'layers' must must be a named list")
mat
},
# @description `.validate_aligned_mapping()` checks for named lists and
# correct dimensions on elements.
# @param collection A named list of 0 or more matrix elements
# whose entries will be validated
# @param label The label of the collection, used for error messages
# @param shape Expected dimensions of arrays. Arrays may have more dimensions than specified here
# @param expected_rownames
# @param expected_colnames
.validate_aligned_mapping = function(collection, label, shape, expected_rownames = NULL, expected_colnames = NULL) {
if (is.null(collection)) {
return(collection)
}
if (any(!nzchar(layer_names))) {
stop("all 'layers' elements must have non-trivial names")

collection_names <- names(collection)
if (!is.list(collection) || ((length(collection) != 0) && is.null(collection_names))) {
stop(paste0(label, " must be a named list, was ", class(collection)))
}

## layer elements
for (layer in layer_names) {
layer_name <- paste0("layers[[", layer, "]]")
private$.validate_matrix(layers[[layer]], layer_name)
for (mtx_name in collection_names) {
collection_name <- paste0(label, "[['", mtx_name, "']]")
private$.validate_aligned_array(
collection[[mtx_name]],
collection_name,
shape = shape,
expected_rownames = expected_rownames,
expected_colnames = expected_colnames
)
}

layers
collection
},

# @description `.validate_obsvar_dataframe()` checks that the
Expand Down
24 changes: 22 additions & 2 deletions R/AnnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,18 @@
#' @param var Either `NULL` or a `data.frame` with columns containing
#' information about variables. If `NULL`, an `n_vars`×0 data frame will
#' automatically be generated.
#' @param obsm The obsm slot is used to store multi-dimensional annotation
#' arrays. It must be either `NULL` or a named list, where each element is a
#' matrix with `n_obs` rows and an arbitrary number of columns.
#' @param varm The varm slot is used to store multi-dimensional annotation
#' arrays. It must be either `NULL` or a named list, where each element is a
#' matrix with `n_vars` rows and an arbitrary number of columns.
#' @param obsp The obsp slot is used to store sparse multi-dimensional
#' annotation arrays. It must be either `NULL` or a named list, where each
#' element is a sparse matrix where each dimension has length `n_obs`.
#' @param varp The varp slot is used to store sparse multi-dimensional
#' annotation arrays. It must be either `NULL` or a named list, where each
#' element is a sparse matrix where each dimension has length `n_vars`.
#'
#' @export
#'
Expand All @@ -51,13 +63,21 @@ AnnData <- function(
X = NULL,
obs = NULL,
var = NULL,
layers = NULL) {
layers = NULL,
obsm = NULL,
varm = NULL,
obsp = NULL,
varp = NULL) {
InMemoryAnnData$new(
obs_names = obs_names,
var_names = var_names,
X = X,
obs = obs,
var = var,
layers = layers
layers = layers,
obsm = obsm,
varm = varm,
obsp = obsp,
varp = varp
)
}
132 changes: 127 additions & 5 deletions R/HDF5AnnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
.n_obs = NULL,
.n_vars = NULL,
.obs_names = NULL,
.var_names = NULL
.var_names = NULL,
.obsm = NULL,
.varm = NULL,
.obsp = NULL,
.varp = NULL
),
active = list(
#' @field X The X slot
Expand All @@ -32,10 +36,87 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
read_h5ad_element(private$.h5obj, "layers")
} else {
# trackstatus: class=HDF5AnnData, feature=set_layers, status=done
value <- private$.validate_layers(value)
value <- private$.validate_aligned_mapping(
value,
"layers",
c(self$n_obs(), self$n_vars()),
expected_rownames = rownames(self),
expected_colnames = colnames(self)
)
write_h5ad_element(value, private$.h5obj, "/layers")
}
},
#' @field obsm The obsm slot. Must be `NULL` or a named list
#' with all elements having the same number of rows as `obs`.
obsm = function(value) {
  # Read access: nothing was assigned, so load "obsm" from the HDF5 handle.
  if (missing(value)) {
    # trackstatus: class=HDF5AnnData, feature=get_obsm, status=done
    return(read_h5ad_element(private$.h5obj, "obsm"))
  }

  # Write access: check every element against n_obs (first dimension only)
  # and the object's row names before writing the mapping to "/obsm".
  # trackstatus: class=HDF5AnnData, feature=set_obsm, status=done
  checked <- private$.validate_aligned_mapping(
    collection = value,
    label = "obsm",
    shape = self$n_obs(),
    expected_rownames = rownames(self)
  )
  write_h5ad_element(checked, private$.h5obj, "/obsm")
},
#' @field varm The varm slot. Must be `NULL` or a named list
#' with all elements having the same number of rows as `var`.
varm = function(value) {
  # Read access: nothing was assigned, so load "varm" from the HDF5 handle.
  if (missing(value)) {
    # trackstatus: class=HDF5AnnData, feature=get_varm, status=done
    return(read_h5ad_element(private$.h5obj, "varm"))
  }

  # Write access: check every element against n_vars (first dimension only)
  # and the object's column names before writing the mapping to "/varm".
  # trackstatus: class=HDF5AnnData, feature=set_varm, status=done
  checked <- private$.validate_aligned_mapping(
    collection = value,
    label = "varm",
    shape = self$n_vars(),
    expected_rownames = colnames(self)
  )
  write_h5ad_element(checked, private$.h5obj, "/varm")
},
#' @field obsp The obsp slot. Must be `NULL` or a named list
#' with all elements having the same number of rows and columns as `obs`.
obsp = function(value) {
  # Read access: nothing was assigned, so load "obsp" from the HDF5 handle.
  if (missing(value)) {
    # trackstatus: class=HDF5AnnData, feature=get_obsp, status=done
    return(read_h5ad_element(private$.h5obj, "obsp"))
  }

  # Write access: every element must be n_obs x n_obs, with the object's
  # row names expected on both axes, before the mapping goes to "/obsp".
  # trackstatus: class=HDF5AnnData, feature=set_obsp, status=done
  obs_count <- self$n_obs()
  checked <- private$.validate_aligned_mapping(
    collection = value,
    label = "obsp",
    shape = c(obs_count, obs_count),
    expected_rownames = rownames(self),
    expected_colnames = rownames(self)
  )
  write_h5ad_element(checked, private$.h5obj, "/obsp")
},
#' @field varp The varp slot. Must be `NULL` or a named list
#' with all elements having the same number of rows and columns as `var`.
varp = function(value) {
  # Read access: nothing was assigned, so load "varp" from the HDF5 handle.
  if (missing(value)) {
    # trackstatus: class=HDF5AnnData, feature=get_varp, status=done
    return(read_h5ad_element(private$.h5obj, "varp"))
  }

  # Write access: every element must be n_vars x n_vars, with the object's
  # column names expected on both axes, before the mapping goes to "/varp".
  # trackstatus: class=HDF5AnnData, feature=set_varp, status=done
  var_count <- self$n_vars()
  checked <- private$.validate_aligned_mapping(
    collection = value,
    label = "varp",
    shape = c(var_count, var_count),
    expected_rownames = colnames(self),
    expected_colnames = colnames(self)
  )
  write_h5ad_element(checked, private$.h5obj, "/varp")
},

#' @field obs The obs slot
obs = function(value) {
if (missing(value)) {
Expand Down Expand Up @@ -128,6 +209,18 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
#' @param var Either `NULL` or a `data.frame` with columns containing
#' information about variables. If `NULL`, an `n_vars`×0 data frame will
#' automatically be generated.
#' @param obsm The obsm slot is used to store multi-dimensional annotation
#' arrays. It must be either `NULL` or a named list, where each element is a
#' matrix with `n_obs` rows and an arbitrary number of columns.
#' @param varm The varm slot is used to store multi-dimensional annotation
#' arrays. It must be either `NULL` or a named list, where each element is a
#' matrix with `n_vars` rows and an arbitrary number of columns.
#' @param obsp The obsp slot is used to store sparse multi-dimensional
#' annotation arrays. It must be either `NULL` or a named list, where each
#' element is a sparse matrix where each dimension has length `n_obs`.
#' @param varp The varp slot is used to store sparse multi-dimensional
#' annotation arrays. It must be either `NULL` or a named list, where each
#' element is a sparse matrix where each dimension has length `n_vars`.
#'
#' @details
#' The constructor creates a new HDF5 AnnData interface object. This can
Expand All @@ -136,8 +229,17 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
#' must be specified. In both cases, any additional slots provided will be
#' set on the created object. This will cause data to be overwritten if the
#' file already exists.
initialize = function(file, obs_names = NULL, var_names = NULL, X = NULL,
obs = NULL, var = NULL, layers = NULL) {
initialize = function(file,
obs_names = NULL,
var_names = NULL,
X = NULL,
obs = NULL,
var = NULL,
layers = NULL,
obsm = NULL,
varm = NULL,
obsp = NULL,
varp = NULL) {
if (!requireNamespace("rhdf5", quietly = TRUE)) {
stop("The HDF5 interface requires the 'rhdf5' package to be installed")
}
Expand Down Expand Up @@ -200,6 +302,22 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
if (!is.null(layers)) {
self$layers <- layers
}

if (!is.null(obsm)) {
self$obsm <- obsm
}

if (!is.null(varm)) {
self$varm <- varm
}

if (!is.null(obsp)) {
self$obsp <- obsp
}

if (!is.null(varp)) {
self$varp <- varp
}
},

#' @description Number of observations in the AnnData object
Expand Down Expand Up @@ -257,8 +375,12 @@ to_HDF5AnnData <- function(adata, file) { # nolint
X = adata$X,
obs = adata$obs,
var = adata$var,
obsm = adata$obsm,
varm = adata$varm,
obs_names = adata$obs_names,
var_names = adata$var_names,
layers = adata$layers
layers = adata$layers,
obsp = adata$obsp,
varp = adata$varp
)
}
Loading

0 comments on commit 41e7eb0

Please sign in to comment.