Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add H5AD read/write helpers #88

Merged
merged 10 commits into from
Sep 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,3 @@ Config/testthat/edition: 3
Encoding: UTF-8
Roxygen: list(markdown = TRUE, r6 = TRUE)
RoxygenNote: 7.2.3
Remotes:
rstudio/reticulate
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# Generated by roxygen2: do not edit by hand

export(InMemoryAnnData)
export(dummy_data)
export(from_Seurat)
export(from_SingleCellExperiment)
export(read_h5ad)
export(to_HDF5AnnData)
export(to_InMemory)
export(to_Seurat)
export(to_SingleCellExperiment)
export(write_h5ad)
importFrom(Matrix,as.matrix)
importFrom(Matrix,sparseMatrix)
importFrom(Matrix,t)
Expand Down
88 changes: 65 additions & 23 deletions R/HDF5-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ read_h5ad_element <- function(file, name, type = NULL, version = NULL, ...) {
"' for element '", name, "'"
)
)

read_fun(file = file, name = name, version = version, ...)
}

Expand Down Expand Up @@ -183,18 +184,7 @@ read_h5ad_rec_array <- function(file, name, version = "0.2.0") {
#'
#' @return a boolean vector
read_h5ad_nullable_boolean <- function(file, name, version = "0.1.0") {
  # Delegate to the shared nullable reader so that the boolean and integer
  # readers treat the mask in exactly the same way, then coerce the values
  # (with masked entries already set to NA) to logical.
  as.logical(read_h5ad_nullable(file, name, version))
}

#' Read H5AD nullable integer
Expand All @@ -207,16 +197,32 @@ read_h5ad_nullable_boolean <- function(file, name, version = "0.1.0") {
#'
#' @return an integer vector
read_h5ad_nullable_integer <- function(file, name, version = "0.1.0") {
  # Read the nullable values through the shared reader, then coerce the
  # result to an integer vector.
  nullable_values <- read_h5ad_nullable(file, name, version)
  as.integer(nullable_values)
}

#' Read H5AD nullable
#'
#' Read a nullable vector (boolean or integer) from an H5AD file
#'
#' @param file Path to a H5AD file or an open H5AD handle
#' @param name Name of the element within the H5AD file
#' @param version Encoding version of the element to read
#'
#' @return a vector of the stored values with masked entries set to `NA`. The
#'   values are not coerced to a specific type here; the boolean and integer
#'   readers do that on the result.
read_h5ad_nullable <- function(file, name, version = "0.1.0") {
  version <- match.arg(version)

  element <- rhdf5::h5read(file, name)

  # Some versions of rhdf5 automatically apply mask, in which case
  # there is no 'mask' element and the values are returned directly.
  # The mask must therefore only be accessed inside this guard.
  if (!is.null(names(element))) {
    # Get mask and convert to Boolean
    mask <- as.logical(element[["mask"]])
    # Get values and set missing
    element <- as.vector(element[["values"]])
    element[mask] <- NA
  }

  element
}
Expand Down Expand Up @@ -272,15 +278,17 @@ read_h5ad_categorical <- function(file, name, version = "0.2.0") {

levels <- element[["categories"]]

ordered <- element[["ordered"]]
if (is.null(ordered)) {
attributes <- rhdf5::h5readAttributes(file, name)
ordered <- attributes[["ordered"]]
if (is.na(ordered)) {
# This version of {rhdf5} doesn't yet support ENUM type attributes so we
# can't tell if the categorical should be ordered,
# see https://github.com/grimbough/rhdf5/issues/125
warning(
"Unable to determine if categorical '", name,
"' is ordered, assuming it isn't"
)

ordered <- FALSE
}

Expand Down Expand Up @@ -412,13 +420,47 @@ read_h5ad_collection <- function(file, name, column_order) {
columns <- list()
for (col_name in column_order) {
new_name <- paste0(name, "/", col_name)
encoding <- rhdf5::h5readAttributes(file, new_name)
encoding <- read_h5ad_encoding(file, new_name)
columns[[col_name]] <- read_h5ad_element(
file = file,
name = new_name,
type = encoding$`encoding-type`,
version = encoding$`encoding-version`
type = encoding$type,
version = encoding$version
)
}
columns
}

#' Read H5AD
#'
#' Read data from a H5AD file
#'
#' @param path Path to the H5AD file to read
#' @param to The type of object to return. Must be one of:
#'   "SingleCellExperiment", "Seurat", "HDF5AnnData", "InMemoryAnnData"
#'
#' @return The object specified by `to`. An error is raised if `path` is a
#'   file path that does not exist.
#' @export
#'
#' @examples
#' h5ad_file <- system.file("extdata", "example.h5ad", package = "anndataR")
#' # Read the H5AD as a SingleCellExperiment object
#' if (requireNamespace("SingleCellExperiment", quietly = TRUE)) {
#'   sce <- read_h5ad(h5ad_file, to = "SingleCellExperiment")
#' }
#' # Read the H5AD as a Seurat object
#' if (requireNamespace("SeuratObject", quietly = TRUE)) {
#'   seurat <- read_h5ad(h5ad_file, to = "Seurat")
#' }
read_h5ad <- function(path,
                      to = c(
                        "SingleCellExperiment", "Seurat",
                        "HDF5AnnData", "InMemoryAnnData"
                      )) {
  to <- match.arg(to)

  # The HDF5AnnData constructor is also used elsewhere in the package to
  # create new files, so a missing input file is rejected here with an
  # explicit message instead of being handed to the constructor.
  if (is.character(path) && length(path) == 1L && !file.exists(path)) {
    stop("H5AD file does not exist: '", path, "'")
  }

  adata <- HDF5AnnData$new(path)

  # Convert the file-backed object to the requested output type
  switch(to,
    "SingleCellExperiment" = to_SingleCellExperiment(adata),
    "Seurat" = to_Seurat(adata),
    "HDF5AnnData" = adata,
    "InMemoryAnnData" = adata$to_InMemoryAnnData()
  )
}
49 changes: 49 additions & 0 deletions R/HDF5-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,8 @@ write_empty_h5ad <- function(file, obs_names, var_names, version = "0.1.0") {
#'
#' @param file Path to a HDF5 file
#' @param target_path The path within the file to test for
#'
#' @return Whether `target_path` exists in `file`
hdf5_path_exists <- function(file, target_path) {
if (substr(target_path, 1, 1) != "/") {
target_path <- paste0("/", target_path)
Expand All @@ -439,3 +441,50 @@ hdf5_path_exists <- function(file, target_path) {

target_path %in% paths
}

#' Write H5AD
#'
#' Write an H5AD file
#'
#' @param object The object to write, either a "SingleCellExperiment" or a
#'   "Seurat" object. An error is raised for objects of any other class.
#' @param path Path of the file to write to
#'
#' @return `path` invisibly
#' @export
#'
#' @examples
#' # Write a SingleCellExperiment as a H5AD
#' h5ad_file <- tempfile(fileext = ".h5ad")
#' if (requireNamespace("SingleCellExperiment", quietly = TRUE)) {
#'   sce <- dummy_data(output = "SingleCellExperiment")
#'   write_h5ad(sce, h5ad_file)
#' }
#'
#' # Write a Seurat as a H5AD
#' h5ad_file <- tempfile(fileext = ".h5ad")
#' if (requireNamespace("SeuratObject", quietly = TRUE)) {
#'   seurat <- dummy_data(output = "Seurat")
#'   write_h5ad(seurat, h5ad_file)
#' }
write_h5ad <- function(object, path) {
  if (inherits(object, "SingleCellExperiment")) {
    from_SingleCellExperiment(
      object,
      output_class = "HDF5AnnData",
      file = path
    )
  } else if (inherits(object, "Seurat")) {
    from_Seurat(
      object,
      output_class = "HDF5AnnData",
      file = path
    )
  } else {
    # class() may return several class names; collapse them so the message
    # stays readable instead of running the names together.
    stop(
      "Unable to write object of class: ",
      paste(class(object), collapse = ", ")
    )
  }

  # Return the path invisibly so the call can be chained without printing
  invisible(path)
}
43 changes: 43 additions & 0 deletions R/HDF5AnnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,46 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
}
)
)

#' Convert an AnnData object to an HDF5AnnData object
#'
#' This function takes an AnnData object and converts it to an HDF5AnnData
#' object, storing its fields in the `.h5ad` file given by `file`.
#'
#' @param adata An AnnData object to be converted to HDF5AnnData.
#' @param file The filename (character) of the `.h5ad` file.
#'
#' @return An HDF5AnnData object with the same data as the input AnnData
#' object.
#'
#' @export
#'
#' @examples
#' ad <- InMemoryAnnData$new(
#' X = matrix(1:5, 3L, 5L),
#' layers = list(
#' A = matrix(5:1, 3L, 5L),
#' B = matrix(letters[1:5], 3L, 5L)
#' ),
#' obs = data.frame(cell = 1:3),
#' var = data.frame(gene = 1:5),
#' obs_names = LETTERS[1:3],
#' var_names = letters[1:5]
#' )
#' to_HDF5AnnData(ad, "test.h5ad")
#' # remove file
#' file.remove("test.h5ad")
to_HDF5AnnData <- function(adata, file) { # nolint
  # Only AnnData implementations provide the fields that are copied below
  stopifnot(inherits(adata, "AbstractAnnData"))

  # Build the HDF5-backed object at `file` from the fields of the input
  HDF5AnnData$new(
    file      = file,
    X         = adata$X,
    obs       = adata$obs,
    var       = adata$var,
    obs_names = adata$obs_names,
    var_names = adata$var_names,
    layers    = adata$layers
  )
}
3 changes: 3 additions & 0 deletions R/InMemoryAnnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ InMemoryAnnData <- R6::R6Class("InMemoryAnnData", # nolint
#' )
#' to_InMemory(ad)
to_InMemory <- function(adata) { # nolint
stopifnot(
inherits(adata, "AbstractAnnData")
)
InMemoryAnnData$new(
X = adata$X,
obs = adata$obs,
Expand Down
Loading