From 8c82d99b110a536bef5c71275e0c3bc4b6c68f14 Mon Sep 17 00:00:00 2001 From: Louise Deconinck Date: Mon, 18 Sep 2023 13:01:55 +0200 Subject: [PATCH 01/17] Draft vignette --- vignettes/getting-started.Rmd | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 vignettes/getting-started.Rmd diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd new file mode 100644 index 00000000..386ea83c --- /dev/null +++ b/vignettes/getting-started.Rmd @@ -0,0 +1,55 @@ +--- +title: "Getting Started" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Getting Started} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This package allows users to work with h5ad files, access various slots in the h5ad datasets and convert .h5ad anndata files to SingleCellExperiment objects and SeuratObjects, and vice versa. + +The API of anndataR with regards to accessing different slots in the files is very similar to its Python counterpart. +Check out `?anndataR` for a full list of the functions provided by this package. + +This package provides an abstract interface for AnnData objects. This abstract interface models its Python counterpart closely, and stores a data matrix `X` and annotations corresponding to observations (`obs`, `obsm`, `obsp`) and variables (`var`, `varm`, `varp`) and unstructured metadata `uns`. + + +This abstract interface is implemented by different backends. Currently, the following backends are implemented: +1. InMemoryAnnData +2. HDF5AnnData + +The InMemoryAnnData backend allows you to construct an AnnData object in memory. +The HDF5AnnData backend allows you to read in an AnnData object from an `.h5ad` file. + +Here is an example of how to read in an `.h5ad` file and access its contents. 
+ +```{r setup} +library(anndataR) +file <- system.file("extdata", "example.h5ad", package = "anndataR") +adata <- read_h5ad(file, to = "InMemoryAnnData") +X <- adata$X +layers <- adata$layers +obs <- adata$obs +obsm <- adata$obsm +obsp <- adata$obsp +var <- adata$var +varm <- adata$varm +varp <- adata$varp +uns <- adata$uns +``` + +You can convert the AnnData object to a SingleCellExperiment object or to a SeuratObject in the following way: + +```{r convert} +sce <- to_SingleCellExperiment(adata) +seurat <- to_Seurat(adata) +``` + From 891e350c5e15654b768cf2701d7a40de46724c8f Mon Sep 17 00:00:00 2001 From: Louise Deconinck Date: Mon, 18 Sep 2023 13:19:34 +0200 Subject: [PATCH 02/17] Working draft vignette. --- vignettes/getting-started.Rmd | 42 ++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index 386ea83c..cefff14e 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -23,7 +23,9 @@ This package provides an abstract interface for AnnData objects. This abstract i This abstract interface is implemented by different backends. Currently, the following backends are implemented: + 1. InMemoryAnnData + 2. HDF5AnnData The InMemoryAnnData backend allows you to construct an AnnData object in memory. @@ -31,19 +33,47 @@ The HDF5AnnData backend allows you to read in an AnnData object from an `.h5ad` Here is an example of how to read in an `.h5ad` file and access its contents. 
-```{r setup} +```{r setup, eval=FALSE} library(anndataR) file <- system.file("extdata", "example.h5ad", package = "anndataR") adata <- read_h5ad(file, to = "InMemoryAnnData") X <- adata$X layers <- adata$layers obs <- adata$obs -obsm <- adata$obsm -obsp <- adata$obsp +# obsm <- adata$obsm +# obsp <- adata$obsp +var <- adata$var +# varm <- adata$varm +# varp <- adata$varp +# uns <- adata$uns +``` + +The following example details how to make an InMemoryAnnData and access its contents. + +```{r inmemory} +library(anndataR) + +adata <- AnnData( + X = matrix(1:15, 3L, 5L), + layers = list( + A = matrix(5:1, 3L, 5L), + B = matrix(letters[1:5], 3L, 5L) + ), + obs = data.frame(cell = 1:3), + var = data.frame(gene = 1:5), + obs_names = LETTERS[1:3], + var_names = letters[1:5] +) + +X <- adata$X +layers <- adata$layers +obs <- adata$obs +# obsm <- adata$obsm +# obsp <- adata$obsp var <- adata$var -varm <- adata$varm -varp <- adata$varp -uns <- adata$uns +# varm <- adata$varm +# varp <- adata$varp +# uns <- adata$uns ``` You can convert the AnnData object to a SingleCellExperiment object or to a SeuratObject in the following way: From 0effdb18872790937a9e1b52296f3d6cb0b042ac Mon Sep 17 00:00:00 2001 From: Louise Deconinck Date: Mon, 18 Sep 2023 13:26:49 +0200 Subject: [PATCH 03/17] Finetuning --- vignettes/getting-started.Rmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index cefff14e..3e903893 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -14,9 +14,8 @@ knitr::opts_chunk$set( ) ``` -This package allows users to work with h5ad files, access various slots in the h5ad datasets and convert .h5ad anndata files to SingleCellExperiment objects and SeuratObjects, and vice versa. +This package allows users to work with `.h5ad` files, access various slots in the datasets and convert these files to `SingleCellExperiment` objects and `SeuratObject`s, and vice versa. 
-The API of anndataR with regards to accessing different slots in the files is very similar to its Python counterpart. Check out `?anndataR` for a full list of the functions provided by this package. This package provides an abstract interface for AnnData objects. This abstract interface models its Python counterpart closely, and stores a data matrix `X` and annotations corresponding to observations (`obs`, `obsm`, `obsp`) and variables (`var`, `varm`, `varp`) and unstructured metadata `uns`. From c26211d136eceb4cd027cb723cf8be91d0e77de3 Mon Sep 17 00:00:00 2001 From: Louise Deconinck Date: Mon, 18 Sep 2023 13:48:40 +0200 Subject: [PATCH 04/17] Spaces typo --- vignettes/getting-started.Rmd | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index 3e903893..72f5f202 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -53,15 +53,15 @@ The following example details how to make an InMemoryAnnData and access its cont library(anndataR) adata <- AnnData( - X = matrix(1:15, 3L, 5L), - layers = list( - A = matrix(5:1, 3L, 5L), - B = matrix(letters[1:5], 3L, 5L) - ), - obs = data.frame(cell = 1:3), - var = data.frame(gene = 1:5), - obs_names = LETTERS[1:3], - var_names = letters[1:5] + X = matrix(1:15, 3L, 5L), + layers = list( + A = matrix(5:1, 3L, 5L), + B = matrix(letters[1:5], 3L, 5L) + ), + obs = data.frame(cell = 1:3), + var = data.frame(gene = 1:5), + obs_names = LETTERS[1:3], + var_names = letters[1:5] ) X <- adata$X From 7ee138bfd3cd9719d1d861dfa8e2ee85f1c6e1c6 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 18 Sep 2023 13:54:08 +0200 Subject: [PATCH 05/17] reformat description --- DESCRIPTION | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d8629c3d..d40ce157 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,16 +2,41 @@ Package: 
anndataR Title: AnnData interoperability in R Version: 0.0.0.9000 Authors@R: c( - person("Robrecht", "Cannoodt", , "rcannood@gmail.com", role = c("aut", "cre"), - comment = c(ORCID = "0000-0003-3641-729X", github = "rcannood")), - person("Luke", "Zappia", , "luke@lazappi.id.au", role = "aut", - comment = c(ORCID = "0000-0001-7744-8565", github = "lazappi")), - person("Martin", "Morgan", , "mtmorgan.bioc@gmail.com", role = "aut", - comment = c(ORCID = "0000-0002-5874-8148", github = "mtmorgan")), - person("Louise", "Deconinck", , "louise.deconinck@gmail.com", role = "aut", - comment = c(ORCID = "0000-0001-8100-6823", github = "LouiseDck")), - person("Danila", "Bredikhin", , "danila.bredikhin@embl.de", role = "aut", - comment = c(ORCID = "0000-0001-8089-6983", github = "gtca")) + person( + "Robrecht", + "Cannoodt", + emaul = "rcannood@gmail.com", + role = c("aut", "cre"), + comment = c(ORCID = "0000-0003-3641-729X", github = "rcannood") + ), + person( + "Luke", + "Zappia", + email = "luke@lazappi.id.au", + role = "aut", + comment = c(ORCID = "0000-0001-7744-8565", github = "lazappi") + ), + person( + "Martin", + "Morgan", + email = "mtmorgan.bioc@gmail.com", + role = "aut", + comment = c(ORCID = "0000-0002-5874-8148", github = "mtmorgan") + ), + person( + "Louise", + "Deconinck", + email = "louise.deconinck@gmail.com", + role = "aut", + comment = c(ORCID = "0000-0001-8100-6823", github = "LouiseDck") + ), + person( + "Danila", + "Bredikhin", + email = "danila.bredikhin@embl.de", + role = "ctb", + comment = c(ORCID = "0000-0001-8089-6983", github = "gtca") + ) ) Description: Bring the power and flexibility of AnnData to the R ecosystem, allowing you to effortlessly manipulate and analyze your From eee217cd6dc87d0f55f6f0f8897f25bd3363507e Mon Sep 17 00:00:00 2001 From: Louise Deconinck Date: Mon, 18 Sep 2023 14:15:32 +0200 Subject: [PATCH 06/17] Fix lintr issues, split up functionality --- vignettes/getting-started.Rmd | 30 +++++++++++++++--------------- 1 file 
changed, 15 insertions(+), 15 deletions(-) diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index 72f5f202..23cf58c4 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -30,27 +30,28 @@ This abstract interface is implemented by different backends. Currently, the fol The InMemoryAnnData backend allows you to construct an AnnData object in memory. The HDF5AnnData backend allows you to read in an AnnData object from an `.h5ad` file. -Here is an example of how to read in an `.h5ad` file and access its contents. +Here is an example of how to read in an `.h5ad` file. -```{r setup, eval=FALSE} +```{r setup} library(anndataR) +``` + +```{r show_anndata_hdf5, eval = FALSE} file <- system.file("extdata", "example.h5ad", package = "anndataR") adata <- read_h5ad(file, to = "InMemoryAnnData") +``` + +The contents can be accessed as well. +```{r access_hdf5, eval = FALSE} X <- adata$X layers <- adata$layers obs <- adata$obs -# obsm <- adata$obsm -# obsp <- adata$obsp var <- adata$var -# varm <- adata$varm -# varp <- adata$varp -# uns <- adata$uns ``` -The following example details how to make an InMemoryAnnData and access its contents. +The following example details how to construct an InMemoryAnnData and access its contents. -```{r inmemory} -library(anndataR) +```{r inmemory_construct} adata <- AnnData( X = matrix(1:15, 3L, 5L), @@ -63,16 +64,15 @@ adata <- AnnData( obs_names = LETTERS[1:3], var_names = letters[1:5] ) +adata +``` +The contents can be accessed as well. 
+```{r access_construct} X <- adata$X layers <- adata$layers obs <- adata$obs -# obsm <- adata$obsm -# obsp <- adata$obsp var <- adata$var -# varm <- adata$varm -# varp <- adata$varp -# uns <- adata$uns ``` You can convert the AnnData object to a SingleCellExperiment object or to a SeuratObject in the following way: From 8278d929ade164bf701c537738c08323fb6cc998 Mon Sep 17 00:00:00 2001 From: Louise Deconinck Date: Mon, 18 Sep 2023 14:33:00 +0200 Subject: [PATCH 07/17] Typo --- vignettes/getting-started.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index 23cf58c4..85655890 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -58,7 +58,7 @@ adata <- AnnData( layers = list( A = matrix(5:1, 3L, 5L), B = matrix(letters[1:5], 3L, 5L) - ), + ), obs = data.frame(cell = 1:3), var = data.frame(gene = 1:5), obs_names = LETTERS[1:3], From 22bc22721b0ca7bb2118674cbc2c517872662ca9 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 18 Sep 2023 14:33:55 +0200 Subject: [PATCH 08/17] move dummy data functions to the unit tests (#101) * move dummy data functions to the unit tests * remove accidentally committed script * move functions. 
allow writing anndata objects to file * fix styling, run roxygen, fix typo * fix example * fix example * fix example --- DESCRIPTION | 2 +- NAMESPACE | 1 - R/AnnData.R | 13 ++- R/read_h5ad.R | 33 ++++++++ R/{HDF5-read.R => read_h5ad_helpers.R} | 34 -------- R/write_h5ad.R | 84 +++++++++++++++++++ R/{HDF5-write.R => write_h5ad_helpers.R} | 47 ----------- man/anndataR-package.Rd | 6 +- man/dummy_Seurat.Rd | 17 ---- man/dummy_SingleCellExperiment.Rd | 17 ---- man/dummy_data.Rd | 29 ------- man/dummy_list.Rd | 19 ----- man/read_h5ad.Rd | 2 +- man/read_h5ad_categorical.Rd | 2 +- man/read_h5ad_collection.Rd | 2 +- man/read_h5ad_data_frame.Rd | 2 +- man/read_h5ad_data_frame_index.Rd | 2 +- man/read_h5ad_dense_array.Rd | 2 +- man/read_h5ad_element.Rd | 2 +- man/read_h5ad_encoding.Rd | 2 +- man/read_h5ad_mapping.Rd | 2 +- man/read_h5ad_nullable.Rd | 2 +- man/read_h5ad_nullable_boolean.Rd | 2 +- man/read_h5ad_nullable_integer.Rd | 2 +- man/read_h5ad_numeric_scalar.Rd | 2 +- man/read_h5ad_rec_array.Rd | 2 +- man/read_h5ad_sparse_array.Rd | 2 +- man/read_h5ad_string_array.Rd | 2 +- man/read_h5ad_string_scalar.Rd | 2 +- man/write_h5ad.Rd | 51 +++++++++-- man/write_h5ad_dense_array.Rd | 2 +- .../testthat/helper-dummy_data.R | 5 +- 32 files changed, 193 insertions(+), 201 deletions(-) create mode 100644 R/read_h5ad.R rename R/{HDF5-read.R => read_h5ad_helpers.R} (92%) create mode 100644 R/write_h5ad.R rename R/{HDF5-write.R => write_h5ad_helpers.R} (92%) delete mode 100644 man/dummy_Seurat.Rd delete mode 100644 man/dummy_SingleCellExperiment.Rd delete mode 100644 man/dummy_data.Rd delete mode 100644 man/dummy_list.Rd rename R/dummy_data.R => tests/testthat/helper-dummy_data.R (97%) diff --git a/DESCRIPTION b/DESCRIPTION index d40ce157..cec7d974 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,7 @@ Authors@R: c( person( "Robrecht", "Cannoodt", - emaul = "rcannood@gmail.com", + email = "rcannood@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3641-729X", 
github = "rcannood") ), diff --git a/NAMESPACE b/NAMESPACE index 856ec175..a215612d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,6 @@ export(AnnData) export(InMemoryAnnData) -export(dummy_data) export(from_Seurat) export(from_SingleCellExperiment) export(read_h5ad) diff --git a/R/AnnData.R b/R/AnnData.R index f0f0c553..65d08a37 100644 --- a/R/AnnData.R +++ b/R/AnnData.R @@ -46,13 +46,12 @@ #' #' adata AnnData <- function( - obs_names = NULL, - var_names = NULL, - X = NULL, - obs = NULL, - var = NULL, - layers = NULL -) { + obs_names = NULL, + var_names = NULL, + X = NULL, + obs = NULL, + var = NULL, + layers = NULL) { InMemoryAnnData$new( obs_names = obs_names, var_names = var_names, diff --git a/R/read_h5ad.R b/R/read_h5ad.R new file mode 100644 index 00000000..13449823 --- /dev/null +++ b/R/read_h5ad.R @@ -0,0 +1,33 @@ +#' Read H5AD +#' +#' Read data from a H5AD file +#' +#' @param path Path to the H5AD file to read +#' @param to The type of object to return. Must be one of: "SingleCellExperiment", +#' "Seurat", "HDF5AnnData", "InMemoryAnnData" +#' +#' @return The object specified by `to` +#' @export +#' +#' @examples +#' h5ad_file <- system.file("extdata", "example.h5ad", package = "anndataR") +#' # Read the H5AD as a SingleCellExperiment object +#' if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { +#' sce <- read_h5ad(h5ad_file, to = "SingleCellExperiment") +#' } +#' # Read the H5AD as a Seurat object +#' if (requireNamespace("SeuratObject", quietly = TRUE)) { +#' seurat <- read_h5ad(h5ad_file, to = "Seurat") +#' } +read_h5ad <- function(path, to = c("SingleCellExperiment", "Seurat", "HDF5AnnData", "InMemoryAnnData")) { + to <- match.arg(to) + + adata <- HDF5AnnData$new(path) + + switch(to, + "SingleCellExperiment" = to_SingleCellExperiment(adata), + "Seurat" = to_Seurat(adata), + "HDF5AnnData" = adata, + "InMemoryAnnData" = to_InMemoryAnnData(adata) + ) +} diff --git a/R/HDF5-read.R b/R/read_h5ad_helpers.R similarity index 92% rename from 
R/HDF5-read.R rename to R/read_h5ad_helpers.R index 35584fc0..0cdcaa11 100644 --- a/R/HDF5-read.R +++ b/R/read_h5ad_helpers.R @@ -430,37 +430,3 @@ read_h5ad_collection <- function(file, name, column_order) { } columns } - -#' Read H5AD -#' -#' Read data from a H5AD file -#' -#' @param path Path to the H5AD file to read -#' @param to The type of object to return. Must be one of: "SingleCellExperiment", -#' "Seurat", "HDF5AnnData", "InMemoryAnnData" -#' -#' @return The object specified by `to` -#' @export -#' -#' @examples -#' h5ad_file <- system.file("extdata", "example.h5ad", package = "anndataR") -#' # Read the H5AD as a SingleCellExperiment object -#' if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { -#' sce <- read_h5ad(h5ad_file, to = "SingleCellExperiment") -#' } -#' # Read the H5AD as a Seurat object -#' if (requireNamespace("SeuratObject", quietly = TRUE)) { -#' seurat <- read_h5ad(h5ad_file, to = "Seurat") -#' } -read_h5ad <- function(path, to = c("SingleCellExperiment", "Seurat", "HDF5AnnData", "InMemoryAnnData")) { - to <- match.arg(to) - - adata <- HDF5AnnData$new(path) - - switch(to, - "SingleCellExperiment" = to_SingleCellExperiment(adata), - "Seurat" = to_Seurat(adata), - "HDF5AnnData" = adata, - "InMemoryAnnData" = to_InMemoryAnnData(adata) - ) -} diff --git a/R/write_h5ad.R b/R/write_h5ad.R new file mode 100644 index 00000000..fdca177a --- /dev/null +++ b/R/write_h5ad.R @@ -0,0 +1,84 @@ +#' Write H5AD +#' +#' Write an H5AD file +#' +#' @param object The object to write, either a "SingleCellExperiment" or a +#' "Seurat" object +#' @param path Path of the file to write to +#' +#' @return `path` invisibly +#' @export +#' +#' @examples +#' adata <- AnnData( +#' X = matrix(1:15, 3L, 5L), +#' layers = list( +#' A = matrix(15:1, 3L, 5L), +#' B = matrix(letters[1:15], 3L, 5L) +#' ), +#' obs = data.frame(cell = 1:3), +#' var = data.frame(gene = 1:5), +#' obs_names = LETTERS[1:3], +#' var_names = letters[1:5] +#' ) +#' h5ad_file <- 
tempfile(fileext = ".h5ad") +#' write_h5ad(adata, h5ad_file) +#' +#' # Write a SingleCellExperiment as an H5AD +#' if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { +#' h5ad_file <- tempfile(fileext = ".h5ad") +#' ncells <- 100 +#' counts <- matrix(rpois(20000, 5), ncol = ncells) +#' logcounts <- log2(counts + 1) +# +#' pca <- matrix(runif(ncells * 5), ncells) +#' tsne <- matrix(rnorm(ncells * 2), ncells) +#' +#' sce <- SingleCellExperiment::SingleCellExperiment( +#' assays = list(counts = counts, logcounts = logcounts), +#' reducedDims = list(PCA = pca, tSNE = tsne) +#' ) +#' } +#' +#' # Write a Seurat as a H5AD +#' if (requireNamespace("SeuratObject", quietly = TRUE)) { +#' h5ad_file <- tempfile(fileext = ".h5ad") +#' counts <- matrix(1:15, 3L, 5L) +#' dimnames(counts) <- list( +#' letters[1:3], +#' LETTERS[1:5] +#' ) +#' gene.metadata <- data.frame( +#' row.names = LETTERS[1:5], +#' gene = 1:5 +#' ) +#' obj <- SeuratObject::CreateSeuratObject(counts, meta.data = gene.metadata) +#' cell.metadata <- data.frame( +#' row.names = letters[1:3], +#' cell = 1:3 +#' ) +#' obj <- SeuratObject::AddMetaData(obj, cell.metadata) +#' +#' write_h5ad(obj, h5ad_file) +#' } +write_h5ad <- function(object, path) { + if (inherits(object, "SingleCellExperiment")) { + from_SingleCellExperiment( + object, + output_class = "HDF5AnnData", + file = path + ) + } else if (inherits(object, "Seurat")) { + from_Seurat( + object, + output_class = "HDF5AnnData", + file = path + ) + } else if (inherits(object, "AbstractAnnData")) { + to_HDF5AnnData(object, path) + } else { + stop("Unable to write object of class: ", class(object)) + } + + invisible(path) +} diff --git a/R/HDF5-write.R b/R/write_h5ad_helpers.R similarity index 92% rename from R/HDF5-write.R rename to R/write_h5ad_helpers.R index 2aa635bc..c6b5b24b 100644 --- a/R/HDF5-write.R +++ b/R/write_h5ad_helpers.R @@ -469,50 +469,3 @@ hdf5_path_exists <- function(file, target_path) { target_path %in% paths } - -#' Write H5AD 
-#' -#' Write an H5AD file -#' -#' @param object The object to write, either a "SingleCellExperiment" or a -#' "Seurat" object -#' @param path Path of the file to write to -#' -#' @return `path` invisibly -#' @export -#' -#' @examples -#' # Write a SingleCellExperiment as a H5AD -#' h5ad_file <- tempfile(fileext = ".h5ad") -#' if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { -#' sce <- dummy_data(output = "SingleCellExperiment") -#' write_h5ad(sce, h5ad_file) -#' } -#' -#' # Write a Seurat as a H5AD -#' h5ad_file <- tempfile(fileext = ".h5ad") -#' if (requireNamespace("SeuratObject", quietly = TRUE)) { -#' seurat <- dummy_data(output = "Seurat") -#' write_h5ad(seurat, h5ad_file) -#' } -write_h5ad <- function(object, path) { - if (inherits(object, "SingleCellExperiment")) { - from_SingleCellExperiment( - object, - output_class = "HDF5AnnData", - file = path - ) - } else if (inherits(object, "Seurat")) { - from_Seurat( - object, - output_class = "HDF5AnnData", - file = path - ) - } else { - ( - stop("Unable to write object of class: ", class(object)) - ) - } - - invisible(path) -} diff --git a/man/anndataR-package.Rd b/man/anndataR-package.Rd index 9bdc1d13..75638374 100644 --- a/man/anndataR-package.Rd +++ b/man/anndataR-package.Rd @@ -25,7 +25,11 @@ Authors: \item Luke Zappia \email{luke@lazappi.id.au} (\href{https://orcid.org/0000-0001-7744-8565}{ORCID}) (lazappi) \item Martin Morgan \email{mtmorgan.bioc@gmail.com} (\href{https://orcid.org/0000-0002-5874-8148}{ORCID}) (mtmorgan) \item Louise Deconinck \email{louise.deconinck@gmail.com} (\href{https://orcid.org/0000-0001-8100-6823}{ORCID}) (LouiseDck) - \item Danila Bredikhin \email{danila.bredikhin@embl.de} (\href{https://orcid.org/0000-0001-8089-6983}{ORCID}) (gtca) +} + +Other contributors: +\itemize{ + \item Danila Bredikhin \email{danila.bredikhin@embl.de} (\href{https://orcid.org/0000-0001-8089-6983}{ORCID}) (gtca) [contributor] } } diff --git a/man/dummy_Seurat.Rd b/man/dummy_Seurat.Rd deleted 
file mode 100644 index 71328ccd..00000000 --- a/man/dummy_Seurat.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dummy_data.R -\name{dummy_Seurat} -\alias{dummy_Seurat} -\title{Dummy Seurat} -\usage{ -dummy_Seurat(...) -} -\arguments{ -\item{...}{Parameters passed to \code{dummy_list}} -} -\value{ -Seurat containing the generated data -} -\description{ -Generate a dummy dataset as a Seurat object -} diff --git a/man/dummy_SingleCellExperiment.Rd b/man/dummy_SingleCellExperiment.Rd deleted file mode 100644 index e6d348e4..00000000 --- a/man/dummy_SingleCellExperiment.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dummy_data.R -\name{dummy_SingleCellExperiment} -\alias{dummy_SingleCellExperiment} -\title{Dummy SingleCellExperiment} -\usage{ -dummy_SingleCellExperiment(...) -} -\arguments{ -\item{...}{Parameters passed to \code{dummy_list}} -} -\value{ -SingleCellExperiment containing the generated data -} -\description{ -Generate a dummy dataset as a SingleCellExperiment object -} diff --git a/man/dummy_data.Rd b/man/dummy_data.Rd deleted file mode 100644 index 0ab86ae1..00000000 --- a/man/dummy_data.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dummy_data.R -\name{dummy_data} -\alias{dummy_data} -\title{Dummy data} -\usage{ -dummy_data( - n_obs = 10L, - n_vars = 20L, - output = c("list", "SingleCellExperiment", "Seurat") -) -} -\arguments{ -\item{n_obs}{Number of observations to generate} - -\item{n_vars}{Number of variables to generate} - -\item{output}{Object type to output, one of "list", "SingleCellExperiment", -or "Seurat"} -} -\value{ -Object containing the generated dataset as defined by \code{output} -} -\description{ -Generate a dummy dataset -} -\examples{ -dummy <- dummy_data() -} diff --git a/man/dummy_list.Rd b/man/dummy_list.Rd deleted file mode 100644 
index abd208e0..00000000 --- a/man/dummy_list.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dummy_data.R -\name{dummy_list} -\alias{dummy_list} -\title{Dummy data list} -\usage{ -dummy_list(n_obs = 10L, n_vars = 20L) -} -\arguments{ -\item{n_obs}{Number of observations to generate} - -\item{n_vars}{Number of variables to generate} -} -\value{ -A list with the generated dataset -} -\description{ -Generate a dummy dataset as a list -} diff --git a/man/read_h5ad.Rd b/man/read_h5ad.Rd index 31775f98..ed078328 100644 --- a/man/read_h5ad.Rd +++ b/man/read_h5ad.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad.R \name{read_h5ad} \alias{read_h5ad} \title{Read H5AD} diff --git a/man/read_h5ad_categorical.Rd b/man/read_h5ad_categorical.Rd index 46f6dfca..492b0ab1 100644 --- a/man/read_h5ad_categorical.Rd +++ b/man/read_h5ad_categorical.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_categorical} \alias{read_h5ad_categorical} \title{Read H5AD categorical} diff --git a/man/read_h5ad_collection.Rd b/man/read_h5ad_collection.Rd index c9ee1f82..4b5e48a9 100644 --- a/man/read_h5ad_collection.Rd +++ b/man/read_h5ad_collection.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_collection} \alias{read_h5ad_collection} \title{Read multiple H5AD datatypes} diff --git a/man/read_h5ad_data_frame.Rd b/man/read_h5ad_data_frame.Rd index 4ab31b88..6d28cc06 100644 --- a/man/read_h5ad_data_frame.Rd +++ b/man/read_h5ad_data_frame.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in 
R/read_h5ad_helpers.R \name{read_h5ad_data_frame} \alias{read_h5ad_data_frame} \title{Read H5AD data frame} diff --git a/man/read_h5ad_data_frame_index.Rd b/man/read_h5ad_data_frame_index.Rd index 6f6ad6b6..8340197e 100644 --- a/man/read_h5ad_data_frame_index.Rd +++ b/man/read_h5ad_data_frame_index.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_data_frame_index} \alias{read_h5ad_data_frame_index} \title{Read H5AD data frame index} diff --git a/man/read_h5ad_dense_array.Rd b/man/read_h5ad_dense_array.Rd index 8ef5426d..c9805d70 100644 --- a/man/read_h5ad_dense_array.Rd +++ b/man/read_h5ad_dense_array.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_dense_array} \alias{read_h5ad_dense_array} \title{Read H5AD dense array} diff --git a/man/read_h5ad_element.Rd b/man/read_h5ad_element.Rd index cfe33a65..aca21c14 100644 --- a/man/read_h5ad_element.Rd +++ b/man/read_h5ad_element.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_element} \alias{read_h5ad_element} \title{Read H5AD element} diff --git a/man/read_h5ad_encoding.Rd b/man/read_h5ad_encoding.Rd index eb74011f..7f60b8ae 100644 --- a/man/read_h5ad_encoding.Rd +++ b/man/read_h5ad_encoding.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_encoding} \alias{read_h5ad_encoding} \title{Read H5AD encoding} diff --git a/man/read_h5ad_mapping.Rd b/man/read_h5ad_mapping.Rd index d60eb577..ca0b5c9c 100644 --- a/man/read_h5ad_mapping.Rd +++ b/man/read_h5ad_mapping.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do 
not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_mapping} \alias{read_h5ad_mapping} \title{Read H5AD mapping} diff --git a/man/read_h5ad_nullable.Rd b/man/read_h5ad_nullable.Rd index 474cadfb..9632665a 100644 --- a/man/read_h5ad_nullable.Rd +++ b/man/read_h5ad_nullable.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_nullable} \alias{read_h5ad_nullable} \title{Read H5AD nullable} diff --git a/man/read_h5ad_nullable_boolean.Rd b/man/read_h5ad_nullable_boolean.Rd index a1be4915..3ba1c170 100644 --- a/man/read_h5ad_nullable_boolean.Rd +++ b/man/read_h5ad_nullable_boolean.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_nullable_boolean} \alias{read_h5ad_nullable_boolean} \title{Read H5AD nullable boolean} diff --git a/man/read_h5ad_nullable_integer.Rd b/man/read_h5ad_nullable_integer.Rd index 2bdef581..6a8aaacb 100644 --- a/man/read_h5ad_nullable_integer.Rd +++ b/man/read_h5ad_nullable_integer.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_nullable_integer} \alias{read_h5ad_nullable_integer} \title{Read H5AD nullable integer} diff --git a/man/read_h5ad_numeric_scalar.Rd b/man/read_h5ad_numeric_scalar.Rd index ff848eb6..b30d687c 100644 --- a/man/read_h5ad_numeric_scalar.Rd +++ b/man/read_h5ad_numeric_scalar.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_numeric_scalar} \alias{read_h5ad_numeric_scalar} \title{Read H5AD numeric scalar} diff --git 
a/man/read_h5ad_rec_array.Rd b/man/read_h5ad_rec_array.Rd index 3265a58e..12690dcf 100644 --- a/man/read_h5ad_rec_array.Rd +++ b/man/read_h5ad_rec_array.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_rec_array} \alias{read_h5ad_rec_array} \title{Read H5AD recarray} diff --git a/man/read_h5ad_sparse_array.Rd b/man/read_h5ad_sparse_array.Rd index b96c46a6..289ec386 100644 --- a/man/read_h5ad_sparse_array.Rd +++ b/man/read_h5ad_sparse_array.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_sparse_array} \alias{read_h5ad_sparse_array} \title{Read H5AD sparse array} diff --git a/man/read_h5ad_string_array.Rd b/man/read_h5ad_string_array.Rd index 359192cb..9b088137 100644 --- a/man/read_h5ad_string_array.Rd +++ b/man/read_h5ad_string_array.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_string_array} \alias{read_h5ad_string_array} \title{Read H5AD string array} diff --git a/man/read_h5ad_string_scalar.Rd b/man/read_h5ad_string_scalar.Rd index 7033b765..5fbb2131 100644 --- a/man/read_h5ad_string_scalar.Rd +++ b/man/read_h5ad_string_scalar.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-read.R +% Please edit documentation in R/read_h5ad_helpers.R \name{read_h5ad_string_scalar} \alias{read_h5ad_string_scalar} \title{Read H5AD string scalar} diff --git a/man/write_h5ad.Rd b/man/write_h5ad.Rd index 3846c243..2c47df18 100644 --- a/man/write_h5ad.Rd +++ b/man/write_h5ad.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/HDF5-write.R +% Please edit documentation in R/write_h5ad.R 
\name{write_h5ad} \alias{write_h5ad} \title{Write H5AD} @@ -19,17 +19,54 @@ write_h5ad(object, path) Write an H5AD file } \examples{ -# Write a SingleCellExperiment as a H5AD +adata <- AnnData( + X = matrix(1:15, 3L, 5L), + layers = list( + A = matrix(15:1, 3L, 5L), + B = matrix(letters[1:15], 3L, 5L) + ), + obs = data.frame(cell = 1:3), + var = data.frame(gene = 1:5), + obs_names = LETTERS[1:3], + var_names = letters[1:5] +) h5ad_file <- tempfile(fileext = ".h5ad") +write_h5ad(adata, h5ad_file) + +# Write a SingleCellExperiment as an H5AD if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { - sce <- dummy_data(output = "SingleCellExperiment") - write_h5ad(sce, h5ad_file) + h5ad_file <- tempfile(fileext = ".h5ad") + ncells <- 100 + counts <- matrix(rpois(20000, 5), ncol = ncells) + logcounts <- log2(counts + 1) + pca <- matrix(runif(ncells * 5), ncells) + tsne <- matrix(rnorm(ncells * 2), ncells) + + sce <- SingleCellExperiment::SingleCellExperiment( + assays = list(counts = counts, logcounts = logcounts), + reducedDims = list(PCA = pca, tSNE = tsne) + ) } # Write a Seurat as a H5AD -h5ad_file <- tempfile(fileext = ".h5ad") if (requireNamespace("SeuratObject", quietly = TRUE)) { - seurat <- dummy_data(output = "Seurat") - write_h5ad(seurat, h5ad_file) + h5ad_file <- tempfile(fileext = ".h5ad") + counts <- matrix(1:15, 3L, 5L) + dimnames(counts) <- list( + letters[1:3], + LETTERS[1:5] + ) + gene.metadata <- data.frame( + row.names = LETTERS[1:5], + gene = 1:5 + ) + obj <- SeuratObject::CreateSeuratObject(counts, meta.data = gene.metadata) + cell.metadata <- data.frame( + row.names = letters[1:3], + cell = 1:3 + ) + obj <- SeuratObject::AddMetaData(obj, cell.metadata) + + write_h5ad(obj, h5ad_file) } } diff --git a/man/write_h5ad_dense_array.Rd b/man/write_h5ad_dense_array.Rd index edcfcf7b..b33b5ce6 100644 --- a/man/write_h5ad_dense_array.Rd +++ b/man/write_h5ad_dense_array.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit 
documentation in R/HDF5-write.R +% Please edit documentation in R/write_h5ad_helpers.R \name{write_h5ad_dense_array} \alias{write_h5ad_dense_array} \title{Write H5AD dense array} diff --git a/R/dummy_data.R b/tests/testthat/helper-dummy_data.R similarity index 97% rename from R/dummy_data.R rename to tests/testthat/helper-dummy_data.R index 9cbb7040..b48585fb 100644 --- a/R/dummy_data.R +++ b/tests/testthat/helper-dummy_data.R @@ -8,7 +8,6 @@ #' or "Seurat" #' #' @return Object containing the generated dataset as defined by `output` -#' @export #' #' @examples #' dummy <- dummy_data() @@ -80,7 +79,7 @@ dummy_list <- function(n_obs = 10L, n_vars = 20L) { #' @param ... Parameters passed to `dummy_list` #' #' @return SingleCellExperiment containing the generated data -dummy_SingleCellExperiment <- function(...) { #nolint +dummy_SingleCellExperiment <- function(...) { # nolint if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) { stop( "Creating a SingleCellExperiment requires the 'SingleCellExperiment'", @@ -114,7 +113,7 @@ dummy_SingleCellExperiment <- function(...) { #nolint #' @param ... Parameters passed to `dummy_list` #' #' @return Seurat containing the generated data -dummy_Seurat <- function(...) { #nolint +dummy_Seurat <- function(...) 
{ # nolint if (!requireNamespace("SeuratObject", quietly = TRUE)) { stop( "Creating a Seurat requires the 'SeuratObject' package to be installed" From 4ac9dbbfaec11163b0b482caa1aae4f8292150a1 Mon Sep 17 00:00:00 2001 From: Louise Deconinck Date: Mon, 18 Sep 2023 14:39:03 +0200 Subject: [PATCH 09/17] Remove eval, typo --- vignettes/getting-started.Rmd | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index 85655890..b7af650d 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -18,17 +18,17 @@ This package allows users to work with `.h5ad` files, access various slots in th Check out `?anndataR` for a full list of the functions provided by this package. -This package provides an abstract interface for AnnData objects. This abstract interface models its Python counterpart closely, and stores a data matrix `X` and annotations corresponding to observations (`obs`, `obsm`, `obsp`) and variables (`var`, `varm`, `varp`) and unstructured metadata `uns`. +This package provides an abstract interface for `AnnData` objects. This abstract interface models its Python counterpart closely, and stores a data matrix `X` and annotations corresponding to observations (`obs`, `obsm`, `obsp`) and variables (`var`, `varm`, `varp`) and unstructured metadata `uns`. This abstract interface is implemented by different backends. Currently, the following backends are implemented: -1. InMemoryAnnData +1. `InMemoryAnnData` -2. HDF5AnnData +2. `HDF5AnnData` -The InMemoryAnnData backend allows you to construct an AnnData object in memory. -The HDF5AnnData backend allows you to read in an AnnData object from an `.h5ad` file. +The `InMemoryAnnData` backend allows you to construct an `AnnData` object in memory. +The `HDF5AnnData` backend allows you to read in an `AnnData` object from an `.h5ad` file. Here is an example of how to read in an `.h5ad` file. 
@@ -36,13 +36,13 @@ Here is an example of how to read in an `.h5ad` file. library(anndataR) ``` -```{r show_anndata_hdf5, eval = FALSE} +```{r show_anndata_hdf5} file <- system.file("extdata", "example.h5ad", package = "anndataR") adata <- read_h5ad(file, to = "InMemoryAnnData") ``` -The contents can be accessed as well. -```{r access_hdf5, eval = FALSE} +The contents can be accessed as well: +```{r access_hdf5} X <- adata$X layers <- adata$layers obs <- adata$obs @@ -67,7 +67,7 @@ adata <- AnnData( adata ``` -The contents can be accessed as well. +The contents can be accessed as well: ```{r access_construct} X <- adata$X layers <- adata$layers From fc08613bd714b42f479cd75bb2283fcb945ac35f Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 18 Sep 2023 15:26:47 +0200 Subject: [PATCH 10/17] remove `read_h5ad_*` rd of functions that are not exported (#104) --- R/read_h5ad_helpers.R | 31 ++++++++++++++++++++++++++++++- R/write_h5ad.R | 2 +- man/read_h5ad_categorical.Rd | 21 --------------------- man/read_h5ad_collection.Rd | 21 --------------------- man/read_h5ad_data_frame.Rd | 29 ----------------------------- man/read_h5ad_data_frame_index.Rd | 21 --------------------- man/read_h5ad_dense_array.Rd | 21 --------------------- man/read_h5ad_element.Rd | 29 ----------------------------- man/read_h5ad_encoding.Rd | 19 ------------------- man/read_h5ad_mapping.Rd | 21 --------------------- man/read_h5ad_nullable.Rd | 21 --------------------- man/read_h5ad_nullable_integer.Rd | 21 --------------------- man/read_h5ad_numeric_scalar.Rd | 21 --------------------- man/read_h5ad_rec_array.Rd | 29 ----------------------------- man/read_h5ad_sparse_array.Rd | 28 ---------------------------- man/read_h5ad_string_array.Rd | 21 --------------------- man/read_h5ad_string_scalar.Rd | 21 --------------------- man/write_h5ad.Rd | 1 + 18 files changed, 32 insertions(+), 346 deletions(-) delete mode 100644 man/read_h5ad_categorical.Rd delete mode 100644 man/read_h5ad_collection.Rd 
delete mode 100644 man/read_h5ad_data_frame.Rd delete mode 100644 man/read_h5ad_data_frame_index.Rd delete mode 100644 man/read_h5ad_dense_array.Rd delete mode 100644 man/read_h5ad_element.Rd delete mode 100644 man/read_h5ad_encoding.Rd delete mode 100644 man/read_h5ad_mapping.Rd delete mode 100644 man/read_h5ad_nullable.Rd delete mode 100644 man/read_h5ad_nullable_integer.Rd delete mode 100644 man/read_h5ad_numeric_scalar.Rd delete mode 100644 man/read_h5ad_rec_array.Rd delete mode 100644 man/read_h5ad_sparse_array.Rd delete mode 100644 man/read_h5ad_string_array.Rd delete mode 100644 man/read_h5ad_string_scalar.Rd diff --git a/R/read_h5ad_helpers.R b/R/read_h5ad_helpers.R index 0cdcaa11..d27647e9 100644 --- a/R/read_h5ad_helpers.R +++ b/R/read_h5ad_helpers.R @@ -6,6 +6,8 @@ #' @param name Name of the element within the H5AD file #' #' @return A named list with names type and version +#' +#' @noRd read_h5ad_encoding <- function(file, name) { attrs <- rhdf5::h5readAttributes(file, name) @@ -37,8 +39,9 @@ read_h5ad_encoding <- function(file, name) { #' Encoding is automatically determined from the element using #' `read_h5ad_encoding` and used to select the appropriate reading function. #' -#' #' @return Value depending on the encoding +#' +#' @noRd read_h5ad_element <- function(file, name, type = NULL, version = NULL, ...) { if (is.null(type)) { encoding_list <- read_h5ad_encoding(file, name) @@ -77,6 +80,8 @@ read_h5ad_element <- function(file, name, type = NULL, version = NULL, ...) 
{ #' @param version Encoding version of the element to read #' #' @return a matrix or a vector if 1D +#' +#' @noRd read_h5ad_dense_array <- function(file, name, version = "0.2.0") { version <- match.arg(version) # TODO: ideally, native = TRUE should take care of the row order and column order, @@ -118,6 +123,8 @@ read_h5ad_csc_matrix <- function(file, name, version) { #' #' @return a sparse matrix/DelayedArray???, or a vector if 1D #' @importFrom Matrix sparseMatrix +#' +#' @noRd read_h5ad_sparse_array <- function(file, name, version = "0.1.0", type = c("csr_matrix", "csc_matrix")) { version <- match.arg(version) @@ -168,6 +175,8 @@ read_h5ad_sparse_array <- function(file, name, version = "0.1.0", #' They are used by **scanpy** to score marker gene testing results. #' #' @return a named list of 1D arrays +#' +#' @noRd read_h5ad_rec_array <- function(file, name, version = "0.2.0") { version <- match.arg(version) @@ -196,6 +205,8 @@ read_h5ad_nullable_boolean <- function(file, name, version = "0.1.0") { #' @param version Encoding version of the element to read #' #' @return an integer vector +#' +#' @noRd read_h5ad_nullable_integer <- function(file, name, version = "0.1.0") { as.integer(read_h5ad_nullable(file, name, version)) } @@ -209,6 +220,8 @@ read_h5ad_nullable_integer <- function(file, name, version = "0.1.0") { #' @param version Encoding version of the element to read #' #' @return a nullable vector +#' +#' @noRd read_h5ad_nullable <- function(file, name, version = "0.1.0") { version <- match.arg(version) @@ -236,6 +249,8 @@ read_h5ad_nullable <- function(file, name, version = "0.1.0") { #' @param version Encoding version of the element to read #' #' @return a character vector/matrix +#' +#' @noRd read_h5ad_string_array <- function(file, name, version = "0.2.0") { version <- match.arg(version) # reads in transposed @@ -261,6 +276,8 @@ read_h5ad_string_array <- function(file, name, version = "0.2.0") { #' @param version Encoding version of the element to read 
#' #' @return a factor +#' +#' @noRd read_h5ad_categorical <- function(file, name, version = "0.2.0") { version <- match.arg(version) @@ -304,6 +321,8 @@ read_h5ad_categorical <- function(file, name, version = "0.2.0") { #' @param version Encoding version of the element to read #' #' @return a character vector of length 1 +#' +#' @noRd read_h5ad_string_scalar <- function(file, name, version = "0.2.0") { version <- match.arg(version) rhdf5::h5read(file, name) @@ -318,6 +337,8 @@ read_h5ad_string_scalar <- function(file, name, version = "0.2.0") { #' @param version Encoding version of the element to read #' #' @return a numeric vector of length 1 +#' +#' @noRd read_h5ad_numeric_scalar <- function(file, name, version = "0.2.0") { version <- match.arg(version) rhdf5::h5read(file, name) @@ -332,6 +353,8 @@ read_h5ad_numeric_scalar <- function(file, name, version = "0.2.0") { #' @param version Encoding version of the element to read #' #' @return a named list +#' +#' @noRd read_h5ad_mapping <- function(file, name, version = "0.1.0") { version <- match.arg(version) groupname <- paste0("/", name) @@ -358,6 +381,8 @@ read_h5ad_mapping <- function(file, name, version = "0.1.0") { #' is not provided in the output. In either case row names are not set. 
#' #' @return a data.frame +#' +#' @noRd read_h5ad_data_frame <- function(file, name, include_index = TRUE, version = "0.2.0") { version <- match.arg(version) @@ -400,6 +425,8 @@ read_h5ad_data_frame <- function(file, name, include_index = TRUE, #' @param version Encoding version of the element to read #' #' @return an object containing the index +#' +#' @noRd read_h5ad_data_frame_index <- function(file, name, version = "0.2.0") { version <- match.arg(version) @@ -416,6 +443,8 @@ read_h5ad_data_frame_index <- function(file, name, version = "0.2.0") { #' @param column_order Vector of item names (in order) #' #' @return a named list +#' +#' @noRd read_h5ad_collection <- function(file, name, column_order) { columns <- list() for (col_name in column_order) { diff --git a/R/write_h5ad.R b/R/write_h5ad.R index fdca177a..c1fb0411 100644 --- a/R/write_h5ad.R +++ b/R/write_h5ad.R @@ -30,7 +30,7 @@ #' ncells <- 100 #' counts <- matrix(rpois(20000, 5), ncol = ncells) #' logcounts <- log2(counts + 1) -# +#' # #' pca <- matrix(runif(ncells * 5), ncells) #' tsne <- matrix(rnorm(ncells * 2), ncells) #' diff --git a/man/read_h5ad_categorical.Rd b/man/read_h5ad_categorical.Rd deleted file mode 100644 index 492b0ab1..00000000 --- a/man/read_h5ad_categorical.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_categorical} -\alias{read_h5ad_categorical} -\title{Read H5AD categorical} -\usage{ -read_h5ad_categorical(file, name, version = "0.2.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -a factor -} -\description{ -Read a categorical from an H5AD file -} diff --git a/man/read_h5ad_collection.Rd b/man/read_h5ad_collection.Rd deleted file mode 100644 index 4b5e48a9..00000000 --- a/man/read_h5ad_collection.Rd +++ /dev/null @@ -1,21 
+0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_collection} -\alias{read_h5ad_collection} -\title{Read multiple H5AD datatypes} -\usage{ -read_h5ad_collection(file, name, column_order) -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{column_order}{Vector of item names (in order)} -} -\value{ -a named list -} -\description{ -Read multiple H5AD datatypes -} diff --git a/man/read_h5ad_data_frame.Rd b/man/read_h5ad_data_frame.Rd deleted file mode 100644 index 6d28cc06..00000000 --- a/man/read_h5ad_data_frame.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_data_frame} -\alias{read_h5ad_data_frame} -\title{Read H5AD data frame} -\usage{ -read_h5ad_data_frame(file, name, include_index = TRUE, version = "0.2.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{include_index}{Whether or not to include the index as a column} - -\item{version}{Encoding version of the element to read} -} -\value{ -a data.frame -} -\description{ -Read a data frame from an H5AD file -} -\details{ -If \code{include_index == TRUE} the index stored in the HDF5 file is added as a -column to output \code{data.frame} using the defined index name as the column -name and this is set as an attribute. If \code{include_index == FALSE} the index -is not provided in the output. In either case row names are not set. 
-} diff --git a/man/read_h5ad_data_frame_index.Rd b/man/read_h5ad_data_frame_index.Rd deleted file mode 100644 index 8340197e..00000000 --- a/man/read_h5ad_data_frame_index.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_data_frame_index} -\alias{read_h5ad_data_frame_index} -\title{Read H5AD data frame index} -\usage{ -read_h5ad_data_frame_index(file, name, version = "0.2.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -an object containing the index -} -\description{ -Read the index of a data frame from an H5AD file -} diff --git a/man/read_h5ad_dense_array.Rd b/man/read_h5ad_dense_array.Rd deleted file mode 100644 index c9805d70..00000000 --- a/man/read_h5ad_dense_array.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_dense_array} -\alias{read_h5ad_dense_array} -\title{Read H5AD dense array} -\usage{ -read_h5ad_dense_array(file, name, version = "0.2.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -a matrix or a vector if 1D -} -\description{ -Read a dense array from an H5AD file -} diff --git a/man/read_h5ad_element.Rd b/man/read_h5ad_element.Rd deleted file mode 100644 index aca21c14..00000000 --- a/man/read_h5ad_element.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_element} -\alias{read_h5ad_element} -\title{Read H5AD element} -\usage{ -read_h5ad_element(file, name, type = NULL, version = NULL, ...) 
-} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{type}{The encoding type of the element to read} - -\item{version}{The encoding version of the element to read} - -\item{...}{Extra arguments passed to individual reading functions} -} -\value{ -Value depending on the encoding -} -\description{ -Read an element from a H5AD file -} -\details{ -Encoding is automatically determined from the element using -\code{read_h5ad_encoding} and used to select the appropriate reading function. -} diff --git a/man/read_h5ad_encoding.Rd b/man/read_h5ad_encoding.Rd deleted file mode 100644 index 7f60b8ae..00000000 --- a/man/read_h5ad_encoding.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_encoding} -\alias{read_h5ad_encoding} -\title{Read H5AD encoding} -\usage{ -read_h5ad_encoding(file, name) -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} -} -\value{ -A named list with names type and version -} -\description{ -Read the encoding and version of an element in a H5AD file -} diff --git a/man/read_h5ad_mapping.Rd b/man/read_h5ad_mapping.Rd deleted file mode 100644 index ca0b5c9c..00000000 --- a/man/read_h5ad_mapping.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_mapping} -\alias{read_h5ad_mapping} -\title{Read H5AD mapping} -\usage{ -read_h5ad_mapping(file, name, version = "0.1.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -a named list -} -\description{ -Read a mapping from an H5AD file -} diff --git a/man/read_h5ad_nullable.Rd b/man/read_h5ad_nullable.Rd 
deleted file mode 100644 index 9632665a..00000000 --- a/man/read_h5ad_nullable.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_nullable} -\alias{read_h5ad_nullable} -\title{Read H5AD nullable} -\usage{ -read_h5ad_nullable(file, name, version = "0.1.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -a nullable vector -} -\description{ -Read a nullable vector (boolean or integer) from an H5AD file -} diff --git a/man/read_h5ad_nullable_integer.Rd b/man/read_h5ad_nullable_integer.Rd deleted file mode 100644 index 6a8aaacb..00000000 --- a/man/read_h5ad_nullable_integer.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_nullable_integer} -\alias{read_h5ad_nullable_integer} -\title{Read H5AD nullable integer} -\usage{ -read_h5ad_nullable_integer(file, name, version = "0.1.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -an integer vector -} -\description{ -Read a nullable integer from an H5AD file -} diff --git a/man/read_h5ad_numeric_scalar.Rd b/man/read_h5ad_numeric_scalar.Rd deleted file mode 100644 index b30d687c..00000000 --- a/man/read_h5ad_numeric_scalar.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_numeric_scalar} -\alias{read_h5ad_numeric_scalar} -\title{Read H5AD numeric scalar} -\usage{ -read_h5ad_numeric_scalar(file, name, version = "0.2.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element 
within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -a numeric vector of length 1 -} -\description{ -Read a numeric scalar from an H5AD file -} diff --git a/man/read_h5ad_rec_array.Rd b/man/read_h5ad_rec_array.Rd deleted file mode 100644 index 12690dcf..00000000 --- a/man/read_h5ad_rec_array.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_rec_array} -\alias{read_h5ad_rec_array} -\title{Read H5AD recarray} -\usage{ -read_h5ad_rec_array(file, name, version = "0.2.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -a named list of 1D arrays -} -\description{ -Read a recarray from an H5AD file -} -\details{ -A "record array" (recarray) is a Python NumPy array type that contains -"fields" that can be indexed using attributes (similar to columns in a -spreadsheet). See https://numpy.org/doc/stable/reference/generated/numpy.recarray.html -for details. - -They are used by \strong{scanpy} to score marker gene testing results. 
-} diff --git a/man/read_h5ad_sparse_array.Rd b/man/read_h5ad_sparse_array.Rd deleted file mode 100644 index 289ec386..00000000 --- a/man/read_h5ad_sparse_array.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_sparse_array} -\alias{read_h5ad_sparse_array} -\title{Read H5AD sparse array} -\usage{ -read_h5ad_sparse_array( - file, - name, - version = "0.1.0", - type = c("csr_matrix", "csc_matrix") -) -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} - -\item{type}{Type of the sparse matrix, either "csr_matrix" or "csc_matrix"} -} -\value{ -a sparse matrix/DelayedArray???, or a vector if 1D -} -\description{ -Read a sparse array from an H5AD file -} diff --git a/man/read_h5ad_string_array.Rd b/man/read_h5ad_string_array.Rd deleted file mode 100644 index 9b088137..00000000 --- a/man/read_h5ad_string_array.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_string_array} -\alias{read_h5ad_string_array} -\title{Read H5AD string array} -\usage{ -read_h5ad_string_array(file, name, version = "0.2.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -a character vector/matrix -} -\description{ -Read a string array from an H5AD file -} diff --git a/man/read_h5ad_string_scalar.Rd b/man/read_h5ad_string_scalar.Rd deleted file mode 100644 index 5fbb2131..00000000 --- a/man/read_h5ad_string_scalar.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_h5ad_helpers.R -\name{read_h5ad_string_scalar} -\alias{read_h5ad_string_scalar} 
-\title{Read H5AD string scalar} -\usage{ -read_h5ad_string_scalar(file, name, version = "0.2.0") -} -\arguments{ -\item{file}{Path to a H5AD file or an open H5AD handle} - -\item{name}{Name of the element within the H5AD file} - -\item{version}{Encoding version of the element to read} -} -\value{ -a character vector of length 1 -} -\description{ -Read a string scalar from an H5AD file -} diff --git a/man/write_h5ad.Rd b/man/write_h5ad.Rd index 2c47df18..f47d835c 100644 --- a/man/write_h5ad.Rd +++ b/man/write_h5ad.Rd @@ -39,6 +39,7 @@ if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { ncells <- 100 counts <- matrix(rpois(20000, 5), ncol = ncells) logcounts <- log2(counts + 1) + # pca <- matrix(runif(ncells * 5), ncells) tsne <- matrix(rnorm(ncells * 2), ncells) From 1f54ebe7deb05557c231dc7597c0fd2a9c639dc1 Mon Sep 17 00:00:00 2001 From: Chananchida Sang-aram Date: Mon, 18 Sep 2023 16:30:47 +0200 Subject: [PATCH 11/17] Made print message like the Python version (#106) * made print message like AnnData * removed string wrapping * changed to unicode and ran styler * Fix unicode character in code --------- Co-authored-by: Robrecht Cannoodt --- R/AbstractAnnData.R | 29 ++++++++++++++++------------- R/utilities.R | 3 +-- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/R/AbstractAnnData.R b/R/AbstractAnnData.R index 53012a55..64aec4f2 100644 --- a/R/AbstractAnnData.R +++ b/R/AbstractAnnData.R @@ -57,20 +57,23 @@ AbstractAnnData <- R6::R6Class("AbstractAnnData", # nolint #' computationally expensive. #' @param ... Optional arguments to print method. print = function(...) 
{ - x_info <- if (!is.null(self$X)) { - class(self$X)[[1]] - } else { - NULL + cat("AnnData object with n_obs \u00D7 n_vars = ", self$n_obs(), " \u00D7 ", self$n_vars(), "\n", sep = "") + + for (attribute in c( + "obs", + "var", + "uns", + "obsm", + "varm", + "layers", + "obsp", + "varp" + )) { + attr_key <- paste0(attribute, "_keys") + if (!is.null(self[[attr_key]])) { + cat(" ", pretty_print(attribute, self[[attr_key]]()), "\n", sep = "") + } } - cat( - "class: ", class(self)[[1]], "\n", - "dim: ", self$n_obs(), " obs x ", self$n_vars(), " var\n", - "X: ", x_info, "\n", - pretty_print("layers", self$layers_keys()), "\n", - pretty_print("obs", self$obs_keys()), "\n", - pretty_print("var", self$var_keys()), "\n", - sep = "" - ) }, #' @description Dimensions (observations x variables) of the AnnData object. diff --git a/R/utilities.R b/R/utilities.R index bed3c192..a6127126 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1,6 +1,5 @@ pretty_print <- function(label, value) { - txt <- paste0(label, ": ", paste(value, collapse = " ")) - paste0(strwrap(txt, indent = 0, exdent = 2), collapse = "\n") + paste0(label, ": ", paste0("'", value, "'", collapse = ", ")) } wrap_message <- function(...) 
{ From 2820ec2cfe24ee29b668dd287c064b502e271984 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 19 Sep 2023 06:37:07 +0200 Subject: [PATCH 12/17] also check whether ordered is null or not (#109) --- R/read_h5ad_helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/read_h5ad_helpers.R b/R/read_h5ad_helpers.R index d27647e9..d1151363 100644 --- a/R/read_h5ad_helpers.R +++ b/R/read_h5ad_helpers.R @@ -297,7 +297,7 @@ read_h5ad_categorical <- function(file, name, version = "0.2.0") { attributes <- rhdf5::h5readAttributes(file, name) ordered <- attributes[["ordered"]] - if (is.na(ordered)) { + if (is.null(ordered) || is.na(ordered)) { # This version of {rhdf5} doesn't yet support ENUM type attributes so we # can't tell if the categorical should be ordered, # see https://github.com/grimbough/rhdf5/issues/125 From e2cc818938c4fd567ecfbffff03201ecfb1a01ba Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 19 Sep 2023 07:04:53 +0200 Subject: [PATCH 13/17] update hdf5 status (#110) * update hdf5 status * update testing status --- R/HDF5AnnData.R | 25 ++++++++++++------------- tests/testthat/test-HDF5AnnData.R | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/R/HDF5AnnData.R b/R/HDF5AnnData.R index 89d4734d..b34bde7b 100644 --- a/R/HDF5AnnData.R +++ b/R/HDF5AnnData.R @@ -15,10 +15,10 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint #' @field X The X slot X = function(value) { if (missing(value)) { - # trackstatus: class=HDF5AnnData, feature=get_X, status=wip + # trackstatus: class=HDF5AnnData, feature=get_X, status=done read_h5ad_element(private$.h5obj, "/X") } else { - # trackstatus: class=HDF5AnnData, feature=set_X, status=wip + # trackstatus: class=HDF5AnnData, feature=set_X, status=done value <- private$.validate_matrix(value, "X") write_h5ad_element(value, private$.h5obj, "/X") } @@ -28,10 +28,10 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint #' `obs` and `var`. 
layers = function(value) { if (missing(value)) { - # trackstatus: class=HDF5AnnData, feature=get_layers, status=wip + # trackstatus: class=HDF5AnnData, feature=get_layers, status=done read_h5ad_element(private$.h5obj, "layers") } else { - # trackstatus: class=HDF5AnnData, feature=set_layers, status=wip + # trackstatus: class=HDF5AnnData, feature=set_layers, status=done value <- private$.validate_layers(value) write_h5ad_element(value, private$.h5obj, "/layers") } @@ -39,10 +39,10 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint #' @field obs The obs slot obs = function(value) { if (missing(value)) { - # trackstatus: class=HDF5AnnData, feature=get_obs, status=wip + # trackstatus: class=HDF5AnnData, feature=get_obs, status=done read_h5ad_element(private$.h5obj, "/obs", include_index = FALSE) } else { - # trackstatus: class=HDF5AnnData, feature=set_obs, status=wip + # trackstatus: class=HDF5AnnData, feature=set_obs, status=done value <- private$.validate_obsvar_dataframe(value, "obs") write_h5ad_element( value, @@ -55,10 +55,10 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint #' @field var The var slot var = function(value) { if (missing(value)) { - # trackstatus: class=HDF5AnnData, feature=get_var, status=wip + # trackstatus: class=HDF5AnnData, feature=get_var, status=done read_h5ad_element(private$.h5obj, "/var", include_index = FALSE) } else { - # trackstatus: class=HDF5AnnData, feature=set_var, status=wip + # trackstatus: class=HDF5AnnData, feature=set_var, status=done value <- private$.validate_obsvar_dataframe(value, "var") write_h5ad_element( value, @@ -70,9 +70,8 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint }, #' @field obs_names Names of observations obs_names = function(value) { - # TODO: directly write to and read from /obs/_index if (missing(value)) { - # trackstatus: class=HDF5AnnData, feature=get_obs_names, status=wip + # trackstatus: class=HDF5AnnData, feature=get_obs_names, status=done # obs names are cached to avoid reading all of 
obs whenever they are # accessed if (is.null(private$.obs_names)) { @@ -80,7 +79,7 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint } private$.obs_names } else { - # trackstatus: class=HDF5AnnData, feature=set_obs_names, status=wip + # trackstatus: class=HDF5AnnData, feature=set_obs_names, status=done value <- private$.validate_obsvar_names(value, "obs") write_h5ad_data_frame_index(value, private$.h5obj, "obs", "_index") private$.obs_names <- value @@ -90,7 +89,7 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint var_names = function(value) { # TODO: directly write to and read from /var/_index if (missing(value)) { - # trackstatus: class=HDF5AnnData, feature=get_var_names, status=wip + # trackstatus: class=HDF5AnnData, feature=get_var_names, status=done # var names are cached to avoid reading all of var whenever they are # accessed if (is.null(private$.var_names)) { @@ -98,7 +97,7 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint } private$.var_names } else { - # trackstatus: class=HDF5AnnData, feature=set_var_names, status=wip + # trackstatus: class=HDF5AnnData, feature=set_var_names, status=done value <- private$.validate_obsvar_names(value, "var") write_h5ad_data_frame_index(value, private$.h5obj, "var", "_index") private$.var_names <- value diff --git a/tests/testthat/test-HDF5AnnData.R b/tests/testthat/test-HDF5AnnData.R index 1fadfc58..240dd2f6 100644 --- a/tests/testthat/test-HDF5AnnData.R +++ b/tests/testthat/test-HDF5AnnData.R @@ -10,14 +10,14 @@ test_that("opening H5AD works", { adata <- HDF5AnnData$new(file) # GETTERS ---------------------------------------------------------------- -# trackstatus: class=HDF5AnnData, feature=test_get_X, status=wip +# trackstatus: class=HDF5AnnData, feature=test_get_X, status=done test_that("reading X works", { X <- adata$X expect_s4_class(X, "dgRMatrix") expect_equal(dim(X), c(50, 100)) }) -# trackstatus: class=HDF5AnnData, feature=test_get_layers, status=wip +# trackstatus: class=HDF5AnnData, 
feature=test_get_layers, status=done test_that("reading layers works", { layers <- adata$layers expect_true(is.list(layers), "list") @@ -27,7 +27,7 @@ test_that("reading layers works", { ) }) -# trackstatus: class=HDF5AnnData, feature=test_get_obs, status=wip +# trackstatus: class=HDF5AnnData, feature=test_get_obs, status=done test_that("reading obs works", { obs <- adata$obs expect_s3_class(obs, "data.frame") @@ -40,7 +40,7 @@ test_that("reading obs works", { ) }) -# trackstatus: class=HDF5AnnData, feature=test_get_var, status=wip +# trackstatus: class=HDF5AnnData, feature=test_get_var, status=done test_that("reading var works", { var <- adata$var expect_s3_class(var, "data.frame") @@ -54,13 +54,13 @@ test_that("reading var works", { ) }) -# trackstatus: class=HDF5AnnData, feature=test_get_obs_names, status=wip +# trackstatus: class=HDF5AnnData, feature=test_get_obs_names, status=done test_that("reading obs names works", { obs_names <- adata$obs_names expect_vector(obs_names, ptype = character(), size = 50) }) -# trackstatus: class=HDF5AnnData, feature=test_get_var_names, status=wip +# trackstatus: class=HDF5AnnData, feature=test_get_var_names, status=done test_that("reading var names works", { var_names <- adata$var_names expect_vector(var_names, ptype = character(), size = 100) @@ -72,7 +72,7 @@ test_that("creating empty H5AD works", { expect_silent(HDF5AnnData$new(h5ad_file, obs_names = 1:10, var_names = 1:20)) }) -# trackstatus: class=HDF5AnnData, feature=test_set_X, status=wip +# trackstatus: class=HDF5AnnData, feature=test_set_X, status=done test_that("writing X works", { h5ad_file <- withr::local_tempfile(fileext = ".h5ad") h5ad <- HDF5AnnData$new(h5ad_file, obs_names = 1:10, var_names = 1:20) @@ -81,7 +81,7 @@ test_that("writing X works", { expect_silent(h5ad$X <- X) }) -# trackstatus: class=HDF5AnnData, feature=test_set_layers, status=wip +# trackstatus: class=HDF5AnnData, feature=test_set_layers, status=done test_that("writing layers works", { h5ad_file 
<- withr::local_tempfile(fileext = ".h5ad") h5ad <- HDF5AnnData$new(h5ad_file, obs_names = 1:10, var_names = 1:20) @@ -90,7 +90,7 @@ test_that("writing layers works", { expect_silent(h5ad$layers <- list(layer1 = X, layer2 = X)) }) -# trackstatus: class=HDF5AnnData, feature=test_set_obs, status=wip +# trackstatus: class=HDF5AnnData, feature=test_set_obs, status=done test_that("writing obs works", { h5ad_file <- withr::local_tempfile(fileext = ".h5ad") h5ad <- HDF5AnnData$new(h5ad_file, obs_names = 1:10, var_names = 1:20) @@ -104,7 +104,7 @@ test_that("writing obs works", { expect_identical(h5ad$obs_names, 1:10) }) -# trackstatus: class=HDF5AnnData, feature=test_set_var, status=wip +# trackstatus: class=HDF5AnnData, feature=test_set_var, status=done test_that("writing var works", { h5ad_file <- withr::local_tempfile(fileext = ".h5ad") h5ad <- HDF5AnnData$new(h5ad_file, obs_names = 1:10, var_names = 1:20) @@ -118,7 +118,7 @@ test_that("writing var works", { expect_identical(h5ad$var_names, 1:20) }) -# trackstatus: class=HDF5AnnData, feature=test_set_obs_names, status=wip +# trackstatus: class=HDF5AnnData, feature=test_set_obs_names, status=done test_that("writing obs names works", { h5ad_file <- withr::local_tempfile(fileext = ".h5ad") h5ad <- HDF5AnnData$new(h5ad_file, obs_names = 1:10, var_names = 1:20) @@ -127,7 +127,7 @@ test_that("writing obs names works", { expect_identical(h5ad$obs_names, LETTERS[1:10]) }) -# trackstatus: class=HDF5AnnData, feature=test_set_var_names, status=wip +# trackstatus: class=HDF5AnnData, feature=test_set_var_names, status=done test_that("writing var names works", { h5ad_file <- withr::local_tempfile(fileext = ".h5ad") h5ad <- HDF5AnnData$new(h5ad_file, obs_names = 1:10, var_names = 1:20) From 5964c5cd258c141bda4b3ceb75ee7524d823bae5 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 19 Sep 2023 09:47:39 +0200 Subject: [PATCH 14/17] Update docs (#111) * update docs * move class diagram to vignette * remove doc folder to #54 * 
don't include design doc in built package --- .Rbuildignore | 5 +- README.md | 55 +++++---- README.qmd | 29 +++-- doc/challenges.md | 14 --- doc/design.md | 148 ------------------------ doc/design.qmd | 149 ------------------------- vignettes/{features.Rmd => design.Rmd} | 38 ++++++- vignettes/diagrams/class_diagram.mmd | 51 +++++++++ vignettes/diagrams/class_diagram.svg | 1 + vignettes/diagrams/script.sh | 8 ++ 10 files changed, 151 insertions(+), 347 deletions(-) delete mode 100644 doc/challenges.md delete mode 100644 doc/design.md delete mode 100644 doc/design.qmd rename vignettes/{features.Rmd => design.Rmd} (65%) create mode 100644 vignettes/diagrams/class_diagram.mmd create mode 100644 vignettes/diagrams/class_diagram.svg create mode 100755 vignettes/diagrams/script.sh diff --git a/.Rbuildignore b/.Rbuildignore index fa7ae45a..e7831676 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,5 +1,4 @@ ^LICENSE\.md$ -^doc$ ^.*\.Rproj$ ^\.Rproj\.user$ ^\.github$ @@ -10,4 +9,6 @@ ^_pkgdown\.yml$ ^docs$ ^pkgdown$ -^vignettes/features.Rmd$ \ No newline at end of file +^vignettes/diagrams/.*\.mmd$ +^vignettes/diagrams/.*\.svg$ +^vignettes/design\.Rmd$ \ No newline at end of file diff --git a/README.md b/README.md index 29d31163..f86be1d4 100644 --- a/README.md +++ b/README.md @@ -9,22 +9,18 @@ experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](h status](https://www.r-pkg.org/badges/version/anndataR.png)](https://CRAN.R-project.org/package=anndataR) -`{anndataR}` is an R package that brings the power and flexibility of -AnnData to the R ecosystem, allowing you to effortlessly manipulate and -analyze your single-cell data. This package lets you work with backed -h5ad and zarr files, directly access various slots (e.g. X, obs, var, -obsm, obsp), or convert the data into SingleCellExperiment and Seurat -objects.
+`{anndataR}` aims to make the AnnData format a first-class citizen in +the R ecosystem, and to make it easy to work with AnnData files in R, +either directly or by converting it to a SingleCellExperiment or Seurat +object. -## Design +Feature list: -This package was initially created at the [scverse 2023-04 -hackathon](https://scverse.org/events/2023_04_hackathon/) in Heidelberg. - -When fully implemented, it will be a complete replacement for -[theislab/zellkonverter](https://github.com/theislab/zellkonverter), -[mtmorgan/h5ad](github.com/mtmorgan/h5ad/) and -[dynverse/anndata](https://github.com/dynverse/anndata). +- Provide an `R6` class to work with AnnData objects in R (either + in-memory or on-disk). +- Read/write `*.h5ad` files natively +- Convert to/from `SingleCellExperiment` objects +- Convert to/from `Seurat` objects ## Installation @@ -34,6 +30,24 @@ You can install the development version of `{anndataR}` like so: devtools::install_github("scverse/anndataR") ``` +You might need to install suggested dependencies manually, depending on +the task you want to perform. + +- To read/write `*.h5ad` files, you need to install `{rhdf5}`: + `BiocManager::install("rhdf5")` +- To convert to/from `SingleCellExperiment` objects, you need to install + `{SingleCellExperiment}`: + `BiocManager::install("SingleCellExperiment")` +- To convert to/from `Seurat` objects, you need to install + `{SeuratObject}`: `install.packages("SeuratObject")` + +You can also install all suggested dependencies at once (though note +that this might take a while to run): + +``` r +devtools::install_github("scverse/anndataR", dependencies = TRUE) +``` + ## Example Here’s a quick example of how to use `{anndataR}`. 
First, we download an @@ -55,15 +69,10 @@ View structure: ``` r adata -#> class: InMemoryAnnData -#> dim: 50 obs x 100 var -#> X: dgRMatrix -#> layers: counts csc_counts dense_X dense_counts -#> obs: Float FloatNA Int IntNA Bool BoolNA n_genes_by_counts -#> log1p_n_genes_by_counts total_counts log1p_total_counts leiden -#> var: String n_cells_by_counts mean_counts log1p_mean_counts -#> pct_dropout_by_counts total_counts log1p_total_counts highly_variable -#> means dispersions dispersions_norm +#> AnnData object with n_obs × n_vars = 50 × 100 +#> obs: 'Float', 'FloatNA', 'Int', 'IntNA', 'Bool', 'BoolNA', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'leiden' +#> var: 'String', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' +#> layers: 'counts', 'csc_counts', 'dense_X', 'dense_counts' ``` Access AnnData slots: diff --git a/README.qmd b/README.qmd index 6c8f3e72..8337507a 100644 --- a/README.qmd +++ b/README.qmd @@ -27,17 +27,15 @@ knitr::opts_chunk$set( -`{anndataR}` is an R package that brings the power and flexibility of AnnData to the -R ecosystem, allowing you to effortlessly manipulate and analyze your single-cell data. -This package lets you work with backed h5ad and zarr files, directly access various slots -(e.g. X, obs, var, obsm, obsp), or convert the data into SingleCellExperiment and Seurat -objects. +`{anndataR}` aims to make the AnnData format a first-class citizen in the R ecosystem, and to make it easy to work with AnnData files in R, either directly +or by converting it to a SingleCellExperiment or Seurat object. -## Design +Feature list: -This package was initially created at the [scverse 2023-04 hackathon](https://scverse.org/events/2023_04_hackathon/) in Heidelberg. 
- -When fully implemented, it will be a complete replacement for [theislab/zellkonverter](https://github.com/theislab/zellkonverter), [mtmorgan/h5ad](github.com/mtmorgan/h5ad/) and [dynverse/anndata](https://github.com/dynverse/anndata). +* Provide an `R6` class to work with AnnData objects in R (either in-memory or on-disk). +* Read/write `*.h5ad` files natively +* Convert to/from `SingleCellExperiment` objects +* Convert to/from `Seurat` objects ## Installation @@ -48,6 +46,18 @@ You can install the development version of `{anndataR}` like so: devtools::install_github("scverse/anndataR") ``` +You might need to install suggested dependencies manually, depending on the task you want to perform. + +* To read/write `*.h5ad` files, you need to install `{rhdf5}`: `BiocManager::install("rhdf5")` +* To convert to/from `SingleCellExperiment` objects, you need to install `{SingleCellExperiment}`: `BiocManager::install("SingleCellExperiment")` +* To convert to/from `Seurat` objects, you need to install `{SeuratObject}`: `install.packages("SeuratObject")` + +You can also install all suggested dependencies at once (though note that this might take a while to run): + +``` r +devtools::install_github("scverse/anndataR", dependencies = TRUE) +``` + ## Example Here's a quick example of how to use `{anndataR}`. First, we download an h5ad file. @@ -94,3 +104,4 @@ Convert the AnnData object to a Seurat object: obj <- adata$to_Seurat() obj ``` + diff --git a/doc/challenges.md b/doc/challenges.md deleted file mode 100644 index a7abfa92..00000000 --- a/doc/challenges.md +++ /dev/null @@ -1,14 +0,0 @@ -# Challenges - -## Previously encountered issues - -Below are previously encountered issues when reading h5ad files using hdf5r. They could be -to create test cases. 
- -* [mojaveazure/seurat-disk#10](https://github.com/mojaveazure/seurat-disk/issues/10): Conversion error with SeuratDisk when copying `uns` -* [PMBio/MuDataSeurat#8](https://github.com/PMBio/MuDataSeurat/issues/8): PCA loadings issue - - -No test data yet: - -* [PMBio/MuDataSeurat#14](https://github.com/PMBio/MuDataSeurat/issues/14): H5Dvlen_reclaim invalid argument \ No newline at end of file diff --git a/doc/design.md b/doc/design.md deleted file mode 100644 index fe4c02e5..00000000 --- a/doc/design.md +++ /dev/null @@ -1,148 +0,0 @@ -# Design document - -## Proposed interface - -``` r -library(anndataR) - -# read from h5ad/h5mu file -adata <- read_h5ad("dataset.h5ad") -adata <- read_h5ad("dataset.h5ad", backed = TRUE) -mdata <- read_h5mu("dataset.h5mu") -mdata <- read_h5mu("dataset.h5mu", backed = TRUE) - -# anndata-like interface (the Python package) -adata$X -adata$obs -adata$var - -# optional feature 1: S3 helper functions for a base R-like interface -adata[1:10, 2:30] -dim(adata) -dimnames(adata) -as.matrix(adata, layer = NULL) -as.matrix(adata, layer = "counts") -t(adata) - -# optional feature 2: S3 helper functions for a bioconductor-like interface -rowData(adata) -colData(adata) -reducedDimNames(adata) - -# converters from/to sce -sce <- adata$to_sce() -from_sce(sce) - -# optional feature 3: converters from/to Seurat -seu <- adata$to_seurat() -from_seurat(seu) - -# optional feature 4: converters from/to SOMA -som <- adata$to_soma() -from_soma(som) -``` - -## Class diagram - -``` mermaid -classDiagram - class AbstractAnnData { - *X: Matrix - *layers: List[Matrix] - *obs: DataFrame - *var: DataFrame - *obsp: List[Matrix] - *varp: List[Matrix] - *obsm: List[Matrix] - *varm: List[Matrix] - *uns: List - *n_obs: int - *n_vars: int - *obs_names: Array[String] - *var_names: Array[String] - *subset(...): AbstractAnnData - *write_h5ad(): Unit - - to_sce(): SingleCellExperiment - to_seurat(): Seurat - - to_h5anndata(): H5AnnData - to_zarranndata(): ZarrAnnData - 
to_inmemory(): InMemoryAnnData - } - - AbstractAnnData <|-- H5AnnData - class H5AnnData { - init(h5file): H5AnnData - } - - AbstractAnnData <|-- ZarrAnnData - class ZarrAnnData { - init(zarrFile): ZarrAnnData - } - - AbstractAnnData <|-- InMemoryAnnData - class InMemoryAnnData { - init(X, obs, var, shape, ...): InMemoryAnnData - } - - AbstractAnnData <|-- ReticulateAnnData - class ReticulateAnnData { - init(pyobj): ReticulateAnnData - } - - class anndataR { - read_h5ad(path, backend): AbstractAnnData - read_h5mu(path, backend): AbstractMuData - } - anndataR --> AbstractAnnData -``` - -Notation: - -- `X: Matrix` - variable `X` is of type `Matrix` -- `*X: Matrix` - variable `X` is abstract -- `to_sce(): SingleCellExperiment` - function `to_sce` returns object of - type `SingleCellExperiment` -- `*to_sce()` - function `to_sce` is abstract - -## OO-framework - -S4, RC, or R6? - -- S4 offers formal class definitions and multiple dispatch, making it - suitable for complex projects, but may be verbose and slower compared - to other systems. -- RC provides reference semantics, familiar syntax, and encapsulation, - yet it is less popular and can have performance issues. -- R6 presents a simple and efficient OOP system with reference semantics - and growing popularity, but lacks multiple dispatch and the formality - of S4. - -Choosing an OOP system depends on the project requirements, developer -familiarity, and desired balance between formality, performance, and -ease of use. 
- -## Approach - -- Implement inheritance objects for `AbstractAnnData`, `H5AnnData`, - `InMemoryAnnData` -- Only containing `X`, `obs`, `var` for now -- Implement base R S3 generics -- Implement `read_h5ad()`, `$write_h5ad()` -- Implement `$to_sce()` -- Add simple unit tests - -Optional: - -- Add more fields (obsp, obsm, varp, varm, …) –\> see class diagram -- Start implementing MuData -- Implement `$to_seurat()` -- Implement `ZarrAnnData` -- Implement `ReticulateAnnData` -- Implement Bioconductor S3 generics - -## Conclusion - -- Scope and therefore the name -- What we do after this diff --git a/doc/design.qmd b/doc/design.qmd deleted file mode 100644 index f719c182..00000000 --- a/doc/design.qmd +++ /dev/null @@ -1,149 +0,0 @@ ---- -title: Design document -format: gfm ---- - -:::{.content-hidden} -Rendered using: -``` -quarto render doc/design.qmd; sed -i 's#<#<#g;s#>#>#g' doc/design.md -``` -::: - -## Proposed interface - -```r -library(anndataR) - -# read from h5ad/h5mu file -adata <- read_h5ad("dataset.h5ad") -adata <- read_h5ad("dataset.h5ad", backed = TRUE) -mdata <- read_h5mu("dataset.h5mu") -mdata <- read_h5mu("dataset.h5mu", backed = TRUE) - -# anndata-like interface (the Python package) -adata$X -adata$obs -adata$var - -# optional feature 1: S3 helper functions for a base R-like interface -adata[1:10, 2:30] -dim(adata) -dimnames(adata) -as.matrix(adata, layer = NULL) -as.matrix(adata, layer = "counts") -t(adata) - -# optional feature 2: S3 helper functions for a bioconductor-like interface -rowData(adata) -colData(adata) -reducedDimNames(adata) - -# converters from/to sce -sce <- adata$to_sce() -from_sce(sce) - -# optional feature 3: converters from/to Seurat -seu <- adata$to_seurat() -from_seurat(seu) - -# optional feature 4: converters from/to SOMA -som <- adata$to_soma() -from_soma(som) -``` - -## Class diagram - -```{mermaid} -classDiagram - class AbstractAnnData { - *X: Matrix - *layers: List[Matrix] - *obs: DataFrame - *var: DataFrame - *obsp: 
List[Matrix] - *varp: List[Matrix] - *obsm: List[Matrix] - *varm: List[Matrix] - *uns: List - *n_obs: int - *n_vars: int - *obs_names: Array[String] - *var_names: Array[String] - *subset(...): AbstractAnnData - *write_h5ad(): Unit - - to_sce(): SingleCellExperiment - to_seurat(): Seurat - - to_h5anndata(): H5AnnData - to_zarranndata(): ZarrAnnData - to_inmemory(): InMemoryAnnData - } - - AbstractAnnData <|-- H5AnnData - class H5AnnData { - init(h5file): H5AnnData - } - - AbstractAnnData <|-- ZarrAnnData - class ZarrAnnData { - init(zarrFile): ZarrAnnData - } - - AbstractAnnData <|-- InMemoryAnnData - class InMemoryAnnData { - init(X, obs, var, shape, ...): InMemoryAnnData - } - - AbstractAnnData <|-- ReticulateAnnData - class ReticulateAnnData { - init(pyobj): ReticulateAnnData - } - - class anndataR { - read_h5ad(path, backend): AbstractAnnData - read_h5mu(path, backend): AbstractMuData - } - anndataR --> AbstractAnnData -``` - -Notation: - - - `X: Matrix` - variable `X` is of type `Matrix` - - `*X: Matrix` - variable `X` is abstract - - `to_sce(): SingleCellExperiment` - function `to_sce` returns object of type `SingleCellExperiment` - - `*to_sce()` - function `to_sce` is abstract - -## OO-framework - -S4, RC, or R6? - -- S4 offers formal class definitions and multiple dispatch, making it suitable for complex projects, but may be verbose and slower compared to other systems. -- RC provides reference semantics, familiar syntax, and encapsulation, yet it is less popular and can have performance issues. -- R6 presents a simple and efficient OOP system with reference semantics and growing popularity, but lacks multiple dispatch and the formality of S4. - -Choosing an OOP system depends on the project requirements, developer familiarity, and desired balance between formality, performance, and ease of use. 
- -## Approach - -* Implement inheritance objects for `AbstractAnnData`, `H5AnnData`, `InMemoryAnnData` -* Only containing `X`, `obs`, `var` for now -* Implement base R S3 generics -* Implement `read_h5ad()`, `$write_h5ad()` -* Implement `$to_sce()` -* Add simple unit tests - -Optional: - -* Add more fields (obsp, obsm, varp, varm, ...) --> see class diagram -* Start implementing MuData -* Implement `$to_seurat()` -* Implement `ZarrAnnData` -* Implement `ReticulateAnnData` -* Implement Bioconductor S3 generics - -## Conclusion - -* Scope and therefore the name -* What we do after this diff --git a/vignettes/features.Rmd b/vignettes/design.Rmd similarity index 65% rename from vignettes/features.Rmd rename to vignettes/design.Rmd index e7d55f5c..05484031 100644 --- a/vignettes/features.Rmd +++ b/vignettes/design.Rmd @@ -1,8 +1,8 @@ --- -title: "Features" +title: "Design" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Features} + %\VignetteIndexEntry{Design} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -15,6 +15,40 @@ knitr::opts_chunk$set( ) ``` + +`{anndataR}` is designed to offer the combined functionality of the following packages: + +* [theislab/zellkonverter](https://github.com/theislab/zellkonverter): Convert AnnData files to/from `SingleCellExperiment` objects. +* [mtmorgan/h5ad](https://github.com/mtmorgan/h5ad/): Read/write `*.h5ad` files natively using `rhdf5`. +* [dynverse/anndata](https://github.com/dynverse/anndata): An R implementation of the AnnData data structures, uses `reticulate` to read/write `*.h5ad` files. + +Ideally, this package will be a complete replacement for all of these packages, and will be the go-to package for working with AnnData files in R. + +## Desired feature list + +* Provide an `R6` class to work with AnnData objects in R (either in-memory or on-disk). 
+* Read/write `*.h5ad` files natively +* Convert to/from `SingleCellExperiment` objects +* Convert to/from `Seurat` objects + +## Class diagram + +Here is a diagram of the main R6 classes provided by the package: + +![](diagrams/class_diagram.svg) + +Notation: + + - `X: Matrix` - variable `X` is of type `Matrix` + - `*X: Matrix` - variable `X` is abstract + - `to_SingleCellExperiment(): SingleCellExperiment` - function `to_SingleCellExperiment` returns object of type `SingleCellExperiment` + - `*to_SingleCellExperiment()` - function `to_SingleCellExperiment` is abstract + + +## Feature tracking + +The following tables show the status of the implementation of each feature in the package: + ```{r include=FALSE} library(tibble) library(knitr) diff --git a/vignettes/diagrams/class_diagram.mmd b/vignettes/diagrams/class_diagram.mmd new file mode 100644 index 00000000..daa95b5c --- /dev/null +++ b/vignettes/diagrams/class_diagram.mmd @@ -0,0 +1,51 @@ + +classDiagram + class AbstractAnnData { + *X: Matrix + *layers: List[Matrix] + *obs: DataFrame + *var: DataFrame + *obsp: List[Matrix] + *varp: List[Matrix] + *obsm: List[Matrix] + *varm: List[Matrix] + *uns: List + *n_obs: int + *n_vars: int + *obs_names: Array[String] + *var_names: Array[String] + *subset(...): AbstractAnnData + *write_h5ad(): Unit + + to_SingleCellExperiment(): SingleCellExperiment + to_Seurat(): Seurat + + to_HDF5AnnData(): HDF5AnnData + to_ZarrAnnData(): ZarrAnnData + to_InMemoryAnnData(): InMemoryAnnData + } + + AbstractAnnData <|-- HDF5AnnData + class HDF5AnnData { + init(h5file): HDF5AnnData + } + + AbstractAnnData <|-- ZarrAnnData + class ZarrAnnData { + init(zarrFile): ZarrAnnData + } + + AbstractAnnData <|-- InMemoryAnnData + class InMemoryAnnData { + init(X, obs, var, shape, ...): InMemoryAnnData + } + + AbstractAnnData <|-- ReticulateAnnData + class ReticulateAnnData { + init(pyobj): ReticulateAnnData + } + + class anndataR { + read_h5ad(path, backend): Either[AbstractAnnData, 
SingleCellExperiment, Seurat] + } + anndataR --> AbstractAnnData \ No newline at end of file diff --git a/vignettes/diagrams/class_diagram.svg b/vignettes/diagrams/class_diagram.svg new file mode 100644 index 00000000..93200725 --- /dev/null +++ b/vignettes/diagrams/class_diagram.svg @@ -0,0 +1 @@ +
AbstractAnnData
*X: Matrix
*layers: List[Matrix]
*obs: DataFrame
*var: DataFrame
*obsp: List[Matrix]
*varp: List[Matrix]
*obsm: List[Matrix]
*varm: List[Matrix]
*uns: List
*n_obs: int
*n_vars: int
*obs_names: Array[String]
*var_names: Array[String]
*subset(...) : AbstractAnnData
*write_h5ad() : Unit
to_SingleCellExperiment() : SingleCellExperiment
to_Seurat() : Seurat
to_HDF5AnnData() : HDF5AnnData
to_ZarrAnnData() : ZarrAnnData
to_InMemoryAnnData() : InMemoryAnnData
HDF5AnnData
init(h5file) : HDF5AnnData
ZarrAnnData
init(zarrFile) : ZarrAnnData
InMemoryAnnData
init(X, obs, var, shape, ...) : InMemoryAnnData
ReticulateAnnData
init(pyobj) : ReticulateAnnData
anndataR
read_h5ad(path, backend) : Either[AbstractAnnData, SingleCellExperiment, Seurat]
\ No newline at end of file diff --git a/vignettes/diagrams/script.sh b/vignettes/diagrams/script.sh new file mode 100755 index 00000000..c28400fb --- /dev/null +++ b/vignettes/diagrams/script.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# Convert mermaid diagrams to different formats +# because RMarkdown doesn't support mermaid diagrams + +docker run --rm -w /pwd -u `id -u`:`id -g` \ + -v `pwd`:/pwd minlag/mermaid-cli \ + -i vignettes/diagrams/class_diagram.mmd -o vignettes/diagrams/class_diagram.svg From 3f0b91a5d5d81be8eb4232a7fa1ab5f03e8f930f Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 19 Sep 2023 10:33:43 +0200 Subject: [PATCH 15/17] update readme (#114) * update readme * update with styler --- README.md | 21 ++++++++++++++------- README.qmd | 9 ++++++--- vignettes/getting-started.Rmd | 1 - 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index f86be1d4..a3b81c71 100644 --- a/README.md +++ b/README.md @@ -33,13 +33,15 @@ devtools::install_github("scverse/anndataR") You might need to install suggested dependencies manually, depending on the task you want to perform. 
-- To read/write `*.h5ad` files, you need to install `{rhdf5}`: +- To read/write \*.h5ad files, you need to install + [rhdf5](https://bioconductor.org/packages/release/bioc/html/rhdf5.html): `BiocManager::install("rhdf5")` - To convert to/from `SingleCellExperiment` objects, you need to install - `{SingleCellExperiment}`: + [SingleCellExperiment](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html): `BiocManager::install("SingleCellExperiment")` - To convert to/from `Seurat` objects, you need to install - `{SeuratObject}`: `install.packages("SeuratObject")` + [SeuratObject](https://cran.r-project.org/package=SeuratObject): + `install.packages("SeuratObject")` You can also install all suggested dependencies at once (though note that this might take a while to run): @@ -69,10 +71,15 @@ View structure: ``` r adata -#> AnnData object with n_obs × n_vars = 50 × 100 -#> obs: 'Float', 'FloatNA', 'Int', 'IntNA', 'Bool', 'BoolNA', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'leiden' -#> var: 'String', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' -#> layers: 'counts', 'csc_counts', 'dense_X', 'dense_counts' +#> class: InMemoryAnnData +#> dim: 50 obs x 100 var +#> X: dgRMatrix +#> layers: counts csc_counts dense_X dense_counts +#> obs: Float FloatNA Int IntNA Bool BoolNA n_genes_by_counts +#> log1p_n_genes_by_counts total_counts log1p_total_counts leiden +#> var: String n_cells_by_counts mean_counts log1p_mean_counts +#> pct_dropout_by_counts total_counts log1p_total_counts highly_variable +#> means dispersions dispersions_norm ``` Access AnnData slots: diff --git a/README.qmd b/README.qmd index 8337507a..507cf9bb 100644 --- a/README.qmd +++ b/README.qmd @@ -48,9 +48,12 @@ devtools::install_github("scverse/anndataR") You might need to install suggested dependencies manually, 
depending on the task you want to perform. -* To read/write `*.h5ad` files, you need to install `{rhdf5}`: `BiocManager::install("rhdf5")` -* To convert to/from `SingleCellExperiment` objects, you need to install `{SingleCellExperiment}`: `BiocManager::install("SingleCellExperiment")` -* To convert to/from `Seurat` objects, you need to install `{SeuratObject}`: `install.packages("SeuratObject")` +* To read/write *.h5ad files, you need to install [rhdf5](https://bioconductor.org/packages/release/bioc/html/rhdf5.html): + `BiocManager::install("rhdf5")` +* To convert to/from `SingleCellExperiment` objects, you need to install [SingleCellExperiment](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html): + `BiocManager::install("SingleCellExperiment")` +* To convert to/from `Seurat` objects, you need to install [SeuratObject](https://cran.r-project.org/package=SeuratObject): + `install.packages("SeuratObject")` You can also install all suggested dependencies at once (though note that this might take a while to run): diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index b7af650d..f35a3289 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -52,7 +52,6 @@ var <- adata$var The following example details how to construct an InMemoryAnnData and access its contents. 
```{r inmemory_construct} - adata <- AnnData( X = matrix(1:15, 3L, 5L), layers = list( From 2aa08dad0924e34b0b5aab95dd71db453cc9baec Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 19 Sep 2023 12:41:10 +0200 Subject: [PATCH 16/17] remove some of the warnings from the tests (#116) --- tests/testthat/test-HDF5-read.R | 3 ++- tests/testthat/test-Seurat.R | 3 ++- tests/testthat/test-dummy_data.R | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-HDF5-read.R b/tests/testthat/test-HDF5-read.R index 3f695e8b..348c9bcb 100644 --- a/tests/testthat/test-HDF5-read.R +++ b/tests/testthat/test-HDF5-read.R @@ -124,6 +124,7 @@ test_that("reading H5AD as SingleCellExperiment works", { test_that("reading H5AD as Seurat works", { skip_if_not_installed("SeuratObject") - seurat <- read_h5ad(file, to = "Seurat") + # TODO: remove this suppression when the to_seurat, from_seurat functions are updated. + seurat <- suppressWarnings(read_h5ad(file, to = "Seurat")) expect_s4_class(seurat, "Seurat") }) diff --git a/tests/testthat/test-Seurat.R b/tests/testthat/test-Seurat.R index 07faab34..d133ec5c 100644 --- a/tests/testthat/test-Seurat.R +++ b/tests/testthat/test-Seurat.R @@ -14,8 +14,9 @@ test_that("to_Seurat with inmemoryanndata", { var_names = LETTERS[1:10] ) + # TODO: remove suppressWarnings when to_Seurat gets updated seu <- suppressWarnings(ad$to_Seurat()) - seu0 <- ad0$to_Seurat() + seu0 <- suppressWarnings(ad0$to_Seurat()) expect_equal(nrow(seu), 20) expect_equal(ncol(seu), 10) diff --git a/tests/testthat/test-dummy_data.R b/tests/testthat/test-dummy_data.R index c7ddb4c2..f1c3261f 100644 --- a/tests/testthat/test-dummy_data.R +++ b/tests/testthat/test-dummy_data.R @@ -13,6 +13,8 @@ test_that("generating dummy SingleCellExperiment works", { expect_s4_class(dummy, "SingleCellExperiment") }) +suppressPackageStartupMessages(library(SeuratObject)) + test_that("generating dummy Seurat works", { dummy <- dummy_data(output = "Seurat") 
expect_s4_class(dummy, "Seurat") From bffaf33d39bb7ae1b9b1098a62fdd8af13349931 Mon Sep 17 00:00:00 2001 From: Louise Deconinck <36621934+LouiseDck@users.noreply.github.com> Date: Tue, 19 Sep 2023 12:46:56 +0200 Subject: [PATCH 17/17] Use Bioconductor vignette standards (#112) * Use Bioconductor vignette standards * Fix DESCRIPTION * Linting indentation typo * Run styler and fix lintr issues * Apply suggestions from code review Co-authored-by: Robrecht Cannoodt --------- Co-authored-by: Robrecht Cannoodt --- DESCRIPTION | 1 + R/write_h5ad.R | 2 +- man/write_h5ad.Rd | 2 +- vignettes/getting-started.Rmd | 37 ++++++++++++++++++++++++++++++++--- 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cec7d974..c9b5b0a8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,6 +54,7 @@ Imports: R6 Suggests: anndata, + BiocStyle, knitr, reticulate, rhdf5, diff --git a/R/write_h5ad.R b/R/write_h5ad.R index c1fb0411..df3e2c98 100644 --- a/R/write_h5ad.R +++ b/R/write_h5ad.R @@ -30,7 +30,7 @@ #' ncells <- 100 #' counts <- matrix(rpois(20000, 5), ncol = ncells) #' logcounts <- log2(counts + 1) -#' # +#' #' pca <- matrix(runif(ncells * 5), ncells) #' tsne <- matrix(rnorm(ncells * 2), ncells) #' diff --git a/man/write_h5ad.Rd b/man/write_h5ad.Rd index f47d835c..9247ca6b 100644 --- a/man/write_h5ad.Rd +++ b/man/write_h5ad.Rd @@ -39,7 +39,7 @@ if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { ncells <- 100 counts <- matrix(rpois(20000, 5), ncol = ncells) logcounts <- log2(counts + 1) - # + pca <- matrix(runif(ncells * 5), ncells) tsne <- matrix(rnorm(ncells * 2), ncells) diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index f35a3289..360b4497 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -1,8 +1,15 @@ --- -title: "Getting Started" -output: rmarkdown::html_vignette +title: "Using anndataR to read and convert" +shorttitle: "Using anndataR" +author: + - name: Robrecht 
Cannoodt + - name: Luke Zappia + - name: Martin Morgan + - name: Louise Deconinck +package: anndataR +output: BiocStyle::html_document vignette: > - %\VignetteIndexEntry{Getting Started} + %\VignetteIndexEntry{Using anndataR to read and convert} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -14,10 +21,24 @@ knitr::opts_chunk$set( ) ``` +# Introduction + This package allows users to work with `.h5ad` files, access various slots in the datasets and convert these files to `SingleCellExperiment` objects and `SeuratObject`s, and vice versa. Check out `?anndataR` for a full list of the functions provided by this package. +# Installation + +Install using: +```{r, eval = FALSE} +if (!require("BiocManager", quietly = TRUE)) { + install.packages("BiocManager") +} +BiocManager::install("anndataR") +``` + +# Usage + This package provides an abstract interface for `AnnData` objects. This abstract interface models its Python counterpart closely, and stores a data matrix `X` and annotations corresponding to observations (`obs`, `obsm`, `obsp`) and variables (`var`, `varm`, `varp`) and unstructured metadata `uns`. @@ -30,6 +51,8 @@ This abstract interface is implemented by different backends. Currently, the fol The `InMemoryAnnData` backend allows you to construct an `AnnData` object in memory. The `HDF5AnnData` backend allows you to read in an `AnnData` object from an `.h5ad` file. +## `HDF5AnnData` backend + Here is an example of how to read in an `.h5ad` file. ```{r setup} @@ -49,6 +72,8 @@ obs <- adata$obs var <- adata$var ``` +## `InMemoryAnnData` backend + The following example details how to construct an InMemoryAnnData and access its contents. ```{r inmemory_construct} @@ -81,3 +106,9 @@ sce <- to_SingleCellExperiment(adata) seurat <- to_Seurat(adata) ``` +# Session info + +```{r} +sessionInfo() +``` +