diff --git a/DESCRIPTION b/DESCRIPTION index 17cabbc8..cf2131d6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -62,9 +62,12 @@ BugReports: https://github.com/scverse/anndataR/issues Depends: R (>= 4.0.0) Imports: + cli, Matrix, methods, - R6 + purrr, + R6, + rlang Suggests: anndata, BiocStyle, @@ -73,6 +76,7 @@ Suggests: hdf5r (>= 1.3.11), rmarkdown, S4Vectors, + Seurat, SeuratObject, SingleCellExperiment, SummarizedExperiment, diff --git a/NAMESPACE b/NAMESPACE index 71bb554d..72fce24f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,9 +5,16 @@ export(from_Seurat) export(from_SingleCellExperiment) export(generate_dataset) export(read_h5ad) +export(to_Seurat) export(write_h5ad) importFrom(Matrix,as.matrix) importFrom(Matrix,sparseMatrix) importFrom(Matrix,t) importFrom(R6,R6Class) +importFrom(cli,cli_abort) +importFrom(cli,cli_inform) +importFrom(cli,cli_warn) importFrom(methods,as) +importFrom(methods,new) +importFrom(purrr,map_lgl) +importFrom(rlang,caller_env) diff --git a/R/HDF5AnnData.R b/R/HDF5AnnData.R index a7d70af4..f18c9b6d 100644 --- a/R/HDF5AnnData.R +++ b/R/HDF5AnnData.R @@ -255,9 +255,7 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint shape = NULL, mode = c("r", "r+", "a", "w", "w-", "x"), compression = c("none", "gzip", "lzf")) { - if (!requireNamespace("hdf5r", quietly = TRUE)) { - stop("The HDF5 interface requires the 'hdf5r' package to be installed") - } + check_requires("HDF5AnnData", "hdf5r") # check arguments compression <- match.arg(compression) @@ -446,3 +444,12 @@ to_HDF5AnnData <- function( mode = mode ) } + +cleanup_HDF5AnnData <- function(...) { # nolint object_name_linter + args <- list(...) + + if (!is.null(args$file) && is.character(args$file) && file.exists(args$file)) { + cli::cli_alert("Removing file: ", args$file) + unlink(args$file) + } +} diff --git a/R/Seurat.R b/R/Seurat.R index 3445abfa..95c218c8 100644 --- a/R/Seurat.R +++ b/R/Seurat.R @@ -1,12 +1,77 @@ -#' Convert a Seurat object to an AnnData object +#' @title Convert a Seurat object to an AnnData object +#' +#' @description +#' `to_Seurat()` converts an AnnData object to a Seurat object. Only one assay can be converted at a time. +#' Arguments are used to configure the conversion. If `NULL`, the functions `to_Seurat_guess_*` will be used to guess +#' the mapping. +#' +#' @param adata An AnnData object to be converted +#' @param assay_name Name of the assay to be created (default: "RNA"). +#' @param layers_mapping A named list to map AnnData layers to Seurat layers. See section "Layer mapping" for more +#' details. +#' @param reduction_mapping A named list to map AnnData reductions to Seurat reductions. Each item in the list must be a +#' named list with keys 'key', 'obsm', and 'varm'. See section "Reduction mapping" for more details. +#' @param graph_mapping A named list to map AnnData graphs to Seurat graphs. Each item in the list must be a character +#' vector of length 1. See section "Graph mapping" for more details. +#' @param misc_mapping A named list to map miscellaneous data to the names of the data in the Seurat object. See section +#' "Miscellaneous mapping" for more details. +#' +#' @section Layer mapping: +#' +#' A named list to map AnnData layers to Seurat layers. Each item in the list must be a character vector of length 1, +#' where the values correspond to the names of the layers in the AnnData object, and the names correspond +#' to the names of the layers in the resulting Seurat object. A value of `NULL` corresponds to the AnnData `X` slot. +#' +#' Example: `layers_mapping = list(counts = "counts", data = NULL, foo = "bar")`. +#' +#' If `NULL`, the internal function `to_Seurat_guess_layers` will be used to guess the layer mapping as follows: +#' +#' * All AnnData layers are copied to Seurat layers by name. +#' +#' @section Reduction mapping: +#' +#' A named list to map AnnData `$obsm` and `$varm` to Seurat reductions. Each item in the list must be a named list +#' with keys `'key'`, `'obsm'`, and `'varm'`. +# +#' Example: `reduction_mapping = list(pca = list(key = "PC_", obsm = "X_pca", varm = "PCs"))`. +#' +#' If `NULL`, the internal function `to_Seurat_guess_reductions` will be used to guess the reduction mapping as follows: +#' +#' * All `$obsm` items starting with `X_` are copied by name. +#' +#' @section Graph mapping: +#' +#' A named list mapping graph names to the names of the graphs in the AnnData object. Each item in the list must be a +#' character vector of length 1. The values correspond to the names of the graphs in the resulting Seurat object, while +#' the names correspond to the names of the graphs in the AnnData object. +#' +#' Example: `graph_mapping = list(nn = "connectivities")`. +#' +#' If `NULL`, the internal function `to_Seurat_guess_graphs` will be used to guess the graph mapping as follows: #' -#' `to_Seurat()` converts an AnnData object to a Seurat object. +#' * An obsp named `connectivities` will be mapped to `nn`. +#' * Other graphs starting with `connectivities_` are stripped of the prefix and copied by name. #' -#' @param obj An AnnData object +#' @section Miscellaneous mapping: +#' +#' A named list mapping miscellaneous data to the names of the data in the AnnData object. Each item in the list must be +#' a vector with one or two elements. The first element must be one of: 'X', 'layers', 'obs', 'obsm', 'obsp', 'var', +#' 'varm', 'varp', 'uns'. The second element is the name of the data in the corresponding slot. If the second element is +#' not present, the whole slot as specified by the first element will be used. +#' +#' Example: `misc_mapping = list(uns = "uns", varp_neighbors = c("varp", "neighbors"))`. +#' +#' If `NULL`, the internal function `to_Seurat_guess_misc` will be used to guess the miscellaneous mapping as follows: +#' +#' * If `$uns` is defined, all values in `$uns` are copied to the Seurat misc. +#' +#' @return A Seurat object #' #' @importFrom Matrix t #' -#' @noRd +#' @export +#' +#' @rdname to_Seurat #' @examples #' ad <- AnnData( #' X = matrix(1:5, 3L, 5L), @@ -14,192 +79,778 @@ #' var = data.frame(row.names = letters[1:5], gene = 1:5) #' ) #' to_Seurat(ad) -# TODO: Add parameters to choose which how X and layers are translated into counts, data and scaled.data -to_Seurat <- function(obj) { # nolint - requireNamespace("SeuratObject") +# nolint start: object_name_linter +to_Seurat <- function( + adata, + assay_name = "RNA", + layers_mapping = NULL, + reduction_mapping = NULL, + graph_mapping = NULL, + misc_mapping = NULL) { + # nolint end: object_name_linter + check_requires("Converting AnnData to Seurat", "SeuratObject") - stopifnot(inherits(obj, "AbstractAnnData")) + stopifnot(inherits(adata, "AbstractAnnData")) - # translate var_names - # trackstatus: class=Seurat, feature=get_var_names, status=done - var_names_ <- .toseurat_check_obsvar_names(obj$var_names, "var_names") + if (is.null(layers_mapping)) { + layers_mapping <- to_Seurat_guess_layers(adata) + } + if (is.null(reduction_mapping)) { + reduction_mapping <- to_Seurat_guess_reductions(adata) + } + if (is.null(graph_mapping)) { + graph_mapping <- to_Seurat_guess_graphs(adata) + } + if (is.null(misc_mapping)) { + misc_mapping <- to_Seurat_guess_misc(adata) + } - # translate obs_names - # trackstatus: class=Seurat, feature=get_obs_names, status=done - obs_names_ <- .toseurat_check_obsvar_names(obj$obs_names, "obs_names") + if (length(adata$layers) == 0 && is.null(adata$X)) { + stop("to_Seurat: adata must have an $X slot or at least one layer") + } - # translate var - # trackstatus: class=Seurat, feature=get_var, status=done - var_ <- obj$var - rownames(var_) <- var_names_ + # store obs and var names + obs_names <- adata$obs_names[] + var_names <- adata$var_names[] + + # check seurat layers + if (is.null(names(layers_mapping))) { + names(layers_mapping) <- layers_mapping + } + if (!"counts" %in% names(layers_mapping) && !"data" %in% names(layers_mapping)) { + stop(paste0( + "layers_mapping must contain at least an item named \"counts\" or \"data\". Found names: ", + paste(names(layers_mapping), collapse = ", ") + )) + } - # translate obs # trackstatus: class=Seurat, feature=get_obs, status=done - obs_ <- - if (ncol(obj$obs) > 0) { - ob <- obj$obs - rownames(ob) <- obs_names_ - ob - } else { - NULL + # trackstatus: class=Seurat, feature=get_X, status=done + # trackstatus: class=Seurat, feature=get_layers, status=done + counts <- .to_seurat_get_matrix_by_key(adata, layers_mapping, "counts") + data <- .to_seurat_get_matrix_by_key(adata, layers_mapping, "data") + if (!is.null(counts)) { + dimnames(counts) <- list(adata$var_names, adata$obs_names) + } + if (!is.null(data)) { + dimnames(data) <- list(adata$var_names, adata$obs_names) + } + obj <- SeuratObject::CreateSeuratObject( + meta.data = adata$obs, + assay = assay_name, + counts = counts, + data = data + ) + + # trackstatus: class=Seurat, feature=get_var, status=done + if (!is.null(adata$var)) { + obj@assays[[assay_name]] <- SeuratObject::AddMetaData( + obj@assays[[assay_name]], + metadata = adata$var + ) + } + + # make sure obs and var names are set properly + # trackstatus: class=Seurat, feature=get_obs_names, status=done + # trackstatus: class=Seurat, feature=get_var_names, status=done + colnames(obj) <- obs_names + rownames(obj) <- var_names + + # copy other layers + for (i in seq_along(layers_mapping)) { + from <- layers_mapping[[i]] + to <- names(layers_mapping)[[i]] + if (!to %in% c("counts", "data")) { + SeuratObject::LayerData(obj, assay = assay_name, layer = to) <- adata$layers[[from]] } + } - # translate X - # trackstatus: class=Seurat, feature=get_X, status=wip - # TODO: should x_ be passed to counts or to data? - # TODO: creating a seurat object when th AnnData doesn't contain X or layers - # probably doesn't make any sense - x_ <- - if (!is.null(obj$X)) { - Matrix::t(obj$X) - } else { - mat <- Matrix::sparseMatrix( - i = integer(0), - p = c(0L), - x = integer(0), - dims = c(obj$n_vars(), obj$n_obs()) + # copy reductions + # trackstatus: class=Seurat, feature=get_obsm, status=wip + # trackstatus: class=Seurat, feature=get_varm, status=wip + if (!is.null(reduction_mapping)) { + if (!is.list(reduction_mapping) || (length(reduction_mapping) > 0 && is.null(names(reduction_mapping)))) { + stop("reduction_mapping must be a named list") + } + for (i in seq_along(reduction_mapping)) { + reduction_name <- names(reduction_mapping)[[i]] + reduction <- reduction_mapping[[i]] + + if ( + !is.list(reduction) || is.null(names(reduction)) || + !all(names(reduction) %in% c("key", "obsm", "varm")) || + !all(c("key", "obsm") %in% names(reduction)) + ) { + stop("each reduction must be a list with keys 'key', 'obsm', and 'varm'") + } + dr <- .to_seurat_process_reduction( + adata = adata, + key = reduction$key, + obsm_embedding = reduction$obsm, + varm_loadings = reduction$varm, + assay_name = assay_name ) - attr(mat, "is_X_null") <- TRUE # nolint - mat + if (!is.null(dr)) { + obj[[reduction_name]] <- dr + } } - dimnames(x_) <- list(var_names_, obs_names_) - x_assay <- SeuratObject::CreateAssayObject(counts = x_) + } - # create seurat object - if (ncol(var_) > 0) { - # don't add var metadata if the data frame does not contain any columns - x_assay <- SeuratObject::AddMetaData(x_assay, metadata = var_) + # trackstatus: class=Seurat, feature=get_obsp, status=wip + for (i in seq_along(graph_mapping)) { + graph_name <- names(graph_mapping)[[i]] + graph <- graph_mapping[[i]] + if (!is.character(graph) || length(graph) != 1) { + stop("item ", graph_name, " in graph_mapping is not a character vector of length 1") + } + obsp <- adata$obsp[[graph]] + if (!is.null(obsp)) { + dimnames(obsp) <- list(obs_names, obs_names) + obsp_gr <- Seurat::as.Graph(obsp) + obj[[paste0(assay_name, "_", graph_name)]] <- obsp_gr + } } - seurat_obj <- SeuratObject::CreateSeuratObject(x_assay, meta.data = obs_) - # add layers - # trackstatus: class=Seurat, feature=get_layers, status=wip - # TODO: should values be passed to counts or to data? - for (key in obj$layers_keys()) { - layer_ <- t(obj$layers[[key]]) - dimnames(layer_) <- list(var_names_, obs_names_) - seurat_obj[[key]] <- SeuratObject::CreateAssayObject(counts = layer_) + # trackstatus: class=Seurat, feature=get_uns, status=done + # trackstatus: class=Seurat, feature=get_varp, status=done + for (i in seq_along(misc_mapping)) { + misc_name <- names(misc_mapping)[[i]] + misc <- misc_mapping[[i]] + if (!is.character(misc) || length(misc) <= 0 || length(misc) > 2) { + stop("misc_mapping must be a named list with one or two elements") + } + misc_slot <- misc[[1]] + misc_key <- misc[[2]] + expected_slots <- c("X", "layers", "obs", "obsm", "obsp", "var", "varm", "varp", "uns") + if (!misc_slot %in% expected_slots) { + stop(paste0( + "The first element of each item in misc_mapping must be one of: ", + paste0("'", expected_slots, "'", collapse = ", ") + )) + } + misc_data <- adata[[misc_slot]] + if (length(misc) == 2) { + if (!misc_key %in% names(misc_data)) { + stop(paste0("misc_mapping: adata$", misc_slot, "[[", misc_key, "]] does not exist")) + } + misc_data <- misc_data[[misc_key]] + } + if (!is.null(misc_data)) { + SeuratObject::Misc(obj, misc_name) <- misc_data + } } - seurat_obj + obj } -.toseurat_check_obsvar_names <- function(names, label) { - if (any(grepl("_", names))) { - # mimic seurat behaviour - warning(wrap_message( - "'", label, "' ", - "cannot have underscores ('_') when converting to Seurat, ", - "replacing with dashes ('-')" - )) - names <- gsub("_", "-", names) +.to_seurat_is_atomic_character <- function(x) { + is.character(x) && length(x) == 1 && !is.na(x) +} + +.to_seurat_get_matrix_by_key <- function(adata, mapping, key) { + if (!key %in% names(mapping)) { + return(NULL) + } + + layer_name <- mapping[[key]] + + if (is.null(layer_name)) { + return(Matrix::t(adata$X)) + } + + if (!.to_seurat_is_atomic_character(layer_name)) { + stop("layer_name must be character vector of length 1") + } + + if (!layer_name %in% names(adata$layers)) { + stop("layer_name '", layer_name, "' is not an item in adata$layers") + } + + return(Matrix::t(adata$layers[[layer_name]])) +} + +.to_seurat_get_matrix <- function(adata, layer_name) { + if (is.null(layer_name)) { + return(Matrix::t(adata$X)) + } + + if (!.to_seurat_is_atomic_character(layer_name)) { + stop("layer_name must be the name of one of the layers or NULL") + } + + if (!layer_name %in% names(adata$layers)) { + stop("layer_name must be the name of one of the layers or NULL") + } + + return(Matrix::t(adata$layers[[layer_name]])) +} + +.to_seurat_process_reduction <- function(adata, assay_name, key, obsm_embedding, varm_loadings) { + if (!.to_seurat_is_atomic_character(key)) { + stop("key must be a character scalar") + } + if (!.to_seurat_is_atomic_character(obsm_embedding)) { + stop("obsm_embedding must be a character scalar") + } + if (!is.null(varm_loadings) && !.to_seurat_is_atomic_character(varm_loadings)) { + stop("varm_loadings must be a character scalar or NULL") + } + embed <- adata$obsm[[obsm_embedding]] + + if (is.null(embed)) { + stop(paste0("The reduction ", obsm_embedding, " is not present in adata$obsm")) + } + + rownames(embed) <- adata$obs_names + + loadings <- + if (is.null(varm_loadings)) { + new(Class = "matrix") + } else if (!varm_loadings %in% names(adata$varm)) { + stop(paste0("The loadings ", varm_loadings, " is not present in adata$varm")) + } else { + load <- adata$varm[[varm_loadings]] + rownames(load) <- adata$var_names + load + } + + SeuratObject::CreateDimReducObject( + embeddings = embed, + loadings = loadings, + key = key, + assay = assay_name, + global = TRUE + ) +} + +to_Seurat_guess_layers <- function(adata) { # nolint + if (!inherits(adata, "AbstractAnnData")) { + stop("adata must be an object inheriting from AbstractAnnData") } - names + layers <- list() + + if (!is.null(adata$X)) { + # guess the name of the X slot + layer_name_for_x <- + if (!"counts" %in% names(adata$layers)) { + "counts" + } else { + "data" + } + + layers[layer_name_for_x] <- list(NULL) + } + + for (layer_name in names(adata$layers)) { + layers[[layer_name]] <- layer_name + } + + layers +} + +to_Seurat_guess_reductions <- function(adata) { # nolint + if (!inherits(adata, "AbstractAnnData")) { + stop("adata must be an object inheriting from AbstractAnnData") + } + + reductions <- list() + + for (reduction_name in names(adata$obsm)) { + if (grepl("^X_", reduction_name)) { + name <- gsub("^X_", "", reduction_name) + out <- + if (reduction_name == "X_pca") { + list(key = "PC_", obsm = "X_pca", varm = "PCs") + } else { + list(key = paste0(name, "_"), obsm = reduction_name, varm = NULL) + } + + reductions[[name]] <- out + } + } + + reductions +} + +to_Seurat_guess_graphs <- function(adata) { # nolint + if (!inherits(adata, "AbstractAnnData")) { + stop("adata must be an object inheriting from AbstractAnnData") + } + + graphs <- list() + + for (graph_name in names(adata$obsp)) { + if (graph_name == "connectivities") { + graphs[["nn"]] <- graph_name + } else if (grepl("^connectivities_", graph_name)) { + new_name <- gsub("^connectivities_", "", graph_name) + graphs[[new_name]] <- graph_name + } + } + + graphs +} + +to_Seurat_guess_misc <- function(adata) { # nolint + if (!inherits(adata, "AbstractAnnData")) { + stop("adata must be an object inheriting from AbstractAnnData") + } + + misc_mapping <- list() + + if (!is.null(adata$uns)) { + for (key in names(adata$uns)) { + misc_mapping[[key]] <- c("uns", key) + } + } + + # TODO: copy obsm which were not used as embeddings? + # TODO: copy varm which were not used as loadings? + # Then again, the user can do this manually if needed + + misc_mapping } #' Convert a Seurat object to an AnnData object #' #' `from_Seurat()` converts a Seurat object to an AnnData object. -#' Only one assay can be converted at a time. +#' Only one assay can be converted at a time. Arguments are used to configure the conversion. +#' If `NULL`, the functions `from_Seurat_guess_*` will be used to guess the mapping. #' #' For more information on the functionality of an AnnData object, see [anndataR-package]. #' -#' @param seurat_obj An object inheriting from Seurat. +#' @param seurat_obj A Seurat object to be converted. #' @param output_class Name of the AnnData class. Must be one of `"HDF5AnnData"` or `"InMemoryAnnData"`. -#' @param assay Assay to be converted. If NULL, `DefaultAssay()` is used. -#' @param X Which of 'counts', 'data', or 'scale.data' will be used for X. By default, 'counts' will be used (if it is -#' not empty), followed by 'data', then 'scale.data'. The remaining non-empty slots will be stored in different -#' layers. +#' @param assay_name The name of the assay to be converted. If `NULL`, the default assay will be used +#' ([SeuratObject::DefaultAssay()]). +#' @param x_mapping A mapping of a Seurat layer to the AnnData `X` slot. If `NULL`, no data will be copied to the +#' `X` slot. +#' @param layers_mapping A named list mapping layer names to the names of the layers in the Seurat object. Each item in +#' the list must be a character vector of length 1. See section "`$layers` mapping" for more details. +#' @param obsm_mapping A named list mapping reductions to the names of the reductions in the Seurat object. Each item in +#' the list must be a vector of length 2. See section "`$obsm` mapping" for more details. +#' @param varm_mapping A named list mapping PCA loadings to the names of the PCA loadings in the Seurat object. +#' Each item in the list must be a character vector of length 1. See section "`$varm` mapping" for more details. +#' @param obsp_mapping A named list mapping graph names to the names of the graphs in the Seurat object. +#' Each item in the list must be a character vector of length 1. See section "`$obsp` mapping" for more details. +#' @param varp_mapping A named list mapping miscellaneous data to the names of the data in the Seurat object. +#' Each item in the list must be a named list with one or two elements. See section "`$varp` mapping" for more details. +#' @param uns_mapping A named list mapping miscellaneous data to the names of the data in the Seurat object. +#' Each item in the list must be a named list with one or two elements. See section "`$uns` mapping" for more details. #' @param ... Additional arguments passed to the generator function. #' +#' @section `$X` mapping: +#' +#' A mapping of a Seurat layer to the AnnData `X` slot. Its value must be `NULL` or a character vector of the Seurat +#' layer name to copy. If `NULL`, no data will be copied to the `X` slot. +#' +#' @section `$layers` mapping: +#' +#' A named list to map AnnData layers to Seurat layers. Each item in the list must be a character vector of length 1. +#' The `$X` key maps to the `X` slot. +#' +#' Example: `layers_mapping = list(counts = "counts", foo = "bar")`. +#' +#' If `NULL`, the internal function `from_Seurat_guess_layers` will be used to guess the layer mapping as follows: +#' +#' * All Seurat layers are copied to AnnData `layers` by name. +#' * This means that the AnnData `X` slot will be `NULL` (empty). If you want to copy data to the `X` slot, +#' you must define the layer mapping explicitly. +#' +#' @section `$obsm` mapping: +#' +#' A named list to map Seurat reductions to AnnData `$obsm`. +#' +#' Each item in the list must be a vector of length 2, +#' where the name corresponds to the name of the resulting `$obsm` slot, and the values correspond to the +#' the location of the data in the Seurat object. +#' +#' Example: `obsm_mapping = list(pca = c("reductions", "pca"), umap = c("reductions", "umap"))`. +#' +#' If `NULL`, the internal function `from_Seurat_guess_obsms` will be used to guess the obsm mapping as follows: +#' +#' * All Seurat reductions are prefixed with `X_` and copied to AnnData `$obsm`. +#' +#' @section `$varm` mapping: +#' +#' A named list to map Seurat reduction loadings to AnnData `$varm`. +#' +#' Each item in the list must be a character vector of length 2, where the name corresponds to the name of the +#' resulting `$varm` slot, and the value corresponds to the location of the data in the Seurat object. +#' +#' Example: `varm_mapping = list(PCs = c("reductions", "pca")`. +#' +#' If `NULL`, the internal function `from_Seurat_guess_varms` will be used to guess the varm mapping as follows: +#' +#' * The name of the PCA loadings is copied by name. +#' +#' @section `$obsp` mapping: +#' +#' A named list to map Seurat graphs to AnnData `$obsp`. +#' +#' Example: `obsp_mapping = list(nn = "connectivities")`. +#' +#' If `NULL`, the internal function `from_Seurat_guess_obsps` will be used to guess the obsp mapping as follows: +#' +#' * All Seurat graphs are copied to `$obsp` by name. +#' +#' @section `$varp` mapping: +#' +#' A named list to map Seurat miscellaneous data to AnnData `$varp`. The name of each item corresponds to the +#' resulting `$varp` slot, while the value of each item must be a fector which corresponds to the location of the data +#' in the Seurat object. +#' +#' Example: `varp_mapping = list(foo = c("misc", "foo"))`. +#' +#' If `NULL`, the internal function `from_Seurat_guess_varps` will be used to guess the varp mapping as follows: +#' +#' * No data is mapped to `$varp`. +#' +#' @section `$uns` mapping: +#' +#' A named list to map Seurat miscellaneous data to AnnData `uns`. Each item in the list must be a character of +#' length 2. The first element must be `"misc"`. The second element is the name of the data in the corresponding slot. +#' +#' Example: `uns_mapping = list(foo = c("misc", "foo"))`. +#' +#' If `NULL`, the internal function `from_Seurat_guess_uns` will be used to guess the uns mapping as follows: +#' +#' * All Seurat miscellaneous data is copied to `uns` by name. +#' +#' @return An AnnData object +#' #' @export #' -#' @seealso [anndataR-package] -# TODO: Add examples +#' @examples +#' library(Seurat) +#' +#' counts <- matrix(rbinom(20000, 1000, .001), nrow = 100) +#' obj <- CreateSeuratObject(counts = counts) +#' obj <- NormalizeData(obj) +#' obj <- FindVariableFeatures(obj) +#' obj <- ScaleData(obj) +#' obj <- RunPCA(obj, npcs = 10L) +#' obj <- FindNeighbors(obj) +#' obj <- RunUMAP(obj, dims = 1:10) +#' from_Seurat(obj) # nolint start: object_name_linter from_Seurat <- function( # nolint end: object_name_linter seurat_obj, output_class = c("InMemoryAnnData", "HDF5AnnData"), - assay = NULL, - X = "counts", + assay_name = NULL, + x_mapping = NULL, + layers_mapping = NULL, + obsm_mapping = NULL, + varm_mapping = NULL, + obsp_mapping = NULL, + varp_mapping = NULL, + uns_mapping = NULL, ...) { + check_requires("Converting Seurat to AnnData", "SeuratObject") + output_class <- match.arg(output_class) stopifnot(inherits(seurat_obj, "Seurat")) - if (!is.null(X)) { - if (!X %in% c("counts", "data", "scale.data")) { - stop("X must be NULL or one of: 'counts', 'data', 'scale.data'") - } + if (is.null(assay_name)) { + assay_name <- SeuratObject::DefaultAssay(seurat_obj) } - # If a specific assay is selected, use it - if (!is.null(assay)) { - if (!assay %in% names(seurat_obj@assays)) { - stop("'assay' must be NULL or one of: ", paste0("'", names(seurat_obj@assays), "'", collapse = ", ")) - } - assay_name <- assay - } else { - assay_name <- SeuratObject::DefaultAssay(seurat_obj) + seurat_assay <- seurat_obj@assays[[assay_name]] - # If Seurat object contains multiple assays, notify user the Default one is used - if (length(names(seurat_obj@assays)) > 1) { - message( - "There are ", length(names(seurat_obj@assays)), " assays in the Seurat object; using the default assay ('", - assay_name, "'). You can use the `assay` parameter to select a specific assay." - ) - } + if (is.null(seurat_assay)) { + stop(paste0("The assay '", assay_name, "' does not exist in the Seurat object")) } - # get obs + if (!inherits(seurat_assay, "Assay5")) { + stop(paste0( + "Assay '", assay_name, "' is not a valid Seurat v5 assay.\n", + "Please use `SeuratObject::UpdateSeuratObject()` to upgrade the object to Seurat v5." + )) + } + + if (is.null(layers_mapping)) { + layers_mapping <- from_Seurat_guess_layers(seurat_obj, assay_name) + } + if (is.null(obsm_mapping)) { + obsm_mapping <- from_Seurat_guess_obsms(seurat_obj, assay_name) + } + if (is.null(varm_mapping)) { + varm_mapping <- from_Seurat_guess_varms(seurat_obj, assay_name) + } + if (is.null(obsp_mapping)) { + obsp_mapping <- from_Seurat_guess_obsps(seurat_obj, assay_name) + } + if (is.null(varp_mapping)) { + varp_mapping <- from_Seurat_guess_varps(seurat_obj) + } + if (is.null(uns_mapping)) { + uns_mapping <- from_Seurat_guess_uns(seurat_obj) + } + + # fetch obs # trackstatus: class=Seurat, feature=set_obs_names, status=done # trackstatus: class=Seurat, feature=set_obs, status=done obs <- seurat_obj@meta.data - rownames(obs) <- colnames(seurat_obj) # TODO: this is probably not needed - # construct var + # fetch var # trackstatus: class=Seurat, feature=set_var_names, status=done # trackstatus: class=Seurat, feature=set_var, status=done - var <- seurat_obj@assays[[assay_name]]@meta.features - rownames(var) <- rownames(seurat_obj@assays[[assay_name]]) # TODO: this is probably not needed + var <- seurat_assay@meta.data # use generator to create new AnnData object generator <- get_anndata_constructor(output_class) - ad <- generator$new( - obs = obs, - var = var, - ... + + tryCatch( + { + adata <- generator$new( + obs = obs, + var = var, + ... + ) + + # fetch X + # trackstatus: class=Seurat, feature=set_X, status=done + if (!is.null(x_mapping)) { + adata$X <- Matrix::t(seurat_assay@layers[[layer]]) + } + + # fetch layers + # trackstatus: class=Seurat, feature=set_layers, status=done + for (i in seq_along(layers_mapping)) { + layer <- layers_mapping[[i]] + layer_name <- names(layers_mapping)[[i]] + + adata$layers[[layer_name]] <- Matrix::t(seurat_assay@layers[[layer]]) + } + + # fetch obsm + # trackstatus: class=Seurat, feature=set_obsm, status=wip + for (i in seq_along(obsm_mapping)) { + obsm <- obsm_mapping[[i]] + obsm_name <- names(obsm_mapping)[[i]] + + if (!is.character(obsm) || length(obsm) != 2) { + stop("each obsm_mapping must be a character vector of length 2") + } + + obsm_slot <- obsm[[1]] + obsm_key <- obsm[[2]] + + if (obsm_slot == "reductions") { + adata$obsm[[obsm_name]] <- SeuratObject::Embeddings(seurat_obj, obsm_key) + } else if (obsm_slot == "misc") { + adata$obsm[[obsm_name]] <- seurat_obj@misc[[obsm_key]] + } + } + + # fetch varm + # trackstatus: class=Seurat, feature=set_varm, status=wip + for (i in seq_along(varm_mapping)) { + varm <- varm_mapping[[i]] + varm_name <- names(varm_mapping)[[i]] + + if (!is.character(varm) || length(varm) < 2 || length(varm) > 3) { + stop("each varm_mapping must be a character vector of length 2 or 3") + } + + varm_slot <- varm[[1]] + varm_key <- varm[[2]] + + if (varm_slot == "reductions") { + adata$varm[[varm_name]] <- SeuratObject::Loadings(seurat_obj, varm_key) + } else if (varm_slot == "misc") { + data <- seurat_obj@misc[[varm_key]] + if (length(varm) == 3) { + data <- data[[varm[[3]]]] + } + adata$varm[[varm_name]] <- data + } + } + + # fetch obsp + # trackstatus: class=Seurat, feature=set_obsp, status=wip + for (i in seq_along(obsp_mapping)) { + obsp <- obsp_mapping[[i]] + obsp_name <- names(obsp_mapping)[[i]] + + if (!is.character(obsp) || length(obsp) < 2 || length(obsp) > 3) { + stop("each obsp_mapping must be a character vector of length 2 or 3") + } + + key1 <- obsp[[1]] + key2 <- obsp[[2]] + + if (key1 == "graphs") { + adata$obsp[[obsp_name]] <- as(seurat_obj@graphs[[key2]], "sparseMatrix") + } else if (key1 == "misc") { + data <- seurat_obj@misc[[key2]] + if (length(obsp) == 3) { + data <- data[[obsp[[3]]]] + } + adata$obsp[[obsp_name]] <- data + } + } + + # fetch varp + # trackstatus: class=Seurat, feature=set_varp, status=wip + for (i in seq_along(varp_mapping)) { + varp <- varp_mapping[[i]] + varp_name <- names(varp_mapping)[[i]] + + if (!is.character(varp) || length(varp) < 2 || length(varp) > 3) { + stop("each varp_mapping must be a character vector of length 2 or 3") + } + + key1 <- varp[[1]] + key2 <- varp[[2]] + + if (key1 == "misc") { + data <- seurat_obj@misc[[key2]] + if (length(varp) == 3) { + data <- data[[varp[[3]]]] + } + adata$varp[[varp_name]] <- data + } + } + + # fetch uns + # trackstatus: class=Seurat, feature=set_uns, status=wip + for (i in seq_along(uns_mapping)) { + uns <- uns_mapping[[i]] + uns_name <- names(uns_mapping)[[i]] + + if (!is.character(uns) || length(uns) < 2 || length(uns) > 3) { + stop("each uns_mapping must be a character vector of length 2 or 3") + } + + key1 <- uns[[1]] + key2 <- uns[[2]] + + if (key1 == "misc") { + data <- seurat_obj@misc[[key2]] + if (length(uns) == 3) { + data <- data[[uns[[3]]]] + } + adata$uns[[uns_name]] <- data + } + } + + return(adata) + }, + error = function(e) { + if (output_class == "HDF5AnnData") { + on.exit(cleanup_HDF5AnnData(adata)) + } + stop(e) + } ) +} + +from_Seurat_guess_layers <- function(seurat_obj, assay_name) { # nolint + if (!inherits(seurat_obj, "Seurat")) { + stop("The provided object must be a Seurat object") + } + + seurat_assay <- seurat_obj@assays[[assay_name]] + + if (!inherits(seurat_assay, "Assay5")) { + stop("The provided object must be a Seurat v5 assay") + } + + layers_mapping <- list() + + for (layer_name in SeuratObject::Layers(seurat_assay)) { + layers_mapping[[layer_name]] <- layer_name + } + + layers_mapping +} - if (!is.null(X)) { - # Check if the slot is not empty - if (all(dim(SeuratObject::GetAssayData(seurat_obj, slot = X, assay = assay_name)) == 0)) { - stop("The '", X, "' slot is empty.") +from_Seurat_guess_obsms <- function(seurat_obj, assay_name) { # nolint + if (!inherits(seurat_obj, "Seurat")) { + stop("The provided object must be a Seurat object") + } + + obsm_mapping <- list() + + for (reduction_name in SeuratObject::Reductions(seurat_obj)) { + # Check if the dimreduc was calculated by the selected assay + reduction <- seurat_obj@reductions[[reduction_name]] + if (reduction@assay.used != assay_name) { + next } - assay_data <- SeuratObject::GetAssayData(seurat_obj, slot = X, assay = assay_name) + obsm_mapping[[paste0("X_", reduction_name)]] <- c("reductions", reduction_name) + } + + obsm_mapping +} - # Remove names - dimnames(assay_data) <- list(NULL, NULL) - ad$X <- Matrix::t(assay_data) - } else { - # Cannot compare other values with NULL - X <- "none" +from_Seurat_guess_varms <- function(seurat_obj, assay_name) { # nolint + if (!inherits(seurat_obj, "Seurat")) { + stop("The provided object must be a Seurat object") } - # Add the remaining non-empty slots as layers - slots <- c("counts", "data", "scale.data") - slots <- slots[slots != X] + varm_mapping <- list() + + if ("pca" %in% SeuratObject::Reductions(seurat_obj)) { + # Check if the dimreduc was calculated by the selected assay + reduction <- seurat_obj@reductions[["pca"]] + if (reduction@assay.used == assay_name) { + varm_mapping[["PCs"]] <- c("reductions", "pca") + } + } + + varm_mapping +} + +from_Seurat_guess_obsps <- function(seurat_obj, assay_name) { # nolint + if (!inherits(seurat_obj, "Seurat")) { + stop("The provided object must be a Seurat object") + } + + obsp_mapping <- list() + + for (graph_name in SeuratObject::Graphs(seurat_obj)) { + graph <- seurat_obj@graphs[[graph_name]] - for (slot in slots) { - if (!all(dim(SeuratObject::GetAssayData(seurat_obj, slot = slot)) == 0)) { - assay_data <- SeuratObject::GetAssayData(seurat_obj, slot = slot, assay = assay_name) - dimnames(assay_data) <- list(NULL, NULL) - ad$layers[[slot]] <- Matrix::t(assay_data) + if (graph@assay.used != assay_name) { + next } + + dest_name <- gsub(paste0(assay_name, "_"), "", graph_name) + + if (dest_name == "nn") { + dest_name <- "connectivities" + } + + obsp_mapping[[dest_name]] <- c("graphs", graph_name) + } + + obsp_mapping +} + +from_Seurat_guess_varps <- function(seurat_obj) { # nolint + if (!inherits(seurat_obj, "Seurat")) { + stop("The provided object must be a Seurat object") + } + + list() +} + +from_Seurat_guess_uns <- function(seurat_obj) { # nolint + if (!inherits(seurat_obj, "Seurat")) { + stop("The provided object must be a Seurat object") + } + + uns_mapping <- list() + + for (uns_name in names(seurat_obj@misc)) { + uns_mapping[[uns_name]] <- c("misc", uns_name) } - return(ad) + uns_mapping } diff --git a/R/SingleCellExperiment.R b/R/SingleCellExperiment.R index f5890b5c..eb7e3800 100644 --- a/R/SingleCellExperiment.R +++ b/R/SingleCellExperiment.R @@ -28,6 +28,8 @@ #' sce <- to_SingleCellExperiment(ad) #' sce to_SingleCellExperiment <- function(object) { # nolint + check_requires("Converting AnnData to SingleCellExperiment", "SingleCellExperiment", "Bioc") + stopifnot( inherits(object, "AbstractAnnData") ) @@ -119,6 +121,7 @@ from_SingleCellExperiment <- function( sce, output_class = c("InMemory", "HDF5AnnData"), ...) { + check_requires("Converting SingleCellExperiment to AnnData", "SingleCellExperiment", "Bioc") stopifnot( inherits(sce, "SingleCellExperiment") ) @@ -168,11 +171,19 @@ from_SingleCellExperiment <- function( layers <- x_and_layers[-1] } - generator$new( - X = x, - obs = obs, - var = var, - layers = layers, - ... + tryCatch( + generator$new( + X = x, + obs = obs, + var = var, + layers = layers, + ... + ), + error = function(e) { + if (output_class == "HDF5AnnData") { + on.exit(cleanup_HDF5AnnData(...)) + } + stop(e) + } ) } diff --git a/R/anndataR-package.R b/R/anndataR-package.R index 55a0295a..04762c83 100644 --- a/R/anndataR-package.R +++ b/R/anndataR-package.R @@ -1,5 +1,4 @@ #' @keywords internal -#' @importFrom methods as #' #' @description An AnnData object. This class can either be an in-memory #' AnnData (InMemoryAnnData) or an HDF5-backed AnnData (HDF5AnnData). The @@ -61,5 +60,8 @@ "_PACKAGE" ## usethis namespace: start +#' @importFrom cli cli_abort cli_warn cli_inform +#' @importFrom purrr map_lgl +#' @importFrom methods as new ## usethis namespace: end NULL diff --git a/R/check_requires.R b/R/check_requires.R new file mode 100644 index 00000000..e4850a30 --- /dev/null +++ b/R/check_requires.R @@ -0,0 +1,48 @@ +#' Check required packages +#' +#' Check that required packages are available and give a nice error message with +#' install instructions if not +#' +#' @param what A message stating what the packages are required for. Used at the +#' start of the error message e.g. "{what} requires...". +#' @param requires Character vector of required package names +#' @param where Where to install the packages from. Either "CRAN" or "Bioc" +#' +#' @return `'TRUE` invisibly if all packages are available, otherwise calls +#' [cli::cli_abort()] +#' +#' @importFrom rlang caller_env +#' @noRd +check_requires <- function(what, requires, where = c("CRAN", "Bioc")) { + where <- match.arg(where) + + is_available <- map_lgl(requires, requireNamespace, quietly = TRUE) + + if (any(!is_available)) { + missing <- requires[!is_available] + + # nolint start object_usage_linter + missing_str <- paste0("\"", paste(missing, collapse = "\", \""), "\"") + if (length(missing) > 1) { + missing_str <- paste0("c(", missing_str, ")") + } + fun <- switch(where, + CRAN = "install.packages", + Bioc = "install.packages(\"BiocManager\"); BiocManager::install" + ) + # nolint end object_usage_linter + + cli_abort( + c( + "{what} requires the {.pkg {missing}} package{?s}", + "i" = paste( + "To continue, install {cli::qty(missing)}{?it/them} using", + "{.code {fun}({missing_str})}" + ) + ), + call = caller_env() + ) + } + + invisible(TRUE) +} diff --git a/R/generate_dataset.R b/R/generate_dataset.R index 42e9d9aa..1076fd37 100644 --- a/R/generate_dataset.R +++ b/R/generate_dataset.R @@ -256,12 +256,7 @@ generate_dataset <- function( #' #' @noRd .generate_dataset_as_sce <- function(dataset_list) { - if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) { - stop( - "Creating a SingleCellExperiment requires the 'SingleCellExperiment'", - "package to be installed" - ) - } + check_requires("Creating a SingleCellExperiment", "SingleCellExperiment", "Bioc") assays_list <- c( list(X = dataset_list$X), @@ -290,11 +285,7 @@ generate_dataset <- function( #' #' @noRd .generate_dataset_as_seurat <- function(dataset_list) { - if (!requireNamespace("SeuratObject", quietly = TRUE)) { - stop( - "Creating a Seurat requires the 'SeuratObject' package to be installed" - ) - } + check_requires("Creating a SeuratObject", "SeuratObject") X <- t(dataset_list$layers[["integer_csparse"]]) colnames(X) <- dataset_list$obs_names diff --git a/man/from_Seurat.Rd b/man/from_Seurat.Rd index dc3cb954..8184c72b 100644 --- a/man/from_Seurat.Rd +++ b/man/from_Seurat.Rd @@ -7,31 +7,167 @@ from_Seurat( seurat_obj, output_class = c("InMemoryAnnData", "HDF5AnnData"), - assay = NULL, - X = "counts", + assay_name = NULL, + x_mapping = NULL, + layers_mapping = NULL, + obsm_mapping = NULL, + varm_mapping = NULL, + obsp_mapping = NULL, + varp_mapping = NULL, + uns_mapping = NULL, ... ) } \arguments{ -\item{seurat_obj}{An object inheriting from Seurat.} +\item{seurat_obj}{A Seurat object to be converted.} \item{output_class}{Name of the AnnData class. Must be one of \code{"HDF5AnnData"} or \code{"InMemoryAnnData"}.} -\item{assay}{Assay to be converted. If NULL, \code{DefaultAssay()} is used.} +\item{assay_name}{The name of the assay to be converted. If \code{NULL}, the default assay will be used +(\code{\link[SeuratObject:DefaultAssay]{SeuratObject::DefaultAssay()}}).} -\item{X}{Which of 'counts', 'data', or 'scale.data' will be used for X. By default, 'counts' will be used (if it is -not empty), followed by 'data', then 'scale.data'. The remaining non-empty slots will be stored in different -layers.} +\item{x_mapping}{A mapping of a Seurat layer to the AnnData \code{X} slot. If \code{NULL}, no data will be copied to the +\code{X} slot.} + +\item{layers_mapping}{A named list mapping layer names to the names of the layers in the Seurat object. Each item in +the list must be a character vector of length 1. See section "\verb{$layers} mapping" for more details.} + +\item{obsm_mapping}{A named list mapping reductions to the names of the reductions in the Seurat object. Each item in +the list must be a vector of length 2. See section "\verb{$obsm} mapping" for more details.} + +\item{varm_mapping}{A named list mapping PCA loadings to the names of the PCA loadings in the Seurat object. +Each item in the list must be a character vector of length 1. See section "\verb{$varm} mapping" for more details.} + +\item{obsp_mapping}{A named list mapping graph names to the names of the graphs in the Seurat object. +Each item in the list must be a character vector of length 1. See section "\verb{$obsp} mapping" for more details.} + +\item{varp_mapping}{A named list mapping miscellaneous data to the names of the data in the Seurat object. +Each item in the list must be a named list with one or two elements. See section "\verb{$varp} mapping" for more details.} + +\item{uns_mapping}{A named list mapping miscellaneous data to the names of the data in the Seurat object. +Each item in the list must be a named list with one or two elements. See section "\verb{$uns} mapping" for more details.} \item{...}{Additional arguments passed to the generator function.} } +\value{ +An AnnData object +} \description{ \code{from_Seurat()} converts a Seurat object to an AnnData object. -Only one assay can be converted at a time. +Only one assay can be converted at a time. Arguments are used to configure the conversion. +If \code{NULL}, the functions \verb{from_Seurat_guess_*} will be used to guess the mapping. } \details{ For more information on the functionality of an AnnData object, see \link{anndataR-package}. } -\seealso{ -\link{anndataR-package} +\section{\verb{$X} mapping}{ + + +A mapping of a Seurat layer to the AnnData \code{X} slot. Its value must be \code{NULL} or a character vector of the Seurat +layer name to copy. If \code{NULL}, no data will be copied to the \code{X} slot. +} + +\section{\verb{$layers} mapping}{ + + +A named list to map AnnData layers to Seurat layers. Each item in the list must be a character vector of length 1. +The \verb{$X} key maps to the \code{X} slot. + +Example: \code{layers_mapping = list(counts = "counts", foo = "bar")}. + +If \code{NULL}, the internal function \code{from_Seurat_guess_layers} will be used to guess the layer mapping as follows: +\itemize{ +\item All Seurat layers are copied to AnnData \code{layers} by name. +\item This means that the AnnData \code{X} slot will be \code{NULL} (empty). If you want to copy data to the \code{X} slot, +you must define the layer mapping explicitly. +} +} + +\section{\verb{$obsm} mapping}{ + + +A named list to map Seurat reductions to AnnData \verb{$obsm}. + +Each item in the list must be a vector of length 2, +where the name corresponds to the name of the resulting \verb{$obsm} slot, and the values correspond to the +the location of the data in the Seurat object. + +Example: \code{obsm_mapping = list(pca = c("reductions", "pca"), umap = c("reductions", "umap"))}. + +If \code{NULL}, the internal function \code{from_Seurat_guess_obsms} will be used to guess the obsm mapping as follows: +\itemize{ +\item All Seurat reductions are prefixed with \code{X_} and copied to AnnData \verb{$obsm}. +} +} + +\section{\verb{$varm} mapping}{ + + +A named list to map Seurat reduction loadings to AnnData \verb{$varm}. + +Each item in the list must be a character vector of length 2, where the name corresponds to the name of the +resulting \verb{$varm} slot, and the value corresponds to the location of the data in the Seurat object. + +Example: \verb{varm_mapping = list(PCs = c("reductions", "pca")}. + +If \code{NULL}, the internal function \code{from_Seurat_guess_varms} will be used to guess the varm mapping as follows: +\itemize{ +\item The name of the PCA loadings is copied by name. +} +} + +\section{\verb{$obsp} mapping}{ + + +A named list to map Seurat graphs to AnnData \verb{$obsp}. + +Example: \code{obsp_mapping = list(nn = "connectivities")}. + +If \code{NULL}, the internal function \code{from_Seurat_guess_obsps} will be used to guess the obsp mapping as follows: +\itemize{ +\item All Seurat graphs are copied to \verb{$obsp} by name. +} +} + +\section{\verb{$varp} mapping}{ + + +A named list to map Seurat miscellaneous data to AnnData \verb{$varp}. The name of each item corresponds to the +resulting \verb{$varp} slot, while the value of each item must be a fector which corresponds to the location of the data +in the Seurat object. + +Example: \code{varp_mapping = list(foo = c("misc", "foo"))}. + +If \code{NULL}, the internal function \code{from_Seurat_guess_varps} will be used to guess the varp mapping as follows: +\itemize{ +\item No data is mapped to \verb{$varp}. +} +} + +\section{\verb{$uns} mapping}{ + + +A named list to map Seurat miscellaneous data to AnnData \code{uns}. Each item in the list must be a character of +length 2. The first element must be \code{"misc"}. The second element is the name of the data in the corresponding slot. + +Example: \code{uns_mapping = list(foo = c("misc", "foo"))}. + +If \code{NULL}, the internal function \code{from_Seurat_guess_uns} will be used to guess the uns mapping as follows: +\itemize{ +\item All Seurat miscellaneous data is copied to \code{uns} by name. +} +} + +\examples{ +library(Seurat) + +counts <- matrix(rbinom(20000, 1000, .001), nrow = 100) +obj <- CreateSeuratObject(counts = counts) +obj <- NormalizeData(obj) +obj <- FindVariableFeatures(obj) +obj <- ScaleData(obj) +obj <- RunPCA(obj, npcs = 10L) +obj <- FindNeighbors(obj) +obj <- RunUMAP(obj, dims = 1:10) +from_Seurat(obj) } diff --git a/man/to_Seurat.Rd b/man/to_Seurat.Rd new file mode 100644 index 00000000..f8d1eccf --- /dev/null +++ b/man/to_Seurat.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Seurat.R +\name{to_Seurat} +\alias{to_Seurat} +\title{Convert a Seurat object to an AnnData object} +\usage{ +to_Seurat( + adata, + assay_name = "RNA", + layers_mapping = NULL, + reduction_mapping = NULL, + graph_mapping = NULL, + misc_mapping = NULL +) +} +\arguments{ +\item{adata}{An AnnData object to be converted} + +\item{assay_name}{Name of the assay to be created (default: "RNA").} + +\item{layers_mapping}{A named list to map AnnData layers to Seurat layers. See section "Layer mapping" for more +details.} + +\item{reduction_mapping}{A named list to map AnnData reductions to Seurat reductions. Each item in the list must be a +named list with keys 'key', 'obsm', and 'varm'. See section "Reduction mapping" for more details.} + +\item{graph_mapping}{A named list to map AnnData graphs to Seurat graphs. Each item in the list must be a character +vector of length 1. See section "Graph mapping" for more details.} + +\item{misc_mapping}{A named list to map miscellaneous data to the names of the data in the Seurat object. See section +"Miscellaneous mapping" for more details.} +} +\value{ +A Seurat object +} +\description{ +\code{to_Seurat()} converts an AnnData object to a Seurat object. Only one assay can be converted at a time. +Arguments are used to configure the conversion. If \code{NULL}, the functions \verb{to_Seurat_guess_*} will be used to guess +the mapping. +} +\section{Layer mapping}{ + + +A named list to map AnnData layers to Seurat layers. Each item in the list must be a character vector of length 1, +where the values correspond to the names of the layers in the AnnData object, and the names correspond +to the names of the layers in the resulting Seurat object. A value of \code{NULL} corresponds to the AnnData \code{X} slot. + +Example: \code{layers_mapping = list(counts = "counts", data = NULL, foo = "bar")}. + +If \code{NULL}, the internal function \code{to_Seurat_guess_layers} will be used to guess the layer mapping as follows: +\itemize{ +\item All AnnData layers are copied to Seurat layers by name. +} +} + +\section{Reduction mapping}{ + + +A named list to map AnnData \verb{$obsm} and \verb{$varm} to Seurat reductions. Each item in the list must be a named list +with keys \code{'key'}, \code{'obsm'}, and \code{'varm'}. +Example: \code{reduction_mapping = list(pca = list(key = "PC_", obsm = "X_pca", varm = "PCs"))}. + +If \code{NULL}, the internal function \code{to_Seurat_guess_reductions} will be used to guess the reduction mapping as follows: +\itemize{ +\item All \verb{$obsm} items starting with \code{X_} are copied by name. +} +} + +\section{Graph mapping}{ + + +A named list mapping graph names to the names of the graphs in the AnnData object. Each item in the list must be a +character vector of length 1. The values correspond to the names of the graphs in the resulting Seurat object, while +the names correspond to the names of the graphs in the AnnData object. + +Example: \code{graph_mapping = list(nn = "connectivities")}. + +If \code{NULL}, the internal function \code{to_Seurat_guess_graphs} will be used to guess the graph mapping as follows: +\itemize{ +\item An obsp named \code{connectivities} will be mapped to \code{nn}. +\item Other graphs starting with \code{connectivities_} are stripped of the prefix and copied by name. +} +} + +\section{Miscellaneous mapping}{ + + +A named list mapping miscellaneous data to the names of the data in the AnnData object. Each item in the list must be +a vector with one or two elements. The first element must be one of: 'X', 'layers', 'obs', 'obsm', 'obsp', 'var', +'varm', 'varp', 'uns'. The second element is the name of the data in the corresponding slot. If the second element is +not present, the whole slot as specified by the first element will be used. + +Example: \code{misc_mapping = list(uns = "uns", varp_neighbors = c("varp", "neighbors"))}. + +If \code{NULL}, the internal function \code{to_Seurat_guess_misc} will be used to guess the miscellaneous mapping as follows: +\itemize{ +\item If \verb{$uns} is defined, all values in \verb{$uns} are copied to the Seurat misc. +} +} + +\examples{ +ad <- AnnData( + X = matrix(1:5, 3L, 5L), + obs = data.frame(row.names = LETTERS[1:3], cell = 1:3), + var = data.frame(row.names = letters[1:5], gene = 1:5) +) +to_Seurat(ad) +} diff --git a/tests/testthat/test-Seurat.R b/tests/testthat/test-Seurat.R index 0ba8006f..1e47cd98 100644 --- a/tests/testthat/test-Seurat.R +++ b/tests/testthat/test-Seurat.R @@ -8,6 +8,7 @@ test_that("to_Seurat with inmemoryanndata", { ) # running to_seurat when ad0$X is null probably doesn't make any sense ad0 <- AnnData( + X = matrix(1L, nrow = 5, ncol = 10), obs = data.frame(row.names = letters[1:5]), var = data.frame(row.names = LETTERS[1:10]) ) @@ -40,14 +41,44 @@ test_that("to_Seurat with inmemoryanndata", { # trackstatus: class=Seurat, feature=test_get_var, status=done active_assay <- seu@assays[[seu@active.assay]] for (var_key in colnames(dummy$var)) { - expect_true(var_key %in% colnames(active_assay@meta.features)) - expect_equal(active_assay@meta.features[[var_key]], dummy$var[[var_key]]) + expect_true(var_key %in% colnames(active_assay@meta.data)) + expect_equal(active_assay@meta.data[[var_key]], dummy$var[[var_key]]) } }) test_that("to_Seurat() fails gracefully", { - expect_error(to_Seurat(), regexp = "obj.*is missing") + expect_error(to_Seurat(), regexp = "adata.*is missing") expect_error(to_Seurat("foo"), regexp = "AbstractAnnData.*not TRUE") }) -# TODO: test from_Seurat +test_that("from_Seurat() works", { + skip_if_not_installed("Seurat") + + library(Seurat) + + suppressWarnings({ + counts <- matrix(rbinom(20000, 1000, .001), nrow = 100) + obj <- CreateSeuratObject(counts = counts) + obj <- NormalizeData(obj) + obj <- FindVariableFeatures(obj) + obj <- ScaleData(obj) + obj <- RunPCA(obj, npcs = 10L) + obj <- FindNeighbors(obj) + obj <- RunUMAP(obj, dims = 1:10) + }) + + ad <- from_Seurat(obj) + + # trackstatus: class=AnnData, feature=test_set_X, status=done + # trackstatus: class=AnnData, feature=test_set_layers, status=done + expect_equal(ad$n_obs(), 200L) + expect_equal(ad$n_vars(), 100L) + + expect_equal(ad$layers_keys(), c("counts", "data", "scale.data")) + + # trackstatus: class=AnnData, feature=test_set_obsm, status=done + expect_equal(ad$obsm_keys(), c("X_pca", "X_umap")) + + # trackstatus: class=AnnData, feature=test_set_varm, status=done + expect_equal(ad$varm_keys(), "PCs") +})