diff --git a/R/generate_dataframe.R b/R/generate_dataframe.R new file mode 100644 index 00000000..f3b20d88 --- /dev/null +++ b/R/generate_dataframe.R @@ -0,0 +1,17 @@ +#' Generate a dataframe +#' +#' Generate a dataframe with different types of columns +#' +#' @param num_rows Number of rows to generate +#' @param types Types of columns to generate +#' +#' @return A dataframe with the generated columns +#' +#' @examples +#' generate_dataframe(10L) +generate_dataframe <- function(num_rows, types = names(vector_generators)) { + types <- match.arg(types) + data <- lapply(types, generate_vector, n = num_rows) + names(data) <- types + as.data.frame(data) +} \ No newline at end of file diff --git a/R/generate_dataset.R b/R/generate_dataset.R new file mode 100644 index 00000000..112f0efa --- /dev/null +++ b/R/generate_dataset.R @@ -0,0 +1,214 @@ +#' Generate a dataset +#' +#' Generate a dataset with different types of columns and layers +#' +#' @param n_obs Number of observations to generate +#' @param n_vars Number of variables to generate +#' @param format Object type to output, one of "list", "SingleCellExperiment", +#' or "Seurat" +#' @param ... Arguments passed to generate_dataset_as_list +#' +#' @return Object containing the generated dataset as defined by `output` +#' +#' @noRd +#' +#' @examples +#' dummy <- generate_dataset() +generate_dataset <- function( + n_obs = 10L, + n_vars = 20L, + format = c("list", "SingleCellExperiment", "Seurat"), + ... +) { + format <- match.arg(format) + + fun <- switch(format, + "list" = generate_dataset_as_list, + "SingleCellExperiment" = generate_dataset_as_sce, + "Seurat" = generate_dataset_as_seurat + ) + + fun(n_obs = n_obs, n_vars = n_vars, ...) +} + +#' Dummy data list +#' +#' Generate a dummy dataset as a list +#' +#' @param n_obs Number of observations to generate +#' @param n_vars Number of variables to generate +#' +#' @return A list with the generated dataset +#' +#' @noRd +generate_dataset_as_list <- function( + n_obs = 10L, + n_vars = 20L, + x_type = names(matrix_generators)[[1]], + layer_types = names(matrix_generators), + obs_types = names(vector_generators), + var_types = names(vector_generators), + obsm_types = c(names(matrix_generators), names(vector_generators)), + varm_types = c(names(matrix_generators), names(vector_generators)), + obsp_types = names(matrix_generators), + varp_types = names(matrix_generators) +) { + # generate X + X <- generate_matrix(n_obs, n_vars, x_type) + + # generate layers + layers <- lapply(layer_types, generate_matrix, n_obs = n_obs, n_vars = n_vars) + names(layers) <- layer_types + + # generate obs + obs <- generate_dataframe(n_obs, obs_types) + + # generate var + var <- generate_dataframe(n_vars, var_types) + + # generate obs_names + obs_names <- paste0("cell", seq_len(n_obs)) + + # generate var_names + var_names <- paste0("gene", seq_len(n_vars)) + + # generate obsm + obsm <- lapply(obsm_types, function(obsm_type) { + if (obsm_type %in% names(vector_generators)) { + generate_dataframe(n_obs, obsm_type) + } else { + generate_matrix(n_obs, n_vars = 10L, obsm_type) + } + }) + names(obsm) <- obsm_types + + # generate varm + varm <- lapply(varm_types, function(varm_type) { + if (varm_type %in% names(vector_generators)) { + generate_dataframe(n_vars, varm_type) + } else { + generate_matrix(n_vars, n_obs = 10L, varm_type) + } + }) + names(varm) <- varm_types + + # generate obsp + obsp <- lapply(obsp_types, generate_matrix, n_obs = n_obs, n_vars = n_obs) + names(obsp) <- obsp_types + + # generate varp + varp <- lapply(varp_types, generate_matrix, n_obs = n_vars, n_vars = n_vars) + names(varp) <- varp_types + + # generate uns by combining other classes + uns <- list( + integer = 1L, + numeric = 1, + character = "a", + factor = factor("a"), + logical = TRUE, + integer_na = NA_integer_, + numeric_na = NA_real_, + character_na = NA_character_, + factor_na = NA_character_, + logical_na = NA, + list = list(1L, 1, "a", factor("a"), TRUE) + ) + vectors_for_uns <- lapply(names(vector_generators), generate_vector, n = 10L) + names(vectors_for_uns) <- paste0("vec_", names(vector_generators)) + obsm_for_uns <- obsm + names(obsm_for_uns) <- paste0("obsm_", names(obsm_for_uns)) + + uns <- c( + uns, + vectors_for_uns, + obsm_for_uns + ) + + list( + X = X, + obs = obs, + obs_names = obs_names, + obsm = obsm, + obsp = obsp, + var = var, + var_names = var_names, + varm = varm, + varp = varp, + layers = layers, + uns = uns + ) +} + +#' Dummy SingleCellExperiment +#' +#' Generate a dummy dataset as a SingleCellExperiment object +#' +#' @param ... Parameters passed to `generate_dataset_as_list` +#' +#' @return SingleCellExperiment containing the generated data +#' +#' @noRd +generate_dataset_as_sce <- function(...) { + if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) { + stop( + "Creating a SingleCellExperiment requires the 'SingleCellExperiment'", + "package to be installed" + ) + } + + dummy <- generate_dataset_as_list(...) + + assays_list <- c( + list(X = dummy$X), + dummy$layers + ) + assays_list <- lapply(assays_list, Matrix::t) + + sce <- SingleCellExperiment::SingleCellExperiment( + assays = assays_list, + rowData = dummy$var, + colData = dummy$obs + ) + colnames(sce) <- dummy$obs_names + rownames(sce) <- dummy$var_names + + return(sce) +} + +#' Dummy Seurat +#' +#' Generate a dummy dataset as a Seurat object +#' +#' @param ... Parameters passed to `generate_dataset_as_list` +#' +#' @return Seurat containing the generated data +generate_dataset_as_seurat <- function(...) { + if (!requireNamespace("SeuratObject", quietly = TRUE)) { + stop( + "Creating a Seurat requires the 'SeuratObject' package to be installed" + ) + } + + dummy <- generate_dataset_as_list(...) + + X <- t(dummy$layers[["integer_csparse"]]) + colnames(X) <- dummy$obs_names + rownames(X) <- dummy$var_names + + seurat <- SeuratObject::CreateSeuratObject(X) + + X2 <- Matrix::t(dummy$layers[["numeric_csparse"]]) + colnames(X2) <- dummy$obs_names + rownames(X2) <- dummy$var_names + seurat <- SeuratObject::SetAssayData(seurat, "data", X2) + + X3 <- Matrix::t(dummy$layers[["numeric_matrix"]]) + colnames(X3) <- dummy$obs_names + rownames(X3) <- dummy$var_names + seurat <- SeuratObject::SetAssayData(seurat, "scale.data", X3) + + seurat <- SeuratObject::AddMetaData(seurat, dummy$obs) + + return(seurat) +} diff --git a/R/generate_matrix.R b/R/generate_matrix.R new file mode 100644 index 00000000..6afdff11 --- /dev/null +++ b/R/generate_matrix.R @@ -0,0 +1,90 @@ +matrix_generators <- list( + numeric_matrix = function(n_obs, n_vars) { + matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) + }, + numeric_dense = function(n_obs, n_vars) { + m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) + as(m, "denseMatrix") + }, + numeric_csparse = function(n_obs, n_vars) { + m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + as(m, "CsparseMatrix") + }, + numeric_rsparse = function(n_obs, n_vars) { + m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + as(m, "RsparseMatrix") + }, + numeric_matrix_with_nas = function(n_obs, n_vars) { + m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) + m[sample.int(n_obs * n_vars, n_obs * n_vars * .1)] <- NA_real_ + m + }, + numeric_dense_with_nas = function(n_obs, n_vars) { + m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) + m[sample.int(n_obs * n_vars, n_obs * n_vars * .1)] <- NA_real_ + as(m, "denseMatrix") + }, + numeric_csparse_with_nas = function(n_obs, n_vars) { + m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + m[sample.int(n_obs * n_vars, n_obs * n_vars * .1)] <- NA_real_ + as(m, "CsparseMatrix") + }, + numeric_rsparse_with_nas = function(n_obs, n_vars) { + m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + m[sample.int(n_obs * n_vars, n_obs * n_vars * .1)] <- NA_real_ + as(m, "RsparseMatrix") + }, + integer_matrix = function(n_obs, n_vars) { + matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) + }, + integer_dense = function(n_obs, n_vars) { + m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) + as(m, "denseMatrix") + }, + integer_csparse = function(n_obs, n_vars) { + m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + as(m, "CsparseMatrix") + }, + integer_rsparse = function(n_obs, n_vars) { + m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + as(m, "RsparseMatrix") + }, + integer_matrix_with_nas = function(n_obs, n_vars) { + m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) + m[sample.int(n_obs * n_vars, n_obs * n_vars * .1)] <- NA_integer_ + m + }, + integer_dense_with_nas = function(n_obs, n_vars) { + m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) + m[sample.int(n_obs * n_vars, n_obs * n_vars * .1)] <- NA_integer_ + as(m, "denseMatrix") + }, + integer_csparse_with_nas = function(n_obs, n_vars) { + m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + m[sample.int(n_obs * n_vars, n_obs * n_vars * .1)] <- NA_integer_ + as(m, "CsparseMatrix") + }, + integer_rsparse_with_nas = function(n_obs, n_vars) { + m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) + m[sample.int(n_obs * n_vars, n_obs * n_vars * .1)] <- NA_integer_ + as(m, "RsparseMatrix") + } +) + +#' Generate a matrix +#' +#' Generate a matrix of a given type +#' +#' @param n_obs Number of observations to generate +#' @param n_vars Number of variables to generate +#' +#' @return A matrix of the given type +#' +#' @noRd +#' +#' @examples +#' generate_matrix(10L, 20L) +generate_matrix <- function(n_obs, n_vars, type = names(matrix_generators)) { + type <- match.arg(type) + matrix_generators[[type]](n_obs, n_vars) +} \ No newline at end of file diff --git a/R/generate_vector.R b/R/generate_vector.R new file mode 100644 index 00000000..b614935e --- /dev/null +++ b/R/generate_vector.R @@ -0,0 +1,56 @@ +vector_generators <- list( + character = function(n) paste0("value", seq_len(n)), + integer = function(n) seq_len(n), + factor = function(n) factor(paste0("value", seq_len(n))), + factor_ordered = function(n) factor(paste0("value", seq_len(n)), ordered = TRUE), + logical = function(n) sample(c(TRUE, FALSE), n, replace = TRUE), + numeric = function(n) runif(n), + character_with_nas = function(n, pct = .5) { + x <- paste0("value", seq_len(n)) + x[sample.int(n, pct * n)] <- NA_character_ + x + }, + integer_with_nas = function(n, pct = .5) { + x <- seq_len(n) + x[sample.int(n, pct * n)] <- NA_integer_ + x + }, + factor_with_nas = function(n, pct = .5) { + x <- factor(paste0("value", seq_len(n))) + x[sample.int(n, pct * n)] <- NA_character_ + x + }, + factor_ordered_with_nas = function(n, pct = .5) { + x <- factor(paste0("value", seq_len(n)), ordered = TRUE) + x[sample.int(n, pct * n)] <- NA_character_ + x + }, + logical_with_nas = function(n, pct = .5) { + x <- sample(c(TRUE, FALSE), n, replace = TRUE) + x[sample.int(n, pct * n)] <- NA + x + }, + numeric_with_nas = function(n, pct = .5) { + x <- runif(n) + x[sample.int(n, pct * n)] <- NA_real_ + x + } +) + +#' Generate a vector +#' +#' Generate a vector of a given type +#' +#' @param n Number of elements to generate +#' @param type Type of vector to generate +#' +#' @return A vector of the given type +#' +#' @noRd +#' +#' @examples +#' generate_vector(10L) +generate_vector <- function(n, type = names(vector_generators)) { + type <- match.arg(type) + vector_generators[[type]](n) +} \ No newline at end of file diff --git a/tests/testthat/helper-dummy_data.R b/tests/testthat/helper-dummy_data.R deleted file mode 100644 index 5d740e19..00000000 --- a/tests/testthat/helper-dummy_data.R +++ /dev/null @@ -1,180 +0,0 @@ -#' Dummy data -#' -#' Generate a dummy dataset -#' -#' @param n_obs Number of observations to generate -#' @param n_vars Number of variables to generate -#' @param output Object type to output, one of "list", "SingleCellExperiment", -#' or "Seurat" -#' -#' @return Object containing the generated dataset as defined by `output` -#' -#' @examples -#' dummy <- dummy_data() -dummy_data <- function( - n_obs = 10L, - n_vars = 20L, - output = c( - "list", "SingleCellExperiment", "Seurat" - )) { - output <- match.arg(output) - - switch(output, - "list" = dummy_list(n_obs = n_obs, n_vars = n_vars), - "SingleCellExperiment" = dummy_SingleCellExperiment( - n_obs = n_obs, n_vars = n_vars - ), - "Seurat" = dummy_Seurat(n_obs = n_obs, n_vars = n_vars) - ) -} - -#' Dummy data list -#' -#' Generate a dummy dataset as a list -#' -#' @param n_obs Number of observations to generate -#' @param n_vars Number of variables to generate -#' -#' @return A list with the generated dataset -dummy_list <- function(n_obs = 10L, n_vars = 20L) { - # generate X - X <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) - - # generate layers in dense, sparse, row compressed and column compressed - layers <- list( - dense = as.matrix(X), - CsparseMatrix = as(X, "CsparseMatrix"), - RsparseMatrix = as(X, "RsparseMatrix") - ) - - # generate obs with different types (character, integer, factors, etc.) - obs <- data.frame( - character = paste0("cell", seq_len(n_obs)), - integer = seq_len(n_obs), - factor = factor(paste0("cell", seq_len(n_obs))), - factor_ordered = factor(paste0("cell", seq_len(n_obs)), ordered = TRUE), - logical = sample(c(TRUE, FALSE), n_obs, replace = TRUE), - numeric = runif(n_obs), - character_with_nas = c(paste0("cell", seq_len(n_obs - 1)), NA), - integer_with_nas = c(seq_len(n_obs - 1), NA), - factor_with_nas = c( - factor(paste0("cell", seq_len(n_obs - 1))), - NA_character_ - ), - factor_ordered_with_nas = c( - factor(paste0("cell", seq_len(n_obs - 1)), ordered = TRUE), - NA_character_ - ), - logical_with_nas = c(sample(c(TRUE, FALSE), n_obs - 1, replace = TRUE), NA), - numeric_with_nas = c(runif(n_obs - 1), NA) - ) - - # generate var with different types (character, integer, factors, etc.) - var <- data.frame( - character = paste0("gene", seq_len(n_vars)), - integer = seq_len(n_vars), - factor = factor(paste0("gene", seq_len(n_vars))), - factor_ordered = factor(paste0("gene", seq_len(n_vars)), ordered = TRUE), - logical = sample(c(TRUE, FALSE), n_vars, replace = TRUE), - numeric = runif(n_vars), - character_with_nas = c(paste0("gene", seq_len(n_vars - 1)), NA), - integer_with_nas = c(seq_len(n_vars - 1), NA), - factor_with_nas = c( - factor(paste0("gene", seq_len(n_vars - 1))), - NA_character_ - ), - factor_ordered_with_nas = c( - factor(paste0("gene", seq_len(n_vars - 1)), ordered = TRUE), - NA_character_ - ), - logical_with_nas = c(sample(c(TRUE, FALSE), n_vars - 1, replace = TRUE), NA), - numeric_with_nas = c(runif(n_vars - 1), NA) - ) - - # generate obs_names - obs_names <- paste0("cell", seq_len(n_obs)) - - # generate var_names - var_names <- paste0("gene", seq_len(n_vars)) - - list( - X = X, - obs = obs, - obs_names = obs_names, - var = var, - var_names = var_names, - layers = layers - ) -} - -#' Dummy SingleCellExperiment -#' -#' Generate a dummy dataset as a SingleCellExperiment object -#' -#' @param ... Parameters passed to `dummy_list` -#' -#' @return SingleCellExperiment containing the generated data -dummy_SingleCellExperiment <- function(...) { # nolint - if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) { - stop( - "Creating a SingleCellExperiment requires the 'SingleCellExperiment'", - "package to be installed" - ) - } - - dummy <- dummy_data(...) - - assays_list <- c( - list(X = dummy$X), - dummy$layers - ) - assays_list <- lapply(assays_list, t) - - sce <- SingleCellExperiment::SingleCellExperiment( - assays = assays_list, - rowData = dummy$var, - colData = dummy$obs - ) - colnames(sce) <- dummy$obs_names - rownames(sce) <- dummy$var_names - - return(sce) -} - -#' Dummy Seurat -#' -#' Generate a dummy dataset as a Seurat object -#' -#' @param ... Parameters passed to `dummy_list` -#' -#' @return Seurat containing the generated data -dummy_Seurat <- function(...) { # nolint - if (!requireNamespace("SeuratObject", quietly = TRUE)) { - stop( - "Creating a Seurat requires the 'SeuratObject' package to be installed" - ) - } - - dummy <- dummy_data(...) - - X <- t(dummy$layers[["dense"]]) - colnames(X) <- dummy$obs_names - rownames(X) <- dummy$var_names - - seurat <- SeuratObject::CreateSeuratObject(X) - - X2 <- as(t(dummy$layers[["CsparseMatrix"]]), "CsparseMatrix") - colnames(X2) <- dummy$obs_names - rownames(X2) <- dummy$var_names - seurat <- SeuratObject::SetAssayData(seurat, "data", X2) - - # seurat doesn't support RsparseMatrices - X3 <- as.matrix(t(dummy$layers[["RsparseMatrix"]])) - colnames(X3) <- dummy$obs_names - rownames(X3) <- dummy$var_names - seurat <- SeuratObject::SetAssayData(seurat, "scale.data", X3) - - seurat <- SeuratObject::AddMetaData(seurat, dummy$obs) - - return(seurat) -} diff --git a/tests/testthat/test-HDF5-reticulate-roundtrip.R b/tests/testthat/test-HDF5-reticulate-roundtrip.R index 7551aa26..d699b5fb 100644 --- a/tests/testthat/test-HDF5-reticulate-roundtrip.R +++ b/tests/testthat/test-HDF5-reticulate-roundtrip.R @@ -2,7 +2,7 @@ skip_if_no_anndata() skip_if_not_installed("rhdf5") # construct dummy objects -dummy <- dummy_data(10L, 20L) +dummy <- generate_dataset(10L, 20L) test_that("test Python -> R", { # create anndata in python diff --git a/tests/testthat/test-HDF5-write.R b/tests/testthat/test-HDF5-write.R index 0bb8e566..1c506e2e 100644 --- a/tests/testthat/test-HDF5-write.R +++ b/tests/testthat/test-HDF5-write.R @@ -162,7 +162,7 @@ test_that("writing H5AD from SingleCellExperiment works", { file <- withr::local_file("SingleCellExperiment.h5ad") - sce <- dummy_data(output = "SingleCellExperiment") + sce <- generate_dataset(format = "SingleCellExperiment") write_h5ad(sce, file) expect_true(file.exists(file)) }) @@ -172,7 +172,7 @@ test_that("writing H5AD from Seurat works", { file <- withr::local_file("Seurat.h5ad") - seurat <- dummy_data(output = "Seurat") + seurat <- generate_dataset(format = "Seurat") write_h5ad(seurat, file) expect_true(file.exists(file)) }) diff --git a/tests/testthat/test-InMemoryAnnData.R b/tests/testthat/test-InMemoryAnnData.R index 75dc48ff..2a6d9592 100644 --- a/tests/testthat/test-InMemoryAnnData.R +++ b/tests/testthat/test-InMemoryAnnData.R @@ -1,4 +1,4 @@ -dummy <- dummy_data(10L, 20L) +dummy <- generate_dataset(10L, 20L) # GETTERS ---------------------------------------------------------------- test_that("create inmemory anndata", { diff --git a/tests/testthat/test-Seurat.R b/tests/testthat/test-Seurat.R index d133ec5c..4e9630de 100644 --- a/tests/testthat/test-Seurat.R +++ b/tests/testthat/test-Seurat.R @@ -1,4 +1,4 @@ -dummy <- dummy_data(10L, 20L) +dummy <- generate_dataset(10L, 20L) test_that("to_Seurat with inmemoryanndata", { ad <- AnnData( diff --git a/tests/testthat/test-dummy_data.R b/tests/testthat/test-dummy_data.R deleted file mode 100644 index f1c3261f..00000000 --- a/tests/testthat/test-dummy_data.R +++ /dev/null @@ -1,21 +0,0 @@ -test_that("generating dummy data works", { - dummy <- dummy_data() - expect_type(dummy, "list") - expect_identical( - names(dummy), - c("X", "obs", "obs_names", "var", "var_names", "layers") - ) - expect_identical(dim(dummy$X), c(10L, 20L)) -}) - -test_that("generating dummy SingleCellExperiment works", { - dummy <- dummy_data(output = "SingleCellExperiment") - expect_s4_class(dummy, "SingleCellExperiment") -}) - -suppressPackageStartupMessages(library(SeuratObject)) - -test_that("generating dummy Seurat works", { - dummy <- dummy_data(output = "Seurat") - expect_s4_class(dummy, "Seurat") -}) diff --git a/tests/testthat/test-generate_dataset.R b/tests/testthat/test-generate_dataset.R new file mode 100644 index 00000000..cc69eb9c --- /dev/null +++ b/tests/testthat/test-generate_dataset.R @@ -0,0 +1,21 @@ +test_that("generating dummy data works", { + dataset <- generate_dataset() + expect_type(dataset, "list") + expect_setequal( + names(dataset), + c("X", "obs", "obsp", "obsm", "obs_names", "var", "varp", "varm", "var_names", "layers", "uns") + ) + expect_identical(dim(dataset$X), c(10L, 20L)) +}) + +test_that("generating dummy SingleCellExperiment works", { + dummy <- generate_dataset(format = "SingleCellExperiment") + expect_s4_class(dummy, "SingleCellExperiment") +}) + +suppressPackageStartupMessages(library(SeuratObject)) + +test_that("generating dummy Seurat works", { + dummy <- generate_dataset(format = "Seurat") + expect_s4_class(dummy, "Seurat") +})