Skip to content

Commit

Permalink
Extend roundtrip tests (#107)
Browse files Browse the repository at this point in the history
* add more types to dummy data

* refactor dataset generators

* apply styler

* fix issues in generation functions

* split up reticulate roundtrip test

* run styler

* skip commented code linting

* incorporate regular roundtrip tests

* add parameter to stop on error or not

* refactor tests

* improve tests

* add has_row_names function

* use inherits instead of is

* don't generate an rd for generate_* functions

* add workaround by manually converting densematrix to matrix

* don't remove dimnames in validate_aligned_array

* fix test after merge

* disable tests with NAs for now

* set default `include_index` to false

* disable matrices with NAs for now

* dont set dimnames on NULL

* fix paste

* format

* fix bugs in tests, and disable tests that are failing

* re-enable commented code in generator, filter in tests

* disable some tests due to warnings

* fix linting issues

* use inherits instead of is
  • Loading branch information
rcannood authored Dec 2, 2023
1 parent cd0dfff commit f9e8a9a
Show file tree
Hide file tree
Showing 21 changed files with 1,163 additions and 357 deletions.
15 changes: 14 additions & 1 deletion R/SingleCellExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,20 @@ from_SingleCellExperiment <- function(sce, output_class = c("InMemory", "HDF5Ann
# trackstatus: class=SingleCellExperiment, feature=set_layers, status=done
x_and_layers <- lapply(
SummarizedExperiment::assays(sce, withDimnames = FALSE),
t
function(mat) {
m <- t(mat)
# nolint start
# WORKAROUND: convert denseMatrix to matrix, because otherwise:
# - Could not write element '/layers/integer_dense' of type 'dgeMatrix':
# no applicable method for 'h5writeDataset' applied to an object of class "c('dgeMatrix', 'unpackedMatrix', 'ddenseMatrix', 'generalMatrix', 'dMatrix', 'denseMatrix', 'compMa
# - Could not write element '/layers/integer_dense_with_nas' of type 'dgeMatrix':
# no applicable method for 'h5writeDataset' applied to an object of class "c('dgeMatrix', 'unpackedMatrix', 'ddenseMatrix', 'generalMatrix', 'dMatrix', 'denseMatrix', 'compMatrix', 'Matrix', 'replValueSp')"
# nolint end
if (inherits(m, "denseMatrix")) {
m <- as.matrix(m)
}
m
}
)
if (length(x_and_layers) == 0L) {
x <- NULL
Expand Down
18 changes: 18 additions & 0 deletions R/generate_dataframe.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#' Generate a dataframe
#'
#' Generate a dataframe with different types of columns
#'
#' @param num_rows Number of rows to generate
#' @param types Types of columns to generate
#'
#' @return A dataframe with the generated columns
#'
#' @noRd
#'
#' @examples
#' generate_dataframe(10L)
generate_dataframe <- function(num_rows, types = names(vector_generators)) {
data <- lapply(types, generate_vector, n = num_rows)
names(data) <- types
as.data.frame(data)
}
214 changes: 214 additions & 0 deletions R/generate_dataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
#' Generate a dataset
#'
#' Generate a dataset with different types of columns and layers
#'
#' @param n_obs Number of observations to generate
#' @param n_vars Number of variables to generate
#' @param format Object type to output, one of "list", "SingleCellExperiment",
#' or "Seurat"
#' @param ... Arguments passed to generate_dataset_as_list
#'
#' @return Object containing the generated dataset as defined by `output`
#'
#' @noRd
#'
#' @examples
#' dummy <- generate_dataset()
generate_dataset <- function(
n_obs = 10L,
n_vars = 20L,
format = c("list", "SingleCellExperiment", "Seurat"),
...) {
format <- match.arg(format)

fun <- switch(format,
"list" = generate_dataset_as_list,
"SingleCellExperiment" = generate_dataset_as_sce,
"Seurat" = generate_dataset_as_seurat
)

fun(n_obs = n_obs, n_vars = n_vars, ...)
}

#' Dummy data list
#'
#' Generate a dummy dataset as a list
#'
#' @param n_obs Number of observations to generate
#' @param n_vars Number of variables to generate
#'
#' @return A list with the generated dataset
#'
#' @noRd
generate_dataset_as_list <- function(
n_obs = 10L,
n_vars = 20L,
x_type = names(matrix_generators)[[1]],
layer_types = names(matrix_generators),
obs_types = names(vector_generators),
var_types = names(vector_generators),
obsm_types = c(names(matrix_generators), names(vector_generators)),
varm_types = c(names(matrix_generators), names(vector_generators)),
obsp_types = names(matrix_generators),
varp_types = names(matrix_generators)) {
# generate X
X <- generate_matrix(n_obs, n_vars, x_type)

# generate layers
layers <- lapply(layer_types, generate_matrix, n_obs = n_obs, n_vars = n_vars)
names(layers) <- layer_types

# generate obs
obs <- generate_dataframe(n_obs, obs_types)

# generate var
var <- generate_dataframe(n_vars, var_types)

# generate obs_names
obs_names <- paste0("cell", seq_len(n_obs))

# generate var_names
var_names <- paste0("gene", seq_len(n_vars))

# generate obsm
obsm <- lapply(obsm_types, function(obsm_type) {
if (obsm_type %in% names(vector_generators)) {
generate_dataframe(n_obs, obsm_type)
} else {
generate_matrix(n_obs, n_vars = 10L, obsm_type)
}
})
names(obsm) <- obsm_types

# generate varm
varm <- lapply(varm_types, function(varm_type) {
if (varm_type %in% names(vector_generators)) {
generate_dataframe(n_vars, varm_type)
} else {
generate_matrix(n_vars, n_vars = 10L, varm_type)
}
})
names(varm) <- varm_types

# generate obsp
obsp <- lapply(obsp_types, generate_matrix, n_obs = n_obs, n_vars = n_obs)
names(obsp) <- obsp_types

# generate varp
varp <- lapply(varp_types, generate_matrix, n_obs = n_vars, n_vars = n_vars)
names(varp) <- varp_types

# generate uns by combining other classes
uns <- list(
integer = 1L,
numeric = 1,
character = "a",
factor = factor("a"),
logical = TRUE,
integer_na = NA_integer_,
numeric_na = NA_real_,
character_na = NA_character_,
factor_na = NA_character_,
logical_na = NA,
list = list(1L, 1, "a", factor("a"), TRUE)
)
vectors_for_uns <- lapply(names(vector_generators), generate_vector, n = 10L)
names(vectors_for_uns) <- paste0("vec_", names(vector_generators))
obsm_for_uns <- obsm
names(obsm_for_uns) <- paste0("obsm_", names(obsm_for_uns))

uns <- c(
uns,
vectors_for_uns,
obsm_for_uns
)

list(
X = X,
obs = obs,
obs_names = obs_names,
obsm = obsm,
obsp = obsp,
var = var,
var_names = var_names,
varm = varm,
varp = varp,
layers = layers,
uns = uns
)
}

#' Dummy SingleCellExperiment
#'
#' Generate a dummy dataset as a SingleCellExperiment object
#'
#' @param ... Parameters passed to `generate_dataset_as_list`
#'
#' @return SingleCellExperiment containing the generated data
#'
#' @noRd
generate_dataset_as_sce <- function(...) {
if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) {
stop(
"Creating a SingleCellExperiment requires the 'SingleCellExperiment'",
"package to be installed"
)
}

dummy <- generate_dataset_as_list(...)

assays_list <- c(
list(X = dummy$X),
dummy$layers
)
assays_list <- lapply(assays_list, Matrix::t)

sce <- SingleCellExperiment::SingleCellExperiment(
assays = assays_list,
rowData = dummy$var,
colData = dummy$obs
)
colnames(sce) <- dummy$obs_names
rownames(sce) <- dummy$var_names

return(sce)
}

#' Dummy Seurat
#'
#' Generate a dummy dataset as a Seurat object
#'
#' @param ... Parameters passed to `generate_dataset_as_list`
#'
#' @return Seurat containing the generated data
#'
#' @noRd
generate_dataset_as_seurat <- function(...) {
if (!requireNamespace("SeuratObject", quietly = TRUE)) {
stop(
"Creating a Seurat requires the 'SeuratObject' package to be installed"
)
}

dummy <- generate_dataset_as_list(...)

X <- t(dummy$layers[["integer_csparse"]])
colnames(X) <- dummy$obs_names
rownames(X) <- dummy$var_names

seurat <- SeuratObject::CreateSeuratObject(X)

X2 <- Matrix::t(dummy$layers[["numeric_csparse"]])
colnames(X2) <- dummy$obs_names
rownames(X2) <- dummy$var_names
seurat <- SeuratObject::SetAssayData(seurat, "data", X2)

X3 <- Matrix::t(dummy$layers[["numeric_matrix"]])
colnames(X3) <- dummy$obs_names
rownames(X3) <- dummy$var_names
seurat <- SeuratObject::SetAssayData(seurat, "scale.data", X3)

seurat <- SeuratObject::AddMetaData(seurat, dummy$obs)

return(seurat)
}
92 changes: 92 additions & 0 deletions R/generate_matrix.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# nolint start
matrix_generators <- list(
numeric_matrix = function(n_obs, n_vars) {
matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars)
},
numeric_dense = function(n_obs, n_vars) {
m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars)
as(m, "denseMatrix")
},
numeric_csparse = function(n_obs, n_vars) {
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1)
as(m, "CsparseMatrix")
},
numeric_rsparse = function(n_obs, n_vars) {
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1)
as(m, "RsparseMatrix")
},
numeric_matrix_with_nas = function(n_obs, n_vars) {
m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars)
m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_
m
},
numeric_dense_with_nas = function(n_obs, n_vars) {
m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars)
m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_
as(m, "denseMatrix")
},
numeric_csparse_with_nas = function(n_obs, n_vars) {
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1)
m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_
as(m, "CsparseMatrix")
},
numeric_rsparse_with_nas = function(n_obs, n_vars) {
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1)
m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_
as(m, "RsparseMatrix")
},
integer_matrix = function(n_obs, n_vars) {
matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars)
},
integer_dense = function(n_obs, n_vars) {
m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars)
as(m, "denseMatrix")
},
integer_csparse = function(n_obs, n_vars) {
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1)
as(m, "CsparseMatrix")
},
integer_rsparse = function(n_obs, n_vars) {
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1)
as(m, "RsparseMatrix")
},
integer_matrix_with_nas = function(n_obs, n_vars) {
m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars)
m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_
m
},
integer_dense_with_nas = function(n_obs, n_vars) {
m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars)
m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_
as(m, "denseMatrix")
},
integer_csparse_with_nas = function(n_obs, n_vars) {
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1)
m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_
as(m, "CsparseMatrix")
},
integer_rsparse_with_nas = function(n_obs, n_vars) {
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1)
m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_
as(m, "RsparseMatrix")
}
)
# nolint end

#' Generate a matrix
#'
#' Generate a matrix of a given type
#'
#' @param n_obs Number of observations to generate
#' @param n_vars Number of variables to generate
#'
#' @return A matrix of the given type
#'
#' @noRd
#'
#' @examples
#' generate_matrix(10L, 20L)
generate_matrix <- function(n_obs, n_vars, type = names(matrix_generators)) {
type <- match.arg(type)
matrix_generators[[type]](n_obs, n_vars)
}
Loading

0 comments on commit f9e8a9a

Please sign in to comment.