-
-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add more types to dummy data * refactor dataset generators * apply styler * fix issues in generation functions * split up reticulate roundtrip test * run styler * skip commented code linting * incorporate regular roundtrip tests * add parameter to stop on error or not * refactor tests * improve tests * add has_row_names function * use inherits instead of is * don't generate an rd for generate_* functions * add workaround by manually converting densematrix to matrix * don't remove dimnames in validate_aligned_array * fix test after merge * disable tests with NAs for now * set default `include_index` to false * disable matrices with NAs for now * dont set dimnames on NULL * fix paste * format * fix bugs in tests, and disable tests that are failing * re-enable commented code in generator, filter in tests * disable some tests due to warnings * fix linting issues * use inherits instead of is
- Loading branch information
Showing
21 changed files
with
1,163 additions
and
357 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#' Generate a dataframe | ||
#' | ||
#' Generate a dataframe with different types of columns | ||
#' | ||
#' @param num_rows Number of rows to generate | ||
#' @param types Types of columns to generate | ||
#' | ||
#' @return A dataframe with the generated columns | ||
#' | ||
#' @noRd | ||
#' | ||
#' @examples | ||
#' generate_dataframe(10L) | ||
generate_dataframe <- function(num_rows, types = names(vector_generators)) { | ||
data <- lapply(types, generate_vector, n = num_rows) | ||
names(data) <- types | ||
as.data.frame(data) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
#' Generate a dataset | ||
#' | ||
#' Generate a dataset with different types of columns and layers | ||
#' | ||
#' @param n_obs Number of observations to generate | ||
#' @param n_vars Number of variables to generate | ||
#' @param format Object type to output, one of "list", "SingleCellExperiment", | ||
#' or "Seurat" | ||
#' @param ... Arguments passed to generate_dataset_as_list | ||
#' | ||
#' @return Object containing the generated dataset as defined by `output` | ||
#' | ||
#' @noRd | ||
#' | ||
#' @examples | ||
#' dummy <- generate_dataset() | ||
generate_dataset <- function( | ||
n_obs = 10L, | ||
n_vars = 20L, | ||
format = c("list", "SingleCellExperiment", "Seurat"), | ||
...) { | ||
format <- match.arg(format) | ||
|
||
fun <- switch(format, | ||
"list" = generate_dataset_as_list, | ||
"SingleCellExperiment" = generate_dataset_as_sce, | ||
"Seurat" = generate_dataset_as_seurat | ||
) | ||
|
||
fun(n_obs = n_obs, n_vars = n_vars, ...) | ||
} | ||
|
||
#' Dummy data list | ||
#' | ||
#' Generate a dummy dataset as a list | ||
#' | ||
#' @param n_obs Number of observations to generate | ||
#' @param n_vars Number of variables to generate | ||
#' | ||
#' @return A list with the generated dataset | ||
#' | ||
#' @noRd | ||
generate_dataset_as_list <- function( | ||
n_obs = 10L, | ||
n_vars = 20L, | ||
x_type = names(matrix_generators)[[1]], | ||
layer_types = names(matrix_generators), | ||
obs_types = names(vector_generators), | ||
var_types = names(vector_generators), | ||
obsm_types = c(names(matrix_generators), names(vector_generators)), | ||
varm_types = c(names(matrix_generators), names(vector_generators)), | ||
obsp_types = names(matrix_generators), | ||
varp_types = names(matrix_generators)) { | ||
# generate X | ||
X <- generate_matrix(n_obs, n_vars, x_type) | ||
|
||
# generate layers | ||
layers <- lapply(layer_types, generate_matrix, n_obs = n_obs, n_vars = n_vars) | ||
names(layers) <- layer_types | ||
|
||
# generate obs | ||
obs <- generate_dataframe(n_obs, obs_types) | ||
|
||
# generate var | ||
var <- generate_dataframe(n_vars, var_types) | ||
|
||
# generate obs_names | ||
obs_names <- paste0("cell", seq_len(n_obs)) | ||
|
||
# generate var_names | ||
var_names <- paste0("gene", seq_len(n_vars)) | ||
|
||
# generate obsm | ||
obsm <- lapply(obsm_types, function(obsm_type) { | ||
if (obsm_type %in% names(vector_generators)) { | ||
generate_dataframe(n_obs, obsm_type) | ||
} else { | ||
generate_matrix(n_obs, n_vars = 10L, obsm_type) | ||
} | ||
}) | ||
names(obsm) <- obsm_types | ||
|
||
# generate varm | ||
varm <- lapply(varm_types, function(varm_type) { | ||
if (varm_type %in% names(vector_generators)) { | ||
generate_dataframe(n_vars, varm_type) | ||
} else { | ||
generate_matrix(n_vars, n_vars = 10L, varm_type) | ||
} | ||
}) | ||
names(varm) <- varm_types | ||
|
||
# generate obsp | ||
obsp <- lapply(obsp_types, generate_matrix, n_obs = n_obs, n_vars = n_obs) | ||
names(obsp) <- obsp_types | ||
|
||
# generate varp | ||
varp <- lapply(varp_types, generate_matrix, n_obs = n_vars, n_vars = n_vars) | ||
names(varp) <- varp_types | ||
|
||
# generate uns by combining other classes | ||
uns <- list( | ||
integer = 1L, | ||
numeric = 1, | ||
character = "a", | ||
factor = factor("a"), | ||
logical = TRUE, | ||
integer_na = NA_integer_, | ||
numeric_na = NA_real_, | ||
character_na = NA_character_, | ||
factor_na = NA_character_, | ||
logical_na = NA, | ||
list = list(1L, 1, "a", factor("a"), TRUE) | ||
) | ||
vectors_for_uns <- lapply(names(vector_generators), generate_vector, n = 10L) | ||
names(vectors_for_uns) <- paste0("vec_", names(vector_generators)) | ||
obsm_for_uns <- obsm | ||
names(obsm_for_uns) <- paste0("obsm_", names(obsm_for_uns)) | ||
|
||
uns <- c( | ||
uns, | ||
vectors_for_uns, | ||
obsm_for_uns | ||
) | ||
|
||
list( | ||
X = X, | ||
obs = obs, | ||
obs_names = obs_names, | ||
obsm = obsm, | ||
obsp = obsp, | ||
var = var, | ||
var_names = var_names, | ||
varm = varm, | ||
varp = varp, | ||
layers = layers, | ||
uns = uns | ||
) | ||
} | ||
|
||
#' Dummy SingleCellExperiment | ||
#' | ||
#' Generate a dummy dataset as a SingleCellExperiment object | ||
#' | ||
#' @param ... Parameters passed to `generate_dataset_as_list` | ||
#' | ||
#' @return SingleCellExperiment containing the generated data | ||
#' | ||
#' @noRd | ||
generate_dataset_as_sce <- function(...) { | ||
if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) { | ||
stop( | ||
"Creating a SingleCellExperiment requires the 'SingleCellExperiment'", | ||
"package to be installed" | ||
) | ||
} | ||
|
||
dummy <- generate_dataset_as_list(...) | ||
|
||
assays_list <- c( | ||
list(X = dummy$X), | ||
dummy$layers | ||
) | ||
assays_list <- lapply(assays_list, Matrix::t) | ||
|
||
sce <- SingleCellExperiment::SingleCellExperiment( | ||
assays = assays_list, | ||
rowData = dummy$var, | ||
colData = dummy$obs | ||
) | ||
colnames(sce) <- dummy$obs_names | ||
rownames(sce) <- dummy$var_names | ||
|
||
return(sce) | ||
} | ||
|
||
#' Dummy Seurat | ||
#' | ||
#' Generate a dummy dataset as a Seurat object | ||
#' | ||
#' @param ... Parameters passed to `generate_dataset_as_list` | ||
#' | ||
#' @return Seurat containing the generated data | ||
#' | ||
#' @noRd | ||
generate_dataset_as_seurat <- function(...) { | ||
if (!requireNamespace("SeuratObject", quietly = TRUE)) { | ||
stop( | ||
"Creating a Seurat requires the 'SeuratObject' package to be installed" | ||
) | ||
} | ||
|
||
dummy <- generate_dataset_as_list(...) | ||
|
||
X <- t(dummy$layers[["integer_csparse"]]) | ||
colnames(X) <- dummy$obs_names | ||
rownames(X) <- dummy$var_names | ||
|
||
seurat <- SeuratObject::CreateSeuratObject(X) | ||
|
||
X2 <- Matrix::t(dummy$layers[["numeric_csparse"]]) | ||
colnames(X2) <- dummy$obs_names | ||
rownames(X2) <- dummy$var_names | ||
seurat <- SeuratObject::SetAssayData(seurat, "data", X2) | ||
|
||
X3 <- Matrix::t(dummy$layers[["numeric_matrix"]]) | ||
colnames(X3) <- dummy$obs_names | ||
rownames(X3) <- dummy$var_names | ||
seurat <- SeuratObject::SetAssayData(seurat, "scale.data", X3) | ||
|
||
seurat <- SeuratObject::AddMetaData(seurat, dummy$obs) | ||
|
||
return(seurat) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# nolint start | ||
matrix_generators <- list( | ||
numeric_matrix = function(n_obs, n_vars) { | ||
matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) | ||
}, | ||
numeric_dense = function(n_obs, n_vars) { | ||
m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) | ||
as(m, "denseMatrix") | ||
}, | ||
numeric_csparse = function(n_obs, n_vars) { | ||
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) | ||
as(m, "CsparseMatrix") | ||
}, | ||
numeric_rsparse = function(n_obs, n_vars) { | ||
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) | ||
as(m, "RsparseMatrix") | ||
}, | ||
numeric_matrix_with_nas = function(n_obs, n_vars) { | ||
m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) | ||
m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_ | ||
m | ||
}, | ||
numeric_dense_with_nas = function(n_obs, n_vars) { | ||
m <- matrix(runif(n_obs * n_vars), nrow = n_obs, ncol = n_vars) | ||
m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_ | ||
as(m, "denseMatrix") | ||
}, | ||
numeric_csparse_with_nas = function(n_obs, n_vars) { | ||
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) | ||
m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_ | ||
as(m, "CsparseMatrix") | ||
}, | ||
numeric_rsparse_with_nas = function(n_obs, n_vars) { | ||
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) | ||
m[seq(1, n_obs * n_vars, by = 2)] <- NA_real_ | ||
as(m, "RsparseMatrix") | ||
}, | ||
integer_matrix = function(n_obs, n_vars) { | ||
matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) | ||
}, | ||
integer_dense = function(n_obs, n_vars) { | ||
m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) | ||
as(m, "denseMatrix") | ||
}, | ||
integer_csparse = function(n_obs, n_vars) { | ||
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) | ||
as(m, "CsparseMatrix") | ||
}, | ||
integer_rsparse = function(n_obs, n_vars) { | ||
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) | ||
as(m, "RsparseMatrix") | ||
}, | ||
integer_matrix_with_nas = function(n_obs, n_vars) { | ||
m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) | ||
m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_ | ||
m | ||
}, | ||
integer_dense_with_nas = function(n_obs, n_vars) { | ||
m <- matrix(sample.int(100L, n_obs * n_vars, replace = TRUE), nrow = n_obs, ncol = n_vars) | ||
m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_ | ||
as(m, "denseMatrix") | ||
}, | ||
integer_csparse_with_nas = function(n_obs, n_vars) { | ||
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) | ||
m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_ | ||
as(m, "CsparseMatrix") | ||
}, | ||
integer_rsparse_with_nas = function(n_obs, n_vars) { | ||
m <- Matrix::rsparsematrix(nrow = n_obs, ncol = n_vars, density = .1) | ||
m[seq(1, n_obs * n_vars, by = 2)] <- NA_integer_ | ||
as(m, "RsparseMatrix") | ||
} | ||
) | ||
# nolint end | ||
|
||
#' Generate a matrix | ||
#' | ||
#' Generate a matrix of a given type | ||
#' | ||
#' @param n_obs Number of observations to generate | ||
#' @param n_vars Number of variables to generate | ||
#' | ||
#' @return A matrix of the given type | ||
#' | ||
#' @noRd | ||
#' | ||
#' @examples | ||
#' generate_matrix(10L, 20L) | ||
generate_matrix <- function(n_obs, n_vars, type = names(matrix_generators)) { | ||
type <- match.arg(type) | ||
matrix_generators[[type]](n_obs, n_vars) | ||
} |
Oops, something went wrong.