-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
326 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
from importlib.metadata import version | ||
|
||
from . import pl, pp, tl | ||
|
||
__all__ = ["pl", "pp", "tl"] | ||
from .generate_dataframe import generate_dataframe | ||
from .generate_dataset import generate_dataset | ||
from .generate_dict import generate_scalar, generate_type, generate_dict | ||
from .generate_matrix import generate_matrix | ||
from .generate_vector import generate_vector | ||
|
||
# Version string looked up from the installed "dummy-anndata" distribution metadata.
__version__ = version("dummy-anndata")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import pandas as pd | ||
from generate_vector import vector_generators | ||
|
||
|
||
def generate_dataframe(n_rows, types=None):
    """
    Build a pandas DataFrame with one column per requested vector type.

    Parameters:
        n_rows (int): Length passed to every vector generator.
        types (list, optional): Keys of vector_generators to include, in
            column order. When omitted, every registered vector type is used.

    Returns:
        pandas.DataFrame: One column per entry of `types`, named after it.

    Raises:
        KeyError: If an entry of `types` is not a key of vector_generators.
    """
    column_names = list(vector_generators.keys()) if types is None else types

    columns = {}
    for name in column_names:
        columns[name] = vector_generators[name](n_rows)

    return pd.DataFrame(columns)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import anndata as ad | ||
|
||
from generate_matrix import matrix_generators | ||
from generate_vector import vector_generators | ||
from generate_dataframe import generate_dataframe | ||
from generate_dict import scalar_generators, generate_dict | ||
|
||
|
||
def generate_dataset(
    n_obs=10,
    n_vars=20,
    x_type="generate_integer_matrix",
    layer_types=None,
    obs_types=None,
    var_types=None,
    obsm_types=None,
    varm_types=None,
    obsp_types=None,
    varp_types=None,
    uns_types=None,
):
    """
    Assemble an AnnData object whose slots are filled by the registered generators.

    Parameters:
        n_obs (int): Number of observations (rows of X).
        n_vars (int): Number of variables (columns of X).
        x_type (str): Key of matrix_generators used for X.
        layer_types (list, optional): matrix_generators keys, one layer each.
        obs_types (list, optional): vector_generators keys, one obs column each.
        var_types (list, optional): vector_generators keys, one var column each.
        obsm_types (list, optional): matrix or vector generator keys for obsm.
        varm_types (list, optional): matrix or vector generator keys for varm.
        obsp_types (list, optional): matrix_generators keys for obsp.
        varp_types (list, optional): matrix_generators keys for varp.
        uns_types (list, optional): Type names forwarded to generate_dict.

        Every `*_types` argument left as None falls back to all keys that are
        valid for that slot.

    Returns:
        anndata.AnnData: The generated dataset.

    Raises:
        AssertionError: If x_type or any entry of the layer/obs/var/obsm/varm/
            obsp/varp lists is not a known generator key. uns_types is not
            validated here.
    """
    assert x_type in matrix_generators, f"Unknown matrix type: {x_type}"

    matrix_only = (matrix_generators,)
    vector_only = (vector_generators,)
    matrix_or_vector = (matrix_generators, vector_generators)

    # Checked in this order so the first offending slot determines the message.
    requested_by_slot = (
        (layer_types, matrix_only, "Unknown layer type"),
        (obs_types, vector_only, "Unknown obs type"),
        (var_types, vector_only, "Unknown var type"),
        (obsm_types, matrix_or_vector, "Unknown obsm type"),
        (varm_types, matrix_or_vector, "Unknown varm type"),
        (obsp_types, matrix_only, "Unknown obsp type"),
        (varp_types, matrix_only, "Unknown varp type"),
    )
    for requested, registries, message in requested_by_slot:
        assert requested is None or all(
            any(name in registry for registry in registries) for name in requested
        ), message
    # TODO uns types

    # Fill in the "everything valid for this slot" defaults.
    matrix_names = list(matrix_generators.keys())
    vector_names = list(vector_generators.keys())
    layer_types = matrix_names if layer_types is None else layer_types
    obs_types = vector_names if obs_types is None else obs_types
    var_types = vector_names if var_types is None else var_types
    obsm_types = matrix_names + vector_names if obsm_types is None else obsm_types
    varm_types = matrix_names + vector_names if varm_types is None else varm_types
    obsp_types = matrix_names if obsp_types is None else obsp_types
    varp_types = matrix_names if varp_types is None else varp_types
    if uns_types is None:
        uns_types = vector_names + matrix_names + list(scalar_generators.keys())

    def _axis_annotations(kinds, size):
        # Matrix kinds become size x size matrices, vector kinds length-size
        # vectors; a kind found in neither registry is skipped.
        annotations = {}
        for kind in kinds:
            if kind in matrix_generators:
                annotations[kind] = matrix_generators[kind](size, size)
            elif kind in vector_generators:
                annotations[kind] = vector_generators[kind](size)
        return annotations

    def _pairwise(kinds, size):
        # One size x size matrix per requested kind.
        return {kind: matrix_generators[kind](size, size) for kind in kinds}

    X = matrix_generators[x_type](n_obs, n_vars)
    layers = {kind: matrix_generators[kind](n_obs, n_vars) for kind in layer_types}

    obs = generate_dataframe(n_obs, obs_types)
    var = generate_dataframe(n_vars, var_types)
    obs.index = [f"Cell{i:03d}" for i in range(n_obs)]
    var.index = [f"Gene{i:03d}" for i in range(n_vars)]

    obsm = _axis_annotations(obsm_types, n_obs)
    varm = _axis_annotations(varm_types, n_vars)
    obsp = _pairwise(obsp_types, n_obs)
    varp = _pairwise(varp_types, n_vars)

    uns = generate_dict(n_obs, n_vars, uns_types)

    return ad.AnnData(
        X,
        layers=layers,
        obs=obs,
        var=var,
        obsm=obsm,
        varm=varm,
        obsp=obsp,
        varp=varp,
        uns=uns,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from generate_vector import vector_generators | ||
from generate_matrix import matrix_generators | ||
|
||
import pandas as pd | ||
import numpy as np | ||
|
||
# Fixed scalar fixtures keyed by type name; insertion order is preserved.
# pd.NA is deliberately left out: it cannot be written to an h5 group.
scalar_generators = dict(
    string="version",
    char="a",
    integer=1,
    float=1.0,
    boolean=True,
    none=None,
    nan=np.nan,
)
|
||
|
||
def generate_scalar(scalar_type):
    """
    Produce the value registered for a scalar type name.

    Parameters:
        scalar_type (str): Either a key of scalar_generators, or
            "scalar_<name>" where <name> is a key of vector_generators.

    Returns:
        For "scalar_<name>", the output of that vector generator called with
        length 1; otherwise the value stored in scalar_generators.

    Raises:
        KeyError: If the name is not found in the registry it is looked up in.
    """
    prefix = "scalar_"
    has_prefix = scalar_type[: len(prefix)] == prefix
    if not has_prefix:
        return scalar_generators[scalar_type]

    # Strip the prefix and ask the vector generator for a single element.
    vector_name = scalar_type[len(prefix):]
    return vector_generators[vector_name](1)
|
||
|
||
def generate_type(type, n_rows, n_cols):
    """
    Generate a value for a type name, trying scalars, then vectors, then matrices.

    Parameters:
        type (str): A scalar name (or "scalar_"-prefixed name), a key of
            vector_generators, or a key of matrix_generators.
        n_rows (int): Vector length, and row count for matrices.
        n_cols (int): Column count for matrices.

    Returns:
        The generated value, or None when the name matches no registry.
    """
    looks_scalar = type in scalar_generators or type[:7] == "scalar_"
    if looks_scalar:
        generated = generate_scalar(type)
    elif type in vector_generators:
        generated = vector_generators[type](n_rows)
    elif type in matrix_generators:
        generated = matrix_generators[type](n_rows, n_cols)
    else:
        # Unknown names are not an error here; the caller gets None.
        generated = None
    return generated
|
||
|
||
def generate_dict(n_rows, n_cols, types=None, nested=True):
    """
    Build a dict with one generated entry per type name.

    Parameters:
        n_rows (int): Vector length, and row count for matrices.
        n_cols (int): Column count for matrices.
        types (list, optional): Type names to generate. Defaults to all
            scalars, a "scalar_"-prefixed variant of every vector type, all
            vector types and all matrix types, in that order.
        nested (bool): When true, add a "nested" key holding a second,
            non-nested dict generated from the same types.

    Returns:
        dict: Type name -> generated value (plus "nested" when requested).
    """
    if types is None:
        types = [
            *scalar_generators.keys(),
            *(f"scalar_{t}" for t in vector_generators.keys()),
            *vector_generators.keys(),
            *matrix_generators.keys(),
        ]

    data = {}
    for entry in types:
        data[entry] = generate_type(entry, n_rows, n_cols)

    if not nested:
        return data

    # Only one level of nesting: the inner call disables further recursion.
    data["nested"] = generate_dict(n_rows, n_cols, types, False)
    return data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import numpy as np | ||
import scipy as sp | ||
|
||
|
||
def float_mtx(n_obs, n_vars, NAs=False):
    """
    Generate a dense float matrix with values 0.5, 1.5, 2.5, ... in row-major order.

    Parameters:
        n_obs (int): Number of rows.
        n_vars (int): Number of columns.
        NAs (bool): When true, the top-left cell is replaced by np.nan.
            A matrix with no cells is returned unchanged.

    Returns:
        numpy.ndarray: Float array of shape (n_obs, n_vars).
    """
    # add 0.5 to easily spot conversion issues
    mtx = np.arange(n_obs * n_vars, dtype=float).reshape(n_obs, n_vars) + 0.5
    # numpy matrices do not support pd.NA, so np.nan marks the missing value;
    # an empty matrix has no [0, 0] cell, so skip it instead of raising IndexError
    if NAs and mtx.size > 0:
        mtx[0, 0] = np.nan
    return mtx
|
||
|
||
def int_mtx(n_obs, n_vars):
    """
    Generate a dense integer matrix counting 0, 1, 2, ... in row-major order.

    Parameters:
        n_obs (int): Number of rows.
        n_vars (int): Number of columns.

    Returns:
        numpy.ndarray: Integer array of shape (n_obs, n_vars).
    """
    cell_count = n_obs * n_vars
    return np.arange(cell_count).reshape((n_obs, n_vars))
|
||
|
||
# Registry of matrix generators, keyed by type name.
# Each entry takes (n_obs, n_vars) and returns a dense or scipy sparse matrix.
# There are no integer "_nas" variants: integer matrices do not support NAs in Python.
matrix_generators = {
    # float, dense
    "generate_float_matrix": lambda n_obs, n_vars: float_mtx(
        n_obs, n_vars, NAs=False
    ),
    "generate_float_matrix_nas": lambda n_obs, n_vars: float_mtx(
        n_obs, n_vars, NAs=True
    ),
    # float, compressed sparse column
    "generate_float_csparse": lambda n_obs, n_vars: sp.sparse.csc_matrix(
        float_mtx(n_obs, n_vars, NAs=False)
    ),
    "generate_float_csparse_nas": lambda n_obs, n_vars: sp.sparse.csc_matrix(
        float_mtx(n_obs, n_vars, NAs=True)
    ),
    # float, compressed sparse row
    "generate_float_rsparse": lambda n_obs, n_vars: sp.sparse.csr_matrix(
        float_mtx(n_obs, n_vars, NAs=False)
    ),
    "generate_float_rsparse_nas": lambda n_obs, n_vars: sp.sparse.csr_matrix(
        float_mtx(n_obs, n_vars, NAs=True)
    ),
    # integer: dense, sparse column, sparse row
    "generate_integer_matrix": lambda n_obs, n_vars: int_mtx(n_obs, n_vars),
    "generate_integer_csparse": lambda n_obs, n_vars: sp.sparse.csc_matrix(
        int_mtx(n_obs, n_vars)
    ),
    "generate_integer_rsparse": lambda n_obs, n_vars: sp.sparse.csr_matrix(
        int_mtx(n_obs, n_vars)
    ),
}
|
||
|
||
def generate_matrix(n_obs, n_vars, matrix_type):
    """
    Look up a matrix generator by name and call it with the given dimensions.

    Parameters:
        n_obs (int): The number of observations (rows) in the matrix.
        n_vars (int): The number of variables (columns) in the matrix.
        matrix_type (str): A key of matrix_generators.

    Returns:
        The generated matrix, either numpy.ndarray or scipy.sparse.csc_matrix
        or scipy.sparse.csr_matrix.

    Raises:
        AssertionError: If the matrix_type is unknown.
    """
    is_known = matrix_type in matrix_generators
    assert is_known, f"Unknown matrix type: {matrix_type}"

    make_matrix = matrix_generators[matrix_type]
    return make_matrix(n_obs, n_vars)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import pandas as pd | ||
import numpy as np | ||
|
||
|
||
def nullable_integer_array(n):
    """
    Build a pandas "Int64" array holding 0..n-1 with the first value missing.

    Parameters:
        n (int): Length of the array; must be positive.

    Returns:
        A pandas array with dtype "Int64" whose element 0 is null.

    Raises:
        AssertionError: If n is not positive.
    """
    assert n > 0, "an integer array must be at least one value"
    values = list(range(n))
    # np.nan, pd.NA and None should all end up as null values, masked in the h5ad file
    values[0] = np.nan
    return pd.array(values, dtype="Int64")
|
||
|
||
def nullable_boolean_array(n):
    """
    Build a pandas "boolean" array alternating True/False with the first value missing.

    Parameters:
        n (int): Length of the array; must be positive.

    Returns:
        A pandas array with dtype "boolean" whose element 0 is pd.NA.

    Raises:
        AssertionError: If n is not positive.
    """
    assert n > 0, "a boolean array must be at least one value"
    # Even positions are True, odd positions are False.
    alternating = [position % 2 == 0 for position in range(n)]
    flags = pd.array(alternating, dtype="boolean")
    # np.nan, pd.NA and None should all end up as null values, masked in the h5ad file
    flags[0] = pd.NA
    return flags
|
||
|
||
def missing_values_categorical(n, ordered=True):
    """
    Build a two-category Categorical alternating "Value1"/"Value2" with the first value missing.

    Parameters:
        n (int): Number of values; must be positive.
        ordered (bool): Whether the categorical is ordered.

    Returns:
        pandas.Categorical: Categories ["Value1", "Value2"], element 0 missing.

    Raises:
        AssertionError: If n is not positive.
    """
    assert n > 0, "a categorical must be at least one value"
    labels = ["Value1", "Value2"]
    values = [labels[position % 2] for position in range(n)]
    categorical = pd.Categorical(values, categories=labels, ordered=ordered)
    # They should all end up as code -1 in the h5ad file
    categorical[0] = np.nan
    return categorical
|
||
|
||
# Registry of vector generators, keyed by type name.
# Each entry takes the length n and returns a numpy array, pandas array or Categorical.
vector_generators = {
    # Categoricals alternate between two labels, starting with "Value1".
    "categorical": lambda n: pd.Categorical(
        ["Value2" if i % 2 else "Value1" for i in range(n)]
    ),
    "categorical_ordered": lambda n: pd.Categorical(
        ["Value2" if i % 2 else "Value1" for i in range(n)], ordered=True
    ),
    "categorical_missing_values": lambda n: missing_values_categorical(
        n, ordered=False
    ),
    "categorical_ordered_missing_values": lambda n: missing_values_categorical(
        n, ordered=True
    ),
    "string_array": lambda n: np.array([f"value_{i}" for i in range(n)]),
    # should we also check a 1d sparse array? We should probably leave it for the matrix generation?
    # The 0.5 offset keeps the values visibly non-integer.
    "dense_array": lambda n: 0.5 + np.arange(n, dtype=float),
    "integer_array": lambda n: np.array(list(range(n))),
    "nullable_integer_array": nullable_integer_array,
    # Even positions are True, odd positions are False.
    "boolean_array": lambda n: np.array([i % 2 == 0 for i in range(n)]),
    "nullable_boolean_array": nullable_boolean_array,
}
|
||
|
||
def generate_vector(n, vector_type):
    """
    Look up a vector generator by name and call it with the given length.

    Parameters:
        n (int): The length of the vector.
        vector_type (str): A key of vector_generators.

    Returns:
        Whatever the selected generator produces for length n.

    Raises:
        AssertionError: If the vector_type is unknown.
    """
    # reject names that are not registered
    is_known = vector_type in vector_generators
    assert is_known, f"Unknown vector type: {vector_type}"

    make_vector = vector_generators[vector_type]
    return make_vector(n)
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.